feat: add document pipeline, ReAct agent, and knowledge core services

Document Pipeline (Team A):
- LibrarianService: document storage with filesystem backend, metadata
  persistence, child document hierarchy, collection management
- ChunkingService: recursive character text splitter with configurable
  chunk size/overlap, FlowProcessor pattern
- KnowledgeExtractService: combined relationship + definition extraction
  using prompt service and LLM, emits RDF triples and entity contexts
- KnowledgeCoreService: knowledge core CRUD with streaming export and
  flow-based loading

ReAct Agent (Team B):
- StreamingReActParser: state machine for parsing LLM output into
  Thought/Action/ActionInput/FinalAnswer sections
- Three MVP tools: KnowledgeQuery (GraphRAG), DocumentQuery (DocRAG),
  TriplesQuery with RequestResponse clients
- AgentService FlowProcessor with ReAct loop, tool execution, and
  streaming chunk responses (thought/observation/answer)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
elpresidank 2026-04-06 00:19:37 -05:00
parent 5ed3f0e2d8
commit f09ef4de45
18 changed files with 2145 additions and 2 deletions

View file

@ -0,0 +1,106 @@
/**
* Recursive character text splitter.
*
* Matches the behaviour of LangChain's RecursiveCharacterTextSplitter:
* 1. Try separators in order: "\n\n", "\n", " ", ""
* 2. Split on the best separator that exists in the text
* 3. Merge small pieces until they approach chunkSize
* 4. Recursively split pieces that exceed chunkSize with the next separator
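* 5. Apply overlap: prefix every chunk after the first with the trailing
*    chunkOverlap chars of the previous chunk (overlap is added on top of the
*    split, so an emitted chunk can be longer than chunkSize)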
*
* Python reference: trustgraph-flow/trustgraph/chunking/recursive_splitter/service.py
*/
/**
 * Separators tried in order, coarsest first: blank line, newline, space.
 * The final empty string makes the splitter fall back to splitting the text
 * into individual characters.
 */
const DEFAULT_SEPARATORS = ["\n\n", "\n", " ", ""];
/**
 * Public entry point of the splitter: splits `text` into chunks using the
 * default separator list.
 *
 * @param text - The text to split.
 * @param chunkSize - Target maximum chunk length, compared against string
 *   `.length`. Text no longer than this is returned as a single chunk.
 * @param chunkOverlap - Number of trailing characters of the previous chunk
 *   that are prepended to each following chunk.
 * @returns The chunks in document order; an empty array when `text` fits in
 *   one chunk but contains only whitespace.
 */
export function recursiveSplit(
text: string,
chunkSize: number,
chunkOverlap: number,
): string[] {
// All of the work happens in splitRecursive; this wrapper only supplies the separators.
return splitRecursive(text, chunkSize, chunkOverlap, DEFAULT_SEPARATORS);
}
/**
 * Splits `text` on the coarsest separator it contains, merges the pieces back
 * up towards `chunkSize`, re-splits anything that is still too large using the
 * finer separators, and finally adds overlap between neighbouring chunks.
 *
 * Overlap is applied exactly once, over the final flat list of chunks. Applying
 * it inside every recursion level (as an earlier version did) prefixes nested
 * chunks more than once and duplicates text at chunk boundaries.
 *
 * @param text - The text to split.
 * @param chunkSize - Maximum chunk length (string `.length`) before overlap is
 *   added.
 * @param chunkOverlap - Number of trailing characters of the previous chunk to
 *   prepend to each following chunk; passed through to `applyOverlap`.
 * @param separators - Candidate separators ordered from coarsest to finest. An
 *   empty string means "split into individual characters".
 * @returns The chunks in document order. Whitespace-only chunks are dropped.
 */
function splitRecursive(
  text: string,
  chunkSize: number,
  chunkOverlap: number,
  separators: string[],
): string[] {
  // Pure splitting pass: recursion happens here so that no intermediate level
  // ever sees (or adds) overlap.
  const splitWithoutOverlap = (segment: string, candidates: string[]): string[] => {
    if (segment.length <= chunkSize) {
      return segment.trim().length > 0 ? [segment] : [];
    }

    // Pick the first separator that occurs in the segment; "" always matches.
    const index = candidates.findIndex(
      (sep) => sep === "" || segment.includes(sep),
    );
    const separator = index >= 0 ? candidates[index] : "";
    // When nothing matched we already fall back to per-character splitting, so
    // there is nothing finer left to try. An empty list guarantees termination.
    const finer = index >= 0 ? candidates.slice(index + 1) : [];

    // Spreading splits by code point, which keeps surrogate pairs together.
    const pieces = separator === "" ? [...segment] : segment.split(separator);
    const merged = mergePieces(pieces, separator, chunkSize);

    const chunks: string[] = [];
    for (const chunk of merged) {
      if (chunk.length > chunkSize && finer.length > 0) {
        // A single piece was larger than chunkSize: break it down further.
        chunks.push(...splitWithoutOverlap(chunk, finer));
      } else if (chunk.trim().length > 0) {
        chunks.push(chunk);
      }
    }
    return chunks;
  };

  return applyOverlap(splitWithoutOverlap(text, separators), chunkOverlap);
}
/**
 * Greedily joins consecutive pieces with `separator` into chunks.
 *
 * A piece is appended to the chunk being built as long as the joined result
 * does not exceed `chunkSize`; otherwise the current chunk is emitted and the
 * piece starts a new one. A single piece that is itself longer than
 * `chunkSize` is emitted unchanged as its own chunk.
 *
 * @param pieces - Fragments produced by splitting on `separator`.
 * @param separator - String re-inserted between joined pieces.
 * @param chunkSize - Maximum length of a joined chunk.
 * @returns The merged chunks, in order; empty chunks are never emitted.
 */
function mergePieces(
  pieces: string[],
  separator: string,
  chunkSize: number,
): string[] {
  const merged: string[] = [];
  let buffer = "";

  pieces.forEach((piece) => {
    // An empty buffer simply adopts the piece, without a leading separator.
    if (buffer.length === 0) {
      buffer = piece;
      return;
    }

    const joined = `${buffer}${separator}${piece}`;
    if (joined.length > chunkSize) {
      // The piece does not fit: flush what we have and start over with it.
      merged.push(buffer);
      buffer = piece;
    } else {
      buffer = joined;
    }
  });

  if (buffer.length > 0) {
    merged.push(buffer);
  }
  return merged;
}
/**
 * Prefixes every chunk after the first with the tail of the chunk before it.
 *
 * @param chunks - Chunks in document order.
 * @param overlapSize - Number of trailing characters to copy from the previous
 *   chunk; if the previous chunk is shorter, all of it is copied.
 * @returns A new array with overlap applied, or `chunks` itself when
 *   `overlapSize` is zero or negative or there are fewer than two chunks.
 */
function applyOverlap(chunks: string[], overlapSize: number): string[] {
  if (chunks.length <= 1 || overlapSize <= 0) return chunks;

  return chunks.map((chunk, index) => {
    if (index === 0) return chunk;
    // The tail is always taken from the original (un-prefixed) previous chunk.
    const previous = chunks[index - 1];
    const tailStart = Math.max(0, previous.length - overlapSize);
    return previous.slice(tailStart) + chunk;
  });
}

View file

@ -0,0 +1,94 @@
/**
* Chunking service splits text documents into chunks for downstream processing.
*
* A FlowProcessor that:
* 1. Consumes TextDocument messages
* 2. Splits text using recursive character text splitting
* 3. Emits Chunk messages for each resulting chunk
*
* Python reference: trustgraph-flow/trustgraph/chunking/recursive_splitter/service.py
*/
import {
FlowProcessor,
ConsumerSpec,
ProducerSpec,
ParameterSpec,
type ProcessorConfig,
type FlowContext,
type TextDocument,
type Chunk,
type Triples,
} from "@trustgraph/base";
import { recursiveSplit } from "./recursive-splitter.js";
/** Fallback chunk size used by ChunkingService when the flow does not supply a usable "chunk-size" parameter. */
const DEFAULT_CHUNK_SIZE = 2000;
/** Fallback chunk overlap used by ChunkingService when the flow does not supply a usable "chunk-overlap" parameter. */
const DEFAULT_CHUNK_OVERLAP = 100;
/**
 * Flow processor that turns each incoming TextDocument into a sequence of
 * Chunk messages.
 *
 * Registered specifications:
 * - consumer "input": TextDocument messages, handled by `onMessage`
 * - producer "output": one Chunk message per chunk of text
 * - producer "triples": registered here, but nothing in this class sends on it
 * - parameters "chunk-size" and "chunk-overlap": per-flow splitter settings
 */
export class ChunkingService extends FlowProcessor {
  constructor(config: ProcessorConfig) {
    super(config);

    this.registerSpecification(
      new ConsumerSpec<TextDocument>("input", this.onMessage.bind(this)),
    );
    this.registerSpecification(new ProducerSpec<Chunk>("output"));
    this.registerSpecification(new ProducerSpec<Triples>("triples"));
    this.registerSpecification(new ParameterSpec("chunk-size"));
    this.registerSpecification(new ParameterSpec("chunk-overlap"));

    console.log("[ChunkingService] Service initialized");
  }

  /**
   * Reads a numeric flow parameter, falling back to `fallback` when it cannot
   * be used.
   *
   * The fallback applies when the lookup throws, when it yields
   * `null`/`undefined`, or when the value is not a finite number of at least
   * `minimum`. Without this check an unusable value reaches the splitter, where
   * comparisons against NaN or zero degrade into one giant chunk or
   * per-character chunks.
   *
   * @param flowCtx - Context of the flow the current message belongs to.
   * @param name - Parameter name as registered in the constructor.
   * @param fallback - Value returned when the parameter is unusable.
   * @param minimum - Smallest acceptable value (inclusive).
   * @returns The configured value as a number, or `fallback`.
   */
  private readNumericParameter(
    flowCtx: FlowContext,
    name: string,
    fallback: number,
    minimum: number,
  ): number {
    let raw: unknown;
    try {
      raw = flowCtx.flow.parameter<number>(name);
    } catch {
      // NOTE(review): presumably thrown when the flow does not define the
      // parameter; falling back is the intended best-effort behaviour.
      return fallback;
    }
    if (raw == null) return fallback;

    // Coerce so that a numeric string from configuration is accepted as well.
    const value = Number(raw);
    if (!Number.isFinite(value) || value < minimum) {
      console.warn(
        `[ChunkingService] Ignoring invalid ${name} value ${String(raw)}; using default ${fallback}`,
      );
      return fallback;
    }
    return value;
  }

  /**
   * Splits the text of one document and sends each chunk on the "output"
   * producer, keyed by the request id taken from `properties.id`.
   *
   * Messages without an id, and documents whose text is empty or
   * whitespace-only, produce no output.
   *
   * @param msg - The document to split.
   * @param properties - Message properties; `id` is used as the send key.
   * @param flowCtx - Context giving access to the flow's parameters and producers.
   */
  private async onMessage(
    msg: TextDocument,
    properties: Record<string, string>,
    flowCtx: FlowContext,
  ): Promise<void> {
    const requestId = properties.id;
    if (!requestId) return;

    // A chunk must hold at least one character; an overlap of zero is valid.
    const chunkSize = this.readNumericParameter(
      flowCtx,
      "chunk-size",
      DEFAULT_CHUNK_SIZE,
      1,
    );
    const chunkOverlap = this.readNumericParameter(
      flowCtx,
      "chunk-overlap",
      DEFAULT_CHUNK_OVERLAP,
      0,
    );

    const text = msg.text;
    if (!text || text.trim().length === 0) {
      console.warn(`[ChunkingService] Empty text received for document ${msg.documentId}`);
      return;
    }

    const chunks = recursiveSplit(text, chunkSize, chunkOverlap);
    console.log(
      `[ChunkingService] Split document ${msg.documentId} into ${chunks.length} chunks (size=${chunkSize}, overlap=${chunkOverlap})`,
    );

    const outputProducer = flowCtx.flow.producer<Chunk>("output");
    // Sent one at a time, awaiting each send, so chunks are submitted in document order.
    for (const chunkText of chunks) {
      const chunk: Chunk = {
        metadata: msg.metadata,
        chunk: chunkText,
        documentId: msg.documentId,
      };
      await outputProducer.send(requestId, chunk);
    }
  }
}
/**
 * Entry point for the chunking processor: calls `ChunkingService.launch` with
 * the identifier "chunking" and waits for it to settle.
 *
 * NOTE(review): `launch` is not defined in this file — presumably inherited
 * from FlowProcessor; confirm what the identifier is used for.
 */
export async function run(): Promise<void> {
await ChunkingService.launch("chunking");
}