feat: add document pipeline, ReAct agent, and knowledge core services

Document Pipeline (Team A): - LibrarianService: document storage with filesystem backend, metadata persistence, child document hierarchy, collection management - ChunkingService: recursive character text splitter with configurable chunk size/overlap, FlowProcessor pattern - KnowledgeExtractService: combined relationship + definition extraction using prompt service and LLM, emits RDF triples and entity contexts - KnowledgeCoreService: knowledge core CRUD with streaming export and flow-based loading ReAct Agent (Team B): - StreamingReActParser: state machine for parsing LLM output into Thought/Action/ActionInput/FinalAnswer sections - Three MVP tools: KnowledgeQuery (GraphRAG), DocumentQuery (DocRAG), TriplesQuery with RequestResponse clients - AgentService FlowProcessor with ReAct loop, tool execution, and streaming chunk responses (thought/observation/answer) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-07-01 17:39:39 +02:00 · 2026-04-06 00:19:37 -05:00 · 2026-04-06 00:19:37 -05:00 · f09ef4de45
commit f09ef4de45
parent 5ed3f0e2d8
18 changed files with 2145 additions and 2 deletions
--- a/ts/packages/flow/src/agent/react/index.ts
+++ b/ts/packages/flow/src/agent/react/index.ts
@ -0,0 +1,19 @@
+// ReAct agent -- barrel exports
+
+export { AgentService } from "./service.js";
+export { StreamingReActParser } from "./parser.js";
+export { buildReActPrompt } from "./prompt.js";
+export {
+  createKnowledgeQueryTool,
+  createDocumentQueryTool,
+  createTriplesQueryTool,
+} from "./tools.js";
+export type {
+  AgentTool,
+  ToolArg,
+  ReActState,
+  ParsedEvent,
+  OnThought,
+  OnObservation,
+  OnAnswer,
+} from "./types.js";
--- a/ts/packages/flow/src/agent/react/parser.ts
+++ b/ts/packages/flow/src/agent/react/parser.ts
@ -0,0 +1,130 @@
+/**
+ * Streaming ReAct parser -- state machine that processes LLM output one chunk at a time.
+ *
+ * Detects these markers in the LLM output:
+ * - "Thought:" -> emit thought content
+ * - "Action:" -> emit action name (tool name)
+ * - "Action Input:" -> emit action input (JSON args)
+ * - "Final Answer:" -> emit final answer content
+ *
+ * Handles markers split across chunks by buffering lines.
+ */
+
+import type { ReActState } from "./types.js";
+
+// Section markers recognised at the start of a line, each paired with the
+// parser state it switches to.
+const MARKERS: { prefix: string; state: ReActState }[] = [
+  { prefix: "Thought:", state: "thought" },
+  { prefix: "Action Input:", state: "action_input" },
+  { prefix: "Action:", state: "action" },
+  { prefix: "Final Answer:", state: "final_answer" },
+];
+
+// Length of the longest marker prefix; bounds the partial-match check.
+const MAX_MARKER_LEN = MARKERS.reduce(
+  (longest, marker) => Math.max(longest, marker.prefix.length),
+  0,
+);
+
+/**
+ * Incremental parser for ReAct-formatted LLM output.
+ *
+ * Text is fed in arbitrary chunks. Each complete line is classified by its
+ * leading marker ("Thought:", "Action:", "Action Input:", "Final Answer:")
+ * and its content is forwarded to the matching callback. Lines without a
+ * marker are continuation content for the section currently in effect.
+ */
+export class StreamingReActParser {
+  private state: ReActState = "initial";
+  private buffer = "";
+
+  /**
+   * @param onThought Receives thought text, one call per non-empty line.
+   * @param onAction Receives the text following "Action:".
+   * @param onActionInput Receives the text following "Action Input:".
+   * @param onFinalAnswer Receives final-answer text, one call per non-empty line.
+   */
+  constructor(
+    private onThought: (text: string) => void,
+    private onAction: (name: string) => void,
+    private onActionInput: (input: string) => void,
+    private onFinalAnswer: (text: string) => void,
+  ) {}
+
+  /**
+   * Feed a chunk of LLM output text into the parser.
+   * Accumulates in a buffer and processes every complete line; a trailing
+   * partial line stays buffered until more text or flush() arrives.
+   */
+  feed(text: string): void {
+    this.buffer += text;
+    this.processBuffer(false);
+  }
+
+  /**
+   * Flush any remaining buffered content at the end of output.
+   *
+   * The last line of LLM output usually has no trailing newline, so it is
+   * still sitting in the buffer here. It is run through the same marker
+   * detection as every other line; otherwise a closing "Final Answer: ..."
+   * would be reported verbatim under whatever section preceded it.
+   */
+  flush(): void {
+    this.processBuffer(true);
+    const remainder = this.buffer;
+    // Always clear, so a repeated flush() or later feed() starts clean.
+    this.buffer = "";
+    if (remainder.trim().length > 0) {
+      this.processLine(remainder);
+    }
+  }
+
+  /**
+   * Consume every newline-terminated line currently in the buffer.
+   *
+   * @param isFinal True when called from flush(). The unterminated remainder
+   *   is left in the buffer in either case; the caller decides what to do
+   *   with it.
+   */
+  private processBuffer(isFinal: boolean): void {
+    // Process complete lines (terminated by newline)
+    while (true) {
+      const newlineIdx = this.buffer.indexOf("\n");
+      if (newlineIdx === -1) {
+        // No complete line yet.
+        // If not final, check for partial marker match at the end and wait.
+        if (!isFinal) {
+          // If the remaining buffer could be the start of a marker, wait for more input.
+          const trimmed = this.buffer.trimStart();
+          if (trimmed.length > 0 && trimmed.length < MAX_MARKER_LEN) {
+            const couldBeMarker = MARKERS.some((m) =>
+              m.prefix.startsWith(trimmed),
+            );
+            if (couldBeMarker) {
+              // Wait for more input before deciding
+              return;
+            }
+          }
+        }
+        // Either way the partial line remains buffered; nothing is emitted.
+        break;
+      }
+
+      const line = this.buffer.slice(0, newlineIdx);
+      this.buffer = this.buffer.slice(newlineIdx + 1);
+      this.processLine(line);
+    }
+  }
+
+  /**
+   * Classify one line: switch section if it starts with a marker, otherwise
+   * emit it as continuation content. Leading whitespace is ignored.
+   */
+  private processLine(line: string): void {
+    const trimmed = line.trimStart();
+
+    // Check if this line starts a new section
+    for (const marker of MARKERS) {
+      if (trimmed.startsWith(marker.prefix)) {
+        const content = trimmed.slice(marker.prefix.length).trim();
+        this.state = marker.state;
+        this.emitContent(content);
+        return;
+      }
+    }
+
+    // Otherwise, this is continuation content for the current state
+    if (trimmed.length > 0) {
+      this.emitContent(trimmed);
+    }
+  }
+
+  /** Dispatch non-empty content to the callback for the current section. */
+  private emitContent(content: string): void {
+    if (content.length === 0) return;
+
+    switch (this.state) {
+      case "thought":
+        this.onThought(content);
+        break;
+      case "action":
+        this.onAction(content);
+        break;
+      case "action_input":
+        this.onActionInput(content);
+        break;
+      case "final_answer":
+        this.onFinalAnswer(content);
+        break;
+      case "initial":
+        // Content before any marker -- treat as thought
+        this.state = "thought";
+        this.onThought(content);
+        break;
+      case "complete":
+        break;
+    }
+  }
+}
--- a/ts/packages/flow/src/agent/react/prompt.ts
+++ b/ts/packages/flow/src/agent/react/prompt.ts
@ -0,0 +1,50 @@
+/**
+ * Build the ReAct system prompt for the agent.
+ *
+ * Formats available tools into the prompt template so the LLM knows what tools
+ * it can use and what format to follow.
+ */
+
+import type { AgentTool } from "./types.js";
+
+/**
+ * Assemble the system prompt and user prompt for one agent request.
+ *
+ * @param tools Tools to advertise; each is listed with its description and arguments.
+ * @param question The user's question, returned unchanged as `prompt`.
+ * @returns The ReAct instructions as `system` and the question as `prompt`.
+ */
+export function buildReActPrompt(
+  tools: AgentTool[],
+  question: string,
+): { system: string; prompt: string } {
+  // Render one tool as "<name>: <description>" followed by its argument list.
+  const describeTool = (tool: AgentTool): string => {
+    const argLines = tool.args.map(
+      (arg) => `  - ${arg.name} (${arg.type}): ${arg.description}`,
+    );
+    return `${tool.name}: ${tool.description}\n  Arguments:\n${argLines.join("\n")}`;
+  };
+
+  const toolDescriptions = tools.map((tool) => describeTool(tool)).join("\n\n");
+  const toolNames = tools.map((tool) => tool.name).join(", ");
+
+  const system = `You are a helpful AI assistant that answers questions using available tools.
+
+You have access to the following tools:
+
+${toolDescriptions}
+
+Use this exact format for your response:
+
+Thought: [your reasoning about what to do]
+Action: [tool name, one of: ${toolNames}]
+Action Input: {"argument_name": "value"}
+Observation: [tool result will be inserted here]
+... (repeat Thought/Action/Action Input/Observation as needed)
+Thought: I now have enough information to answer.
+Final Answer: [your comprehensive answer]
+
+Important:
+- Always start with a Thought.
+- Action must be one of: ${toolNames}
+- Action Input must be valid JSON.
+- After receiving an Observation, continue with another Thought.
+- When you have enough information, provide a Final Answer.
+- Do NOT make up observations. Wait for the tool result.`;
+
+  return { system, prompt: question };
+}
--- a/ts/packages/flow/src/agent/react/service.ts
+++ b/ts/packages/flow/src/agent/react/service.ts
@ -0,0 +1,306 @@
+/**
+ * ReAct agent service -- a FlowProcessor that implements a streaming ReAct
+ * (Reasoning + Acting) agent with tool execution.
+ *
+ * The agent:
+ * 1. Receives an AgentRequest (a user question)
+ * 2. Builds a ReAct prompt with available tools
+ * 3. Iteratively calls the LLM, parses Thought/Action/Action Input/Final Answer
+ * 4. Executes tools and feeds observations back to the LLM
+ * 5. Sends streaming AgentResponse chunks (thought, observation, answer, error)
+ *
+ * Python reference: trustgraph-flow/trustgraph/agent/react/service.py
+ */
+
+import {
+  FlowProcessor,
+  ConsumerSpec,
+  ProducerSpec,
+  RequestResponseSpec,
+  type ProcessorConfig,
+  type FlowContext,
+  type AgentRequest,
+  type AgentResponse,
+  type TextCompletionRequest,
+  type TextCompletionResponse,
+  type GraphRagRequest,
+  type GraphRagResponse,
+  type DocumentRagRequest,
+  type DocumentRagResponse,
+  type TriplesQueryRequest,
+  type TriplesQueryResponse,
+} from "@trustgraph/base";
+
+import {
+  createKnowledgeQueryTool,
+  createDocumentQueryTool,
+  createTriplesQueryTool,
+} from "./tools.js";
+import { buildReActPrompt } from "./prompt.js";
+import type { AgentTool } from "./types.js";
+
+// Upper bound on LLM calls per request. If the loop in onRequest runs this
+// many iterations without a Final Answer, an error chunk is sent instead.
+const MAX_ITERATIONS = 10;
+
+/**
+ * FlowProcessor that answers agent requests with a ReAct loop.
+ *
+ * For each request it builds the tool set and system prompt, then repeatedly
+ * calls the LLM, parses the reply, forwards thought/observation/answer chunks
+ * on the "response" producer, and executes the requested tool until a Final
+ * Answer is produced or MAX_ITERATIONS is reached.
+ */
+export class AgentService extends FlowProcessor {
+  /**
+   * Registers the request consumer, the response producer, and one
+   * request/response client each for the LLM and the three retrieval tools.
+   */
+  constructor(config: ProcessorConfig) {
+    super(config);
+
+    // Consumer: agent requests
+    this.registerSpecification(
+      new ConsumerSpec<AgentRequest>("request", this.onRequest.bind(this)),
+    );
+
+    // Producer: agent responses (streaming chunks)
+    this.registerSpecification(new ProducerSpec<AgentResponse>("response"));
+
+    // Request-response clients for tool execution
+    this.registerSpecification(
+      new RequestResponseSpec<TextCompletionRequest, TextCompletionResponse>(
+        "llm",
+        "text-completion-request",
+        "text-completion-response",
+      ),
+    );
+    this.registerSpecification(
+      new RequestResponseSpec<GraphRagRequest, GraphRagResponse>(
+        "graph-rag",
+        "graph-rag-request",
+        "graph-rag-response",
+      ),
+    );
+    this.registerSpecification(
+      new RequestResponseSpec<DocumentRagRequest, DocumentRagResponse>(
+        "doc-rag",
+        "document-rag-request",
+        "document-rag-response",
+      ),
+    );
+    this.registerSpecification(
+      new RequestResponseSpec<TriplesQueryRequest, TriplesQueryResponse>(
+        "triples",
+        "triples-request",
+        "triples-response",
+      ),
+    );
+
+    console.log("[AgentService] Service initialized");
+  }
+
+  /**
+   * Handle one agent request.
+   *
+   * @param msg The request; `question` seeds the conversation and
+   *   `collection` is passed to every tool.
+   * @param properties Message properties; `id` is used as the key for every
+   *   response chunk. Requests without an id are dropped without a response.
+   * @param flowCtx Gives access to the flow's producer and requestors.
+   */
+  private async onRequest(
+    msg: AgentRequest,
+    properties: Record<string, string>,
+    flowCtx: FlowContext,
+  ): Promise<void> {
+    const requestId = properties.id;
+    // NOTE(review): a missing id is ignored silently (no log, no response).
+    if (!requestId) return;
+
+    const responseProducer = flowCtx.flow.producer<AgentResponse>("response");
+
+    try {
+      // Build tools from flow requestors
+      const tools: AgentTool[] = [
+        createKnowledgeQueryTool(
+          flowCtx.flow.requestor<GraphRagRequest, GraphRagResponse>("graph-rag"),
+          msg.collection,
+        ),
+        createDocumentQueryTool(
+          flowCtx.flow.requestor<DocumentRagRequest, DocumentRagResponse>("doc-rag"),
+          msg.collection,
+        ),
+        createTriplesQueryTool(
+          flowCtx.flow.requestor<TriplesQueryRequest, TriplesQueryResponse>("triples"),
+          msg.collection,
+        ),
+      ];
+
+      // Build the ReAct prompt
+      const { system, prompt: initialPrompt } = buildReActPrompt(
+        tools,
+        msg.question,
+      );
+
+      const llmClient = flowCtx.flow.requestor<
+        TextCompletionRequest,
+        TextCompletionResponse
+      >("llm");
+
+      // Conversation accumulates the full exchange for multi-turn reasoning
+      let conversation = initialPrompt;
+
+      for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
+        console.log(
+          `[AgentService] Iteration ${iteration + 1}/${MAX_ITERATIONS} for request ${requestId}`,
+        );
+
+        // Call LLM (non-streaming for MVP)
+        const llmResponse = await llmClient.request({
+          system,
+          prompt: conversation,
+        });
+
+        if (llmResponse.error) {
+          // NOTE(review): unlike the other error chunks below, this one does
+          // not set end_of_message -- confirm whether that is intentional.
+          await responseProducer.send(requestId, {
+            chunk_type: "error",
+            content: `LLM error: ${llmResponse.error.message}`,
+            end_of_dialog: true,
+          });
+          return;
+        }
+
+        const text = llmResponse.response;
+
+        // Parse the LLM response with simple line-based parsing
+        const parsed = parseReActResponse(text);
+
+        // Send thought chunk
+        if (parsed.thought) {
+          await responseProducer.send(requestId, {
+            chunk_type: "thought",
+            content: parsed.thought,
+            end_of_message: true,
+          });
+        }
+
+        // If we got a final answer, send it and return
+        if (parsed.finalAnswer) {
+          await responseProducer.send(requestId, {
+            chunk_type: "answer",
+            content: parsed.finalAnswer,
+            end_of_message: true,
+            end_of_dialog: true,
+          });
+          return;
+        }
+
+        // Execute tool if action was specified.
+        // Both the tool name and a non-empty Action Input are required; an
+        // action without input falls through to the nudge branch below.
+        if (parsed.action && parsed.actionInput) {
+          const tool = tools.find((t) => t.name === parsed.action);
+          let observation: string;
+
+          if (tool) {
+            try {
+              observation = await tool.execute(parsed.actionInput);
+            } catch (err) {
+              // A failing tool is reported to the LLM as the observation
+              // rather than aborting the request.
+              observation = `Error executing tool: ${err instanceof Error ? err.message : String(err)}`;
+            }
+          } else {
+            observation = `Unknown tool: ${parsed.action}. Available tools: ${tools.map((t) => t.name).join(", ")}`;
+          }
+
+          // Send observation chunk
+          await responseProducer.send(requestId, {
+            chunk_type: "observation",
+            content: observation,
+            end_of_message: true,
+          });
+
+          // Append the full exchange to conversation for the next iteration
+          conversation += `\n${text}\nObservation: ${observation}\n`;
+        } else if (!parsed.finalAnswer) {
+          // finalAnswer is always empty here (a non-empty one returned above),
+          // so this branch runs whenever no complete action was parsed.
+          // LLM didn't produce a valid action or final answer -- nudge it
+          conversation += `\n${text}\nObservation: You must either use a tool (Action + Action Input) or provide a Final Answer.\n`;
+        }
+      }
+
+      // Max iterations reached without a final answer
+      await responseProducer.send(requestId, {
+        chunk_type: "error",
+        content:
+          "Maximum reasoning iterations reached without a final answer. " +
+          "The agent was unable to complete the task within the allowed steps.",
+        end_of_message: true,
+        end_of_dialog: true,
+      });
+    } catch (err) {
+      // Any failure not handled above (e.g. a rejected LLM or producer call)
+      // is logged and reported to the requester as a terminal error chunk.
+      console.error(`[AgentService] Error processing request ${requestId}:`, err);
+
+      await responseProducer.send(requestId, {
+        chunk_type: "error",
+        content: `Agent error: ${err instanceof Error ? err.message : String(err)}`,
+        end_of_message: true,
+        end_of_dialog: true,
+      });
+    }
+  }
+}
+
+/**
+ * Simple line-based parser for ReAct LLM output.
+ *
+ * Extracts Thought, Action, Action Input, and Final Answer sections.
+ * For the MVP this avoids the complexity of the streaming parser --
+ * we parse the complete response at once.
+ *
+ * Parsing stops at the first "Observation:" line: observations are injected
+ * by the agent, so anything the LLM writes from that point on (including a
+ * "Final Answer:") is based on a made-up tool result and is discarded.
+ *
+ * @param text Complete LLM response; LF and CRLF line endings are accepted.
+ * @returns The trimmed sections; a section that did not occur is "".
+ *   `thought` joins every Thought section with newlines. `action` and
+ *   `actionInput` reflect the last Action / Action Input seen. `finalAnswer`
+ *   is everything from "Final Answer:" to the end of the text.
+ */
+function parseReActResponse(text: string): {
+  thought: string;
+  action: string;
+  actionInput: string;
+  finalAnswer: string;
+} {
+  let thought = "";
+  let action = "";
+  let actionInput = "";
+  let finalAnswer = "";
+
+  const lines = text.split(/\r?\n/);
+  let currentSection: "thought" | "action" | "action_input" | null = null;
+
+  for (const [index, line] of lines.entries()) {
+    const trimmed = line.trimStart();
+
+    if (trimmed.startsWith("Final Answer:")) {
+      // Everything from "Final Answer:" to end of text is the answer
+      const firstLine = trimmed.slice("Final Answer:".length).trim();
+      const remainingLines = lines.slice(index + 1).join("\n").trim();
+      finalAnswer = firstLine + (remainingLines ? "\n" + remainingLines : "");
+      break;
+    }
+
+    if (trimmed.startsWith("Observation:")) {
+      // Stop processing -- observations are injected by us, not the LLM
+      break;
+    }
+
+    if (trimmed.startsWith("Thought:")) {
+      currentSection = "thought";
+      const content = trimmed.slice("Thought:".length).trim();
+      if (content) {
+        thought += (thought ? "\n" : "") + content;
+      }
+    } else if (trimmed.startsWith("Action Input:")) {
+      currentSection = "action_input";
+      // Replace rather than append: concatenating two inputs would produce
+      // invalid JSON for the tool.
+      actionInput = trimmed.slice("Action Input:".length).trim();
+    } else if (trimmed.startsWith("Action:")) {
+      currentSection = "action";
+      // A new action supersedes the previous one, including its input.
+      action = trimmed.slice("Action:".length).trim();
+      actionInput = "";
+    } else if (trimmed.length > 0 && currentSection) {
+      // Continuation line for current section
+      switch (currentSection) {
+        case "thought":
+          thought += "\n" + trimmed;
+          break;
+        case "action":
+          // Action should be a single line (tool name), but handle multi-line
+          action += " " + trimmed;
+          break;
+        case "action_input":
+          actionInput += "\n" + trimmed;
+          break;
+      }
+    }
+  }
+
+  return {
+    thought: thought.trim(),
+    action: action.trim(),
+    actionInput: actionInput.trim(),
+    finalAnswer: finalAnswer.trim(),
+  };
+}
+
+/**
+ * Entry point for the agent processor: awaits AgentService.launch("agent").
+ *
+ * NOTE(review): `launch` is not defined in this file -- presumably inherited
+ * from FlowProcessor; confirm what the "agent" argument identifies.
+ */
+export async function run(): Promise<void> {
+  await AgentService.launch("agent");
+}
--- a/ts/packages/flow/src/agent/react/tools.ts
+++ b/ts/packages/flow/src/agent/react/tools.ts
@ -0,0 +1,199 @@
+/**
+ * MVP tools for the ReAct agent.
+ *
+ * Each tool wraps a RequestResponse client from the flow, providing the agent
+ * with access to existing TrustGraph retrieval services.
+ */
+
+import type {
+  RequestResponse,
+  GraphRagRequest,
+  GraphRagResponse,
+  DocumentRagRequest,
+  DocumentRagResponse,
+  TriplesQueryRequest,
+  TriplesQueryResponse,
+  Term,
+} from "@trustgraph/base";
+
+import type { AgentTool } from "./types.js";
+
+/**
+ * Render a Term as display text: IRIs and literals as their bare value,
+ * blank nodes as `_:<id>`, and quoted triples as `(<s> <p> <o>)`.
+ */
+function termToString(term: Term): string {
+  switch (term.type) {
+    case "TRIPLE": {
+      // Nested triples are rendered recursively, component by component.
+      const { s, p, o } = term.triple;
+      const parts = [s, p, o].map((part) => termToString(part));
+      return `(${parts.join(" ")})`;
+    }
+    case "BLANK":
+      return `_:${term.id}`;
+    case "LITERAL":
+      return term.value;
+    case "IRI":
+      return term.iri;
+  }
+}
+
+/**
+ * Parse tool input -- accepts either raw JSON or a plain string question.
+ *
+ * @param input Raw "Action Input" text produced by the LLM.
+ * @returns The question to send to the retrieval service:
+ *   - the `question` field of a JSON object, when it is a string, number or boolean;
+ *   - the string itself, when the input is a JSON string literal;
+ *   - the raw input otherwise (not JSON, or JSON without a usable `question`).
+ */
+function parseQuestion(input: string): string {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(input);
+  } catch {
+    // Not valid JSON -- treat as plain text
+    return input;
+  }
+
+  // If it's a string JSON value, use it directly
+  if (typeof parsed === "string") {
+    return parsed;
+  }
+
+  if (typeof parsed === "object" && parsed !== null && "question" in parsed) {
+    const question = (parsed as { question: unknown }).question;
+    if (typeof question === "string") {
+      return question;
+    }
+    if (typeof question === "number" || typeof question === "boolean") {
+      return String(question);
+    }
+    // Objects, arrays and null would stringify to "[object Object]" / "null";
+    // the raw input is a more useful query than that.
+  }
+
+  return input;
+}
+
+/**
+ * Build the "KnowledgeQuery" tool, which forwards a question to the
+ * graph-RAG client.
+ *
+ * @param client Request/response client used to run the query.
+ * @param collection Optional collection passed through on every request.
+ */
+export function createKnowledgeQueryTool(
+  client: RequestResponse<GraphRagRequest, GraphRagResponse>,
+  collection?: string,
+): AgentTool {
+  // Returns the service's response text, or "Error: <message>" when the
+  // response carries an error.
+  const execute = async (input: string): Promise<string> => {
+    const query = parseQuestion(input);
+    const reply = await client.request({ query, collection });
+    return reply.error ? `Error: ${reply.error.message}` : reply.response;
+  };
+
+  return {
+    name: "KnowledgeQuery",
+    description:
+      "Query the knowledge graph for information about entities and their relationships.",
+    args: [
+      {
+        name: "question",
+        type: "string",
+        description: "The question to ask the knowledge graph",
+      },
+    ],
+    execute,
+  };
+}
+
+/**
+ * Build the "DocumentQuery" tool, which forwards a question to the
+ * document-RAG client.
+ *
+ * @param client Request/response client used to run the query.
+ * @param collection Optional collection passed through on every request.
+ */
+export function createDocumentQueryTool(
+  client: RequestResponse<DocumentRagRequest, DocumentRagResponse>,
+  collection?: string,
+): AgentTool {
+  // Returns the service's response text, or "Error: <message>" when the
+  // response carries an error.
+  const execute = async (input: string): Promise<string> => {
+    const query = parseQuestion(input);
+    const reply = await client.request({ query, collection });
+    return reply.error ? `Error: ${reply.error.message}` : reply.response;
+  };
+
+  return {
+    name: "DocumentQuery",
+    description:
+      "Search the document library for relevant information using semantic search.",
+    args: [
+      {
+        name: "question",
+        type: "string",
+        description: "The question to search documents for",
+      },
+    ],
+    execute,
+  };
+}
+
+/**
+ * Parse triples query input. Accepts JSON with optional s, p, o fields.
+ *
+ * @param input Raw "Action Input" text produced by the LLM.
+ * @returns The query filter:
+ *   - for a JSON object: `s`/`p`/`o` taken from `subject`/`predicate`/`object`
+ *     (or the short `s`/`p`/`o` keys) plus a numeric `limit`, each undefined
+ *     when absent or unusable;
+ *   - for anything else (plain text, a JSON string, number, array or null):
+ *     a subject-only search, so the input never degrades into an
+ *     unfiltered query.
+ *
+ * NOTE(review): plain strings are always wrapped as LITERAL terms, even when
+ * they look like IRIs -- confirm the triples service matches those as intended.
+ */
+function parseTriplesInput(input: string): {
+  s?: Term;
+  p?: Term;
+  o?: Term;
+  limit?: number;
+} {
+  const subjectOnly = (value: string): { s: Term } => {
+    const subject: Term = { type: "LITERAL", value };
+    return { s: subject };
+  };
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(input);
+  } catch {
+    // If not valid JSON, treat as a subject search
+    return subjectOnly(input);
+  }
+
+  // A JSON string literal such as "Alice" is a subject search too.
+  if (typeof parsed === "string") {
+    return subjectOnly(parsed);
+  }
+  // Numbers, booleans, arrays and null carry no s/p/o fields; fall back to
+  // the raw text instead of issuing a query with no filter at all.
+  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+    return subjectOnly(input);
+  }
+
+  const fields = parsed as Record<string, unknown>;
+
+  const toTerm = (val: unknown): Term | undefined => {
+    if (typeof val === "string") {
+      return { type: "LITERAL", value: val };
+    }
+    if (typeof val === "object" && val !== null && "type" in val) {
+      // Already shaped like a Term; passed through without further validation.
+      return val as Term;
+    }
+    return undefined;
+  };
+
+  return {
+    s: toTerm(fields.subject ?? fields.s),
+    p: toTerm(fields.predicate ?? fields.p),
+    o: toTerm(fields.object ?? fields.o),
+    limit: typeof fields.limit === "number" ? fields.limit : undefined,
+  };
+}
+
+/**
+ * Build the "TriplesQuery" tool, which looks up subject-predicate-object
+ * triples through the triples client.
+ *
+ * @param client Request/response client used to run the query.
+ * @param collection Optional collection passed through on every request.
+ */
+export function createTriplesQueryTool(
+  client: RequestResponse<TriplesQueryRequest, TriplesQueryResponse>,
+  collection?: string,
+): AgentTool {
+  // Runs the query (default limit 20) and renders one matching triple per line.
+  const execute = async (input: string): Promise<string> => {
+    const filter = parseTriplesInput(input);
+    const res = await client.request({
+      s: filter.s,
+      p: filter.p,
+      o: filter.o,
+      collection,
+      limit: filter.limit ?? 20,
+    });
+
+    if (res.error) return `Error: ${res.error.message}`;
+
+    const triples = res.triples;
+    if (triples && triples.length > 0) {
+      return triples
+        .map(
+          (t) =>
+            `(${termToString(t.s)}) -[${termToString(t.p)}]-> (${termToString(t.o)})`,
+        )
+        .join("\n");
+    }
+    return "No triples found matching the query.";
+  };
+
+  return {
+    name: "TriplesQuery",
+    description:
+      "Query for specific triples (subject-predicate-object relationships) in the knowledge graph. " +
+      "Provide subject, predicate, and/or object to filter results.",
+    args: [
+      {
+        name: "subject",
+        type: "string",
+        description: "The subject entity to search for (optional)",
+      },
+      {
+        name: "predicate",
+        type: "string",
+        description: "The predicate/relationship to search for (optional)",
+      },
+      {
+        name: "object",
+        type: "string",
+        description: "The object entity to search for (optional)",
+      },
+    ],
+    execute,
+  };
+}
--- a/ts/packages/flow/src/agent/react/types.ts
+++ b/ts/packages/flow/src/agent/react/types.ts
@ -0,0 +1,33 @@
+/**
+ * Types for the ReAct agent service.
+ */
+
+/**
+ * One argument a tool accepts. buildReActPrompt renders each entry into the
+ * system prompt as "- <name> (<type>): <description>".
+ */
+export interface ToolArg {
+  name: string;
+  type: string;
+  description: string;
+}
+
+/**
+ * A tool the ReAct agent can invoke.
+ */
+export interface AgentTool {
+  // Name the LLM must emit after "Action:"; AgentService looks the tool up
+  // by exact match on this value.
+  name: string;
+  // Shown to the LLM next to the name in the system prompt.
+  description: string;
+  // Arguments advertised to the LLM in the system prompt.
+  args: ToolArg[];
+  // Called with the text the LLM wrote after "Action Input:"; the resolved
+  // string is fed back to the LLM as the observation.
+  execute: (input: string) => Promise<string>;
+}
+
+/**
+ * Section the StreamingReActParser is currently in. "initial" is the state
+ * before any marker has been seen.
+ *
+ * NOTE(review): nothing in the parser shown here ever assigns "complete" --
+ * confirm whether it is reserved for future use.
+ */
+export type ReActState =
+  | "initial"
+  | "thought"
+  | "action"
+  | "action_input"
+  | "final_answer"
+  | "complete";
+
+/**
+ * A parsed section of ReAct output together with its text.
+ *
+ * NOTE(review): not referenced by the parser or service in this change --
+ * presumably intended for an event-based parser API; confirm.
+ */
+export interface ParsedEvent {
+  type: "thought" | "action" | "action_input" | "final_answer";
+  content: string;
+}
+
+// Async callback signatures for streamed agent output.
+// NOTE(review): these differ from the synchronous `(text: string) => void`
+// callbacks StreamingReActParser takes and are not used elsewhere in this
+// change -- confirm the intended consumer and the meaning of `isFinal`.
+export type OnThought = (text: string, isFinal: boolean) => Promise<void>;
+export type OnObservation = (text: string, isFinal: boolean) => Promise<void>;
+export type OnAnswer = (text: string) => Promise<void>;