trustgraph/ts/packages/flow/src/retrieval/document-rag.ts

83 lines
2.4 KiB
TypeScript
Raw Normal View History

2026-04-05 21:09:33 -05:00
/**
* Document RAG retrieval pipeline.
*
* Simpler than Graph RAG: embeds the query, finds similar document chunks,
* and synthesizes an answer from the chunk content.
*
* Python reference: trustgraph-flow/trustgraph/retrieval/document_rag/
*/
import type {
2026-05-12 08:06:58 -05:00
FlowRequestor,
2026-04-05 21:09:33 -05:00
TextCompletionRequest,
TextCompletionResponse,
EmbeddingsRequest,
EmbeddingsResponse,
DocumentEmbeddingsRequest,
DocumentEmbeddingsResponse,
2026-04-05 21:09:33 -05:00
PromptRequest,
PromptResponse,
} from "@trustgraph/base";
/**
 * Request/response clients that {@link DocumentRag} depends on.
 *
 * Each member is used by `DocumentRag.query` through its `request` method.
 */
export interface DocumentRagClients {
  /** Text-completion client; receives the rendered `system` and `prompt` strings. */
  llm: FlowRequestor<TextCompletionRequest, TextCompletionResponse>;
  /** Embeddings client; used to embed the query text. */
  embeddings: FlowRequestor<EmbeddingsRequest, EmbeddingsResponse>;
  /** Document-embeddings client; looked up with the query vectors to get chunks. */
  docEmbeddings: FlowRequestor<DocumentEmbeddingsRequest, DocumentEmbeddingsResponse>;
  /** Prompt client; renders the `document-rag-synthesize` template. */
  prompt: FlowRequestor<PromptRequest, PromptResponse>;
}
/**
 * Shape of the optional `chunkCallback` accepted by `DocumentRag.query`.
 *
 * The callback receives a piece of text plus an `endOfStream` flag and returns
 * a promise that resolves once the callback has finished handling the text.
 *
 * NOTE(review): `endOfStream` presumably marks the final piece of a streamed
 * answer — confirm against the code that invokes the callback.
 */
export type ChunkCallback = (
  text: string,
  endOfStream: boolean,
) => Promise<void>;
/**
 * Document RAG query pipeline.
 *
 * Given a question, it embeds the question, looks up matching document
 * chunks, renders the `document-rag-synthesize` prompt with those chunks as
 * context, and asks the LLM client for the final answer.
 */
export class DocumentRag {
  /** Chunk limit sent to the document-embeddings client when none is given. */
  private static readonly DEFAULT_DOC_LIMIT = 10;

  /** User sent to the document-embeddings client when none is given. */
  private static readonly DEFAULT_USER = "default";

  /** Collection queried when none is given. */
  private static readonly DEFAULT_COLLECTION = "default";

  private readonly clients: DocumentRagClients;

  /**
   * @param clients - The four requestors (embeddings, document embeddings,
   *   prompt, LLM) this pipeline calls.
   */
  constructor(clients: DocumentRagClients) {
    this.clients = clients;
  }

  /**
   * Answer `queryText` from the content of matching document chunks.
   *
   * @param queryText - The question to answer.
   * @param options - Optional settings:
   *   - `collection`: collection to search (default `"default"`).
   *   - `user`: user sent with the chunk lookup (default `"default"`).
   *   - `docLimit`: maximum number of chunks requested (default `10`).
   *   - `streaming`, `chunkCallback`: accepted for interface compatibility but
   *     NOT read by this method; the answer is only delivered through the
   *     returned promise.
   *     NOTE(review): wire these up once the streaming contract of the LLM
   *     requestor is confirmed.
   * @returns The `response` text of the LLM completion.
   * @throws Whatever any of the underlying `request` calls rejects with; no
   *   errors are caught here.
   */
  async query(
    queryText: string,
    options?: {
      collection?: string;
      user?: string;
      docLimit?: number;
      streaming?: boolean;
      chunkCallback?: ChunkCallback;
    },
  ): Promise<string> {
    const collection = options?.collection ?? DocumentRag.DEFAULT_COLLECTION;
    const user = options?.user ?? DocumentRag.DEFAULT_USER;
    const limit = options?.docLimit ?? DocumentRag.DEFAULT_DOC_LIMIT;

    // Step 1: Embed the query
    const embResp = await this.clients.embeddings.request({ text: [queryText] });
    const vectors = (embResp as EmbeddingsResponse).vectors;

    // Step 2: Find similar document chunks
    const docResp = await this.clients.docEmbeddings.request({
      vectors,
      limit,
      collection,
      user,
    });
    const chunks = (docResp as DocumentEmbeddingsResponse).chunks ?? [];
    console.log(`[DocumentRag] Found ${chunks.length} matching chunks`);

    // Step 3: Build context from chunks.
    // Chunks with missing or empty content are dropped so the context does not
    // contain empty sections between separators.
    const context = chunks
      .flatMap((c) =>
        c.content !== undefined && c.content.length > 0 ? [c.content] : [],
      )
      .join("\n\n---\n\n");

    // Step 4: Synthesize answer.
    // The prompt client renders the template; its system/prompt output is
    // forwarded unchanged to the LLM client. An empty context is still sent.
    const promptResp = await this.clients.prompt.request({
      name: "document-rag-synthesize",
      variables: { query: queryText, context },
    });
    const resp = await this.clients.llm.request({
      system: (promptResp as PromptResponse).system,
      prompt: (promptResp as PromptResponse).prompt,
    });
    return (resp as TextCompletionResponse).response;
  }
}