feat: add query/retrieval FlowProcessor services and missing runner scripts

Wire up the query and retrieval side of the pipeline so the agent can
answer questions from stored knowledge:

- Triples query service (FalkorDB) — all SPO pattern queries via NATS
- Graph embeddings query service (Qdrant) — entity vector similarity
- Document embeddings query service (Qdrant) — chunk vector similarity
- Graph RAG service — full concept→entity→traverse→score→synthesize pipeline
- Document RAG service — embed→find chunks→synthesize pipeline
- Runner scripts for chunker, extractor, embeddings (missing from Phase 5)
- Add DocumentEmbeddingsRequest/Response schema types
- Add RAG prompt templates (extract-concepts, edge-scoring, synthesize)
- Add graph/doc embeddings query topics to seed config + flow manager
- Add all pipeline/query/retrieval services to docker-compose
- 8 new runner scripts, 8 new pnpm script aliases

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
elpresidank 2026-04-07 01:05:54 -05:00
parent 8f7008822a
commit c545213224
19 changed files with 763 additions and 1 deletions

View file

@ -0,0 +1,81 @@
/**
* Document embeddings query service: finds similar document chunks in Qdrant.
*
* Wraps QdrantDocEmbeddingsQuery as a NATS consumer so Document RAG can look up
* chunks by vector similarity over the message bus.
*
* Python reference: trustgraph-flow/trustgraph/query/doc_embeddings/qdrant/service.py
*/
import {
FlowProcessor,
ConsumerSpec,
ProducerSpec,
type ProcessorConfig,
type FlowContext,
type DocumentEmbeddingsRequest,
type DocumentEmbeddingsResponse,
} from "@trustgraph/base";
import { QdrantDocEmbeddingsQuery } from "./qdrant-doc.js";
/**
 * Flow processor that answers document-embeddings lookups.
 *
 * Consumes `document-embeddings-request` messages, runs one similarity query
 * per supplied vector through {@link QdrantDocEmbeddingsQuery}, and publishes
 * the concatenated matches on `document-embeddings-response`.
 */
export class DocEmbeddingsQueryService extends FlowProcessor {
  private query: QdrantDocEmbeddingsQuery;

  /**
   * Creates the query backend and registers the request consumer and the
   * response producer with the processor.
   *
   * @param config - Processor configuration forwarded to the base class.
   */
  constructor(config: ProcessorConfig) {
    super(config);
    this.query = new QdrantDocEmbeddingsQuery();

    const requestSpec = new ConsumerSpec<DocumentEmbeddingsRequest>(
      "document-embeddings-request",
      this.onMessage.bind(this),
    );
    this.registerSpecification(requestSpec);

    const responseSpec = new ProducerSpec<DocumentEmbeddingsResponse>(
      "document-embeddings-response",
    );
    this.registerSpecification(responseSpec);

    console.log("[DocEmbeddingsQuery] Service initialized");
  }

  /**
   * Handles a single request message.
   *
   * Messages without an `id` property are dropped without a reply. Vectors
   * are queried one after another and their matches appended in order; no
   * de-duplication is performed across vectors. On failure an empty chunk
   * list with a `query-error` payload is sent instead.
   *
   * @param msg - Request carrying the vectors, optional collection and limit.
   * @param properties - Message properties; `id` correlates the reply.
   * @param flowCtx - Flow context used to obtain the response producer.
   */
  private async onMessage(
    msg: DocumentEmbeddingsRequest,
    properties: Record<string, string>,
    flowCtx: FlowContext,
  ): Promise<void> {
    const { id: requestId } = properties;
    if (!requestId) {
      return;
    }

    const responder = flowCtx.flow.producer<DocumentEmbeddingsResponse>(
      "document-embeddings-response",
    );
    const targetCollection = msg.collection ?? "default";

    try {
      const chunks: DocumentEmbeddingsResponse["chunks"] = [];
      const vectors = msg.vectors ?? [];

      // Queries run sequentially so results keep the order of the input vectors.
      for (const vector of vectors) {
        const hits = await this.query.query({
          vector,
          user: "default",
          collection: targetCollection,
          limit: msg.limit ?? 10,
        });
        for (const { chunkId, score } of hits) {
          chunks.push({ chunkId, score });
        }
      }

      await responder.send(requestId, { chunks });
    } catch (failure) {
      console.error("[DocEmbeddingsQuery] Query failed:", failure);
      const errorReply = {
        chunks: [],
        error: { type: "query-error", message: String(failure) },
      };
      await responder.send(requestId, errorReply);
    }
  }
}
/**
 * Entry point for the document embeddings query service.
 *
 * Hands control to `DocEmbeddingsQueryService.launch` using the processor id
 * `doc-embeddings-query` and resolves when that call resolves.
 */
export async function run(): Promise<void> {
  const processorId = "doc-embeddings-query";
  await DocEmbeddingsQueryService.launch(processorId);
}

View file

@ -0,0 +1,83 @@
/**
* Graph embeddings query service: finds similar entities in Qdrant via FlowProcessor.
*
* Wraps QdrantGraphEmbeddingsQuery as a NATS consumer so Graph RAG can look up
* entities by vector similarity over the message bus.
*
* Python reference: trustgraph-flow/trustgraph/query/graph_embeddings/qdrant/service.py
*/
import {
FlowProcessor,
ConsumerSpec,
ProducerSpec,
type ProcessorConfig,
type FlowContext,
type GraphEmbeddingsRequest,
type GraphEmbeddingsResponse,
} from "@trustgraph/base";
import { QdrantGraphEmbeddingsQuery } from "./qdrant-graph.js";
/**
 * Flow processor that answers graph-embeddings lookups.
 *
 * Consumes `graph-embeddings-request` messages, runs one similarity query per
 * supplied vector through {@link QdrantGraphEmbeddingsQuery}, and publishes
 * the concatenated entities on `graph-embeddings-response`.
 */
export class GraphEmbeddingsQueryService extends FlowProcessor {
  private query: QdrantGraphEmbeddingsQuery;

  /**
   * Creates the query backend and registers the request consumer and the
   * response producer with the processor.
   *
   * @param config - Processor configuration forwarded to the base class.
   */
  constructor(config: ProcessorConfig) {
    super(config);
    this.query = new QdrantGraphEmbeddingsQuery();
    this.registerSpecification(
      new ConsumerSpec<GraphEmbeddingsRequest>(
        "graph-embeddings-request",
        this.onMessage.bind(this),
      ),
    );
    this.registerSpecification(
      new ProducerSpec<GraphEmbeddingsResponse>("graph-embeddings-response"),
    );
    console.log("[GraphEmbeddingsQuery] Service initialized");
  }

  /**
   * Handles a single request message.
   *
   * Messages without an `id` property are dropped without a reply. Vectors
   * are queried one after another and the matched entities appended in
   * order; no de-duplication is performed across vectors. On failure an
   * empty entity list with a `query-error` payload is sent instead.
   *
   * @param msg - Request carrying the vectors, optional collection and limit.
   * @param properties - Message properties; `id` correlates the reply.
   * @param flowCtx - Flow context used to obtain the response producer.
   */
  private async onMessage(
    msg: GraphEmbeddingsRequest,
    properties: Record<string, string>,
    flowCtx: FlowContext,
  ): Promise<void> {
    const requestId = properties.id;
    if (!requestId) return;

    const producer = flowCtx.flow.producer<GraphEmbeddingsResponse>("graph-embeddings-response");

    // User and collection are independent lookup keys, so the user must not
    // be derived from the collection name. It is pinned to "default", the
    // same value DocEmbeddingsQueryService passes.
    // NOTE(review): if GraphEmbeddingsRequest carries a user field, read it
    // here instead — confirm against the schema.
    const user = "default";
    const collection = msg.collection ?? "default";

    try {
      // Query for each vector and aggregate results
      const allEntities: GraphEmbeddingsResponse["entities"] = [];
      for (const vector of msg.vectors ?? []) {
        const matches = await this.query.query({
          vector,
          user,
          collection,
          limit: msg.limit ?? 50,
        });
        for (const match of matches) {
          allEntities.push(match.entity);
        }
      }
      await producer.send(requestId, { entities: allEntities });
    } catch (err) {
      console.error("[GraphEmbeddingsQuery] Query failed:", err);
      await producer.send(requestId, {
        entities: [],
        error: { type: "query-error", message: String(err) },
      });
    }
  }
}
/**
 * Entry point for the graph embeddings query service.
 *
 * Hands control to `GraphEmbeddingsQueryService.launch` using the processor
 * id `graph-embeddings-query` and resolves when that call resolves.
 */
export async function run(): Promise<void> {
  const processorId = "graph-embeddings-query";
  await GraphEmbeddingsQueryService.launch(processorId);
}