mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-04 10:52:27 +02:00
feat: add document pipeline — PDF decoder, Ollama LLM, storage services
Add end-to-end document processing pipeline:
- PDF decoder service (pdfjs-dist) extracts text per page from librarian docs
- Ollama native LLM service for local model inference
- FalkorDB triples store FlowProcessor consumer
- Qdrant graph embeddings store FlowProcessor consumer
- Fix spec name collisions in chunker/extractor (input→chunk-input, etc.)
- Gateway /load endpoint to trigger document processing
- Align flow manager blueprint and seed config with full pipeline topics
- Add runner scripts and test coverage for document load

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
8f9de7604e
commit
8f7008822a
20 changed files with 894 additions and 37 deletions
|
|
@@ -49,10 +49,10 @@ export class KnowledgeExtractService extends FlowProcessor {
|
|||
super(config);
|
||||
|
||||
this.registerSpecification(
|
||||
new ConsumerSpec<Chunk>("input", this.onMessage.bind(this)),
|
||||
new ConsumerSpec<Chunk>("extract-input", this.onMessage.bind(this)),
|
||||
);
|
||||
this.registerSpecification(new ProducerSpec<Triples>("triples"));
|
||||
this.registerSpecification(new ProducerSpec<EntityContexts>("entity-contexts"));
|
||||
this.registerSpecification(new ProducerSpec<Triples>("extract-triples"));
|
||||
this.registerSpecification(new ProducerSpec<EntityContexts>("extract-entity-contexts"));
|
||||
|
||||
this.registerSpecification(
|
||||
new RequestResponseSpec<PromptRequest, PromptResponse>(
|
||||
|
|
@@ -85,8 +85,8 @@ export class KnowledgeExtractService extends FlowProcessor {
|
|||
|
||||
const promptClient = flowCtx.flow.requestor<PromptRequest, PromptResponse>("prompt-client");
|
||||
const llmClient = flowCtx.flow.requestor<TextCompletionRequest, TextCompletionResponse>("llm-client");
|
||||
const triplesProducer = flowCtx.flow.producer<Triples>("triples");
|
||||
const entityContextsProducer = flowCtx.flow.producer<EntityContexts>("entity-contexts");
|
||||
const triplesProducer = flowCtx.flow.producer<Triples>("extract-triples");
|
||||
const entityContextsProducer = flowCtx.flow.producer<EntityContexts>("extract-entity-contexts");
|
||||
|
||||
const allTriples: Triple[] = [];
|
||||
const allEntityContexts: EntityContext[] = [];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue