mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-03 15:01:00 +02:00
git-subtree-dir: ai-context/trustgraph-templates git-subtree-split: 42a5fd1b678f32be378062e30451e2052ccb95dd
51 lines
1.9 KiB
Jsonnet
51 lines
1.9 KiB
Jsonnet
// Document loading and preprocessing module
// Handles document ingestion, format conversion, and chunking
// Converts PDFs to text and splits documents into processable chunks

// Helper constructors used to describe the queues each processor is wired to.
local helpers = import "helpers.jsonnet";
local flow = helpers.flow;
local request = helpers.request;
local response = helpers.response;
local request_response = helpers.request_response;

// Shared services this module extends (load requires embeddings for chunk
// processing).
local embeddings_service = import "embeddings-service.jsonnet";

// Queues referenced more than once below: each one is both an external
// interface and a processor input/output, so bind it once and reuse it.
local document_queue = flow("document-load:{id}");
local text_queue = flow("text-document-load:{id}");

// Wiring that the PDF decoder and the chunker have in common: a triples
// output plus the librarian request/response pair.
local provenance_wiring = {
    triples: flow("triples-store:{id}"),
    "librarian-request": request("librarian"),
    "librarian-response": response("librarian"),
};

// The module's value: the shared embeddings service definition extended with
// the load-specific interfaces and processors.
embeddings_service + {

    // External entry points for document loading.
    "interfaces" +: {
        "document-load": document_queue,
        "text-load": text_queue,
    },

    // Flow-level processors for document preprocessing.
    "flow" +: {

        // PDF decoder: reads from the document-load queue and writes to the
        // text-document-load queue. Per the module's notes it also emits
        // page provenance triples and saves pages via the librarian.
        "pdf-decoder:{id}": {
            input: document_queue,
            output: text_queue,
        } + provenance_wiring,

        // Chunker: reads from the text-document-load queue and writes to the
        // chunk-load queue. Per the module's notes it also emits chunk
        // provenance triples and saves chunks via the librarian.
        // "{chunk-size}" and "{chunk-overlap}" are passed through as literal
        // placeholder strings, in the same style as "{id}".
        "chunker:{id}": {
            input: text_queue,
            output: flow("chunk-load:{id}"),
            "chunk-size": "{chunk-size}",
            "chunk-overlap": "{chunk-overlap}",
        } + provenance_wiring,
    },

    // Blueprint-level processors for document loading services (this module
    // adds none of its own).
    "blueprint" +: {
    },
}
|