trustgraph/trustgraph_configurator/templates/2.0/flows/load.jsonnet
elpresidank 74cc8a4685 Squashed 'ai-context/trustgraph-templates/' content from commit 42a5fd1b
git-subtree-dir: ai-context/trustgraph-templates
git-subtree-split: 42a5fd1b678f32be378062e30451e2052ccb95dd
2026-04-05 21:09:49 -05:00

43 lines
1.4 KiB
Jsonnet

// Document loading and preprocessing module
// Handles document ingestion, format conversion, and chunking
// Converts PDFs to text and splits documents into processable chunks
local helpers = import "helpers.jsonnet";
local flow = helpers.flow;
local request = helpers.request;
local response = helpers.response;
local request_response = helpers.request_response;
// Import shared services (load requires embeddings for chunk processing)
local embeddings_service = import "embeddings-service.jsonnet";
// Merge shared services with load-specific configuration
// Queue descriptors used by the interfaces and processors below. Each one is
// built a single time through the `flow` helper and then reused, so the
// producer and consumer sides of a queue always refer to the same value.
local document_queue = flow("document-load:{id}");
local text_queue = flow("text-document-load:{id}");
local chunk_queue = flow("chunk-load:{id}");

// Layer the load-specific configuration on top of the shared embeddings
// service. Every `+:` field extends the inherited object of the same name
// instead of replacing it.
embeddings_service + {

    // Externally visible entry points for document loading.
    "interfaces" +: {
        "document-load": document_queue,
        "text-load": text_queue,
    },

    // Flow-level processors that preprocess documents.
    "flow" +: {

        // PDF decoder: reads from the document-load queue and writes the
        // decoded text to the text-document-load queue.
        "pdf-decoder:{id}": {
            "input": document_queue,
            "output": text_queue,
        },

        // Chunker: reads text documents and writes smaller pieces to the
        // chunk-load queue. The size and overlap values are left as
        // "{...}" placeholders here (presumably substituted when the flow
        // is instantiated -- confirm against the consumer of this template).
        "chunker:{id}": {
            "input": text_queue,
            "output": chunk_queue,
            "chunk-size": "{chunk-size}",
            "chunk-overlap": "{chunk-overlap}",
        },

    },

    // This module contributes no blueprint-level processors of its own; the
    // empty extension keeps the "blueprint" key present in the result.
    "blueprint" +: {},

}