// Document loading and preprocessing.
//
// Covers document ingestion, format conversion and chunking: PDF documents
// are converted to text, and text documents are split into processable
// chunks.

local helpers = import "helpers.jsonnet";

// Queue/endpoint constructors supplied by the helpers library.
// `request_response` is bound here but not referenced in this module.
local flow = helpers.flow,
      request = helpers.request,
      response = helpers.response,
      request_response = helpers.request_response;

// Shared services this module extends (loading needs embeddings for chunk
// processing).
local embeddings_service = import "embeddings-service.jsonnet";

// Queues that are referenced by more than one entry below.
local document_queue = flow("document-load:{id}");
local text_queue = flow("text-document-load:{id}");

// Fields common to both processors: the provenance-triples output and the
// librarian request/response pair.
local provenance_and_librarian = {
    triples: flow("triples-store:{id}"),
    "librarian-request": request("librarian"),
    "librarian-response": response("librarian"),
};

// The module's value: the shared services object with the load-specific
// sections merged in. `+:` extends a section inherited from
// embeddings_service instead of replacing it.
embeddings_service + {

    // External entry points for document loading.
    interfaces +: {
        "document-load": document_queue,
        "text-load": text_queue,
    },

    // Flow-level processors for document preprocessing.
    // (The key stays quoted to keep it visually distinct from the flow()
    // helper used inside.)
    "flow" +: {

        // PDF decoder: converts PDF documents to text. Also emits page
        // provenance triples and saves pages via the librarian.
        "pdf-decoder:{id}": {
            input: document_queue,
            output: text_queue,
        } + provenance_and_librarian,

        // Chunker: splits documents into smaller, processable pieces. Also
        // emits chunk provenance triples and saves chunks via the librarian.
        // The size and overlap values are left as "{...}" placeholders.
        "chunker:{id}": {
            input: text_queue,
            output: flow("chunk-load:{id}"),
            "chunk-size": "{chunk-size}",
            "chunk-overlap": "{chunk-overlap}",
        } + provenance_and_librarian,

    },

    // Blueprint-level processors for document loading services
    // (none are defined here).
    blueprint +: {
    },

}