feat: add document pipeline — PDF decoder, Ollama LLM, storage services

Add end-to-end document processing pipeline:

- PDF decoder service (pdfjs-dist) extracts text per page from librarian docs
- Ollama native LLM service for local model inference
- FalkorDB triples store FlowProcessor consumer
- Qdrant graph embeddings store FlowProcessor consumer
- Fix spec name collisions in chunker/extractor (input→chunk-input, etc.)
- Gateway /load endpoint to trigger document processing
- Align flow manager blueprint and seed config with full pipeline topics
- Add runner scripts and test coverage for document load

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-07-02 22:41:01 +02:00 · 2026-04-06 23:47:43 -05:00 · 2026-04-06 23:47:43 -05:00 · 8f7008822a
commit 8f7008822a
parent 8f9de7604e
20 changed files with 894 additions and 37 deletions
--- a/ts/packages/flow/package.json
+++ b/ts/packages/flow/package.json
@@ -11,13 +11,15 @@
    "test": "vitest run"
  },
  "dependencies": {
-    "@trustgraph/base": "workspace:*",
-    "openai": "^4.85.0",
    "@anthropic-ai/sdk": "^0.39.0",
+    "@fastify/websocket": "^11.0.0",
    "@qdrant/js-client-rest": "^1.13.0",
+    "@trustgraph/base": "workspace:*",
    "falkordb": "^5.0.0",
    "fastify": "^5.2.0",
-    "@fastify/websocket": "^11.0.0"
+    "ollama": "^0.6.3",
+    "openai": "^4.85.0",
+    "pdfjs-dist": "^5.6.205"
  },
  "devDependencies": {
    "typescript": "^5.8.0",