Per-flow librarian clients and per-workspace response queues (#865)

Replace singleton LibrarianClient with per-flow instances via the new LibrarianSpec, giving each flow its own librarian tied to the workspace-scoped request/response queues from the blueprint. Move all workspace-scoped services (config, flow, librarian, knowledge) from a single base-queue response producer to per-workspace response producers created alongside the existing per-workspace request consumers. Update the gateway dispatcher and bootstrapper flow client to subscribe to the matching workspace-scoped response queues. Fix WorkspaceInit to register workspaces through the IAM create-workspace API so they appear in __workspaces__ and are visible to the gateway. Simplify the bootstrapper gate to only check config-svc reachability. Update tests accordingly.
2026-06-30 08:59:37 +02:00 · 2026-05-06 12:01:01 +01:00 · 2026-05-06 12:01:01 +01:00 · 03cc5ac80f
commit 03cc5ac80f
parent 01bf1d89d5
30 changed files with 405 additions and 735 deletions
--- a/trustgraph-unstructured/trustgraph/decoding/universal/processor.py
+++ b/trustgraph-unstructured/trustgraph/decoding/universal/processor.py
@@ -23,9 +23,8 @@ import os
 from unstructured.partition.auto import partition

 from ... schema import Document, TextDocument, Metadata
-from ... schema import librarian_request_queue, librarian_response_queue
 from ... schema import Triples
-from ... base import FlowProcessor, ConsumerSpec, ProducerSpec, LibrarianClient
+from ... base import FlowProcessor, ConsumerSpec, ProducerSpec, LibrarianSpec

 from ... provenance import (
    document_uri, page_uri as make_page_uri,
@@ -44,9 +43,6 @@ logger = logging.getLogger(__name__)

 default_ident = "document-decoder"

-default_librarian_request_queue = librarian_request_queue
-default_librarian_response_queue = librarian_response_queue
-
 # Mime type to unstructured content_type mapping
 # unstructured auto-detects most formats, but we pass the hint when available
 MIME_EXTENSIONS = {
@@ -162,17 +158,12 @@ class Processor(FlowProcessor):
            )
        )

-        # Librarian client
-        self.librarian = LibrarianClient(
-            id=id, backend=self.pubsub, taskgroup=self.taskgroup,
+        self.register_specification(
+            LibrarianSpec()
        )

        logger.info("Universal decoder initialized")

-    async def start(self):
-        await super(Processor, self).start()
-        await self.librarian.start()
-
    def extract_elements(self, blob, mime_type=None):
        """
        Extract elements from a document using unstructured.
@@ -272,10 +263,9 @@ class Processor(FlowProcessor):
        page_content = text.encode("utf-8")

        # Save to librarian
-        await self.librarian.save_child_document(
+        await flow.librarian.save_child_document(
            doc_id=doc_id,
            parent_id=parent_doc_id,
-            workspace=flow.workspace,
            content=page_content,
            document_type="page" if is_page else "section",
            title=label,
@@ -351,10 +341,9 @@ class Processor(FlowProcessor):

        # Save to librarian
        if img_content:
-            await self.librarian.save_child_document(
+            await flow.librarian.save_child_document(
                doc_id=img_uri,
                parent_id=parent_doc_id,
-                workspace=flow.workspace,
                content=img_content,
                document_type="image",
                title=f"Image from page {page_number}" if page_number else "Image",
@@ -399,15 +388,13 @@ class Processor(FlowProcessor):
                f"Fetching document {v.document_id} from librarian..."
            )

-            doc_meta = await self.librarian.fetch_document_metadata(
+            doc_meta = await flow.librarian.fetch_document_metadata(
                document_id=v.document_id,
-                workspace=flow.workspace,
            )
            mime_type = doc_meta.kind if doc_meta else None

-            content = await self.librarian.fetch_document_content(
+            content = await flow.librarian.fetch_document_content(
                document_id=v.document_id,
-                workspace=flow.workspace,
            )

            if isinstance(content, str):
@@ -571,19 +558,6 @@ class Processor(FlowProcessor):
            help='Apply section strategy within pages too (default: false)',
        )

-        parser.add_argument(
-            '--librarian-request-queue',
-            default=default_librarian_request_queue,
-            help=f'Librarian request queue '
-                 f'(default: {default_librarian_request_queue})',
-        )
-
-        parser.add_argument(
-            '--librarian-response-queue',
-            default=default_librarian_response_queue,
-            help=f'Librarian response queue '
-                 f'(default: {default_librarian_response_queue})',
-        )


 def run():