Fix/chunking not enabled (#364)

* Enable chunking by default in producer * Fix some issues including uploading large docs
2026-06-21 20:58:06 +02:00 · 2025-05-06 00:28:20 +01:00 · 2025-05-06 00:28:20 +01:00 · 9e4eb634a4
commit 9e4eb634a4
parent 844547ab5f
8 changed files with 17 additions and 6 deletions
--- a/trustgraph-base/trustgraph/base/producer.py
+++ b/trustgraph-base/trustgraph/base/producer.py
@ -4,7 +4,9 @@ import asyncio

 class Producer:

-    def __init__(self, client, topic, schema, metrics=None):
+    def __init__(self, client, topic, schema, metrics=None,
+                 chunking_enabled=True):
+
        self.client = client
        self.topic = topic
        self.schema = schema
@ -14,6 +16,8 @@ class Producer:
        self.running = True
        self.producer = None

+        self.chunking_enabled = chunking_enabled
+
    def __del__(self):

        self.running = False
@ -38,7 +42,8 @@ class Producer:
                print("Connect publisher to", self.topic, "...", flush=True)
                self.producer = self.client.create_producer(
                    topic = self.topic,
-                    schema = JsonSchema(self.schema)
+                    schema = JsonSchema(self.schema),
+                    chunking_enabled = self.chunking_enabled,
                )
                print("Connected to", self.topic, flush=True)
            except Exception as e:
--- a/trustgraph-base/trustgraph/base/publisher.py
+++ b/trustgraph-base/trustgraph/base/publisher.py
@ -37,6 +37,7 @@ class Publisher:
        while self.running:

            try:
+
                producer = self.client.create_producer(
                    topic=self.topic,
                    schema=JsonSchema(self.schema),
--- a/trustgraph-base/trustgraph/schema/library.py
+++ b/trustgraph-base/trustgraph/schema/library.py
@ -125,10 +125,13 @@ class LibrarianResponse(Record):
    document_metadatas = Array(DocumentMetadata())
    processing_metadatas = Array(ProcessingMetadata())

+# FIXME: Is this right?  Using persistence on librarian so that
+# message chunking works
+
 librarian_request_queue = topic(
-    'librarian', kind='non-persistent', namespace='request'
+    'librarian', kind='persistent', namespace='request'
 )
 librarian_response_queue = topic(
-    'librarian', kind='non-persistent', namespace='response',
+    'librarian', kind='persistent', namespace='response',
 )