Fix/chunking not enabled (#364)

* Enable chunking by default in producer

* Fix some issues including uploading large docs
This commit is contained in:
cybermaggedon 2025-05-06 00:28:20 +01:00 committed by GitHub
parent 844547ab5f
commit 9e4eb634a4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 17 additions and 6 deletions

View file

@ -4,7 +4,9 @@ import asyncio
class Producer:
def __init__(self, client, topic, schema, metrics=None):
def __init__(self, client, topic, schema, metrics=None,
chunking_enabled=True):
self.client = client
self.topic = topic
self.schema = schema
@ -14,6 +16,8 @@ class Producer:
self.running = True
self.producer = None
self.chunking_enabled = chunking_enabled
def __del__(self):
self.running = False
@ -38,7 +42,8 @@ class Producer:
print("Connect publisher to", self.topic, "...", flush=True)
self.producer = self.client.create_producer(
topic = self.topic,
schema = JsonSchema(self.schema)
schema = JsonSchema(self.schema),
chunking_enabled = self.chunking_enabled,
)
print("Connected to", self.topic, flush=True)
except Exception as e:

View file

@ -37,6 +37,7 @@ class Publisher:
while self.running:
try:
producer = self.client.create_producer(
topic=self.topic,
schema=JsonSchema(self.schema),

View file

@ -125,10 +125,13 @@ class LibrarianResponse(Record):
document_metadatas = Array(DocumentMetadata())
processing_metadatas = Array(ProcessingMetadata())
# FIXME: Is this right? The librarian queues use persistent topics so that
# message chunking works.
librarian_request_queue = topic(
'librarian', kind='non-persistent', namespace='request'
'librarian', kind='persistent', namespace='request'
)
librarian_response_queue = topic(
'librarian', kind='non-persistent', namespace='response',
'librarian', kind='persistent', namespace='response',
)