Fix/chunking not enabled (#364)

* Enable chunking by default in producer

* Fix some issues including uploading large docs
This commit is contained in:
cybermaggedon 2025-05-06 00:28:20 +01:00 committed by GitHub
parent 844547ab5f
commit 9e4eb634a4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 17 additions and 6 deletions

View file

@ -4,7 +4,9 @@ import asyncio
class Producer:
def __init__(self, client, topic, schema, metrics=None):
def __init__(self, client, topic, schema, metrics=None,
chunking_enabled=True):
self.client = client
self.topic = topic
self.schema = schema
@ -14,6 +16,8 @@ class Producer:
self.running = True
self.producer = None
self.chunking_enabled = chunking_enabled
def __del__(self):
self.running = False
@ -38,7 +42,8 @@ class Producer:
print("Connect publisher to", self.topic, "...", flush=True)
self.producer = self.client.create_producer(
topic = self.topic,
schema = JsonSchema(self.schema)
schema = JsonSchema(self.schema),
chunking_enabled = self.chunking_enabled,
)
print("Connected to", self.topic, flush=True)
except Exception as e:

View file

@ -37,6 +37,7 @@ class Publisher:
while self.running:
try:
producer = self.client.create_producer(
topic=self.topic,
schema=JsonSchema(self.schema),

View file

@ -125,10 +125,13 @@ class LibrarianResponse(Record):
document_metadatas = Array(DocumentMetadata())
processing_metadatas = Array(ProcessingMetadata())
# FIXME: Confirm this is correct. The librarian queues are made persistent
# so that message chunking works.
librarian_request_queue = topic(
'librarian', kind='non-persistent', namespace='request'
'librarian', kind='persistent', namespace='request'
)
librarian_response_queue = topic(
'librarian', kind='non-persistent', namespace='response',
'librarian', kind='persistent', namespace='response',
)

1
trustgraph-cli/scripts/tg-show-library-documents Normal file → Executable file
View file

@ -36,6 +36,7 @@ def show_docs(url, user):
table,
tablefmt="pretty",
stralign="left",
maxcolwidths=[None, 55],
))
print()

1
trustgraph-cli/scripts/tg-show-library-processing Normal file → Executable file
View file

@ -36,6 +36,7 @@ def show_procs(url, user):
table,
tablefmt="pretty",
stralign="left",
maxcolwidths=[None, 50],
))
print()

0
trustgraph-cli/scripts/tg-start-library-processing Normal file → Executable file
View file

0
trustgraph-cli/scripts/tg-stop-library-processing Normal file → Executable file
View file

View file

@ -105,7 +105,7 @@ def serialize_document_metadata(message):
if message.user:
ret["user"] = message.user
if message.tags:
if message.tags is not None:
ret["tags"] = message.tags
return ret
@ -132,7 +132,7 @@ def serialize_processing_metadata(message):
if message.collection:
ret["collection"] = message.collection
if message.tags:
if message.tags is not None:
ret["tags"] = message.tags
return ret