Entity & triple batch size limits (#635)

* Entities and triples are now emitted in batches, capped at a configurable
batch size, to avoid overloading downstream consumers.

* Update tests
This commit is contained in:
cybermaggedon 2026-02-16 17:38:03 +00:00 committed by GitHub
parent fe389354f6
commit d886358be6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 79 additions and 15 deletions

View file

@ -16,12 +16,14 @@ import logging
logger = logging.getLogger(__name__)
default_ident = "graph-embeddings"
default_batch_size = 5
class Processor(FlowProcessor):
def __init__(self, **params):
id = params.get("id")
self.batch_size = params.get("batch_size", default_batch_size)
super(Processor, self).__init__(
**params | {
@ -73,12 +75,13 @@ class Processor(FlowProcessor):
)
)
if entities:
# Send in batches to avoid oversized messages
for i in range(0, len(entities), self.batch_size):
batch = entities[i:i + self.batch_size]
r = GraphEmbeddings(
metadata=v.metadata,
entities=entities,
entities=batch,
)
await flow("output").send(r)
except Exception as e:
@ -92,6 +95,13 @@ class Processor(FlowProcessor):
@staticmethod
def add_args(parser):
    """Register this processor's command-line options on ``parser``.

    Adds ``--batch-size`` (maximum entities per output message) and then
    delegates to ``FlowProcessor.add_args`` for the remaining options.

    Args:
        parser: object exposing ``add_argument`` (presumably an
            ``argparse.ArgumentParser`` -- confirm against the caller).
    """
    # Local import keeps this method self-contained.
    import argparse

    def positive_int(text):
        """Parse ``text`` as an int and reject values below 1."""
        value = int(text)  # a ValueError here is reported by argparse
        if value < 1:
            raise argparse.ArgumentTypeError(
                f"batch size must be a positive integer, got {value}"
            )
        return value

    parser.add_argument(
        '--batch-size',
        # The batching loop uses this value as a range() step: 0 raises
        # ValueError and a negative step yields no batches at all, so
        # reject both at parse time.
        type=positive_int,
        default=default_batch_size,
        help=f'Maximum entities per output message (default: {default_batch_size})'
    )

    FlowProcessor.add_args(parser)
def run():