mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-03 15:01:00 +02:00
Entity & triple batch size limits (#635)
* Entities and triples are now emitted in size-limited batches to avoid overloading downstream consumers. * Update tests
This commit is contained in:
parent
fe389354f6
commit
d886358be6
5 changed files with 79 additions and 15 deletions
|
|
@ -16,12 +16,14 @@ import logging
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "graph-embeddings"
|
||||
default_batch_size = 5
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
id = params.get("id")
|
||||
self.batch_size = params.get("batch_size", default_batch_size)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
**params | {
|
||||
|
|
@ -73,12 +75,13 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
if entities:
|
||||
# Send in batches to avoid oversized messages
|
||||
for i in range(0, len(entities), self.batch_size):
|
||||
batch = entities[i:i + self.batch_size]
|
||||
r = GraphEmbeddings(
|
||||
metadata=v.metadata,
|
||||
entities=entities,
|
||||
entities=batch,
|
||||
)
|
||||
|
||||
await flow("output").send(r)
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -92,6 +95,13 @@ class Processor(FlowProcessor):
|
|||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
parser.add_argument(
|
||||
'--batch-size',
|
||||
type=int,
|
||||
default=default_batch_size,
|
||||
help=f'Maximum entities per output message (default: {default_batch_size})'
|
||||
)
|
||||
|
||||
FlowProcessor.add_args(parser)
|
||||
|
||||
def run():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue