mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-16 19:05:14 +02:00
- Keeps processing in different flows separate so that data can go to different stores / collections etc. - Potentially supports different processing flows - Tidies the processing API with common base-classes for e.g. LLMs, and automatic configuration of 'clients' to use the right queue names in a flow
65 lines
1.7 KiB
Python
65 lines
1.7 KiB
Python
|
|
from pulsar.schema import JsonSchema
|
|
|
|
import asyncio
|
|
import time
|
|
import pulsar
|
|
|
|
class Publisher:
    """Queue-backed asynchronous publisher for a single Pulsar topic.

    Items handed to :meth:`send` are placed on a bounded ``asyncio.Queue``
    and forwarded to the topic by a background task running :meth:`run`.
    If creating the producer or sending fails, the error is printed and a
    new producer is created after a short delay.
    """

    def __init__(self, client, topic, schema=None, max_size=10,
                 chunking_enabled=True):
        """Store the configuration; no producer or task is created here.

        Args:
            client: Object providing ``create_producer(...)``.
            topic: Topic name passed to ``create_producer``.
            schema: Record type wrapped in ``JsonSchema`` for the producer.
            max_size: Capacity of the internal queue; :meth:`send` waits
                while the queue is full.
            chunking_enabled: Passed through to ``create_producer``.
        """
        self.client = client
        self.topic = topic
        self.schema = schema
        self.q = asyncio.Queue(maxsize=max_size)
        self.chunking_enabled = chunking_enabled
        self.running = True
        # Set by start(); stays None if the publisher is never started so
        # that join() can be called safely in that case.
        self.task = None

    async def start(self):
        """Launch the background publishing task on the running loop."""
        self.task = asyncio.create_task(self.run())

    async def stop(self):
        """Ask the background task to finish; does not wait for it."""
        self.running = False

    async def join(self):
        """Stop the publisher and wait for the background task to end."""
        await self.stop()
        if self.task is not None:
            await self.task

    async def run(self):
        """Forward queued items to the topic until :meth:`stop` is called.

        The outer loop (re)creates the producer; the inner loop drains the
        queue. Any exception from either step is printed and followed by a
        2 second pause before a fresh producer is created.
        """
        while self.running:

            producer = None

            try:
                producer = self.client.create_producer(
                    topic=self.topic,
                    schema=JsonSchema(self.schema),
                    chunking_enabled=self.chunking_enabled,
                )

                while self.running:

                    # Poll with a timeout so a stop() request is noticed
                    # even when nothing is being queued.
                    try:
                        msg_id, item = await asyncio.wait_for(
                            self.q.get(),
                            timeout=0.5
                        )
                    except asyncio.TimeoutError:
                        continue

                    # NOTE(review): producer.send looks like a synchronous
                    # call and may hold up the event loop while it
                    # completes -- confirm against the client library.
                    if msg_id:
                        producer.send(item, {"id": msg_id})
                    else:
                        producer.send(item)

            except Exception as e:
                print("Exception:", e, flush=True)

            finally:
                # Release the producer on every exit path (error, stop or
                # cancellation) so retries do not accumulate producers.
                if producer is not None:
                    try:
                        producer.close()
                    except Exception as e:
                        print("Exception:", e, flush=True)

            # Clean shutdown: leave immediately, no retry delay.
            if not self.running:
                return

            # If handler drops out, sleep a retry.  This must be the
            # asyncio sleep: a blocking sleep would freeze every other
            # task on the event loop.
            await asyncio.sleep(2)

    async def send(self, id, item):
        """Queue ``item`` for publishing, waiting if the queue is full.

        Args:
            id: Optional identifier; when truthy it is attached to the
                outgoing message as the ``"id"`` property.
            item: The message payload handed to the producer.
        """
        await self.q.put((id, item))
|
|
|