Feature/configure flows (#345)

- Keeps processing in different flows separate, so that data can go to different stores, collections, etc.
- Potentially supports different processing flows
- Tidies the processing API with common base classes (e.g. for LLMs), and automatic configuration of 'clients' to use the right queue names in a flow
This commit is contained in:
cybermaggedon 2025-04-22 20:21:38 +01:00 committed by GitHub
parent a06a814a41
commit a9197d11ee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
125 changed files with 3751 additions and 2628 deletions

View file

@ -1,47 +1,52 @@
import queue
from pulsar.schema import JsonSchema
import asyncio
import time
import pulsar
import threading
class Publisher:
# Constructor: stores the client, topic, schema and chunking flag, creates a
# queue bounded by max_size, and marks the publisher as running.
#
# NOTE(review): this text looks like a rendered diff whose +/- markers and
# indentation were stripped, so a superseded line and its replacement appear
# back to back. In each adjacent pair below, the first line is presumably the
# removed version and the second the added one — confirm against the
# repository before relying on either.
#
# Pair 1: the first parameter is named pulsar_client in one signature and
# client in the other; both signatures share the chunking_enabled=True line.
def __init__(self, pulsar_client, topic, schema=None, max_size=10,
def __init__(self, client, topic, schema=None, max_size=10,
chunking_enabled=True):
# Pair 2: the stored client, matching whichever signature is in effect.
self.client = pulsar_client
self.client = client
self.topic = topic
self.schema = schema
# Pair 3: a thread-oriented queue.Queue versus an asyncio.Queue; both are
# bounded by max_size.
self.q = queue.Queue(maxsize=max_size)
self.q = asyncio.Queue(maxsize=max_size)
self.chunking_enabled = chunking_enabled
# Loop-control flag tested by the `while self.running` loops in run().
self.running = True
# Start run() in the background and keep a handle to it in self.task.
#
# NOTE(review): two variants are shown back to back — presumably the removed
# thread-based version followed by its asyncio replacement; confirm.
#
# Thread variant: runs self.run on a new threading.Thread.
def start(self):
self.task = threading.Thread(target=self.run)
self.task.start()
# asyncio variant: wraps the run() coroutine in a task. asyncio.create_task
# requires a running event loop, which holds when this coroutine is awaited.
async def start(self):
self.task = asyncio.create_task(self.run())
# Request shutdown by clearing the running flag; the `while self.running`
# loops in run() test this flag on each iteration.
#
# NOTE(review): two headers are shown for one body — presumably the removed
# synchronous signature followed by the added async one; confirm. The body
# contains no await in either form.
def stop(self):
async def stop(self):
self.running = False
# Stop the publisher, then wait for the background run() to finish.
#
# NOTE(review): two variants are shown back to back — presumably the removed
# thread-based version followed by its asyncio replacement; confirm.
#
# Thread variant: clears the running flag, then blocks on Thread.join().
def join(self):
self.stop()
self.task.join()
# asyncio variant: awaits stop(), then awaits the task stored by start().
# Awaiting a task re-raises any exception it ended with.
async def join(self):
await self.stop()
await self.task
# Main publishing loop: while the running flag is set, create a producer for
# self.topic on the client, then repeatedly take (id, item) pairs off the
# queue.
#
# NOTE(review): this block looks like a rendered diff with +/- markers and
# indentation stripped. Adjacent near-duplicate lines are presumably the
# removed version followed by the added one, and the "@ ... @" line further
# down is a hunk header hiding part of the body. Confirm against the
# repository; what happens to each dequeued item is not visible here.
def run(self):
async def run(self):
# Outer loop: a new producer is created on every pass of this loop.
while self.running:
try:
producer = self.client.create_producer(
topic=self.topic,
# Pair: the schema passed as-is versus wrapped in JsonSchema.
schema=self.schema,
schema=JsonSchema(self.schema),
chunking_enabled=self.chunking_enabled,
)
# Inner loop: poll the queue while the running flag is set.
while self.running:
try:
# Thread variant: blocking get with a 0.5 s timeout, so the loop
# re-checks self.running at least twice a second.
# NOTE(review): the name `id` shadows the builtin of the same name.
id, item = self.q.get(timeout=0.5)
except queue.Empty:
# asyncio variant: the same 0.5 s polling interval via asyncio.wait_for.
id, item = await asyncio.wait_for(
self.q.get(),
timeout=0.5
)
except asyncio.TimeoutError:
continue
# NOTE(review): awaiting asyncio.Queue.get() waits for an item rather than
# raising QueueEmpty (that is raised by get_nowait()), so this handler
# looks unreachable — confirm.
except asyncio.QueueEmpty:
continue
if id:
# Hunk boundary: lines of the file between the two hunks are not shown.
@ -55,7 +60,6 @@ class Publisher:
# If handler drops out, sleep a retry
# NOTE(review): time.sleep() blocks the calling thread. If this line is
# inside the `async def run` coroutine, it stalls the event loop for the
# full 2 s; `await asyncio.sleep(2)` would be the non-blocking form.
# Confirm which variant this line belongs to.
time.sleep(2)
# Enqueue an (id, payload) pair on self.q for run() to pick up.
#
# NOTE(review): two variants are shown back to back — presumably the removed
# synchronous version followed by its asyncio replacement; confirm. The
# second parameter is named msg in one and item in the other, which matters
# to any caller passing it by keyword. The name `id` shadows the builtin.
def send(self, id, msg):
self.q.put((id, msg))
# asyncio variant: awaiting put() on a full bounded asyncio.Queue suspends
# the caller until a slot is free.
async def send(self, id, item):
await self.q.put((id, item))