Librarian (#304)

This commit is contained in:
cybermaggedon 2025-02-11 16:01:03 +00:00 committed by GitHub
parent e99c0ac238
commit a0bf2362f6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
32 changed files with 922 additions and 66 deletions

View file

@ -3,4 +3,6 @@ from . base_processor import BaseProcessor
from . consumer import Consumer
from . producer import Producer
from . consumer_producer import ConsumerProducer
from . publisher import Publisher
from . subscriber import Subscriber

View file

@ -28,15 +28,19 @@ class BaseProcessor:
})
pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
pulsar_listener = params.get("pulsar_listener", None)
log_level = params.get("log_level", LogLevel.INFO)
self.pulsar_host = pulsar_host
self.client = pulsar.Client(
pulsar_host,
listener_name=pulsar_listener,
logger=pulsar.ConsoleLogger(log_level.to_pulsar())
)
self.pulsar_listener = pulsar_listener
def __del__(self):
if hasattr(self, "client"):
@ -52,6 +56,11 @@ class BaseProcessor:
help=f'Pulsar host (default: {__class__.default_pulsar_host})',
)
parser.add_argument(
'--pulsar-listener',
help=f'Pulsar listener (default: none)',
)
parser.add_argument(
'-l', '--log-level',
type=LogLevel,

View file

@ -0,0 +1,67 @@
import queue
import time
import pulsar
import threading
class Publisher:
    """Background publisher that drains a bounded queue onto a Pulsar topic.

    Callers enqueue messages with :meth:`send`; a worker thread started by
    :meth:`start` owns the Pulsar client/producer and forwards each queued
    item.  If the connection or a send fails, the worker logs the error,
    closes the client, waits briefly and reconnects.
    """

    def __init__(self, pulsar_host, topic, schema=None, max_size=10,
                 chunking_enabled=True, listener=None):
        """Store connection settings and create the outgoing queue.

        Args:
            pulsar_host: Service URL handed to ``pulsar.Client``.
            topic: Topic the producer is created on.
            schema: Schema passed to ``create_producer`` (may be ``None``).
            max_size: Capacity of the internal queue; :meth:`send` blocks
                once this many items are waiting.
            chunking_enabled: Forwarded to ``create_producer``.
            listener: Forwarded to ``pulsar.Client`` as ``listener_name``.
        """
        self.pulsar_host = pulsar_host
        self.topic = topic
        self.schema = schema
        self.q = queue.Queue(maxsize=max_size)
        self.chunking_enabled = chunking_enabled
        self.listener_name = listener
        self.running = True
        # Worker thread; stays None until start() is called so that join()
        # is safe on a publisher that was never started.
        self.task = None

    def start(self):
        """Launch the worker thread running :meth:`run`."""
        self.task = threading.Thread(target=self.run)
        self.task.start()

    def stop(self):
        """Ask the worker loop to exit; it notices within its poll interval."""
        self.running = False

    def join(self):
        """Stop the worker and wait for its thread to finish.

        A no-op (beyond clearing ``running``) if :meth:`start` was never
        called.
        """
        self.stop()
        if self.task is not None:
            self.task.join()

    def run(self):
        """Worker loop: connect, forward queued items, reconnect on failure.

        Runs until ``self.running`` is cleared.  Each pass creates its own
        client and producer and always closes the client afterwards, so a
        retry does not leave the previous connection open.
        """
        while self.running:

            client = None

            try:

                client = pulsar.Client(
                    self.pulsar_host, listener_name=self.listener_name
                )

                producer = client.create_producer(
                    topic=self.topic,
                    schema=self.schema,
                    chunking_enabled=self.chunking_enabled,
                )

                while self.running:

                    # Poll with a timeout so a stop request is noticed even
                    # when nothing is being published.
                    try:
                        msg_id, item = self.q.get(timeout=0.5)
                    except queue.Empty:
                        continue

                    # NOTE: an item whose send raises has already been taken
                    # off the queue and is not retried.
                    if msg_id:
                        producer.send(item, {"id": msg_id})
                    else:
                        producer.send(item)

            except Exception as e:
                print("Exception:", e, flush=True)

            finally:
                # Release the connection before retrying or exiting.
                if client is not None:
                    try:
                        client.close()
                    except Exception as e:
                        print("Exception:", e, flush=True)

            # If handler drops out, sleep a retry -- but do not delay a
            # requested shutdown.
            if self.running:
                time.sleep(2)

    def send(self, id, msg):
        """Queue ``msg`` for publication.

        Args:
            id: Optional identifier; when truthy it is attached to the
                outgoing message as the ``"id"`` property.
            msg: Payload handed to the producer.

        Blocks while the internal queue is full.
        """
        self.q.put((id, msg))

View file

@ -0,0 +1,120 @@
import queue
import pulsar
import threading
import time
class Subscriber:
    """Background consumer that fans Pulsar messages out to local queues.

    A worker thread started by :meth:`start` owns the Pulsar client and
    consumer.  Each received message is acknowledged and then copied to:

    * the queue registered via :meth:`subscribe` under the message's
      ``"id"`` property, if there is one, and
    * every queue registered via :meth:`subscribe_all`.
    """

    def __init__(self, pulsar_host, topic, subscription, consumer_name,
                 schema=None, max_size=100, listener=None):
        """Store connection settings and create the empty queue registries.

        Args:
            pulsar_host: Service URL handed to ``pulsar.Client``.
            topic: Topic to subscribe to.
            subscription: Passed to ``subscribe`` as ``subscription_name``.
            consumer_name: Passed to ``subscribe`` as ``consumer_name``.
            schema: Schema passed to ``subscribe`` (may be ``None``).
            max_size: Capacity of each queue handed out to subscribers.
            listener: Forwarded to ``pulsar.Client`` as ``listener_name``.
        """
        self.pulsar_host = pulsar_host
        self.topic = topic
        self.subscription = subscription
        self.consumer_name = consumer_name
        self.schema = schema
        self.q = {}        # id -> queue for messages carrying that id
        self.full = {}     # id -> queue receiving every message
        self.max_size = max_size
        self.lock = threading.Lock()   # guards self.q and self.full
        self.listener_name = listener
        self.running = True
        # Worker thread; stays None until start() is called so that join()
        # is safe on a subscriber that was never started.
        self.task = None

    def start(self):
        """Launch the worker thread running :meth:`run`."""
        self.task = threading.Thread(target=self.run)
        self.task.start()

    def stop(self):
        """Ask the worker loop to exit; it notices within its poll interval."""
        self.running = False

    def join(self):
        """Wait for the worker thread to finish.

        Does not request a stop itself; call :meth:`stop` first.  A no-op
        if :meth:`start` was never called.
        """
        if self.task is not None:
            self.task.join()

    def run(self):
        """Worker loop: connect, receive and dispatch, reconnect on failure.

        Runs until ``self.running`` is cleared.  Each pass creates its own
        client and consumer and always closes the client afterwards, so a
        retry does not leave the previous connection open.
        """
        while self.running:

            client = None

            try:

                client = pulsar.Client(
                    self.pulsar_host,
                    listener_name=self.listener_name,
                )

                consumer = client.subscribe(
                    topic=self.topic,
                    subscription_name=self.subscription,
                    consumer_name=self.consumer_name,
                    schema=self.schema,
                )

                while self.running:

                    # Receive with a timeout so a stop request is noticed
                    # even when the topic is idle.
                    try:
                        msg = consumer.receive(timeout_millis=500)
                    except pulsar.Timeout:
                        continue

                    # Acknowledge successful reception of the message
                    consumer.acknowledge(msg)

                    # Messages without an "id" property only go to the
                    # subscribe_all queues.
                    try:
                        msg_id = msg.properties()["id"]
                    except Exception:
                        msg_id = None

                    self._deliver(msg_id, msg.value())

            except Exception as e:
                print("Exception:", e, flush=True)

            finally:
                # Release the connection before retrying or exiting.
                if client is not None:
                    try:
                        client.close()
                    except Exception as e:
                        print("Exception:", e, flush=True)

            # If handler drops out, sleep a retry -- but do not delay a
            # requested shutdown.
            if self.running:
                time.sleep(2)

    def _deliver(self, id, value):
        """Copy ``value`` to the queue registered for ``id`` and to all
        subscribe_all queues."""
        # Snapshot the targets under the lock, then put outside it, so a
        # slow consumer with a full queue cannot stall subscribe() and
        # unsubscribe() callers.
        with self.lock:
            targets = []
            if id in self.q:
                targets.append(self.q[id])
            targets.extend(self.full.values())

        for target in targets:
            try:
                # FIXME: Timeout means data goes missing
                target.put(value, timeout=0.5)
            except queue.Full:
                # Best effort: the value is dropped for this subscriber.
                pass

    def subscribe(self, id):
        """Register and return a new queue for messages whose ``"id"``
        property equals ``id``; replaces any queue already registered
        under that id."""
        with self.lock:
            q = queue.Queue(maxsize=self.max_size)
            self.q[id] = q
        return q

    def unsubscribe(self, id):
        """Remove the queue registered under ``id``; no-op if absent."""
        with self.lock:
            self.q.pop(id, None)

    def subscribe_all(self, id):
        """Register and return a new queue that receives every message.

        ``id`` only names the registration so it can be removed later with
        :meth:`unsubscribe_all`.
        """
        with self.lock:
            q = queue.Queue(maxsize=self.max_size)
            self.full[id] = q
        return q

    def unsubscribe_all(self, id):
        """Remove the receive-everything queue registered under ``id``;
        no-op if absent."""
        with self.lock:
            self.full.pop(id, None)