# Mirror of https://github.com/trustgraph-ai/trustgraph.git
# Synced 2026-04-26 00:46:22 +02:00 (350 lines, 12 KiB, Python)
"""
Pulsar backend implementation for pub/sub abstraction.

This module provides a Pulsar-specific implementation of the backend interfaces,
handling topic mapping, serialization, and Pulsar client management.
"""
|
|
|
|
import pulsar
|
|
import _pulsar
|
|
import json
|
|
import logging
|
|
import base64
|
|
import types
|
|
from dataclasses import asdict, is_dataclass
|
|
from typing import Any
|
|
|
|
from .backend import PubSubBackend, BackendProducer, BackendConsumer, Message
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def dataclass_to_dict(obj: Any) -> Any:
    """
    Recursively convert a dataclass (or any nested value) into JSON-ready data.

    None values are preserved: a dataclass field holding None appears in the
    result with the value None.
    Bytes values are decoded as UTF-8 strings for JSON serialization (matching Pulsar behavior).
    Handles nested dataclasses, lists, tuples, and dictionaries recursively.

    Args:
        obj: A dataclass instance, container, bytes value, or primitive.

    Returns:
        A dict for dataclass or dict input, a list for list or tuple input,
        a str for bytes input, None for None, and any other value unchanged.

    Raises:
        UnicodeDecodeError: If a bytes value is not valid UTF-8.
    """
    if obj is None:
        return None

    # Handle bytes - decode to UTF-8 for JSON serialization
    if isinstance(obj, bytes):
        return obj.decode('utf-8')

    # Handle dataclass - asdict() already flattens nested dataclasses, but it
    # leaves bytes (and tuples) untouched, so every value is processed again
    if is_dataclass(obj):
        return {key: dataclass_to_dict(value) for key, value in asdict(obj).items()}

    # Handle list and tuple - JSON has no tuple type, so both become lists.
    # Tuples must be walked too, otherwise bytes nested inside them stay raw
    if isinstance(obj, (list, tuple)):
        return [dataclass_to_dict(item) for item in obj]

    # Handle dict - recursively process all values (keys are left as-is)
    if isinstance(obj, dict):
        return {k: dataclass_to_dict(v) for k, v in obj.items()}

    # Return primitive types as-is
    return obj
|
|
|
|
|
|
def dict_to_dataclass(data: dict, cls: type) -> Any:
|
|
"""
|
|
Convert a dictionary back to a dataclass instance.
|
|
|
|
Handles nested dataclasses and missing fields.
|
|
"""
|
|
if data is None:
|
|
return None
|
|
|
|
if not is_dataclass(cls):
|
|
return data
|
|
|
|
# Get field types from the dataclass
|
|
field_types = {f.name: f.type for f in cls.__dataclass_fields__.values()}
|
|
kwargs = {}
|
|
|
|
for key, value in data.items():
|
|
if key in field_types:
|
|
field_type = field_types[key]
|
|
|
|
# Handle modern union types (X | Y)
|
|
if isinstance(field_type, types.UnionType):
|
|
# Check if it's Optional (X | None)
|
|
if type(None) in field_type.__args__:
|
|
# Get the non-None type
|
|
actual_type = next((t for t in field_type.__args__ if t is not type(None)), None)
|
|
if actual_type and is_dataclass(actual_type) and isinstance(value, dict):
|
|
kwargs[key] = dict_to_dataclass(value, actual_type)
|
|
else:
|
|
kwargs[key] = value
|
|
else:
|
|
kwargs[key] = value
|
|
# Check if this is a generic type (list, dict, etc.)
|
|
elif hasattr(field_type, '__origin__'):
|
|
# Handle list[T]
|
|
if field_type.__origin__ == list:
|
|
item_type = field_type.__args__[0] if field_type.__args__ else None
|
|
if item_type and is_dataclass(item_type) and isinstance(value, list):
|
|
kwargs[key] = [
|
|
dict_to_dataclass(item, item_type) if isinstance(item, dict) else item
|
|
for item in value
|
|
]
|
|
else:
|
|
kwargs[key] = value
|
|
# Handle old-style Optional[T] (which is Union[T, None])
|
|
elif hasattr(field_type, '__args__') and type(None) in field_type.__args__:
|
|
# Get the non-None type from Union
|
|
actual_type = next((t for t in field_type.__args__ if t is not type(None)), None)
|
|
if actual_type and is_dataclass(actual_type) and isinstance(value, dict):
|
|
kwargs[key] = dict_to_dataclass(value, actual_type)
|
|
else:
|
|
kwargs[key] = value
|
|
else:
|
|
kwargs[key] = value
|
|
# Handle direct dataclass fields
|
|
elif is_dataclass(field_type) and isinstance(value, dict):
|
|
kwargs[key] = dict_to_dataclass(value, field_type)
|
|
# Handle bytes fields (UTF-8 encoded strings from JSON)
|
|
elif field_type == bytes and isinstance(value, str):
|
|
kwargs[key] = value.encode('utf-8')
|
|
else:
|
|
kwargs[key] = value
|
|
|
|
return cls(**kwargs)
|
|
|
|
|
|
class PulsarMessage:
    """Adapter exposing a raw Pulsar message through the Message protocol."""

    def __init__(self, pulsar_msg, schema_cls):
        self._msg = pulsar_msg
        self._schema_cls = schema_cls
        # Filled in on the first value() call
        self._value = None

    def value(self) -> Any:
        """Return the payload decoded from JSON and rebuilt as the schema type.

        The result is cached after the first call; a payload that decodes to
        None is not cached and is decoded again on the next call.
        """
        if self._value is not None:
            return self._value

        # Payload bytes -> UTF-8 text -> plain dict -> dataclass
        raw_text = self._msg.data().decode('utf-8')
        decoded = json.loads(raw_text)
        self._value = dict_to_dataclass(decoded, self._schema_cls)
        return self._value

    def properties(self) -> dict:
        """Return the properties of the wrapped Pulsar message."""
        return self._msg.properties()
|
|
|
|
|
|
class PulsarBackendProducer:
    """Pulsar-specific producer implementation.

    Wraps a Pulsar producer and JSON-encodes messages before sending them.
    """

    def __init__(self, pulsar_producer, schema_cls):
        self._producer = pulsar_producer
        # Stored but not used by send/flush/close in this class
        self._schema_cls = schema_cls

    def send(self, message: Any, properties: dict = None) -> None:
        """Send a dataclass message.

        Args:
            message: Dataclass instance (or other JSON-serializable value).
            properties: Optional message properties; an empty dict is sent
                when omitted.
        """
        # A fresh dict per call: a "{}" default would be one object shared
        # by every call that omits the argument
        if properties is None:
            properties = {}
        # Convert dataclass to plain JSON-ready data
        data_dict = dataclass_to_dict(message)
        # Serialize to JSON
        json_data = json.dumps(data_dict)
        # Send via Pulsar
        self._producer.send(json_data.encode('utf-8'), properties=properties)

    def flush(self) -> None:
        """Flush buffered messages."""
        self._producer.flush()

    def close(self) -> None:
        """Close the producer."""
        self._producer.close()
|
|
|
|
|
|
class PulsarBackendConsumer:
    """Consumer wrapper that hands out PulsarMessage objects."""

    def __init__(self, pulsar_consumer, schema_cls):
        self._consumer, self._schema_cls = pulsar_consumer, schema_cls

    def receive(self, timeout_millis: int = 2000) -> Message:
        """Receive the next message and wrap it for lazy deserialization.

        Args:
            timeout_millis: Timeout in milliseconds passed to the underlying
                consumer's receive call.
        """
        raw = self._consumer.receive(timeout_millis=timeout_millis)
        wrapped = PulsarMessage(raw, self._schema_cls)
        return wrapped

    def acknowledge(self, message: Message) -> None:
        """Acknowledge a message; anything that is not a PulsarMessage is ignored."""
        if not isinstance(message, PulsarMessage):
            return
        self._consumer.acknowledge(message._msg)

    def negative_acknowledge(self, message: Message) -> None:
        """Negatively acknowledge a message; non-PulsarMessage input is ignored."""
        if not isinstance(message, PulsarMessage):
            return
        self._consumer.negative_acknowledge(message._msg)

    def unsubscribe(self) -> None:
        """Unsubscribe the underlying consumer."""
        self._consumer.unsubscribe()

    def close(self) -> None:
        """Close the underlying consumer."""
        self._consumer.close()
|
|
|
|
|
|
class PulsarBackend:
    """
    Pub/sub backend backed by a Pulsar client.

    Owns a single pulsar.Client, translates generic topic names into Pulsar
    topic URIs, and builds the producer and consumer wrappers.
    """

    def __init__(self, host: str, api_key: str = None, listener: str = None):
        """
        Create the Pulsar client for this backend.

        Args:
            host: Pulsar broker URL (e.g., pulsar://localhost:6650)
            api_key: Optional API key; when set it is used as an authentication token
            listener: Optional listener name for multi-homed setups
        """
        self.host = host
        self.api_key = api_key
        self.listener = listener

        # Only the optional settings that were actually supplied are passed on
        settings = {'service_url': host}
        if listener:
            settings['listener_name'] = listener
        if api_key:
            settings['authentication'] = pulsar.AuthenticationToken(api_key)

        self.client = pulsar.Client(**settings)
        logger.info(f"Pulsar client connected to {host}")

    def map_topic(self, generic_topic: str) -> str:
        """
        Translate a generic topic name into a Pulsar topic URI.

        Format: qos/tenant/namespace/queue
        Example: q1/tg/flow/my-queue -> persistent://tg/flow/my-queue

        Args:
            generic_topic: Generic topic string or already-formatted Pulsar URI

        Returns:
            Pulsar topic URI

        Raises:
            ValueError: If the topic does not have four parts or the QoS
                level is not q0, q1, or q2.
        """
        # Anything containing a scheme separator is treated as a ready-made URI
        if '://' in generic_topic:
            return generic_topic

        # At most three splits, so the queue part keeps any further slashes
        segments = generic_topic.split('/', 3)
        if len(segments) != 4:
            raise ValueError(f"Invalid topic format: {generic_topic}, expected qos/tenant/namespace/queue")

        qos, tenant, namespace, queue = segments

        # q0 is non-persistent; q1 and q2 are both persistent
        scheme = {
            'q0': 'non-persistent',
            'q1': 'persistent',
            'q2': 'persistent',
        }.get(qos)
        if scheme is None:
            raise ValueError(f"Invalid QoS level: {qos}, expected q0, q1, or q2")

        return f"{scheme}://{tenant}/{namespace}/{queue}"

    def create_producer(self, topic: str, schema: type, **options) -> BackendProducer:
        """
        Create a Pulsar producer.

        Args:
            topic: Generic topic format (qos/tenant/namespace/queue)
            schema: Dataclass type for messages
            **options: Backend-specific options; only chunking_enabled is
                forwarded to Pulsar

        Returns:
            PulsarBackendProducer instance
        """
        pulsar_topic = self.map_topic(topic)

        # Forward only the recognised options, and only when supplied
        forwarded = {
            name: options[name]
            for name in ('chunking_enabled',)
            if name in options
        }

        pulsar_producer = self.client.create_producer(
            topic=pulsar_topic,
            schema=pulsar.schema.BytesSchema(),  # We handle serialization ourselves
            **forwarded,
        )
        logger.debug(f"Created producer for topic: {pulsar_topic}")

        return PulsarBackendProducer(pulsar_producer, schema)

    def create_consumer(
        self,
        topic: str,
        subscription: str,
        schema: type,
        initial_position: str = 'latest',
        consumer_type: str = 'shared',
        **options
    ) -> BackendConsumer:
        """
        Create a Pulsar consumer.

        Args:
            topic: Generic topic format (qos/tenant/namespace/queue)
            subscription: Subscription name
            schema: Dataclass type for messages
            initial_position: 'earliest' or 'latest'; any other value is
                treated as 'latest'
            consumer_type: 'shared', 'exclusive', or 'failover'; any other
                value is treated as 'shared'
            **options: Backend-specific options (accepted but not forwarded)

        Returns:
            PulsarBackendConsumer instance
        """
        pulsar_topic = self.map_topic(topic)

        # Map initial position
        position = (
            pulsar.InitialPosition.Earliest
            if initial_position == 'earliest'
            else pulsar.InitialPosition.Latest
        )

        # Map consumer type
        mode = (
            pulsar.ConsumerType.Exclusive if consumer_type == 'exclusive'
            else pulsar.ConsumerType.Failover if consumer_type == 'failover'
            else pulsar.ConsumerType.Shared
        )

        pulsar_consumer = self.client.subscribe(
            topic=pulsar_topic,
            subscription_name=subscription,
            schema=pulsar.schema.BytesSchema(),  # We handle deserialization ourselves
            initial_position=position,
            consumer_type=mode,
        )
        logger.debug(f"Created consumer for topic: {pulsar_topic}, subscription: {subscription}")

        return PulsarBackendConsumer(pulsar_consumer, schema)

    def close(self) -> None:
        """Close the Pulsar client."""
        self.client.close()
        logger.info("Pulsar client closed")
|