Messaging fabric plugins (#592)

* Plugin architecture for messaging fabric

* Schemas use a technology-neutral expression

* Schema strictness has uncovered some incorrect schema use, which is now fixed
cybermaggedon 2025-12-17 21:40:43 +00:00 committed by GitHub
parent 1865b3f3c8
commit 34eb083836
100 changed files with 2342 additions and 828 deletions


@ -159,12 +159,12 @@ class AsyncFlowInstance:
result = await self.request("text-completion", request_data)
return result.get("response", "")
async def graph_rag(self, question: str, user: str, collection: str,
async def graph_rag(self, query: str, user: str, collection: str,
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,
max_entity_distance: int = 3, **kwargs: Any) -> str:
"""Graph RAG (non-streaming, use async_socket for streaming)"""
request_data = {
"question": question,
"query": query,
"user": user,
"collection": collection,
"max-subgraph-size": max_subgraph_size,
@ -177,11 +177,11 @@ class AsyncFlowInstance:
result = await self.request("graph-rag", request_data)
return result.get("response", "")
async def document_rag(self, question: str, user: str, collection: str,
async def document_rag(self, query: str, user: str, collection: str,
doc_limit: int = 10, **kwargs: Any) -> str:
"""Document RAG (non-streaming, use async_socket for streaming)"""
request_data = {
"question": question,
"query": query,
"user": user,
"collection": collection,
"doc-limit": doc_limit,


@ -208,12 +208,12 @@ class AsyncSocketFlowInstance:
if hasattr(chunk, 'content'):
yield chunk.content
async def graph_rag(self, question: str, user: str, collection: str,
async def graph_rag(self, query: str, user: str, collection: str,
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,
max_entity_distance: int = 3, streaming: bool = False, **kwargs):
"""Graph RAG with optional streaming"""
request = {
"question": question,
"query": query,
"user": user,
"collection": collection,
"max-subgraph-size": max_subgraph_size,
@ -235,11 +235,11 @@ class AsyncSocketFlowInstance:
if hasattr(chunk, 'content'):
yield chunk.content
async def document_rag(self, question: str, user: str, collection: str,
async def document_rag(self, query: str, user: str, collection: str,
doc_limit: int = 10, streaming: bool = False, **kwargs):
"""Document RAG with optional streaming"""
request = {
"question": question,
"query": query,
"user": user,
"collection": collection,
"doc-limit": doc_limit,


@ -160,14 +160,14 @@ class FlowInstance:
)["answer"]
def graph_rag(
self, question, user="trustgraph", collection="default",
self, query, user="trustgraph", collection="default",
entity_limit=50, triple_limit=30, max_subgraph_size=150,
max_path_length=2,
):
# The input consists of a question
input = {
"query": question,
"query": query,
"user": user,
"collection": collection,
"entity-limit": entity_limit,
@ -182,13 +182,13 @@ class FlowInstance:
)["response"]
def document_rag(
self, question, user="trustgraph", collection="default",
self, query, user="trustgraph", collection="default",
doc_limit=10,
):
# The input consists of a question
input = {
"query": question,
"query": query,
"user": user,
"collection": collection,
"doc-limit": doc_limit,


@ -284,7 +284,7 @@ class SocketFlowInstance:
def graph_rag(
self,
question: str,
query: str,
user: str,
collection: str,
max_subgraph_size: int = 1000,
@ -295,7 +295,7 @@ class SocketFlowInstance:
) -> Union[str, Iterator[str]]:
"""Graph RAG with optional streaming"""
request = {
"question": question,
"query": query,
"user": user,
"collection": collection,
"max-subgraph-size": max_subgraph_size,
@ -316,7 +316,7 @@ class SocketFlowInstance:
def document_rag(
self,
question: str,
query: str,
user: str,
collection: str,
doc_limit: int = 10,
@ -325,7 +325,7 @@ class SocketFlowInstance:
) -> Union[str, Iterator[str]]:
"""Document RAG with optional streaming"""
request = {
"question": question,
"query": query,
"user": user,
"collection": collection,
"doc-limit": doc_limit,

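The question parameter becomes query across the synchronous, async, and websocket clients above. A usage sketch of the renamed keyword (client setup omitted; only the signatures come from this diff):

    # 'flow' is a FlowInstance obtained from the client library (setup omitted).
    answer = flow.graph_rag(
        query="What is a messaging fabric?",    # previously question=...
        user="trustgraph",
        collection="default",
    )
    summary = flow.document_rag(query="Summarise the corpus", doc_limit=10)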

@ -15,7 +15,7 @@ from prometheus_client import start_http_server, Info
from .. schema import ConfigPush, config_push_queue
from .. log_level import LogLevel
from . pubsub import PulsarClient
from . pubsub import PulsarClient, get_pubsub
from . producer import Producer
from . consumer import Consumer
from . metrics import ProcessorMetrics, ConsumerMetrics
@ -34,8 +34,11 @@ class AsyncProcessor:
# Store the identity
self.id = params.get("id")
# Register a pulsar client
self.pulsar_client_object = PulsarClient(**params)
# Create pub/sub backend via factory
self.pubsub_backend = get_pubsub(**params)
# Store pulsar_host for backward compatibility
self._pulsar_host = params.get("pulsar_host", "pulsar://pulsar:6650")
# Initialise metrics, records the parameters
ProcessorMetrics(processor = self.id).info({
@ -70,7 +73,7 @@ class AsyncProcessor:
self.config_sub_task = Consumer(
taskgroup = self.taskgroup,
client = self.pulsar_client,
backend = self.pubsub_backend,
subscriber = config_subscriber_id,
flow = None,
@ -96,16 +99,16 @@ class AsyncProcessor:
# This is called to stop all threads. An over-ride point for extra
# functionality
def stop(self):
self.pulsar_client.close()
self.pubsub_backend.close()
self.running = False
# Returns the pulsar host
# Returns the pub/sub backend (new interface)
@property
def pulsar_host(self): return self.pulsar_client_object.pulsar_host
def pubsub(self): return self.pubsub_backend
# Returns the pulsar client
# Returns the pulsar host (backward compatibility)
@property
def pulsar_client(self): return self.pulsar_client_object.client
def pulsar_host(self): return self._pulsar_host
# Register a new event handler for configuration change
def register_config_handler(self, handler):
@ -247,6 +250,14 @@ class AsyncProcessor:
@staticmethod
def add_args(parser):
# Pub/sub backend selection
parser.add_argument(
'--pubsub-backend',
default=os.getenv('PUBSUB_BACKEND', 'pulsar'),
choices=['pulsar', 'mqtt'],
help='Pub/sub backend (default: pulsar, env: PUBSUB_BACKEND)',
)
PulsarClient.add_args(parser)
add_logging_args(parser)
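
The backend is chosen once at processor start-up, from the flag above or the PUBSUB_BACKEND environment variable. A minimal sketch of the resulting selection (the factory and property names are those introduced in this diff; the import path is assumed):

    import os
    from trustgraph.base.pubsub import get_pubsub   # import path assumed

    # Precedence per the argparse default above:
    # --pubsub-backend flag > PUBSUB_BACKEND env var > 'pulsar'
    backend = get_pubsub(
        pubsub_backend=os.getenv("PUBSUB_BACKEND", "pulsar"),
        pulsar_host="pulsar://pulsar:6650",
    )

    # Old call sites reading processor.pulsar_host still work via the
    # backward-compatibility property; new code should use processor.pubsub.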


@ -0,0 +1,148 @@
"""
Backend abstraction interfaces for pub/sub systems.
This module defines Protocol classes that all pub/sub backends must implement,
allowing TrustGraph to work with different messaging systems (Pulsar, MQTT, Kafka, etc.)
"""
from typing import Protocol, Any, runtime_checkable
@runtime_checkable
class Message(Protocol):
"""Protocol for a received message."""
def value(self) -> Any:
"""
Get the deserialized message content.
Returns:
Dataclass instance representing the message
"""
...
def properties(self) -> dict:
"""
Get message properties/metadata.
Returns:
Dictionary of message properties
"""
...
@runtime_checkable
class BackendProducer(Protocol):
"""Protocol for backend-specific producer."""
def send(self, message: Any, properties: dict = {}) -> None:
"""
Send a message (dataclass instance) with optional properties.
Args:
message: Dataclass instance to send
properties: Optional metadata properties
"""
...
def flush(self) -> None:
"""Flush any buffered messages."""
...
def close(self) -> None:
"""Close the producer."""
...
@runtime_checkable
class BackendConsumer(Protocol):
"""Protocol for backend-specific consumer."""
def receive(self, timeout_millis: int = 2000) -> Message:
"""
Receive a message from the topic.
Args:
timeout_millis: Timeout in milliseconds
Returns:
Message object
Raises:
TimeoutError: If no message received within timeout
"""
...
def acknowledge(self, message: Message) -> None:
"""
Acknowledge successful processing of a message.
Args:
message: The message to acknowledge
"""
...
def negative_acknowledge(self, message: Message) -> None:
"""
Negative acknowledge - triggers redelivery.
Args:
message: The message to negatively acknowledge
"""
...
def unsubscribe(self) -> None:
"""Unsubscribe from the topic."""
...
def close(self) -> None:
"""Close the consumer."""
...
@runtime_checkable
class PubSubBackend(Protocol):
"""Protocol defining the interface all pub/sub backends must implement."""
def create_producer(self, topic: str, schema: type, **options) -> BackendProducer:
"""
Create a producer for a topic.
Args:
topic: Generic topic format (qos/tenant/namespace/queue)
schema: Dataclass type for messages
**options: Backend-specific options (e.g., chunking_enabled)
Returns:
Backend-specific producer instance
"""
...
def create_consumer(
self,
topic: str,
subscription: str,
schema: type,
initial_position: str = 'latest',
consumer_type: str = 'shared',
**options
) -> BackendConsumer:
"""
Create a consumer for a topic.
Args:
topic: Generic topic format (qos/tenant/namespace/queue)
subscription: Subscription/consumer group name
schema: Dataclass type for messages
initial_position: 'earliest' or 'latest' (some backends may ignore)
consumer_type: 'shared', 'exclusive', 'failover' (some backends may ignore)
**options: Backend-specific options
Returns:
Backend-specific consumer instance
"""
...
def close(self) -> None:
"""Close the backend connection."""
...
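
Because these Protocols are @runtime_checkable, conformance can be verified structurally. A minimal no-op backend (illustrative only) that satisfies the interfaces defined above:

    from trustgraph.base.backend import PubSubBackend   # import path assumed

    class NullProducer:
        def send(self, message, properties={}): pass
        def flush(self): pass
        def close(self): pass

    class NullConsumer:
        def receive(self, timeout_millis=2000):
            raise TimeoutError("no messages")   # timeouts raise, per the contract
        def acknowledge(self, message): pass
        def negative_acknowledge(self, message): pass
        def unsubscribe(self): pass
        def close(self): pass

    class NullBackend:
        def create_producer(self, topic, schema, **options):
            return NullProducer()
        def create_consumer(self, topic, subscription, schema,
                            initial_position='latest', consumer_type='shared',
                            **options):
            return NullConsumer()
        def close(self): pass

    # isinstance() on a runtime_checkable Protocol checks method presence only:
    assert isinstance(NullBackend(), PubSubBackend)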


@ -9,9 +9,6 @@
# one handler, and a single thread of concurrency, nothing too outrageous
# will happen if synchronous / blocking code is used
from pulsar.schema import JsonSchema
import pulsar
import _pulsar
import asyncio
import time
import logging
@ -21,11 +18,15 @@ from .. exceptions import TooManyRequests
# Module logger
logger = logging.getLogger(__name__)
# Timeout exception - can come from different backends
class TimeoutError(Exception):
pass
class Consumer:
def __init__(
self, taskgroup, flow, client, topic, subscriber, schema,
handler,
self, taskgroup, flow, backend, topic, subscriber, schema,
handler,
metrics = None,
start_of_messages=False,
rate_limit_retry_time = 10, rate_limit_timeout = 7200,
@ -35,7 +36,7 @@ class Consumer:
self.taskgroup = taskgroup
self.flow = flow
self.client = client
self.backend = backend # pub/sub backend abstraction
self.topic = topic
self.subscriber = subscriber
self.schema = schema
@ -96,18 +97,20 @@ class Consumer:
logger.info(f"Subscribing to topic: {self.topic}")
# Determine initial position
if self.start_of_messages:
pos = pulsar.InitialPosition.Earliest
initial_pos = 'earliest'
else:
pos = pulsar.InitialPosition.Latest
initial_pos = 'latest'
# Create consumer via backend
self.consumer = await asyncio.to_thread(
self.client.subscribe,
self.backend.create_consumer,
topic = self.topic,
subscription_name = self.subscriber,
schema = JsonSchema(self.schema),
initial_position = pos,
consumer_type = pulsar.ConsumerType.Shared,
subscription = self.subscriber,
schema = self.schema,
initial_position = initial_pos,
consumer_type = 'shared',
)
except Exception as e:
@ -159,9 +162,10 @@ class Consumer:
self.consumer.receive,
timeout_millis=2000
)
except _pulsar.Timeout:
continue
except Exception as e:
# Handle timeout from any backend
if 'timeout' in str(type(e)).lower() or 'timeout' in str(e).lower():
continue
raise e
await self.handle_one_from_queue(msg)
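
The broad except clause above matches timeouts by name, so the consumer no longer depends on _pulsar.Timeout. Any backend stays compatible by raising an exception whose type name or message contains "timeout", e.g. the module-level TimeoutError defined above. A sketch of a conforming backend consumer (the _poll helper is hypothetical):

    class SomeBackendConsumer:
        def receive(self, timeout_millis=2000):
            msg = self._poll(timeout_millis)    # hypothetical blocking poll
            if msg is None:
                # 'Timeout' in the type name is what the handler matches
                raise TimeoutError("receive timed out")
            return msg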


@ -19,7 +19,7 @@ class ConsumerSpec(Spec):
consumer = Consumer(
taskgroup = processor.taskgroup,
flow = flow,
client = processor.pulsar_client,
backend = processor.pubsub,
topic = definition[self.name],
subscriber = processor.id + "--" + flow.name + "--" + self.name,
schema = self.schema,


@ -1,5 +1,4 @@
from pulsar.schema import JsonSchema
import asyncio
import logging
@ -8,10 +7,10 @@ logger = logging.getLogger(__name__)
class Producer:
def __init__(self, client, topic, schema, metrics=None,
def __init__(self, backend, topic, schema, metrics=None,
chunking_enabled=True):
self.client = client
self.backend = backend # pub/sub backend abstraction
self.topic = topic
self.schema = schema
@ -44,9 +43,9 @@ class Producer:
try:
logger.info(f"Connecting publisher to {self.topic}...")
self.producer = self.client.create_producer(
self.producer = self.backend.create_producer(
topic = self.topic,
schema = JsonSchema(self.schema),
schema = self.schema,
chunking_enabled = self.chunking_enabled,
)
logger.info(f"Connected publisher to {self.topic}")


@ -15,7 +15,7 @@ class ProducerSpec(Spec):
)
producer = Producer(
client = processor.pulsar_client,
backend = processor.pubsub,
topic = definition[self.name],
schema = self.schema,
metrics = producer_metrics,


@ -37,21 +37,20 @@ class PromptClient(RequestResponse):
else:
logger.info("DEBUG prompt_client: Streaming path")
# Streaming path - collect all chunks
full_text = ""
full_object = None
# Streaming path - just forward chunks, don't accumulate
last_text = ""
last_object = None
async def collect_chunks(resp):
nonlocal full_text, full_object
logger.info(f"DEBUG prompt_client: collect_chunks called, resp.text={resp.text[:50] if resp.text else None}, end_of_stream={getattr(resp, 'end_of_stream', False)}")
async def forward_chunks(resp):
nonlocal last_text, last_object
logger.info(f"DEBUG prompt_client: forward_chunks called, resp.text={resp.text[:50] if resp.text else None}, end_of_stream={getattr(resp, 'end_of_stream', False)}")
if resp.error:
logger.error(f"DEBUG prompt_client: Error in response: {resp.error.message}")
raise RuntimeError(resp.error.message)
if resp.text:
full_text += resp.text
logger.info(f"DEBUG prompt_client: Accumulated {len(full_text)} chars")
last_text = resp.text
# Call chunk callback if provided
if chunk_callback:
logger.info(f"DEBUG prompt_client: Calling chunk_callback")
@ -61,7 +60,7 @@ class PromptClient(RequestResponse):
chunk_callback(resp.text)
elif resp.object:
logger.info(f"DEBUG prompt_client: Got object response")
full_object = resp.object
last_object = resp.object
end_stream = getattr(resp, 'end_of_stream', False)
logger.info(f"DEBUG prompt_client: Returning end_of_stream={end_stream}")
@ -79,17 +78,17 @@ class PromptClient(RequestResponse):
logger.info(f"DEBUG prompt_client: About to call self.request with recipient, timeout={timeout}")
await self.request(
req,
recipient=collect_chunks,
recipient=forward_chunks,
timeout=timeout
)
logger.info(f"DEBUG prompt_client: self.request returned, full_text has {len(full_text)} chars")
logger.info(f"DEBUG prompt_client: self.request returned, last_text={last_text[:50] if last_text else None}")
if full_text:
logger.info("DEBUG prompt_client: Returning full_text")
return full_text
if last_text:
logger.info("DEBUG prompt_client: Returning last_text")
return last_text
logger.info("DEBUG prompt_client: Returning parsed full_object")
return json.loads(full_object)
logger.info("DEBUG prompt_client: Returning parsed last_object")
return json.loads(last_object) if last_object else None
async def extract_definitions(self, text, timeout=600):
return await self.prompt(


@ -1,9 +1,6 @@
from pulsar.schema import JsonSchema
import asyncio
import time
import pulsar
import logging
# Module logger
@ -11,9 +8,9 @@ logger = logging.getLogger(__name__)
class Publisher:
def __init__(self, client, topic, schema=None, max_size=10,
def __init__(self, backend, topic, schema=None, max_size=10,
chunking_enabled=True, drain_timeout=5.0):
self.client = client
self.backend = backend # pub/sub backend abstraction
self.topic = topic
self.schema = schema
self.q = asyncio.Queue(maxsize=max_size)
@ -47,9 +44,9 @@ class Publisher:
try:
producer = self.client.create_producer(
producer = self.backend.create_producer(
topic=self.topic,
schema=JsonSchema(self.schema),
schema=self.schema,
chunking_enabled=self.chunking_enabled,
)


@ -4,8 +4,45 @@ import pulsar
import _pulsar
import uuid
from pulsar.schema import JsonSchema
import logging
from .. log_level import LogLevel
from .pulsar_backend import PulsarBackend
logger = logging.getLogger(__name__)
def get_pubsub(**config):
"""
Factory function to create a pub/sub backend based on configuration.
Args:
config: Configuration dictionary from command-line args.
May include a 'pubsub_backend' key (defaults to 'pulsar').
Returns:
Backend instance (PulsarBackend, MQTTBackend, etc.)
Example:
backend = get_pubsub(
pubsub_backend='pulsar',
pulsar_host='pulsar://localhost:6650'
)
"""
backend_type = config.get('pubsub_backend', 'pulsar')
if backend_type == 'pulsar':
return PulsarBackend(
host=config.get('pulsar_host', PulsarClient.default_pulsar_host),
api_key=config.get('pulsar_api_key', PulsarClient.default_pulsar_api_key),
listener=config.get('pulsar_listener'),
)
elif backend_type == 'mqtt':
# TODO: Implement MQTT backend
raise NotImplementedError("MQTT backend not yet implemented")
else:
raise ValueError(f"Unknown pub/sub backend: {backend_type}")
class PulsarClient:


@ -0,0 +1,350 @@
"""
Pulsar backend implementation for pub/sub abstraction.
This module provides a Pulsar-specific implementation of the backend interfaces,
handling topic mapping, serialization, and Pulsar client management.
"""
import pulsar
import _pulsar
import json
import logging
import base64
import types
from dataclasses import asdict, is_dataclass
from typing import Any
from .backend import PubSubBackend, BackendProducer, BackendConsumer, Message
logger = logging.getLogger(__name__)
def dataclass_to_dict(obj: Any) -> dict:
"""
Recursively convert a dataclass to a dictionary, handling None values and bytes.
None values are excluded from the dictionary (not serialized).
Bytes values are decoded as UTF-8 strings for JSON serialization (matching Pulsar behavior).
"""
if obj is None:
return None
if is_dataclass(obj):
result = {}
for key, value in asdict(obj).items():
if value is not None:
if isinstance(value, bytes):
# Decode bytes as UTF-8 for JSON serialization (like Pulsar did)
result[key] = value.decode('utf-8')
elif is_dataclass(value):
result[key] = dataclass_to_dict(value)
elif isinstance(value, list):
result[key] = [
item.decode('utf-8') if isinstance(item, bytes)
else dataclass_to_dict(item) if is_dataclass(item)
else item
for item in value
]
elif isinstance(value, dict):
result[key] = {k: dataclass_to_dict(v) if is_dataclass(v) else v for k, v in value.items()}
else:
result[key] = value
return result
return obj
def dict_to_dataclass(data: dict, cls: type) -> Any:
"""
Convert a dictionary back to a dataclass instance.
Handles nested dataclasses and missing fields.
"""
if data is None:
return None
if not is_dataclass(cls):
return data
# Get field types from the dataclass
field_types = {f.name: f.type for f in cls.__dataclass_fields__.values()}
kwargs = {}
for key, value in data.items():
if key in field_types:
field_type = field_types[key]
# Handle modern union types (X | Y)
if isinstance(field_type, types.UnionType):
# Check if it's Optional (X | None)
if type(None) in field_type.__args__:
# Get the non-None type
actual_type = next((t for t in field_type.__args__ if t is not type(None)), None)
if actual_type and is_dataclass(actual_type) and isinstance(value, dict):
kwargs[key] = dict_to_dataclass(value, actual_type)
else:
kwargs[key] = value
else:
kwargs[key] = value
# Check if this is a generic type (list, dict, etc.)
elif hasattr(field_type, '__origin__'):
# Handle list[T]
if field_type.__origin__ == list:
item_type = field_type.__args__[0] if field_type.__args__ else None
if item_type and is_dataclass(item_type) and isinstance(value, list):
kwargs[key] = [
dict_to_dataclass(item, item_type) if isinstance(item, dict) else item
for item in value
]
else:
kwargs[key] = value
# Handle old-style Optional[T] (which is Union[T, None])
elif hasattr(field_type, '__args__') and type(None) in field_type.__args__:
# Get the non-None type from Union
actual_type = next((t for t in field_type.__args__ if t is not type(None)), None)
if actual_type and is_dataclass(actual_type) and isinstance(value, dict):
kwargs[key] = dict_to_dataclass(value, actual_type)
else:
kwargs[key] = value
else:
kwargs[key] = value
# Handle direct dataclass fields
elif is_dataclass(field_type) and isinstance(value, dict):
kwargs[key] = dict_to_dataclass(value, field_type)
# Handle bytes fields (UTF-8 encoded strings from JSON)
elif field_type == bytes and isinstance(value, str):
kwargs[key] = value.encode('utf-8')
else:
kwargs[key] = value
return cls(**kwargs)
class PulsarMessage:
"""Wrapper for Pulsar messages to match Message protocol."""
def __init__(self, pulsar_msg, schema_cls):
self._msg = pulsar_msg
self._schema_cls = schema_cls
self._value = None
def value(self) -> Any:
"""Deserialize and return the message value as a dataclass."""
if self._value is None:
# Get JSON string from Pulsar message
json_data = self._msg.data().decode('utf-8')
data_dict = json.loads(json_data)
# Convert to dataclass
self._value = dict_to_dataclass(data_dict, self._schema_cls)
return self._value
def properties(self) -> dict:
"""Return message properties."""
return self._msg.properties()
class PulsarBackendProducer:
"""Pulsar-specific producer implementation."""
def __init__(self, pulsar_producer, schema_cls):
self._producer = pulsar_producer
self._schema_cls = schema_cls
def send(self, message: Any, properties: dict = {}) -> None:
"""Send a dataclass message."""
# Convert dataclass to dict, excluding None values
data_dict = dataclass_to_dict(message)
# Serialize to JSON
json_data = json.dumps(data_dict)
# Send via Pulsar
self._producer.send(json_data.encode('utf-8'), properties=properties)
def flush(self) -> None:
"""Flush buffered messages."""
self._producer.flush()
def close(self) -> None:
"""Close the producer."""
self._producer.close()
class PulsarBackendConsumer:
"""Pulsar-specific consumer implementation."""
def __init__(self, pulsar_consumer, schema_cls):
self._consumer = pulsar_consumer
self._schema_cls = schema_cls
def receive(self, timeout_millis: int = 2000) -> Message:
"""Receive a message."""
pulsar_msg = self._consumer.receive(timeout_millis=timeout_millis)
return PulsarMessage(pulsar_msg, self._schema_cls)
def acknowledge(self, message: Message) -> None:
"""Acknowledge a message."""
if isinstance(message, PulsarMessage):
self._consumer.acknowledge(message._msg)
def negative_acknowledge(self, message: Message) -> None:
"""Negative acknowledge a message."""
if isinstance(message, PulsarMessage):
self._consumer.negative_acknowledge(message._msg)
def unsubscribe(self) -> None:
"""Unsubscribe from the topic."""
self._consumer.unsubscribe()
def close(self) -> None:
"""Close the consumer."""
self._consumer.close()
class PulsarBackend:
"""
Pulsar backend implementation.
Handles topic mapping, client management, and creation of Pulsar-specific
producers and consumers.
"""
def __init__(self, host: str, api_key: str = None, listener: str = None):
"""
Initialize Pulsar backend.
Args:
host: Pulsar broker URL (e.g., pulsar://localhost:6650)
api_key: Optional API key for authentication
listener: Optional listener name for multi-homed setups
"""
self.host = host
self.api_key = api_key
self.listener = listener
# Create Pulsar client
client_args = {'service_url': host}
if listener:
client_args['listener_name'] = listener
if api_key:
client_args['authentication'] = pulsar.AuthenticationToken(api_key)
self.client = pulsar.Client(**client_args)
logger.info(f"Pulsar client connected to {host}")
def map_topic(self, generic_topic: str) -> str:
"""
Map generic topic format to Pulsar URI.
Format: qos/tenant/namespace/queue
Example: q1/tg/flow/my-queue -> persistent://tg/flow/my-queue
Args:
generic_topic: Generic topic string or already-formatted Pulsar URI
Returns:
Pulsar topic URI
"""
# If already a Pulsar URI, return as-is
if '://' in generic_topic:
return generic_topic
parts = generic_topic.split('/', 3)
if len(parts) != 4:
raise ValueError(f"Invalid topic format: {generic_topic}, expected qos/tenant/namespace/queue")
qos, tenant, namespace, queue = parts
# Map QoS to persistence
if qos == 'q0':
persistence = 'non-persistent'
elif qos in ['q1', 'q2']:
persistence = 'persistent'
else:
raise ValueError(f"Invalid QoS level: {qos}, expected q0, q1, or q2")
return f"{persistence}://{tenant}/{namespace}/{queue}"
def create_producer(self, topic: str, schema: type, **options) -> BackendProducer:
"""
Create a Pulsar producer.
Args:
topic: Generic topic format (qos/tenant/namespace/queue)
schema: Dataclass type for messages
**options: Backend-specific options (e.g., chunking_enabled)
Returns:
PulsarBackendProducer instance
"""
pulsar_topic = self.map_topic(topic)
producer_args = {
'topic': pulsar_topic,
'schema': pulsar.schema.BytesSchema(), # We handle serialization ourselves
}
# Add optional parameters
if 'chunking_enabled' in options:
producer_args['chunking_enabled'] = options['chunking_enabled']
pulsar_producer = self.client.create_producer(**producer_args)
logger.debug(f"Created producer for topic: {pulsar_topic}")
return PulsarBackendProducer(pulsar_producer, schema)
def create_consumer(
self,
topic: str,
subscription: str,
schema: type,
initial_position: str = 'latest',
consumer_type: str = 'shared',
**options
) -> BackendConsumer:
"""
Create a Pulsar consumer.
Args:
topic: Generic topic format (qos/tenant/namespace/queue)
subscription: Subscription name
schema: Dataclass type for messages
initial_position: 'earliest' or 'latest'
consumer_type: 'shared', 'exclusive', or 'failover'
**options: Backend-specific options
Returns:
PulsarBackendConsumer instance
"""
pulsar_topic = self.map_topic(topic)
# Map initial position
if initial_position == 'earliest':
pos = pulsar.InitialPosition.Earliest
else:
pos = pulsar.InitialPosition.Latest
# Map consumer type
if consumer_type == 'exclusive':
ctype = pulsar.ConsumerType.Exclusive
elif consumer_type == 'failover':
ctype = pulsar.ConsumerType.Failover
else:
ctype = pulsar.ConsumerType.Shared
consumer_args = {
'topic': pulsar_topic,
'subscription_name': subscription,
'schema': pulsar.schema.BytesSchema(), # We handle deserialization ourselves
'initial_position': pos,
'consumer_type': ctype,
}
pulsar_consumer = self.client.subscribe(**consumer_args)
logger.debug(f"Created consumer for topic: {pulsar_topic}, subscription: {subscription}")
return PulsarBackendConsumer(pulsar_consumer, schema)
def close(self) -> None:
"""Close the Pulsar client."""
self.client.close()
logger.info("Pulsar client closed")
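
The helpers above give a JSON round trip that drops None fields and treats bytes as UTF-8 text, and map_topic translates the generic QoS prefix into Pulsar persistence. A minimal sketch (import paths assumed; the broker is only needed for the client itself):

    import json
    from dataclasses import dataclass
    from trustgraph.base.pulsar_backend import (    # import path assumed
        dataclass_to_dict, dict_to_dataclass, PulsarBackend,
    )

    @dataclass
    class Chunk:                     # mirrors the Chunk schema in this changeset
        metadata: object | None = None
        chunk: bytes = b""

    c = Chunk(chunk=b"hello")
    d = dataclass_to_dict(c)         # {'chunk': 'hello'}; None metadata dropped
    c2 = dict_to_dataclass(json.loads(json.dumps(d)), Chunk)
    assert c2.chunk == b"hello"      # str fields re-encoded to bytes

    backend = PulsarBackend(host="pulsar://localhost:6650")  # needs a broker
    assert backend.map_topic("q0/tg/request/config") == \
        "non-persistent://tg/request/config"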


@ -14,7 +14,7 @@ logger = logging.getLogger(__name__)
class RequestResponse(Subscriber):
def __init__(
self, client, subscription, consumer_name,
self, backend, subscription, consumer_name,
request_topic, request_schema,
request_metrics,
response_topic, response_schema,
@ -22,7 +22,7 @@ class RequestResponse(Subscriber):
):
super(RequestResponse, self).__init__(
client = client,
backend = backend,
subscription = subscription,
consumer_name = consumer_name,
topic = response_topic,
@ -31,7 +31,7 @@ class RequestResponse(Subscriber):
)
self.producer = Producer(
client = client,
backend = backend,
topic = request_topic,
schema = request_schema,
metrics = request_metrics,
@ -126,7 +126,7 @@ class RequestResponseSpec(Spec):
)
rr = self.impl(
client = processor.pulsar_client,
backend = processor.pubsub,
# Make subscription names unique, so that all subscribers get
# to see all response messages


@ -3,9 +3,7 @@
# off of a queue and make it available using an internal broker system,
# so suitable for when multiple recipients are reading from the same queue
from pulsar.schema import JsonSchema
import asyncio
import _pulsar
import time
import logging
import uuid
@ -13,12 +11,16 @@ import uuid
# Module logger
logger = logging.getLogger(__name__)
# Timeout exception - can come from different backends
class TimeoutError(Exception):
pass
class Subscriber:
def __init__(self, client, topic, subscription, consumer_name,
def __init__(self, backend, topic, subscription, consumer_name,
schema=None, max_size=100, metrics=None,
backpressure_strategy="block", drain_timeout=5.0):
self.client = client
self.backend = backend # pub/sub backend abstraction
self.topic = topic
self.subscription = subscription
self.consumer_name = consumer_name
@ -43,18 +45,14 @@ class Subscriber:
async def start(self):
# Build subscribe arguments
subscribe_args = {
'topic': self.topic,
'subscription_name': self.subscription,
'consumer_name': self.consumer_name,
}
# Only add schema if provided (omit if None)
if self.schema is not None:
subscribe_args['schema'] = JsonSchema(self.schema)
self.consumer = self.client.subscribe(**subscribe_args)
# Create consumer via backend
self.consumer = await asyncio.to_thread(
self.backend.create_consumer,
topic=self.topic,
subscription=self.subscription,
schema=self.schema,
consumer_type='shared',
)
self.task = asyncio.create_task(self.run())
@ -94,12 +92,13 @@ class Subscriber:
drain_end_time = time.time() + self.drain_timeout
logger.info(f"Subscriber entering drain mode, timeout={self.drain_timeout}s")
# Stop accepting new messages from Pulsar during drain
if self.consumer:
# Stop accepting new messages during drain
# Note: Not all backends support pausing message listeners
if self.consumer and hasattr(self.consumer, 'pause_message_listener'):
try:
self.consumer.pause_message_listener()
except _pulsar.InvalidConfiguration:
# Not all consumers have message listeners (e.g., blocking receive mode)
except Exception:
# Not all consumers support message listeners
pass
# Check drain timeout
@ -133,9 +132,10 @@ class Subscriber:
self.consumer.receive,
timeout_millis=250
)
except _pulsar.Timeout:
continue
except Exception as e:
# Handle timeout from any backend
if 'timeout' in str(type(e)).lower() or 'timeout' in str(e).lower():
continue
logger.error(f"Exception in subscriber receive: {e}", exc_info=True)
raise e
@ -157,19 +157,20 @@ class Subscriber:
for msg in self.pending_acks.values():
try:
self.consumer.negative_acknowledge(msg)
except _pulsar.AlreadyClosed:
pass # Consumer already closed
except Exception:
pass # Consumer already closed or error
self.pending_acks.clear()
if self.consumer:
try:
self.consumer.unsubscribe()
except _pulsar.AlreadyClosed:
pass # Already closed
if hasattr(self.consumer, 'unsubscribe'):
try:
self.consumer.unsubscribe()
except Exception:
pass # Already closed or error
try:
self.consumer.close()
except _pulsar.AlreadyClosed:
pass # Already closed
except Exception:
pass # Already closed or error
self.consumer = None


@ -16,7 +16,7 @@ class SubscriberSpec(Spec):
)
subscriber = Subscriber(
client = processor.pulsar_client,
backend = processor.pubsub,
topic = definition[self.name],
subscription = flow.id,
consumer_name = flow.id,


@ -7,6 +7,7 @@ import time
from pulsar.schema import JsonSchema
from .. exceptions import *
from ..base.pubsub import get_pubsub
# Default timeout for a request/response. In seconds.
DEFAULT_TIMEOUT=300
@ -39,30 +40,25 @@ class BaseClient:
if subscriber == None:
subscriber = str(uuid.uuid4())
if pulsar_api_key:
auth = pulsar.AuthenticationToken(pulsar_api_key)
self.client = pulsar.Client(
pulsar_host,
logger=pulsar.ConsoleLogger(log_level),
authentication=auth,
listener=listener,
)
else:
self.client = pulsar.Client(
pulsar_host,
logger=pulsar.ConsoleLogger(log_level),
listener_name=listener,
)
# Create backend using factory
self.backend = get_pubsub(
pulsar_host=pulsar_host,
pulsar_api_key=pulsar_api_key,
pulsar_listener=listener,
pubsub_backend='pulsar'
)
self.producer = self.client.create_producer(
self.producer = self.backend.create_producer(
topic=input_queue,
schema=JsonSchema(input_schema),
schema=input_schema,
chunking_enabled=True,
)
self.consumer = self.client.subscribe(
output_queue, subscriber,
schema=JsonSchema(output_schema),
self.consumer = self.backend.create_consumer(
topic=output_queue,
subscription=subscriber,
schema=output_schema,
consumer_type='shared',
)
self.input_schema = input_schema
@ -136,10 +132,11 @@ class BaseClient:
if hasattr(self, "consumer"):
self.consumer.close()
if hasattr(self, "producer"):
self.producer.flush()
self.producer.close()
self.client.close()
if hasattr(self, "backend"):
self.backend.close()
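
With the factory in place, the client reduces to generic producer/consumer calls. A condensed sketch of the request/response pattern BaseClient now follows, using the config service schemas from this changeset (import paths assumed; a running broker and service are required for it to complete):

    import uuid
    from trustgraph.base.pubsub import get_pubsub           # path assumed
    from trustgraph.schema import (                         # path assumed
        ConfigRequest, ConfigResponse,
        config_request_queue, config_response_queue,
    )

    backend = get_pubsub(pubsub_backend="pulsar",
                         pulsar_host="pulsar://localhost:6650")
    producer = backend.create_producer(
        topic=config_request_queue, schema=ConfigRequest, chunking_enabled=True)
    consumer = backend.create_consumer(
        topic=config_response_queue, subscription=str(uuid.uuid4()),
        schema=ConfigResponse, consumer_type="shared")

    producer.send(ConfigRequest(operation="config"))
    msg = consumer.receive(timeout_millis=2000)             # raises on timeout
    response = msg.value()                                  # ConfigResponse dataclass
    consumer.acknowledge(msg)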


@ -64,7 +64,6 @@ class ConfigClient(BaseClient):
def get(self, keys, timeout=300):
resp = self.call(
id=id,
operation="get",
keys=[
ConfigKey(
@ -88,7 +87,6 @@ class ConfigClient(BaseClient):
def list(self, type, timeout=300):
resp = self.call(
id=id,
operation="list",
type=type,
timeout=timeout
@ -99,7 +97,6 @@ class ConfigClient(BaseClient):
def getvalues(self, type, timeout=300):
resp = self.call(
id=id,
operation="getvalues",
type=type,
timeout=timeout
@ -117,7 +114,6 @@ class ConfigClient(BaseClient):
def delete(self, keys, timeout=300):
resp = self.call(
id=id,
operation="delete",
keys=[
ConfigKey(
@ -134,7 +130,6 @@ class ConfigClient(BaseClient):
def put(self, values, timeout=300):
resp = self.call(
id=id,
operation="put",
values=[
ConfigValue(
@ -152,7 +147,6 @@ class ConfigClient(BaseClient):
def config(self, timeout=300):
resp = self.call(
id=id,
operation="config",
timeout=timeout
)


@ -34,14 +34,12 @@ class DocumentRagResponseTranslator(MessageTranslator):
def from_pulsar(self, obj: DocumentRagResponse) -> Dict[str, Any]:
result = {}
# Check if this is a streaming response (has chunk)
if hasattr(obj, 'chunk') and obj.chunk:
result["chunk"] = obj.chunk
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
else:
# Non-streaming response
if obj.response:
result["response"] = obj.response
# Include response content (chunk or complete)
if obj.response:
result["response"] = obj.response
# Include end_of_stream flag
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
# Always include error if present
if hasattr(obj, 'error') and obj.error and obj.error.message:
@ -51,13 +49,7 @@ class DocumentRagResponseTranslator(MessageTranslator):
def from_response_with_completion(self, obj: DocumentRagResponse) -> Tuple[Dict[str, Any], bool]:
"""Returns (response_dict, is_final)"""
# For streaming responses, check end_of_stream
if hasattr(obj, 'chunk') and obj.chunk:
is_final = getattr(obj, 'end_of_stream', False)
else:
# For non-streaming responses, it's always final
is_final = True
is_final = getattr(obj, 'end_of_stream', False)
return self.from_pulsar(obj), is_final
@ -98,14 +90,12 @@ class GraphRagResponseTranslator(MessageTranslator):
def from_pulsar(self, obj: GraphRagResponse) -> Dict[str, Any]:
result = {}
# Check if this is a streaming response (has chunk)
if hasattr(obj, 'chunk') and obj.chunk:
result["chunk"] = obj.chunk
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
else:
# Non-streaming response
if obj.response:
result["response"] = obj.response
# Include response content (chunk or complete)
if obj.response:
result["response"] = obj.response
# Include end_of_stream flag
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
# Always include error if present
if hasattr(obj, 'error') and obj.error and obj.error.message:
@ -115,11 +105,5 @@ class GraphRagResponseTranslator(MessageTranslator):
def from_response_with_completion(self, obj: GraphRagResponse) -> Tuple[Dict[str, Any], bool]:
"""Returns (response_dict, is_final)"""
# For streaming responses, check end_of_stream
if hasattr(obj, 'chunk') and obj.chunk:
is_final = getattr(obj, 'end_of_stream', False)
else:
# For non-streaming responses, it's always final
is_final = True
is_final = getattr(obj, 'end_of_stream', False)
return self.from_pulsar(obj), is_final
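
After this change, completion is signalled solely by end_of_stream, whether the content arrived as chunks or as one complete response. A sketch of a gateway-side consumer loop (translator construction assumed; await_next_response stands in for the actual transport read):

    parts = []
    translator = GraphRagResponseTranslator()    # construction assumed
    while True:
        resp = await_next_response()             # hypothetical transport read
        data, is_final = translator.from_response_with_completion(resp)
        if "response" in data:
            parts.append(data["response"])
        if is_final:
            break
    answer = "".join(parts)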


@ -1,16 +1,14 @@
from pulsar.schema import Record, String, Array
from dataclasses import dataclass, field
from .primitives import Triple
class Metadata(Record):
@dataclass
class Metadata:
# Source identifier
id = String()
id: str = ""
# Subgraph
metadata = Array(Triple())
metadata: list[Triple] = field(default_factory=list)
# Collection management
user = String()
collection = String()
user: str = ""
collection: str = ""


@ -1,34 +1,39 @@
from pulsar.schema import Record, String, Boolean, Array, Integer
from dataclasses import dataclass, field
class Error(Record):
type = String()
message = String()
@dataclass
class Error:
type: str = ""
message: str = ""
class Value(Record):
value = String()
is_uri = Boolean()
type = String()
@dataclass
class Value:
value: str = ""
is_uri: bool = False
type: str = ""
class Triple(Record):
s = Value()
p = Value()
o = Value()
@dataclass
class Triple:
s: Value | None = None
p: Value | None = None
o: Value | None = None
class Field(Record):
name = String()
@dataclass
class Field:
name: str = ""
# int, string, long, bool, float, double, timestamp
type = String()
size = Integer()
primary = Boolean()
description = String()
type: str = ""
size: int = 0
primary: bool = False
description: str = ""
# NEW FIELDS for structured data:
required = Boolean() # Whether field is required
enum_values = Array(String()) # For enum type fields
indexed = Boolean() # Whether field should be indexed
required: bool = False # Whether field is required
enum_values: list[str] = field(default_factory=list) # For enum type fields
indexed: bool = False # Whether field should be indexed
class RowSchema(Record):
name = String()
description = String()
fields = Array(Field())
@dataclass
class RowSchema:
name: str = ""
description: str = ""
fields: list[Field] = field(default_factory=list)
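
The former pulsar.schema Records are now plain dataclasses, so construction is ordinary Python, and the Value | None annotations are exactly what dict_to_dataclass inspects when rebuilding nested messages. For example:

    t = Triple(
        s=Value(value="http://example.com/e1", is_uri=True),
        p=Value(value="label"),
        o=Value(value="Example entity"),
    )
    f = Field(name="id", type="string", primary=True, required=True)
    schema = RowSchema(name="people", description="People records", fields=[f])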


@ -1,4 +1,23 @@
def topic(topic, kind='persistent', tenant='tg', namespace='flow'):
return f"{kind}://{tenant}/{namespace}/{topic}"
def topic(queue_name, qos='q1', tenant='tg', namespace='flow'):
"""
Create a generic topic identifier that can be mapped by backends.
Args:
queue_name: The queue/topic name
qos: Quality of service
- 'q0' = best-effort (no ack)
- 'q1' = at-least-once (ack required)
- 'q2' = exactly-once (two-phase ack)
tenant: Tenant identifier for multi-tenancy
namespace: Namespace within tenant
Returns:
Generic topic string: qos/tenant/namespace/queue_name
Examples:
topic('my-queue') # q1/tg/flow/my-queue
topic('config', qos='q2', namespace='config') # q2/tg/config/config
"""
return f"{qos}/{tenant}/{namespace}/{queue_name}"
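
The returned identifier is deliberately backend-neutral; each backend maps it to its native addressing. For Pulsar, map_topic (added in this changeset) turns the QoS prefix into a persistence mode:

    assert topic('my-queue') == 'q1/tg/flow/my-queue'
    assert topic('config', qos='q2', namespace='config') == 'q2/tg/config/config'

    # Pulsar mapping (see PulsarBackend.map_topic in this changeset):
    #   q0/tg/request/config -> non-persistent://tg/request/config
    #   q1/tg/flow/my-queue  -> persistent://tg/flow/my-queue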


@ -1,4 +1,4 @@
from pulsar.schema import Record, Bytes
from dataclasses import dataclass
from ..core.metadata import Metadata
from ..core.topic import topic
@ -6,24 +6,27 @@ from ..core.topic import topic
############################################################################
# PDF docs etc.
class Document(Record):
metadata = Metadata()
data = Bytes()
@dataclass
class Document:
metadata: Metadata | None = None
data: bytes = b""
############################################################################
# Text documents / text from PDF
class TextDocument(Record):
metadata = Metadata()
text = Bytes()
@dataclass
class TextDocument:
metadata: Metadata | None = None
text: bytes = b""
############################################################################
# Chunks of text
class Chunk(Record):
metadata = Metadata()
chunk = Bytes()
@dataclass
class Chunk:
metadata: Metadata | None = None
chunk: bytes = b""
############################################################################
############################################################################


@ -1,4 +1,4 @@
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double, Map
from dataclasses import dataclass, field
from ..core.metadata import Metadata
from ..core.primitives import Value, RowSchema
@ -8,49 +8,55 @@ from ..core.topic import topic
# Graph embeddings are embeddings associated with a graph entity
class EntityEmbeddings(Record):
entity = Value()
vectors = Array(Array(Double()))
@dataclass
class EntityEmbeddings:
entity: Value | None = None
vectors: list[list[float]] = field(default_factory=list)
# This is a 'batching' mechanism for the above data
class GraphEmbeddings(Record):
metadata = Metadata()
entities = Array(EntityEmbeddings())
@dataclass
class GraphEmbeddings:
metadata: Metadata | None = None
entities: list[EntityEmbeddings] = field(default_factory=list)
############################################################################
# Document embeddings are embeddings associated with a chunk
class ChunkEmbeddings(Record):
chunk = Bytes()
vectors = Array(Array(Double()))
@dataclass
class ChunkEmbeddings:
chunk: bytes = b""
vectors: list[list[float]] = field(default_factory=list)
# This is a 'batching' mechanism for the above data
class DocumentEmbeddings(Record):
metadata = Metadata()
chunks = Array(ChunkEmbeddings())
@dataclass
class DocumentEmbeddings:
metadata: Metadata | None = None
chunks: list[ChunkEmbeddings] = field(default_factory=list)
############################################################################
# Object embeddings are embeddings associated with the primary key of an
# object
class ObjectEmbeddings(Record):
metadata = Metadata()
vectors = Array(Array(Double()))
name = String()
key_name = String()
id = String()
@dataclass
class ObjectEmbeddings:
metadata: Metadata | None = None
vectors: list[list[float]] = field(default_factory=list)
name: str = ""
key_name: str = ""
id: str = ""
############################################################################
# Structured object embeddings with enhanced capabilities
class StructuredObjectEmbedding(Record):
metadata = Metadata()
vectors = Array(Array(Double()))
schema_name = String()
object_id = String() # Primary key value
field_embeddings = Map(Array(Double())) # Per-field embeddings
@dataclass
class StructuredObjectEmbedding:
metadata: Metadata | None = None
vectors: list[list[float]] = field(default_factory=list)
schema_name: str = ""
object_id: str = "" # Primary key value
field_embeddings: dict[str, list[float]] = field(default_factory=dict) # Per-field embeddings
############################################################################
############################################################################


@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Array
from dataclasses import dataclass, field
from ..core.primitives import Value, Triple
from ..core.metadata import Metadata
@ -8,21 +8,24 @@ from ..core.topic import topic
# Entity context are an entity associated with textual context
class EntityContext(Record):
entity = Value()
context = String()
@dataclass
class EntityContext:
entity: Value | None = None
context: str = ""
# This is a 'batching' mechanism for the above data
class EntityContexts(Record):
metadata = Metadata()
entities = Array(EntityContext())
@dataclass
class EntityContexts:
metadata: Metadata | None = None
entities: list[EntityContext] = field(default_factory=list)
############################################################################
# Graph triples
class Triples(Record):
metadata = Metadata()
triples = Array(Triple())
@dataclass
class Triples:
metadata: Metadata | None = None
triples: list[Triple] = field(default_factory=list)
############################################################################
############################################################################


@ -1,5 +1,4 @@
from pulsar.schema import Record, Bytes, String, Array, Long, Boolean
from dataclasses import dataclass, field
from ..core.primitives import Triple, Error
from ..core.topic import topic
from ..core.metadata import Metadata
@ -22,40 +21,40 @@ from .embeddings import GraphEmbeddings
# <- ()
# <- (error)
class KnowledgeRequest(Record):
@dataclass
class KnowledgeRequest:
# get-kg-core, delete-kg-core, list-kg-cores, put-kg-core
# load-kg-core, unload-kg-core
operation = String()
operation: str = ""
# list-kg-cores, delete-kg-core, put-kg-core
user = String()
user: str = ""
# get-kg-core, list-kg-cores, delete-kg-core, put-kg-core,
# load-kg-core, unload-kg-core
id = String()
id: str = ""
# load-kg-core
flow = String()
flow: str = ""
# load-kg-core
collection = String()
collection: str = ""
# put-kg-core
triples = Triples()
graph_embeddings = GraphEmbeddings()
triples: Triples | None = None
graph_embeddings: GraphEmbeddings | None = None
class KnowledgeResponse(Record):
error = Error()
ids = Array(String())
eos = Boolean() # Indicates end of knowledge core stream
triples = Triples()
graph_embeddings = GraphEmbeddings()
@dataclass
class KnowledgeResponse:
error: Error | None = None
ids: list[str] = field(default_factory=list)
eos: bool = False # Indicates end of knowledge core stream
triples: Triples | None = None
graph_embeddings: GraphEmbeddings | None = None
knowledge_request_queue = topic(
'knowledge', kind='non-persistent', namespace='request'
'knowledge', qos='q0', namespace='request'
)
knowledge_response_queue = topic(
'knowledge', kind='non-persistent', namespace='response',
'knowledge', qos='q0', namespace='response',
)


@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Boolean
from dataclasses import dataclass
from ..core.topic import topic
@ -6,21 +6,25 @@ from ..core.topic import topic
# NLP extraction data types
class Definition(Record):
name = String()
definition = String()
@dataclass
class Definition:
name: str = ""
definition: str = ""
class Topic(Record):
name = String()
definition = String()
@dataclass
class Topic:
name: str = ""
definition: str = ""
class Relationship(Record):
s = String()
p = String()
o = String()
o_entity = Boolean()
@dataclass
class Relationship:
s: str = ""
p: str = ""
o: str = ""
o_entity: bool = False
class Fact(Record):
s = String()
p = String()
o = String()
@dataclass
class Fact:
s: str = ""
p: str = ""
o: str = ""


@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Map, Double, Array
from dataclasses import dataclass, field
from ..core.metadata import Metadata
from ..core.topic import topic
@ -7,11 +7,13 @@ from ..core.topic import topic
# Extracted object from text processing
class ExtractedObject(Record):
metadata = Metadata()
schema_name = String() # Which schema this object belongs to
values = Array(Map(String())) # Array of objects, each object is field name -> value
confidence = Double()
source_span = String() # Text span where object was found
@dataclass
class ExtractedObject:
metadata: Metadata | None = None
schema_name: str = "" # Which schema this object belongs to
values: list[dict[str, str]] = field(default_factory=list) # Array of objects, each object is field name -> value
confidence: float = 0.0
source_span: str = "" # Text span where object was found
############################################################################
############################################################################


@ -1,4 +1,4 @@
from pulsar.schema import Record, Array, Map, String
from dataclasses import dataclass, field
from ..core.metadata import Metadata
from ..core.primitives import RowSchema
@ -8,9 +8,10 @@ from ..core.topic import topic
# Stores rows of information
class Rows(Record):
metadata = Metadata()
row_schema = RowSchema()
rows = Array(Map(String()))
@dataclass
class Rows:
metadata: Metadata | None = None
row_schema: RowSchema | None = None
rows: list[dict[str, str]] = field(default_factory=list)
############################################################################
############################################################################


@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Bytes, Map
from dataclasses import dataclass, field
from ..core.metadata import Metadata
from ..core.topic import topic
@ -7,11 +7,13 @@ from ..core.topic import topic
# Structured data submission for fire-and-forget processing
class StructuredDataSubmission(Record):
metadata = Metadata()
format = String() # "json", "csv", "xml"
schema_name = String() # Reference to schema in config
data = Bytes() # Raw data to ingest
options = Map(String()) # Format-specific options
@dataclass
class StructuredDataSubmission:
metadata: Metadata | None = None
format: str = "" # "json", "csv", "xml"
schema_name: str = "" # Reference to schema in config
data: bytes = b"" # Raw data to ingest
options: dict[str, str] = field(default_factory=dict) # Format-specific options
############################################################################
############################################################################


@ -1,5 +1,5 @@
from pulsar.schema import Record, String, Array, Map, Boolean
from dataclasses import dataclass, field
from ..core.topic import topic
from ..core.primitives import Error
@ -8,33 +8,36 @@ from ..core.primitives import Error
# Prompt services, abstract the prompt generation
class AgentStep(Record):
thought = String()
action = String()
arguments = Map(String())
observation = String()
user = String() # User context for the step
@dataclass
class AgentStep:
thought: str = ""
action: str = ""
arguments: dict[str, str] = field(default_factory=dict)
observation: str = ""
user: str = "" # User context for the step
class AgentRequest(Record):
question = String()
state = String()
group = Array(String())
history = Array(AgentStep())
user = String() # User context for multi-tenancy
streaming = Boolean() # NEW: Enable streaming response delivery (default false)
@dataclass
class AgentRequest:
question: str = ""
state: str = ""
group: list[str] | None = None
history: list[AgentStep] = field(default_factory=list)
user: str = "" # User context for multi-tenancy
streaming: bool = False # NEW: Enable streaming response delivery (default false)
class AgentResponse(Record):
@dataclass
class AgentResponse:
# Streaming-first design
chunk_type = String() # "thought", "action", "observation", "answer", "error"
content = String() # The actual content (interpretation depends on chunk_type)
end_of_message = Boolean() # Current chunk type (thought/action/etc.) is complete
end_of_dialog = Boolean() # Entire agent dialog is complete
chunk_type: str = "" # "thought", "action", "observation", "answer", "error"
content: str = "" # The actual content (interpretation depends on chunk_type)
end_of_message: bool = False # Current chunk type (thought/action/etc.) is complete
end_of_dialog: bool = False # Entire agent dialog is complete
# Legacy fields (deprecated but kept for backward compatibility)
answer = String()
error = Error()
thought = String()
observation = String()
answer: str = ""
error: Error | None = None
thought: str = ""
observation: str = ""
############################################################################
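
Under the streaming-first design, a consumer reassembles the dialog from typed chunks, closing each message on end_of_message and the whole exchange on end_of_dialog. A sketch (receive_agent_response stands in for the transport read):

    answer_parts = []
    while True:
        resp = receive_agent_response()          # hypothetical transport read
        if resp.chunk_type == "answer":
            answer_parts.append(resp.content)
        elif resp.chunk_type == "error":
            raise RuntimeError(resp.content)
        if resp.end_of_dialog:
            break
    answer = "".join(answer_parts)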


@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Integer, Array
from dataclasses import dataclass, field
from datetime import datetime
from ..core.primitives import Error
@ -10,37 +10,40 @@ from ..core.topic import topic
# Collection metadata operations (for librarian service)
class CollectionMetadata(Record):
@dataclass
class CollectionMetadata:
"""Collection metadata record"""
user = String()
collection = String()
name = String()
description = String()
tags = Array(String())
user: str = ""
collection: str = ""
name: str = ""
description: str = ""
tags: list[str] = field(default_factory=list)
############################################################################
class CollectionManagementRequest(Record):
@dataclass
class CollectionManagementRequest:
"""Request for collection management operations"""
operation = String() # e.g., "delete-collection"
operation: str = "" # e.g., "delete-collection"
# For 'list-collections'
user = String()
collection = String()
timestamp = String() # ISO timestamp
name = String()
description = String()
tags = Array(String())
user: str = ""
collection: str = ""
timestamp: str = "" # ISO timestamp
name: str = ""
description: str = ""
tags: list[str] = field(default_factory=list)
# For list
tag_filter = Array(String()) # Optional filter by tags
limit = Integer()
tag_filter: list[str] = field(default_factory=list) # Optional filter by tags
limit: int = 0
class CollectionManagementResponse(Record):
@dataclass
class CollectionManagementResponse:
"""Response for collection management operations"""
error = Error() # Only populated if there's an error
timestamp = String() # ISO timestamp
collections = Array(CollectionMetadata())
error: Error | None = None # Only populated if there's an error
timestamp: str = "" # ISO timestamp
collections: list[CollectionMetadata] = field(default_factory=list)
############################################################################
@ -48,8 +51,9 @@ class CollectionManagementResponse(Record):
# Topics
collection_request_queue = topic(
'collection', kind='non-persistent', namespace='request'
'collection', qos='q0', namespace='request'
)
collection_response_queue = topic(
'collection', kind='non-persistent', namespace='response'
'collection', qos='q0', namespace='response'
)


@ -1,5 +1,5 @@
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
from dataclasses import dataclass, field
from ..core.topic import topic
from ..core.primitives import Error
@ -13,58 +13,61 @@ from ..core.primitives import Error
# put(values) -> ()
# delete(keys) -> ()
# config() -> (version, config)
class ConfigKey(Record):
type = String()
key = String()
@dataclass
class ConfigKey:
type: str = ""
key: str = ""
class ConfigValue(Record):
type = String()
key = String()
value = String()
@dataclass
class ConfigValue:
type: str = ""
key: str = ""
value: str = ""
# Prompt services, abstract the prompt generation
class ConfigRequest(Record):
operation = String() # get, list, getvalues, delete, put, config
@dataclass
class ConfigRequest:
operation: str = "" # get, list, getvalues, delete, put, config
# get, delete
keys = Array(ConfigKey())
keys: list[ConfigKey] = field(default_factory=list)
# list, getvalues
type = String()
type: str = ""
# put
values = Array(ConfigValue())
class ConfigResponse(Record):
values: list[ConfigValue] = field(default_factory=list)
@dataclass
class ConfigResponse:
# get, list, getvalues, config
version = Integer()
version: int = 0
# get, getvalues
values = Array(ConfigValue())
values: list[ConfigValue] = field(default_factory=list)
# list
directory = Array(String())
directory: list[str] = field(default_factory=list)
# config
config = Map(Map(String()))
config: dict[str, dict[str, str]] = field(default_factory=dict)
# Everything
error = Error()
error: Error | None = None
class ConfigPush(Record):
version = Integer()
config = Map(Map(String()))
@dataclass
class ConfigPush:
version: int = 0
config: dict[str, dict[str, str]] = field(default_factory=dict)
config_request_queue = topic(
'config', kind='non-persistent', namespace='request'
'config', qos='q0', namespace='request'
)
config_response_queue = topic(
'config', kind='non-persistent', namespace='response'
'config', qos='q0', namespace='response'
)
config_push_queue = topic(
'config', kind='persistent', namespace='config'
'config', qos='q2', namespace='config'
)
############################################################################
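
Requests are now plain dataclasses, and the queue QoS encodes delivery semantics: q0 request/response traffic is best-effort, while the q2 push topic is marked for exactly-once delivery of configuration fan-out. Building a put request (illustrative values):

    req = ConfigRequest(
        operation="put",
        values=[ConfigValue(type="prompt", key="extract", value="...")],
    )

    push = ConfigPush(version=1, config={"prompt": {"extract": "..."}})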


@ -1,33 +1,36 @@
from pulsar.schema import Record, String, Map, Double, Array
from dataclasses import dataclass, field
from ..core.primitives import Error
############################################################################
# Structured data diagnosis services
class StructuredDataDiagnosisRequest(Record):
operation = String() # "detect-type", "generate-descriptor", "diagnose", or "schema-selection"
sample = String() # Data sample to analyze (text content)
type = String() # Data type (csv, json, xml) - optional, required for generate-descriptor
schema_name = String() # Target schema name for descriptor generation - optional
@dataclass
class StructuredDataDiagnosisRequest:
operation: str = "" # "detect-type", "generate-descriptor", "diagnose", or "schema-selection"
sample: str = "" # Data sample to analyze (text content)
type: str = "" # Data type (csv, json, xml) - optional, required for generate-descriptor
schema_name: str = "" # Target schema name for descriptor generation - optional
# JSON encoded options (e.g., delimiter for CSV)
options = Map(String())
options: dict[str, str] = field(default_factory=dict)
class StructuredDataDiagnosisResponse(Record):
error = Error()
@dataclass
class StructuredDataDiagnosisResponse:
error: Error | None = None
operation = String() # The operation that was performed
detected_type = String() # Detected data type (for detect-type/diagnose) - optional
confidence = Double() # Confidence score for type detection - optional
operation: str = "" # The operation that was performed
detected_type: str = "" # Detected data type (for detect-type/diagnose) - optional
confidence: float = 0.0 # Confidence score for type detection - optional
# JSON encoded descriptor (for generate-descriptor/diagnose) - optional
descriptor = String()
descriptor: str = ""
# JSON encoded additional metadata (e.g., field count, sample records)
metadata = Map(String())
metadata: dict[str, str] = field(default_factory=dict)
# Array of matching schema IDs (for schema-selection operation) - optional
schema_matches = Array(String())
schema_matches: list[str] = field(default_factory=list)
############################################################################
############################################################################


@ -1,5 +1,5 @@
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
from dataclasses import dataclass, field
from ..core.topic import topic
from ..core.primitives import Error
@ -11,61 +11,61 @@ from ..core.primitives import Error
# get_class(classname) -> (class)
# put_class(class) -> (class)
# delete_class(classname) -> ()
#
#
# list_flows() -> (flowid[])
# get_flow(flowid) -> (flow)
# start_flow(flowid, classname) -> ()
# stop_flow(flowid) -> ()
# Flow services, manage flow classes and running flows
class FlowRequest(Record):
operation = String() # list-classes, get-class, put-class, delete-class
@dataclass
class FlowRequest:
operation: str = "" # list-classes, get-class, put-class, delete-class
# list-flows, get-flow, start-flow, stop-flow
# get_class, put_class, delete_class, start_flow
class_name = String()
class_name: str = ""
# put_class
class_definition = String()
class_definition: str = ""
# start_flow
description = String()
description: str = ""
# get_flow, start_flow, stop_flow
flow_id = String()
flow_id: str = ""
# start_flow - optional parameters for flow customization
parameters = Map(String())
class FlowResponse(Record):
parameters: dict[str, str] = field(default_factory=dict)
@dataclass
class FlowResponse:
# list_classes
class_names = Array(String())
class_names: list[str] = field(default_factory=list)
# list_flows
flow_ids = Array(String())
flow_ids: list[str] = field(default_factory=list)
# get_class
class_definition = String()
class_definition: str = ""
# get_flow
flow = String()
flow: str = ""
# get_flow
description = String()
description: str = ""
# get_flow - parameters used when flow was started
parameters = Map(String())
parameters: dict[str, str] = field(default_factory=dict)
# Everything
error = Error()
error: Error | None = None
flow_request_queue = topic(
'flow', kind='non-persistent', namespace='request'
'flow', qos='q0', namespace='request'
)
flow_response_queue = topic(
'flow', kind='non-persistent', namespace='response'
'flow', qos='q0', namespace='response'
)
############################################################################
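
A sketch of a start-flow request over the queues above; the class name, flow id and parameter key are illustrative, and send() is hypothetical:

req = FlowRequest(
    operation="start-flow",
    class_name="document-rag",              # illustrative class name
    flow_id="flow-001",
    description="RAG over the default collection",
    parameters={"collection": "default"},   # illustrative parameter
)
# resp: FlowResponse = await send(flow_request_queue, req)  # hypothetical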


@ -1,9 +1,8 @@
from pulsar.schema import Record, Bytes, String, Array, Long
from dataclasses import dataclass, field
from ..core.primitives import Triple, Error
from ..core.topic import topic
from ..core.metadata import Metadata
from ..knowledge.document import Document, TextDocument
# Note: Document imports will be updated after knowledge schemas are converted
# add-document
# -> (document_id, document_metadata, content)
@ -50,76 +49,79 @@ from ..knowledge.document import Document, TextDocument
# <- (processing_metadata[])
# <- (error)
class DocumentMetadata(Record):
id = String()
time = Long()
kind = String()
title = String()
comments = String()
metadata = Array(Triple())
user = String()
tags = Array(String())
@dataclass
class DocumentMetadata:
id: str = ""
time: int = 0
kind: str = ""
title: str = ""
comments: str = ""
metadata: list[Triple] = field(default_factory=list)
user: str = ""
tags: list[str] = field(default_factory=list)
class ProcessingMetadata(Record):
id = String()
document_id = String()
time = Long()
flow = String()
user = String()
collection = String()
tags = Array(String())
@dataclass
class ProcessingMetadata:
id: str = ""
document_id: str = ""
time: int = 0
flow: str = ""
user: str = ""
collection: str = ""
tags: list[str] = field(default_factory=list)
class Criteria(Record):
key = String()
value = String()
operator = String()
class LibrarianRequest(Record):
@dataclass
class Criteria:
key: str = ""
value: str = ""
operator: str = ""
@dataclass
class LibrarianRequest:
# add-document, remove-document, update-document, get-document-metadata,
# get-document-content, add-processing, remove-processing, list-documents,
# list-processing
operation = String()
operation: str = ""
# add-document, remove-document, update-document, get-document-metadata,
# get-document-content
document_id = String()
document_id: str = ""
# add-processing, remove-processing
processing_id = String()
processing_id: str = ""
# add-document, update-document
document_metadata = DocumentMetadata()
document_metadata: DocumentMetadata | None = None
# add-processing
processing_metadata = ProcessingMetadata()
processing_metadata: ProcessingMetadata | None = None
# add-document
content = Bytes()
content: bytes = b""
# list-documents, list-processing
user = String()
user: str = ""
# list-documents?, list-processing?
collection = String()
collection: str = ""
#
criteria = Array(Criteria())
#
criteria: list[Criteria] = field(default_factory=list)
class LibrarianResponse(Record):
error = Error()
document_metadata = DocumentMetadata()
content = Bytes()
document_metadatas = Array(DocumentMetadata())
processing_metadatas = Array(ProcessingMetadata())
@dataclass
class LibrarianResponse:
error: Error | None = None
document_metadata: DocumentMetadata | None = None
content: bytes = b""
document_metadatas: list[DocumentMetadata] = field(default_factory=list)
processing_metadatas: list[ProcessingMetadata] = field(default_factory=list)
# FIXME: Is this right? Using persistence on librarian so that
# message chunking works
librarian_request_queue = topic(
'librarian', kind='persistent', namespace='request'
'librarian', qos='q1', namespace='request'
)
librarian_response_queue = topic(
'librarian', kind='persistent', namespace='response',
'librarian', qos='q1', namespace='response',
)
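
A sketch of an add-document request; the id, title, tags and content are illustrative, and time is assumed to be a Unix timestamp (the field was a Pulsar Long):

req = LibrarianRequest(
    operation="add-document",
    document_id="doc-123",
    document_metadata=DocumentMetadata(
        id="doc-123", time=1734470400, kind="text/plain",
        title="Example document", user="trustgraph", tags=["example"],
    ),
    content=b"Hello, world.",
)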


@ -1,5 +1,5 @@
from pulsar.schema import Record, String, Array, Double, Integer, Boolean
from dataclasses import dataclass, field
from ..core.topic import topic
from ..core.primitives import Error
@ -8,46 +8,49 @@ from ..core.primitives import Error
# LLM text completion
class TextCompletionRequest(Record):
system = String()
prompt = String()
streaming = Boolean() # Default false for backward compatibility
@dataclass
class TextCompletionRequest:
system: str = ""
prompt: str = ""
streaming: bool = False # Default false for backward compatibility
class TextCompletionResponse(Record):
error = Error()
response = String()
in_token = Integer()
out_token = Integer()
model = String()
end_of_stream = Boolean() # Indicates final message in stream
@dataclass
class TextCompletionResponse:
error: Error | None = None
response: str = ""
in_token: int = 0
out_token: int = 0
model: str = ""
end_of_stream: bool = False # Indicates final message in stream
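
A sketch of draining a streamed completion, assuming each non-final message carries partial text in response and the final one sets end_of_stream:

def drain(messages) -> str:
    # messages: an iterator of TextCompletionResponse from the fabric
    parts: list[str] = []
    for msg in messages:
        if msg.error is not None:
            raise RuntimeError(msg.error)
        parts.append(msg.response)
        if msg.end_of_stream:
            break
    return "".join(parts)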
############################################################################
# Embeddings
class EmbeddingsRequest(Record):
text = String()
@dataclass
class EmbeddingsRequest:
text: str = ""
class EmbeddingsResponse(Record):
error = Error()
vectors = Array(Array(Double()))
@dataclass
class EmbeddingsResponse:
error: Error | None = None
vectors: list[list[float]] = field(default_factory=list)
############################################################################
# Tool request/response
class ToolRequest(Record):
name = String()
@dataclass
class ToolRequest:
name: str = ""
# Parameters are JSON encoded
parameters = String()
class ToolResponse(Record):
error = Error()
parameters: str = ""
@dataclass
class ToolResponse:
error: Error | None = None
# Plain text aka "unstructured"
text = String()
text: str = ""
# JSON-encoded object aka "structured"
object = String()
object: str = ""
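
A sketch of the JSON-encoded parameter convention on ToolRequest, and of reading back a structured result; the tool name and arguments are illustrative:

import json

req = ToolRequest(name="weather", parameters=json.dumps({"city": "London"}))
# resp: ToolResponse = ...                  # obtained via the fabric
# result = json.loads(resp.object) if resp.object else resp.text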


@ -1,5 +1,4 @@
from pulsar.schema import Record, String
from dataclasses import dataclass
from ..core.primitives import Error, Value, Triple
from ..core.topic import topic
@ -9,13 +8,14 @@ from ..core.metadata import Metadata
# Lookups
class LookupRequest(Record):
kind = String()
term = String()
@dataclass
class LookupRequest:
kind: str = ""
term: str = ""
class LookupResponse(Record):
text = String()
error = Error()
@dataclass
class LookupResponse:
text: str = ""
error: Error | None = None
############################################################################


@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Array, Map, Integer, Double
from dataclasses import dataclass, field
from ..core.primitives import Error
from ..core.topic import topic
@ -7,15 +7,18 @@ from ..core.topic import topic
# NLP to Structured Query Service - converts natural language to GraphQL
class QuestionToStructuredQueryRequest(Record):
question = String()
max_results = Integer()
@dataclass
class QuestionToStructuredQueryRequest:
question: str = ""
max_results: int = 0
class QuestionToStructuredQueryResponse(Record):
error = Error()
graphql_query = String() # Generated GraphQL query
variables = Map(String()) # GraphQL variables if any
detected_schemas = Array(String()) # Which schemas the query targets
confidence = Double()
@dataclass
class QuestionToStructuredQueryResponse:
error: Error | None = None
graphql_query: str = "" # Generated GraphQL query
variables: dict[str, str] = field(default_factory=dict) # GraphQL variables if any
detected_schemas: list[str] = field(default_factory=list) # Which schemas the query targets
confidence: float = 0.0
############################################################################


@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Map, Array
from dataclasses import dataclass, field
from ..core.primitives import Error
from ..core.topic import topic
@ -7,22 +7,25 @@ from ..core.topic import topic
# Objects Query Service - executes GraphQL queries against structured data
class GraphQLError(Record):
message = String()
path = Array(String()) # Path to the field that caused the error
extensions = Map(String()) # Additional error metadata
@dataclass
class GraphQLError:
message: str = ""
path: list[str] = field(default_factory=list) # Path to the field that caused the error
extensions: dict[str, str] = field(default_factory=dict) # Additional error metadata
class ObjectsQueryRequest(Record):
user = String() # Cassandra keyspace (follows pattern from TriplesQueryRequest)
collection = String() # Data collection identifier (required for partition key)
query = String() # GraphQL query string
variables = Map(String()) # GraphQL variables
operation_name = String() # Operation to execute for multi-operation documents
@dataclass
class ObjectsQueryRequest:
user: str = "" # Cassandra keyspace (follows pattern from TriplesQueryRequest)
collection: str = "" # Data collection identifier (required for partition key)
query: str = "" # GraphQL query string
variables: dict[str, str] = field(default_factory=dict) # GraphQL variables
operation_name: str = "" # Operation to execute for multi-operation documents
class ObjectsQueryResponse(Record):
error = Error() # System-level error (connection, timeout, etc.)
data = String() # JSON-encoded GraphQL response data
errors = Array(GraphQLError()) # GraphQL field-level errors
extensions = Map(String()) # Query metadata (execution time, etc.)
@dataclass
class ObjectsQueryResponse:
error: Error | None = None # System-level error (connection, timeout, etc.)
data: str = "" # JSON-encoded GraphQL response data
errors: list[GraphQLError] = field(default_factory=list) # GraphQL field-level errors
extensions: dict[str, str] = field(default_factory=dict) # Query metadata (execution time, etc.)
############################################################################
############################################################################
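
A sketch of an objects query; the GraphQL document is illustrative, and since variables originated as Map(String()), non-string values are assumed to need JSON encoding by the caller:

req = ObjectsQueryRequest(
    user="trustgraph",
    collection="default",
    query="query($name: String) { person(name: $name) { age } }",
    variables={"name": "Alice"},
)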


@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Map, Boolean
from dataclasses import dataclass, field
from ..core.primitives import Error
from ..core.topic import topic
@ -18,27 +18,28 @@ from ..core.topic import topic
# extract-rows
# schema, chunk -> rows
class PromptRequest(Record):
id = String()
@dataclass
class PromptRequest:
id: str = ""
# JSON encoded values
terms = Map(String())
terms: dict[str, str] = field(default_factory=dict)
# Streaming support (default false for backward compatibility)
streaming = Boolean()
class PromptResponse(Record):
streaming: bool = False
@dataclass
class PromptResponse:
# Error case
error = Error()
error: Error | None = None
# Just plain text
text = String()
text: str = ""
# JSON encoded
object = String()
object: str = ""
# Indicates final message in stream
end_of_stream = Boolean()
end_of_stream: bool = False
############################################################################
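
A sketch of a prompt invocation with JSON-encoded term values, per the comment above; the prompt id and term key are illustrative:

import json

req = PromptRequest(
    id="extract-definitions",               # illustrative prompt id
    terms={"chunk": json.dumps("Some text to analyse.")},
    streaming=False,
)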


@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Integer, Array, Double
from dataclasses import dataclass, field
from ..core.primitives import Error, Value, Triple
from ..core.topic import topic
@ -7,49 +7,55 @@ from ..core.topic import topic
# Graph embeddings query
class GraphEmbeddingsRequest(Record):
vectors = Array(Array(Double()))
limit = Integer()
user = String()
collection = String()
@dataclass
class GraphEmbeddingsRequest:
vectors: list[list[float]] = field(default_factory=list)
limit: int = 0
user: str = ""
collection: str = ""
class GraphEmbeddingsResponse(Record):
error = Error()
entities = Array(Value())
@dataclass
class GraphEmbeddingsResponse:
error: Error | None = None
entities: list[Value] = field(default_factory=list)
############################################################################
# Graph triples query
class TriplesQueryRequest(Record):
user = String()
collection = String()
s = Value()
p = Value()
o = Value()
limit = Integer()
@dataclass
class TriplesQueryRequest:
user: str = ""
collection: str = ""
s: Value | None = None
p: Value | None = None
o: Value | None = None
limit: int = 0
class TriplesQueryResponse(Record):
error = Error()
triples = Array(Triple())
@dataclass
class TriplesQueryResponse:
error: Error | None = None
triples: list[Triple] = field(default_factory=list)
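
A sketch of a triples pattern query; a bound position would carry a Value, and None is assumed to act as a wildcard, as unset Value() fields did in the old schema:

req = TriplesQueryRequest(
    user="trustgraph",
    collection="default",
    s=None,      # assumed wildcard; pass a Value to bind the subject
    p=None,      # assumed wildcard
    o=None,      # assumed wildcard
    limit=20,
)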
############################################################################
# Doc embeddings query
class DocumentEmbeddingsRequest(Record):
vectors = Array(Array(Double()))
limit = Integer()
user = String()
collection = String()
@dataclass
class DocumentEmbeddingsRequest:
vectors: list[list[float]] = field(default_factory=list)
limit: int = 0
user: str = ""
collection: str = ""
class DocumentEmbeddingsResponse(Record):
error = Error()
chunks = Array(String())
@dataclass
class DocumentEmbeddingsResponse:
error: Error | None = None
chunks: list[str] = field(default_factory=list)
document_embeddings_request_queue = topic(
"non-persistent://trustgraph/document-embeddings-request"
"document-embeddings-request", qos='q0', tenant='trustgraph', namespace='flow'
)
document_embeddings_response_queue = topic(
"non-persistent://trustgraph/document-embeddings-response"
"document-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
)


@ -1,5 +1,4 @@
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
from dataclasses import dataclass
from ..core.topic import topic
from ..core.primitives import Error, Value
@ -7,36 +6,37 @@ from ..core.primitives import Error, Value
# Graph RAG text retrieval
class GraphRagQuery(Record):
query = String()
user = String()
collection = String()
entity_limit = Integer()
triple_limit = Integer()
max_subgraph_size = Integer()
max_path_length = Integer()
streaming = Boolean()
@dataclass
class GraphRagQuery:
query: str = ""
user: str = ""
collection: str = ""
entity_limit: int = 0
triple_limit: int = 0
max_subgraph_size: int = 0
max_path_length: int = 0
streaming: bool = False
class GraphRagResponse(Record):
error = Error()
response = String()
chunk = String()
end_of_stream = Boolean()
@dataclass
class GraphRagResponse:
error: Error | None = None
response: str = ""
end_of_stream: bool = False
############################################################################
# Document RAG text retrieval
class DocumentRagQuery(Record):
query = String()
user = String()
collection = String()
doc_limit = Integer()
streaming = Boolean()
class DocumentRagResponse(Record):
error = Error()
response = String()
chunk = String()
end_of_stream = Boolean()
@dataclass
class DocumentRagQuery:
query: str = ""
user: str = ""
collection: str = ""
doc_limit: int = 0
streaming: bool = False
@dataclass
class DocumentRagResponse:
error: Error | None = None
response: str = ""
end_of_stream: bool = False
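
A sketch of a streaming Graph RAG query; the limits are illustrative, and with the chunk field gone, partial text is assumed to arrive in response under the same end_of_stream convention as the completion stream above:

q = GraphRagQuery(
    query="How are Alice and Bob connected?",
    user="trustgraph",
    collection="default",
    entity_limit=50,
    triple_limit=30,
    max_subgraph_size=150,
    max_path_length=2,
    streaming=True,
)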


@ -1,4 +1,4 @@
from pulsar.schema import Record, String
from dataclasses import dataclass
from ..core.primitives import Error
from ..core.topic import topic
@ -7,15 +7,17 @@ from ..core.topic import topic
# Storage management operations
class StorageManagementRequest(Record):
@dataclass
class StorageManagementRequest:
"""Request for storage management operations sent to store processors"""
operation = String() # e.g., "delete-collection"
user = String()
collection = String()
operation: str = "" # e.g., "delete-collection"
user: str = ""
collection: str = ""
class StorageManagementResponse(Record):
@dataclass
class StorageManagementResponse:
"""Response from storage processors for management operations"""
error = Error() # Only populated on error; null indicates success
error: Error | None = None # Only populated on error; None indicates success
############################################################################
@ -23,20 +25,21 @@ class StorageManagementResponse(Record):
# Topics for sending collection management requests to different storage types
vector_storage_management_topic = topic(
'vector-storage-management', kind='non-persistent', namespace='request'
'vector-storage-management', qos='q0', namespace='request'
)
object_storage_management_topic = topic(
'object-storage-management', kind='non-persistent', namespace='request'
'object-storage-management', qos='q0', namespace='request'
)
triples_storage_management_topic = topic(
'triples-storage-management', kind='non-persistent', namespace='request'
'triples-storage-management', qos='q0', namespace='request'
)
# Topic for receiving responses from storage processors
storage_management_response_topic = topic(
'storage-management', kind='non-persistent', namespace='response'
'storage-management', qos='q0', namespace='response'
)
############################################################################
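
A sketch of fanning a delete-collection request out to each storage type's topic; the print is a stand-in for a fabric publish:

req = StorageManagementRequest(
    operation="delete-collection",
    user="trustgraph",
    collection="default",
)
for t in (vector_storage_management_topic,
          object_storage_management_topic,
          triples_storage_management_topic):
    print(f"would publish delete-collection to {t}")  # stand-in for publish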


@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Map, Array
from dataclasses import dataclass, field
from ..core.primitives import Error
from ..core.topic import topic
@ -7,14 +7,17 @@ from ..core.topic import topic
# Structured Query Service - executes GraphQL queries
class StructuredQueryRequest(Record):
question = String()
user = String() # Cassandra keyspace identifier
collection = String() # Data collection identifier
@dataclass
class StructuredQueryRequest:
question: str = ""
user: str = "" # Cassandra keyspace identifier
collection: str = "" # Data collection identifier
class StructuredQueryResponse(Record):
error = Error()
data = String() # JSON-encoded GraphQL response data
errors = Array(String()) # GraphQL errors if any
@dataclass
class StructuredQueryResponse:
error: Error | None = None
data: str = "" # JSON-encoded GraphQL response data
errors: list[str] = field(default_factory=list) # GraphQL errors if any
############################################################################