mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-28 01:46:22 +02:00
Messaging fabric plugins (#592)
* Plugin architecture for messaging fabric * Schemas use a technology-neutral expression * Schema strictness uncovered some incorrect schema use, which is now fixed
This commit is contained in:
parent
1865b3f3c8
commit
34eb083836
100 changed files with 2342 additions and 828 deletions
|
|
@ -159,12 +159,12 @@ class AsyncFlowInstance:
|
|||
result = await self.request("text-completion", request_data)
|
||||
return result.get("response", "")
|
||||
|
||||
async def graph_rag(self, question: str, user: str, collection: str,
|
||||
async def graph_rag(self, query: str, user: str, collection: str,
|
||||
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,
|
||||
max_entity_distance: int = 3, **kwargs: Any) -> str:
|
||||
"""Graph RAG (non-streaming, use async_socket for streaming)"""
|
||||
request_data = {
|
||||
"question": question,
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
|
|
@ -177,11 +177,11 @@ class AsyncFlowInstance:
|
|||
result = await self.request("graph-rag", request_data)
|
||||
return result.get("response", "")
|
||||
|
||||
async def document_rag(self, question: str, user: str, collection: str,
|
||||
async def document_rag(self, query: str, user: str, collection: str,
|
||||
doc_limit: int = 10, **kwargs: Any) -> str:
|
||||
"""Document RAG (non-streaming, use async_socket for streaming)"""
|
||||
request_data = {
|
||||
"question": question,
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"doc-limit": doc_limit,
|
||||
|
|
|
|||
|
|
@ -208,12 +208,12 @@ class AsyncSocketFlowInstance:
|
|||
if hasattr(chunk, 'content'):
|
||||
yield chunk.content
|
||||
|
||||
async def graph_rag(self, question: str, user: str, collection: str,
|
||||
async def graph_rag(self, query: str, user: str, collection: str,
|
||||
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,
|
||||
max_entity_distance: int = 3, streaming: bool = False, **kwargs):
|
||||
"""Graph RAG with optional streaming"""
|
||||
request = {
|
||||
"question": question,
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
|
|
@ -235,11 +235,11 @@ class AsyncSocketFlowInstance:
|
|||
if hasattr(chunk, 'content'):
|
||||
yield chunk.content
|
||||
|
||||
async def document_rag(self, question: str, user: str, collection: str,
|
||||
async def document_rag(self, query: str, user: str, collection: str,
|
||||
doc_limit: int = 10, streaming: bool = False, **kwargs):
|
||||
"""Document RAG with optional streaming"""
|
||||
request = {
|
||||
"question": question,
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"doc-limit": doc_limit,
|
||||
|
|
|
|||
|
|
@ -160,14 +160,14 @@ class FlowInstance:
|
|||
)["answer"]
|
||||
|
||||
def graph_rag(
|
||||
self, question, user="trustgraph", collection="default",
|
||||
self, query, user="trustgraph", collection="default",
|
||||
entity_limit=50, triple_limit=30, max_subgraph_size=150,
|
||||
max_path_length=2,
|
||||
):
|
||||
|
||||
# The input consists of a question
|
||||
input = {
|
||||
"query": question,
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"entity-limit": entity_limit,
|
||||
|
|
@ -182,13 +182,13 @@ class FlowInstance:
|
|||
)["response"]
|
||||
|
||||
def document_rag(
|
||||
self, question, user="trustgraph", collection="default",
|
||||
self, query, user="trustgraph", collection="default",
|
||||
doc_limit=10,
|
||||
):
|
||||
|
||||
# The input consists of a question
|
||||
input = {
|
||||
"query": question,
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"doc-limit": doc_limit,
|
||||
|
|
|
|||
|
|
@ -284,7 +284,7 @@ class SocketFlowInstance:
|
|||
|
||||
def graph_rag(
|
||||
self,
|
||||
question: str,
|
||||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
max_subgraph_size: int = 1000,
|
||||
|
|
@ -295,7 +295,7 @@ class SocketFlowInstance:
|
|||
) -> Union[str, Iterator[str]]:
|
||||
"""Graph RAG with optional streaming"""
|
||||
request = {
|
||||
"question": question,
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
|
|
@ -316,7 +316,7 @@ class SocketFlowInstance:
|
|||
|
||||
def document_rag(
|
||||
self,
|
||||
question: str,
|
||||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
doc_limit: int = 10,
|
||||
|
|
@ -325,7 +325,7 @@ class SocketFlowInstance:
|
|||
) -> Union[str, Iterator[str]]:
|
||||
"""Document RAG with optional streaming"""
|
||||
request = {
|
||||
"question": question,
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"doc-limit": doc_limit,
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ from prometheus_client import start_http_server, Info
|
|||
|
||||
from .. schema import ConfigPush, config_push_queue
|
||||
from .. log_level import LogLevel
|
||||
from . pubsub import PulsarClient
|
||||
from . pubsub import PulsarClient, get_pubsub
|
||||
from . producer import Producer
|
||||
from . consumer import Consumer
|
||||
from . metrics import ProcessorMetrics, ConsumerMetrics
|
||||
|
|
@ -34,8 +34,11 @@ class AsyncProcessor:
|
|||
# Store the identity
|
||||
self.id = params.get("id")
|
||||
|
||||
# Register a pulsar client
|
||||
self.pulsar_client_object = PulsarClient(**params)
|
||||
# Create pub/sub backend via factory
|
||||
self.pubsub_backend = get_pubsub(**params)
|
||||
|
||||
# Store pulsar_host for backward compatibility
|
||||
self._pulsar_host = params.get("pulsar_host", "pulsar://pulsar:6650")
|
||||
|
||||
# Initialise metrics, records the parameters
|
||||
ProcessorMetrics(processor = self.id).info({
|
||||
|
|
@ -70,7 +73,7 @@ class AsyncProcessor:
|
|||
self.config_sub_task = Consumer(
|
||||
|
||||
taskgroup = self.taskgroup,
|
||||
client = self.pulsar_client,
|
||||
backend = self.pubsub_backend, # Changed from client to backend
|
||||
subscriber = config_subscriber_id,
|
||||
flow = None,
|
||||
|
||||
|
|
@ -96,16 +99,16 @@ class AsyncProcessor:
|
|||
# This is called to stop all threads. An over-ride point for extra
|
||||
# functionality
|
||||
def stop(self):
|
||||
self.pulsar_client.close()
|
||||
self.pubsub_backend.close()
|
||||
self.running = False
|
||||
|
||||
# Returns the pulsar host
|
||||
# Returns the pub/sub backend (new interface)
|
||||
@property
|
||||
def pulsar_host(self): return self.pulsar_client_object.pulsar_host
|
||||
def pubsub(self): return self.pubsub_backend
|
||||
|
||||
# Returns the pulsar client
|
||||
# Returns the pulsar host (backward compatibility)
|
||||
@property
|
||||
def pulsar_client(self): return self.pulsar_client_object.client
|
||||
def pulsar_host(self): return self._pulsar_host
|
||||
|
||||
# Register a new event handler for configuration change
|
||||
def register_config_handler(self, handler):
|
||||
|
|
@ -247,6 +250,14 @@ class AsyncProcessor:
|
|||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
# Pub/sub backend selection
|
||||
parser.add_argument(
|
||||
'--pubsub-backend',
|
||||
default=os.getenv('PUBSUB_BACKEND', 'pulsar'),
|
||||
choices=['pulsar', 'mqtt'],
|
||||
help='Pub/sub backend (default: pulsar, env: PUBSUB_BACKEND)',
|
||||
)
|
||||
|
||||
PulsarClient.add_args(parser)
|
||||
add_logging_args(parser)
|
||||
|
||||
|
|
|
|||
148
trustgraph-base/trustgraph/base/backend.py
Normal file
148
trustgraph-base/trustgraph/base/backend.py
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
"""
|
||||
Backend abstraction interfaces for pub/sub systems.
|
||||
|
||||
This module defines Protocol classes that all pub/sub backends must implement,
|
||||
allowing TrustGraph to work with different messaging systems (Pulsar, MQTT, Kafka, etc.)
|
||||
"""
|
||||
|
||||
from typing import Protocol, Any, runtime_checkable
|
||||
|
||||
|
||||
@runtime_checkable
class Message(Protocol):
    """Structural interface for a message handed back by a consumer."""

    def value(self) -> Any:
        """Return the deserialized message content.

        Returns:
            Dataclass instance representing the message.
        """

    def properties(self) -> dict:
        """Return the properties/metadata attached to the message.

        Returns:
            Dictionary of message properties.
        """
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class BackendProducer(Protocol):
|
||||
"""Protocol for backend-specific producer."""
|
||||
|
||||
def send(self, message: Any, properties: dict = {}) -> None:
|
||||
"""
|
||||
Send a message (dataclass instance) with optional properties.
|
||||
|
||||
Args:
|
||||
message: Dataclass instance to send
|
||||
properties: Optional metadata properties
|
||||
"""
|
||||
...
|
||||
|
||||
def flush(self) -> None:
|
||||
"""Flush any buffered messages."""
|
||||
...
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the producer."""
|
||||
...
|
||||
|
||||
|
||||
@runtime_checkable
class BackendConsumer(Protocol):
    """Structural interface for a backend-specific consumer."""

    def receive(self, timeout_millis: int = 2000) -> Message:
        """Receive the next message from the topic.

        Args:
            timeout_millis: How long to wait, in milliseconds.

        Returns:
            The received Message object.

        Raises:
            TimeoutError: If no message is received within the timeout.
        """

    def acknowledge(self, message: Message) -> None:
        """Acknowledge that a message was processed successfully.

        Args:
            message: The message to acknowledge.
        """

    def negative_acknowledge(self, message: Message) -> None:
        """Negatively acknowledge a message, which triggers redelivery.

        Args:
            message: The message to negatively acknowledge.
        """

    def unsubscribe(self) -> None:
        """Unsubscribe from the topic."""

    def close(self) -> None:
        """Close the consumer."""
|
||||
|
||||
|
||||
@runtime_checkable
class PubSubBackend(Protocol):
    """Structural interface that every pub/sub backend must provide."""

    def create_producer(self, topic: str, schema: type, **options) -> BackendProducer:
        """Create a producer bound to a topic.

        Args:
            topic: Generic topic format (qos/tenant/namespace/queue).
            schema: Dataclass type of the messages to be sent.
            **options: Backend-specific options (e.g., chunking_enabled).

        Returns:
            A backend-specific producer instance.
        """

    def create_consumer(
        self,
        topic: str,
        subscription: str,
        schema: type,
        initial_position: str = 'latest',
        consumer_type: str = 'shared',
        **options
    ) -> BackendConsumer:
        """Create a consumer bound to a topic.

        Args:
            topic: Generic topic format (qos/tenant/namespace/queue).
            subscription: Subscription / consumer group name.
            schema: Dataclass type of the messages to be received.
            initial_position: 'earliest' or 'latest' (some backends may
                ignore this).
            consumer_type: 'shared', 'exclusive' or 'failover' (some
                backends may ignore this).
            **options: Backend-specific options.

        Returns:
            A backend-specific consumer instance.
        """

    def close(self) -> None:
        """Close the backend connection."""
|
||||
|
|
@ -9,9 +9,6 @@
|
|||
# one handler, and a single thread of concurrency, nothing too outrageous
|
||||
# will happen if synchronous / blocking code is used
|
||||
|
||||
from pulsar.schema import JsonSchema
|
||||
import pulsar
|
||||
import _pulsar
|
||||
import asyncio
|
||||
import time
|
||||
import logging
|
||||
|
|
@ -21,11 +18,15 @@ from .. exceptions import TooManyRequests
|
|||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout exception - can come from different backends
|
||||
class TimeoutError(Exception):
    """Module-local timeout exception.

    Derives directly from Exception. Because it reuses the name of the
    builtin TimeoutError, it shadows that builtin inside this module.
    """
|
||||
|
||||
class Consumer:
|
||||
|
||||
def __init__(
|
||||
self, taskgroup, flow, client, topic, subscriber, schema,
|
||||
handler,
|
||||
self, taskgroup, flow, backend, topic, subscriber, schema,
|
||||
handler,
|
||||
metrics = None,
|
||||
start_of_messages=False,
|
||||
rate_limit_retry_time = 10, rate_limit_timeout = 7200,
|
||||
|
|
@ -35,7 +36,7 @@ class Consumer:
|
|||
|
||||
self.taskgroup = taskgroup
|
||||
self.flow = flow
|
||||
self.client = client
|
||||
self.backend = backend # Changed from 'client' to 'backend'
|
||||
self.topic = topic
|
||||
self.subscriber = subscriber
|
||||
self.schema = schema
|
||||
|
|
@ -96,18 +97,20 @@ class Consumer:
|
|||
|
||||
logger.info(f"Subscribing to topic: {self.topic}")
|
||||
|
||||
# Determine initial position
|
||||
if self.start_of_messages:
|
||||
pos = pulsar.InitialPosition.Earliest
|
||||
initial_pos = 'earliest'
|
||||
else:
|
||||
pos = pulsar.InitialPosition.Latest
|
||||
initial_pos = 'latest'
|
||||
|
||||
# Create consumer via backend
|
||||
self.consumer = await asyncio.to_thread(
|
||||
self.client.subscribe,
|
||||
self.backend.create_consumer,
|
||||
topic = self.topic,
|
||||
subscription_name = self.subscriber,
|
||||
schema = JsonSchema(self.schema),
|
||||
initial_position = pos,
|
||||
consumer_type = pulsar.ConsumerType.Shared,
|
||||
subscription = self.subscriber,
|
||||
schema = self.schema,
|
||||
initial_position = initial_pos,
|
||||
consumer_type = 'shared',
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -159,9 +162,10 @@ class Consumer:
|
|||
self.consumer.receive,
|
||||
timeout_millis=2000
|
||||
)
|
||||
except _pulsar.Timeout:
|
||||
continue
|
||||
except Exception as e:
|
||||
# Handle timeout from any backend
|
||||
if 'timeout' in str(type(e)).lower() or 'timeout' in str(e).lower():
|
||||
continue
|
||||
raise e
|
||||
|
||||
await self.handle_one_from_queue(msg)
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ class ConsumerSpec(Spec):
|
|||
consumer = Consumer(
|
||||
taskgroup = processor.taskgroup,
|
||||
flow = flow,
|
||||
client = processor.pulsar_client,
|
||||
backend = processor.pubsub,
|
||||
topic = definition[self.name],
|
||||
subscriber = processor.id + "--" + flow.name + "--" + self.name,
|
||||
schema = self.schema,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
|
|
@ -8,10 +7,10 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
class Producer:
|
||||
|
||||
def __init__(self, client, topic, schema, metrics=None,
|
||||
def __init__(self, backend, topic, schema, metrics=None,
|
||||
chunking_enabled=True):
|
||||
|
||||
self.client = client
|
||||
self.backend = backend # Changed from 'client' to 'backend'
|
||||
self.topic = topic
|
||||
self.schema = schema
|
||||
|
||||
|
|
@ -44,9 +43,9 @@ class Producer:
|
|||
|
||||
try:
|
||||
logger.info(f"Connecting publisher to {self.topic}...")
|
||||
self.producer = self.client.create_producer(
|
||||
self.producer = self.backend.create_producer(
|
||||
topic = self.topic,
|
||||
schema = JsonSchema(self.schema),
|
||||
schema = self.schema,
|
||||
chunking_enabled = self.chunking_enabled,
|
||||
)
|
||||
logger.info(f"Connected publisher to {self.topic}")
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ class ProducerSpec(Spec):
|
|||
)
|
||||
|
||||
producer = Producer(
|
||||
client = processor.pulsar_client,
|
||||
backend = processor.pubsub,
|
||||
topic = definition[self.name],
|
||||
schema = self.schema,
|
||||
metrics = producer_metrics,
|
||||
|
|
|
|||
|
|
@ -37,21 +37,20 @@ class PromptClient(RequestResponse):
|
|||
|
||||
else:
|
||||
logger.info("DEBUG prompt_client: Streaming path")
|
||||
# Streaming path - collect all chunks
|
||||
full_text = ""
|
||||
full_object = None
|
||||
# Streaming path - just forward chunks, don't accumulate
|
||||
last_text = ""
|
||||
last_object = None
|
||||
|
||||
async def collect_chunks(resp):
|
||||
nonlocal full_text, full_object
|
||||
logger.info(f"DEBUG prompt_client: collect_chunks called, resp.text={resp.text[:50] if resp.text else None}, end_of_stream={getattr(resp, 'end_of_stream', False)}")
|
||||
async def forward_chunks(resp):
|
||||
nonlocal last_text, last_object
|
||||
logger.info(f"DEBUG prompt_client: forward_chunks called, resp.text={resp.text[:50] if resp.text else None}, end_of_stream={getattr(resp, 'end_of_stream', False)}")
|
||||
|
||||
if resp.error:
|
||||
logger.error(f"DEBUG prompt_client: Error in response: {resp.error.message}")
|
||||
raise RuntimeError(resp.error.message)
|
||||
|
||||
if resp.text:
|
||||
full_text += resp.text
|
||||
logger.info(f"DEBUG prompt_client: Accumulated {len(full_text)} chars")
|
||||
last_text = resp.text
|
||||
# Call chunk callback if provided
|
||||
if chunk_callback:
|
||||
logger.info(f"DEBUG prompt_client: Calling chunk_callback")
|
||||
|
|
@ -61,7 +60,7 @@ class PromptClient(RequestResponse):
|
|||
chunk_callback(resp.text)
|
||||
elif resp.object:
|
||||
logger.info(f"DEBUG prompt_client: Got object response")
|
||||
full_object = resp.object
|
||||
last_object = resp.object
|
||||
|
||||
end_stream = getattr(resp, 'end_of_stream', False)
|
||||
logger.info(f"DEBUG prompt_client: Returning end_of_stream={end_stream}")
|
||||
|
|
@ -79,17 +78,17 @@ class PromptClient(RequestResponse):
|
|||
logger.info(f"DEBUG prompt_client: About to call self.request with recipient, timeout={timeout}")
|
||||
await self.request(
|
||||
req,
|
||||
recipient=collect_chunks,
|
||||
recipient=forward_chunks,
|
||||
timeout=timeout
|
||||
)
|
||||
logger.info(f"DEBUG prompt_client: self.request returned, full_text has {len(full_text)} chars")
|
||||
logger.info(f"DEBUG prompt_client: self.request returned, last_text={last_text[:50] if last_text else None}")
|
||||
|
||||
if full_text:
|
||||
logger.info("DEBUG prompt_client: Returning full_text")
|
||||
return full_text
|
||||
if last_text:
|
||||
logger.info("DEBUG prompt_client: Returning last_text")
|
||||
return last_text
|
||||
|
||||
logger.info("DEBUG prompt_client: Returning parsed full_object")
|
||||
return json.loads(full_object)
|
||||
logger.info("DEBUG prompt_client: Returning parsed last_object")
|
||||
return json.loads(last_object) if last_object else None
|
||||
|
||||
async def extract_definitions(self, text, timeout=600):
|
||||
return await self.prompt(
|
||||
|
|
|
|||
|
|
@ -1,9 +1,6 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
import pulsar
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
|
|
@ -11,9 +8,9 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
class Publisher:
|
||||
|
||||
def __init__(self, client, topic, schema=None, max_size=10,
|
||||
def __init__(self, backend, topic, schema=None, max_size=10,
|
||||
chunking_enabled=True, drain_timeout=5.0):
|
||||
self.client = client
|
||||
self.backend = backend # Changed from 'client' to 'backend'
|
||||
self.topic = topic
|
||||
self.schema = schema
|
||||
self.q = asyncio.Queue(maxsize=max_size)
|
||||
|
|
@ -47,9 +44,9 @@ class Publisher:
|
|||
|
||||
try:
|
||||
|
||||
producer = self.client.create_producer(
|
||||
producer = self.backend.create_producer(
|
||||
topic=self.topic,
|
||||
schema=JsonSchema(self.schema),
|
||||
schema=self.schema,
|
||||
chunking_enabled=self.chunking_enabled,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,45 @@ import pulsar
|
|||
import _pulsar
|
||||
import uuid
|
||||
from pulsar.schema import JsonSchema
|
||||
import logging
|
||||
|
||||
from .. log_level import LogLevel
|
||||
from .pulsar_backend import PulsarBackend
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_pubsub(**config):
    """
    Build the pub/sub backend selected by the configuration.

    Args:
        config: Keyword configuration (typically the parsed command-line
                arguments). The 'pubsub_backend' key selects the backend
                and falls back to 'pulsar' when absent. For Pulsar the
                'pulsar_host', 'pulsar_api_key' and 'pulsar_listener'
                keys are read; the first two fall back to the
                PulsarClient defaults.

    Returns:
        A PulsarBackend instance when the selected backend is 'pulsar'.

    Raises:
        NotImplementedError: If 'mqtt' is selected (not implemented yet).
        ValueError: If the selected backend name is not recognised.

    Example:
        backend = get_pubsub(
            pubsub_backend='pulsar',
            pulsar_host='pulsar://localhost:6650'
        )
    """
    selected = config.get('pubsub_backend', 'pulsar')

    if selected == 'mqtt':
        # TODO: Implement MQTT backend
        raise NotImplementedError("MQTT backend not yet implemented")

    if selected != 'pulsar':
        raise ValueError(f"Unknown pub/sub backend: {selected}")

    host = config.get('pulsar_host', PulsarClient.default_pulsar_host)
    api_key = config.get('pulsar_api_key', PulsarClient.default_pulsar_api_key)
    listener = config.get('pulsar_listener')

    return PulsarBackend(host=host, api_key=api_key, listener=listener)
|
||||
|
||||
|
||||
class PulsarClient:
|
||||
|
||||
|
|
|
|||
350
trustgraph-base/trustgraph/base/pulsar_backend.py
Normal file
350
trustgraph-base/trustgraph/base/pulsar_backend.py
Normal file
|
|
@ -0,0 +1,350 @@
|
|||
"""
|
||||
Pulsar backend implementation for pub/sub abstraction.
|
||||
|
||||
This module provides a Pulsar-specific implementation of the backend interfaces,
|
||||
handling topic mapping, serialization, and Pulsar client management.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
import _pulsar
|
||||
import json
|
||||
import logging
|
||||
import base64
|
||||
import types
|
||||
from dataclasses import asdict, is_dataclass
|
||||
from typing import Any
|
||||
|
||||
from .backend import PubSubBackend, BackendProducer, BackendConsumer, Message
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _decode_bytes(value: Any) -> Any:
    """Return *value* with every bytes object inside lists/dicts decoded as UTF-8."""
    if isinstance(value, bytes):
        return value.decode('utf-8')
    if isinstance(value, list):
        return [_decode_bytes(item) for item in value]
    if isinstance(value, dict):
        return {k: _decode_bytes(v) for k, v in value.items()}
    return value


def dataclass_to_dict(obj: Any) -> dict:
    """
    Convert a dataclass instance to a JSON-serializable dictionary.

    ``asdict`` already expands nested dataclasses (including those held in
    lists and dicts) into plain dicts, so the remaining work is done on
    plain containers:

    - Fields of the top-level dataclass whose value is None are left out
      of the result. None values deeper in the structure are kept as-is.
    - Bytes values are decoded as UTF-8 strings at every depth (top-level
      fields, nested dataclasses, list items and dict values), so the
      result can be passed to ``json.dumps``.

    Args:
        obj: Dataclass instance to convert. None and non-dataclass objects
            are accepted and returned unchanged.

    Returns:
        The dictionary form of *obj*; None if *obj* is None; *obj* itself
        if it is not a dataclass.

    Raises:
        UnicodeDecodeError: If a bytes value is not valid UTF-8.
    """
    if obj is None:
        return None

    if not is_dataclass(obj):
        return obj

    return {
        key: _decode_bytes(value)
        for key, value in asdict(obj).items()
        if value is not None
    }
|
||||
|
||||
|
||||
def dict_to_dataclass(data: dict, cls: type) -> Any:
|
||||
"""
|
||||
Convert a dictionary back to a dataclass instance.
|
||||
|
||||
Handles nested dataclasses and missing fields.
|
||||
"""
|
||||
if data is None:
|
||||
return None
|
||||
|
||||
if not is_dataclass(cls):
|
||||
return data
|
||||
|
||||
# Get field types from the dataclass
|
||||
field_types = {f.name: f.type for f in cls.__dataclass_fields__.values()}
|
||||
kwargs = {}
|
||||
|
||||
for key, value in data.items():
|
||||
if key in field_types:
|
||||
field_type = field_types[key]
|
||||
|
||||
# Handle modern union types (X | Y)
|
||||
if isinstance(field_type, types.UnionType):
|
||||
# Check if it's Optional (X | None)
|
||||
if type(None) in field_type.__args__:
|
||||
# Get the non-None type
|
||||
actual_type = next((t for t in field_type.__args__ if t is not type(None)), None)
|
||||
if actual_type and is_dataclass(actual_type) and isinstance(value, dict):
|
||||
kwargs[key] = dict_to_dataclass(value, actual_type)
|
||||
else:
|
||||
kwargs[key] = value
|
||||
else:
|
||||
kwargs[key] = value
|
||||
# Check if this is a generic type (list, dict, etc.)
|
||||
elif hasattr(field_type, '__origin__'):
|
||||
# Handle list[T]
|
||||
if field_type.__origin__ == list:
|
||||
item_type = field_type.__args__[0] if field_type.__args__ else None
|
||||
if item_type and is_dataclass(item_type) and isinstance(value, list):
|
||||
kwargs[key] = [
|
||||
dict_to_dataclass(item, item_type) if isinstance(item, dict) else item
|
||||
for item in value
|
||||
]
|
||||
else:
|
||||
kwargs[key] = value
|
||||
# Handle old-style Optional[T] (which is Union[T, None])
|
||||
elif hasattr(field_type, '__args__') and type(None) in field_type.__args__:
|
||||
# Get the non-None type from Union
|
||||
actual_type = next((t for t in field_type.__args__ if t is not type(None)), None)
|
||||
if actual_type and is_dataclass(actual_type) and isinstance(value, dict):
|
||||
kwargs[key] = dict_to_dataclass(value, actual_type)
|
||||
else:
|
||||
kwargs[key] = value
|
||||
else:
|
||||
kwargs[key] = value
|
||||
# Handle direct dataclass fields
|
||||
elif is_dataclass(field_type) and isinstance(value, dict):
|
||||
kwargs[key] = dict_to_dataclass(value, field_type)
|
||||
# Handle bytes fields (UTF-8 encoded strings from JSON)
|
||||
elif field_type == bytes and isinstance(value, str):
|
||||
kwargs[key] = value.encode('utf-8')
|
||||
else:
|
||||
kwargs[key] = value
|
||||
|
||||
return cls(**kwargs)
|
||||
|
||||
|
||||
class PulsarMessage:
    """Adapter giving a raw Pulsar message the Message protocol shape."""

    def __init__(self, pulsar_msg, schema_cls):
        # _value holds the decoded payload once value() has produced one
        self._value = None
        self._schema_cls = schema_cls
        self._msg = pulsar_msg

    def value(self) -> Any:
        """Decode the payload (UTF-8 JSON) into the schema type and return it.

        A non-None decoded value is stored and returned on later calls
        without decoding again.
        """
        if self._value is not None:
            return self._value

        # Payload bytes -> JSON text -> dict -> dataclass
        payload = json.loads(self._msg.data().decode('utf-8'))
        self._value = dict_to_dataclass(payload, self._schema_cls)
        return self._value

    def properties(self) -> dict:
        """Return the properties of the wrapped Pulsar message."""
        return self._msg.properties()
|
||||
|
||||
|
||||
class PulsarBackendProducer:
|
||||
"""Pulsar-specific producer implementation."""
|
||||
|
||||
def __init__(self, pulsar_producer, schema_cls):
|
||||
self._producer = pulsar_producer
|
||||
self._schema_cls = schema_cls
|
||||
|
||||
def send(self, message: Any, properties: dict = {}) -> None:
|
||||
"""Send a dataclass message."""
|
||||
# Convert dataclass to dict, excluding None values
|
||||
data_dict = dataclass_to_dict(message)
|
||||
# Serialize to JSON
|
||||
json_data = json.dumps(data_dict)
|
||||
# Send via Pulsar
|
||||
self._producer.send(json_data.encode('utf-8'), properties=properties)
|
||||
|
||||
def flush(self) -> None:
|
||||
"""Flush buffered messages."""
|
||||
self._producer.flush()
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the producer."""
|
||||
self._producer.close()
|
||||
|
||||
|
||||
class PulsarBackendConsumer:
    """Consumer implementation backed by a Pulsar consumer object."""

    def __init__(self, pulsar_consumer, schema_cls):
        self._schema_cls = schema_cls
        self._consumer = pulsar_consumer

    def receive(self, timeout_millis: int = 2000) -> Message:
        """Receive one message and wrap it as a PulsarMessage.

        Exceptions raised by the underlying consumer's receive call are
        not caught here.
        """
        raw = self._consumer.receive(timeout_millis=timeout_millis)
        return PulsarMessage(raw, self._schema_cls)

    def acknowledge(self, message: Message) -> None:
        """Acknowledge a message.

        Anything that is not a PulsarMessage is ignored.
        """
        if not isinstance(message, PulsarMessage):
            return
        self._consumer.acknowledge(message._msg)

    def negative_acknowledge(self, message: Message) -> None:
        """Negatively acknowledge a message.

        Anything that is not a PulsarMessage is ignored.
        """
        if not isinstance(message, PulsarMessage):
            return
        self._consumer.negative_acknowledge(message._msg)

    def unsubscribe(self) -> None:
        """Unsubscribe the underlying consumer from the topic."""
        self._consumer.unsubscribe()

    def close(self) -> None:
        """Close the underlying consumer."""
        self._consumer.close()
|
||||
|
||||
|
||||
class PulsarBackend:
|
||||
"""
|
||||
Pulsar backend implementation.
|
||||
|
||||
Handles topic mapping, client management, and creation of Pulsar-specific
|
||||
producers and consumers.
|
||||
"""
|
||||
|
||||
def __init__(self, host: str, api_key: str = None, listener: str = None):
|
||||
"""
|
||||
Initialize Pulsar backend.
|
||||
|
||||
Args:
|
||||
host: Pulsar broker URL (e.g., pulsar://localhost:6650)
|
||||
api_key: Optional API key for authentication
|
||||
listener: Optional listener name for multi-homed setups
|
||||
"""
|
||||
self.host = host
|
||||
self.api_key = api_key
|
||||
self.listener = listener
|
||||
|
||||
# Create Pulsar client
|
||||
client_args = {'service_url': host}
|
||||
|
||||
if listener:
|
||||
client_args['listener_name'] = listener
|
||||
|
||||
if api_key:
|
||||
client_args['authentication'] = pulsar.AuthenticationToken(api_key)
|
||||
|
||||
self.client = pulsar.Client(**client_args)
|
||||
logger.info(f"Pulsar client connected to {host}")
|
||||
|
||||
def map_topic(self, generic_topic: str) -> str:
|
||||
"""
|
||||
Map generic topic format to Pulsar URI.
|
||||
|
||||
Format: qos/tenant/namespace/queue
|
||||
Example: q1/tg/flow/my-queue -> persistent://tg/flow/my-queue
|
||||
|
||||
Args:
|
||||
generic_topic: Generic topic string or already-formatted Pulsar URI
|
||||
|
||||
Returns:
|
||||
Pulsar topic URI
|
||||
"""
|
||||
# If already a Pulsar URI, return as-is
|
||||
if '://' in generic_topic:
|
||||
return generic_topic
|
||||
|
||||
parts = generic_topic.split('/', 3)
|
||||
if len(parts) != 4:
|
||||
raise ValueError(f"Invalid topic format: {generic_topic}, expected qos/tenant/namespace/queue")
|
||||
|
||||
qos, tenant, namespace, queue = parts
|
||||
|
||||
# Map QoS to persistence
|
||||
if qos == 'q0':
|
||||
persistence = 'non-persistent'
|
||||
elif qos in ['q1', 'q2']:
|
||||
persistence = 'persistent'
|
||||
else:
|
||||
raise ValueError(f"Invalid QoS level: {qos}, expected q0, q1, or q2")
|
||||
|
||||
return f"{persistence}://{tenant}/{namespace}/{queue}"
|
||||
|
||||
def create_producer(self, topic: str, schema: type, **options) -> BackendProducer:
|
||||
"""
|
||||
Create a Pulsar producer.
|
||||
|
||||
Args:
|
||||
topic: Generic topic format (qos/tenant/namespace/queue)
|
||||
schema: Dataclass type for messages
|
||||
**options: Backend-specific options (e.g., chunking_enabled)
|
||||
|
||||
Returns:
|
||||
PulsarBackendProducer instance
|
||||
"""
|
||||
pulsar_topic = self.map_topic(topic)
|
||||
|
||||
producer_args = {
|
||||
'topic': pulsar_topic,
|
||||
'schema': pulsar.schema.BytesSchema(), # We handle serialization ourselves
|
||||
}
|
||||
|
||||
# Add optional parameters
|
||||
if 'chunking_enabled' in options:
|
||||
producer_args['chunking_enabled'] = options['chunking_enabled']
|
||||
|
||||
pulsar_producer = self.client.create_producer(**producer_args)
|
||||
logger.debug(f"Created producer for topic: {pulsar_topic}")
|
||||
|
||||
return PulsarBackendProducer(pulsar_producer, schema)
|
||||
|
||||
def create_consumer(
    self,
    topic: str,
    subscription: str,
    schema: type,
    initial_position: str = 'latest',
    consumer_type: str = 'shared',
    **options
) -> BackendConsumer:
    """
    Build a Pulsar consumer and wrap it for the backend interface.

    Args:
        topic: Generic topic format (qos/tenant/namespace/queue)
        subscription: Subscription name
        schema: Dataclass type for messages
        initial_position: 'earliest' or 'latest'; any other value is
            treated as 'latest'
        consumer_type: 'shared', 'exclusive', or 'failover'; any other
            value is treated as 'shared'
        **options: Backend-specific options (accepted but not used here)

    Returns:
        PulsarBackendConsumer instance
    """
    pulsar_topic = self.map_topic(topic)

    # Translate the generic start position into the Pulsar enum
    start_at = (
        pulsar.InitialPosition.Earliest
        if initial_position == 'earliest'
        else pulsar.InitialPosition.Latest
    )

    # Translate the generic consumer type; shared is the fallback
    mode = pulsar.ConsumerType.Shared
    if consumer_type == 'exclusive':
        mode = pulsar.ConsumerType.Exclusive
    elif consumer_type == 'failover':
        mode = pulsar.ConsumerType.Failover

    # The Pulsar-level schema is raw bytes: deserialization into the
    # dataclass is handled by the wrapper, not by the Pulsar client.
    raw_consumer = self.client.subscribe(
        topic=pulsar_topic,
        subscription_name=subscription,
        schema=pulsar.schema.BytesSchema(),
        initial_position=start_at,
        consumer_type=mode,
    )
    logger.debug(f"Created consumer for topic: {pulsar_topic}, subscription: {subscription}")

    return PulsarBackendConsumer(raw_consumer, schema)
|
||||
|
||||
def close(self) -> None:
    """Shut down the underlying Pulsar client and log that it was closed."""
    pulsar_client = self.client
    pulsar_client.close()
    logger.info("Pulsar client closed")
|
||||
|
|
@ -14,7 +14,7 @@ logger = logging.getLogger(__name__)
|
|||
class RequestResponse(Subscriber):
|
||||
|
||||
def __init__(
|
||||
self, client, subscription, consumer_name,
|
||||
self, backend, subscription, consumer_name,
|
||||
request_topic, request_schema,
|
||||
request_metrics,
|
||||
response_topic, response_schema,
|
||||
|
|
@ -22,7 +22,7 @@ class RequestResponse(Subscriber):
|
|||
):
|
||||
|
||||
super(RequestResponse, self).__init__(
|
||||
client = client,
|
||||
backend = backend,
|
||||
subscription = subscription,
|
||||
consumer_name = consumer_name,
|
||||
topic = response_topic,
|
||||
|
|
@ -31,7 +31,7 @@ class RequestResponse(Subscriber):
|
|||
)
|
||||
|
||||
self.producer = Producer(
|
||||
client = client,
|
||||
backend = backend,
|
||||
topic = request_topic,
|
||||
schema = request_schema,
|
||||
metrics = request_metrics,
|
||||
|
|
@ -126,7 +126,7 @@ class RequestResponseSpec(Spec):
|
|||
)
|
||||
|
||||
rr = self.impl(
|
||||
client = processor.pulsar_client,
|
||||
backend = processor.pubsub,
|
||||
|
||||
# Make subscription names unique, so that all subscribers get
|
||||
# to see all response messages
|
||||
|
|
|
|||
|
|
@ -3,9 +3,7 @@
|
|||
# off of a queue and make it available using an internal broker system,
|
||||
# so suitable for when multiple recipients are reading from the same queue
|
||||
|
||||
from pulsar.schema import JsonSchema
|
||||
import asyncio
|
||||
import _pulsar
|
||||
import time
|
||||
import logging
|
||||
import uuid
|
||||
|
|
@ -13,12 +11,16 @@ import uuid
|
|||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout exception - can come from different backends
|
||||
class TimeoutError(Exception):
    """Backend-neutral timeout error.

    Derives directly from Exception; within this module the name
    shadows the builtin TimeoutError.
    """
|
||||
|
||||
class Subscriber:
|
||||
|
||||
def __init__(self, client, topic, subscription, consumer_name,
|
||||
def __init__(self, backend, topic, subscription, consumer_name,
|
||||
schema=None, max_size=100, metrics=None,
|
||||
backpressure_strategy="block", drain_timeout=5.0):
|
||||
self.client = client
|
||||
self.backend = backend # Changed from 'client' to 'backend'
|
||||
self.topic = topic
|
||||
self.subscription = subscription
|
||||
self.consumer_name = consumer_name
|
||||
|
|
@ -43,18 +45,14 @@ class Subscriber:
|
|||
|
||||
async def start(self):
|
||||
|
||||
# Build subscribe arguments
|
||||
subscribe_args = {
|
||||
'topic': self.topic,
|
||||
'subscription_name': self.subscription,
|
||||
'consumer_name': self.consumer_name,
|
||||
}
|
||||
|
||||
# Only add schema if provided (omit if None)
|
||||
if self.schema is not None:
|
||||
subscribe_args['schema'] = JsonSchema(self.schema)
|
||||
|
||||
self.consumer = self.client.subscribe(**subscribe_args)
|
||||
# Create consumer via backend
|
||||
self.consumer = await asyncio.to_thread(
|
||||
self.backend.create_consumer,
|
||||
topic=self.topic,
|
||||
subscription=self.subscription,
|
||||
schema=self.schema,
|
||||
consumer_type='shared',
|
||||
)
|
||||
|
||||
self.task = asyncio.create_task(self.run())
|
||||
|
||||
|
|
@ -94,12 +92,13 @@ class Subscriber:
|
|||
drain_end_time = time.time() + self.drain_timeout
|
||||
logger.info(f"Subscriber entering drain mode, timeout={self.drain_timeout}s")
|
||||
|
||||
# Stop accepting new messages from Pulsar during drain
|
||||
if self.consumer:
|
||||
# Stop accepting new messages during drain
|
||||
# Note: Not all backends support pausing message listeners
|
||||
if self.consumer and hasattr(self.consumer, 'pause_message_listener'):
|
||||
try:
|
||||
self.consumer.pause_message_listener()
|
||||
except _pulsar.InvalidConfiguration:
|
||||
# Not all consumers have message listeners (e.g., blocking receive mode)
|
||||
except Exception:
|
||||
# Not all consumers support message listeners
|
||||
pass
|
||||
|
||||
# Check drain timeout
|
||||
|
|
@ -133,9 +132,10 @@ class Subscriber:
|
|||
self.consumer.receive,
|
||||
timeout_millis=250
|
||||
)
|
||||
except _pulsar.Timeout:
|
||||
continue
|
||||
except Exception as e:
|
||||
# Handle timeout from any backend
|
||||
if 'timeout' in str(type(e)).lower() or 'timeout' in str(e).lower():
|
||||
continue
|
||||
logger.error(f"Exception in subscriber receive: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
|
|
@ -157,19 +157,20 @@ class Subscriber:
|
|||
for msg in self.pending_acks.values():
|
||||
try:
|
||||
self.consumer.negative_acknowledge(msg)
|
||||
except _pulsar.AlreadyClosed:
|
||||
pass # Consumer already closed
|
||||
except Exception:
|
||||
pass # Consumer already closed or error
|
||||
self.pending_acks.clear()
|
||||
|
||||
if self.consumer:
|
||||
try:
|
||||
self.consumer.unsubscribe()
|
||||
except _pulsar.AlreadyClosed:
|
||||
pass # Already closed
|
||||
if hasattr(self.consumer, 'unsubscribe'):
|
||||
try:
|
||||
self.consumer.unsubscribe()
|
||||
except Exception:
|
||||
pass # Already closed or error
|
||||
try:
|
||||
self.consumer.close()
|
||||
except _pulsar.AlreadyClosed:
|
||||
pass # Already closed
|
||||
except Exception:
|
||||
pass # Already closed or error
|
||||
self.consumer = None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ class SubscriberSpec(Spec):
|
|||
)
|
||||
|
||||
subscriber = Subscriber(
|
||||
client = processor.pulsar_client,
|
||||
backend = processor.pubsub,
|
||||
topic = definition[self.name],
|
||||
subscription = flow.id,
|
||||
consumer_name = flow.id,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import time
|
|||
from pulsar.schema import JsonSchema
|
||||
|
||||
from .. exceptions import *
|
||||
from ..base.pubsub import get_pubsub
|
||||
|
||||
# Default timeout for a request/response. In seconds.
|
||||
DEFAULT_TIMEOUT=300
|
||||
|
|
@ -39,30 +40,25 @@ class BaseClient:
|
|||
if subscriber == None:
|
||||
subscriber = str(uuid.uuid4())
|
||||
|
||||
if pulsar_api_key:
|
||||
auth = pulsar.AuthenticationToken(pulsar_api_key)
|
||||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
logger=pulsar.ConsoleLogger(log_level),
|
||||
authentication=auth,
|
||||
listener=listener,
|
||||
)
|
||||
else:
|
||||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
logger=pulsar.ConsoleLogger(log_level),
|
||||
listener_name=listener,
|
||||
)
|
||||
# Create backend using factory
|
||||
self.backend = get_pubsub(
|
||||
pulsar_host=pulsar_host,
|
||||
pulsar_api_key=pulsar_api_key,
|
||||
pulsar_listener=listener,
|
||||
pubsub_backend='pulsar'
|
||||
)
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
self.producer = self.backend.create_producer(
|
||||
topic=input_queue,
|
||||
schema=JsonSchema(input_schema),
|
||||
schema=input_schema,
|
||||
chunking_enabled=True,
|
||||
)
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
output_queue, subscriber,
|
||||
schema=JsonSchema(output_schema),
|
||||
self.consumer = self.backend.create_consumer(
|
||||
topic=output_queue,
|
||||
subscription=subscriber,
|
||||
schema=output_schema,
|
||||
consumer_type='shared',
|
||||
)
|
||||
|
||||
self.input_schema = input_schema
|
||||
|
|
@ -136,10 +132,11 @@ class BaseClient:
|
|||
|
||||
if hasattr(self, "consumer"):
|
||||
self.consumer.close()
|
||||
|
||||
|
||||
if hasattr(self, "producer"):
|
||||
self.producer.flush()
|
||||
self.producer.close()
|
||||
|
||||
self.client.close()
|
||||
|
||||
if hasattr(self, "backend"):
|
||||
self.backend.close()
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,6 @@ class ConfigClient(BaseClient):
|
|||
def get(self, keys, timeout=300):
|
||||
|
||||
resp = self.call(
|
||||
id=id,
|
||||
operation="get",
|
||||
keys=[
|
||||
ConfigKey(
|
||||
|
|
@ -88,7 +87,6 @@ class ConfigClient(BaseClient):
|
|||
def list(self, type, timeout=300):
|
||||
|
||||
resp = self.call(
|
||||
id=id,
|
||||
operation="list",
|
||||
type=type,
|
||||
timeout=timeout
|
||||
|
|
@ -99,7 +97,6 @@ class ConfigClient(BaseClient):
|
|||
def getvalues(self, type, timeout=300):
|
||||
|
||||
resp = self.call(
|
||||
id=id,
|
||||
operation="getvalues",
|
||||
type=type,
|
||||
timeout=timeout
|
||||
|
|
@ -117,7 +114,6 @@ class ConfigClient(BaseClient):
|
|||
def delete(self, keys, timeout=300):
|
||||
|
||||
resp = self.call(
|
||||
id=id,
|
||||
operation="delete",
|
||||
keys=[
|
||||
ConfigKey(
|
||||
|
|
@ -134,7 +130,6 @@ class ConfigClient(BaseClient):
|
|||
def put(self, values, timeout=300):
|
||||
|
||||
resp = self.call(
|
||||
id=id,
|
||||
operation="put",
|
||||
values=[
|
||||
ConfigValue(
|
||||
|
|
@ -152,7 +147,6 @@ class ConfigClient(BaseClient):
|
|||
def config(self, timeout=300):
|
||||
|
||||
resp = self.call(
|
||||
id=id,
|
||||
operation="config",
|
||||
timeout=timeout
|
||||
)
|
||||
|
|
|
|||
|
|
@ -34,14 +34,12 @@ class DocumentRagResponseTranslator(MessageTranslator):
|
|||
def from_pulsar(self, obj: DocumentRagResponse) -> Dict[str, Any]:
|
||||
result = {}
|
||||
|
||||
# Check if this is a streaming response (has chunk)
|
||||
if hasattr(obj, 'chunk') and obj.chunk:
|
||||
result["chunk"] = obj.chunk
|
||||
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
|
||||
else:
|
||||
# Non-streaming response
|
||||
if obj.response:
|
||||
result["response"] = obj.response
|
||||
# Include response content (chunk or complete)
|
||||
if obj.response:
|
||||
result["response"] = obj.response
|
||||
|
||||
# Include end_of_stream flag
|
||||
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
|
||||
|
||||
# Always include error if present
|
||||
if hasattr(obj, 'error') and obj.error and obj.error.message:
|
||||
|
|
@ -51,13 +49,7 @@ class DocumentRagResponseTranslator(MessageTranslator):
|
|||
|
||||
def from_response_with_completion(self, obj: DocumentRagResponse) -> Tuple[Dict[str, Any], bool]:
|
||||
"""Returns (response_dict, is_final)"""
|
||||
# For streaming responses, check end_of_stream
|
||||
if hasattr(obj, 'chunk') and obj.chunk:
|
||||
is_final = getattr(obj, 'end_of_stream', False)
|
||||
else:
|
||||
# For non-streaming responses, it's always final
|
||||
is_final = True
|
||||
|
||||
is_final = getattr(obj, 'end_of_stream', False)
|
||||
return self.from_pulsar(obj), is_final
|
||||
|
||||
|
||||
|
|
@ -98,14 +90,12 @@ class GraphRagResponseTranslator(MessageTranslator):
|
|||
def from_pulsar(self, obj: GraphRagResponse) -> Dict[str, Any]:
|
||||
result = {}
|
||||
|
||||
# Check if this is a streaming response (has chunk)
|
||||
if hasattr(obj, 'chunk') and obj.chunk:
|
||||
result["chunk"] = obj.chunk
|
||||
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
|
||||
else:
|
||||
# Non-streaming response
|
||||
if obj.response:
|
||||
result["response"] = obj.response
|
||||
# Include response content (chunk or complete)
|
||||
if obj.response:
|
||||
result["response"] = obj.response
|
||||
|
||||
# Include end_of_stream flag
|
||||
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
|
||||
|
||||
# Always include error if present
|
||||
if hasattr(obj, 'error') and obj.error and obj.error.message:
|
||||
|
|
@ -115,11 +105,5 @@ class GraphRagResponseTranslator(MessageTranslator):
|
|||
|
||||
def from_response_with_completion(self, obj: GraphRagResponse) -> Tuple[Dict[str, Any], bool]:
|
||||
"""Returns (response_dict, is_final)"""
|
||||
# For streaming responses, check end_of_stream
|
||||
if hasattr(obj, 'chunk') and obj.chunk:
|
||||
is_final = getattr(obj, 'end_of_stream', False)
|
||||
else:
|
||||
# For non-streaming responses, it's always final
|
||||
is_final = True
|
||||
|
||||
is_final = getattr(obj, 'end_of_stream', False)
|
||||
return self.from_pulsar(obj), is_final
|
||||
|
|
@ -1,16 +1,14 @@
|
|||
|
||||
from pulsar.schema import Record, String, Array
|
||||
from dataclasses import dataclass, field
|
||||
from .primitives import Triple
|
||||
|
||||
class Metadata(Record):
|
||||
|
||||
@dataclass
|
||||
class Metadata:
|
||||
# Source identifier
|
||||
id = String()
|
||||
id: str = ""
|
||||
|
||||
# Subgraph
|
||||
metadata = Array(Triple())
|
||||
metadata: list[Triple] = field(default_factory=list)
|
||||
|
||||
# Collection management
|
||||
user = String()
|
||||
collection = String()
|
||||
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
|
|
|
|||
|
|
@ -1,34 +1,39 @@
|
|||
|
||||
from pulsar.schema import Record, String, Boolean, Array, Integer
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
class Error(Record):
|
||||
type = String()
|
||||
message = String()
|
||||
@dataclass
|
||||
class Error:
|
||||
type: str = ""
|
||||
message: str = ""
|
||||
|
||||
class Value(Record):
|
||||
value = String()
|
||||
is_uri = Boolean()
|
||||
type = String()
|
||||
@dataclass
|
||||
class Value:
|
||||
value: str = ""
|
||||
is_uri: bool = False
|
||||
type: str = ""
|
||||
|
||||
class Triple(Record):
|
||||
s = Value()
|
||||
p = Value()
|
||||
o = Value()
|
||||
@dataclass
|
||||
class Triple:
|
||||
s: Value | None = None
|
||||
p: Value | None = None
|
||||
o: Value | None = None
|
||||
|
||||
class Field(Record):
|
||||
name = String()
|
||||
@dataclass
|
||||
class Field:
|
||||
name: str = ""
|
||||
# int, string, long, bool, float, double, timestamp
|
||||
type = String()
|
||||
size = Integer()
|
||||
primary = Boolean()
|
||||
description = String()
|
||||
type: str = ""
|
||||
size: int = 0
|
||||
primary: bool = False
|
||||
description: str = ""
|
||||
# NEW FIELDS for structured data:
|
||||
required = Boolean() # Whether field is required
|
||||
enum_values = Array(String()) # For enum type fields
|
||||
indexed = Boolean() # Whether field should be indexed
|
||||
required: bool = False # Whether field is required
|
||||
enum_values: list[str] = field(default_factory=list) # For enum type fields
|
||||
indexed: bool = False # Whether field should be indexed
|
||||
|
||||
class RowSchema(Record):
|
||||
name = String()
|
||||
description = String()
|
||||
fields = Array(Field())
|
||||
@dataclass
|
||||
class RowSchema:
|
||||
name: str = ""
|
||||
description: str = ""
|
||||
fields: list[Field] = field(default_factory=list)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,23 @@
|
|||
|
||||
def topic(topic, kind='persistent', tenant='tg', namespace='flow'):
|
||||
return f"{kind}://{tenant}/{namespace}/{topic}"
|
||||
def topic(queue_name, qos='q1', tenant='tg', namespace='flow'):
    """
    Build a backend-neutral topic identifier.

    The result is the four components joined with '/', in the order
    qos/tenant/namespace/queue_name.

    Args:
        queue_name: The queue/topic name
        qos: Quality of service
            - 'q0' = best-effort (no ack)
            - 'q1' = at-least-once (ack required)
            - 'q2' = exactly-once (two-phase ack)
        tenant: Tenant identifier for multi-tenancy
        namespace: Namespace within tenant

    Returns:
        Generic topic string: qos/tenant/namespace/queue_name

    Examples:
        topic('my-queue')  # q1/tg/flow/my-queue
        topic('config', qos='q2', namespace='config')  # q2/tg/config/config
    """
    components = (qos, tenant, namespace, queue_name)
    return "{}/{}/{}/{}".format(*components)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, Bytes
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.topic import topic
|
||||
|
|
@ -6,24 +6,27 @@ from ..core.topic import topic
|
|||
############################################################################
|
||||
|
||||
# PDF docs etc.
|
||||
class Document(Record):
|
||||
metadata = Metadata()
|
||||
data = Bytes()
|
||||
@dataclass
|
||||
class Document:
|
||||
metadata: Metadata | None = None
|
||||
data: bytes = b""
|
||||
|
||||
############################################################################
|
||||
|
||||
# Text documents / text from PDF
|
||||
|
||||
class TextDocument(Record):
|
||||
metadata = Metadata()
|
||||
text = Bytes()
|
||||
@dataclass
|
||||
class TextDocument:
|
||||
metadata: Metadata | None = None
|
||||
text: bytes = b""
|
||||
|
||||
############################################################################
|
||||
|
||||
# Chunks of text
|
||||
|
||||
class Chunk(Record):
|
||||
metadata = Metadata()
|
||||
chunk = Bytes()
|
||||
@dataclass
|
||||
class Chunk:
|
||||
metadata: Metadata | None = None
|
||||
chunk: bytes = b""
|
||||
|
||||
############################################################################
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double, Map
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.primitives import Value, RowSchema
|
||||
|
|
@ -8,49 +8,55 @@ from ..core.topic import topic
|
|||
|
||||
# Graph embeddings are embeddings associated with a graph entity
|
||||
|
||||
class EntityEmbeddings(Record):
|
||||
entity = Value()
|
||||
vectors = Array(Array(Double()))
|
||||
@dataclass
|
||||
class EntityEmbeddings:
|
||||
entity: Value | None = None
|
||||
vectors: list[list[float]] = field(default_factory=list)
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class GraphEmbeddings(Record):
|
||||
metadata = Metadata()
|
||||
entities = Array(EntityEmbeddings())
|
||||
@dataclass
|
||||
class GraphEmbeddings:
|
||||
metadata: Metadata | None = None
|
||||
entities: list[EntityEmbeddings] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Document embeddings are embeddings associated with a chunk
|
||||
|
||||
class ChunkEmbeddings(Record):
|
||||
chunk = Bytes()
|
||||
vectors = Array(Array(Double()))
|
||||
@dataclass
|
||||
class ChunkEmbeddings:
|
||||
chunk: bytes = b""
|
||||
vectors: list[list[float]] = field(default_factory=list)
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class DocumentEmbeddings(Record):
|
||||
metadata = Metadata()
|
||||
chunks = Array(ChunkEmbeddings())
|
||||
@dataclass
|
||||
class DocumentEmbeddings:
|
||||
metadata: Metadata | None = None
|
||||
chunks: list[ChunkEmbeddings] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Object embeddings are embeddings associated with the primary key of an
|
||||
# object
|
||||
|
||||
class ObjectEmbeddings(Record):
|
||||
metadata = Metadata()
|
||||
vectors = Array(Array(Double()))
|
||||
name = String()
|
||||
key_name = String()
|
||||
id = String()
|
||||
@dataclass
|
||||
class ObjectEmbeddings:
|
||||
metadata: Metadata | None = None
|
||||
vectors: list[list[float]] = field(default_factory=list)
|
||||
name: str = ""
|
||||
key_name: str = ""
|
||||
id: str = ""
|
||||
|
||||
############################################################################
|
||||
|
||||
# Structured object embeddings with enhanced capabilities
|
||||
|
||||
class StructuredObjectEmbedding(Record):
|
||||
metadata = Metadata()
|
||||
vectors = Array(Array(Double()))
|
||||
schema_name = String()
|
||||
object_id = String() # Primary key value
|
||||
field_embeddings = Map(Array(Double())) # Per-field embeddings
|
||||
@dataclass
|
||||
class StructuredObjectEmbedding:
|
||||
metadata: Metadata | None = None
|
||||
vectors: list[list[float]] = field(default_factory=list)
|
||||
schema_name: str = ""
|
||||
object_id: str = "" # Primary key value
|
||||
field_embeddings: dict[str, list[float]] = field(default_factory=dict) # Per-field embeddings
|
||||
|
||||
############################################################################
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String, Array
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.primitives import Value, Triple
|
||||
from ..core.metadata import Metadata
|
||||
|
|
@ -8,21 +8,24 @@ from ..core.topic import topic
|
|||
|
||||
# Entity context are an entity associated with textual context
|
||||
|
||||
class EntityContext(Record):
|
||||
entity = Value()
|
||||
context = String()
|
||||
@dataclass
|
||||
class EntityContext:
|
||||
entity: Value | None = None
|
||||
context: str = ""
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class EntityContexts(Record):
|
||||
metadata = Metadata()
|
||||
entities = Array(EntityContext())
|
||||
@dataclass
|
||||
class EntityContexts:
|
||||
metadata: Metadata | None = None
|
||||
entities: list[EntityContext] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph triples
|
||||
|
||||
class Triples(Record):
|
||||
metadata = Metadata()
|
||||
triples = Array(Triple())
|
||||
@dataclass
|
||||
class Triples:
|
||||
metadata: Metadata | None = None
|
||||
triples: list[Triple] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Array, Long, Boolean
|
||||
from dataclasses import dataclass, field
|
||||
from ..core.primitives import Triple, Error
|
||||
from ..core.topic import topic
|
||||
from ..core.metadata import Metadata
|
||||
|
|
@ -22,40 +21,40 @@ from .embeddings import GraphEmbeddings
|
|||
# <- ()
|
||||
# <- (error)
|
||||
|
||||
class KnowledgeRequest(Record):
|
||||
|
||||
@dataclass
|
||||
class KnowledgeRequest:
|
||||
# get-kg-core, delete-kg-core, list-kg-cores, put-kg-core
|
||||
# load-kg-core, unload-kg-core
|
||||
operation = String()
|
||||
operation: str = ""
|
||||
|
||||
# list-kg-cores, delete-kg-core, put-kg-core
|
||||
user = String()
|
||||
user: str = ""
|
||||
|
||||
# get-kg-core, list-kg-cores, delete-kg-core, put-kg-core,
|
||||
# load-kg-core, unload-kg-core
|
||||
id = String()
|
||||
id: str = ""
|
||||
|
||||
# load-kg-core
|
||||
flow = String()
|
||||
flow: str = ""
|
||||
|
||||
# load-kg-core
|
||||
collection = String()
|
||||
collection: str = ""
|
||||
|
||||
# put-kg-core
|
||||
triples = Triples()
|
||||
graph_embeddings = GraphEmbeddings()
|
||||
triples: Triples | None = None
|
||||
graph_embeddings: GraphEmbeddings | None = None
|
||||
|
||||
class KnowledgeResponse(Record):
|
||||
error = Error()
|
||||
ids = Array(String())
|
||||
eos = Boolean() # Indicates end of knowledge core stream
|
||||
triples = Triples()
|
||||
graph_embeddings = GraphEmbeddings()
|
||||
@dataclass
|
||||
class KnowledgeResponse:
|
||||
error: Error | None = None
|
||||
ids: list[str] = field(default_factory=list)
|
||||
eos: bool = False # Indicates end of knowledge core stream
|
||||
triples: Triples | None = None
|
||||
graph_embeddings: GraphEmbeddings | None = None
|
||||
|
||||
knowledge_request_queue = topic(
|
||||
'knowledge', kind='non-persistent', namespace='request'
|
||||
'knowledge', qos='q0', namespace='request'
|
||||
)
|
||||
knowledge_response_queue = topic(
|
||||
'knowledge', kind='non-persistent', namespace='response',
|
||||
'knowledge', qos='q0', namespace='response',
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String, Boolean
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..core.topic import topic
|
||||
|
||||
|
|
@ -6,21 +6,25 @@ from ..core.topic import topic
|
|||
|
||||
# NLP extraction data types
|
||||
|
||||
class Definition(Record):
|
||||
name = String()
|
||||
definition = String()
|
||||
@dataclass
|
||||
class Definition:
|
||||
name: str = ""
|
||||
definition: str = ""
|
||||
|
||||
class Topic(Record):
|
||||
name = String()
|
||||
definition = String()
|
||||
@dataclass
|
||||
class Topic:
|
||||
name: str = ""
|
||||
definition: str = ""
|
||||
|
||||
class Relationship(Record):
|
||||
s = String()
|
||||
p = String()
|
||||
o = String()
|
||||
o_entity = Boolean()
|
||||
@dataclass
|
||||
class Relationship:
|
||||
s: str = ""
|
||||
p: str = ""
|
||||
o: str = ""
|
||||
o_entity: bool = False
|
||||
|
||||
class Fact(Record):
|
||||
s = String()
|
||||
p = String()
|
||||
o = String()
|
||||
@dataclass
|
||||
class Fact:
|
||||
s: str = ""
|
||||
p: str = ""
|
||||
o: str = ""
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String, Map, Double, Array
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.topic import topic
|
||||
|
|
@ -7,11 +7,13 @@ from ..core.topic import topic
|
|||
|
||||
# Extracted object from text processing
|
||||
|
||||
class ExtractedObject(Record):
|
||||
metadata = Metadata()
|
||||
schema_name = String() # Which schema this object belongs to
|
||||
values = Array(Map(String())) # Array of objects, each object is field name -> value
|
||||
confidence = Double()
|
||||
source_span = String() # Text span where object was found
|
||||
@dataclass
|
||||
class ExtractedObject:
|
||||
metadata: Metadata | None = None
|
||||
schema_name: str = "" # Which schema this object belongs to
|
||||
values: list[dict[str, str]] = field(default_factory=list) # Array of objects, each object is field name -> value
|
||||
confidence: float = 0.0
|
||||
source_span: str = "" # Text span where object was found
|
||||
|
||||
############################################################################
|
||||
|
||||
############################################################################
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, Array, Map, String
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.primitives import RowSchema
|
||||
|
|
@ -8,9 +8,10 @@ from ..core.topic import topic
|
|||
|
||||
# Stores rows of information
|
||||
|
||||
class Rows(Record):
|
||||
metadata = Metadata()
|
||||
row_schema = RowSchema()
|
||||
rows = Array(Map(String()))
|
||||
@dataclass
|
||||
class Rows:
|
||||
metadata: Metadata | None = None
|
||||
row_schema: RowSchema | None = None
|
||||
rows: list[dict[str, str]] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String, Bytes, Map
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.topic import topic
|
||||
|
|
@ -7,11 +7,13 @@ from ..core.topic import topic
|
|||
|
||||
# Structured data submission for fire-and-forget processing
|
||||
|
||||
class StructuredDataSubmission(Record):
|
||||
metadata = Metadata()
|
||||
format = String() # "json", "csv", "xml"
|
||||
schema_name = String() # Reference to schema in config
|
||||
data = Bytes() # Raw data to ingest
|
||||
options = Map(String()) # Format-specific options
|
||||
@dataclass
|
||||
class StructuredDataSubmission:
|
||||
metadata: Metadata | None = None
|
||||
format: str = "" # "json", "csv", "xml"
|
||||
schema_name: str = "" # Reference to schema in config
|
||||
data: bytes = b"" # Raw data to ingest
|
||||
options: dict[str, str] = field(default_factory=dict) # Format-specific options
|
||||
|
||||
############################################################################
|
||||
|
||||
############################################################################
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
from pulsar.schema import Record, String, Array, Map, Boolean
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
|
@ -8,33 +8,36 @@ from ..core.primitives import Error
|
|||
|
||||
# Prompt services, abstract the prompt generation
|
||||
|
||||
class AgentStep(Record):
|
||||
thought = String()
|
||||
action = String()
|
||||
arguments = Map(String())
|
||||
observation = String()
|
||||
user = String() # User context for the step
|
||||
@dataclass
|
||||
class AgentStep:
|
||||
thought: str = ""
|
||||
action: str = ""
|
||||
arguments: dict[str, str] = field(default_factory=dict)
|
||||
observation: str = ""
|
||||
user: str = "" # User context for the step
|
||||
|
||||
class AgentRequest(Record):
|
||||
question = String()
|
||||
state = String()
|
||||
group = Array(String())
|
||||
history = Array(AgentStep())
|
||||
user = String() # User context for multi-tenancy
|
||||
streaming = Boolean() # NEW: Enable streaming response delivery (default false)
|
||||
@dataclass
|
||||
class AgentRequest:
|
||||
question: str = ""
|
||||
state: str = ""
|
||||
group: list[str] | None = None
|
||||
history: list[AgentStep] = field(default_factory=list)
|
||||
user: str = "" # User context for multi-tenancy
|
||||
streaming: bool = False # NEW: Enable streaming response delivery (default false)
|
||||
|
||||
class AgentResponse(Record):
|
||||
@dataclass
|
||||
class AgentResponse:
|
||||
# Streaming-first design
|
||||
chunk_type = String() # "thought", "action", "observation", "answer", "error"
|
||||
content = String() # The actual content (interpretation depends on chunk_type)
|
||||
end_of_message = Boolean() # Current chunk type (thought/action/etc.) is complete
|
||||
end_of_dialog = Boolean() # Entire agent dialog is complete
|
||||
chunk_type: str = "" # "thought", "action", "observation", "answer", "error"
|
||||
content: str = "" # The actual content (interpretation depends on chunk_type)
|
||||
end_of_message: bool = False # Current chunk type (thought/action/etc.) is complete
|
||||
end_of_dialog: bool = False # Entire agent dialog is complete
|
||||
|
||||
# Legacy fields (deprecated but kept for backward compatibility)
|
||||
answer = String()
|
||||
error = Error()
|
||||
thought = String()
|
||||
observation = String()
|
||||
answer: str = ""
|
||||
error: Error | None = None
|
||||
thought: str = ""
|
||||
observation: str = ""
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String, Integer, Array
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
from ..core.primitives import Error
|
||||
|
|
@ -10,37 +10,40 @@ from ..core.topic import topic
|
|||
|
||||
# Collection metadata operations (for librarian service)
|
||||
|
||||
class CollectionMetadata(Record):
|
||||
@dataclass
|
||||
class CollectionMetadata:
|
||||
"""Collection metadata record"""
|
||||
user = String()
|
||||
collection = String()
|
||||
name = String()
|
||||
description = String()
|
||||
tags = Array(String())
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
name: str = ""
|
||||
description: str = ""
|
||||
tags: list[str] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
||||
class CollectionManagementRequest(Record):
|
||||
@dataclass
|
||||
class CollectionManagementRequest:
|
||||
"""Request for collection management operations"""
|
||||
operation = String() # e.g., "delete-collection"
|
||||
operation: str = "" # e.g., "delete-collection"
|
||||
|
||||
# For 'list-collections'
|
||||
user = String()
|
||||
collection = String()
|
||||
timestamp = String() # ISO timestamp
|
||||
name = String()
|
||||
description = String()
|
||||
tags = Array(String())
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
timestamp: str = "" # ISO timestamp
|
||||
name: str = ""
|
||||
description: str = ""
|
||||
tags: list[str] = field(default_factory=list)
|
||||
|
||||
# For list
|
||||
tag_filter = Array(String()) # Optional filter by tags
|
||||
limit = Integer()
|
||||
tag_filter: list[str] = field(default_factory=list) # Optional filter by tags
|
||||
limit: int = 0
|
||||
|
||||
class CollectionManagementResponse(Record):
|
||||
@dataclass
|
||||
class CollectionManagementResponse:
|
||||
"""Response for collection management operations"""
|
||||
error = Error() # Only populated if there's an error
|
||||
timestamp = String() # ISO timestamp
|
||||
collections = Array(CollectionMetadata())
|
||||
error: Error | None = None # Only populated if there's an error
|
||||
timestamp: str = "" # ISO timestamp
|
||||
collections: list[CollectionMetadata] = field(default_factory=list)
|
||||
|
||||
|
||||
############################################################################
|
||||
|
|
@ -48,8 +51,9 @@ class CollectionManagementResponse(Record):
|
|||
# Topics
|
||||
|
||||
collection_request_queue = topic(
|
||||
'collection', kind='non-persistent', namespace='request'
|
||||
'collection', qos='q0', namespace='request'
|
||||
)
|
||||
collection_response_queue = topic(
|
||||
'collection', kind='non-persistent', namespace='response'
|
||||
'collection', qos='q0', namespace='response'
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
|
@ -13,58 +13,61 @@ from ..core.primitives import Error
|
|||
# put(values) -> ()
|
||||
# delete(keys) -> ()
|
||||
# config() -> (version, config)
|
||||
class ConfigKey(Record):
|
||||
type = String()
|
||||
key = String()
|
||||
@dataclass
|
||||
class ConfigKey:
|
||||
type: str = ""
|
||||
key: str = ""
|
||||
|
||||
class ConfigValue(Record):
|
||||
type = String()
|
||||
key = String()
|
||||
value = String()
|
||||
@dataclass
|
||||
class ConfigValue:
|
||||
type: str = ""
|
||||
key: str = ""
|
||||
value: str = ""
|
||||
|
||||
# Prompt services, abstract the prompt generation
|
||||
class ConfigRequest(Record):
|
||||
|
||||
operation = String() # get, list, getvalues, delete, put, config
|
||||
@dataclass
|
||||
class ConfigRequest:
|
||||
operation: str = "" # get, list, getvalues, delete, put, config
|
||||
|
||||
# get, delete
|
||||
keys = Array(ConfigKey())
|
||||
keys: list[ConfigKey] = field(default_factory=list)
|
||||
|
||||
# list, getvalues
|
||||
type = String()
|
||||
type: str = ""
|
||||
|
||||
# put
|
||||
values = Array(ConfigValue())
|
||||
|
||||
class ConfigResponse(Record):
|
||||
values: list[ConfigValue] = field(default_factory=list)
|
||||
|
||||
@dataclass
|
||||
class ConfigResponse:
|
||||
# get, list, getvalues, config
|
||||
version = Integer()
|
||||
version: int = 0
|
||||
|
||||
# get, getvalues
|
||||
values = Array(ConfigValue())
|
||||
values: list[ConfigValue] = field(default_factory=list)
|
||||
|
||||
# list
|
||||
directory = Array(String())
|
||||
directory: list[str] = field(default_factory=list)
|
||||
|
||||
# config
|
||||
config = Map(Map(String()))
|
||||
config: dict[str, dict[str, str]] = field(default_factory=dict)
|
||||
|
||||
# Everything
|
||||
error = Error()
|
||||
error: Error | None = None
|
||||
|
||||
class ConfigPush(Record):
|
||||
version = Integer()
|
||||
config = Map(Map(String()))
|
||||
@dataclass
|
||||
class ConfigPush:
|
||||
version: int = 0
|
||||
config: dict[str, dict[str, str]] = field(default_factory=dict)
|
||||
|
||||
config_request_queue = topic(
|
||||
'config', kind='non-persistent', namespace='request'
|
||||
'config', qos='q0', namespace='request'
|
||||
)
|
||||
config_response_queue = topic(
|
||||
'config', kind='non-persistent', namespace='response'
|
||||
'config', qos='q0', namespace='response'
|
||||
)
|
||||
config_push_queue = topic(
|
||||
'config', kind='persistent', namespace='config'
|
||||
'config', qos='q2', namespace='config'
|
||||
)
|
||||
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -1,33 +1,36 @@
|
|||
from pulsar.schema import Record, String, Map, Double, Array
|
||||
from dataclasses import dataclass, field
|
||||
from ..core.primitives import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
# Structured data diagnosis services
|
||||
|
||||
class StructuredDataDiagnosisRequest(Record):
|
||||
operation = String() # "detect-type", "generate-descriptor", "diagnose", or "schema-selection"
|
||||
sample = String() # Data sample to analyze (text content)
|
||||
type = String() # Data type (csv, json, xml) - optional, required for generate-descriptor
|
||||
schema_name = String() # Target schema name for descriptor generation - optional
|
||||
@dataclass
|
||||
class StructuredDataDiagnosisRequest:
|
||||
operation: str = "" # "detect-type", "generate-descriptor", "diagnose", or "schema-selection"
|
||||
sample: str = "" # Data sample to analyze (text content)
|
||||
type: str = "" # Data type (csv, json, xml) - optional, required for generate-descriptor
|
||||
schema_name: str = "" # Target schema name for descriptor generation - optional
|
||||
|
||||
# JSON encoded options (e.g., delimiter for CSV)
|
||||
options = Map(String())
|
||||
options: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
class StructuredDataDiagnosisResponse(Record):
|
||||
error = Error()
|
||||
@dataclass
|
||||
class StructuredDataDiagnosisResponse:
|
||||
error: Error | None = None
|
||||
|
||||
operation = String() # The operation that was performed
|
||||
detected_type = String() # Detected data type (for detect-type/diagnose) - optional
|
||||
confidence = Double() # Confidence score for type detection - optional
|
||||
operation: str = "" # The operation that was performed
|
||||
detected_type: str = "" # Detected data type (for detect-type/diagnose) - optional
|
||||
confidence: float = 0.0 # Confidence score for type detection - optional
|
||||
|
||||
# JSON encoded descriptor (for generate-descriptor/diagnose) - optional
|
||||
descriptor = String()
|
||||
descriptor: str = ""
|
||||
|
||||
# JSON encoded additional metadata (e.g., field count, sample records)
|
||||
metadata = Map(String())
|
||||
metadata: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# Array of matching schema IDs (for schema-selection operation) - optional
|
||||
schema_matches = Array(String())
|
||||
schema_matches: list[str] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
||||
############################################################################
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
|
@ -11,61 +11,61 @@ from ..core.primitives import Error
|
|||
# get_class(classname) -> (class)
|
||||
# put_class(class) -> (class)
|
||||
# delete_class(classname) -> ()
|
||||
#
|
||||
#
|
||||
# list_flows() -> (flowid[])
|
||||
# get_flow(flowid) -> (flow)
|
||||
# start_flow(flowid, classname) -> ()
|
||||
# stop_flow(flowid) -> ()
|
||||
|
||||
# Prompt services, abstract the prompt generation
|
||||
class FlowRequest(Record):
|
||||
|
||||
operation = String() # list-classes, get-class, put-class, delete-class
|
||||
@dataclass
|
||||
class FlowRequest:
|
||||
operation: str = "" # list-classes, get-class, put-class, delete-class
|
||||
# list-flows, get-flow, start-flow, stop-flow
|
||||
|
||||
# get_class, put_class, delete_class, start_flow
|
||||
class_name = String()
|
||||
class_name: str = ""
|
||||
|
||||
# put_class
|
||||
class_definition = String()
|
||||
class_definition: str = ""
|
||||
|
||||
# start_flow
|
||||
description = String()
|
||||
description: str = ""
|
||||
|
||||
# get_flow, start_flow, stop_flow
|
||||
flow_id = String()
|
||||
flow_id: str = ""
|
||||
|
||||
# start_flow - optional parameters for flow customization
|
||||
parameters = Map(String())
|
||||
|
||||
class FlowResponse(Record):
|
||||
parameters: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
@dataclass
|
||||
class FlowResponse:
|
||||
# list_classes
|
||||
class_names = Array(String())
|
||||
class_names: list[str] = field(default_factory=list)
|
||||
|
||||
# list_flows
|
||||
flow_ids = Array(String())
|
||||
flow_ids: list[str] = field(default_factory=list)
|
||||
|
||||
# get_class
|
||||
class_definition = String()
|
||||
class_definition: str = ""
|
||||
|
||||
# get_flow
|
||||
flow = String()
|
||||
flow: str = ""
|
||||
|
||||
# get_flow
|
||||
description = String()
|
||||
description: str = ""
|
||||
|
||||
# get_flow - parameters used when flow was started
|
||||
parameters = Map(String())
|
||||
parameters: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# Everything
|
||||
error = Error()
|
||||
error: Error | None = None
|
||||
|
||||
flow_request_queue = topic(
|
||||
'flow', kind='non-persistent', namespace='request'
|
||||
'flow', qos='q0', namespace='request'
|
||||
)
|
||||
flow_response_queue = topic(
|
||||
'flow', kind='non-persistent', namespace='response'
|
||||
'flow', qos='q0', namespace='response'
|
||||
)
|
||||
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Array, Long
|
||||
from dataclasses import dataclass, field
|
||||
from ..core.primitives import Triple, Error
|
||||
from ..core.topic import topic
|
||||
from ..core.metadata import Metadata
|
||||
from ..knowledge.document import Document, TextDocument
|
||||
# Note: Document imports will be updated after knowledge schemas are converted
|
||||
|
||||
# add-document
|
||||
# -> (document_id, document_metadata, content)
|
||||
|
|
@ -50,76 +49,79 @@ from ..knowledge.document import Document, TextDocument
|
|||
# <- (processing_metadata[])
|
||||
# <- (error)
|
||||
|
||||
class DocumentMetadata(Record):
|
||||
id = String()
|
||||
time = Long()
|
||||
kind = String()
|
||||
title = String()
|
||||
comments = String()
|
||||
metadata = Array(Triple())
|
||||
user = String()
|
||||
tags = Array(String())
|
||||
@dataclass
|
||||
class DocumentMetadata:
|
||||
id: str = ""
|
||||
time: int = 0
|
||||
kind: str = ""
|
||||
title: str = ""
|
||||
comments: str = ""
|
||||
metadata: list[Triple] = field(default_factory=list)
|
||||
user: str = ""
|
||||
tags: list[str] = field(default_factory=list)
|
||||
|
||||
class ProcessingMetadata(Record):
|
||||
id = String()
|
||||
document_id = String()
|
||||
time = Long()
|
||||
flow = String()
|
||||
user = String()
|
||||
collection = String()
|
||||
tags = Array(String())
|
||||
@dataclass
|
||||
class ProcessingMetadata:
|
||||
id: str = ""
|
||||
document_id: str = ""
|
||||
time: int = 0
|
||||
flow: str = ""
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
tags: list[str] = field(default_factory=list)
|
||||
|
||||
class Criteria(Record):
|
||||
key = String()
|
||||
value = String()
|
||||
operator = String()
|
||||
|
||||
class LibrarianRequest(Record):
|
||||
@dataclass
|
||||
class Criteria:
|
||||
key: str = ""
|
||||
value: str = ""
|
||||
operator: str = ""
|
||||
|
||||
@dataclass
|
||||
class LibrarianRequest:
|
||||
# add-document, remove-document, update-document, get-document-metadata,
|
||||
# get-document-content, add-processing, remove-processing, list-documents,
|
||||
# list-processing
|
||||
operation = String()
|
||||
operation: str = ""
|
||||
|
||||
# add-document, remove-document, update-document, get-document-metadata,
|
||||
# get-document-content
|
||||
document_id = String()
|
||||
document_id: str = ""
|
||||
|
||||
# add-processing, remove-processing
|
||||
processing_id = String()
|
||||
processing_id: str = ""
|
||||
|
||||
# add-document, update-document
|
||||
document_metadata = DocumentMetadata()
|
||||
document_metadata: DocumentMetadata | None = None
|
||||
|
||||
# add-processing
|
||||
processing_metadata = ProcessingMetadata()
|
||||
processing_metadata: ProcessingMetadata | None = None
|
||||
|
||||
# add-document
|
||||
content = Bytes()
|
||||
content: bytes = b""
|
||||
|
||||
# list-documents, list-processing
|
||||
user = String()
|
||||
user: str = ""
|
||||
|
||||
# list-documents?, list-processing?
|
||||
collection = String()
|
||||
collection: str = ""
|
||||
|
||||
#
|
||||
criteria = Array(Criteria())
|
||||
#
|
||||
criteria: list[Criteria] = field(default_factory=list)
|
||||
|
||||
class LibrarianResponse(Record):
|
||||
error = Error()
|
||||
document_metadata = DocumentMetadata()
|
||||
content = Bytes()
|
||||
document_metadatas = Array(DocumentMetadata())
|
||||
processing_metadatas = Array(ProcessingMetadata())
|
||||
@dataclass
|
||||
class LibrarianResponse:
|
||||
error: Error | None = None
|
||||
document_metadata: DocumentMetadata | None = None
|
||||
content: bytes = b""
|
||||
document_metadatas: list[DocumentMetadata] = field(default_factory=list)
|
||||
processing_metadatas: list[ProcessingMetadata] = field(default_factory=list)
|
||||
|
||||
# FIXME: Is this right? Using persistence on librarian so that
|
||||
# message chunking works
|
||||
|
||||
librarian_request_queue = topic(
|
||||
'librarian', kind='persistent', namespace='request'
|
||||
'librarian', qos='q1', namespace='request'
|
||||
)
|
||||
librarian_response_queue = topic(
|
||||
'librarian', kind='persistent', namespace='response',
|
||||
'librarian', qos='q1', namespace='response',
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
from pulsar.schema import Record, String, Array, Double, Integer, Boolean
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
|
@ -8,46 +8,49 @@ from ..core.primitives import Error
|
|||
|
||||
# LLM text completion
|
||||
|
||||
class TextCompletionRequest(Record):
|
||||
system = String()
|
||||
prompt = String()
|
||||
streaming = Boolean() # Default false for backward compatibility
|
||||
@dataclass
|
||||
class TextCompletionRequest:
|
||||
system: str = ""
|
||||
prompt: str = ""
|
||||
streaming: bool = False # Default false for backward compatibility
|
||||
|
||||
class TextCompletionResponse(Record):
|
||||
error = Error()
|
||||
response = String()
|
||||
in_token = Integer()
|
||||
out_token = Integer()
|
||||
model = String()
|
||||
end_of_stream = Boolean() # Indicates final message in stream
|
||||
@dataclass
|
||||
class TextCompletionResponse:
|
||||
error: Error | None = None
|
||||
response: str = ""
|
||||
in_token: int = 0
|
||||
out_token: int = 0
|
||||
model: str = ""
|
||||
end_of_stream: bool = False # Indicates final message in stream
|
||||
|
||||
############################################################################
|
||||
|
||||
# Embeddings
|
||||
|
||||
class EmbeddingsRequest(Record):
|
||||
text = String()
|
||||
@dataclass
|
||||
class EmbeddingsRequest:
|
||||
text: str = ""
|
||||
|
||||
class EmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
vectors = Array(Array(Double()))
|
||||
@dataclass
|
||||
class EmbeddingsResponse:
|
||||
error: Error | None = None
|
||||
vectors: list[list[float]] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Tool request/response
|
||||
|
||||
class ToolRequest(Record):
|
||||
name = String()
|
||||
|
||||
@dataclass
|
||||
class ToolRequest:
|
||||
name: str = ""
|
||||
# Parameters are JSON encoded
|
||||
parameters = String()
|
||||
|
||||
class ToolResponse(Record):
|
||||
error = Error()
|
||||
parameters: str = ""
|
||||
|
||||
@dataclass
|
||||
class ToolResponse:
|
||||
error: Error | None = None
|
||||
# Plain text aka "unstructured"
|
||||
text = String()
|
||||
|
||||
text: str = ""
|
||||
# JSON-encoded object aka "structured"
|
||||
object = String()
|
||||
object: str = ""
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
|
||||
from pulsar.schema import Record, String
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..core.primitives import Error, Value, Triple
|
||||
from ..core.topic import topic
|
||||
|
|
@ -9,13 +8,14 @@ from ..core.metadata import Metadata
|
|||
|
||||
# Lookups
|
||||
|
||||
class LookupRequest(Record):
|
||||
kind = String()
|
||||
term = String()
|
||||
@dataclass
|
||||
class LookupRequest:
|
||||
kind: str = ""
|
||||
term: str = ""
|
||||
|
||||
class LookupResponse(Record):
|
||||
text = String()
|
||||
error = Error()
|
||||
@dataclass
|
||||
class LookupResponse:
|
||||
text: str = ""
|
||||
error: Error | None = None
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String, Array, Map, Integer, Double
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
|
@ -7,15 +7,18 @@ from ..core.topic import topic
|
|||
|
||||
# NLP to Structured Query Service - converts natural language to GraphQL
|
||||
|
||||
class QuestionToStructuredQueryRequest(Record):
|
||||
question = String()
|
||||
max_results = Integer()
|
||||
@dataclass
|
||||
class QuestionToStructuredQueryRequest:
|
||||
question: str = ""
|
||||
max_results: int = 0
|
||||
|
||||
class QuestionToStructuredQueryResponse(Record):
|
||||
error = Error()
|
||||
graphql_query = String() # Generated GraphQL query
|
||||
variables = Map(String()) # GraphQL variables if any
|
||||
detected_schemas = Array(String()) # Which schemas the query targets
|
||||
confidence = Double()
|
||||
@dataclass
|
||||
class QuestionToStructuredQueryResponse:
|
||||
error: Error | None = None
|
||||
graphql_query: str = "" # Generated GraphQL query
|
||||
variables: dict[str, str] = field(default_factory=dict) # GraphQL variables if any
|
||||
detected_schemas: list[str] = field(default_factory=list) # Which schemas the query targets
|
||||
confidence: float = 0.0
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String, Map, Array
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
|
@ -7,22 +7,25 @@ from ..core.topic import topic
|
|||
|
||||
# Objects Query Service - executes GraphQL queries against structured data
|
||||
|
||||
class GraphQLError(Record):
|
||||
message = String()
|
||||
path = Array(String()) # Path to the field that caused the error
|
||||
extensions = Map(String()) # Additional error metadata
|
||||
@dataclass
|
||||
class GraphQLError:
|
||||
message: str = ""
|
||||
path: list[str] = field(default_factory=list) # Path to the field that caused the error
|
||||
extensions: dict[str, str] = field(default_factory=dict) # Additional error metadata
|
||||
|
||||
class ObjectsQueryRequest(Record):
|
||||
user = String() # Cassandra keyspace (follows pattern from TriplesQueryRequest)
|
||||
collection = String() # Data collection identifier (required for partition key)
|
||||
query = String() # GraphQL query string
|
||||
variables = Map(String()) # GraphQL variables
|
||||
operation_name = String() # Operation to execute for multi-operation documents
|
||||
@dataclass
|
||||
class ObjectsQueryRequest:
|
||||
user: str = "" # Cassandra keyspace (follows pattern from TriplesQueryRequest)
|
||||
collection: str = "" # Data collection identifier (required for partition key)
|
||||
query: str = "" # GraphQL query string
|
||||
variables: dict[str, str] = field(default_factory=dict) # GraphQL variables
|
||||
operation_name: str = "" # Operation to execute for multi-operation documents
|
||||
|
||||
class ObjectsQueryResponse(Record):
|
||||
error = Error() # System-level error (connection, timeout, etc.)
|
||||
data = String() # JSON-encoded GraphQL response data
|
||||
errors = Array(GraphQLError()) # GraphQL field-level errors
|
||||
extensions = Map(String()) # Query metadata (execution time, etc.)
|
||||
@dataclass
|
||||
class ObjectsQueryResponse:
|
||||
error: Error | None = None # System-level error (connection, timeout, etc.)
|
||||
data: str = "" # JSON-encoded GraphQL response data
|
||||
errors: list[GraphQLError] = field(default_factory=list) # GraphQL field-level errors
|
||||
extensions: dict[str, str] = field(default_factory=dict) # Query metadata (execution time, etc.)
|
||||
|
||||
############################################################################
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String, Map, Boolean
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
|
@ -18,27 +18,28 @@ from ..core.topic import topic
|
|||
# extract-rows
|
||||
# schema, chunk -> rows
|
||||
|
||||
class PromptRequest(Record):
|
||||
id = String()
|
||||
@dataclass
|
||||
class PromptRequest:
|
||||
id: str = ""
|
||||
|
||||
# JSON encoded values
|
||||
terms = Map(String())
|
||||
terms: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# Streaming support (default false for backward compatibility)
|
||||
streaming = Boolean()
|
||||
|
||||
class PromptResponse(Record):
|
||||
streaming: bool = False
|
||||
|
||||
@dataclass
|
||||
class PromptResponse:
|
||||
# Error case
|
||||
error = Error()
|
||||
error: Error | None = None
|
||||
|
||||
# Just plain text
|
||||
text = String()
|
||||
text: str = ""
|
||||
|
||||
# JSON encoded
|
||||
object = String()
|
||||
object: str = ""
|
||||
|
||||
# Indicates final message in stream
|
||||
end_of_stream = Boolean()
|
||||
end_of_stream: bool = False
|
||||
|
||||
############################################################################
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String, Integer, Array, Double
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.primitives import Error, Value, Triple
|
||||
from ..core.topic import topic
|
||||
|
|
@ -7,49 +7,55 @@ from ..core.topic import topic
|
|||
|
||||
# Graph embeddings query
|
||||
|
||||
class GraphEmbeddingsRequest(Record):
|
||||
vectors = Array(Array(Double()))
|
||||
limit = Integer()
|
||||
user = String()
|
||||
collection = String()
|
||||
@dataclass
|
||||
class GraphEmbeddingsRequest:
|
||||
vectors: list[list[float]] = field(default_factory=list)
|
||||
limit: int = 0
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
|
||||
class GraphEmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
entities = Array(Value())
|
||||
@dataclass
|
||||
class GraphEmbeddingsResponse:
|
||||
error: Error | None = None
|
||||
entities: list[Value] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph triples query
|
||||
|
||||
class TriplesQueryRequest(Record):
|
||||
user = String()
|
||||
collection = String()
|
||||
s = Value()
|
||||
p = Value()
|
||||
o = Value()
|
||||
limit = Integer()
|
||||
@dataclass
|
||||
class TriplesQueryRequest:
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
s: Value | None = None
|
||||
p: Value | None = None
|
||||
o: Value | None = None
|
||||
limit: int = 0
|
||||
|
||||
class TriplesQueryResponse(Record):
|
||||
error = Error()
|
||||
triples = Array(Triple())
|
||||
@dataclass
|
||||
class TriplesQueryResponse:
|
||||
error: Error | None = None
|
||||
triples: list[Triple] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Doc embeddings query
|
||||
|
||||
class DocumentEmbeddingsRequest(Record):
|
||||
vectors = Array(Array(Double()))
|
||||
limit = Integer()
|
||||
user = String()
|
||||
collection = String()
|
||||
@dataclass
|
||||
class DocumentEmbeddingsRequest:
|
||||
vectors: list[list[float]] = field(default_factory=list)
|
||||
limit: int = 0
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
|
||||
class DocumentEmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
chunks = Array(String())
|
||||
@dataclass
|
||||
class DocumentEmbeddingsResponse:
|
||||
error: Error | None = None
|
||||
chunks: list[str] = field(default_factory=list)
|
||||
|
||||
document_embeddings_request_queue = topic(
|
||||
"non-persistent://trustgraph/document-embeddings-request"
|
||||
"document-embeddings-request", qos='q0', tenant='trustgraph', namespace='flow'
|
||||
)
|
||||
document_embeddings_response_queue = topic(
|
||||
"non-persistent://trustgraph/document-embeddings-response"
|
||||
"document-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
|
||||
)
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
from dataclasses import dataclass
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error, Value
|
||||
|
||||
|
|
@ -7,36 +6,37 @@ from ..core.primitives import Error, Value
|
|||
|
||||
# Graph RAG text retrieval
|
||||
|
||||
class GraphRagQuery(Record):
|
||||
query = String()
|
||||
user = String()
|
||||
collection = String()
|
||||
entity_limit = Integer()
|
||||
triple_limit = Integer()
|
||||
max_subgraph_size = Integer()
|
||||
max_path_length = Integer()
|
||||
streaming = Boolean()
|
||||
@dataclass
|
||||
class GraphRagQuery:
|
||||
query: str = ""
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
entity_limit: int = 0
|
||||
triple_limit: int = 0
|
||||
max_subgraph_size: int = 0
|
||||
max_path_length: int = 0
|
||||
streaming: bool = False
|
||||
|
||||
class GraphRagResponse(Record):
|
||||
error = Error()
|
||||
response = String()
|
||||
chunk = String()
|
||||
end_of_stream = Boolean()
|
||||
@dataclass
|
||||
class GraphRagResponse:
|
||||
error: Error | None = None
|
||||
response: str = ""
|
||||
end_of_stream: bool = False
|
||||
|
||||
############################################################################
|
||||
|
||||
# Document RAG text retrieval
|
||||
|
||||
class DocumentRagQuery(Record):
|
||||
query = String()
|
||||
user = String()
|
||||
collection = String()
|
||||
doc_limit = Integer()
|
||||
streaming = Boolean()
|
||||
|
||||
class DocumentRagResponse(Record):
|
||||
error = Error()
|
||||
response = String()
|
||||
chunk = String()
|
||||
end_of_stream = Boolean()
|
||||
@dataclass
|
||||
class DocumentRagQuery:
|
||||
query: str = ""
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
doc_limit: int = 0
|
||||
streaming: bool = False
|
||||
|
||||
@dataclass
|
||||
class DocumentRagResponse:
|
||||
error: Error | None = None
|
||||
response: str = ""
|
||||
end_of_stream: bool = False
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
|
@ -7,15 +7,17 @@ from ..core.topic import topic
|
|||
|
||||
# Storage management operations
|
||||
|
||||
class StorageManagementRequest(Record):
|
||||
@dataclass
|
||||
class StorageManagementRequest:
|
||||
"""Request for storage management operations sent to store processors"""
|
||||
operation = String() # e.g., "delete-collection"
|
||||
user = String()
|
||||
collection = String()
|
||||
operation: str = "" # e.g., "delete-collection"
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
|
||||
class StorageManagementResponse(Record):
|
||||
@dataclass
|
||||
class StorageManagementResponse:
|
||||
"""Response from storage processors for management operations"""
|
||||
error = Error() # Only populated if there's an error, if null success
|
||||
error: Error | None = None # Only populated if there's an error, if null success
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -23,20 +25,21 @@ class StorageManagementResponse(Record):
|
|||
|
||||
# Topics for sending collection management requests to different storage types
|
||||
vector_storage_management_topic = topic(
|
||||
'vector-storage-management', kind='non-persistent', namespace='request'
|
||||
'vector-storage-management', qos='q0', namespace='request'
|
||||
)
|
||||
|
||||
object_storage_management_topic = topic(
|
||||
'object-storage-management', kind='non-persistent', namespace='request'
|
||||
'object-storage-management', qos='q0', namespace='request'
|
||||
)
|
||||
|
||||
triples_storage_management_topic = topic(
|
||||
'triples-storage-management', kind='non-persistent', namespace='request'
|
||||
'triples-storage-management', qos='q0', namespace='request'
|
||||
)
|
||||
|
||||
# Topic for receiving responses from storage processors
|
||||
storage_management_response_topic = topic(
|
||||
'storage-management', kind='non-persistent', namespace='response'
|
||||
'storage-management', qos='q0', namespace='response'
|
||||
)
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from pulsar.schema import Record, String, Map, Array
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
|
@ -7,14 +7,17 @@ from ..core.topic import topic
|
|||
|
||||
# Structured Query Service - executes GraphQL queries
|
||||
|
||||
class StructuredQueryRequest(Record):
|
||||
question = String()
|
||||
user = String() # Cassandra keyspace identifier
|
||||
collection = String() # Data collection identifier
|
||||
@dataclass
|
||||
class StructuredQueryRequest:
|
||||
question: str = ""
|
||||
user: str = "" # Cassandra keyspace identifier
|
||||
collection: str = "" # Data collection identifier
|
||||
|
||||
class StructuredQueryResponse(Record):
|
||||
error = Error()
|
||||
data = String() # JSON-encoded GraphQL response data
|
||||
errors = Array(String()) # GraphQL errors if any
|
||||
@dataclass
|
||||
class StructuredQueryResponse:
|
||||
error: Error | None = None
|
||||
data: str = "" # JSON-encoded GraphQL response data
|
||||
errors: list[str] = field(default_factory=list) # GraphQL errors if any
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue