diff --git a/docs/tech-specs/pubsub.md b/docs/tech-specs/pubsub.md new file mode 100644 index 00000000..38836838 --- /dev/null +++ b/docs/tech-specs/pubsub.md @@ -0,0 +1,958 @@ +# Pub/Sub Infrastructure + +## Overview + +This document catalogs all connections between the TrustGraph codebase and the pub/sub infrastructure. Currently, the system is hardcoded to use Apache Pulsar. This analysis identifies all integration points to inform future refactoring toward a configurable pub/sub abstraction. + +## Current State: Pulsar Integration Points + +### 1. Direct Pulsar Client Usage + +**Location:** `trustgraph-flow/trustgraph/gateway/service.py` + +The API gateway directly imports and instantiates the Pulsar client: + +- **Line 20:** `import pulsar` +- **Lines 54-61:** Direct instantiation of `pulsar.Client()` with optional `pulsar.AuthenticationToken()` +- **Lines 33-35:** Default Pulsar host configuration from environment variables +- **Lines 178-192:** CLI arguments for `--pulsar-host`, `--pulsar-api-key`, and `--pulsar-listener` +- **Lines 78, 124:** Passes `pulsar_client` to `ConfigReceiver` and `DispatcherManager` + +This is the only location that directly instantiates a Pulsar client outside of the abstraction layer. + +### 2. Base Processor Framework + +**Location:** `trustgraph-base/trustgraph/base/async_processor.py` + +The base class for all processors provides Pulsar connectivity: + +- **Line 9:** `import _pulsar` (for exception handling) +- **Line 18:** `from . pubsub import PulsarClient` +- **Line 38:** Creates `pulsar_client_object = PulsarClient(**params)` +- **Lines 104-108:** Properties exposing `pulsar_host` and `pulsar_client` +- **Line 250:** Static method `add_args()` calls `PulsarClient.add_args(parser)` for CLI arguments +- **Lines 223-225:** Exception handling for `_pulsar.Interrupted` + +All processors inherit from `AsyncProcessor`, making this the central integration point. + +### 3. 
Consumer Abstraction + +**Location:** `trustgraph-base/trustgraph/base/consumer.py` + +Consumes messages from queues and invokes handler functions: + +**Pulsar imports:** +- **Line 12:** `from pulsar.schema import JsonSchema` +- **Line 13:** `import pulsar` +- **Line 14:** `import _pulsar` + +**Pulsar-specific usage:** +- **Lines 100, 102:** `pulsar.InitialPosition.Earliest` / `pulsar.InitialPosition.Latest` +- **Line 108:** `JsonSchema(self.schema)` wrapper +- **Line 110:** `pulsar.ConsumerType.Shared` +- **Lines 104-111:** `self.client.subscribe()` with Pulsar-specific parameters +- **Lines 143, 150, 65:** `consumer.unsubscribe()` and `consumer.close()` methods +- **Line 162:** `_pulsar.Timeout` exception +- **Lines 182, 205, 232:** `consumer.acknowledge()` / `consumer.negative_acknowledge()` + +**Spec file:** `trustgraph-base/trustgraph/base/consumer_spec.py` +- **Line 22:** References `processor.pulsar_client` + +### 4. Producer Abstraction + +**Location:** `trustgraph-base/trustgraph/base/producer.py` + +Sends messages to queues: + +**Pulsar imports:** +- **Line 2:** `from pulsar.schema import JsonSchema` + +**Pulsar-specific usage:** +- **Line 49:** `JsonSchema(self.schema)` wrapper +- **Lines 47-51:** `self.client.create_producer()` with Pulsar-specific parameters (topic, schema, chunking_enabled) +- **Lines 31, 76:** `producer.close()` method +- **Lines 64-65:** `producer.send()` with message and properties + +**Spec file:** `trustgraph-base/trustgraph/base/producer_spec.py` +- **Line 18:** References `processor.pulsar_client` + +### 5. 
Publisher Abstraction + +**Location:** `trustgraph-base/trustgraph/base/publisher.py` + +Asynchronous message publishing with queue buffering: + +**Pulsar imports:** +- **Line 2:** `from pulsar.schema import JsonSchema` +- **Line 6:** `import pulsar` + +**Pulsar-specific usage:** +- **Line 52:** `JsonSchema(self.schema)` wrapper +- **Lines 50-54:** `self.client.create_producer()` with Pulsar-specific parameters +- **Lines 101, 103:** `producer.send()` with message and optional properties +- **Lines 106-107:** `producer.flush()` and `producer.close()` methods + +### 6. Subscriber Abstraction + +**Location:** `trustgraph-base/trustgraph/base/subscriber.py` + +Provides multi-recipient message distribution from queues: + +**Pulsar imports:** +- **Line 6:** `from pulsar.schema import JsonSchema` +- **Line 8:** `import _pulsar` + +**Pulsar-specific usage:** +- **Line 55:** `JsonSchema(self.schema)` wrapper +- **Line 57:** `self.client.subscribe(**subscribe_args)` +- **Lines 101, 136, 160, 167-172:** Pulsar exceptions: `_pulsar.Timeout`, `_pulsar.InvalidConfiguration`, `_pulsar.AlreadyClosed` +- **Lines 159, 166, 170:** Consumer methods: `negative_acknowledge()`, `unsubscribe()`, `close()` +- **Lines 247, 251:** Message acknowledgment: `acknowledge()`, `negative_acknowledge()` + +**Spec file:** `trustgraph-base/trustgraph/base/subscriber_spec.py` +- **Line 19:** References `processor.pulsar_client` + +### 7. Schema System (Heart of Darkness) + +**Location:** `trustgraph-base/trustgraph/schema/` + +Every message schema in the system is defined using Pulsar's schema framework. 
+ +**Core primitives:** `schema/core/primitives.py` +- **Line 2:** `from pulsar.schema import Record, String, Boolean, Array, Integer` +- All schemas inherit from Pulsar's `Record` base class +- All field types are Pulsar types: `String()`, `Integer()`, `Boolean()`, `Array()`, `Map()`, `Double()` + +**Example schemas:** +- `schema/services/llm.py` (Line 2): `from pulsar.schema import Record, String, Array, Double, Integer, Boolean` +- `schema/services/config.py` (Line 2): `from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer` + +**Topic naming:** `schema/core/topic.py` +- **Lines 2-3:** Topic format: `{kind}://{tenant}/{namespace}/{topic}` +- This URI structure is Pulsar-specific (e.g., `persistent://tg/flow/config`) + +**Impact:** +- All request/response message definitions throughout the codebase use Pulsar schemas +- This includes services for: config, flow, llm, prompt, query, storage, agent, collection, diagnosis, library, lookup, nlp_query, objects_query, retrieval, structured_query +- Schema definitions are imported and used extensively across all processors and services + +## Summary + +### Pulsar Dependencies by Category + +1. **Client instantiation:** + - Direct: `gateway/service.py` + - Abstracted: `async_processor.py` → `pubsub.py` (PulsarClient) + +2. **Message transport:** + - Consumer: `consumer.py`, `consumer_spec.py` + - Producer: `producer.py`, `producer_spec.py` + - Publisher: `publisher.py` + - Subscriber: `subscriber.py`, `subscriber_spec.py` + +3. **Schema system:** + - Base types: `schema/core/primitives.py` + - All service schemas: `schema/services/*.py` + - Topic naming: `schema/core/topic.py` + +4. 
**Pulsar-specific concepts required:** + - Topic-based messaging + - Schema system (Record, field types) + - Shared subscriptions + - Message acknowledgment (positive/negative) + - Consumer positioning (earliest/latest) + - Message properties + - Initial positions and consumer types + - Chunking support + - Persistent vs non-persistent topics + +### Refactoring Challenges + +The good news: The abstraction layer (Consumer, Producer, Publisher, Subscriber) provides a clean encapsulation of most Pulsar interactions. + +The challenges: +1. **Schema system pervasiveness:** Every message definition uses `pulsar.schema.Record` and Pulsar field types +2. **Pulsar-specific enums:** `InitialPosition`, `ConsumerType` +3. **Pulsar exceptions:** `_pulsar.Timeout`, `_pulsar.Interrupted`, `_pulsar.InvalidConfiguration`, `_pulsar.AlreadyClosed` +4. **Method signatures:** `acknowledge()`, `negative_acknowledge()`, `subscribe()`, `create_producer()`, etc. +5. **Topic URI format:** Pulsar's `kind://tenant/namespace/topic` structure + +### Next Steps + +To make the pub/sub infrastructure configurable, we need to: + +1. Create an abstraction interface for the client/schema system +2. Abstract Pulsar-specific enums and exceptions +3. Create schema wrappers or alternative schema definitions +4. Implement the interface for both Pulsar and alternative systems (Kafka, RabbitMQ, Redis Streams, etc.) +5. Update `pubsub.py` to be configurable and support multiple backends +6. Provide migration path for existing deployments + +## Approach Draft 1: Adapter Pattern with Schema Translation Layer + +### Key Insight +The **schema system** is the deepest integration point - everything else flows from it. We need to solve this first, or we'll be rewriting the entire codebase. + +### Strategy: Minimal Disruption with Adapters + +**1. 
Keep Pulsar schemas as the internal representation** +- Don't rewrite all the schema definitions +- Schemas remain `pulsar.schema.Record` internally +- Use adapters to translate at the boundary between our code and the pub/sub backend + +**2. Create a pub/sub abstraction layer:** + +``` +┌─────────────────────────────────────┐ +│ Existing Code (unchanged) │ +│ - Uses Pulsar schemas internally │ +│ - Consumer/Producer/Publisher │ +└──────────────┬──────────────────────┘ + │ +┌──────────────┴──────────────────────┐ +│ PubSubFactory (configurable) │ +│ - Creates backend-specific client │ +└──────────────┬──────────────────────┘ + │ + ┌──────┴──────┐ + │ │ +┌───────▼─────┐ ┌────▼─────────┐ +│ PulsarAdapter│ │ KafkaAdapter │ etc... +│ (passthrough)│ │ (translates) │ +└──────────────┘ └──────────────┘ +``` + +**3. Define abstract interfaces:** +- `PubSubClient` - client connection +- `PubSubProducer` - sending messages +- `PubSubConsumer` - receiving messages +- `SchemaAdapter` - translating Pulsar schemas to/from JSON or backend-specific formats + +**4. 
Implementation details:** + +For **Pulsar adapter**: Nearly passthrough, minimal translation + +For **other backends** (Kafka, RabbitMQ, etc.): +- Serialize Pulsar Record objects to JSON/bytes +- Map concepts like: + - `InitialPosition.Earliest/Latest` → Kafka's auto.offset.reset + - `acknowledge()` → Kafka's commit + - `negative_acknowledge()` → Re-queue or DLQ pattern + - Topic URIs → Backend-specific topic names + +### Analysis + +**Pros:** +- ✅ Minimal code changes to existing services +- ✅ Schemas stay as-is (no massive rewrite) +- ✅ Gradual migration path +- ✅ Pulsar users see no difference +- ✅ New backends added via adapters + +**Cons:** +- ⚠️ Still carries Pulsar dependency (for schema definitions) +- ⚠️ Some impedance mismatch translating concepts + +### Alternative Consideration + +Create a **TrustGraph schema system** that's pub/sub agnostic (using dataclasses or Pydantic), then generate Pulsar/Kafka/etc schemas from it. This requires rewriting every schema file and potentially breaking changes. + +### Recommendation for Draft 1 + +Start with the **adapter approach** because: +1. It's pragmatic - works with existing code +2. Proves the concept with minimal risk +3. Can evolve to a native schema system later if needed +4. Configuration-driven: one env var switches backends + +## Approach Draft 2: Backend-Agnostic Schema System with Dataclasses + +### Core Concept + +Use Python **dataclasses** as the neutral schema definition format. Each pub/sub backend provides its own serialization/deserialization for dataclasses, eliminating the need for Pulsar schemas to remain in the codebase. + +### Schema Polymorphism at the Factory Level + +Instead of translating Pulsar schemas, **each backend provides its own schema handling** that works with standard Python dataclasses. + +### Publisher Flow + +```python +# 1. Get the configured backend from factory +pubsub = get_pubsub() # Returns PulsarBackend, MQTTBackend, etc. + +# 2. 
Import the schema class +# (backend-agnostic - imported directly, not obtained from the backend) +from trustgraph.schema.services.llm import TextCompletionRequest + +# 3. Create a producer/publisher for a specific topic +producer = pubsub.create_producer( + topic="text-completion-requests", + schema=TextCompletionRequest # Tells backend what schema to use +) + +# 4. Create message instances (same API regardless of backend) +request = TextCompletionRequest( + system="You are helpful", + prompt="Hello world", + streaming=False +) + +# 5. Send the message +producer.send(request) # Backend serializes appropriately +``` + +### Consumer Flow + +```python +# 1. Get the configured backend +pubsub = get_pubsub() + +# 2. Create a consumer +consumer = pubsub.subscribe( + topic="text-completion-requests", + schema=TextCompletionRequest # Tells backend how to deserialize +) + +# 3. Receive and deserialize +msg = consumer.receive() +request = msg.value() # Returns TextCompletionRequest dataclass instance + +# 4. Use the data (type-safe access) +print(request.system) # "You are helpful" +print(request.prompt) # "Hello world" +print(request.streaming) # False +``` + +### What Happens Behind the Scenes + +**For Pulsar backend:** +- `create_producer()` → creates Pulsar producer with JSON schema or dynamically generated Record +- `send(request)` → serializes dataclass to JSON/Pulsar format, sends to Pulsar +- `receive()` → gets Pulsar message, deserializes back to dataclass + +**For MQTT backend:** +- `create_producer()` → connects to MQTT broker, no schema registration needed +- `send(request)` → converts dataclass to JSON, publishes to MQTT topic +- `receive()` → subscribes to MQTT topic, deserializes JSON to dataclass + +**For Kafka backend:** +- `create_producer()` → creates Kafka producer, registers Avro schema if needed +- `send(request)` → serializes dataclass to Avro format, sends to Kafka +- `receive()` → gets Kafka message, deserializes Avro back to dataclass + +### Key Design Points + 
+1. **Schema object creation**: The dataclass instance (`TextCompletionRequest(...)`) is identical regardless of backend +2. **Backend handles encoding**: Each backend knows how to serialize its dataclass to the wire format +3. **Schema definition at creation**: When creating producer/consumer, you specify the schema type +4. **Type safety preserved**: You get back a proper `TextCompletionRequest` object, not a dict +5. **No backend leakage**: Application code never imports backend-specific libraries + +### Example Transformation + +**Current (Pulsar-specific):** +```python +# schema/services/llm.py +from pulsar.schema import Record, String, Boolean, Integer + +class TextCompletionRequest(Record): + system = String() + prompt = String() + streaming = Boolean() +``` + +**New (Backend-agnostic):** +```python +# schema/services/llm.py +from dataclasses import dataclass + +@dataclass +class TextCompletionRequest: + system: str + prompt: str + streaming: bool = False +``` + +### Backend Integration + +Each backend handles serialization/deserialization of dataclasses: + +**Pulsar backend:** +- Dynamically generate `pulsar.schema.Record` classes from dataclasses +- Or serialize dataclasses to JSON and use Pulsar's JSON schema +- Maintains compatibility with existing Pulsar deployments + +**MQTT/Redis backend:** +- Direct JSON serialization of dataclass instances +- Use `dataclasses.asdict()` / `from_dict()` +- Lightweight, no schema registry needed + +**Kafka backend:** +- Generate Avro schemas from dataclass definitions +- Use Confluent's schema registry +- Type-safe serialization with schema evolution support + +### Architecture + +``` +┌─────────────────────────────────────┐ +│ Application Code │ +│ - Uses dataclass schemas │ +│ - Backend-agnostic │ +└──────────────┬──────────────────────┘ + │ +┌──────────────┴──────────────────────┐ +│ PubSubFactory (configurable) │ +│ - get_pubsub() returns backend │ +└──────────────┬──────────────────────┘ + │ + ┌──────┴──────┐ + │ 
│ +┌───────▼─────────┐ ┌────▼──────────────┐ +│ PulsarBackend │ │ MQTTBackend │ +│ - JSON schema │ │ - JSON serialize │ +│ - or dynamic │ │ - Simple queues │ +│ Record gen │ │ │ +└─────────────────┘ └───────────────────┘ +``` + +### Implementation Details + +**1. Schema definitions:** Plain dataclasses with type hints + - `str`, `int`, `bool`, `float` for primitives + - `list[T]` for arrays + - `dict[str, T]` for maps + - Nested dataclasses for complex types + +**2. Each backend provides:** + - Serializer: `dataclass → bytes/wire format` + - Deserializer: `bytes/wire format → dataclass` + - Schema registration (if needed, like Pulsar/Kafka) + +**3. Consumer/Producer abstraction:** + - Already exists (consumer.py, producer.py) + - Update to use backend's serialization + - Remove direct Pulsar imports + +**4. Type mappings:** + - Pulsar `String()` → Python `str` + - Pulsar `Integer()` → Python `int` + - Pulsar `Boolean()` → Python `bool` + - Pulsar `Array(T)` → Python `list[T]` + - Pulsar `Map(K, V)` → Python `dict[K, V]` + - Pulsar `Double()` → Python `float` + - Pulsar `Bytes()` → Python `bytes` + +### Migration Path + +1. **Create dataclass versions** of all schemas in `trustgraph/schema/` +2. **Update backend classes** (Consumer, Producer, Publisher, Subscriber) to use backend-provided serialization +3. **Implement PulsarBackend** with JSON schema or dynamic Record generation +4. **Test with Pulsar** to ensure backward compatibility with existing deployments +5. **Add new backends** (MQTT, Kafka, Redis, etc.) as needed +6. 
**Remove Pulsar imports** from schema files + +### Benefits + +✅ **No pub/sub dependency** in schema definitions +✅ **Standard Python** - easy to understand, type-check, document +✅ **Modern tooling** - works with mypy, IDE autocomplete, linters +✅ **Backend-optimized** - each backend uses native serialization +✅ **No translation overhead** - direct serialization, no adapters +✅ **Type safety** - real objects with proper types +✅ **Easy validation** - can use Pydantic if needed + +### Challenges & Solutions + +**Challenge:** Pulsar's `Record` has runtime field validation +**Solution:** Use Pydantic dataclasses for validation if needed, or add checks in a `__post_init__` method on plain dataclasses + +**Challenge:** Some Pulsar-specific features (like `Bytes` type) +**Solution:** Map to `bytes` type in dataclass, backend handles encoding appropriately + +**Challenge:** Topic naming (`persistent://tenant/namespace/topic`) +**Solution:** Abstract topic names in schema definitions, backend converts to proper format + +**Challenge:** Schema evolution and versioning +**Solution:** Each backend handles this according to its capabilities (Pulsar schema versions, Kafka schema registry, etc.) + +**Challenge:** Nested complex types +**Solution:** Use nested dataclasses, backends recursively serialize/deserialize + +### Design Decisions + +1. **Plain dataclasses or Pydantic?** + - ✅ **Decision: Use plain Python dataclasses** + - Simpler, no additional dependencies + - Validation not required in practice + - Easier to understand and maintain + +2. **Schema evolution:** + - ✅ **Decision: No versioning mechanism needed** + - Schemas are stable and long-lasting + - Updates typically add new fields (backward compatible) + - Backends handle schema evolution according to their capabilities + +3. 
**Backward compatibility:** + - ✅ **Decision: Major version change, no backward compatibility required** + - Will be a breaking change with migration instructions + - Clean break allows for better design + - Migration guide will be provided for existing deployments + +4. **Nested types and complex structures:** + - ✅ **Decision: Use nested dataclasses naturally** + - Python dataclasses handle nesting perfectly + - `list[T]` for arrays, `dict[K, V]` for maps + - Backends recursively serialize/deserialize + - Example: + ```python + @dataclass + class Value: + value: str + is_uri: bool + + @dataclass + class Triple: + s: Value # Nested dataclass + p: Value + o: Value + + @dataclass + class GraphQuery: + triples: list[Triple] # Array of nested dataclasses + metadata: dict[str, str] + ``` + +5. **Default values and optional fields:** + - ✅ **Decision: Mix of required, defaults, and optional fields** + - Required fields: No default value + - Fields with defaults: Always present, have sensible default + - Truly optional fields: `T | None = None`, omitted from serialization when `None` + - Example: + ```python + @dataclass + class TextCompletionRequest: + system: str # Required, no default + prompt: str # Required, no default + streaming: bool = False # Optional with default value + metadata: dict | None = None # Truly optional, can be absent + ``` + + **Important serialization semantics:** + + When `metadata = None`: + ```json + { + "system": "...", + "prompt": "...", + "streaming": false + // metadata field NOT PRESENT + } + ``` + + When `metadata = {}` (explicitly empty): + ```json + { + "system": "...", + "prompt": "...", + "streaming": false, + "metadata": {} // Field PRESENT but empty + } + ``` + + **Key distinction:** + - `None` → field absent from JSON (not serialized) + - Empty value (`{}`, `[]`, `""`) → field present with empty value + - This matters semantically: "not provided" vs "explicitly empty" + - Serialization backends must skip `None` fields, not encode 
as `null` + +## Approach Draft 3: Implementation Details + +### Generic Queue Naming Format + +Replace backend-specific queue names with a generic format that backends can map appropriately. + +**Format:** `{qos}/{tenant}/{namespace}/{queue-name}` + +Where: +- `qos`: Quality of Service level + - `q0` = best-effort (fire and forget, no acknowledgment) + - `q1` = at-least-once (requires acknowledgment) + - `q2` = exactly-once (two-phase acknowledgment) +- `tenant`: Logical grouping for multi-tenancy +- `namespace`: Sub-grouping within tenant +- `queue-name`: Actual queue/topic name + +**Examples:** +``` +q1/tg/flow/text-completion-requests +q2/tg/config/config-push +q0/tg/metrics/stats +``` + +### Backend Topic Mapping + +Each backend maps the generic format to its native format: + +**Pulsar Backend:** +```python +def map_topic(self, generic_topic: str) -> str: + # Parse: q1/tg/flow/text-completion-requests + qos, tenant, namespace, queue = generic_topic.split('/', 3) + + # Map QoS to persistence + persistence = 'persistent' if qos in ['q1', 'q2'] else 'non-persistent' + + # Return Pulsar URI: persistent://tg/flow/text-completion-requests + return f"{persistence}://{tenant}/{namespace}/{queue}" +``` + +**MQTT Backend:** +```python +def map_topic(self, generic_topic: str) -> tuple[str, int]: + # Parse: q1/tg/flow/text-completion-requests + qos, tenant, namespace, queue = generic_topic.split('/', 3) + + # Map QoS level + qos_level = {'q0': 0, 'q1': 1, 'q2': 2}[qos] + + # Build MQTT topic including tenant/namespace for proper namespacing + mqtt_topic = f"{tenant}/{namespace}/{queue}" + + return mqtt_topic, qos_level +``` + +### Updated Topic Helper Function + +```python +# schema/core/topic.py +def topic(queue_name, qos='q1', tenant='tg', namespace='flow'): + """ + Create a generic topic identifier that can be mapped by backends. 
+ + Args: + queue_name: The queue/topic name + qos: Quality of service + - 'q0' = best-effort (no ack) + - 'q1' = at-least-once (ack required) + - 'q2' = exactly-once (two-phase ack) + tenant: Tenant identifier for multi-tenancy + namespace: Namespace within tenant + + Returns: + Generic topic string: qos/tenant/namespace/queue_name + + Examples: + topic('my-queue') # q1/tg/flow/my-queue + topic('config', qos='q2', namespace='config') # q2/tg/config/config + """ + return f"{qos}/{tenant}/{namespace}/{queue_name}" +``` + +### Configuration and Initialization + +**Command-Line Arguments + Environment Variables:** + +```python +# In base/async_processor.py - add_args() method +@staticmethod +def add_args(parser): + # Pub/sub backend selection + parser.add_argument( + '--pubsub-backend', + default=os.getenv('PUBSUB_BACKEND', 'pulsar'), + choices=['pulsar', 'mqtt'], + help='Pub/sub backend (default: pulsar, env: PUBSUB_BACKEND)' + ) + + # Pulsar-specific configuration + parser.add_argument( + '--pulsar-host', + default=os.getenv('PULSAR_HOST', 'pulsar://localhost:6650'), + help='Pulsar host (default: pulsar://localhost:6650, env: PULSAR_HOST)' + ) + + parser.add_argument( + '--pulsar-api-key', + default=os.getenv('PULSAR_API_KEY', None), + help='Pulsar API key (env: PULSAR_API_KEY)' + ) + + parser.add_argument( + '--pulsar-listener', + default=os.getenv('PULSAR_LISTENER', None), + help='Pulsar listener name (env: PULSAR_LISTENER)' + ) + + # MQTT-specific configuration + parser.add_argument( + '--mqtt-host', + default=os.getenv('MQTT_HOST', 'localhost'), + help='MQTT broker host (default: localhost, env: MQTT_HOST)' + ) + + parser.add_argument( + '--mqtt-port', + type=int, + default=int(os.getenv('MQTT_PORT', '1883')), + help='MQTT broker port (default: 1883, env: MQTT_PORT)' + ) + + parser.add_argument( + '--mqtt-username', + default=os.getenv('MQTT_USERNAME', None), + help='MQTT username (env: MQTT_USERNAME)' + ) + + parser.add_argument( + '--mqtt-password', + 
default=os.getenv('MQTT_PASSWORD', None), + help='MQTT password (env: MQTT_PASSWORD)' + ) +``` + +**Factory Function:** + +```python +# In base/pubsub.py or base/pubsub_factory.py +def get_pubsub(**config) -> PubSubBackend: + """ + Create and return a pub/sub backend based on configuration. + + Args: + config: Configuration dict from command-line args + 'pubsub_backend' key selects the backend (default: 'pulsar') + + Returns: + Backend instance (PulsarBackend, MQTTBackend, etc.) + """ + backend_type = config.get('pubsub_backend', 'pulsar') + + if backend_type == 'pulsar': + return PulsarBackend( + host=config.get('pulsar_host'), + api_key=config.get('pulsar_api_key'), + listener=config.get('pulsar_listener'), + ) + elif backend_type == 'mqtt': + return MQTTBackend( + host=config.get('mqtt_host'), + port=config.get('mqtt_port'), + username=config.get('mqtt_username'), + password=config.get('mqtt_password'), + ) + else: + raise ValueError(f"Unknown pub/sub backend: {backend_type}") +``` + +**Usage in AsyncProcessor:** + +```python +# In async_processor.py +class AsyncProcessor: + def __init__(self, **params): + self.id = params.get("id") + + # Create backend from config (replaces PulsarClient) + self.pubsub = get_pubsub(**params) + + # Rest of initialization... +``` + +### Backend Interface + +```python +class PubSubBackend(Protocol): + """Protocol defining the interface all pub/sub backends must implement.""" + + def create_producer(self, topic: str, schema: type, **options) -> BackendProducer: + """ + Create a producer for a topic. + + Args: + topic: Generic topic format (qos/tenant/namespace/queue) + schema: Dataclass type for messages + options: Backend-specific options (e.g., chunking_enabled) + + Returns: + Backend-specific producer instance + """ + ... + + def create_consumer( + self, + topic: str, + subscription: str, + schema: type, + initial_position: str = 'latest', + consumer_type: str = 'shared', + **options + ) -> BackendConsumer: + """ + Create a consumer for a topic. 
+ + Args: + topic: Generic topic format (qos/tenant/namespace/queue) + subscription: Subscription/consumer group name + schema: Dataclass type for messages + initial_position: 'earliest' or 'latest' (MQTT may ignore) + consumer_type: 'shared', 'exclusive', 'failover' (MQTT may ignore) + options: Backend-specific options + + Returns: + Backend-specific consumer instance + """ + ... + + def close(self) -> None: + """Close the backend connection.""" + ... +``` + +```python +class BackendProducer(Protocol): + """Protocol for backend-specific producer.""" + + def send(self, message: Any, properties: dict | None = None) -> None: + """Send a message (dataclass instance) with optional properties.""" + ... + + def flush(self) -> None: + """Flush any buffered messages.""" + ... + + def close(self) -> None: + """Close the producer.""" + ... +``` + +```python +class BackendConsumer(Protocol): + """Protocol for backend-specific consumer.""" + + def receive(self, timeout_millis: int = 2000) -> Message: + """ + Receive a message from the topic. + + Raises: + TimeoutError: If no message received within timeout + """ + ... + + def acknowledge(self, message: Message) -> None: + """Acknowledge successful processing of a message.""" + ... + + def negative_acknowledge(self, message: Message) -> None: + """Negative acknowledge - triggers redelivery.""" + ... + + def unsubscribe(self) -> None: + """Unsubscribe from the topic.""" + ... + + def close(self) -> None: + """Close the consumer.""" + ... +``` + +```python +class Message(Protocol): + """Protocol for a received message.""" + + def value(self) -> Any: + """Get the deserialized message (dataclass instance).""" + ... + + def properties(self) -> dict: + """Get message properties/metadata.""" + ... 
+``` + +### Existing Classes Refactoring + +The existing `Consumer`, `Producer`, `Publisher`, `Subscriber` classes remain largely intact: + +**Current responsibilities (keep):** +- Async threading model and taskgroups +- Reconnection logic and retry handling +- Metrics collection +- Rate limiting +- Concurrency management + +**Changes needed:** +- Remove direct Pulsar imports (`pulsar.schema`, `pulsar.InitialPosition`, etc.) +- Accept `BackendProducer`/`BackendConsumer` instead of Pulsar client +- Delegate actual pub/sub operations to backend instances +- Map generic concepts to backend calls + +**Example refactoring:** + +```python +# OLD - consumer.py +class Consumer: + def __init__(self, client, topic, subscriber, schema, ...): + self.client = client # Direct Pulsar client + # ... + + async def consumer_run(self): + # Uses pulsar.InitialPosition, pulsar.ConsumerType + self.consumer = self.client.subscribe( + topic=self.topic, + schema=JsonSchema(self.schema), + initial_position=pulsar.InitialPosition.Earliest, + consumer_type=pulsar.ConsumerType.Shared, + ) + +# NEW - consumer.py +class Consumer: + def __init__(self, backend_consumer, schema, ...): + self.backend_consumer = backend_consumer # Backend-specific consumer + self.schema = schema + # ... 
+ + async def consumer_run(self): + # Backend consumer already created with right settings + # Just use it directly + while self.running: + msg = await asyncio.to_thread( + self.backend_consumer.receive, + timeout_millis=2000 + ) + await self.handle_message(msg) +``` + +### Backend-Specific Behaviors + +**Pulsar Backend:** +- Maps `q0` → `non-persistent://`, `q1`/`q2` → `persistent://` +- Supports all consumer types (shared, exclusive, failover) +- Supports initial position (earliest/latest) +- Native message acknowledgment +- Schema registry support + +**MQTT Backend:** +- Maps `q0`/`q1`/`q2` → MQTT QoS levels 0/1/2 +- Includes tenant/namespace in topic path for namespacing +- Auto-generates client IDs from subscription names +- Ignores initial position (no message history in basic MQTT) +- Ignores consumer type (MQTT uses client IDs, not consumer groups) +- Simple publish/subscribe model + +### Design Decisions Summary + +1. ✅ **Generic queue naming**: `qos/tenant/namespace/queue-name` format +2. ✅ **QoS in queue ID**: Determined by queue definition, not configuration +3. ✅ **Reconnection**: Handled by Consumer/Producer classes, not backends +4. ✅ **MQTT topics**: Include tenant/namespace for proper namespacing +5. ✅ **Message history**: MQTT ignores `initial_position` parameter (future enhancement) +6. 
✅ **Client IDs**: MQTT backend auto-generates from subscription name + +### Future Enhancements + +**MQTT message history:** +- Could add optional persistence layer (e.g., retained messages, external store) +- Would allow supporting `initial_position='earliest'` +- Not required for initial implementation + diff --git a/trustgraph-base/trustgraph/api/async_flow.py b/trustgraph-base/trustgraph/api/async_flow.py index ba8b9598..5d3cd486 100644 --- a/trustgraph-base/trustgraph/api/async_flow.py +++ b/trustgraph-base/trustgraph/api/async_flow.py @@ -159,12 +159,12 @@ class AsyncFlowInstance: result = await self.request("text-completion", request_data) return result.get("response", "") - async def graph_rag(self, question: str, user: str, collection: str, + async def graph_rag(self, query: str, user: str, collection: str, max_subgraph_size: int = 1000, max_subgraph_count: int = 5, max_entity_distance: int = 3, **kwargs: Any) -> str: """Graph RAG (non-streaming, use async_socket for streaming)""" request_data = { - "question": question, + "query": query, "user": user, "collection": collection, "max-subgraph-size": max_subgraph_size, @@ -177,11 +177,11 @@ class AsyncFlowInstance: result = await self.request("graph-rag", request_data) return result.get("response", "") - async def document_rag(self, question: str, user: str, collection: str, + async def document_rag(self, query: str, user: str, collection: str, doc_limit: int = 10, **kwargs: Any) -> str: """Document RAG (non-streaming, use async_socket for streaming)""" request_data = { - "question": question, + "query": query, "user": user, "collection": collection, "doc-limit": doc_limit, diff --git a/trustgraph-base/trustgraph/api/async_socket_client.py b/trustgraph-base/trustgraph/api/async_socket_client.py index 7c2a5aab..cb6c8605 100644 --- a/trustgraph-base/trustgraph/api/async_socket_client.py +++ b/trustgraph-base/trustgraph/api/async_socket_client.py @@ -208,12 +208,12 @@ class AsyncSocketFlowInstance: if 
hasattr(chunk, 'content'): yield chunk.content - async def graph_rag(self, question: str, user: str, collection: str, + async def graph_rag(self, query: str, user: str, collection: str, max_subgraph_size: int = 1000, max_subgraph_count: int = 5, max_entity_distance: int = 3, streaming: bool = False, **kwargs): """Graph RAG with optional streaming""" request = { - "question": question, + "query": query, "user": user, "collection": collection, "max-subgraph-size": max_subgraph_size, @@ -235,11 +235,11 @@ class AsyncSocketFlowInstance: if hasattr(chunk, 'content'): yield chunk.content - async def document_rag(self, question: str, user: str, collection: str, + async def document_rag(self, query: str, user: str, collection: str, doc_limit: int = 10, streaming: bool = False, **kwargs): """Document RAG with optional streaming""" request = { - "question": question, + "query": query, "user": user, "collection": collection, "doc-limit": doc_limit, diff --git a/trustgraph-base/trustgraph/api/flow.py b/trustgraph-base/trustgraph/api/flow.py index f7cda5c2..744ad2e7 100644 --- a/trustgraph-base/trustgraph/api/flow.py +++ b/trustgraph-base/trustgraph/api/flow.py @@ -160,14 +160,14 @@ class FlowInstance: )["answer"] def graph_rag( - self, question, user="trustgraph", collection="default", + self, query, user="trustgraph", collection="default", entity_limit=50, triple_limit=30, max_subgraph_size=150, max_path_length=2, ): # The input consists of a question input = { - "query": question, + "query": query, "user": user, "collection": collection, "entity-limit": entity_limit, @@ -182,13 +182,13 @@ class FlowInstance: )["response"] def document_rag( - self, question, user="trustgraph", collection="default", + self, query, user="trustgraph", collection="default", doc_limit=10, ): # The input consists of a question input = { - "query": question, + "query": query, "user": user, "collection": collection, "doc-limit": doc_limit, diff --git a/trustgraph-base/trustgraph/api/socket_client.py 
b/trustgraph-base/trustgraph/api/socket_client.py index c0fd9cd9..b1be0195 100644 --- a/trustgraph-base/trustgraph/api/socket_client.py +++ b/trustgraph-base/trustgraph/api/socket_client.py @@ -284,7 +284,7 @@ class SocketFlowInstance: def graph_rag( self, - question: str, + query: str, user: str, collection: str, max_subgraph_size: int = 1000, @@ -295,7 +295,7 @@ class SocketFlowInstance: ) -> Union[str, Iterator[str]]: """Graph RAG with optional streaming""" request = { - "question": question, + "query": query, "user": user, "collection": collection, "max-subgraph-size": max_subgraph_size, @@ -316,7 +316,7 @@ class SocketFlowInstance: def document_rag( self, - question: str, + query: str, user: str, collection: str, doc_limit: int = 10, @@ -325,7 +325,7 @@ class SocketFlowInstance: ) -> Union[str, Iterator[str]]: """Document RAG with optional streaming""" request = { - "question": question, + "query": query, "user": user, "collection": collection, "doc-limit": doc_limit, diff --git a/trustgraph-base/trustgraph/base/async_processor.py b/trustgraph-base/trustgraph/base/async_processor.py index 6b663118..8068c67d 100644 --- a/trustgraph-base/trustgraph/base/async_processor.py +++ b/trustgraph-base/trustgraph/base/async_processor.py @@ -15,7 +15,7 @@ from prometheus_client import start_http_server, Info from .. schema import ConfigPush, config_push_queue from .. log_level import LogLevel -from . pubsub import PulsarClient +from . pubsub import PulsarClient, get_pubsub from . producer import Producer from . consumer import Consumer from . 
metrics import ProcessorMetrics, ConsumerMetrics @@ -34,8 +34,11 @@ class AsyncProcessor: # Store the identity self.id = params.get("id") - # Register a pulsar client - self.pulsar_client_object = PulsarClient(**params) + # Create pub/sub backend via factory + self.pubsub_backend = get_pubsub(**params) + + # Store pulsar_host for backward compatibility + self._pulsar_host = params.get("pulsar_host", "pulsar://pulsar:6650") # Initialise metrics, records the parameters ProcessorMetrics(processor = self.id).info({ @@ -70,7 +73,7 @@ class AsyncProcessor: self.config_sub_task = Consumer( taskgroup = self.taskgroup, - client = self.pulsar_client, + backend = self.pubsub_backend, # Changed from client to backend subscriber = config_subscriber_id, flow = None, @@ -96,16 +99,16 @@ class AsyncProcessor: # This is called to stop all threads. An over-ride point for extra # functionality def stop(self): - self.pulsar_client.close() + self.pubsub_backend.close() self.running = False - # Returns the pulsar host + # Returns the pub/sub backend (new interface) @property - def pulsar_host(self): return self.pulsar_client_object.pulsar_host + def pubsub(self): return self.pubsub_backend - # Returns the pulsar client + # Returns the pulsar host (backward compatibility) @property - def pulsar_client(self): return self.pulsar_client_object.client + def pulsar_host(self): return self._pulsar_host # Register a new event handler for configuration change def register_config_handler(self, handler): @@ -247,6 +250,14 @@ class AsyncProcessor: @staticmethod def add_args(parser): + # Pub/sub backend selection + parser.add_argument( + '--pubsub-backend', + default=os.getenv('PUBSUB_BACKEND', 'pulsar'), + choices=['pulsar', 'mqtt'], + help='Pub/sub backend (default: pulsar, env: PUBSUB_BACKEND)', + ) + PulsarClient.add_args(parser) add_logging_args(parser) diff --git a/trustgraph-base/trustgraph/base/backend.py b/trustgraph-base/trustgraph/base/backend.py new file mode 100644 index 
00000000..b9f5f923 --- /dev/null +++ b/trustgraph-base/trustgraph/base/backend.py @@ -0,0 +1,148 @@ +""" +Backend abstraction interfaces for pub/sub systems. + +This module defines Protocol classes that all pub/sub backends must implement, +allowing TrustGraph to work with different messaging systems (Pulsar, MQTT, Kafka, etc.) +""" + +from typing import Protocol, Any, runtime_checkable + + +@runtime_checkable +class Message(Protocol): + """Protocol for a received message.""" + + def value(self) -> Any: + """ + Get the deserialized message content. + + Returns: + Dataclass instance representing the message + """ + ... + + def properties(self) -> dict: + """ + Get message properties/metadata. + + Returns: + Dictionary of message properties + """ + ... + + +@runtime_checkable +class BackendProducer(Protocol): + """Protocol for backend-specific producer.""" + + def send(self, message: Any, properties: dict = {}) -> None: + """ + Send a message (dataclass instance) with optional properties. + + Args: + message: Dataclass instance to send + properties: Optional metadata properties + """ + ... + + def flush(self) -> None: + """Flush any buffered messages.""" + ... + + def close(self) -> None: + """Close the producer.""" + ... + + +@runtime_checkable +class BackendConsumer(Protocol): + """Protocol for backend-specific consumer.""" + + def receive(self, timeout_millis: int = 2000) -> Message: + """ + Receive a message from the topic. + + Args: + timeout_millis: Timeout in milliseconds + + Returns: + Message object + + Raises: + TimeoutError: If no message received within timeout + """ + ... + + def acknowledge(self, message: Message) -> None: + """ + Acknowledge successful processing of a message. + + Args: + message: The message to acknowledge + """ + ... + + def negative_acknowledge(self, message: Message) -> None: + """ + Negative acknowledge - triggers redelivery. + + Args: + message: The message to negatively acknowledge + """ + ... 
+ + def unsubscribe(self) -> None: + """Unsubscribe from the topic.""" + ... + + def close(self) -> None: + """Close the consumer.""" + ... + + +@runtime_checkable +class PubSubBackend(Protocol): + """Protocol defining the interface all pub/sub backends must implement.""" + + def create_producer(self, topic: str, schema: type, **options) -> BackendProducer: + """ + Create a producer for a topic. + + Args: + topic: Generic topic format (qos/tenant/namespace/queue) + schema: Dataclass type for messages + **options: Backend-specific options (e.g., chunking_enabled) + + Returns: + Backend-specific producer instance + """ + ... + + def create_consumer( + self, + topic: str, + subscription: str, + schema: type, + initial_position: str = 'latest', + consumer_type: str = 'shared', + **options + ) -> BackendConsumer: + """ + Create a consumer for a topic. + + Args: + topic: Generic topic format (qos/tenant/namespace/queue) + subscription: Subscription/consumer group name + schema: Dataclass type for messages + initial_position: 'earliest' or 'latest' (some backends may ignore) + consumer_type: 'shared', 'exclusive', 'failover' (some backends may ignore) + **options: Backend-specific options + + Returns: + Backend-specific consumer instance + """ + ... + + def close(self) -> None: + """Close the backend connection.""" + ... diff --git a/trustgraph-base/trustgraph/base/consumer.py b/trustgraph-base/trustgraph/base/consumer.py index 43b4bc51..2a220312 100644 --- a/trustgraph-base/trustgraph/base/consumer.py +++ b/trustgraph-base/trustgraph/base/consumer.py @@ -9,9 +9,6 @@ # one handler, and a single thread of concurrency, nothing too outrageous # will happen if synchronous / blocking code is used -from pulsar.schema import JsonSchema -import pulsar -import _pulsar import asyncio import time import logging @@ -21,11 +18,15 @@ from .. 
exceptions import TooManyRequests # Module logger logger = logging.getLogger(__name__) +# Timeout exception - can come from different backends +class TimeoutError(Exception): + pass + class Consumer: def __init__( - self, taskgroup, flow, client, topic, subscriber, schema, - handler, + self, taskgroup, flow, backend, topic, subscriber, schema, + handler, metrics = None, start_of_messages=False, rate_limit_retry_time = 10, rate_limit_timeout = 7200, @@ -35,7 +36,7 @@ class Consumer: self.taskgroup = taskgroup self.flow = flow - self.client = client + self.backend = backend # Changed from 'client' to 'backend' self.topic = topic self.subscriber = subscriber self.schema = schema @@ -96,18 +97,20 @@ class Consumer: logger.info(f"Subscribing to topic: {self.topic}") + # Determine initial position if self.start_of_messages: - pos = pulsar.InitialPosition.Earliest + initial_pos = 'earliest' else: - pos = pulsar.InitialPosition.Latest + initial_pos = 'latest' + # Create consumer via backend self.consumer = await asyncio.to_thread( - self.client.subscribe, + self.backend.create_consumer, topic = self.topic, - subscription_name = self.subscriber, - schema = JsonSchema(self.schema), - initial_position = pos, - consumer_type = pulsar.ConsumerType.Shared, + subscription = self.subscriber, + schema = self.schema, + initial_position = initial_pos, + consumer_type = 'shared', ) except Exception as e: @@ -159,9 +162,10 @@ class Consumer: self.consumer.receive, timeout_millis=2000 ) - except _pulsar.Timeout: - continue except Exception as e: + # Handle timeout from any backend + if 'timeout' in str(type(e)).lower() or 'timeout' in str(e).lower(): + continue raise e await self.handle_one_from_queue(msg) diff --git a/trustgraph-base/trustgraph/base/consumer_spec.py b/trustgraph-base/trustgraph/base/consumer_spec.py index 89581b02..0ef4672b 100644 --- a/trustgraph-base/trustgraph/base/consumer_spec.py +++ b/trustgraph-base/trustgraph/base/consumer_spec.py @@ -19,7 +19,7 @@ class 
ConsumerSpec(Spec): consumer = Consumer( taskgroup = processor.taskgroup, flow = flow, - client = processor.pulsar_client, + backend = processor.pubsub, topic = definition[self.name], subscriber = processor.id + "--" + flow.name + "--" + self.name, schema = self.schema, diff --git a/trustgraph-base/trustgraph/base/producer.py b/trustgraph-base/trustgraph/base/producer.py index 0d65d1de..20b4b0d6 100644 --- a/trustgraph-base/trustgraph/base/producer.py +++ b/trustgraph-base/trustgraph/base/producer.py @@ -1,5 +1,4 @@ -from pulsar.schema import JsonSchema import asyncio import logging @@ -8,10 +7,10 @@ logger = logging.getLogger(__name__) class Producer: - def __init__(self, client, topic, schema, metrics=None, + def __init__(self, backend, topic, schema, metrics=None, chunking_enabled=True): - self.client = client + self.backend = backend # Changed from 'client' to 'backend' self.topic = topic self.schema = schema @@ -44,9 +43,9 @@ class Producer: try: logger.info(f"Connecting publisher to {self.topic}...") - self.producer = self.client.create_producer( + self.producer = self.backend.create_producer( topic = self.topic, - schema = JsonSchema(self.schema), + schema = self.schema, chunking_enabled = self.chunking_enabled, ) logger.info(f"Connected publisher to {self.topic}") diff --git a/trustgraph-base/trustgraph/base/producer_spec.py b/trustgraph-base/trustgraph/base/producer_spec.py index 9c8bbc6a..cf46b958 100644 --- a/trustgraph-base/trustgraph/base/producer_spec.py +++ b/trustgraph-base/trustgraph/base/producer_spec.py @@ -15,7 +15,7 @@ class ProducerSpec(Spec): ) producer = Producer( - client = processor.pulsar_client, + backend = processor.pubsub, topic = definition[self.name], schema = self.schema, metrics = producer_metrics, diff --git a/trustgraph-base/trustgraph/base/prompt_client.py b/trustgraph-base/trustgraph/base/prompt_client.py index 307a118a..55c54cfc 100644 --- a/trustgraph-base/trustgraph/base/prompt_client.py +++ 
b/trustgraph-base/trustgraph/base/prompt_client.py @@ -37,21 +37,20 @@ class PromptClient(RequestResponse): else: logger.info("DEBUG prompt_client: Streaming path") - # Streaming path - collect all chunks - full_text = "" - full_object = None + # Streaming path - just forward chunks, don't accumulate + last_text = "" + last_object = None - async def collect_chunks(resp): - nonlocal full_text, full_object - logger.info(f"DEBUG prompt_client: collect_chunks called, resp.text={resp.text[:50] if resp.text else None}, end_of_stream={getattr(resp, 'end_of_stream', False)}") + async def forward_chunks(resp): + nonlocal last_text, last_object + logger.info(f"DEBUG prompt_client: forward_chunks called, resp.text={resp.text[:50] if resp.text else None}, end_of_stream={getattr(resp, 'end_of_stream', False)}") if resp.error: logger.error(f"DEBUG prompt_client: Error in response: {resp.error.message}") raise RuntimeError(resp.error.message) if resp.text: - full_text += resp.text - logger.info(f"DEBUG prompt_client: Accumulated {len(full_text)} chars") + last_text = resp.text # Call chunk callback if provided if chunk_callback: logger.info(f"DEBUG prompt_client: Calling chunk_callback") @@ -61,7 +60,7 @@ class PromptClient(RequestResponse): chunk_callback(resp.text) elif resp.object: logger.info(f"DEBUG prompt_client: Got object response") - full_object = resp.object + last_object = resp.object end_stream = getattr(resp, 'end_of_stream', False) logger.info(f"DEBUG prompt_client: Returning end_of_stream={end_stream}") @@ -79,17 +78,17 @@ class PromptClient(RequestResponse): logger.info(f"DEBUG prompt_client: About to call self.request with recipient, timeout={timeout}") await self.request( req, - recipient=collect_chunks, + recipient=forward_chunks, timeout=timeout ) - logger.info(f"DEBUG prompt_client: self.request returned, full_text has {len(full_text)} chars") + logger.info(f"DEBUG prompt_client: self.request returned, last_text={last_text[:50] if last_text else None}") - if 
full_text: - logger.info("DEBUG prompt_client: Returning full_text") - return full_text + if last_text: + logger.info("DEBUG prompt_client: Returning last_text") + return last_text - logger.info("DEBUG prompt_client: Returning parsed full_object") - return json.loads(full_object) + logger.info("DEBUG prompt_client: Returning parsed last_object") + return json.loads(last_object) if last_object else None async def extract_definitions(self, text, timeout=600): return await self.prompt( diff --git a/trustgraph-base/trustgraph/base/publisher.py b/trustgraph-base/trustgraph/base/publisher.py index 5a481f82..0297d2b5 100644 --- a/trustgraph-base/trustgraph/base/publisher.py +++ b/trustgraph-base/trustgraph/base/publisher.py @@ -1,9 +1,6 @@ -from pulsar.schema import JsonSchema - import asyncio import time -import pulsar import logging # Module logger @@ -11,9 +8,9 @@ logger = logging.getLogger(__name__) class Publisher: - def __init__(self, client, topic, schema=None, max_size=10, + def __init__(self, backend, topic, schema=None, max_size=10, chunking_enabled=True, drain_timeout=5.0): - self.client = client + self.backend = backend # Changed from 'client' to 'backend' self.topic = topic self.schema = schema self.q = asyncio.Queue(maxsize=max_size) @@ -47,9 +44,9 @@ class Publisher: try: - producer = self.client.create_producer( + producer = self.backend.create_producer( topic=self.topic, - schema=JsonSchema(self.schema), + schema=self.schema, chunking_enabled=self.chunking_enabled, ) diff --git a/trustgraph-base/trustgraph/base/pubsub.py b/trustgraph-base/trustgraph/base/pubsub.py index 89f19880..a7772b67 100644 --- a/trustgraph-base/trustgraph/base/pubsub.py +++ b/trustgraph-base/trustgraph/base/pubsub.py @@ -4,8 +4,45 @@ import pulsar import _pulsar import uuid from pulsar.schema import JsonSchema +import logging from .. 
log_level import LogLevel +from .pulsar_backend import PulsarBackend + +logger = logging.getLogger(__name__) + + +def get_pubsub(**config): + """ + Factory function to create a pub/sub backend based on configuration. + + Args: + config: Configuration dictionary from command-line args + Must include 'pubsub_backend' key + + Returns: + Backend instance (PulsarBackend, MQTTBackend, etc.) + + Example: + backend = get_pubsub( + pubsub_backend='pulsar', + pulsar_host='pulsar://localhost:6650' + ) + """ + backend_type = config.get('pubsub_backend', 'pulsar') + + if backend_type == 'pulsar': + return PulsarBackend( + host=config.get('pulsar_host', PulsarClient.default_pulsar_host), + api_key=config.get('pulsar_api_key', PulsarClient.default_pulsar_api_key), + listener=config.get('pulsar_listener'), + ) + elif backend_type == 'mqtt': + # TODO: Implement MQTT backend + raise NotImplementedError("MQTT backend not yet implemented") + else: + raise ValueError(f"Unknown pub/sub backend: {backend_type}") + class PulsarClient: diff --git a/trustgraph-base/trustgraph/base/pulsar_backend.py b/trustgraph-base/trustgraph/base/pulsar_backend.py new file mode 100644 index 00000000..1c55be9b --- /dev/null +++ b/trustgraph-base/trustgraph/base/pulsar_backend.py @@ -0,0 +1,350 @@ +""" +Pulsar backend implementation for pub/sub abstraction. + +This module provides a Pulsar-specific implementation of the backend interfaces, +handling topic mapping, serialization, and Pulsar client management. +""" + +import pulsar +import _pulsar +import json +import logging +import base64 +import types +from dataclasses import asdict, is_dataclass +from typing import Any + +from .backend import PubSubBackend, BackendProducer, BackendConsumer, Message + +logger = logging.getLogger(__name__) + + +def dataclass_to_dict(obj: Any) -> dict: + """ + Recursively convert a dataclass to a dictionary, handling None values and bytes. + + None values are excluded from the dictionary (not serialized). 
+ Bytes values are decoded as UTF-8 strings for JSON serialization (matching Pulsar behavior). + """ + if obj is None: + return None + + if is_dataclass(obj): + result = {} + for key, value in asdict(obj).items(): + if value is not None: + if isinstance(value, bytes): + # Decode bytes as UTF-8 for JSON serialization (like Pulsar did) + result[key] = value.decode('utf-8') + elif is_dataclass(value): + result[key] = dataclass_to_dict(value) + elif isinstance(value, list): + result[key] = [ + item.decode('utf-8') if isinstance(item, bytes) + else dataclass_to_dict(item) if is_dataclass(item) + else item + for item in value + ] + elif isinstance(value, dict): + result[key] = {k: dataclass_to_dict(v) if is_dataclass(v) else v for k, v in value.items()} + else: + result[key] = value + return result + return obj + + +def dict_to_dataclass(data: dict, cls: type) -> Any: + """ + Convert a dictionary back to a dataclass instance. + + Handles nested dataclasses and missing fields. + """ + if data is None: + return None + + if not is_dataclass(cls): + return data + + # Get field types from the dataclass + field_types = {f.name: f.type for f in cls.__dataclass_fields__.values()} + kwargs = {} + + for key, value in data.items(): + if key in field_types: + field_type = field_types[key] + + # Handle modern union types (X | Y) + if isinstance(field_type, types.UnionType): + # Check if it's Optional (X | None) + if type(None) in field_type.__args__: + # Get the non-None type + actual_type = next((t for t in field_type.__args__ if t is not type(None)), None) + if actual_type and is_dataclass(actual_type) and isinstance(value, dict): + kwargs[key] = dict_to_dataclass(value, actual_type) + else: + kwargs[key] = value + else: + kwargs[key] = value + # Check if this is a generic type (list, dict, etc.) 
+ elif hasattr(field_type, '__origin__'): + # Handle list[T] + if field_type.__origin__ == list: + item_type = field_type.__args__[0] if field_type.__args__ else None + if item_type and is_dataclass(item_type) and isinstance(value, list): + kwargs[key] = [ + dict_to_dataclass(item, item_type) if isinstance(item, dict) else item + for item in value + ] + else: + kwargs[key] = value + # Handle old-style Optional[T] (which is Union[T, None]) + elif hasattr(field_type, '__args__') and type(None) in field_type.__args__: + # Get the non-None type from Union + actual_type = next((t for t in field_type.__args__ if t is not type(None)), None) + if actual_type and is_dataclass(actual_type) and isinstance(value, dict): + kwargs[key] = dict_to_dataclass(value, actual_type) + else: + kwargs[key] = value + else: + kwargs[key] = value + # Handle direct dataclass fields + elif is_dataclass(field_type) and isinstance(value, dict): + kwargs[key] = dict_to_dataclass(value, field_type) + # Handle bytes fields (UTF-8 encoded strings from JSON) + elif field_type == bytes and isinstance(value, str): + kwargs[key] = value.encode('utf-8') + else: + kwargs[key] = value + + return cls(**kwargs) + + +class PulsarMessage: + """Wrapper for Pulsar messages to match Message protocol.""" + + def __init__(self, pulsar_msg, schema_cls): + self._msg = pulsar_msg + self._schema_cls = schema_cls + self._value = None + + def value(self) -> Any: + """Deserialize and return the message value as a dataclass.""" + if self._value is None: + # Get JSON string from Pulsar message + json_data = self._msg.data().decode('utf-8') + data_dict = json.loads(json_data) + # Convert to dataclass + self._value = dict_to_dataclass(data_dict, self._schema_cls) + return self._value + + def properties(self) -> dict: + """Return message properties.""" + return self._msg.properties() + + +class PulsarBackendProducer: + """Pulsar-specific producer implementation.""" + + def __init__(self, pulsar_producer, schema_cls): + 
self._producer = pulsar_producer + self._schema_cls = schema_cls + + def send(self, message: Any, properties: dict = {}) -> None: + """Send a dataclass message.""" + # Convert dataclass to dict, excluding None values + data_dict = dataclass_to_dict(message) + # Serialize to JSON + json_data = json.dumps(data_dict) + # Send via Pulsar + self._producer.send(json_data.encode('utf-8'), properties=properties) + + def flush(self) -> None: + """Flush buffered messages.""" + self._producer.flush() + + def close(self) -> None: + """Close the producer.""" + self._producer.close() + + +class PulsarBackendConsumer: + """Pulsar-specific consumer implementation.""" + + def __init__(self, pulsar_consumer, schema_cls): + self._consumer = pulsar_consumer + self._schema_cls = schema_cls + + def receive(self, timeout_millis: int = 2000) -> Message: + """Receive a message.""" + pulsar_msg = self._consumer.receive(timeout_millis=timeout_millis) + return PulsarMessage(pulsar_msg, self._schema_cls) + + def acknowledge(self, message: Message) -> None: + """Acknowledge a message.""" + if isinstance(message, PulsarMessage): + self._consumer.acknowledge(message._msg) + + def negative_acknowledge(self, message: Message) -> None: + """Negative acknowledge a message.""" + if isinstance(message, PulsarMessage): + self._consumer.negative_acknowledge(message._msg) + + def unsubscribe(self) -> None: + """Unsubscribe from the topic.""" + self._consumer.unsubscribe() + + def close(self) -> None: + """Close the consumer.""" + self._consumer.close() + + +class PulsarBackend: + """ + Pulsar backend implementation. + + Handles topic mapping, client management, and creation of Pulsar-specific + producers and consumers. + """ + + def __init__(self, host: str, api_key: str = None, listener: str = None): + """ + Initialize Pulsar backend. 
def map_topic(self, generic_topic: str) -> str:
    """
    Map generic topic format to Pulsar URI.

    Format: qos/tenant/namespace/queue
    Example: q1/tg/flow/my-queue -> persistent://tg/flow/my-queue

    Args:
        generic_topic: Generic topic string or already-formatted Pulsar URI

    Returns:
        Pulsar topic URI

    Raises:
        ValueError: If the topic does not have four non-empty components
            or its QoS level is not q0, q1 or q2
    """
    # If already a Pulsar URI, return as-is
    if '://' in generic_topic:
        return generic_topic

    # maxsplit=3 keeps any further slashes as part of the queue name
    parts = generic_topic.split('/', 3)

    # An empty component (e.g. "q1//ns/queue" or a trailing slash) would
    # otherwise yield a malformed URI that only fails later at the broker
    if len(parts) != 4 or not all(parts):
        raise ValueError(f"Invalid topic format: {generic_topic}, expected qos/tenant/namespace/queue")

    qos, tenant, namespace, queue = parts

    # Map QoS to persistence: q0 is best-effort, q1/q2 are both persistent
    persistence = {
        'q0': 'non-persistent',
        'q1': 'persistent',
        'q2': 'persistent',
    }.get(qos)

    if persistence is None:
        raise ValueError(f"Invalid QoS level: {qos}, expected q0, q1, or q2")

    return f"{persistence}://{tenant}/{namespace}/{queue}"
def create_consumer(
    self,
    topic: str,
    subscription: str,
    schema: type,
    initial_position: str = 'latest',
    consumer_type: str = 'shared',
    **options
) -> BackendConsumer:
    """
    Create a Pulsar consumer.

    Args:
        topic: Generic topic format (qos/tenant/namespace/queue)
        subscription: Subscription name
        schema: Dataclass type for messages
        initial_position: 'earliest' or 'latest'
        consumer_type: 'shared', 'exclusive', or 'failover'
        **options: Backend-specific options

    Returns:
        PulsarBackendConsumer instance

    Raises:
        ValueError: If initial_position or consumer_type is not one of
            the values listed above
    """
    # Reject unknown values before contacting the broker. Falling back to
    # 'latest' / 'shared' would hide a misspelt argument and silently
    # change where the subscription starts or how messages are shared.
    if initial_position not in ('earliest', 'latest'):
        raise ValueError(
            f"Invalid initial position: {initial_position}, "
            "expected earliest or latest"
        )

    if consumer_type not in ('shared', 'exclusive', 'failover'):
        raise ValueError(
            f"Invalid consumer type: {consumer_type}, "
            "expected shared, exclusive, or failover"
        )

    pulsar_topic = self.map_topic(topic)

    # Map initial position
    if initial_position == 'earliest':
        pos = pulsar.InitialPosition.Earliest
    else:
        pos = pulsar.InitialPosition.Latest

    # Map consumer type
    if consumer_type == 'exclusive':
        ctype = pulsar.ConsumerType.Exclusive
    elif consumer_type == 'failover':
        ctype = pulsar.ConsumerType.Failover
    else:
        ctype = pulsar.ConsumerType.Shared

    consumer_args = {
        'topic': pulsar_topic,
        'subscription_name': subscription,
        'schema': pulsar.schema.BytesSchema(),  # We handle deserialization ourselves
        'initial_position': pos,
        'consumer_type': ctype,
    }

    pulsar_consumer = self.client.subscribe(**consumer_args)
    logger.debug(f"Created consumer for topic: {pulsar_topic}, subscription: {subscription}")

    return PulsarBackendConsumer(pulsar_consumer, schema)
logger = logging.getLogger(__name__) +# Timeout exception - can come from different backends +class TimeoutError(Exception): + pass + class Subscriber: - def __init__(self, client, topic, subscription, consumer_name, + def __init__(self, backend, topic, subscription, consumer_name, schema=None, max_size=100, metrics=None, backpressure_strategy="block", drain_timeout=5.0): - self.client = client + self.backend = backend # Changed from 'client' to 'backend' self.topic = topic self.subscription = subscription self.consumer_name = consumer_name @@ -43,18 +45,14 @@ class Subscriber: async def start(self): - # Build subscribe arguments - subscribe_args = { - 'topic': self.topic, - 'subscription_name': self.subscription, - 'consumer_name': self.consumer_name, - } - - # Only add schema if provided (omit if None) - if self.schema is not None: - subscribe_args['schema'] = JsonSchema(self.schema) - - self.consumer = self.client.subscribe(**subscribe_args) + # Create consumer via backend + self.consumer = await asyncio.to_thread( + self.backend.create_consumer, + topic=self.topic, + subscription=self.subscription, + schema=self.schema, + consumer_type='shared', + ) self.task = asyncio.create_task(self.run()) @@ -94,12 +92,13 @@ class Subscriber: drain_end_time = time.time() + self.drain_timeout logger.info(f"Subscriber entering drain mode, timeout={self.drain_timeout}s") - # Stop accepting new messages from Pulsar during drain - if self.consumer: + # Stop accepting new messages during drain + # Note: Not all backends support pausing message listeners + if self.consumer and hasattr(self.consumer, 'pause_message_listener'): try: self.consumer.pause_message_listener() - except _pulsar.InvalidConfiguration: - # Not all consumers have message listeners (e.g., blocking receive mode) + except Exception: + # Not all consumers support message listeners pass # Check drain timeout @@ -133,9 +132,10 @@ class Subscriber: self.consumer.receive, timeout_millis=250 ) - except _pulsar.Timeout: 
- continue except Exception as e: + # Handle timeout from any backend + if 'timeout' in str(type(e)).lower() or 'timeout' in str(e).lower(): + continue logger.error(f"Exception in subscriber receive: {e}", exc_info=True) raise e @@ -157,19 +157,20 @@ class Subscriber: for msg in self.pending_acks.values(): try: self.consumer.negative_acknowledge(msg) - except _pulsar.AlreadyClosed: - pass # Consumer already closed + except Exception: + pass # Consumer already closed or error self.pending_acks.clear() if self.consumer: - try: - self.consumer.unsubscribe() - except _pulsar.AlreadyClosed: - pass # Already closed + if hasattr(self.consumer, 'unsubscribe'): + try: + self.consumer.unsubscribe() + except Exception: + pass # Already closed or error try: self.consumer.close() - except _pulsar.AlreadyClosed: - pass # Already closed + except Exception: + pass # Already closed or error self.consumer = None diff --git a/trustgraph-base/trustgraph/base/subscriber_spec.py b/trustgraph-base/trustgraph/base/subscriber_spec.py index 7dca09db..b408366c 100644 --- a/trustgraph-base/trustgraph/base/subscriber_spec.py +++ b/trustgraph-base/trustgraph/base/subscriber_spec.py @@ -16,7 +16,7 @@ class SubscriberSpec(Spec): ) subscriber = Subscriber( - client = processor.pulsar_client, + backend = processor.pubsub, topic = definition[self.name], subscription = flow.id, consumer_name = flow.id, diff --git a/trustgraph-base/trustgraph/clients/base.py b/trustgraph-base/trustgraph/clients/base.py index 25eac3b7..3a4da6ec 100644 --- a/trustgraph-base/trustgraph/clients/base.py +++ b/trustgraph-base/trustgraph/clients/base.py @@ -7,6 +7,7 @@ import time from pulsar.schema import JsonSchema from .. exceptions import * +from ..base.pubsub import get_pubsub # Default timeout for a request/response. In seconds. 
DEFAULT_TIMEOUT=300 @@ -39,30 +40,25 @@ class BaseClient: if subscriber == None: subscriber = str(uuid.uuid4()) - if pulsar_api_key: - auth = pulsar.AuthenticationToken(pulsar_api_key) - self.client = pulsar.Client( - pulsar_host, - logger=pulsar.ConsoleLogger(log_level), - authentication=auth, - listener=listener, - ) - else: - self.client = pulsar.Client( - pulsar_host, - logger=pulsar.ConsoleLogger(log_level), - listener_name=listener, - ) + # Create backend using factory + self.backend = get_pubsub( + pulsar_host=pulsar_host, + pulsar_api_key=pulsar_api_key, + pulsar_listener=listener, + pubsub_backend='pulsar' + ) - self.producer = self.client.create_producer( + self.producer = self.backend.create_producer( topic=input_queue, - schema=JsonSchema(input_schema), + schema=input_schema, chunking_enabled=True, ) - self.consumer = self.client.subscribe( - output_queue, subscriber, - schema=JsonSchema(output_schema), + self.consumer = self.backend.create_consumer( + topic=output_queue, + subscription=subscriber, + schema=output_schema, + consumer_type='shared', ) self.input_schema = input_schema @@ -136,10 +132,11 @@ class BaseClient: if hasattr(self, "consumer"): self.consumer.close() - + if hasattr(self, "producer"): self.producer.flush() self.producer.close() - - self.client.close() + + if hasattr(self, "backend"): + self.backend.close() diff --git a/trustgraph-base/trustgraph/clients/config_client.py b/trustgraph-base/trustgraph/clients/config_client.py index ed8c704a..be2bf5b9 100644 --- a/trustgraph-base/trustgraph/clients/config_client.py +++ b/trustgraph-base/trustgraph/clients/config_client.py @@ -64,7 +64,6 @@ class ConfigClient(BaseClient): def get(self, keys, timeout=300): resp = self.call( - id=id, operation="get", keys=[ ConfigKey( @@ -88,7 +87,6 @@ class ConfigClient(BaseClient): def list(self, type, timeout=300): resp = self.call( - id=id, operation="list", type=type, timeout=timeout @@ -99,7 +97,6 @@ class ConfigClient(BaseClient): def 
getvalues(self, type, timeout=300): resp = self.call( - id=id, operation="getvalues", type=type, timeout=timeout @@ -117,7 +114,6 @@ class ConfigClient(BaseClient): def delete(self, keys, timeout=300): resp = self.call( - id=id, operation="delete", keys=[ ConfigKey( @@ -134,7 +130,6 @@ class ConfigClient(BaseClient): def put(self, values, timeout=300): resp = self.call( - id=id, operation="put", values=[ ConfigValue( @@ -152,7 +147,6 @@ class ConfigClient(BaseClient): def config(self, timeout=300): resp = self.call( - id=id, operation="config", timeout=timeout ) diff --git a/trustgraph-base/trustgraph/messaging/translators/retrieval.py b/trustgraph-base/trustgraph/messaging/translators/retrieval.py index 441a9d18..d2161cff 100644 --- a/trustgraph-base/trustgraph/messaging/translators/retrieval.py +++ b/trustgraph-base/trustgraph/messaging/translators/retrieval.py @@ -34,14 +34,12 @@ class DocumentRagResponseTranslator(MessageTranslator): def from_pulsar(self, obj: DocumentRagResponse) -> Dict[str, Any]: result = {} - # Check if this is a streaming response (has chunk) - if hasattr(obj, 'chunk') and obj.chunk: - result["chunk"] = obj.chunk - result["end_of_stream"] = getattr(obj, "end_of_stream", False) - else: - # Non-streaming response - if obj.response: - result["response"] = obj.response + # Include response content (chunk or complete) + if obj.response: + result["response"] = obj.response + + # Include end_of_stream flag + result["end_of_stream"] = getattr(obj, "end_of_stream", False) # Always include error if present if hasattr(obj, 'error') and obj.error and obj.error.message: @@ -51,13 +49,7 @@ class DocumentRagResponseTranslator(MessageTranslator): def from_response_with_completion(self, obj: DocumentRagResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - # For streaming responses, check end_of_stream - if hasattr(obj, 'chunk') and obj.chunk: - is_final = getattr(obj, 'end_of_stream', False) - else: - # For non-streaming 
responses, it's always final - is_final = True - + is_final = getattr(obj, 'end_of_stream', False) return self.from_pulsar(obj), is_final @@ -98,14 +90,12 @@ class GraphRagResponseTranslator(MessageTranslator): def from_pulsar(self, obj: GraphRagResponse) -> Dict[str, Any]: result = {} - # Check if this is a streaming response (has chunk) - if hasattr(obj, 'chunk') and obj.chunk: - result["chunk"] = obj.chunk - result["end_of_stream"] = getattr(obj, "end_of_stream", False) - else: - # Non-streaming response - if obj.response: - result["response"] = obj.response + # Include response content (chunk or complete) + if obj.response: + result["response"] = obj.response + + # Include end_of_stream flag + result["end_of_stream"] = getattr(obj, "end_of_stream", False) # Always include error if present if hasattr(obj, 'error') and obj.error and obj.error.message: @@ -115,11 +105,5 @@ class GraphRagResponseTranslator(MessageTranslator): def from_response_with_completion(self, obj: GraphRagResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - # For streaming responses, check end_of_stream - if hasattr(obj, 'chunk') and obj.chunk: - is_final = getattr(obj, 'end_of_stream', False) - else: - # For non-streaming responses, it's always final - is_final = True - + is_final = getattr(obj, 'end_of_stream', False) return self.from_pulsar(obj), is_final \ No newline at end of file diff --git a/trustgraph-base/trustgraph/schema/core/metadata.py b/trustgraph-base/trustgraph/schema/core/metadata.py index cb2022ac..1888e612 100644 --- a/trustgraph-base/trustgraph/schema/core/metadata.py +++ b/trustgraph-base/trustgraph/schema/core/metadata.py @@ -1,16 +1,14 @@ - -from pulsar.schema import Record, String, Array +from dataclasses import dataclass, field from .primitives import Triple -class Metadata(Record): - +@dataclass +class Metadata: # Source identifier - id = String() + id: str = "" # Subgraph - metadata = Array(Triple()) + metadata: list[Triple] = 
field(default_factory=list) # Collection management - user = String() - collection = String() - + user: str = "" + collection: str = "" diff --git a/trustgraph-base/trustgraph/schema/core/primitives.py b/trustgraph-base/trustgraph/schema/core/primitives.py index fb85d05c..02517614 100644 --- a/trustgraph-base/trustgraph/schema/core/primitives.py +++ b/trustgraph-base/trustgraph/schema/core/primitives.py @@ -1,34 +1,39 @@ -from pulsar.schema import Record, String, Boolean, Array, Integer +from dataclasses import dataclass, field -class Error(Record): - type = String() - message = String() +@dataclass +class Error: + type: str = "" + message: str = "" -class Value(Record): - value = String() - is_uri = Boolean() - type = String() +@dataclass +class Value: + value: str = "" + is_uri: bool = False + type: str = "" -class Triple(Record): - s = Value() - p = Value() - o = Value() +@dataclass +class Triple: + s: Value | None = None + p: Value | None = None + o: Value | None = None -class Field(Record): - name = String() +@dataclass +class Field: + name: str = "" # int, string, long, bool, float, double, timestamp - type = String() - size = Integer() - primary = Boolean() - description = String() + type: str = "" + size: int = 0 + primary: bool = False + description: str = "" # NEW FIELDS for structured data: - required = Boolean() # Whether field is required - enum_values = Array(String()) # For enum type fields - indexed = Boolean() # Whether field should be indexed + required: bool = False # Whether field is required + enum_values: list[str] = field(default_factory=list) # For enum type fields + indexed: bool = False # Whether field should be indexed -class RowSchema(Record): - name = String() - description = String() - fields = Array(Field()) +@dataclass +class RowSchema: + name: str = "" + description: str = "" + fields: list[Field] = field(default_factory=list) diff --git a/trustgraph-base/trustgraph/schema/core/topic.py 
b/trustgraph-base/trustgraph/schema/core/topic.py index cdd643b7..09c633e4 100644 --- a/trustgraph-base/trustgraph/schema/core/topic.py +++ b/trustgraph-base/trustgraph/schema/core/topic.py @@ -1,4 +1,23 @@ -def topic(topic, kind='persistent', tenant='tg', namespace='flow'): - return f"{kind}://{tenant}/{namespace}/{topic}" +def topic(queue_name, qos='q1', tenant='tg', namespace='flow'): + """ + Create a generic topic identifier that can be mapped by backends. + + Args: + queue_name: The queue/topic name + qos: Quality of service + - 'q0' = best-effort (no ack) + - 'q1' = at-least-once (ack required) + - 'q2' = exactly-once (two-phase ack) + tenant: Tenant identifier for multi-tenancy + namespace: Namespace within tenant + + Returns: + Generic topic string: qos/tenant/namespace/queue_name + + Examples: + topic('my-queue') # q1/tg/flow/my-queue + topic('config', qos='q2', namespace='config') # q2/tg/config/config + """ + return f"{qos}/{tenant}/{namespace}/{queue_name}" diff --git a/trustgraph-base/trustgraph/schema/knowledge/document.py b/trustgraph-base/trustgraph/schema/knowledge/document.py index f41ee8a6..d8ce97b4 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/document.py +++ b/trustgraph-base/trustgraph/schema/knowledge/document.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, Bytes +from dataclasses import dataclass from ..core.metadata import Metadata from ..core.topic import topic @@ -6,24 +6,27 @@ from ..core.topic import topic ############################################################################ # PDF docs etc. 
-class Document(Record): - metadata = Metadata() - data = Bytes() +@dataclass +class Document: + metadata: Metadata | None = None + data: bytes = b"" ############################################################################ # Text documents / text from PDF -class TextDocument(Record): - metadata = Metadata() - text = Bytes() +@dataclass +class TextDocument: + metadata: Metadata | None = None + text: bytes = b"" ############################################################################ # Chunks of text -class Chunk(Record): - metadata = Metadata() - chunk = Bytes() +@dataclass +class Chunk: + metadata: Metadata | None = None + chunk: bytes = b"" -############################################################################ \ No newline at end of file +############################################################################ diff --git a/trustgraph-base/trustgraph/schema/knowledge/embeddings.py b/trustgraph-base/trustgraph/schema/knowledge/embeddings.py index cfdae068..a3e5b394 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/embeddings.py +++ b/trustgraph-base/trustgraph/schema/knowledge/embeddings.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double, Map +from dataclasses import dataclass, field from ..core.metadata import Metadata from ..core.primitives import Value, RowSchema @@ -8,49 +8,55 @@ from ..core.topic import topic # Graph embeddings are embeddings associated with a graph entity -class EntityEmbeddings(Record): - entity = Value() - vectors = Array(Array(Double())) +@dataclass +class EntityEmbeddings: + entity: Value | None = None + vectors: list[list[float]] = field(default_factory=list) # This is a 'batching' mechanism for the above data -class GraphEmbeddings(Record): - metadata = Metadata() - entities = Array(EntityEmbeddings()) +@dataclass +class GraphEmbeddings: + metadata: Metadata | None = None + entities: list[EntityEmbeddings] = field(default_factory=list) 
############################################################################ # Document embeddings are embeddings associated with a chunk -class ChunkEmbeddings(Record): - chunk = Bytes() - vectors = Array(Array(Double())) +@dataclass +class ChunkEmbeddings: + chunk: bytes = b"" + vectors: list[list[float]] = field(default_factory=list) # This is a 'batching' mechanism for the above data -class DocumentEmbeddings(Record): - metadata = Metadata() - chunks = Array(ChunkEmbeddings()) +@dataclass +class DocumentEmbeddings: + metadata: Metadata | None = None + chunks: list[ChunkEmbeddings] = field(default_factory=list) ############################################################################ # Object embeddings are embeddings associated with the primary key of an # object -class ObjectEmbeddings(Record): - metadata = Metadata() - vectors = Array(Array(Double())) - name = String() - key_name = String() - id = String() +@dataclass +class ObjectEmbeddings: + metadata: Metadata | None = None + vectors: list[list[float]] = field(default_factory=list) + name: str = "" + key_name: str = "" + id: str = "" ############################################################################ # Structured object embeddings with enhanced capabilities -class StructuredObjectEmbedding(Record): - metadata = Metadata() - vectors = Array(Array(Double())) - schema_name = String() - object_id = String() # Primary key value - field_embeddings = Map(Array(Double())) # Per-field embeddings +@dataclass +class StructuredObjectEmbedding: + metadata: Metadata | None = None + vectors: list[list[float]] = field(default_factory=list) + schema_name: str = "" + object_id: str = "" # Primary key value + field_embeddings: dict[str, list[float]] = field(default_factory=dict) # Per-field embeddings -############################################################################ \ No newline at end of file +############################################################################ diff --git 
a/trustgraph-base/trustgraph/schema/knowledge/graph.py b/trustgraph-base/trustgraph/schema/knowledge/graph.py index 1d55c8f0..9040c25e 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/graph.py +++ b/trustgraph-base/trustgraph/schema/knowledge/graph.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, String, Array +from dataclasses import dataclass, field from ..core.primitives import Value, Triple from ..core.metadata import Metadata @@ -8,21 +8,24 @@ from ..core.topic import topic # Entity context are an entity associated with textual context -class EntityContext(Record): - entity = Value() - context = String() +@dataclass +class EntityContext: + entity: Value | None = None + context: str = "" # This is a 'batching' mechanism for the above data -class EntityContexts(Record): - metadata = Metadata() - entities = Array(EntityContext()) +@dataclass +class EntityContexts: + metadata: Metadata | None = None + entities: list[EntityContext] = field(default_factory=list) ############################################################################ # Graph triples -class Triples(Record): - metadata = Metadata() - triples = Array(Triple()) +@dataclass +class Triples: + metadata: Metadata | None = None + triples: list[Triple] = field(default_factory=list) -############################################################################ \ No newline at end of file +############################################################################ diff --git a/trustgraph-base/trustgraph/schema/knowledge/knowledge.py b/trustgraph-base/trustgraph/schema/knowledge/knowledge.py index 7cd5450e..cffcbac7 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/knowledge.py +++ b/trustgraph-base/trustgraph/schema/knowledge/knowledge.py @@ -1,5 +1,4 @@ - -from pulsar.schema import Record, Bytes, String, Array, Long, Boolean +from dataclasses import dataclass, field from ..core.primitives import Triple, Error from ..core.topic import topic from ..core.metadata import Metadata @@ -22,40 
+21,40 @@ from .embeddings import GraphEmbeddings # <- () # <- (error) -class KnowledgeRequest(Record): - +@dataclass +class KnowledgeRequest: # get-kg-core, delete-kg-core, list-kg-cores, put-kg-core # load-kg-core, unload-kg-core - operation = String() + operation: str = "" # list-kg-cores, delete-kg-core, put-kg-core - user = String() + user: str = "" # get-kg-core, list-kg-cores, delete-kg-core, put-kg-core, # load-kg-core, unload-kg-core - id = String() + id: str = "" # load-kg-core - flow = String() + flow: str = "" # load-kg-core - collection = String() + collection: str = "" # put-kg-core - triples = Triples() - graph_embeddings = GraphEmbeddings() + triples: Triples | None = None + graph_embeddings: GraphEmbeddings | None = None -class KnowledgeResponse(Record): - error = Error() - ids = Array(String()) - eos = Boolean() # Indicates end of knowledge core stream - triples = Triples() - graph_embeddings = GraphEmbeddings() +@dataclass +class KnowledgeResponse: + error: Error | None = None + ids: list[str] = field(default_factory=list) + eos: bool = False # Indicates end of knowledge core stream + triples: Triples | None = None + graph_embeddings: GraphEmbeddings | None = None knowledge_request_queue = topic( - 'knowledge', kind='non-persistent', namespace='request' + 'knowledge', qos='q0', namespace='request' ) knowledge_response_queue = topic( - 'knowledge', kind='non-persistent', namespace='response', + 'knowledge', qos='q0', namespace='response', ) - diff --git a/trustgraph-base/trustgraph/schema/knowledge/nlp.py b/trustgraph-base/trustgraph/schema/knowledge/nlp.py index 0ffc3ba1..10b5f215 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/nlp.py +++ b/trustgraph-base/trustgraph/schema/knowledge/nlp.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, String, Boolean +from dataclasses import dataclass from ..core.topic import topic @@ -6,21 +6,25 @@ from ..core.topic import topic # NLP extraction data types -class Definition(Record): - name = 
String() - definition = String() +@dataclass +class Definition: + name: str = "" + definition: str = "" -class Topic(Record): - name = String() - definition = String() +@dataclass +class Topic: + name: str = "" + definition: str = "" -class Relationship(Record): - s = String() - p = String() - o = String() - o_entity = Boolean() +@dataclass +class Relationship: + s: str = "" + p: str = "" + o: str = "" + o_entity: bool = False -class Fact(Record): - s = String() - p = String() - o = String() \ No newline at end of file +@dataclass +class Fact: + s: str = "" + p: str = "" + o: str = "" diff --git a/trustgraph-base/trustgraph/schema/knowledge/object.py b/trustgraph-base/trustgraph/schema/knowledge/object.py index 537eb95e..39b0095f 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/object.py +++ b/trustgraph-base/trustgraph/schema/knowledge/object.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, String, Map, Double, Array +from dataclasses import dataclass, field from ..core.metadata import Metadata from ..core.topic import topic @@ -7,11 +7,13 @@ from ..core.topic import topic # Extracted object from text processing -class ExtractedObject(Record): - metadata = Metadata() - schema_name = String() # Which schema this object belongs to - values = Array(Map(String())) # Array of objects, each object is field name -> value - confidence = Double() - source_span = String() # Text span where object was found +@dataclass +class ExtractedObject: + metadata: Metadata | None = None + schema_name: str = "" # Which schema this object belongs to + values: list[dict[str, str]] = field(default_factory=list) # Array of objects, each object is field name -> value + confidence: float = 0.0 + source_span: str = "" # Text span where object was found + +############################################################################ -############################################################################ \ No newline at end of file diff --git 
a/trustgraph-base/trustgraph/schema/knowledge/rows.py b/trustgraph-base/trustgraph/schema/knowledge/rows.py index 8b1c79ef..ca2131df 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/rows.py +++ b/trustgraph-base/trustgraph/schema/knowledge/rows.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, Array, Map, String +from dataclasses import dataclass, field from ..core.metadata import Metadata from ..core.primitives import RowSchema @@ -8,9 +8,10 @@ from ..core.topic import topic # Stores rows of information -class Rows(Record): - metadata = Metadata() - row_schema = RowSchema() - rows = Array(Map(String())) +@dataclass +class Rows: + metadata: Metadata | None = None + row_schema: RowSchema | None = None + rows: list[dict[str, str]] = field(default_factory=list) -############################################################################ \ No newline at end of file +############################################################################ diff --git a/trustgraph-base/trustgraph/schema/knowledge/structured.py b/trustgraph-base/trustgraph/schema/knowledge/structured.py index 3d2b1311..c227d767 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/structured.py +++ b/trustgraph-base/trustgraph/schema/knowledge/structured.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, String, Bytes, Map +from dataclasses import dataclass, field from ..core.metadata import Metadata from ..core.topic import topic @@ -7,11 +7,13 @@ from ..core.topic import topic # Structured data submission for fire-and-forget processing -class StructuredDataSubmission(Record): - metadata = Metadata() - format = String() # "json", "csv", "xml" - schema_name = String() # Reference to schema in config - data = Bytes() # Raw data to ingest - options = Map(String()) # Format-specific options +@dataclass +class StructuredDataSubmission: + metadata: Metadata | None = None + format: str = "" # "json", "csv", "xml" + schema_name: str = "" # Reference to schema in config + data: bytes = b"" # 
Raw data to ingest + options: dict[str, str] = field(default_factory=dict) # Format-specific options + +############################################################################ -############################################################################ \ No newline at end of file diff --git a/trustgraph-base/trustgraph/schema/services/agent.py b/trustgraph-base/trustgraph/schema/services/agent.py index 6e8be5eb..9f883ff2 100644 --- a/trustgraph-base/trustgraph/schema/services/agent.py +++ b/trustgraph-base/trustgraph/schema/services/agent.py @@ -1,5 +1,5 @@ -from pulsar.schema import Record, String, Array, Map, Boolean +from dataclasses import dataclass, field from ..core.topic import topic from ..core.primitives import Error @@ -8,33 +8,36 @@ from ..core.primitives import Error # Prompt services, abstract the prompt generation -class AgentStep(Record): - thought = String() - action = String() - arguments = Map(String()) - observation = String() - user = String() # User context for the step +@dataclass +class AgentStep: + thought: str = "" + action: str = "" + arguments: dict[str, str] = field(default_factory=dict) + observation: str = "" + user: str = "" # User context for the step -class AgentRequest(Record): - question = String() - state = String() - group = Array(String()) - history = Array(AgentStep()) - user = String() # User context for multi-tenancy - streaming = Boolean() # NEW: Enable streaming response delivery (default false) +@dataclass +class AgentRequest: + question: str = "" + state: str = "" + group: list[str] | None = None + history: list[AgentStep] = field(default_factory=list) + user: str = "" # User context for multi-tenancy + streaming: bool = False # NEW: Enable streaming response delivery (default false) -class AgentResponse(Record): +@dataclass +class AgentResponse: # Streaming-first design - chunk_type = String() # "thought", "action", "observation", "answer", "error" - content = String() # The actual content (interpretation depends 
on chunk_type) - end_of_message = Boolean() # Current chunk type (thought/action/etc.) is complete - end_of_dialog = Boolean() # Entire agent dialog is complete + chunk_type: str = "" # "thought", "action", "observation", "answer", "error" + content: str = "" # The actual content (interpretation depends on chunk_type) + end_of_message: bool = False # Current chunk type (thought/action/etc.) is complete + end_of_dialog: bool = False # Entire agent dialog is complete # Legacy fields (deprecated but kept for backward compatibility) - answer = String() - error = Error() - thought = String() - observation = String() + answer: str = "" + error: Error | None = None + thought: str = "" + observation: str = "" ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/collection.py b/trustgraph-base/trustgraph/schema/services/collection.py index 04f644e8..74381abb 100644 --- a/trustgraph-base/trustgraph/schema/services/collection.py +++ b/trustgraph-base/trustgraph/schema/services/collection.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, String, Integer, Array +from dataclasses import dataclass, field from datetime import datetime from ..core.primitives import Error @@ -10,37 +10,40 @@ from ..core.topic import topic # Collection metadata operations (for librarian service) -class CollectionMetadata(Record): +@dataclass +class CollectionMetadata: """Collection metadata record""" - user = String() - collection = String() - name = String() - description = String() - tags = Array(String()) + user: str = "" + collection: str = "" + name: str = "" + description: str = "" + tags: list[str] = field(default_factory=list) ############################################################################ -class CollectionManagementRequest(Record): +@dataclass +class CollectionManagementRequest: """Request for collection management operations""" - operation = String() # e.g., "delete-collection" + operation: str 
= "" # e.g., "delete-collection" # For 'list-collections' - user = String() - collection = String() - timestamp = String() # ISO timestamp - name = String() - description = String() - tags = Array(String()) + user: str = "" + collection: str = "" + timestamp: str = "" # ISO timestamp + name: str = "" + description: str = "" + tags: list[str] = field(default_factory=list) # For list - tag_filter = Array(String()) # Optional filter by tags - limit = Integer() + tag_filter: list[str] = field(default_factory=list) # Optional filter by tags + limit: int = 0 -class CollectionManagementResponse(Record): +@dataclass +class CollectionManagementResponse: """Response for collection management operations""" - error = Error() # Only populated if there's an error - timestamp = String() # ISO timestamp - collections = Array(CollectionMetadata()) + error: Error | None = None # Only populated if there's an error + timestamp: str = "" # ISO timestamp + collections: list[CollectionMetadata] = field(default_factory=list) ############################################################################ @@ -48,8 +51,9 @@ class CollectionManagementResponse(Record): # Topics collection_request_queue = topic( - 'collection', kind='non-persistent', namespace='request' + 'collection', qos='q0', namespace='request' ) collection_response_queue = topic( - 'collection', kind='non-persistent', namespace='response' + 'collection', qos='q0', namespace='response' ) + diff --git a/trustgraph-base/trustgraph/schema/services/config.py b/trustgraph-base/trustgraph/schema/services/config.py index a0955eab..38bd1cbf 100644 --- a/trustgraph-base/trustgraph/schema/services/config.py +++ b/trustgraph-base/trustgraph/schema/services/config.py @@ -1,5 +1,5 @@ -from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer +from dataclasses import dataclass, field from ..core.topic import topic from ..core.primitives import Error @@ -13,58 +13,61 @@ from ..core.primitives import Error # put(values) -> 
() # delete(keys) -> () # config() -> (version, config) -class ConfigKey(Record): - type = String() - key = String() +@dataclass +class ConfigKey: + type: str = "" + key: str = "" -class ConfigValue(Record): - type = String() - key = String() - value = String() +@dataclass +class ConfigValue: + type: str = "" + key: str = "" + value: str = "" # Prompt services, abstract the prompt generation -class ConfigRequest(Record): - - operation = String() # get, list, getvalues, delete, put, config +@dataclass +class ConfigRequest: + operation: str = "" # get, list, getvalues, delete, put, config # get, delete - keys = Array(ConfigKey()) + keys: list[ConfigKey] = field(default_factory=list) # list, getvalues - type = String() + type: str = "" # put - values = Array(ConfigValue()) - -class ConfigResponse(Record): + values: list[ConfigValue] = field(default_factory=list) +@dataclass +class ConfigResponse: # get, list, getvalues, config - version = Integer() + version: int = 0 # get, getvalues - values = Array(ConfigValue()) + values: list[ConfigValue] = field(default_factory=list) # list - directory = Array(String()) + directory: list[str] = field(default_factory=list) # config - config = Map(Map(String())) + config: dict[str, dict[str, str]] = field(default_factory=dict) # Everything - error = Error() + error: Error | None = None -class ConfigPush(Record): - version = Integer() - config = Map(Map(String())) +@dataclass +class ConfigPush: + version: int = 0 + config: dict[str, dict[str, str]] = field(default_factory=dict) config_request_queue = topic( - 'config', kind='non-persistent', namespace='request' + 'config', qos='q0', namespace='request' ) config_response_queue = topic( - 'config', kind='non-persistent', namespace='response' + 'config', qos='q0', namespace='response' ) config_push_queue = topic( - 'config', kind='persistent', namespace='config' + 'config', qos='q2', namespace='config' ) ############################################################################ diff 
--git a/trustgraph-base/trustgraph/schema/services/diagnosis.py b/trustgraph-base/trustgraph/schema/services/diagnosis.py index 1bd6d3ed..529e7d9e 100644 --- a/trustgraph-base/trustgraph/schema/services/diagnosis.py +++ b/trustgraph-base/trustgraph/schema/services/diagnosis.py @@ -1,33 +1,36 @@ -from pulsar.schema import Record, String, Map, Double, Array +from dataclasses import dataclass, field from ..core.primitives import Error ############################################################################ # Structured data diagnosis services -class StructuredDataDiagnosisRequest(Record): - operation = String() # "detect-type", "generate-descriptor", "diagnose", or "schema-selection" - sample = String() # Data sample to analyze (text content) - type = String() # Data type (csv, json, xml) - optional, required for generate-descriptor - schema_name = String() # Target schema name for descriptor generation - optional +@dataclass +class StructuredDataDiagnosisRequest: + operation: str = "" # "detect-type", "generate-descriptor", "diagnose", or "schema-selection" + sample: str = "" # Data sample to analyze (text content) + type: str = "" # Data type (csv, json, xml) - optional, required for generate-descriptor + schema_name: str = "" # Target schema name for descriptor generation - optional # JSON encoded options (e.g., delimiter for CSV) - options = Map(String()) + options: dict[str, str] = field(default_factory=dict) -class StructuredDataDiagnosisResponse(Record): - error = Error() +@dataclass +class StructuredDataDiagnosisResponse: + error: Error | None = None - operation = String() # The operation that was performed - detected_type = String() # Detected data type (for detect-type/diagnose) - optional - confidence = Double() # Confidence score for type detection - optional + operation: str = "" # The operation that was performed + detected_type: str = "" # Detected data type (for detect-type/diagnose) - optional + confidence: float = 0.0 # Confidence score for type 
detection - optional # JSON encoded descriptor (for generate-descriptor/diagnose) - optional - descriptor = String() + descriptor: str = "" # JSON encoded additional metadata (e.g., field count, sample records) - metadata = Map(String()) + metadata: dict[str, str] = field(default_factory=dict) # Array of matching schema IDs (for schema-selection operation) - optional - schema_matches = Array(String()) + schema_matches: list[str] = field(default_factory=list) + +############################################################################ -############################################################################ \ No newline at end of file diff --git a/trustgraph-base/trustgraph/schema/services/flow.py b/trustgraph-base/trustgraph/schema/services/flow.py index d03e559b..b993b1b3 100644 --- a/trustgraph-base/trustgraph/schema/services/flow.py +++ b/trustgraph-base/trustgraph/schema/services/flow.py @@ -1,5 +1,5 @@ -from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer +from dataclasses import dataclass, field from ..core.topic import topic from ..core.primitives import Error @@ -11,61 +11,61 @@ from ..core.primitives import Error # get_class(classname) -> (class) # put_class(class) -> (class) # delete_class(classname) -> () -# +# # list_flows() -> (flowid[]) # get_flow(flowid) -> (flow) # start_flow(flowid, classname) -> () # stop_flow(flowid) -> () # Prompt services, abstract the prompt generation -class FlowRequest(Record): - - operation = String() # list-classes, get-class, put-class, delete-class +@dataclass +class FlowRequest: + operation: str = "" # list-classes, get-class, put-class, delete-class # list-flows, get-flow, start-flow, stop-flow # get_class, put_class, delete_class, start_flow - class_name = String() + class_name: str = "" # put_class - class_definition = String() + class_definition: str = "" # start_flow - description = String() + description: str = "" # get_flow, start_flow, stop_flow - flow_id = String() + flow_id: str 
= "" # start_flow - optional parameters for flow customization - parameters = Map(String()) - -class FlowResponse(Record): + parameters: dict[str, str] = field(default_factory=dict) +@dataclass +class FlowResponse: # list_classes - class_names = Array(String()) + class_names: list[str] = field(default_factory=list) # list_flows - flow_ids = Array(String()) + flow_ids: list[str] = field(default_factory=list) # get_class - class_definition = String() + class_definition: str = "" # get_flow - flow = String() + flow: str = "" # get_flow - description = String() + description: str = "" # get_flow - parameters used when flow was started - parameters = Map(String()) + parameters: dict[str, str] = field(default_factory=dict) # Everything - error = Error() + error: Error | None = None flow_request_queue = topic( - 'flow', kind='non-persistent', namespace='request' + 'flow', qos='q0', namespace='request' ) flow_response_queue = topic( - 'flow', kind='non-persistent', namespace='response' + 'flow', qos='q0', namespace='response' ) ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/library.py b/trustgraph-base/trustgraph/schema/services/library.py index d9678a90..391d49e1 100644 --- a/trustgraph-base/trustgraph/schema/services/library.py +++ b/trustgraph-base/trustgraph/schema/services/library.py @@ -1,9 +1,8 @@ - -from pulsar.schema import Record, Bytes, String, Array, Long +from dataclasses import dataclass, field from ..core.primitives import Triple, Error from ..core.topic import topic from ..core.metadata import Metadata -from ..knowledge.document import Document, TextDocument +# Note: Document imports will be updated after knowledge schemas are converted # add-document # -> (document_id, document_metadata, content) @@ -50,76 +49,79 @@ from ..knowledge.document import Document, TextDocument # <- (processing_metadata[]) # <- (error) -class DocumentMetadata(Record): - id = String() - time = 
Long() - kind = String() - title = String() - comments = String() - metadata = Array(Triple()) - user = String() - tags = Array(String()) +@dataclass +class DocumentMetadata: + id: str = "" + time: int = 0 + kind: str = "" + title: str = "" + comments: str = "" + metadata: list[Triple] = field(default_factory=list) + user: str = "" + tags: list[str] = field(default_factory=list) -class ProcessingMetadata(Record): - id = String() - document_id = String() - time = Long() - flow = String() - user = String() - collection = String() - tags = Array(String()) +@dataclass +class ProcessingMetadata: + id: str = "" + document_id: str = "" + time: int = 0 + flow: str = "" + user: str = "" + collection: str = "" + tags: list[str] = field(default_factory=list) -class Criteria(Record): - key = String() - value = String() - operator = String() - -class LibrarianRequest(Record): +@dataclass +class Criteria: + key: str = "" + value: str = "" + operator: str = "" +@dataclass +class LibrarianRequest: # add-document, remove-document, update-document, get-document-metadata, # get-document-content, add-processing, remove-processing, list-documents, # list-processing - operation = String() + operation: str = "" # add-document, remove-document, update-document, get-document-metadata, # get-document-content - document_id = String() + document_id: str = "" # add-processing, remove-processing - processing_id = String() + processing_id: str = "" # add-document, update-document - document_metadata = DocumentMetadata() + document_metadata: DocumentMetadata | None = None # add-processing - processing_metadata = ProcessingMetadata() + processing_metadata: ProcessingMetadata | None = None # add-document - content = Bytes() + content: bytes = b"" # list-documents, list-processing - user = String() + user: str = "" # list-documents?, list-processing? 
- collection = String() + collection: str = "" - # - criteria = Array(Criteria()) + # + criteria: list[Criteria] = field(default_factory=list) -class LibrarianResponse(Record): - error = Error() - document_metadata = DocumentMetadata() - content = Bytes() - document_metadatas = Array(DocumentMetadata()) - processing_metadatas = Array(ProcessingMetadata()) +@dataclass +class LibrarianResponse: + error: Error | None = None + document_metadata: DocumentMetadata | None = None + content: bytes = b"" + document_metadatas: list[DocumentMetadata] = field(default_factory=list) + processing_metadatas: list[ProcessingMetadata] = field(default_factory=list) # FIXME: Is this right? Using persistence on librarian so that # message chunking works librarian_request_queue = topic( - 'librarian', kind='persistent', namespace='request' + 'librarian', qos='q1', namespace='request' ) librarian_response_queue = topic( - 'librarian', kind='persistent', namespace='response', + 'librarian', qos='q1', namespace='response', ) - diff --git a/trustgraph-base/trustgraph/schema/services/llm.py b/trustgraph-base/trustgraph/schema/services/llm.py index 3fd21937..1261158e 100644 --- a/trustgraph-base/trustgraph/schema/services/llm.py +++ b/trustgraph-base/trustgraph/schema/services/llm.py @@ -1,5 +1,5 @@ -from pulsar.schema import Record, String, Array, Double, Integer, Boolean +from dataclasses import dataclass, field from ..core.topic import topic from ..core.primitives import Error @@ -8,46 +8,49 @@ from ..core.primitives import Error # LLM text completion -class TextCompletionRequest(Record): - system = String() - prompt = String() - streaming = Boolean() # Default false for backward compatibility +@dataclass +class TextCompletionRequest: + system: str = "" + prompt: str = "" + streaming: bool = False # Default false for backward compatibility -class TextCompletionResponse(Record): - error = Error() - response = String() - in_token = Integer() - out_token = Integer() - model = String() - 
end_of_stream = Boolean() # Indicates final message in stream +@dataclass +class TextCompletionResponse: + error: Error | None = None + response: str = "" + in_token: int = 0 + out_token: int = 0 + model: str = "" + end_of_stream: bool = False # Indicates final message in stream ############################################################################ # Embeddings -class EmbeddingsRequest(Record): - text = String() +@dataclass +class EmbeddingsRequest: + text: str = "" -class EmbeddingsResponse(Record): - error = Error() - vectors = Array(Array(Double())) +@dataclass +class EmbeddingsResponse: + error: Error | None = None + vectors: list[list[float]] = field(default_factory=list) ############################################################################ # Tool request/response -class ToolRequest(Record): - name = String() - +@dataclass +class ToolRequest: + name: str = "" # Parameters are JSON encoded - parameters = String() - -class ToolResponse(Record): - error = Error() + parameters: str = "" +@dataclass +class ToolResponse: + error: Error | None = None # Plain text aka "unstructured" - text = String() - + text: str = "" # JSON-encoded object aka "structured" - object = String() + object: str = "" diff --git a/trustgraph-base/trustgraph/schema/services/lookup.py b/trustgraph-base/trustgraph/schema/services/lookup.py index 7cc0bd03..bdeac636 100644 --- a/trustgraph-base/trustgraph/schema/services/lookup.py +++ b/trustgraph-base/trustgraph/schema/services/lookup.py @@ -1,5 +1,4 @@ - -from pulsar.schema import Record, String +from dataclasses import dataclass from ..core.primitives import Error, Value, Triple from ..core.topic import topic @@ -9,13 +8,14 @@ from ..core.metadata import Metadata # Lookups -class LookupRequest(Record): - kind = String() - term = String() +@dataclass +class LookupRequest: + kind: str = "" + term: str = "" -class LookupResponse(Record): - text = String() - error = Error() +@dataclass +class LookupResponse: + text: str = "" + error: 
Error | None = None ############################################################################ - diff --git a/trustgraph-base/trustgraph/schema/services/nlp_query.py b/trustgraph-base/trustgraph/schema/services/nlp_query.py index a3e709a1..6cd65f0e 100644 --- a/trustgraph-base/trustgraph/schema/services/nlp_query.py +++ b/trustgraph-base/trustgraph/schema/services/nlp_query.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, String, Array, Map, Integer, Double +from dataclasses import dataclass, field from ..core.primitives import Error from ..core.topic import topic @@ -7,15 +7,18 @@ from ..core.topic import topic # NLP to Structured Query Service - converts natural language to GraphQL -class QuestionToStructuredQueryRequest(Record): - question = String() - max_results = Integer() +@dataclass +class QuestionToStructuredQueryRequest: + question: str = "" + max_results: int = 0 -class QuestionToStructuredQueryResponse(Record): - error = Error() - graphql_query = String() # Generated GraphQL query - variables = Map(String()) # GraphQL variables if any - detected_schemas = Array(String()) # Which schemas the query targets - confidence = Double() +@dataclass +class QuestionToStructuredQueryResponse: + error: Error | None = None + graphql_query: str = "" # Generated GraphQL query + variables: dict[str, str] = field(default_factory=dict) # GraphQL variables if any + detected_schemas: list[str] = field(default_factory=list) # Which schemas the query targets + confidence: float = 0.0 ############################################################################ + diff --git a/trustgraph-base/trustgraph/schema/services/objects_query.py b/trustgraph-base/trustgraph/schema/services/objects_query.py index 6c3a307c..bbb1cb61 100644 --- a/trustgraph-base/trustgraph/schema/services/objects_query.py +++ b/trustgraph-base/trustgraph/schema/services/objects_query.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, String, Map, Array +from dataclasses import dataclass, field from 
..core.primitives import Error from ..core.topic import topic @@ -7,22 +7,25 @@ from ..core.topic import topic # Objects Query Service - executes GraphQL queries against structured data -class GraphQLError(Record): - message = String() - path = Array(String()) # Path to the field that caused the error - extensions = Map(String()) # Additional error metadata +@dataclass +class GraphQLError: + message: str = "" + path: list[str] = field(default_factory=list) # Path to the field that caused the error + extensions: dict[str, str] = field(default_factory=dict) # Additional error metadata -class ObjectsQueryRequest(Record): - user = String() # Cassandra keyspace (follows pattern from TriplesQueryRequest) - collection = String() # Data collection identifier (required for partition key) - query = String() # GraphQL query string - variables = Map(String()) # GraphQL variables - operation_name = String() # Operation to execute for multi-operation documents +@dataclass +class ObjectsQueryRequest: + user: str = "" # Cassandra keyspace (follows pattern from TriplesQueryRequest) + collection: str = "" # Data collection identifier (required for partition key) + query: str = "" # GraphQL query string + variables: dict[str, str] = field(default_factory=dict) # GraphQL variables + operation_name: str = "" # Operation to execute for multi-operation documents -class ObjectsQueryResponse(Record): - error = Error() # System-level error (connection, timeout, etc.) - data = String() # JSON-encoded GraphQL response data - errors = Array(GraphQLError()) # GraphQL field-level errors - extensions = Map(String()) # Query metadata (execution time, etc.) +@dataclass +class ObjectsQueryResponse: + error: Error | None = None # System-level error (connection, timeout, etc.) 
+ data: str = "" # JSON-encoded GraphQL response data + errors: list[GraphQLError] = field(default_factory=list) # GraphQL field-level errors + extensions: dict[str, str] = field(default_factory=dict) # Query metadata (execution time, etc.) -############################################################################ \ No newline at end of file +############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/prompt.py b/trustgraph-base/trustgraph/schema/services/prompt.py index edb569c9..f7a31c14 100644 --- a/trustgraph-base/trustgraph/schema/services/prompt.py +++ b/trustgraph-base/trustgraph/schema/services/prompt.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, String, Map, Boolean +from dataclasses import dataclass, field from ..core.primitives import Error from ..core.topic import topic @@ -18,27 +18,28 @@ from ..core.topic import topic # extract-rows # schema, chunk -> rows -class PromptRequest(Record): - id = String() +@dataclass +class PromptRequest: + id: str = "" # JSON encoded values - terms = Map(String()) + terms: dict[str, str] = field(default_factory=dict) # Streaming support (default false for backward compatibility) - streaming = Boolean() - -class PromptResponse(Record): + streaming: bool = False +@dataclass +class PromptResponse: # Error case - error = Error() + error: Error | None = None # Just plain text - text = String() + text: str = "" # JSON encoded - object = String() + object: str = "" # Indicates final message in stream - end_of_stream = Boolean() + end_of_stream: bool = False ############################################################################ \ No newline at end of file diff --git a/trustgraph-base/trustgraph/schema/services/query.py b/trustgraph-base/trustgraph/schema/services/query.py index 91231ade..31d0852d 100644 --- a/trustgraph-base/trustgraph/schema/services/query.py +++ b/trustgraph-base/trustgraph/schema/services/query.py @@ -1,4 +1,4 @@ -from 
pulsar.schema import Record, String, Integer, Array, Double +from dataclasses import dataclass, field from ..core.primitives import Error, Value, Triple from ..core.topic import topic @@ -7,49 +7,55 @@ from ..core.topic import topic # Graph embeddings query -class GraphEmbeddingsRequest(Record): - vectors = Array(Array(Double())) - limit = Integer() - user = String() - collection = String() +@dataclass +class GraphEmbeddingsRequest: + vectors: list[list[float]] = field(default_factory=list) + limit: int = 0 + user: str = "" + collection: str = "" -class GraphEmbeddingsResponse(Record): - error = Error() - entities = Array(Value()) +@dataclass +class GraphEmbeddingsResponse: + error: Error | None = None + entities: list[Value] = field(default_factory=list) ############################################################################ # Graph triples query -class TriplesQueryRequest(Record): - user = String() - collection = String() - s = Value() - p = Value() - o = Value() - limit = Integer() +@dataclass +class TriplesQueryRequest: + user: str = "" + collection: str = "" + s: Value | None = None + p: Value | None = None + o: Value | None = None + limit: int = 0 -class TriplesQueryResponse(Record): - error = Error() - triples = Array(Triple()) +@dataclass +class TriplesQueryResponse: + error: Error | None = None + triples: list[Triple] = field(default_factory=list) ############################################################################ # Doc embeddings query -class DocumentEmbeddingsRequest(Record): - vectors = Array(Array(Double())) - limit = Integer() - user = String() - collection = String() +@dataclass +class DocumentEmbeddingsRequest: + vectors: list[list[float]] = field(default_factory=list) + limit: int = 0 + user: str = "" + collection: str = "" -class DocumentEmbeddingsResponse(Record): - error = Error() - chunks = Array(String()) +@dataclass +class DocumentEmbeddingsResponse: + error: Error | None = None + chunks: list[str] = field(default_factory=list) 
document_embeddings_request_queue = topic( - "non-persistent://trustgraph/document-embeddings-request" + "document-embeddings-request", qos='q0', tenant='trustgraph', namespace='flow' ) document_embeddings_response_queue = topic( - "non-persistent://trustgraph/document-embeddings-response" + "document-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow' ) \ No newline at end of file diff --git a/trustgraph-base/trustgraph/schema/services/retrieval.py b/trustgraph-base/trustgraph/schema/services/retrieval.py index 3cd7f792..72085ae8 100644 --- a/trustgraph-base/trustgraph/schema/services/retrieval.py +++ b/trustgraph-base/trustgraph/schema/services/retrieval.py @@ -1,5 +1,4 @@ - -from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double +from dataclasses import dataclass from ..core.topic import topic from ..core.primitives import Error, Value @@ -7,36 +6,37 @@ from ..core.primitives import Error, Value # Graph RAG text retrieval -class GraphRagQuery(Record): - query = String() - user = String() - collection = String() - entity_limit = Integer() - triple_limit = Integer() - max_subgraph_size = Integer() - max_path_length = Integer() - streaming = Boolean() +@dataclass +class GraphRagQuery: + query: str = "" + user: str = "" + collection: str = "" + entity_limit: int = 0 + triple_limit: int = 0 + max_subgraph_size: int = 0 + max_path_length: int = 0 + streaming: bool = False -class GraphRagResponse(Record): - error = Error() - response = String() - chunk = String() - end_of_stream = Boolean() +@dataclass +class GraphRagResponse: + error: Error | None = None + response: str = "" + end_of_stream: bool = False ############################################################################ # Document RAG text retrieval -class DocumentRagQuery(Record): - query = String() - user = String() - collection = String() - doc_limit = Integer() - streaming = Boolean() - -class DocumentRagResponse(Record): - error = Error() - response = String() 
- chunk = String() - end_of_stream = Boolean() +@dataclass +class DocumentRagQuery: + query: str = "" + user: str = "" + collection: str = "" + doc_limit: int = 0 + streaming: bool = False +@dataclass +class DocumentRagResponse: + error: Error | None = None + response: str = "" + end_of_stream: bool = False diff --git a/trustgraph-base/trustgraph/schema/services/storage.py b/trustgraph-base/trustgraph/schema/services/storage.py index 16791615..e65fb793 100644 --- a/trustgraph-base/trustgraph/schema/services/storage.py +++ b/trustgraph-base/trustgraph/schema/services/storage.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, String +from dataclasses import dataclass from ..core.primitives import Error from ..core.topic import topic @@ -7,15 +7,17 @@ from ..core.topic import topic # Storage management operations -class StorageManagementRequest(Record): +@dataclass +class StorageManagementRequest: """Request for storage management operations sent to store processors""" - operation = String() # e.g., "delete-collection" - user = String() - collection = String() + operation: str = "" # e.g., "delete-collection" + user: str = "" + collection: str = "" -class StorageManagementResponse(Record): +@dataclass +class StorageManagementResponse: """Response from storage processors for management operations""" - error = Error() # Only populated if there's an error, if null success + error: Error | None = None # Only populated if there's an error, if null success ############################################################################ @@ -23,20 +25,21 @@ class StorageManagementResponse(Record): # Topics for sending collection management requests to different storage types vector_storage_management_topic = topic( - 'vector-storage-management', kind='non-persistent', namespace='request' + 'vector-storage-management', qos='q0', namespace='request' ) object_storage_management_topic = topic( - 'object-storage-management', kind='non-persistent', namespace='request' + 
'object-storage-management', qos='q0', namespace='request' ) triples_storage_management_topic = topic( - 'triples-storage-management', kind='non-persistent', namespace='request' + 'triples-storage-management', qos='q0', namespace='request' ) # Topic for receiving responses from storage processors storage_management_response_topic = topic( - 'storage-management', kind='non-persistent', namespace='response' + 'storage-management', qos='q0', namespace='response' ) ############################################################################ + diff --git a/trustgraph-base/trustgraph/schema/services/structured_query.py b/trustgraph-base/trustgraph/schema/services/structured_query.py index df21bfe2..ae1eaa5f 100644 --- a/trustgraph-base/trustgraph/schema/services/structured_query.py +++ b/trustgraph-base/trustgraph/schema/services/structured_query.py @@ -1,4 +1,4 @@ -from pulsar.schema import Record, String, Map, Array +from dataclasses import dataclass, field from ..core.primitives import Error from ..core.topic import topic @@ -7,14 +7,17 @@ from ..core.topic import topic # Structured Query Service - executes GraphQL queries -class StructuredQueryRequest(Record): - question = String() - user = String() # Cassandra keyspace identifier - collection = String() # Data collection identifier +@dataclass +class StructuredQueryRequest: + question: str = "" + user: str = "" # Cassandra keyspace identifier + collection: str = "" # Data collection identifier -class StructuredQueryResponse(Record): - error = Error() - data = String() # JSON-encoded GraphQL response data - errors = Array(String()) # GraphQL errors if any +@dataclass +class StructuredQueryResponse: + error: Error | None = None + data: str = "" # JSON-encoded GraphQL response data + errors: list[str] = field(default_factory=list) # GraphQL errors if any ############################################################################ + diff --git a/trustgraph-cli/trustgraph/cli/dump_queues.py 
b/trustgraph-cli/trustgraph/cli/dump_queues.py index 93151858..0a298450 100644 --- a/trustgraph-cli/trustgraph/cli/dump_queues.py +++ b/trustgraph-cli/trustgraph/cli/dump_queues.py @@ -17,6 +17,7 @@ from datetime import datetime import argparse from trustgraph.base.subscriber import Subscriber +from trustgraph.base.pubsub import get_pubsub def format_message(queue_name, msg): """Format a message with timestamp and queue name.""" @@ -167,11 +168,11 @@ async def async_main(queues, output_file, pulsar_host, listener_name, subscriber print(f"Mode: {'append' if append_mode else 'overwrite'}") print(f"Press Ctrl+C to stop\n") - # Connect to Pulsar + # Create backend connection try: - client = pulsar.Client(pulsar_host, listener_name=listener_name) + backend = get_pubsub(pulsar_host=pulsar_host, pulsar_listener=listener_name, pubsub_backend='pulsar') except Exception as e: - print(f"Error connecting to Pulsar at {pulsar_host}: {e}", file=sys.stderr) + print(f"Error connecting to backend at {pulsar_host}: {e}", file=sys.stderr) sys.exit(1) # Create Subscribers and central queue @@ -181,7 +182,7 @@ async def async_main(queues, output_file, pulsar_host, listener_name, subscriber for queue_name in queues: try: sub = Subscriber( - client=client, + backend=backend, topic=queue_name, subscription=subscriber_name, consumer_name=f"{subscriber_name}-{queue_name}", @@ -195,7 +196,7 @@ async def async_main(queues, output_file, pulsar_host, listener_name, subscriber if not subscribers: print("\nNo subscribers created. 
Exiting.", file=sys.stderr) - client.close() + backend.close() sys.exit(1) print(f"\nListening for messages...\n") @@ -256,7 +257,7 @@ async def async_main(queues, output_file, pulsar_host, listener_name, subscriber # Clean shutdown of Subscribers for _, sub in subscribers: await sub.stop() - client.close() + backend.close() print(f"\nMessages logged to: {output_file}") diff --git a/trustgraph-cli/trustgraph/cli/invoke_document_rag.py b/trustgraph-cli/trustgraph/cli/invoke_document_rag.py index d30f05ac..7e88bdc4 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_document_rag.py +++ b/trustgraph-cli/trustgraph/cli/invoke_document_rag.py @@ -24,7 +24,7 @@ def question(url, flow_id, question, user, collection, doc_limit, streaming=True try: response = flow.document_rag( - question=question, + query=question, user=user, collection=collection, doc_limit=doc_limit, @@ -42,7 +42,7 @@ def question(url, flow_id, question, user, collection, doc_limit, streaming=True # Use REST API for non-streaming flow = api.flow().id(flow_id) resp = flow.document_rag( - question=question, + query=question, user=user, collection=collection, doc_limit=doc_limit, diff --git a/trustgraph-cli/trustgraph/cli/invoke_graph_rag.py b/trustgraph-cli/trustgraph/cli/invoke_graph_rag.py index 56c27585..5fa359ab 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_graph_rag.py +++ b/trustgraph-cli/trustgraph/cli/invoke_graph_rag.py @@ -30,7 +30,7 @@ def question( try: response = flow.graph_rag( - question=question, + query=question, user=user, collection=collection, entity_limit=entity_limit, @@ -51,7 +51,7 @@ def question( # Use REST API for non-streaming flow = api.flow().id(flow_id) resp = flow.graph_rag( - question=question, + query=question, user=user, collection=collection, entity_limit=entity_limit, diff --git a/trustgraph-flow/trustgraph/agent/react/service.py b/trustgraph-flow/trustgraph/agent/react/service.py index a4238e36..d4a4d72f 100755 --- a/trustgraph-flow/trustgraph/agent/react/service.py 
+++ b/trustgraph-flow/trustgraph/agent/react/service.py @@ -433,13 +433,11 @@ class Processor(AgentService): end_of_dialog=True, # Legacy fields for backward compatibility error=error_obj, - response=None, ) else: # Legacy format r = AgentResponse( error=error_obj, - response=None, ) await respond(r) diff --git a/trustgraph-flow/trustgraph/config/service/config.py b/trustgraph-flow/trustgraph/config/service/config.py index 701d7f58..c0a5be1e 100644 --- a/trustgraph-flow/trustgraph/config/service/config.py +++ b/trustgraph-flow/trustgraph/config/service/config.py @@ -95,9 +95,6 @@ class Configuration: return ConfigResponse( version = await self.get_version(), values = values, - directory = None, - config = None, - error = None, ) async def handle_list(self, v): @@ -117,10 +114,7 @@ class Configuration: return ConfigResponse( version = await self.get_version(), - values = None, directory = await self.table_store.get_keys(v.type), - config = None, - error = None, ) async def handle_getvalues(self, v): @@ -150,9 +144,6 @@ class Configuration: return ConfigResponse( version = await self.get_version(), values = list(values), - directory = None, - config = None, - error = None, ) async def handle_delete(self, v): @@ -179,12 +170,6 @@ class Configuration: await self.push() return ConfigResponse( - version = None, - value = None, - directory = None, - values = None, - config = None, - error = None, ) async def handle_put(self, v): @@ -198,11 +183,6 @@ class Configuration: await self.push() return ConfigResponse( - version = None, - value = None, - directory = None, - values = None, - error = None, ) async def get_config(self): @@ -224,11 +204,7 @@ class Configuration: return ConfigResponse( version = await self.get_version(), - value = None, - directory = None, - values = None, config = config, - error = None, ) async def handle(self, msg): @@ -262,9 +238,6 @@ class Configuration: else: resp = ConfigResponse( - value=None, - directory=None, - values=None, error=Error( type 
= "bad-operation", message = "Bad operation" diff --git a/trustgraph-flow/trustgraph/config/service/flow.py b/trustgraph-flow/trustgraph/config/service/flow.py index b99b7d0a..42696c31 100644 --- a/trustgraph-flow/trustgraph/config/service/flow.py +++ b/trustgraph-flow/trustgraph/config/service/flow.py @@ -361,9 +361,6 @@ class FlowConfig: else: resp = FlowResponse( - value=None, - directory=None, - values=None, error=Error( type = "bad-operation", message = "Bad operation" diff --git a/trustgraph-flow/trustgraph/config/service/service.py b/trustgraph-flow/trustgraph/config/service/service.py index 414ad847..42b256df 100644 --- a/trustgraph-flow/trustgraph/config/service/service.py +++ b/trustgraph-flow/trustgraph/config/service/service.py @@ -112,7 +112,7 @@ class Processor(AsyncProcessor): self.config_request_consumer = Consumer( taskgroup = self.taskgroup, - client = self.pulsar_client, + backend = self.pubsub, flow = None, topic = config_request_queue, subscriber = id, @@ -122,14 +122,14 @@ class Processor(AsyncProcessor): ) self.config_response_producer = Producer( - client = self.pulsar_client, + backend = self.pubsub, topic = config_response_queue, schema = ConfigResponse, metrics = config_response_metrics, ) self.config_push_producer = Producer( - client = self.pulsar_client, + backend = self.pubsub, topic = config_push_queue, schema = ConfigPush, metrics = config_push_metrics, @@ -137,7 +137,7 @@ class Processor(AsyncProcessor): self.flow_request_consumer = Consumer( taskgroup = self.taskgroup, - client = self.pulsar_client, + backend = self.pubsub, flow = None, topic = flow_request_queue, subscriber = id, @@ -147,7 +147,7 @@ class Processor(AsyncProcessor): ) self.flow_response_producer = Producer( - client = self.pulsar_client, + backend = self.pubsub, topic = flow_response_queue, schema = FlowResponse, metrics = flow_response_metrics, @@ -178,11 +178,7 @@ class Processor(AsyncProcessor): resp = ConfigPush( version = version, - value = None, - directory 
= None, - values = None, config = config, - error = None, ) await self.config_push_producer.send(resp) @@ -215,7 +211,6 @@ class Processor(AsyncProcessor): type = "config-error", message = str(e), ), - text=None, ) await self.config_response_producer.send( @@ -240,13 +235,12 @@ class Processor(AsyncProcessor): ) except Exception as e: - + resp = FlowResponse( error=Error( type = "flow-error", message = str(e), ), - text=None, ) await self.flow_response_producer.send( diff --git a/trustgraph-flow/trustgraph/cores/knowledge.py b/trustgraph-flow/trustgraph/cores/knowledge.py index 449f1c3b..0d5c3d82 100644 --- a/trustgraph-flow/trustgraph/cores/knowledge.py +++ b/trustgraph-flow/trustgraph/cores/knowledge.py @@ -234,11 +234,11 @@ class KnowledgeManager: logger.debug(f"Graph embeddings queue: {ge_q}") t_pub = Publisher( - self.flow_config.pulsar_client, t_q, + self.flow_config.pubsub, t_q, schema=Triples, ) ge_pub = Publisher( - self.flow_config.pulsar_client, ge_q, + self.flow_config.pubsub, ge_q, schema=GraphEmbeddings ) diff --git a/trustgraph-flow/trustgraph/cores/service.py b/trustgraph-flow/trustgraph/cores/service.py index 4b1573a9..18154fc5 100755 --- a/trustgraph-flow/trustgraph/cores/service.py +++ b/trustgraph-flow/trustgraph/cores/service.py @@ -84,7 +84,7 @@ class Processor(AsyncProcessor): self.knowledge_request_consumer = Consumer( taskgroup = self.taskgroup, - client = self.pulsar_client, + backend = self.pubsub, flow = None, topic = knowledge_request_queue, subscriber = id, @@ -94,7 +94,7 @@ class Processor(AsyncProcessor): ) self.knowledge_response_producer = Producer( - client = self.pulsar_client, + backend = self.pubsub, topic = knowledge_response_queue, schema = KnowledgeResponse, metrics = knowledge_response_metrics, diff --git a/trustgraph-flow/trustgraph/gateway/config/receiver.py b/trustgraph-flow/trustgraph/gateway/config/receiver.py index 0427e236..bdd123a9 100755 --- a/trustgraph-flow/trustgraph/gateway/config/receiver.py +++ 
b/trustgraph-flow/trustgraph/gateway/config/receiver.py @@ -34,9 +34,9 @@ logger.setLevel(logging.INFO) class ConfigReceiver: - def __init__(self, pulsar_client): + def __init__(self, backend): - self.pulsar_client = pulsar_client + self.backend = backend self.flow_handlers = [] @@ -104,8 +104,8 @@ class ConfigReceiver: self.config_cons = Consumer( taskgroup = tg, flow = None, - client = self.pulsar_client, - subscriber = f"gateway-{id}", + backend = self.backend, + subscriber = f"gateway-{id}", topic = config_push_queue, schema = ConfigPush, handler = self.on_config, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/agent.py b/trustgraph-flow/trustgraph/gateway/dispatch/agent.py index 1a5e8299..8867956d 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/agent.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/agent.py @@ -6,12 +6,12 @@ from . requestor import ServiceRequestor class AgentRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(AgentRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=AgentRequest, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/collection_management.py b/trustgraph-flow/trustgraph/gateway/dispatch/collection_management.py index 9773ad4c..2fa3759d 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/collection_management.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/collection_management.py @@ -5,7 +5,7 @@ from ... messaging import TranslatorRegistry from . 
requestor import ServiceRequestor class CollectionManagementRequestor(ServiceRequestor): - def __init__(self, pulsar_client, consumer, subscriber, timeout=120, + def __init__(self, backend, consumer, subscriber, timeout=120, request_queue=None, response_queue=None): if request_queue is None: @@ -14,7 +14,7 @@ class CollectionManagementRequestor(ServiceRequestor): response_queue = collection_response_queue super(CollectionManagementRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, consumer_name = consumer, subscription = subscriber, request_queue=request_queue, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/config.py b/trustgraph-flow/trustgraph/gateway/dispatch/config.py index 10a0aab9..9d40e8cc 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/config.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/config.py @@ -7,7 +7,7 @@ from ... messaging import TranslatorRegistry from . requestor import ServiceRequestor class ConfigRequestor(ServiceRequestor): - def __init__(self, pulsar_client, consumer, subscriber, timeout=120, + def __init__(self, backend, consumer, subscriber, timeout=120, request_queue=None, response_queue=None): if request_queue is None: @@ -16,7 +16,7 @@ class ConfigRequestor(ServiceRequestor): response_queue = config_response_queue super(ConfigRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, consumer_name = consumer, subscription = subscriber, request_queue=request_queue, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/core_export.py b/trustgraph-flow/trustgraph/gateway/dispatch/core_export.py index 61b0bcbc..62626046 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/core_export.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/core_export.py @@ -10,9 +10,9 @@ logger = logging.getLogger(__name__) class CoreExport: - def __init__(self, pulsar_client): - self.pulsar_client = pulsar_client - + def __init__(self, backend): + self.backend = backend + async def 
process(self, data, error, ok, request): id = request.query["id"] @@ -21,7 +21,7 @@ class CoreExport: response = await ok() kr = KnowledgeRequestor( - pulsar_client = self.pulsar_client, + backend = self.backend, consumer = "api-gateway-core-export-" + str(uuid.uuid4()), subscriber = "api-gateway-core-export-" + str(uuid.uuid4()), ) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/core_import.py b/trustgraph-flow/trustgraph/gateway/dispatch/core_import.py index b32fb7f7..af22a5b0 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/core_import.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/core_import.py @@ -11,8 +11,8 @@ logger = logging.getLogger(__name__) class CoreImport: - def __init__(self, pulsar_client): - self.pulsar_client = pulsar_client + def __init__(self, backend): + self.backend = backend async def process(self, data, error, ok, request): @@ -20,7 +20,7 @@ class CoreImport: user = request.query["user"] kr = KnowledgeRequestor( - pulsar_client = self.pulsar_client, + backend = self.backend, consumer = "api-gateway-core-import-" + str(uuid.uuid4()), subscriber = "api-gateway-core-import-" + str(uuid.uuid4()), ) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_export.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_export.py index f7d53005..8866972d 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_export.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_export.py @@ -15,12 +15,12 @@ logger = logging.getLogger(__name__) class DocumentEmbeddingsExport: def __init__( - self, ws, running, pulsar_client, queue, consumer, subscriber + self, ws, running, backend, queue, consumer, subscriber ): self.ws = ws self.running = running - self.pulsar_client = pulsar_client + self.backend = backend self.queue = queue self.consumer = consumer self.subscriber = subscriber @@ -48,9 +48,9 @@ class DocumentEmbeddingsExport: async def run(self): """Enhanced 
run with better error handling""" self.subs = Subscriber( - client = self.pulsar_client, + backend = self.backend, topic = self.queue, - consumer_name = self.consumer, + consumer_name = self.consumer, subscription = self.subscriber, schema = DocumentEmbeddings, backpressure_strategy = "block" # Configurable diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py index 7ec2f595..bd5f9666 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) class DocumentEmbeddingsImport: def __init__( - self, ws, running, pulsar_client, queue + self, ws, running, backend, queue ): self.ws = ws @@ -23,7 +23,7 @@ class DocumentEmbeddingsImport: self.translator = DocumentEmbeddingsTranslator() self.publisher = Publisher( - pulsar_client, topic = queue, schema = DocumentEmbeddings + backend, topic = queue, schema = DocumentEmbeddings ) async def start(self): diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/document_load.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_load.py index 7e38877c..eb68b0b1 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/document_load.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_load.py @@ -11,10 +11,10 @@ from . 
sender import ServiceSender logger = logging.getLogger(__name__) class DocumentLoad(ServiceSender): - def __init__(self, pulsar_client, queue): + def __init__(self, backend, queue): super(DocumentLoad, self).__init__( - pulsar_client = pulsar_client, + backend = backend, queue = queue, schema = Document, ) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py index a7f3634e..83b3cb9a 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py @@ -6,12 +6,12 @@ from . requestor import ServiceRequestor class DocumentRagRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(DocumentRagRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=DocumentRagQuery, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py b/trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py index 47146e57..6c1b55ba 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py @@ -6,12 +6,12 @@ from . 
requestor import ServiceRequestor class EmbeddingsRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(EmbeddingsRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=EmbeddingsRequest, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/entity_contexts_export.py b/trustgraph-flow/trustgraph/gateway/dispatch/entity_contexts_export.py index 2be9c703..c03bdda6 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/entity_contexts_export.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/entity_contexts_export.py @@ -15,12 +15,12 @@ logger = logging.getLogger(__name__) class EntityContextsExport: def __init__( - self, ws, running, pulsar_client, queue, consumer, subscriber + self, ws, running, backend, queue, consumer, subscriber ): self.ws = ws self.running = running - self.pulsar_client = pulsar_client + self.backend = backend self.queue = queue self.consumer = consumer self.subscriber = subscriber @@ -48,9 +48,9 @@ class EntityContextsExport: async def run(self): """Enhanced run with better error handling""" self.subs = Subscriber( - client = self.pulsar_client, + backend = self.backend, topic = self.queue, - consumer_name = self.consumer, + consumer_name = self.consumer, subscription = self.subscriber, schema = EntityContexts, backpressure_strategy = "block" # Configurable diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/entity_contexts_import.py b/trustgraph-flow/trustgraph/gateway/dispatch/entity_contexts_import.py index c76f1612..6e01a5ca 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/entity_contexts_import.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/entity_contexts_import.py @@ -16,14 +16,14 @@ logger = logging.getLogger(__name__) class EntityContextsImport: def __init__( - self, ws, 
running, pulsar_client, queue + self, ws, running, backend, queue ): self.ws = ws self.running = running self.publisher = Publisher( - pulsar_client, topic = queue, schema = EntityContexts + backend, topic = queue, schema = EntityContexts ) async def start(self): diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/flow.py b/trustgraph-flow/trustgraph/gateway/dispatch/flow.py index cb641656..be91995d 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/flow.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/flow.py @@ -7,7 +7,7 @@ from ... messaging import TranslatorRegistry from . requestor import ServiceRequestor class FlowRequestor(ServiceRequestor): - def __init__(self, pulsar_client, consumer, subscriber, timeout=120, + def __init__(self, backend, consumer, subscriber, timeout=120, request_queue=None, response_queue=None): if request_queue is None: @@ -16,7 +16,7 @@ class FlowRequestor(ServiceRequestor): response_queue = flow_response_queue super(FlowRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, consumer_name = consumer, subscription = subscriber, request_queue=request_queue, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_export.py b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_export.py index d4abec73..d6d7a1c5 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_export.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_export.py @@ -15,12 +15,12 @@ logger = logging.getLogger(__name__) class GraphEmbeddingsExport: def __init__( - self, ws, running, pulsar_client, queue, consumer, subscriber + self, ws, running, backend, queue, consumer, subscriber ): self.ws = ws self.running = running - self.pulsar_client = pulsar_client + self.backend = backend self.queue = queue self.consumer = consumer self.subscriber = subscriber @@ -48,9 +48,9 @@ class GraphEmbeddingsExport: async def run(self): """Enhanced run with better error handling""" 
self.subs = Subscriber( - client = self.pulsar_client, + backend = self.backend, topic = self.queue, - consumer_name = self.consumer, + consumer_name = self.consumer, subscription = self.subscriber, schema = GraphEmbeddings, backpressure_strategy = "block" # Configurable diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_import.py b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_import.py index ee3d88ef..8abf5e9c 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_import.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_import.py @@ -16,14 +16,14 @@ logger = logging.getLogger(__name__) class GraphEmbeddingsImport: def __init__( - self, ws, running, pulsar_client, queue + self, ws, running, backend, queue ): self.ws = ws self.running = running self.publisher = Publisher( - pulsar_client, topic = queue, schema = GraphEmbeddings + backend, topic = queue, schema = GraphEmbeddings ) async def start(self): diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py index f5be06fb..a7bb1bd8 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py @@ -6,12 +6,12 @@ from . 
requestor import ServiceRequestor class GraphEmbeddingsQueryRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(GraphEmbeddingsQueryRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=GraphEmbeddingsRequest, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py b/trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py index a15a1aee..a0299a43 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py @@ -6,12 +6,12 @@ from . requestor import ServiceRequestor class GraphRagRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(GraphRagRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=GraphRagQuery, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py b/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py index b42db648..83aefbd0 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py @@ -10,7 +10,7 @@ from ... messaging import TranslatorRegistry from . 
requestor import ServiceRequestor class KnowledgeRequestor(ServiceRequestor): - def __init__(self, pulsar_client, consumer, subscriber, timeout=120, + def __init__(self, backend, consumer, subscriber, timeout=120, request_queue=None, response_queue=None): if request_queue is None: @@ -19,7 +19,7 @@ class KnowledgeRequestor(ServiceRequestor): response_queue = knowledge_response_queue super(KnowledgeRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, consumer_name = consumer, subscription = subscriber, request_queue=request_queue, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py b/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py index 8fc62d54..bbf7190e 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py @@ -9,7 +9,7 @@ from ... messaging import TranslatorRegistry from . requestor import ServiceRequestor class LibrarianRequestor(ServiceRequestor): - def __init__(self, pulsar_client, consumer, subscriber, timeout=120, + def __init__(self, backend, consumer, subscriber, timeout=120, request_queue=None, response_queue=None): if request_queue is None: @@ -18,7 +18,7 @@ class LibrarianRequestor(ServiceRequestor): response_queue = librarian_response_queue super(LibrarianRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, consumer_name = consumer, subscription = subscriber, request_queue=request_queue, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/manager.py b/trustgraph-flow/trustgraph/gateway/dispatch/manager.py index d35e5525..0766e232 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/manager.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/manager.py @@ -98,9 +98,9 @@ class DispatcherWrapper: class DispatcherManager: - def __init__(self, pulsar_client, config_receiver, prefix="api-gateway", + def __init__(self, backend, config_receiver, prefix="api-gateway", queue_overrides=None): - 
self.pulsar_client = pulsar_client + self.backend = backend self.config_receiver = config_receiver self.config_receiver.add_handler(self) self.prefix = prefix @@ -133,12 +133,12 @@ class DispatcherManager: async def process_core_import(self, data, error, ok, request): - ci = CoreImport(self.pulsar_client) + ci = CoreImport(self.backend) return await ci.process(data, error, ok, request) async def process_core_export(self, data, error, ok, request): - ce = CoreExport(self.pulsar_client) + ce = CoreExport(self.backend) return await ce.process(data, error, ok, request) async def process_global_service(self, data, responder, params): @@ -161,7 +161,7 @@ class DispatcherManager: response_queue = self.queue_overrides[kind].get("response") dispatcher = global_dispatchers[kind]( - pulsar_client = self.pulsar_client, + backend = self.backend, timeout = 120, consumer = f"{self.prefix}-{kind}-request", subscriber = f"{self.prefix}-{kind}-request", @@ -216,7 +216,7 @@ class DispatcherManager: id = str(uuid.uuid4()) dispatcher = import_dispatchers[kind]( - pulsar_client = self.pulsar_client, + backend = self.backend, ws = ws, running = running, queue = qconfig, @@ -254,7 +254,7 @@ class DispatcherManager: id = str(uuid.uuid4()) dispatcher = export_dispatchers[kind]( - pulsar_client = self.pulsar_client, + backend = self.backend, ws = ws, running = running, queue = qconfig, @@ -296,7 +296,7 @@ class DispatcherManager: if kind in request_response_dispatchers: dispatcher = request_response_dispatchers[kind]( - pulsar_client = self.pulsar_client, + backend = self.backend, request_queue = qconfig["request"], response_queue = qconfig["response"], timeout = 120, @@ -305,7 +305,7 @@ class DispatcherManager: ) elif kind in sender_dispatchers: dispatcher = sender_dispatchers[kind]( - pulsar_client = self.pulsar_client, + backend = self.backend, queue = qconfig, ) else: diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/mcp_tool.py 
b/trustgraph-flow/trustgraph/gateway/dispatch/mcp_tool.py index da2a7bb0..a5f9398e 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/mcp_tool.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/mcp_tool.py @@ -6,12 +6,12 @@ from . requestor import ServiceRequestor class McpToolRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(McpToolRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=ToolRequest, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/nlp_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/nlp_query.py index 3cf5684a..3a6314f2 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/nlp_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/nlp_query.py @@ -5,12 +5,12 @@ from . requestor import ServiceRequestor class NLPQueryRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(NLPQueryRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=QuestionToStructuredQueryRequest, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/objects_import.py b/trustgraph-flow/trustgraph/gateway/dispatch/objects_import.py index bc0c1b85..fc982b69 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/objects_import.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/objects_import.py @@ -15,14 +15,14 @@ logger = logging.getLogger(__name__) class ObjectsImport: def __init__( - self, ws, running, pulsar_client, queue + self, ws, running, backend, queue ): self.ws = ws self.running = running self.publisher = Publisher( - pulsar_client, topic = queue, schema = 
ExtractedObject + backend, topic = queue, schema = ExtractedObject ) async def start(self): diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/objects_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/objects_query.py index 2f2535a9..fb8dc81d 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/objects_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/objects_query.py @@ -5,12 +5,12 @@ from . requestor import ServiceRequestor class ObjectsQueryRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(ObjectsQueryRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=ObjectsQueryRequest, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/prompt.py b/trustgraph-flow/trustgraph/gateway/dispatch/prompt.py index 5c316cf6..23017733 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/prompt.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/prompt.py @@ -8,12 +8,12 @@ from . 
requestor import ServiceRequestor class PromptRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(PromptRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=PromptRequest, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/requestor.py b/trustgraph-flow/trustgraph/gateway/dispatch/requestor.py index 1acac5e5..e8f0a63e 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/requestor.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/requestor.py @@ -13,7 +13,7 @@ class ServiceRequestor: def __init__( self, - pulsar_client, + backend, request_queue, request_schema, response_queue, response_schema, subscription="api-gateway", consumer_name="api-gateway", @@ -21,12 +21,12 @@ class ServiceRequestor: ): self.pub = Publisher( - pulsar_client, request_queue, + backend, request_queue, schema=request_schema, ) self.sub = Subscriber( - pulsar_client, response_queue, + backend, response_queue, subscription, consumer_name, response_schema ) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/sender.py b/trustgraph-flow/trustgraph/gateway/dispatch/sender.py index 2435cdc1..17324b19 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/sender.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/sender.py @@ -14,12 +14,12 @@ class ServiceSender: def __init__( self, - pulsar_client, + backend, queue, schema, ): self.pub = Publisher( - pulsar_client, queue, + backend, queue, schema=schema, ) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/streamer.py b/trustgraph-flow/trustgraph/gateway/dispatch/streamer.py index 54674906..9c6d4251 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/streamer.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/streamer.py @@ -13,7 +13,7 @@ class ServiceRequestor: def __init__( 
self, - pulsar_client, + backend, queue, schema, handler, subscription="api-gateway", consumer_name="api-gateway", @@ -21,7 +21,7 @@ class ServiceRequestor: ): self.sub = Subscriber( - pulsar_client, queue, + backend, queue, subscription, consumer_name, schema ) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/structured_diag.py b/trustgraph-flow/trustgraph/gateway/dispatch/structured_diag.py index 8dae646d..895b55be 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/structured_diag.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/structured_diag.py @@ -5,12 +5,12 @@ from . requestor import ServiceRequestor class StructuredDiagRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(StructuredDiagRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=StructuredDataDiagnosisRequest, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/structured_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/structured_query.py index f08ef038..9a9fbb6a 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/structured_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/structured_query.py @@ -5,12 +5,12 @@ from . 
requestor import ServiceRequestor class StructuredQueryRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(StructuredQueryRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=StructuredQueryRequest, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py b/trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py index d29d1918..0e77584e 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py @@ -6,12 +6,12 @@ from . requestor import ServiceRequestor class TextCompletionRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(TextCompletionRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=TextCompletionRequest, diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/text_load.py b/trustgraph-flow/trustgraph/gateway/dispatch/text_load.py index 36922c89..b2562938 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/text_load.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/text_load.py @@ -11,10 +11,10 @@ from . 
sender import ServiceSender logger = logging.getLogger(__name__) class TextLoad(ServiceSender): - def __init__(self, pulsar_client, queue): + def __init__(self, backend, queue): super(TextLoad, self).__init__( - pulsar_client = pulsar_client, + backend = backend, queue = queue, schema = TextDocument, ) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/triples_export.py b/trustgraph-flow/trustgraph/gateway/dispatch/triples_export.py index ff91e461..69fc588d 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/triples_export.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/triples_export.py @@ -15,12 +15,12 @@ logger = logging.getLogger(__name__) class TriplesExport: def __init__( - self, ws, running, pulsar_client, queue, consumer, subscriber + self, ws, running, backend, queue, consumer, subscriber ): self.ws = ws self.running = running - self.pulsar_client = pulsar_client + self.backend = backend self.queue = queue self.consumer = consumer self.subscriber = subscriber @@ -48,9 +48,9 @@ class TriplesExport: async def run(self): """Enhanced run with better error handling""" self.subs = Subscriber( - client = self.pulsar_client, + backend = self.backend, topic = self.queue, - consumer_name = self.consumer, + consumer_name = self.consumer, subscription = self.subscriber, schema = Triples, backpressure_strategy = "block" # Configurable diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/triples_import.py b/trustgraph-flow/trustgraph/gateway/dispatch/triples_import.py index 520a9cbc..6bb46975 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/triples_import.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/triples_import.py @@ -16,14 +16,14 @@ logger = logging.getLogger(__name__) class TriplesImport: def __init__( - self, ws, running, pulsar_client, queue + self, ws, running, backend, queue ): self.ws = ws self.running = running self.publisher = Publisher( - pulsar_client, topic = queue, schema = Triples + backend, topic = queue, schema = 
Triples ) async def start(self): diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py index d2def9c1..6b306139 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py @@ -6,12 +6,12 @@ from . requestor import ServiceRequestor class TriplesQueryRequestor(ServiceRequestor): def __init__( - self, pulsar_client, request_queue, response_queue, timeout, + self, backend, request_queue, response_queue, timeout, consumer, subscriber, ): super(TriplesQueryRequestor, self).__init__( - pulsar_client=pulsar_client, + backend=backend, request_queue=request_queue, response_queue=response_queue, request_schema=TriplesQueryRequest, diff --git a/trustgraph-flow/trustgraph/gateway/service.py b/trustgraph-flow/trustgraph/gateway/service.py index 3ebed6af..aaa6f725 100755 --- a/trustgraph-flow/trustgraph/gateway/service.py +++ b/trustgraph-flow/trustgraph/gateway/service.py @@ -10,6 +10,7 @@ import logging import os from trustgraph.base.logging import setup_logging +from trustgraph.base.pubsub import get_pubsub from . auth import Authenticator from . 
config.receiver import ConfigReceiver @@ -50,15 +51,8 @@ class Api: self.pulsar_listener = config.get("pulsar_listener", None) - if self.pulsar_api_key: - self.pulsar_client = pulsar.Client( - self.pulsar_host, listener_name=self.pulsar_listener, - authentication=pulsar.AuthenticationToken(self.pulsar_api_key) - ) - else: - self.pulsar_client = pulsar.Client( - self.pulsar_host, listener_name=self.pulsar_listener, - ) + # Create backend using factory + self.pubsub_backend = get_pubsub(**config) self.prometheus_url = config.get( "prometheus_url", default_prometheus_url, @@ -75,7 +69,7 @@ class Api: else: self.auth = Authenticator(allow_all=True) - self.config_receiver = ConfigReceiver(self.pulsar_client) + self.config_receiver = ConfigReceiver(self.pubsub_backend) # Build queue overrides dictionary from CLI arguments queue_overrides = {} @@ -121,7 +115,7 @@ class Api: queue_overrides["librarian"]["response"] = librarian_resp self.dispatcher_manager = DispatcherManager( - pulsar_client = self.pulsar_client, + backend = self.pubsub_backend, config_receiver = self.config_receiver, prefix = "gateway", queue_overrides = queue_overrides, @@ -174,6 +168,14 @@ def run(): help='Service identifier for logging and metrics (default: api-gateway)', ) + # Pub/sub backend selection + parser.add_argument( + '--pubsub-backend', + default=os.getenv('PUBSUB_BACKEND', 'pulsar'), + choices=['pulsar', 'mqtt'], + help='Pub/sub backend (default: pulsar, env: PUBSUB_BACKEND)', + ) + parser.add_argument( '-p', '--pulsar-host', default=default_pulsar_host, diff --git a/trustgraph-flow/trustgraph/librarian/service.py b/trustgraph-flow/trustgraph/librarian/service.py index e3ec6977..a569dea8 100755 --- a/trustgraph-flow/trustgraph/librarian/service.py +++ b/trustgraph-flow/trustgraph/librarian/service.py @@ -143,7 +143,7 @@ class Processor(AsyncProcessor): self.librarian_request_consumer = Consumer( taskgroup = self.taskgroup, - client = self.pulsar_client, + backend = self.pubsub, flow = None, 
topic = librarian_request_queue, subscriber = id, @@ -153,7 +153,7 @@ class Processor(AsyncProcessor): ) self.librarian_response_producer = Producer( - client = self.pulsar_client, + backend = self.pubsub, topic = librarian_response_queue, schema = LibrarianResponse, metrics = librarian_response_metrics, @@ -161,7 +161,7 @@ class Processor(AsyncProcessor): self.collection_request_consumer = Consumer( taskgroup = self.taskgroup, - client = self.pulsar_client, + backend = self.pubsub, flow = None, topic = collection_request_queue, subscriber = id, @@ -171,7 +171,7 @@ class Processor(AsyncProcessor): ) self.collection_response_producer = Producer( - client = self.pulsar_client, + backend = self.pubsub, topic = collection_response_queue, schema = CollectionManagementResponse, metrics = collection_response_metrics, @@ -183,7 +183,7 @@ class Processor(AsyncProcessor): ) self.config_request_producer = Producer( - client = self.pulsar_client, + backend = self.pubsub, topic = config_request_queue, schema = ConfigRequest, metrics = config_request_metrics, @@ -195,7 +195,7 @@ class Processor(AsyncProcessor): self.config_response_consumer = Consumer( taskgroup = self.taskgroup, - client = self.pulsar_client, + backend = self.pubsub, flow = None, topic = config_response_queue, subscriber = f"{id}-config", @@ -299,14 +299,13 @@ class Processor(AsyncProcessor): collection = processing.collection ), data = base64.b64encode(content).decode("utf-8") - ) schema = Document logger.debug(f"Submitting to queue {q}...") pub = Publisher( - self.pulsar_client, q, schema=schema + self.pubsub, q, schema=schema ) await pub.start() diff --git a/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py b/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py index 670d71a1..ec67a072 100755 --- a/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py +++ b/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py @@ -98,16 +98,16 @@ class Processor(FlowProcessor): async def send_chunk(chunk): 
await flow("response").send( DocumentRagResponse( - chunk=chunk, + response=chunk, end_of_stream=False, - response=None, error=None ), properties={"id": id} ) # Query with streaming enabled - full_response = await self.rag.query( + # The query returns the last chunk (not accumulated text) + final_response = await self.rag.query( v.query, user=v.user, collection=v.collection, @@ -116,12 +116,11 @@ class Processor(FlowProcessor): chunk_callback=send_chunk, ) - # Send final message with complete response + # Send final message with last chunk await flow("response").send( DocumentRagResponse( - chunk=None, + response=final_response if final_response else "", end_of_stream=True, - response=full_response, error=None ), properties={"id": id} diff --git a/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py b/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py index 565921a3..de1f0e24 100755 --- a/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py +++ b/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py @@ -141,16 +141,16 @@ class Processor(FlowProcessor): async def send_chunk(chunk): await flow("response").send( GraphRagResponse( - chunk=chunk, + response=chunk, end_of_stream=False, - response=None, error=None ), properties={"id": id} ) # Query with streaming enabled - full_response = await rag.query( + # The query will send chunks via callback AND return the complete text + final_response = await rag.query( query = v.query, user = v.user, collection = v.collection, entity_limit = entity_limit, triple_limit = triple_limit, max_subgraph_size = max_subgraph_size, @@ -159,12 +159,12 @@ class Processor(FlowProcessor): chunk_callback = send_chunk, ) - # Send final message with complete response + # Send final message - may have last chunk of content with end_of_stream=True + # (prompt service may send final chunk with text, so we pass through whatever we got) await flow("response").send( GraphRagResponse( - chunk=None, + response=final_response if final_response else 
"", end_of_stream=True, - response=full_response, error=None ), properties={"id": id} diff --git a/trustgraph-flow/trustgraph/rev_gateway/dispatcher.py b/trustgraph-flow/trustgraph/rev_gateway/dispatcher.py index 03e79c0d..986558ec 100644 --- a/trustgraph-flow/trustgraph/rev_gateway/dispatcher.py +++ b/trustgraph-flow/trustgraph/rev_gateway/dispatcher.py @@ -26,19 +26,19 @@ class WebSocketResponder: self.completed = True class MessageDispatcher: - - def __init__(self, max_workers: int = 10, config_receiver=None, pulsar_client=None): + + def __init__(self, max_workers: int = 10, config_receiver=None, backend=None): self.max_workers = max_workers self.semaphore = asyncio.Semaphore(max_workers) self.active_tasks = set() - self.pulsar_client = pulsar_client - + self.backend = backend + # Use DispatcherManager for flow and service management - if pulsar_client and config_receiver: - self.dispatcher_manager = DispatcherManager(pulsar_client, config_receiver, prefix="rev-gateway") + if backend and config_receiver: + self.dispatcher_manager = DispatcherManager(backend, config_receiver, prefix="rev-gateway") else: self.dispatcher_manager = None - logger.warning("No pulsar_client or config_receiver provided - using fallback mode") + logger.warning("No backend or config_receiver provided - using fallback mode") # Service name mapping from websocket protocol to translator registry self.service_mapping = { @@ -78,7 +78,7 @@ class MessageDispatcher: try: if not self.dispatcher_manager: - raise RuntimeError("DispatcherManager not available - pulsar_client and config_receiver required") + raise RuntimeError("DispatcherManager not available - backend and config_receiver required") # Use DispatcherManager for flow-based processing responder = WebSocketResponder() diff --git a/trustgraph-flow/trustgraph/rev_gateway/service.py b/trustgraph-flow/trustgraph/rev_gateway/service.py index c8e78af2..cc905172 100644 --- a/trustgraph-flow/trustgraph/rev_gateway/service.py +++ 
b/trustgraph-flow/trustgraph/rev_gateway/service.py @@ -7,10 +7,10 @@ import os from aiohttp import ClientSession, WSMsgType, ClientWebSocketResponse from typing import Optional from urllib.parse import urlparse, urlunparse -import pulsar from .dispatcher import MessageDispatcher from ..gateway.config.receiver import ConfigReceiver +from ..base import get_pubsub logger = logging.getLogger("rev_gateway") logger.setLevel(logging.INFO) @@ -56,25 +56,20 @@ class ReverseGateway: self.pulsar_host = pulsar_host or os.getenv("PULSAR_HOST", "pulsar://pulsar:6650") self.pulsar_api_key = pulsar_api_key or os.getenv("PULSAR_API_KEY", None) self.pulsar_listener = pulsar_listener - - # Initialize Pulsar client - if self.pulsar_api_key: - self.pulsar_client = pulsar.Client( - self.pulsar_host, - listener_name=self.pulsar_listener, - authentication=pulsar.AuthenticationToken(self.pulsar_api_key) - ) - else: - self.pulsar_client = pulsar.Client( - self.pulsar_host, - listener_name=self.pulsar_listener - ) - + + # Create backend using factory + backend_params = { + 'pulsar_host': self.pulsar_host, + 'pulsar_api_key': self.pulsar_api_key, + 'pulsar_listener': self.pulsar_listener, + } + self.backend = get_pubsub(**backend_params) + # Initialize config receiver - self.config_receiver = ConfigReceiver(self.pulsar_client) - - # Initialize dispatcher with config_receiver and pulsar_client - must be created after config_receiver - self.dispatcher = MessageDispatcher(max_workers, self.config_receiver, self.pulsar_client) + self.config_receiver = ConfigReceiver(self.backend) + + # Initialize dispatcher with config_receiver and backend - must be created after config_receiver + self.dispatcher = MessageDispatcher(max_workers, self.config_receiver, self.backend) async def connect(self) -> bool: try: @@ -170,10 +165,10 @@ class ReverseGateway: self.running = False await self.dispatcher.shutdown() await self.disconnect() - - # Close Pulsar client - if hasattr(self, 'pulsar_client'): - 
self.pulsar_client.close() + + # Close backend + if hasattr(self, 'backend'): + self.backend.close() def stop(self): self.running = False diff --git a/trustgraph-flow/trustgraph/storage/objects/cassandra/write.py b/trustgraph-flow/trustgraph/storage/objects/cassandra/write.py index c5b8af06..05b8100d 100644 --- a/trustgraph-flow/trustgraph/storage/objects/cassandra/write.py +++ b/trustgraph-flow/trustgraph/storage/objects/cassandra/write.py @@ -78,7 +78,7 @@ class Processor(FlowProcessor): # Create storage management consumer self.storage_request_consumer = Consumer( taskgroup=self.taskgroup, - client=self.pulsar_client, + backend=self.pubsub, flow=None, topic=object_storage_management_topic, subscriber=f"{id}-storage", @@ -89,7 +89,7 @@ class Processor(FlowProcessor): # Create storage management response producer self.storage_response_producer = Producer( - client=self.pulsar_client, + backend=self.pubsub, topic=storage_management_response_topic, schema=StorageManagementResponse, metrics=storage_response_metrics, diff --git a/trustgraph-flow/trustgraph/tables/library.py b/trustgraph-flow/trustgraph/tables/library.py index b5465f90..0a7c6081 100644 --- a/trustgraph-flow/trustgraph/tables/library.py +++ b/trustgraph-flow/trustgraph/tables/library.py @@ -338,7 +338,6 @@ class LibraryTableStore: for m in row[5] ], tags = row[6] if row[6] else [], - object_id = row[7], ) for row in resp ] @@ -384,7 +383,6 @@ class LibraryTableStore: for m in row[4] ], tags = row[5] if row[5] else [], - object_id = row[6], ) logger.debug("Done")