From 4fb0b4d8e80abd766e0b94e95b234c7a7117d18f Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Wed, 1 Apr 2026 20:16:53 +0100 Subject: [PATCH] Pub/sub abstraction: decouple from Pulsar (#751) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove Pulsar-specific concepts from application code so that the pub/sub backend is swappable via configuration. Rename translators: - to_pulsar/from_pulsar → decode/encode across all translator classes, dispatch handlers, and tests (55+ files) - from_response_with_completion → encode_with_completion - Remove pulsar.schema.Record from translator base class Queue naming (CLASS:TOPICSPACE:TOPIC): - Replace topic() helper with queue() using new format: flow:tg:name, request:tg:name, response:tg:name, state:tg:name - Queue class implies persistence/TTL (no QoS in names) - Update Pulsar backend map_topic() to parse new format - Librarian queues use flow class (persistent, for chunking) - Config push uses state class (persistent, last-value) - Remove 15 dead topic imports from schema files - Update init_trustgraph.py namespace: config → state Confine Pulsar to pulsar_backend.py: - Delete legacy PulsarClient class from pubsub.py - Move add_args to add_pubsub_args() with standalone flag for CLI tools (defaults to localhost) - PulsarBackendConsumer.receive() catches _pulsar.Timeout, raises standard TimeoutError - Remove Pulsar imports from: async_processor, flow_processor, log_level, all 11 client files, 4 storage writers, gateway service, gateway config receiver - Remove log_level/LoggerLevel from client API - Rewrite tg-monitor-prompts to use backend abstraction - Update tg-dump-queues to use add_pubsub_args Also: pubsub-abstraction.md tech spec covering problem statement, design goals, as-is requirements, candidate broker assessment, approach, and implementation order. 
--- docs/tech-specs/pubsub-abstraction.md | 551 ++++++++++++++++++ .../test_document_embeddings_contract.py | 40 +- .../test_translator_completion_flags.py | 24 +- .../test_sync_document_embeddings_client.py | 4 +- .../unit/test_gateway/test_dispatch_config.py | 8 +- .../test_streaming_translators.py | 56 +- .../test_text_document_translator.py | 12 +- tests/unit/test_pubsub/test_queue_naming.py | 133 +++++ tests/unit/test_rdf/test_rdf_wire_format.py | 110 ++-- .../test_metadata_preservation.py | 30 +- .../test_message_translation.py | 16 +- trustgraph-base/trustgraph/base/__init__.py | 2 +- .../trustgraph/base/async_processor.py | 17 +- .../trustgraph/base/flow_processor.py | 2 - trustgraph-base/trustgraph/base/pubsub.py | 118 ++-- .../trustgraph/base/pulsar_backend.py | 47 +- .../trustgraph/clients/agent_client.py | 8 +- trustgraph-base/trustgraph/clients/base.py | 14 +- .../trustgraph/clients/config_client.py | 8 +- .../clients/document_embeddings_client.py | 8 +- .../trustgraph/clients/document_rag_client.py | 7 - .../trustgraph/clients/embeddings_client.py | 9 +- .../clients/graph_embeddings_client.py | 8 +- .../trustgraph/clients/graph_rag_client.py | 7 - .../trustgraph/clients/llm_client.py | 8 +- .../trustgraph/clients/prompt_client.py | 8 +- .../clients/row_embeddings_client.py | 8 +- .../clients/triples_query_client.py | 8 +- trustgraph-base/trustgraph/log_level.py | 10 +- .../trustgraph/messaging/translators/agent.py | 12 +- .../trustgraph/messaging/translators/base.py | 47 +- .../messaging/translators/collection.py | 8 +- .../messaging/translators/config.py | 12 +- .../messaging/translators/diagnosis.py | 12 +- .../messaging/translators/document_loading.py | 16 +- .../messaging/translators/embeddings.py | 12 +- .../messaging/translators/embeddings_query.py | 38 +- .../trustgraph/messaging/translators/flow.py | 12 +- .../messaging/translators/knowledge.py | 24 +- .../messaging/translators/library.py | 26 +- .../messaging/translators/metadata.py | 12 +- 
.../messaging/translators/nlp_query.py | 12 +- .../messaging/translators/primitives.py | 56 +- .../messaging/translators/prompt.py | 12 +- .../messaging/translators/retrieval.py | 24 +- .../messaging/translators/rows_query.py | 12 +- .../messaging/translators/structured_query.py | 12 +- .../messaging/translators/text_completion.py | 12 +- .../trustgraph/messaging/translators/tool.py | 12 +- .../messaging/translators/triples.py | 26 +- .../trustgraph/schema/core/topic.py | 31 +- .../trustgraph/schema/knowledge/document.py | 1 - .../trustgraph/schema/knowledge/embeddings.py | 1 - .../trustgraph/schema/knowledge/graph.py | 1 - .../trustgraph/schema/knowledge/knowledge.py | 10 +- .../trustgraph/schema/knowledge/nlp.py | 1 - .../trustgraph/schema/knowledge/object.py | 1 - .../trustgraph/schema/knowledge/rows.py | 1 - .../trustgraph/schema/knowledge/structured.py | 1 - .../trustgraph/schema/services/agent.py | 1 - .../trustgraph/schema/services/collection.py | 10 +- .../trustgraph/schema/services/config.py | 14 +- .../trustgraph/schema/services/flow.py | 10 +- .../trustgraph/schema/services/library.py | 10 +- .../trustgraph/schema/services/llm.py | 1 - .../trustgraph/schema/services/lookup.py | 1 - .../trustgraph/schema/services/nlp_query.py | 1 - .../trustgraph/schema/services/prompt.py | 1 - .../trustgraph/schema/services/query.py | 18 +- .../trustgraph/schema/services/retrieval.py | 1 - .../trustgraph/schema/services/rows_query.py | 1 - .../schema/services/structured_query.py | 1 - trustgraph-cli/trustgraph/cli/dump_queues.py | 51 +- .../trustgraph/cli/init_trustgraph.py | 2 +- .../trustgraph/cli/monitor_prompts.py | 94 ++- .../trustgraph/extract/kg/rows/processor.py | 2 +- .../trustgraph/gateway/config/receiver.py | 1 - .../trustgraph/gateway/dispatch/agent.py | 4 +- .../gateway/dispatch/collection_management.py | 4 +- .../trustgraph/gateway/dispatch/config.py | 4 +- .../dispatch/document_embeddings_import.py | 2 +- .../dispatch/document_embeddings_query.py | 4 +- 
.../gateway/dispatch/document_load.py | 2 +- .../gateway/dispatch/document_rag.py | 4 +- .../trustgraph/gateway/dispatch/embeddings.py | 4 +- .../trustgraph/gateway/dispatch/flow.py | 4 +- .../dispatch/graph_embeddings_query.py | 4 +- .../trustgraph/gateway/dispatch/graph_rag.py | 4 +- .../trustgraph/gateway/dispatch/knowledge.py | 4 +- .../trustgraph/gateway/dispatch/librarian.py | 4 +- .../trustgraph/gateway/dispatch/mcp_tool.py | 4 +- .../trustgraph/gateway/dispatch/nlp_query.py | 4 +- .../trustgraph/gateway/dispatch/prompt.py | 4 +- .../gateway/dispatch/row_embeddings_query.py | 4 +- .../trustgraph/gateway/dispatch/rows_query.py | 4 +- .../trustgraph/gateway/dispatch/serialize.py | 8 +- .../gateway/dispatch/structured_diag.py | 4 +- .../gateway/dispatch/structured_query.py | 4 +- .../gateway/dispatch/text_completion.py | 4 +- .../trustgraph/gateway/dispatch/text_load.py | 2 +- .../gateway/dispatch/triples_query.py | 4 +- trustgraph-flow/trustgraph/gateway/service.py | 1 - .../storage/triples/cassandra/write.py | 1 - .../storage/triples/falkordb/write.py | 1 - .../storage/triples/memgraph/write.py | 1 - .../trustgraph/storage/triples/neo4j/write.py | 1 - 106 files changed, 1269 insertions(+), 788 deletions(-) create mode 100644 docs/tech-specs/pubsub-abstraction.md create mode 100644 tests/unit/test_pubsub/test_queue_naming.py diff --git a/docs/tech-specs/pubsub-abstraction.md b/docs/tech-specs/pubsub-abstraction.md new file mode 100644 index 00000000..722b3b47 --- /dev/null +++ b/docs/tech-specs/pubsub-abstraction.md @@ -0,0 +1,551 @@ +# Pub/Sub Abstraction: Broker-Independent Messaging + +## Problem + +TrustGraph's messaging infrastructure is deeply coupled to Apache Pulsar in ways that go beyond the transport layer. This coupling creates several concrete problems. + +### 1. Schema system is Pulsar-native + +Every message type in the system is defined as a `pulsar.schema.Record` subclass using Pulsar field types (`String()`, `Integer()`, `Boolean()`, etc.). 
This means: + +- The `pulsar` Python package is a build dependency for `trustgraph-base`, even though `trustgraph-base` contains no transport logic +- Any code that imports a message schema transitively depends on Pulsar +- The schema definitions cannot be reused with a different broker without the Pulsar library installed +- What's actually happening on the wire is JSON serialisation — the Pulsar schema machinery adds complexity without adding value over plain JSON encode/decode + +### 2. Translators are named after the broker + +The translator layer that converts between internal Python objects and wire format uses methods called `to_pulsar()` and `from_pulsar()`. These are really just JSON encode/decode operations — they have nothing to do with Pulsar specifically. The naming creates a false impression that the translation is broker-specific, when in reality any broker that carries JSON payloads would use identical logic. + +### 3. Queue names use Pulsar URI format + +Queue identifiers throughout the codebase use Pulsar's `persistent://tenant/namespace/topic` or `non-persistent://tenant/namespace/topic` URI format. These are hardcoded in schema definitions and referenced across services. RabbitMQ, Redis Streams, or any other broker would use completely different naming conventions. There is no abstraction between the logical identity of a queue and its broker-specific address. + +### 4. Broker selection is not configurable + +There is no mechanism to select a different pub/sub backend at deployment time. The Pulsar client is instantiated directly in the gateway and via `PulsarClient` in the base processor. Switching to a different broker would require code changes across multiple packages, not a configuration change. + +### 5. 
Architectural requirements are implicit + +TrustGraph relies on specific pub/sub behaviours — shared subscriptions for load balancing, message acknowledgement for reliability, message properties for correlation — but these requirements are not documented. This makes it difficult to evaluate whether a candidate broker (RabbitMQ, Redis Streams, NATS, etc.) actually satisfies the system's needs, or where the gaps would be. + +## Design Goals + +### Goal 1: Remove the link between Pulsar schemas and application code + +Message types should be plain Python objects (dataclasses) that know how to serialise to and from JSON. The `pulsar.schema.Record` base class and Pulsar field types should not appear in schema definitions. The pub/sub transport layer sends and receives JSON bytes; the schema layer handles the mapping between JSON and typed Python objects independently. + +### Goal 2: Remove `to_pulsar` / `from_pulsar` naming + +The translator methods should reflect what they actually do: encode a Python object to a JSON-compatible dict, and decode a JSON-compatible dict back to a Python object. The naming should be broker-neutral (e.g. `encode` / `decode`, or `to_dict` / `from_dict`). + +### Goal 3: Schema objects provide encode/decode + +Each message type should be a Python dataclass (or similar) with a well-defined mapping to and from JSON. For example: + +```python +@dataclass +class TextCompletionRequest: + system: str + prompt: str + streaming: bool = False +``` + +Given `{"system": "You are helpful", "prompt": "Hello", "streaming": false}` on the wire, decoding produces an object where `request.system` is `"You are helpful"`, `request.prompt` is `"Hello"`, and `request.streaming` is `False`. Encoding does the reverse. This is the schema's concern, not the broker's. + +### Goal 4: Abstract queue naming + +Queue identifiers should not use Pulsar URI format (`persistent://tg/flow/topic`). 
A broker-neutral naming scheme is needed so that each backend can map logical queue names to its native format. The right approach here is not yet clear and needs to be worked through — considerations include how to express quality-of-service, multi-tenancy, and namespace separation without leaking broker concepts. + +### Goal 5: Document pub/sub architectural requirements + +TrustGraph's actual requirements from the pub/sub layer need to be formally specified. This includes: + +- **Delivery semantics**: Which queues need at-least-once delivery? Are any fire-and-forget? +- **Consumer patterns**: Shared subscriptions (competing consumers for load balancing), exclusive subscriptions, fan-out/broadcast +- **Message acknowledgement**: Positive ack, negative ack (redelivery), timeout-based redelivery +- **Message properties**: Key-value metadata on messages used for correlation (e.g. request IDs, flow routing) +- **Ordering guarantees**: Per-topic ordering, per-key ordering, or no ordering required +- **Message size**: Typical and maximum message sizes (some payloads include base64-encoded documents) +- **Persistence**: Which messages must survive broker restarts +- **Consumer positioning**: Ability to consume from earliest (replay) vs latest (live tail) +- **Connection model**: Long-lived connections with reconnection, or transient + +Documenting these requirements makes it possible to evaluate RabbitMQ or any other candidate against concrete criteria rather than discovering gaps during implementation. + +## Pub/Sub Architectural Requirements (As-Is) + +This section documents what TrustGraph currently needs from its pub/sub layer. These are the as-is requirements — some may be revisited or relaxed in a future design if it makes broker portability easier. + +### Consumer model + +All consumers use **shared subscriptions** (competing consumers). 
Multiple instances of the same processor read from the same subscription, and each message is delivered to exactly one instance. This is the load-balancing mechanism. + +No exclusive or failover subscriptions are used anywhere in the codebase, despite infrastructure support for them. + +Consumers support configurable concurrency — multiple async tasks within a single process can independently call `receive()` on the same subscription. + +### Delivery semantics + +Almost all queues are **non-persistent / best-effort (q0)**. The only persistent queue is `config_push_queue` (q2, exactly-once), which pushes full configuration state to processors. Since config pushes are idempotent (full state, not deltas), the persistence requirement here is about surviving broker restarts, not about exactly-once semantics per se. + +Flow processing queues (request/response pairs for LLM, RAG, agent, etc.) are all non-persistent. Messages in flight are lost on broker restart. This is acceptable because: + +- Requests originate from a client that will time out and retry +- There is no durable work-in-progress that would be corrupted by message loss +- The system is designed for real-time query processing, not batch pipelines + +### Message acknowledgement + +**Positive acknowledgement**: After successful handler execution, the message is acknowledged. This removes it from the subscription. + +**Negative acknowledgement**: On handler failure (unhandled exception or rate-limit timeout), the message is negatively acknowledged, which triggers redelivery by the broker. Rate-limited messages retry for up to 7200 seconds before giving up and negatively acknowledging. + +**Orphaned messages**: In the request-response subscriber pattern, messages that arrive with no matching waiter (e.g. the requester timed out) are positively acknowledged and discarded. This prevents redelivery storms. 
+ +### Message properties + +Messages carry a small set of key-value string properties as metadata, separate from the payload. The primary use is an `"id"` property for request-response correlation — the requester generates a unique ID, attaches it as a property, and the responder echoes it back so the subscriber can match responses to waiters. + +Agent orchestration correlation (`correlation_id`, `parent_session_id`) is carried in the message payload, not in properties. + +### Consumer positioning + +Two modes are used: + +- **Earliest**: The configuration consumer starts from the beginning of the topic to receive full configuration history on startup. This is the only use of earliest positioning. +- **Latest** (default): All flow consumers start from the current position, processing only new messages. + +### Message ordering + +**Not required.** The codebase explicitly does not depend on message ordering: + +- Shared subscriptions distribute messages across consumers without ordering guarantees +- Concurrent handler tasks within a single consumer handle messages in arbitrary order +- Request-response correlation uses IDs, not positional ordering +- The supervisor fan-out/fan-in pattern collects results in a dictionary, order-independent +- Configuration pushes are full state snapshots, not ordered deltas + +### Message sizes + +Most messages are small JSON payloads (< 10KB). The exceptions: + +- **Document content**: Large documents (PDFs, text files) can be sent through the chunking service with base64 encoding. Pulsar's chunking feature (`chunking_enabled`) handles automatic splitting of oversized messages. +- **Agent observations**: LLM-generated text can be several KB but rarely exceeds typical message size limits. + +A replacement broker needs to either support large messages natively or provide a chunking/streaming mechanism. Alternatively, the large-document path could be refactored to use a side-channel (e.g. object store reference) instead of inline payload. 
+ +### Fan-out patterns + +**Supervisor fan-out**: One supervisor request decomposes into N independent sub-agent requests, each emitted as a separate message on the agent request queue. Different agent instances pick them up via the shared subscription. A correlation ID links the completions back to the original decomposition. This is not pub/sub fan-out (one message to many consumers) — it's application-level fan-out (many messages to one queue). + +**Request-response isolation**: Each client creates a unique subscription name on response queues so it only receives its own responses. This means the response queue effectively has many independent subscribers, each seeing a filtered subset of messages based on the `"id"` property match. + +### Reconnection and resilience + +Reconnection logic lives in the Consumer/Producer/Publisher/Subscriber classes, not in the broker client. These classes handle: + +- Automatic reconnection on connection loss +- Retry loops with backoff +- Graceful shutdown (unsubscribe, close) + +The broker client itself is expected to provide a basic connection that can fail, and the wrapper classes handle recovery. This is important for the abstraction — the backend interface can be simple because resilience is handled above it. 
+ +### Queue inventory + +| Queue | Persistence | Purpose | +|-------|-------------|---------| +| config push | Persistent (q2) | Full configuration state broadcast | +| config request/response | Non-persistent | Configuration queries | +| flow request/response | Non-persistent | Flow management | +| knowledge request/response | Non-persistent | Knowledge graph operations | +| librarian request/response | Non-persistent | Document storage operations | +| document embeddings request/response | Non-persistent | Document vector queries | +| row embeddings request/response | Non-persistent | Row vector queries | +| collection request/response | Non-persistent | Collection management | + +Additionally, each processing service (LLM, RAG, agent, prompt, embeddings, etc.) has dynamically defined request/response queue pairs configured at deployment time. + +### Summary of hard requirements for a replacement broker + +1. **Shared subscription / competing consumers** — multiple consumers on one queue, each message delivered to exactly one +2. **Message acknowledgement** — positive ack (remove from queue) and negative ack (trigger redelivery) +3. **Message properties** — key-value metadata on messages, at minimum a string `"id"` field +4. **Two consumer start positions** — from beginning of topic and from current position +5. **Persistence for at least one queue** — config state must survive broker restart +6. **Messages up to several MB** — or a chunking mechanism for large payloads +7. **No ordering requirement** — simplifies broker selection significantly + +## Candidate Brokers + +A quick assessment of alternatives against the hard requirements above. + +### RabbitMQ + +The primary candidate. Mature, widely deployed, well understood. + +- **Competing consumers**: Yes — multiple consumers on a queue, round-robin delivery. This is RabbitMQ's native model. +- **Acknowledgement**: Yes — `basic.ack` and `basic.nack` with requeue flag. 
+- **Message properties**: Yes — headers and properties on every message. The `correlation_id` and `message_id` fields are first-class concepts. +- **Consumer positioning**: Yes, via RabbitMQ Streams (3.9+). Streams are append-only logs that support reading from any offset — beginning, end, or timestamp. Classic queues are consumed destructively (no replay), but streams solve this cleanly. The `state` queue class maps to a RabbitMQ stream. Additionally, the Last Value Cache Exchange plugin can retain the most recent message per routing key for new consumers. +- **Persistence**: Yes — durable queues and persistent messages survive broker restart. +- **Large messages**: No hard limit but not designed for very large payloads. Practical limit around 128MB with default config. Adequate for current use. +- **Ordering**: FIFO per queue (stronger than required). +- **Operational complexity**: Low. Single binary, no ZooKeeper/BookKeeper dependencies. Significantly simpler to operate than Pulsar. +- **Ecosystem**: Excellent client libraries, management UI, mature tooling. + +**Gaps**: None significant. RabbitMQ Streams cover the replay/earliest positioning requirement. + +### Apache Kafka + +High-throughput distributed log. More infrastructure than TrustGraph likely needs. + +- **Competing consumers**: Yes — consumer groups with partition assignment. +- **Acknowledgement**: Yes — offset commits. No per-message negative ack; failed messages require application-level retry or dead-letter handling. +- **Message properties**: Yes — message headers (key-value byte arrays). +- **Consumer positioning**: Yes — seek to earliest or latest offset. Supports full replay. +- **Persistence**: Yes — all messages are persisted to the log by default. +- **Large messages**: Configurable (`max.message.bytes`), default 1MB, can be increased. Large payloads are discouraged by design. +- **Ordering**: Per-partition ordering (stronger than required). +- **Operational complexity**: High. 
Requires ZooKeeper (or KRaft), partition management, replication config. Overkill for typical TrustGraph deployments. +- **Ecosystem**: Excellent client libraries, schema registry, Connect framework. + +**Gaps**: No native negative acknowledgement. Operational complexity is high for small-to-medium deployments. Partition count must be planned upfront for parallelism. + +### Redis Streams + +Lightweight option using Redis as a message broker. + +- **Competing consumers**: Yes — consumer groups with `XREADGROUP`. +- **Acknowledgement**: Yes — `XACK`. Pending entries list tracks unacknowledged messages. No explicit negative ack but unacknowledged messages can be claimed after timeout via `XAUTOCLAIM`. +- **Message properties**: No native separation between properties and payload. Would need to encode properties as fields within the stream entry or in the payload. +- **Consumer positioning**: Yes — `0` (earliest) or `$` (latest) on group creation. +- **Persistence**: Yes — Redis persistence (RDB/AOF), though Redis is primarily an in-memory system. +- **Large messages**: Practical limit tied to Redis memory. Not suited for large payloads. +- **Ordering**: Per-stream ordering (stronger than required). +- **Operational complexity**: Low if Redis is already in the stack. No additional infrastructure. + +**Gaps**: No native message properties. Memory-bound. Persistence depends on Redis configuration. Not a natural fit for message broker patterns. + +### NATS / NATS JetStream + +Lightweight, high-performance messaging. JetStream adds persistence. + +- **Competing consumers**: Yes — queue groups in core NATS; consumer groups in JetStream. +- **Acknowledgement**: JetStream only — `Ack`, `Nak` (with redelivery), `InProgress` (extend timeout). +- **Message properties**: Yes — message headers (key-value). +- **Consumer positioning**: JetStream — deliver all, deliver last, deliver new, deliver by sequence/time. +- **Persistence**: JetStream only. Core NATS is fire-and-forget. 
+- **Large messages**: Default 1MB, configurable up to 64MB. +- **Ordering**: Per-subject ordering. +- **Operational complexity**: Very low. Single binary, no dependencies. Clustering is straightforward. + +**Gaps**: Requires JetStream for persistence and acknowledgement. Smaller ecosystem than RabbitMQ/Kafka. + +### Assessment Summary + +| Requirement | RabbitMQ | Kafka | Redis Streams | NATS JetStream | +|---|---|---|---|---| +| Competing consumers | Yes | Yes | Yes | Yes | +| Positive/negative ack | Yes | Partial | Partial | Yes | +| Message properties | Yes | Yes | No | Yes | +| Earliest positioning | Yes (Streams) | Yes | Yes | Yes | +| Persistence | Yes | Yes | Partial | Yes | +| Large messages | Yes | Configurable | No | Configurable | +| Operational simplicity | Good | Poor | Good | Good | + +**RabbitMQ** is the strongest candidate given TrustGraph's requirements and deployment profile. The one requirement that classic RabbitMQ queues do not meet — earliest consumer positioning for config — is covered by RabbitMQ Streams (or the Last Value Cache Exchange plugin), so no significant gap remains. Operational simplicity is a significant advantage over Pulsar. + +## Approach + +### Current state + +The codebase has already undergone a partial abstraction. The picture is better than the problem statement might suggest: + +- **Backend abstraction exists**: `backend.py` defines Protocol-based interfaces (`PubSubBackend`, `BackendProducer`, `BackendConsumer`, `Message`). The Pulsar implementation lives in `pulsar_backend.py`. +- **Schemas are already dataclasses**: Message types in `schema/services/*.py` are plain Python dataclasses with type hints, not Pulsar `Record` subclasses. This was the hardest part of the old spec and it's done. +- **Serialization is JSON-based**: `pulsar_backend.py` contains `dataclass_to_dict()` and `dict_to_dataclass()` helpers that handle the round-trip. The wire format is JSON. +- **Factory pattern exists**: `pubsub.py` has `get_pubsub()` which creates a backend from configuration. Currently only Pulsar is implemented. 
+- **Consumer/Producer/Publisher/Subscriber are backend-agnostic**: These classes accept a `backend` parameter and delegate transport operations to it. They own retry, reconnection, metrics, and concurrency. + +What remains is cleanup, not a rewrite. + +### What needs to change + +#### 1. Rename translator methods + +The translator base class (`messaging/translators/base.py`) defines `to_pulsar()` and `from_pulsar()` as abstract methods. Every translator implements these. The methods convert between external API dicts and internal dataclass objects — nothing Pulsar-specific happens in them. + +**Change**: Rename to `decode()` (external dict → dataclass) and `encode()` (dataclass → external dict). Update all translator subclasses and all call sites. + +This is a mechanical rename. The method bodies don't change. + +#### 2. Rename translator base classes + +The base classes `Translator`, `MessageTranslator`, and `SendTranslator` reference "pulsar" in docstrings and parameter names. Clean these up so the naming reflects what the layer actually does: translating between the external API representation (JSON dicts from HTTP/WebSocket) and the internal schema (dataclasses). + +#### 3. Move serialization out of the Pulsar backend + +`dataclass_to_dict()` and `dict_to_dataclass()` currently live in `pulsar_backend.py` but are not Pulsar-specific. They handle the conversion between dataclasses and JSON-compatible dicts, which every backend needs. + +**Change**: Move these to a shared location (e.g. `trustgraph/base/serialization.py` or alongside the schema definitions). The backend interface sends and receives dicts; serialization to/from dataclasses happens at a layer above. + +This means the backend Protocol simplifies: `send()` accepts a dict and properties, `value()` returns a dict. The Consumer/Producer layer handles dataclass ↔ dict conversion using the shared serializers. + +#### 4. 
Abstract queue naming + +Queue names currently use the format `q0/tg/flow/queue-name` or `q2/tg/config/queue-name`, which the Pulsar backend maps to `non-persistent://tg/flow/queue-name` or `persistent://tg/config/queue-name`. + +This is an open design question. Options: + +**Option A: Simple string names.** Queues are just strings like `"text-completion-request"`. The backend is responsible for mapping to its native format (Pulsar adds `persistent://tg/flow/` prefix, RabbitMQ uses the string as-is or adds a vhost prefix). Persistence and namespace are configuration concerns, not embedded in the name. + +**Option B: Structured queue descriptor.** A small object that carries the logical name plus metadata: + +```python +@dataclass +class QueueDescriptor: + name: str # e.g. "text-completion-request" + namespace: str = "flow" # logical grouping + persistent: bool = False # must survive broker restart +``` + +The backend maps this to its native format. + +**Option C: Keep the current format** (`q0/tg/flow/name`) but document it as a TrustGraph convention, not a Pulsar convention. Backends parse it. + +Option B is the most explicit. Option A is the simplest. Either is workable. The key constraint is that persistence is a property of the queue definition, not a runtime choice — the config push queue is persistent, everything else is not. + +#### 5. Implement RabbitMQ backend + +Write `rabbitmq_backend.py` implementing the `PubSubBackend` Protocol: + +- **`create_producer()`**: Creates a channel and declares the target queue. `send()` publishes to the default exchange with the queue name as routing key. Properties map to AMQP basic properties (specifically `message_id` for the `"id"` property). +- **`create_consumer()`**: Declares the queue and starts consuming with `basic_consume`. Shared subscription is the default RabbitMQ model — multiple consumers on one queue get round-robin delivery. 
`acknowledge()` maps to `basic_ack`, `negative_acknowledge()` maps to `basic_nack` with `requeue=True`. +- **Persistence**: For persistent queues, declare as durable with `delivery_mode=2` on messages. For non-persistent queues, declare as non-durable. +- **Consumer positioning**: RabbitMQ queues are consumed destructively, so "earliest" doesn't apply in the Pulsar sense. For the config push use case, use a **fanout exchange with per-consumer exclusive queues** — each new processor gets its own queue that receives all config publishes, plus the last-value can be handled by having the config service re-publish on startup. +- **Large messages**: RabbitMQ handles messages up to `rabbit.max_message_size` (default 128MB). No chunking needed. + +The factory in `pubsub.py` gets a new branch: + +```python +if backend_type == 'rabbitmq': + return RabbitMQBackend( + host=config.get('rabbitmq_host'), + port=config.get('rabbitmq_port'), + username=config.get('rabbitmq_username'), + password=config.get('rabbitmq_password'), + vhost=config.get('rabbitmq_vhost', '/'), + ) +``` + +Backend selection via `PUBSUB_BACKEND=rabbitmq` environment variable or `--pubsub-backend rabbitmq` CLI flag. + +#### 6. Clean up remaining Pulsar references + +After the above changes, Pulsar-specific code should be confined to: + +- `pulsar_backend.py` — the Pulsar implementation +- `pubsub.py` — the factory that imports it + +Audit and remove any remaining Pulsar imports, Pulsar exception handling, or Pulsar-specific concepts from: + +- `async_processor.py` (currently catches `_pulsar.Interrupted`) +- `consumer.py`, `subscriber.py` (if any Pulsar exceptions leak through) +- Schema files (should be clean already, but verify) +- Gateway service (currently instantiates Pulsar client directly) + +The gateway is a special case — it currently bypasses the abstraction layer and creates a Pulsar client directly for dispatching API requests. It should use the same `get_pubsub()` factory as everything else. 
+ +### What stays the same + +- **Schema definitions**: Already dataclasses. No changes needed. +- **Consumer/Producer/Publisher/Subscriber**: Already backend-agnostic. No changes to their core logic. +- **FlowProcessor and spec wiring**: Already uses `processor.pubsub` to create backend instances. No changes. +- **Backend Protocol**: The interface in `backend.py` is sound. Minor refinement possible (dict vs dataclass at the boundary) but the shape is right. + +### Concrete cleanups + +The following files have Pulsar-specific imports that should not be there after the abstraction is complete. Pulsar imports should be confined to `pulsar_backend.py` and the factory in `pubsub.py`. + +**Dead imports (unused, can just be removed):** + +- `trustgraph-base/trustgraph/base/pubsub.py` — `from pulsar.schema import JsonSchema`, `import pulsar`, `import _pulsar`. The `JsonSchema` import is unused since the switch to `BytesSchema`. The `pulsar`/`_pulsar` imports are only used by the legacy `PulsarClient` class which should be removed (superseded by `PulsarBackend`). +- `trustgraph-base/trustgraph/base/flow_processor.py` — `from pulsar.schema import JsonSchema`. Unused. + +**Legacy `PulsarClient` class:** + +- `trustgraph-base/trustgraph/base/pubsub.py` — The `PulsarClient` class is a leftover from before the backend abstraction. `get_pubsub()` still references `PulsarClient.default_pulsar_host` for defaults. Move the defaults to `PulsarBackend` or to environment variable reads in the factory, then delete `PulsarClient`. + +**Client libraries using Pulsar directly:** + +- `trustgraph-base/trustgraph/clients/base.py` — `import pulsar`, `import _pulsar`, `from pulsar.schema import JsonSchema`. This is the base class for the old synchronous client library. These clients predate the backend abstraction and use Pulsar directly. +- `trustgraph-base/trustgraph/clients/embeddings_client.py` — `from pulsar.schema import JsonSchema`, `import _pulsar`. 
+- `trustgraph-base/trustgraph/clients/*.py` (agent, config, document_embeddings, document_rag, graph_embeddings, graph_rag, llm, prompt, row_embeddings, triples_query) — all import `_pulsar` for exception handling. + +These clients are the internal request-response clients used by processors. They need to be migrated to use the backend abstraction or their Pulsar exception handling needs to be wrapped behind a backend-agnostic exception type. + +**Translator base class:** + +- `trustgraph-base/trustgraph/messaging/translators/base.py` — `from pulsar.schema import Record`. Used in type hints. Should be removed when `to_pulsar`/`from_pulsar` are renamed. + +**Gateway service (bypasses abstraction):** + +- `trustgraph-flow/trustgraph/gateway/service.py` — `import pulsar`. Creates a Pulsar client directly. +- `trustgraph-flow/trustgraph/gateway/config/receiver.py` — `import pulsar`. Direct Pulsar usage. + +The gateway should use `get_pubsub()` like everything else. + +**Storage writers:** + +- `trustgraph-flow/trustgraph/storage/triples/neo4j/write.py` — `import pulsar` +- `trustgraph-flow/trustgraph/storage/triples/memgraph/write.py` — `import pulsar` +- `trustgraph-flow/trustgraph/storage/triples/falkordb/write.py` — `import pulsar` +- `trustgraph-flow/trustgraph/storage/triples/cassandra/write.py` — `import pulsar` + +These need investigation — likely Pulsar exception handling or direct client usage that should go through the abstraction. + +**Log level:** + +- `trustgraph-base/trustgraph/log_level.py` — `import _pulsar`. Used to set Pulsar's log level. Should be moved into `pulsar_backend.py`. + +### Queue naming + +The current scheme encodes QoS, tenant, namespace, and queue name into a slash-separated string (`q0/tg/request/config`) which the Pulsar backend parses and maps to a Pulsar URI (`non-persistent://tg/request/config`). 
This was an attempt at abstraction but it has problems: + +- QoS in the name was a mistake — it's a property of the queue definition, not something that belongs in the name. A queue is either persistent or it isn't; that's decided once when the queue is defined. +- The tenant/namespace structure mirrors Pulsar's model. RabbitMQ doesn't use this — it has vhosts and exchange/queue names. Presenting the naming as backend-neutral when it actually mirrors Pulsar's structure just leaks Pulsar concepts. +- The `topic()` helper generates these strings, and the backend parses them apart. This is unnecessary indirection. + +There are two categories of queue in TrustGraph: + +**Infrastructure queues** — defined in code, used for system services. These are fixed and well-known: + +| Queue | Persistent | Purpose | +|-------|------------|---------| +| `config-request` | No | Config queries | +| `config-response` | No | Config query responses | +| `config-push` | Yes | Config state broadcast | +| `flow-request` | No | Flow management queries | +| `flow-response` | No | Flow management responses | +| `librarian-request` | No | Document storage operations | +| `librarian-response` | No | Document storage responses | +| `knowledge-request` | No | Knowledge graph operations | +| `knowledge-response` | No | Knowledge graph responses | +| `document-embeddings-request` | No | Document vector queries | +| `document-embeddings-response` | No | Document vector responses | +| `row-embeddings-request` | No | Row vector queries | +| `row-embeddings-response` | No | Row vector responses | +| `collection-request` | No | Collection management | +| `collection-response` | No | Collection management responses | + +**Flow queues** — defined in configuration, created dynamically per flow. The queue names come from the config service (e.g. `text-completion-request`, `graph-rag-request`, `agent-request`). Each flow instance has its own set of these queues. + +For infrastructure queues, the name is just a string. 
Persistence is a property of the queue definition, not encoded in the name. The backend maps the name to whatever its native format requires. + +For flow queues, the name comes from configuration. The config service already distributes queue names as strings — the backend just needs to be able to use them. + +#### Proposed scheme: CLASS:TOPICSPACE:TOPIC + +A queue name has three parts separated by colons: + +- **CLASS** — a small enum that defines the queue's operational characteristics. The backend knows what each class means in terms of persistence, TTL, memory limits, etc. There are only four classes: + + | Class | Persistent | TTL | Behaviour | + |-------|------------|-----|-----------| + | `flow` | Yes | Long | Processing pipeline queues. Messages survive broker restart. | + | `request` | No | Short | Transient request-response. Low TTL, no persistence needed — clients retry on failure. | + | `response` | No | Short | Same as request, for the response side. | + | `state` | Yes | Retained | Last-value state broadcast. Consumers need the most recent value on startup, plus any future updates. Config push is the primary example. | + +- **TOPICSPACE** — deployment isolation. Keeps different TrustGraph deployments separate when sharing the same pub/sub infrastructure. Most deployments just use `tg`. Avoids the overloaded terms "tenant" and "namespace". + +- **TOPIC** — the logical queue identity. What the queue is for. + +**Examples:** + +``` +flow:tg:text-completion-request +flow:tg:graph-rag-request +flow:tg:agent-request +request:tg:librarian +response:tg:librarian +request:tg:config +response:tg:config +state:tg:config +request:tg:flow +response:tg:flow +``` + +**Backend mapping:** + +Each backend parses the three parts and maps them to its native concepts: + +- **Pulsar**: `flow:tg:text-completion-request` → `persistent://tg/flow/text-completion-request`. Class maps to persistent/non-persistent and namespace. 
State class uses persistent topic with earliest consumer positioning. +- **RabbitMQ**: Topicspace maps to vhost. Class determines queue durability and TTL policy. State class uses a last-value queue (via plugin) or a fanout exchange pattern where each consumer gets the retained state on connect. +- **Kafka**: `flow.tg.text-completion-request` as topic name. Class determines retention and compaction policy. State class maps to a compacted topic (last value per key). + +**Why this works:** + +- The class enum is small and stable — adding a new class is rare and deliberate +- Queue properties (persistence, TTL) are implied by class, not encoded in the name +- Dynamic registration works naturally — the config service publishes `flow:tg:text-completion-request` and the backend knows how to declare it from the `flow` class +- The colon separator is unambiguous, easy to split, doesn't conflict with URIs or path separators that backends use internally +- No pretence of being generic — this is a TrustGraph convention, and that's fine + +### Serialization boundary + +**Decision: the backend owns the wire format.** + +The contract between the Consumer/Producer layer and the backend is dataclass objects in, dataclass objects out: + +- `send()` accepts a dataclass instance and properties dict +- `receive()` returns a message whose `value()` is a dataclass instance + +What happens on the wire is the backend's concern. The Pulsar backend uses JSON (via `dataclass_to_dict` / `dict_to_dataclass`). A RabbitMQ backend would likely also use JSON. A future backend could use Protobuf, MessagePack, or Avro if the broker benefits from it. + +The serialization helpers stay inside the backend that uses them — they are not shared infrastructure. Each backend brings its own serialization strategy. The Consumer/Producer layer never thinks about wire format. 
+ +### Gateway service + +**Decision: the gateway uses the backend abstraction like any other component.** + +The gateway currently bridges WebSocket/REST to Pulsar directly, bypassing the abstraction layer. It translates incoming API JSON to Pulsar schema objects, sends them, receives responses as Pulsar schema objects, and translates back to API JSON. Since the wire format is JSON in both directions, this is effectively a no-op round trip through the schema machinery. + +With the backend abstraction, the gateway follows the same pattern as every other component: + +1. Incoming API JSON → translator `decode()` → dataclass +2. Dataclass → backend `send()` (backend handles wire format) +3. Backend `receive()` → dataclass +4. Dataclass → translator `encode()` → API JSON → WebSocket/REST client + +This is architecturally simple — one code path, no special cases. The gateway depends on the schema dataclasses and the translator layer, which it already does. The overhead of deserialize-then-reserialize is negligible for the message sizes involved. And it keeps all options open — if a future backend uses a non-JSON wire format, the gateway still works without changes. + +## Implementation Order + +### Phase 1: Rename translators + +Rename `to_pulsar()` → `decode()`, `from_pulsar()` → `encode()` across all translator classes and call sites. Remove `from pulsar.schema import Record` from the translator base class. Mechanical find-and-replace, no behavioural changes. + +### Phase 2: Queue naming + +Replace the `topic()` helper with the CLASS:TOPICSPACE:TOPIC scheme. Update all queue definitions in `schema/services/*.py` and `schema/knowledge/*.py`. Update `PulsarBackend.map_topic()` to parse the new format. Verify all existing functionality still works with Pulsar. 
+ +### Phase 3: Clean up Pulsar leaks + +Work through the concrete cleanups list: remove dead imports, delete the legacy `PulsarClient` class, migrate the client libraries and gateway to use the backend abstraction. After this phase, `pulsar` imports exist only in `pulsar_backend.py`. + +### Phase 4: RabbitMQ backend + +Implement `rabbitmq_backend.py` against the existing `PubSubBackend` Protocol. Map queue classes to RabbitMQ concepts: `flow` → durable queues, `request`/`response` → non-durable queues with TTL, `state` → RabbitMQ streams. Add `rabbitmq` as a backend option in the factory. Test end-to-end with `PUBSUB_BACKEND=rabbitmq`. + +Phases 1-3 are safe to do on main — they don't change behaviour, just clean up. Phase 4 is additive — it adds a new backend without touching the existing one. + +### Config distribution on RabbitMQ + +The `state` queue class needs "start from earliest" semantics — a newly started processor must receive the current configuration state. + +RabbitMQ Streams (available since 3.9) solve this directly. Streams are persistent, append-only logs that support consumer offset positioning. The RabbitMQ backend maps the `state` class to a stream, and consumers attach with offset `first` to read from the beginning, or `last` to start from the most recently written chunk of messages and then receive future updates. + +Since config pushes are full state snapshots (not deltas), a consumer only needs the most recent entry. The RabbitMQ backend can use `last` offset positioning for `state` class consumers, which delivers the last written chunk (one or more of the most recent messages; the consumer treats the newest snapshot as current) followed by any new messages. This matches the current behaviour where processors read config on startup and then react to updates. 
+ diff --git a/tests/contract/test_document_embeddings_contract.py b/tests/contract/test_document_embeddings_contract.py index c7d6369a..b6d14124 100644 --- a/tests/contract/test_document_embeddings_contract.py +++ b/tests/contract/test_document_embeddings_contract.py @@ -38,7 +38,7 @@ class TestDocumentEmbeddingsRequestContract: assert request.user == "test_user" assert request.collection == "test_collection" - def test_request_translator_to_pulsar(self): + def test_request_translator_decode(self): """Test request translator converts dict to Pulsar schema""" translator = DocumentEmbeddingsRequestTranslator() @@ -49,7 +49,7 @@ class TestDocumentEmbeddingsRequestContract: "collection": "custom_collection" } - result = translator.to_pulsar(data) + result = translator.decode(data) assert isinstance(result, DocumentEmbeddingsRequest) assert result.vector == [0.1, 0.2, 0.3, 0.4] @@ -57,7 +57,7 @@ class TestDocumentEmbeddingsRequestContract: assert result.user == "custom_user" assert result.collection == "custom_collection" - def test_request_translator_to_pulsar_with_defaults(self): + def test_request_translator_decode_with_defaults(self): """Test request translator uses correct defaults""" translator = DocumentEmbeddingsRequestTranslator() @@ -66,7 +66,7 @@ class TestDocumentEmbeddingsRequestContract: # No limit, user, or collection provided } - result = translator.to_pulsar(data) + result = translator.decode(data) assert isinstance(result, DocumentEmbeddingsRequest) assert result.vector == [0.1, 0.2] @@ -74,7 +74,7 @@ class TestDocumentEmbeddingsRequestContract: assert result.user == "trustgraph" # Default assert result.collection == "default" # Default - def test_request_translator_from_pulsar(self): + def test_request_translator_encode(self): """Test request translator converts Pulsar schema to dict""" translator = DocumentEmbeddingsRequestTranslator() @@ -85,7 +85,7 @@ class TestDocumentEmbeddingsRequestContract: collection="test_collection" ) - result = 
translator.from_pulsar(request) + result = translator.encode(request) assert isinstance(result, dict) assert result["vector"] == [0.5, 0.6] @@ -134,7 +134,7 @@ class TestDocumentEmbeddingsResponseContract: assert response.error == error assert response.chunks == [] - def test_response_translator_from_pulsar_with_chunks(self): + def test_response_translator_encode_with_chunks(self): """Test response translator converts Pulsar schema with chunks to dict""" translator = DocumentEmbeddingsResponseTranslator() @@ -147,7 +147,7 @@ class TestDocumentEmbeddingsResponseContract: ] ) - result = translator.from_pulsar(response) + result = translator.encode(response) assert isinstance(result, dict) assert "chunks" in result @@ -155,7 +155,7 @@ class TestDocumentEmbeddingsResponseContract: assert result["chunks"][0]["chunk_id"] == "doc1/c1" assert result["chunks"][0]["score"] == 0.95 - def test_response_translator_from_pulsar_with_empty_chunks(self): + def test_response_translator_encode_with_empty_chunks(self): """Test response translator handles empty chunks list""" translator = DocumentEmbeddingsResponseTranslator() @@ -164,25 +164,25 @@ class TestDocumentEmbeddingsResponseContract: chunks=[] ) - result = translator.from_pulsar(response) + result = translator.encode(response) assert isinstance(result, dict) assert "chunks" in result assert result["chunks"] == [] - def test_response_translator_from_pulsar_with_none_chunks(self): + def test_response_translator_encode_with_none_chunks(self): """Test response translator handles None chunks""" translator = DocumentEmbeddingsResponseTranslator() response = MagicMock() response.chunks = None - result = translator.from_pulsar(response) + result = translator.encode(response) assert isinstance(result, dict) assert "chunks" not in result or result.get("chunks") is None - def test_response_translator_from_response_with_completion(self): + def test_response_translator_encode_with_completion(self): """Test response translator with 
completion flag""" translator = DocumentEmbeddingsResponseTranslator() @@ -194,7 +194,7 @@ class TestDocumentEmbeddingsResponseContract: ] ) - result, is_final = translator.from_response_with_completion(response) + result, is_final = translator.encode_with_completion(response) assert isinstance(result, dict) assert "chunks" in result @@ -202,12 +202,12 @@ class TestDocumentEmbeddingsResponseContract: assert result["chunks"][0]["chunk_id"] == "chunk1" assert is_final is True # Document embeddings responses are always final - def test_response_translator_to_pulsar_not_implemented(self): - """Test that to_pulsar raises NotImplementedError for responses""" + def test_response_translator_decode_not_implemented(self): + """Test that decode raises NotImplementedError for responses""" translator = DocumentEmbeddingsResponseTranslator() with pytest.raises(NotImplementedError): - translator.to_pulsar({"chunks": [{"chunk_id": "test", "score": 0.9}]}) + translator.decode({"chunks": [{"chunk_id": "test", "score": 0.9}]}) class TestDocumentEmbeddingsMessageCompatibility: @@ -225,7 +225,7 @@ class TestDocumentEmbeddingsMessageCompatibility: # Convert to Pulsar request req_translator = DocumentEmbeddingsRequestTranslator() - pulsar_request = req_translator.to_pulsar(request_data) + pulsar_request = req_translator.decode(request_data) # Simulate service processing and creating response response = DocumentEmbeddingsResponse( @@ -238,7 +238,7 @@ class TestDocumentEmbeddingsMessageCompatibility: # Convert response back to dict resp_translator = DocumentEmbeddingsResponseTranslator() - response_data = resp_translator.from_pulsar(response) + response_data = resp_translator.encode(response) # Verify data integrity assert isinstance(pulsar_request, DocumentEmbeddingsRequest) @@ -261,7 +261,7 @@ class TestDocumentEmbeddingsMessageCompatibility: # Convert response to dict translator = DocumentEmbeddingsResponseTranslator() - response_data = translator.from_pulsar(response) + response_data = 
translator.encode(response) # Verify error handling assert isinstance(response_data, dict) diff --git a/tests/contract/test_translator_completion_flags.py b/tests/contract/test_translator_completion_flags.py index a22e1c41..91ce1b77 100644 --- a/tests/contract/test_translator_completion_flags.py +++ b/tests/contract/test_translator_completion_flags.py @@ -33,7 +33,7 @@ class TestRAGTranslatorCompletionFlags: ) # Act - response_dict, is_final = translator.from_response_with_completion(response) + response_dict, is_final = translator.encode_with_completion(response) # Assert assert is_final is True, "is_final must be True when end_of_session=True" @@ -57,7 +57,7 @@ class TestRAGTranslatorCompletionFlags: ) # Act - response_dict, is_final = translator.from_response_with_completion(response) + response_dict, is_final = translator.encode_with_completion(response) # Assert assert is_final is False, "is_final must be False when end_of_session=False" @@ -80,7 +80,7 @@ class TestRAGTranslatorCompletionFlags: ) # Act - response_dict, is_final = translator.from_response_with_completion(response) + response_dict, is_final = translator.encode_with_completion(response) # Assert assert is_final is False @@ -103,7 +103,7 @@ class TestRAGTranslatorCompletionFlags: ) # Act - response_dict, is_final = translator.from_response_with_completion(response) + response_dict, is_final = translator.encode_with_completion(response) # Assert assert is_final is False, "end_of_stream=True should NOT make is_final=True" @@ -125,7 +125,7 @@ class TestRAGTranslatorCompletionFlags: ) # Act - response_dict, is_final = translator.from_response_with_completion(response) + response_dict, is_final = translator.encode_with_completion(response) # Assert assert is_final is True, "is_final must be True when end_of_session=True" @@ -147,7 +147,7 @@ class TestRAGTranslatorCompletionFlags: ) # Act - response_dict, is_final = translator.from_response_with_completion(response) + response_dict, is_final = 
translator.encode_with_completion(response) # Assert assert is_final is False, "end_of_stream=True should NOT make is_final=True" @@ -168,7 +168,7 @@ class TestRAGTranslatorCompletionFlags: ) # Act - response_dict, is_final = translator.from_response_with_completion(response) + response_dict, is_final = translator.encode_with_completion(response) # Assert assert is_final is False, "is_final must be False when end_of_stream=False" @@ -195,7 +195,7 @@ class TestAgentTranslatorCompletionFlags: ) # Act - response_dict, is_final = translator.from_response_with_completion(response) + response_dict, is_final = translator.encode_with_completion(response) # Assert assert is_final is True, "is_final must be True when end_of_dialog=True" @@ -217,7 +217,7 @@ class TestAgentTranslatorCompletionFlags: ) # Act - response_dict, is_final = translator.from_response_with_completion(response) + response_dict, is_final = translator.encode_with_completion(response) # Assert assert is_final is False, "is_final must be False when end_of_dialog=False" @@ -240,7 +240,7 @@ class TestAgentTranslatorCompletionFlags: ) # Act - thought_dict, thought_is_final = translator.from_response_with_completion(thought_response) + thought_dict, thought_is_final = translator.encode_with_completion(thought_response) # Assert assert thought_is_final is False, "Thought message must not be final" @@ -254,7 +254,7 @@ class TestAgentTranslatorCompletionFlags: ) # Act - obs_dict, obs_is_final = translator.from_response_with_completion(observation_response) + obs_dict, obs_is_final = translator.encode_with_completion(observation_response) # Assert assert obs_is_final is False, "Observation message must not be final" @@ -275,7 +275,7 @@ class TestAgentTranslatorCompletionFlags: ) # Act - response_dict, is_final = translator.from_response_with_completion(response) + response_dict, is_final = translator.encode_with_completion(response) # Assert assert is_final is True, "Streaming format must use end_of_dialog for 
is_final" diff --git a/tests/unit/test_clients/test_sync_document_embeddings_client.py b/tests/unit/test_clients/test_sync_document_embeddings_client.py index ce758f66..edf4ac81 100644 --- a/tests/unit/test_clients/test_sync_document_embeddings_client.py +++ b/tests/unit/test_clients/test_sync_document_embeddings_client.py @@ -21,17 +21,15 @@ class TestSyncDocumentEmbeddingsClient: # Act client = DocumentEmbeddingsClient( - log_level=1, subscriber="test-subscriber", input_queue="test-input", output_queue="test-output", pulsar_host="pulsar://test:6650", pulsar_api_key="test-key" ) - + # Assert mock_base_init.assert_called_once_with( - log_level=1, subscriber="test-subscriber", input_queue="test-input", output_queue="test-output", diff --git a/tests/unit/test_gateway/test_dispatch_config.py b/tests/unit/test_gateway/test_dispatch_config.py index 4fbd8484..11eb7484 100644 --- a/tests/unit/test_gateway/test_dispatch_config.py +++ b/tests/unit/test_gateway/test_dispatch_config.py @@ -49,7 +49,7 @@ class TestConfigRequestor: mock_translator_registry.get_response_translator.return_value = Mock() # Setup translator response - mock_request_translator.to_pulsar.return_value = "translated_request" + mock_request_translator.decode.return_value = "translated_request" # Patch ServiceRequestor async methods with regular mocks (not AsyncMock) with patch.object(ServiceRequestor, 'start', return_value=None), \ @@ -64,7 +64,7 @@ class TestConfigRequestor: result = requestor.to_request({"test": "body"}) # Verify translator was called correctly - mock_request_translator.to_pulsar.assert_called_once_with({"test": "body"}) + mock_request_translator.decode.assert_called_once_with({"test": "body"}) assert result == "translated_request" @patch('trustgraph.gateway.dispatch.config.TranslatorRegistry') @@ -76,7 +76,7 @@ class TestConfigRequestor: mock_translator_registry.get_response_translator.return_value = mock_response_translator # Setup translator response - 
mock_response_translator.from_response_with_completion.return_value = "translated_response" + mock_response_translator.encode_with_completion.return_value = "translated_response" requestor = ConfigRequestor( backend=Mock(), @@ -89,5 +89,5 @@ class TestConfigRequestor: result = requestor.from_response(mock_message) # Verify translator was called correctly - mock_response_translator.from_response_with_completion.assert_called_once_with(mock_message) + mock_response_translator.encode_with_completion.assert_called_once_with(mock_message) assert result == "translated_response" \ No newline at end of file diff --git a/tests/unit/test_gateway/test_streaming_translators.py b/tests/unit/test_gateway/test_streaming_translators.py index e190fe68..31912688 100644 --- a/tests/unit/test_gateway/test_streaming_translators.py +++ b/tests/unit/test_gateway/test_streaming_translators.py @@ -25,7 +25,7 @@ from trustgraph.schema import ( class TestGraphRagResponseTranslator: """Test GraphRagResponseTranslator streaming behavior""" - def test_from_pulsar_with_empty_response(self): + def test_encode_with_empty_response(self): """Test that empty response strings are preserved""" # Arrange translator = GraphRagResponseTranslator() @@ -36,14 +36,14 @@ class TestGraphRagResponseTranslator: ) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert - Empty string should be included in result assert "response" in result assert result["response"] == "" assert result["end_of_stream"] is True - def test_from_pulsar_with_non_empty_response(self): + def test_encode_with_non_empty_response(self): """Test that non-empty responses work correctly""" # Arrange translator = GraphRagResponseTranslator() @@ -54,13 +54,13 @@ class TestGraphRagResponseTranslator: ) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert assert result["response"] == "Some text" assert result["end_of_stream"] is False - def 
test_from_pulsar_with_none_response(self): + def test_encode_with_none_response(self): """Test that None response is handled correctly""" # Arrange translator = GraphRagResponseTranslator() @@ -71,14 +71,14 @@ class TestGraphRagResponseTranslator: ) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert - None should not be included assert "response" not in result assert result["end_of_stream"] is True - def test_from_response_with_completion_returns_correct_flag(self): - """Test that from_response_with_completion returns correct is_final flag""" + def test_encode_with_completion_returns_correct_flag(self): + """Test that encode_with_completion returns correct is_final flag""" # Arrange translator = GraphRagResponseTranslator() @@ -90,7 +90,7 @@ class TestGraphRagResponseTranslator: ) # Act - result, is_final = translator.from_response_with_completion(response_chunk) + result, is_final = translator.encode_with_completion(response_chunk) # Assert assert is_final is False @@ -105,7 +105,7 @@ class TestGraphRagResponseTranslator: ) # Act - result, is_final = translator.from_response_with_completion(final_response) + result, is_final = translator.encode_with_completion(final_response) # Assert - is_final is based on end_of_session, not end_of_stream assert is_final is True @@ -116,7 +116,7 @@ class TestGraphRagResponseTranslator: class TestDocumentRagResponseTranslator: """Test DocumentRagResponseTranslator streaming behavior""" - def test_from_pulsar_with_empty_response(self): + def test_encode_with_empty_response(self): """Test that empty response strings are preserved""" # Arrange translator = DocumentRagResponseTranslator() @@ -127,14 +127,14 @@ class TestDocumentRagResponseTranslator: ) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert assert "response" in result assert result["response"] == "" assert result["end_of_stream"] is True - def 
test_from_pulsar_with_non_empty_response(self): + def test_encode_with_non_empty_response(self): """Test that non-empty responses work correctly""" # Arrange translator = DocumentRagResponseTranslator() @@ -145,7 +145,7 @@ class TestDocumentRagResponseTranslator: ) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert assert result["response"] == "Document content" @@ -155,7 +155,7 @@ class TestDocumentRagResponseTranslator: class TestPromptResponseTranslator: """Test PromptResponseTranslator streaming behavior""" - def test_from_pulsar_with_empty_text(self): + def test_encode_with_empty_text(self): """Test that empty text strings are preserved""" # Arrange translator = PromptResponseTranslator() @@ -167,14 +167,14 @@ class TestPromptResponseTranslator: ) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert assert "text" in result assert result["text"] == "" assert result["end_of_stream"] is True - def test_from_pulsar_with_non_empty_text(self): + def test_encode_with_non_empty_text(self): """Test that non-empty text works correctly""" # Arrange translator = PromptResponseTranslator() @@ -186,13 +186,13 @@ class TestPromptResponseTranslator: ) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert assert result["text"] == "Some prompt response" assert result["end_of_stream"] is False - def test_from_pulsar_with_none_text(self): + def test_encode_with_none_text(self): """Test that None text is handled correctly""" # Arrange translator = PromptResponseTranslator() @@ -204,14 +204,14 @@ class TestPromptResponseTranslator: ) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert assert "text" not in result assert "object" in result assert result["end_of_stream"] is True - def test_from_pulsar_includes_end_of_stream(self): + def test_encode_includes_end_of_stream(self): """Test that end_of_stream 
flag is always included""" # Arrange translator = PromptResponseTranslator() @@ -225,7 +225,7 @@ class TestPromptResponseTranslator: ) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert assert "end_of_stream" in result @@ -235,7 +235,7 @@ class TestPromptResponseTranslator: class TestTextCompletionResponseTranslator: """Test TextCompletionResponseTranslator streaming behavior""" - def test_from_pulsar_always_includes_response(self): + def test_encode_always_includes_response(self): """Test that response field is always included, even if empty""" # Arrange translator = TextCompletionResponseTranslator() @@ -249,13 +249,13 @@ class TestTextCompletionResponseTranslator: ) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert - Response should always be present assert "response" in result assert result["response"] == "" - def test_from_response_with_completion_with_empty_final(self): + def test_encode_with_completion_with_empty_final(self): """Test that empty final response is handled correctly""" # Arrange translator = TextCompletionResponseTranslator() @@ -269,7 +269,7 @@ class TestTextCompletionResponseTranslator: ) # Act - result, is_final = translator.from_response_with_completion(response) + result, is_final = translator.encode_with_completion(response) # Assert assert is_final is True @@ -297,7 +297,7 @@ class TestStreamingProtocolCompliance: response = response_class(**kwargs) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert assert field_name in result, f"{translator_class.__name__} should include '{field_name}' field even when empty" @@ -320,7 +320,7 @@ class TestStreamingProtocolCompliance: response = response_class(**kwargs) # Act - result = translator.from_pulsar(response) + result = translator.encode(response) # Assert assert "end_of_stream" in result, f"{translator_class.__name__} should include 'end_of_stream' flag" 
diff --git a/tests/unit/test_gateway/test_text_document_translator.py b/tests/unit/test_gateway/test_text_document_translator.py index f836eb2b..84eedefc 100644 --- a/tests/unit/test_gateway/test_text_document_translator.py +++ b/tests/unit/test_gateway/test_text_document_translator.py @@ -8,11 +8,11 @@ from trustgraph.messaging.translators.document_loading import TextDocumentTransl class TestTextDocumentTranslator: - def test_to_pulsar_decodes_base64_text(self): + def test_decode_decodes_base64_text(self): translator = TextDocumentTranslator() payload = "Cancer survival: 2.74× higher hazard ratio" - msg = translator.to_pulsar( + msg = translator.decode( { "id": "doc-1", "user": "alice", @@ -27,11 +27,11 @@ class TestTextDocumentTranslator: assert msg.metadata.collection == "research" assert msg.text == payload.encode("utf-8") - def test_to_pulsar_accepts_raw_utf8_text(self): + def test_decode_accepts_raw_utf8_text(self): translator = TextDocumentTranslator() payload = "Cancer survival: 2.74× higher hazard ratio" - msg = translator.to_pulsar( + msg = translator.decode( { "charset": "utf-8", "text": payload, @@ -40,11 +40,11 @@ class TestTextDocumentTranslator: assert msg.text == payload.encode("utf-8") - def test_to_pulsar_falls_back_to_raw_non_base64_ascii(self): + def test_decode_falls_back_to_raw_non_base64_ascii(self): translator = TextDocumentTranslator() payload = "plain-text payload" - msg = translator.to_pulsar( + msg = translator.decode( { "charset": "utf-8", "text": payload, diff --git a/tests/unit/test_pubsub/test_queue_naming.py b/tests/unit/test_pubsub/test_queue_naming.py new file mode 100644 index 00000000..1ee781d9 --- /dev/null +++ b/tests/unit/test_pubsub/test_queue_naming.py @@ -0,0 +1,133 @@ +""" +Tests for queue naming and topic mapping. 
+""" + +import pytest +import argparse + +from trustgraph.schema.core.topic import queue +from trustgraph.base.pubsub import get_pubsub, add_pubsub_args +from trustgraph.base.pulsar_backend import PulsarBackend + + +class TestQueueFunction: + + def test_flow_default(self): + assert queue('text-completion-request') == 'flow:tg:text-completion-request' + + def test_request_class(self): + assert queue('config', cls='request') == 'request:tg:config' + + def test_response_class(self): + assert queue('config', cls='response') == 'response:tg:config' + + def test_state_class(self): + assert queue('config', cls='state') == 'state:tg:config' + + def test_custom_topicspace(self): + assert queue('config', cls='request', topicspace='prod') == 'request:prod:config' + + def test_default_class_is_flow(self): + result = queue('something') + assert result.startswith('flow:') + + +class TestPulsarMapTopic: + + @pytest.fixture + def backend(self): + """Create a PulsarBackend without connecting.""" + b = object.__new__(PulsarBackend) + return b + + def test_flow_maps_to_persistent(self, backend): + assert backend.map_topic('flow:tg:text-completion-request') == \ + 'persistent://tg/flow/text-completion-request' + + def test_state_maps_to_persistent(self, backend): + assert backend.map_topic('state:tg:config') == \ + 'persistent://tg/state/config' + + def test_request_maps_to_non_persistent(self, backend): + assert backend.map_topic('request:tg:config') == \ + 'non-persistent://tg/request/config' + + def test_response_maps_to_non_persistent(self, backend): + assert backend.map_topic('response:tg:librarian') == \ + 'non-persistent://tg/response/librarian' + + def test_passthrough_pulsar_uri(self, backend): + uri = 'persistent://tg/flow/something' + assert backend.map_topic(uri) == uri + + def test_invalid_format_raises(self, backend): + with pytest.raises(ValueError, match="Invalid queue format"): + backend.map_topic('bad-format') + + def test_invalid_class_raises(self, backend): + with 
pytest.raises(ValueError, match="Invalid queue class"): + backend.map_topic('unknown:tg:topic') + + def test_custom_topicspace(self, backend): + assert backend.map_topic('flow:prod:my-queue') == \ + 'persistent://prod/flow/my-queue' + + +class TestGetPubsubDispatch: + + def test_unknown_backend_raises(self): + with pytest.raises(ValueError, match="Unknown pub/sub backend"): + get_pubsub(pubsub_backend='redis') + + +class TestAddPubsubArgs: + + def test_standalone_defaults_to_localhost(self): + parser = argparse.ArgumentParser() + add_pubsub_args(parser, standalone=True) + args = parser.parse_args([]) + assert args.pulsar_host == 'pulsar://localhost:6650' + assert args.pulsar_listener == 'localhost' + + def test_non_standalone_defaults_to_container(self): + parser = argparse.ArgumentParser() + add_pubsub_args(parser, standalone=False) + args = parser.parse_args([]) + assert 'pulsar:6650' in args.pulsar_host + assert args.pulsar_listener is None + + def test_cli_override_respected(self): + parser = argparse.ArgumentParser() + add_pubsub_args(parser, standalone=True) + args = parser.parse_args(['--pulsar-host', 'pulsar://custom:6650']) + assert args.pulsar_host == 'pulsar://custom:6650' + + def test_pubsub_backend_default(self): + parser = argparse.ArgumentParser() + add_pubsub_args(parser) + args = parser.parse_args([]) + assert args.pubsub_backend == 'pulsar' + + +class TestQueueDefinitions: + """Verify the actual queue constants produce correct names.""" + + def test_config_request(self): + from trustgraph.schema.services.config import config_request_queue + assert config_request_queue == 'request:tg:config' + + def test_config_response(self): + from trustgraph.schema.services.config import config_response_queue + assert config_response_queue == 'response:tg:config' + + def test_config_push(self): + from trustgraph.schema.services.config import config_push_queue + assert config_push_queue == 'state:tg:config' + + def test_librarian_request_is_persistent(self): + 
from trustgraph.schema.services.library import librarian_request_queue + assert librarian_request_queue.startswith('flow:') + + def test_knowledge_request(self): + from trustgraph.schema.knowledge.knowledge import knowledge_request_queue + assert knowledge_request_queue == 'request:tg:knowledge' diff --git a/tests/unit/test_rdf/test_rdf_wire_format.py b/tests/unit/test_rdf/test_rdf_wire_format.py index a0bbd27a..d4375462 100644 --- a/tests/unit/test_rdf/test_rdf_wire_format.py +++ b/tests/unit/test_rdf/test_rdf_wire_format.py @@ -28,21 +28,21 @@ def triple_tx(): class TestTermTranslatorIri: - def test_iri_to_pulsar(self, term_tx): + def test_iri_decode(self, term_tx): data = {"t": "i", "i": "http://example.org/Alice"} - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.type == IRI assert term.iri == "http://example.org/Alice" - def test_iri_from_pulsar(self, term_tx): + def test_iri_encode(self, term_tx): term = Term(type=IRI, iri="http://example.org/Bob") - wire = term_tx.from_pulsar(term) + wire = term_tx.encode(term) assert wire == {"t": "i", "i": "http://example.org/Bob"} def test_iri_round_trip(self, term_tx): original = Term(type=IRI, iri="http://example.org/round") - wire = term_tx.from_pulsar(original) - restored = term_tx.to_pulsar(wire) + wire = term_tx.encode(original) + restored = term_tx.decode(wire) assert restored == original @@ -52,21 +52,21 @@ class TestTermTranslatorIri: class TestTermTranslatorBlank: - def test_blank_to_pulsar(self, term_tx): + def test_blank_decode(self, term_tx): data = {"t": "b", "d": "_:b42"} - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.type == BLANK assert term.id == "_:b42" - def test_blank_from_pulsar(self, term_tx): + def test_blank_encode(self, term_tx): term = Term(type=BLANK, id="_:node1") - wire = term_tx.from_pulsar(term) + wire = term_tx.encode(term) assert wire == {"t": "b", "d": "_:node1"} def test_blank_round_trip(self, term_tx): original = Term(type=BLANK, 
id="_:x") - wire = term_tx.from_pulsar(original) - restored = term_tx.to_pulsar(wire) + wire = term_tx.encode(original) + restored = term_tx.decode(wire) assert restored == original @@ -76,29 +76,29 @@ class TestTermTranslatorBlank: class TestTermTranslatorTypedLiteral: - def test_plain_literal_to_pulsar(self, term_tx): + def test_plain_literal_decode(self, term_tx): data = {"t": "l", "v": "hello"} - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.type == LITERAL assert term.value == "hello" assert term.datatype == "" assert term.language == "" - def test_xsd_integer_to_pulsar(self, term_tx): + def test_xsd_integer_decode(self, term_tx): data = { "t": "l", "v": "42", "dt": "http://www.w3.org/2001/XMLSchema#integer", } - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.value == "42" assert term.datatype.endswith("#integer") - def test_typed_literal_from_pulsar(self, term_tx): + def test_typed_literal_encode(self, term_tx): term = Term( type=LITERAL, value="3.14", datatype="http://www.w3.org/2001/XMLSchema#double", ) - wire = term_tx.from_pulsar(term) + wire = term_tx.encode(term) assert wire["t"] == "l" assert wire["v"] == "3.14" assert wire["dt"] == "http://www.w3.org/2001/XMLSchema#double" @@ -109,13 +109,13 @@ class TestTermTranslatorTypedLiteral: type=LITERAL, value="true", datatype="http://www.w3.org/2001/XMLSchema#boolean", ) - wire = term_tx.from_pulsar(original) - restored = term_tx.to_pulsar(wire) + wire = term_tx.encode(original) + restored = term_tx.decode(wire) assert restored == original def test_plain_literal_omits_dt_and_ln(self, term_tx): term = Term(type=LITERAL, value="x") - wire = term_tx.from_pulsar(term) + wire = term_tx.encode(term) assert "dt" not in wire assert "ln" not in wire @@ -126,22 +126,22 @@ class TestTermTranslatorTypedLiteral: class TestTermTranslatorLangLiteral: - def test_language_tag_to_pulsar(self, term_tx): + def test_language_tag_decode(self, term_tx): data = {"t": "l", "v": 
"bonjour", "ln": "fr"} - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.value == "bonjour" assert term.language == "fr" - def test_language_tag_from_pulsar(self, term_tx): + def test_language_tag_encode(self, term_tx): term = Term(type=LITERAL, value="colour", language="en-GB") - wire = term_tx.from_pulsar(term) + wire = term_tx.encode(term) assert wire["ln"] == "en-GB" assert "dt" not in wire # No datatype def test_language_tag_round_trip(self, term_tx): original = Term(type=LITERAL, value="hola", language="es") - wire = term_tx.from_pulsar(original) - restored = term_tx.to_pulsar(wire) + wire = term_tx.encode(original) + restored = term_tx.decode(wire) assert restored == original @@ -151,7 +151,7 @@ class TestTermTranslatorLangLiteral: class TestTermTranslatorQuotedTriple: - def test_quoted_triple_to_pulsar(self, term_tx): + def test_quoted_triple_decode(self, term_tx): data = { "t": "t", "tr": { @@ -160,20 +160,20 @@ class TestTermTranslatorQuotedTriple: "o": {"t": "i", "i": "http://example.org/Bob"}, }, } - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.type == TRIPLE assert term.triple is not None assert term.triple.s.iri == "http://example.org/Alice" assert term.triple.o.iri == "http://example.org/Bob" - def test_quoted_triple_from_pulsar(self, term_tx): + def test_quoted_triple_encode(self, term_tx): inner = Triple( s=Term(type=IRI, iri="http://example.org/s"), p=Term(type=IRI, iri="http://example.org/p"), o=Term(type=LITERAL, value="val"), ) term = Term(type=TRIPLE, triple=inner) - wire = term_tx.from_pulsar(term) + wire = term_tx.encode(term) assert wire["t"] == "t" assert "tr" in wire assert wire["tr"]["s"]["i"] == "http://example.org/s" @@ -186,18 +186,18 @@ class TestTermTranslatorQuotedTriple: o=Term(type=LITERAL, value="C", language="en"), ) original = Term(type=TRIPLE, triple=inner) - wire = term_tx.from_pulsar(original) - restored = term_tx.to_pulsar(wire) + wire = term_tx.encode(original) + 
restored = term_tx.decode(wire) assert restored.type == TRIPLE assert restored.triple.s == original.triple.s assert restored.triple.o == original.triple.o def test_quoted_triple_none_triple(self, term_tx): term = Term(type=TRIPLE, triple=None) - wire = term_tx.from_pulsar(term) + wire = term_tx.encode(term) assert wire == {"t": "t"} # And back - restored = term_tx.to_pulsar(wire) + restored = term_tx.decode(wire) assert restored.type == TRIPLE assert restored.triple is None @@ -210,7 +210,7 @@ class TestTermTranslatorQuotedTriple: "o": {"t": "l", "v": "A feeling of expectation"}, }, } - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.triple.o.type == LITERAL assert term.triple.o.value == "A feeling of expectation" @@ -223,22 +223,22 @@ class TestTermTranslatorEdgeCases: def test_unknown_type(self, term_tx): data = {"t": "z"} - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.type == "z" def test_empty_type(self, term_tx): data = {} - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.type == "" def test_missing_iri_field(self, term_tx): data = {"t": "i"} - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.iri == "" def test_missing_literal_fields(self, term_tx): data = {"t": "l"} - term = term_tx.to_pulsar(data) + term = term_tx.decode(data) assert term.value == "" assert term.datatype == "" assert term.language == "" @@ -250,24 +250,24 @@ class TestTermTranslatorEdgeCases: class TestTripleTranslator: - def test_triple_to_pulsar(self, triple_tx): + def test_triple_decode(self, triple_tx): data = { "s": {"t": "i", "i": "http://example.org/s"}, "p": {"t": "i", "i": "http://example.org/p"}, "o": {"t": "l", "v": "object"}, } - triple = triple_tx.to_pulsar(data) + triple = triple_tx.decode(data) assert triple.s.iri == "http://example.org/s" assert triple.o.value == "object" assert triple.g is None - def test_triple_from_pulsar(self, triple_tx): + def test_triple_encode(self, 
triple_tx): triple = Triple( s=Term(type=IRI, iri="http://example.org/A"), p=Term(type=IRI, iri="http://example.org/B"), o=Term(type=LITERAL, value="C"), ) - wire = triple_tx.from_pulsar(triple) + wire = triple_tx.encode(triple) assert wire["s"]["t"] == "i" assert wire["o"]["v"] == "C" assert "g" not in wire @@ -279,17 +279,17 @@ class TestTripleTranslator: "o": {"t": "l", "v": "val"}, "g": "urn:graph:source", } - quad = triple_tx.to_pulsar(data) + quad = triple_tx.decode(data) assert quad.g == "urn:graph:source" - def test_quad_from_pulsar_includes_graph(self, triple_tx): + def test_quad_encode_includes_graph(self, triple_tx): quad = Triple( s=Term(type=IRI, iri="http://example.org/s"), p=Term(type=IRI, iri="http://example.org/p"), o=Term(type=LITERAL, value="v"), g="urn:graph:retrieval", ) - wire = triple_tx.from_pulsar(quad) + wire = triple_tx.encode(quad) assert wire["g"] == "urn:graph:retrieval" def test_quad_round_trip(self, triple_tx): @@ -299,8 +299,8 @@ class TestTripleTranslator: o=Term(type=LITERAL, value="v"), g="urn:graph:source", ) - wire = triple_tx.from_pulsar(original) - restored = triple_tx.to_pulsar(wire) + wire = triple_tx.encode(original) + restored = triple_tx.decode(wire) assert restored == original def test_none_graph_omitted_from_wire(self, triple_tx): @@ -310,12 +310,12 @@ class TestTripleTranslator: o=Term(type=LITERAL, value="v"), g=None, ) - wire = triple_tx.from_pulsar(triple) + wire = triple_tx.encode(triple) assert "g" not in wire def test_missing_terms_handled(self, triple_tx): data = {} - triple = triple_tx.to_pulsar(data) + triple = triple_tx.decode(data) assert triple.s is None assert triple.p is None assert triple.o is None @@ -342,16 +342,16 @@ class TestSubgraphTranslator: g="urn:graph:source", ), ] - wire_list = tx.from_pulsar(triples) + wire_list = tx.encode(triples) assert len(wire_list) == 2 assert wire_list[1]["g"] == "urn:graph:source" - restored = tx.to_pulsar(wire_list) + restored = tx.decode(wire_list) assert 
len(restored) == 2 assert restored[0] == triples[0] assert restored[1] == triples[1] def test_empty_subgraph(self): tx = SubgraphTranslator() - assert tx.to_pulsar([]) == [] - assert tx.from_pulsar([]) == [] + assert tx.decode([]) == [] + assert tx.encode([]) == [] diff --git a/tests/unit/test_reliability/test_metadata_preservation.py b/tests/unit/test_reliability/test_metadata_preservation.py index 2fabed58..aded7253 100644 --- a/tests/unit/test_reliability/test_metadata_preservation.py +++ b/tests/unit/test_reliability/test_metadata_preservation.py @@ -35,7 +35,7 @@ class TestDocumentMetadataTranslator: "parent-id": "doc-100", "document-type": "page", } - obj = self.tx.to_pulsar(data) + obj = self.tx.decode(data) assert obj.id == "doc-123" assert obj.time == 1710000000 assert obj.kind == "application/pdf" @@ -45,14 +45,14 @@ class TestDocumentMetadataTranslator: assert obj.parent_id == "doc-100" assert obj.document_type == "page" - wire = self.tx.from_pulsar(obj) + wire = self.tx.encode(obj) assert wire["id"] == "doc-123" assert wire["user"] == "alice" assert wire["parent-id"] == "doc-100" assert wire["document-type"] == "page" def test_defaults_for_missing_fields(self): - obj = self.tx.to_pulsar({}) + obj = self.tx.decode({}) assert obj.parent_id == "" assert obj.document_type == "source" @@ -63,25 +63,25 @@ class TestDocumentMetadataTranslator: "o": {"t": "i", "i": "http://example.org/o"}, }] data = {"metadata": triple_wire} - obj = self.tx.to_pulsar(data) + obj = self.tx.decode(data) assert len(obj.metadata) == 1 assert obj.metadata[0].s.iri == "http://example.org/s" def test_none_metadata_handled(self): data = {"metadata": None} - obj = self.tx.to_pulsar(data) + obj = self.tx.decode(data) assert obj.metadata == [] def test_empty_tags_preserved(self): data = {"tags": []} - obj = self.tx.to_pulsar(data) - wire = self.tx.from_pulsar(obj) + obj = self.tx.decode(data) + wire = self.tx.encode(obj) assert wire["tags"] == [] def 
test_falsy_fields_omitted_from_wire(self): """Empty string fields should be omitted from wire format.""" obj = DocumentMetadata(id="", time=0, user="") - wire = self.tx.from_pulsar(obj) + wire = self.tx.encode(obj) assert "id" not in wire assert "user" not in wire @@ -105,7 +105,7 @@ class TestProcessingMetadataTranslator: "collection": "my-collection", "tags": ["tag1"], } - obj = self.tx.to_pulsar(data) + obj = self.tx.decode(data) assert obj.id == "proc-1" assert obj.document_id == "doc-123" assert obj.flow == "default" @@ -113,32 +113,32 @@ class TestProcessingMetadataTranslator: assert obj.collection == "my-collection" assert obj.tags == ["tag1"] - wire = self.tx.from_pulsar(obj) + wire = self.tx.encode(obj) assert wire["id"] == "proc-1" assert wire["document-id"] == "doc-123" assert wire["user"] == "alice" assert wire["collection"] == "my-collection" def test_missing_fields_use_defaults(self): - obj = self.tx.to_pulsar({}) + obj = self.tx.decode({}) assert obj.id is None assert obj.user is None assert obj.collection is None def test_tags_none_omitted(self): obj = ProcessingMetadata(tags=None) - wire = self.tx.from_pulsar(obj) + wire = self.tx.encode(obj) assert "tags" not in wire def test_tags_empty_list_preserved(self): obj = ProcessingMetadata(tags=[]) - wire = self.tx.from_pulsar(obj) + wire = self.tx.encode(obj) assert wire["tags"] == [] def test_user_and_collection_preserved(self): """Core pipeline routing fields must survive round-trip.""" data = {"user": "bob", "collection": "research"} - obj = self.tx.to_pulsar(data) - wire = self.tx.from_pulsar(obj) + obj = self.tx.decode(data) + wire = self.tx.encode(obj) assert wire["user"] == "bob" assert wire["collection"] == "research" diff --git a/tests/unit/test_retrieval/test_structured_diag/test_message_translation.py b/tests/unit/test_retrieval/test_structured_diag/test_message_translation.py index 7a113250..4c6d6803 100644 --- a/tests/unit/test_retrieval/test_structured_diag/test_message_translation.py +++ 
b/tests/unit/test_retrieval/test_structured_diag/test_message_translation.py @@ -28,7 +28,7 @@ class TestRequestTranslation: } # Translate to Pulsar - pulsar_msg = translator.to_pulsar(api_data) + pulsar_msg = translator.decode(api_data) assert pulsar_msg.operation == "schema-selection" assert pulsar_msg.sample == "test data sample" @@ -46,7 +46,7 @@ class TestRequestTranslation: "options": {"delimiter": ","} } - pulsar_msg = translator.to_pulsar(api_data) + pulsar_msg = translator.decode(api_data) assert pulsar_msg.operation == "generate-descriptor" assert pulsar_msg.sample == "csv data" @@ -70,7 +70,7 @@ class TestResponseTranslation: ) # Translate to API format - api_data = translator.from_pulsar(pulsar_response) + api_data = translator.encode(pulsar_response) assert api_data["operation"] == "schema-selection" assert api_data["schema-matches"] == ["products", "inventory", "catalog"] @@ -86,7 +86,7 @@ class TestResponseTranslation: error=None ) - api_data = translator.from_pulsar(pulsar_response) + api_data = translator.encode(pulsar_response) assert api_data["operation"] == "schema-selection" assert api_data["schema-matches"] == [] @@ -103,7 +103,7 @@ class TestResponseTranslation: error=None ) - api_data = translator.from_pulsar(pulsar_response) + api_data = translator.encode(pulsar_response) assert api_data["operation"] == "detect-type" assert api_data["detected-type"] == "xml" @@ -123,7 +123,7 @@ class TestResponseTranslation: ) ) - api_data = translator.from_pulsar(pulsar_response) + api_data = translator.encode(pulsar_response) assert api_data["operation"] == "schema-selection" # Error objects are typically handled separately by the gateway @@ -146,7 +146,7 @@ class TestResponseTranslation: error=None ) - api_data = translator.from_pulsar(pulsar_response) + api_data = translator.encode(pulsar_response) assert api_data["operation"] == "diagnose" assert api_data["detected-type"] == "csv" @@ -165,7 +165,7 @@ class TestResponseTranslation: error=None ) - 
api_data, is_final = translator.from_response_with_completion(pulsar_response) + api_data, is_final = translator.encode_with_completion(pulsar_response) assert is_final is True # Structured-diag responses are always final assert api_data["operation"] == "schema-selection" diff --git a/trustgraph-base/trustgraph/base/__init__.py b/trustgraph-base/trustgraph/base/__init__.py index f9f38060..5a454279 100644 --- a/trustgraph-base/trustgraph/base/__init__.py +++ b/trustgraph-base/trustgraph/base/__init__.py @@ -1,5 +1,5 @@ -from . pubsub import PulsarClient, get_pubsub +from . pubsub import get_pubsub, add_pubsub_args from . async_processor import AsyncProcessor from . consumer import Consumer from . producer import Producer diff --git a/trustgraph-base/trustgraph/base/async_processor.py b/trustgraph-base/trustgraph/base/async_processor.py index 8068c67d..94bab278 100644 --- a/trustgraph-base/trustgraph/base/async_processor.py +++ b/trustgraph-base/trustgraph/base/async_processor.py @@ -6,7 +6,6 @@ import asyncio import argparse -import _pulsar import time import uuid import logging @@ -15,7 +14,7 @@ from prometheus_client import start_http_server, Info from .. schema import ConfigPush, config_push_queue from .. log_level import LogLevel -from . pubsub import PulsarClient, get_pubsub +from . pubsub import get_pubsub, add_pubsub_args from . producer import Producer from . consumer import Consumer from . 
metrics import ProcessorMetrics, ConsumerMetrics @@ -223,8 +222,8 @@ class AsyncProcessor: logger.info("Keyboard interrupt.") return - except _pulsar.Interrupted: - logger.info("Pulsar Interrupted.") + except KeyboardInterrupt: + logger.info("Interrupted.") return # Exceptions from a taskgroup come in as an exception group @@ -250,15 +249,7 @@ class AsyncProcessor: @staticmethod def add_args(parser): - # Pub/sub backend selection - parser.add_argument( - '--pubsub-backend', - default=os.getenv('PUBSUB_BACKEND', 'pulsar'), - choices=['pulsar', 'mqtt'], - help='Pub/sub backend (default: pulsar, env: PUBSUB_BACKEND)', - ) - - PulsarClient.add_args(parser) + add_pubsub_args(parser) add_logging_args(parser) parser.add_argument( diff --git a/trustgraph-base/trustgraph/base/flow_processor.py b/trustgraph-base/trustgraph/base/flow_processor.py index 0f170030..1caeaec0 100644 --- a/trustgraph-base/trustgraph/base/flow_processor.py +++ b/trustgraph-base/trustgraph/base/flow_processor.py @@ -6,8 +6,6 @@ import json import logging -from pulsar.schema import JsonSchema - from .. schema import Error from .. schema import config_request_queue, config_response_queue from .. schema import config_push_queue diff --git a/trustgraph-base/trustgraph/base/pubsub.py b/trustgraph-base/trustgraph/base/pubsub.py index a7772b67..04734f28 100644 --- a/trustgraph-base/trustgraph/base/pubsub.py +++ b/trustgraph-base/trustgraph/base/pubsub.py @@ -1,110 +1,72 @@ import os -import pulsar -import _pulsar -import uuid -from pulsar.schema import JsonSchema import logging -from .. log_level import LogLevel -from .pulsar_backend import PulsarBackend - logger = logging.getLogger(__name__) +# Default connection settings from environment +DEFAULT_PULSAR_HOST = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650') +DEFAULT_PULSAR_API_KEY = os.getenv("PULSAR_API_KEY", None) + def get_pubsub(**config): """ Factory function to create a pub/sub backend based on configuration. 
Args: - config: Configuration dictionary from command-line args - Must include 'pubsub_backend' key + config: Configuration dictionary from command-line args. + Key 'pubsub_backend' selects the backend (default: 'pulsar'). Returns: - Backend instance (PulsarBackend, MQTTBackend, etc.) - - Example: - backend = get_pubsub( - pubsub_backend='pulsar', - pulsar_host='pulsar://localhost:6650' - ) + Backend instance implementing the PubSubBackend protocol. """ backend_type = config.get('pubsub_backend', 'pulsar') if backend_type == 'pulsar': + from .pulsar_backend import PulsarBackend return PulsarBackend( - host=config.get('pulsar_host', PulsarClient.default_pulsar_host), - api_key=config.get('pulsar_api_key', PulsarClient.default_pulsar_api_key), + host=config.get('pulsar_host', DEFAULT_PULSAR_HOST), + api_key=config.get('pulsar_api_key', DEFAULT_PULSAR_API_KEY), listener=config.get('pulsar_listener'), ) - elif backend_type == 'mqtt': - # TODO: Implement MQTT backend - raise NotImplementedError("MQTT backend not yet implemented") else: raise ValueError(f"Unknown pub/sub backend: {backend_type}") -class PulsarClient: +STANDALONE_PULSAR_HOST = 'pulsar://localhost:6650' - default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650') - default_pulsar_api_key = os.getenv("PULSAR_API_KEY", None) - def __init__(self, **params): +def add_pubsub_args(parser, standalone=False): + """Add pub/sub CLI arguments to an argument parser. 
- self.client = None + Args: + parser: argparse.ArgumentParser + standalone: If True, default host is localhost (for CLI tools + that run outside containers) + """ + host = STANDALONE_PULSAR_HOST if standalone else DEFAULT_PULSAR_HOST + listener_default = 'localhost' if standalone else None - pulsar_host = params.get("pulsar_host", self.default_pulsar_host) - pulsar_listener = params.get("pulsar_listener", None) - pulsar_api_key = params.get( - "pulsar_api_key", - self.default_pulsar_api_key - ) - # Hard-code Pulsar logging to ERROR level to minimize noise + parser.add_argument( + '--pubsub-backend', + default=os.getenv('PUBSUB_BACKEND', 'pulsar'), + help='Pub/sub backend (default: pulsar, env: PUBSUB_BACKEND)', + ) - self.pulsar_host = pulsar_host - self.pulsar_api_key = pulsar_api_key + parser.add_argument( + '-p', '--pulsar-host', + default=host, + help=f'Pulsar host (default: {host})', + ) - if pulsar_api_key: - auth = pulsar.AuthenticationToken(pulsar_api_key) - self.client = pulsar.Client( - pulsar_host, - authentication=auth, - logger=pulsar.ConsoleLogger(_pulsar.LoggerLevel.Error) - ) - else: - self.client = pulsar.Client( - pulsar_host, - listener_name=pulsar_listener, - logger=pulsar.ConsoleLogger(_pulsar.LoggerLevel.Error) - ) + parser.add_argument( + '--pulsar-api-key', + default=DEFAULT_PULSAR_API_KEY, + help='Pulsar API key', + ) - self.pulsar_listener = pulsar_listener - - def close(self): - self.client.close() - - def __del__(self): - - if hasattr(self, "client"): - if self.client: - self.client.close() - - @staticmethod - def add_args(parser): - - parser.add_argument( - '-p', '--pulsar-host', - default=__class__.default_pulsar_host, - help=f'Pulsar host (default: {__class__.default_pulsar_host})', - ) - - parser.add_argument( - '--pulsar-api-key', - default=__class__.default_pulsar_api_key, - help=f'Pulsar API key', - ) - - parser.add_argument( - '--pulsar-listener', - help=f'Pulsar listener (default: none)', - ) + parser.add_argument( + 
'--pulsar-listener', + default=listener_default, + help=f'Pulsar listener (default: {listener_default or "none"})', + ) diff --git a/trustgraph-base/trustgraph/base/pulsar_backend.py b/trustgraph-base/trustgraph/base/pulsar_backend.py index a3c3debd..677f2527 100644 --- a/trustgraph-base/trustgraph/base/pulsar_backend.py +++ b/trustgraph-base/trustgraph/base/pulsar_backend.py @@ -181,8 +181,11 @@ class PulsarBackendConsumer: self._schema_cls = schema_cls def receive(self, timeout_millis: int = 2000) -> Message: - """Receive a message.""" - pulsar_msg = self._consumer.receive(timeout_millis=timeout_millis) + """Receive a message. Raises TimeoutError if no message available.""" + try: + pulsar_msg = self._consumer.receive(timeout_millis=timeout_millis) + except _pulsar.Timeout: + raise TimeoutError("No message received within timeout") return PulsarMessage(pulsar_msg, self._schema_cls) def acknowledge(self, message: Message) -> None: @@ -237,38 +240,44 @@ class PulsarBackend: self.client = pulsar.Client(**client_args) logger.info(f"Pulsar client connected to {host}") - def map_topic(self, generic_topic: str) -> str: + def map_topic(self, queue_id: str) -> str: """ - Map generic topic format to Pulsar URI. + Map queue identifier to Pulsar URI. 
- Format: qos/tenant/namespace/queue - Example: q1/tg/flow/my-queue -> persistent://tg/flow/my-queue + Format: class:topicspace:topic + Example: flow:tg:text-completion-request -> persistent://tg/flow/text-completion-request Args: - generic_topic: Generic topic string or already-formatted Pulsar URI + queue_id: Queue identifier string or already-formatted Pulsar URI Returns: Pulsar topic URI """ # If already a Pulsar URI, return as-is - if '://' in generic_topic: - return generic_topic + if '://' in queue_id: + return queue_id - parts = generic_topic.split('/', 3) - if len(parts) != 4: - raise ValueError(f"Invalid topic format: {generic_topic}, expected qos/tenant/namespace/queue") + parts = queue_id.split(':', 2) + if len(parts) != 3: + raise ValueError( + f"Invalid queue format: {queue_id}, " + f"expected class:topicspace:topic" + ) - qos, tenant, namespace, queue = parts + cls, topicspace, topic = parts - # Map QoS to persistence - if qos == 'q0': - persistence = 'non-persistent' - elif qos in ['q1', 'q2']: + # Map class to Pulsar persistence and namespace + if cls in ('flow', 'state'): persistence = 'persistent' + elif cls in ('request', 'response'): + persistence = 'non-persistent' else: - raise ValueError(f"Invalid QoS level: {qos}, expected q0, q1, or q2") + raise ValueError( + f"Invalid queue class: {cls}, " + f"expected flow, request, response, or state" + ) - return f"{persistence}://{tenant}/{namespace}/{queue}" + return f"{persistence}://{topicspace}/{cls}/{topic}" def create_producer(self, topic: str, schema: type, **options) -> BackendProducer: """ diff --git a/trustgraph-base/trustgraph/clients/agent_client.py b/trustgraph-base/trustgraph/clients/agent_client.py index 17ff5a09..1cadbdd5 100644 --- a/trustgraph-base/trustgraph/clients/agent_client.py +++ b/trustgraph-base/trustgraph/clients/agent_client.py @@ -1,5 +1,4 @@ -import _pulsar from .. schema import AgentRequest, AgentResponse from .. 
schema import agent_request_queue @@ -7,15 +6,11 @@ from .. schema import agent_response_queue from . base import BaseClient # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug class AgentClient(BaseClient): def __init__( - self, log_level=ERROR, + self, subscriber=None, input_queue=None, output_queue=None, @@ -27,7 +22,6 @@ class AgentClient(BaseClient): if output_queue is None: output_queue = agent_response_queue super(AgentClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/clients/base.py b/trustgraph-base/trustgraph/clients/base.py index 3a4da6ec..a71ba84e 100644 --- a/trustgraph-base/trustgraph/clients/base.py +++ b/trustgraph-base/trustgraph/clients/base.py @@ -1,10 +1,6 @@ -import pulsar -import _pulsar -import hashlib import uuid import time -from pulsar.schema import JsonSchema from .. exceptions import * from ..base.pubsub import get_pubsub @@ -12,16 +8,11 @@ from ..base.pubsub import get_pubsub # Default timeout for a request/response. In seconds. 
DEFAULT_TIMEOUT=300 -# Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug class BaseClient: def __init__( - self, log_level=ERROR, + self, subscriber=None, input_queue=None, output_queue=None, @@ -87,7 +78,7 @@ class BaseClient: try: msg = self.consumer.receive(timeout_millis=2500) - except pulsar.exceptions.Timeout: + except TimeoutError: continue mid = msg.properties()["id"] @@ -139,4 +130,3 @@ class BaseClient: if hasattr(self, "backend"): self.backend.close() - diff --git a/trustgraph-base/trustgraph/clients/config_client.py b/trustgraph-base/trustgraph/clients/config_client.py index be2bf5b9..daadf652 100644 --- a/trustgraph-base/trustgraph/clients/config_client.py +++ b/trustgraph-base/trustgraph/clients/config_client.py @@ -1,5 +1,4 @@ -import _pulsar import json import dataclasses @@ -9,10 +8,6 @@ from .. schema import config_response_queue from . base import BaseClient # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug @dataclasses.dataclass class Definition: @@ -34,7 +29,7 @@ class Topic: class ConfigClient(BaseClient): def __init__( - self, log_level=ERROR, + self, subscriber=None, input_queue=None, output_queue=None, @@ -50,7 +45,6 @@ class ConfigClient(BaseClient): output_queue = config_response_queue super(ConfigClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/clients/document_embeddings_client.py b/trustgraph-base/trustgraph/clients/document_embeddings_client.py index 1ab47aab..ebbad397 100644 --- a/trustgraph-base/trustgraph/clients/document_embeddings_client.py +++ b/trustgraph-base/trustgraph/clients/document_embeddings_client.py @@ -1,5 +1,4 @@ -import _pulsar from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse from .. 
schema import document_embeddings_request_queue @@ -7,15 +6,11 @@ from .. schema import document_embeddings_response_queue from . base import BaseClient # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug class DocumentEmbeddingsClient(BaseClient): def __init__( - self, log_level=ERROR, + self, subscriber=None, input_queue=None, output_queue=None, @@ -30,7 +25,6 @@ class DocumentEmbeddingsClient(BaseClient): output_queue = document_embeddings_response_queue super(DocumentEmbeddingsClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/clients/document_rag_client.py b/trustgraph-base/trustgraph/clients/document_rag_client.py index 946b1a6c..057376fb 100644 --- a/trustgraph-base/trustgraph/clients/document_rag_client.py +++ b/trustgraph-base/trustgraph/clients/document_rag_client.py @@ -1,21 +1,15 @@ -import _pulsar from .. schema import DocumentRagQuery, DocumentRagResponse from .. schema import document_rag_request_queue, document_rag_response_queue from . 
base import BaseClient # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug class DocumentRagClient(BaseClient): def __init__( self, - log_level=ERROR, subscriber=None, input_queue=None, output_queue=None, @@ -30,7 +24,6 @@ class DocumentRagClient(BaseClient): output_queue = document_rag_response_queue super(DocumentRagClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/clients/embeddings_client.py b/trustgraph-base/trustgraph/clients/embeddings_client.py index 1b1c0dc8..7d9e6d8e 100644 --- a/trustgraph-base/trustgraph/clients/embeddings_client.py +++ b/trustgraph-base/trustgraph/clients/embeddings_client.py @@ -1,20 +1,14 @@ -from pulsar.schema import JsonSchema from .. schema import EmbeddingsRequest, EmbeddingsResponse from . base import BaseClient -import _pulsar # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug class EmbeddingsClient(BaseClient): def __init__( - self, log_level=ERROR, + self, input_queue=None, output_queue=None, subscriber=None, @@ -23,7 +17,6 @@ class EmbeddingsClient(BaseClient): ): super(EmbeddingsClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/clients/graph_embeddings_client.py b/trustgraph-base/trustgraph/clients/graph_embeddings_client.py index f85c91ee..62a55609 100644 --- a/trustgraph-base/trustgraph/clients/graph_embeddings_client.py +++ b/trustgraph-base/trustgraph/clients/graph_embeddings_client.py @@ -1,5 +1,4 @@ -import _pulsar from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse from .. schema import graph_embeddings_request_queue @@ -7,15 +6,11 @@ from .. schema import graph_embeddings_response_queue from . 
base import BaseClient # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug class GraphEmbeddingsClient(BaseClient): def __init__( - self, log_level=ERROR, + self, subscriber=None, input_queue=None, output_queue=None, @@ -30,7 +25,6 @@ class GraphEmbeddingsClient(BaseClient): output_queue = graph_embeddings_response_queue super(GraphEmbeddingsClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/clients/graph_rag_client.py b/trustgraph-base/trustgraph/clients/graph_rag_client.py index 42ffce0c..17d7b0f0 100644 --- a/trustgraph-base/trustgraph/clients/graph_rag_client.py +++ b/trustgraph-base/trustgraph/clients/graph_rag_client.py @@ -1,21 +1,15 @@ -import _pulsar from .. schema import GraphRagQuery, GraphRagResponse from .. schema import graph_rag_request_queue, graph_rag_response_queue from . base import BaseClient # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug class GraphRagClient(BaseClient): def __init__( self, - log_level=ERROR, subscriber=None, input_queue=None, output_queue=None, @@ -30,7 +24,6 @@ class GraphRagClient(BaseClient): output_queue = graph_rag_response_queue super(GraphRagClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/clients/llm_client.py b/trustgraph-base/trustgraph/clients/llm_client.py index 3c629e7d..bfb4096c 100644 --- a/trustgraph-base/trustgraph/clients/llm_client.py +++ b/trustgraph-base/trustgraph/clients/llm_client.py @@ -1,5 +1,4 @@ -import _pulsar from .. schema import TextCompletionRequest, TextCompletionResponse from .. schema import text_completion_request_queue @@ -8,15 +7,11 @@ from . base import BaseClient from .. 
exceptions import LlmError # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug class LlmClient(BaseClient): def __init__( - self, log_level=ERROR, + self, subscriber=None, input_queue=None, output_queue=None, @@ -28,7 +23,6 @@ class LlmClient(BaseClient): if output_queue is None: output_queue = text_completion_response_queue super(LlmClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/clients/prompt_client.py b/trustgraph-base/trustgraph/clients/prompt_client.py index 91707670..12c9c194 100644 --- a/trustgraph-base/trustgraph/clients/prompt_client.py +++ b/trustgraph-base/trustgraph/clients/prompt_client.py @@ -1,5 +1,4 @@ -import _pulsar import json import dataclasses @@ -9,10 +8,6 @@ from .. schema import prompt_response_queue from . base import BaseClient # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug @dataclasses.dataclass class Definition: @@ -34,7 +29,7 @@ class Topic: class PromptClient(BaseClient): def __init__( - self, log_level=ERROR, + self, subscriber=None, input_queue=None, output_queue=None, @@ -49,7 +44,6 @@ class PromptClient(BaseClient): output_queue = prompt_response_queue super(PromptClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/clients/row_embeddings_client.py b/trustgraph-base/trustgraph/clients/row_embeddings_client.py index 19d4b338..6e10de29 100644 --- a/trustgraph-base/trustgraph/clients/row_embeddings_client.py +++ b/trustgraph-base/trustgraph/clients/row_embeddings_client.py @@ -1,5 +1,4 @@ -import _pulsar from .. schema import RowEmbeddingsRequest, RowEmbeddingsResponse from .. schema import row_embeddings_request_queue @@ -7,15 +6,11 @@ from .. 
schema import row_embeddings_response_queue from . base import BaseClient # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug class RowEmbeddingsClient(BaseClient): def __init__( - self, log_level=ERROR, + self, subscriber=None, input_queue=None, output_queue=None, @@ -30,7 +25,6 @@ class RowEmbeddingsClient(BaseClient): output_queue = row_embeddings_response_queue super(RowEmbeddingsClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/clients/triples_query_client.py b/trustgraph-base/trustgraph/clients/triples_query_client.py index 401aaf0b..403d02ea 100644 --- a/trustgraph-base/trustgraph/clients/triples_query_client.py +++ b/trustgraph-base/trustgraph/clients/triples_query_client.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -import _pulsar from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL from .. schema import triples_request_queue @@ -8,15 +7,11 @@ from .. schema import triples_response_queue from . 
base import BaseClient # Ugly -ERROR=_pulsar.LoggerLevel.Error -WARN=_pulsar.LoggerLevel.Warn -INFO=_pulsar.LoggerLevel.Info -DEBUG=_pulsar.LoggerLevel.Debug class TriplesQueryClient(BaseClient): def __init__( - self, log_level=ERROR, + self, subscriber=None, input_queue=None, output_queue=None, @@ -31,7 +26,6 @@ class TriplesQueryClient(BaseClient): output_queue = triples_response_queue super(TriplesQueryClient, self).__init__( - log_level=log_level, subscriber=subscriber, input_queue=input_queue, output_queue=output_queue, diff --git a/trustgraph-base/trustgraph/log_level.py b/trustgraph-base/trustgraph/log_level.py index 65486b29..5b6f9e0c 100644 --- a/trustgraph-base/trustgraph/log_level.py +++ b/trustgraph-base/trustgraph/log_level.py @@ -1,6 +1,6 @@ from enum import Enum -import _pulsar + class LogLevel(Enum): DEBUG = 'debug' @@ -10,11 +10,3 @@ class LogLevel(Enum): def __str__(self): return self.value - - def to_pulsar(self): - if self == LogLevel.DEBUG: return _pulsar.LoggerLevel.Debug - if self == LogLevel.INFO: return _pulsar.LoggerLevel.Info - if self == LogLevel.WARN: return _pulsar.LoggerLevel.Warn - if self == LogLevel.ERROR: return _pulsar.LoggerLevel.Error - raise RuntimeError("Log level mismatch") - diff --git a/trustgraph-base/trustgraph/messaging/translators/agent.py b/trustgraph-base/trustgraph/messaging/translators/agent.py index b245a83e..c2c00ac2 100644 --- a/trustgraph-base/trustgraph/messaging/translators/agent.py +++ b/trustgraph-base/trustgraph/messaging/translators/agent.py @@ -6,7 +6,7 @@ from .base import MessageTranslator class AgentRequestTranslator(MessageTranslator): """Translator for AgentRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> AgentRequest: + def decode(self, data: Dict[str, Any]) -> AgentRequest: return AgentRequest( question=data["question"], state=data.get("state", None), @@ -26,7 +26,7 @@ class AgentRequestTranslator(MessageTranslator): expected_siblings=data.get("expected_siblings", 0), ) - 
def from_pulsar(self, obj: AgentRequest) -> Dict[str, Any]: + def encode(self, obj: AgentRequest) -> Dict[str, Any]: return { "question": obj.question, "state": obj.state, @@ -50,10 +50,10 @@ class AgentRequestTranslator(MessageTranslator): class AgentResponseTranslator(MessageTranslator): """Translator for AgentResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> AgentResponse: + def decode(self, data: Dict[str, Any]) -> AgentResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: AgentResponse) -> Dict[str, Any]: + def encode(self, obj: AgentResponse) -> Dict[str, Any]: result = {} if obj.chunk_type: @@ -81,7 +81,7 @@ class AgentResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: AgentResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: AgentResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" is_final = getattr(obj, 'end_of_dialog', False) - return self.from_pulsar(obj), is_final \ No newline at end of file + return self.encode(obj), is_final \ No newline at end of file diff --git a/trustgraph-base/trustgraph/messaging/translators/base.py b/trustgraph-base/trustgraph/messaging/translators/base.py index 64e2b635..74658082 100644 --- a/trustgraph-base/trustgraph/messaging/translators/base.py +++ b/trustgraph-base/trustgraph/messaging/translators/base.py @@ -1,43 +1,46 @@ from abc import ABC, abstractmethod from typing import Dict, Any, Tuple -from pulsar.schema import Record class Translator(ABC): - """Base class for bidirectional Pulsar ↔ dict translation""" - + """Base class for bidirectional schema ↔ dict translation. + + Translates between external API dicts (JSON from HTTP/WebSocket) + and internal schema objects (dataclasses). 
+ """ + @abstractmethod - def to_pulsar(self, data: Dict[str, Any]) -> Record: - """Convert dict to Pulsar schema object""" + def decode(self, data: Dict[str, Any]) -> Any: + """Convert external dict to schema object.""" pass - - @abstractmethod - def from_pulsar(self, obj: Record) -> Dict[str, Any]: - """Convert Pulsar schema object to dict""" + + @abstractmethod + def encode(self, obj: Any) -> Dict[str, Any]: + """Convert schema object to external dict.""" pass class MessageTranslator(Translator): - """For complete request/response message translation""" - - def from_response_with_completion(self, obj: Record) -> Tuple[Dict[str, Any], bool]: - """Returns (response_dict, is_final) - for streaming responses""" - return self.from_pulsar(obj), True + """For complete request/response message translation.""" + + def encode_with_completion(self, obj: Any) -> Tuple[Dict[str, Any], bool]: + """Returns (response_dict, is_final) — for streaming responses.""" + return self.encode(obj), True class SendTranslator(Translator): - """For fire-and-forget send operations (like ServiceSender)""" - - def from_pulsar(self, obj: Record) -> Dict[str, Any]: - """Usually not needed for send-only operations""" - raise NotImplementedError("Send translators typically don't need from_pulsar") + """For fire-and-forget send operations.""" + + def encode(self, obj: Any) -> Dict[str, Any]: + """Usually not needed for send-only operations.""" + raise NotImplementedError("Send translators don't need encode") -def handle_optional_fields(obj: Record, fields: list) -> Dict[str, Any]: - """Helper to extract optional fields from Pulsar object""" +def handle_optional_fields(obj: Any, fields: list) -> Dict[str, Any]: + """Helper to extract optional fields from a schema object.""" result = {} for field in fields: value = getattr(obj, field, None) if value is not None: result[field] = value - return result \ No newline at end of file + return result diff --git 
a/trustgraph-base/trustgraph/messaging/translators/collection.py b/trustgraph-base/trustgraph/messaging/translators/collection.py index 22c82828..c6fd1500 100644 --- a/trustgraph-base/trustgraph/messaging/translators/collection.py +++ b/trustgraph-base/trustgraph/messaging/translators/collection.py @@ -6,7 +6,7 @@ from .base import MessageTranslator class CollectionManagementRequestTranslator(MessageTranslator): """Translator for CollectionManagementRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> CollectionManagementRequest: + def decode(self, data: Dict[str, Any]) -> CollectionManagementRequest: return CollectionManagementRequest( operation=data.get("operation"), user=data.get("user"), @@ -19,7 +19,7 @@ class CollectionManagementRequestTranslator(MessageTranslator): limit=data.get("limit") ) - def from_pulsar(self, obj: CollectionManagementRequest) -> Dict[str, Any]: + def encode(self, obj: CollectionManagementRequest) -> Dict[str, Any]: result = {} if obj.operation is not None: @@ -47,7 +47,7 @@ class CollectionManagementRequestTranslator(MessageTranslator): class CollectionManagementResponseTranslator(MessageTranslator): """Translator for CollectionManagementResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> CollectionManagementResponse: + def decode(self, data: Dict[str, Any]) -> CollectionManagementResponse: # Handle error error = None @@ -76,7 +76,7 @@ class CollectionManagementResponseTranslator(MessageTranslator): collections=collections ) - def from_pulsar(self, obj: CollectionManagementResponse) -> Dict[str, Any]: + def encode(self, obj: CollectionManagementResponse) -> Dict[str, Any]: result = {} print("COLLECTIONMGMT", obj, flush=True) diff --git a/trustgraph-base/trustgraph/messaging/translators/config.py b/trustgraph-base/trustgraph/messaging/translators/config.py index 299c5438..e166362a 100644 --- a/trustgraph-base/trustgraph/messaging/translators/config.py +++ 
b/trustgraph-base/trustgraph/messaging/translators/config.py @@ -6,7 +6,7 @@ from .base import MessageTranslator class ConfigRequestTranslator(MessageTranslator): """Translator for ConfigRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> ConfigRequest: + def decode(self, data: Dict[str, Any]) -> ConfigRequest: keys = None if "keys" in data: keys = [ @@ -35,7 +35,7 @@ class ConfigRequestTranslator(MessageTranslator): values=values ) - def from_pulsar(self, obj: ConfigRequest) -> Dict[str, Any]: + def encode(self, obj: ConfigRequest) -> Dict[str, Any]: result = {} if obj.operation is not None: @@ -69,10 +69,10 @@ class ConfigRequestTranslator(MessageTranslator): class ConfigResponseTranslator(MessageTranslator): """Translator for ConfigResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> ConfigResponse: + def decode(self, data: Dict[str, Any]) -> ConfigResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: ConfigResponse) -> Dict[str, Any]: + def encode(self, obj: ConfigResponse) -> Dict[str, Any]: result = {} if obj.version is not None: @@ -96,6 +96,6 @@ class ConfigResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: ConfigResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: ConfigResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True + return self.encode(obj), True diff --git a/trustgraph-base/trustgraph/messaging/translators/diagnosis.py b/trustgraph-base/trustgraph/messaging/translators/diagnosis.py index e0cb6a89..2a4b811d 100644 --- a/trustgraph-base/trustgraph/messaging/translators/diagnosis.py +++ b/trustgraph-base/trustgraph/messaging/translators/diagnosis.py @@ -7,7 +7,7 @@ from .base import MessageTranslator class StructuredDataDiagnosisRequestTranslator(MessageTranslator): """Translator for 
StructuredDataDiagnosisRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> StructuredDataDiagnosisRequest: + def decode(self, data: Dict[str, Any]) -> StructuredDataDiagnosisRequest: return StructuredDataDiagnosisRequest( operation=data["operation"], sample=data["sample"], @@ -16,7 +16,7 @@ class StructuredDataDiagnosisRequestTranslator(MessageTranslator): options=data.get("options", {}) ) - def from_pulsar(self, obj: StructuredDataDiagnosisRequest) -> Dict[str, Any]: + def encode(self, obj: StructuredDataDiagnosisRequest) -> Dict[str, Any]: result = { "operation": obj.operation, "sample": obj.sample, @@ -36,10 +36,10 @@ class StructuredDataDiagnosisRequestTranslator(MessageTranslator): class StructuredDataDiagnosisResponseTranslator(MessageTranslator): """Translator for StructuredDataDiagnosisResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> StructuredDataDiagnosisResponse: + def decode(self, data: Dict[str, Any]) -> StructuredDataDiagnosisResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: StructuredDataDiagnosisResponse) -> Dict[str, Any]: + def encode(self, obj: StructuredDataDiagnosisResponse) -> Dict[str, Any]: result = { "operation": obj.operation } @@ -64,6 +64,6 @@ class StructuredDataDiagnosisResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: StructuredDataDiagnosisResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: StructuredDataDiagnosisResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True \ No newline at end of file + return self.encode(obj), True \ No newline at end of file diff --git a/trustgraph-base/trustgraph/messaging/translators/document_loading.py b/trustgraph-base/trustgraph/messaging/translators/document_loading.py index 51cda697..3e7062e2 100644 --- 
a/trustgraph-base/trustgraph/messaging/translators/document_loading.py +++ b/trustgraph-base/trustgraph/messaging/translators/document_loading.py @@ -30,7 +30,7 @@ def _decode_text_payload(payload: str | bytes, charset: str) -> str: class DocumentTranslator(SendTranslator): """Translator for Document schema objects (PDF docs etc.)""" - def to_pulsar(self, data: Dict[str, Any]) -> Document: + def decode(self, data: Dict[str, Any]) -> Document: # Handle base64 content validation doc = base64.b64decode(data["data"]) @@ -45,7 +45,7 @@ class DocumentTranslator(SendTranslator): data=base64.b64encode(doc).decode("utf-8") ) - def from_pulsar(self, obj: Document) -> Dict[str, Any]: + def encode(self, obj: Document) -> Dict[str, Any]: result = { "data": obj.data } @@ -69,7 +69,7 @@ class DocumentTranslator(SendTranslator): class TextDocumentTranslator(SendTranslator): """Translator for TextDocument schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> TextDocument: + def decode(self, data: Dict[str, Any]) -> TextDocument: charset = data.get("charset", "utf-8") text = _decode_text_payload(data["text"], charset) @@ -85,7 +85,7 @@ class TextDocumentTranslator(SendTranslator): text=text.encode("utf-8") ) - def from_pulsar(self, obj: TextDocument) -> Dict[str, Any]: + def encode(self, obj: TextDocument) -> Dict[str, Any]: result = { "text": obj.text.decode("utf-8") if isinstance(obj.text, bytes) else obj.text } @@ -109,7 +109,7 @@ class TextDocumentTranslator(SendTranslator): class ChunkTranslator(SendTranslator): """Translator for Chunk schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> Chunk: + def decode(self, data: Dict[str, Any]) -> Chunk: from ...schema import Metadata return Chunk( metadata=Metadata( @@ -121,7 +121,7 @@ class ChunkTranslator(SendTranslator): chunk=data["chunk"].encode("utf-8") if isinstance(data["chunk"], str) else data["chunk"] ) - def from_pulsar(self, obj: Chunk) -> Dict[str, Any]: + def encode(self, obj: Chunk) -> Dict[str, 
Any]: result = { "chunk": obj.chunk.decode("utf-8") if isinstance(obj.chunk, bytes) else obj.chunk } @@ -145,7 +145,7 @@ class ChunkTranslator(SendTranslator): class DocumentEmbeddingsTranslator(SendTranslator): """Translator for DocumentEmbeddings schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> DocumentEmbeddings: + def decode(self, data: Dict[str, Any]) -> DocumentEmbeddings: metadata = data.get("metadata", {}) chunks = [ @@ -167,7 +167,7 @@ class DocumentEmbeddingsTranslator(SendTranslator): chunks=chunks ) - def from_pulsar(self, obj: DocumentEmbeddings) -> Dict[str, Any]: + def encode(self, obj: DocumentEmbeddings) -> Dict[str, Any]: result = { "chunks": [ { diff --git a/trustgraph-base/trustgraph/messaging/translators/embeddings.py b/trustgraph-base/trustgraph/messaging/translators/embeddings.py index 454ce733..c3c1548a 100644 --- a/trustgraph-base/trustgraph/messaging/translators/embeddings.py +++ b/trustgraph-base/trustgraph/messaging/translators/embeddings.py @@ -6,12 +6,12 @@ from .base import MessageTranslator class EmbeddingsRequestTranslator(MessageTranslator): """Translator for EmbeddingsRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> EmbeddingsRequest: + def decode(self, data: Dict[str, Any]) -> EmbeddingsRequest: return EmbeddingsRequest( texts=data["texts"] ) - def from_pulsar(self, obj: EmbeddingsRequest) -> Dict[str, Any]: + def encode(self, obj: EmbeddingsRequest) -> Dict[str, Any]: return { "texts": obj.texts } @@ -20,14 +20,14 @@ class EmbeddingsRequestTranslator(MessageTranslator): class EmbeddingsResponseTranslator(MessageTranslator): """Translator for EmbeddingsResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> EmbeddingsResponse: + def decode(self, data: Dict[str, Any]) -> EmbeddingsResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: EmbeddingsResponse) -> Dict[str, Any]: + def encode(self, obj: 
EmbeddingsResponse) -> Dict[str, Any]: return { "vectors": obj.vectors } - def from_response_with_completion(self, obj: EmbeddingsResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: EmbeddingsResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True \ No newline at end of file + return self.encode(obj), True \ No newline at end of file diff --git a/trustgraph-base/trustgraph/messaging/translators/embeddings_query.py b/trustgraph-base/trustgraph/messaging/translators/embeddings_query.py index f10ca4c6..fce1625e 100644 --- a/trustgraph-base/trustgraph/messaging/translators/embeddings_query.py +++ b/trustgraph-base/trustgraph/messaging/translators/embeddings_query.py @@ -11,7 +11,7 @@ from .primitives import ValueTranslator class DocumentEmbeddingsRequestTranslator(MessageTranslator): """Translator for DocumentEmbeddingsRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> DocumentEmbeddingsRequest: + def decode(self, data: Dict[str, Any]) -> DocumentEmbeddingsRequest: return DocumentEmbeddingsRequest( vector=data["vector"], limit=int(data.get("limit", 10)), @@ -19,7 +19,7 @@ class DocumentEmbeddingsRequestTranslator(MessageTranslator): collection=data.get("collection", "default") ) - def from_pulsar(self, obj: DocumentEmbeddingsRequest) -> Dict[str, Any]: + def encode(self, obj: DocumentEmbeddingsRequest) -> Dict[str, Any]: return { "vector": obj.vector, "limit": obj.limit, @@ -31,10 +31,10 @@ class DocumentEmbeddingsRequestTranslator(MessageTranslator): class DocumentEmbeddingsResponseTranslator(MessageTranslator): """Translator for DocumentEmbeddingsResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> DocumentEmbeddingsResponse: + def decode(self, data: Dict[str, Any]) -> DocumentEmbeddingsResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: DocumentEmbeddingsResponse) 
-> Dict[str, Any]: + def encode(self, obj: DocumentEmbeddingsResponse) -> Dict[str, Any]: result = {} if obj.chunks is not None: @@ -48,15 +48,15 @@ class DocumentEmbeddingsResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: DocumentEmbeddingsResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: DocumentEmbeddingsResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True + return self.encode(obj), True class GraphEmbeddingsRequestTranslator(MessageTranslator): """Translator for GraphEmbeddingsRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> GraphEmbeddingsRequest: + def decode(self, data: Dict[str, Any]) -> GraphEmbeddingsRequest: return GraphEmbeddingsRequest( vector=data["vector"], limit=int(data.get("limit", 10)), @@ -64,7 +64,7 @@ class GraphEmbeddingsRequestTranslator(MessageTranslator): collection=data.get("collection", "default") ) - def from_pulsar(self, obj: GraphEmbeddingsRequest) -> Dict[str, Any]: + def encode(self, obj: GraphEmbeddingsRequest) -> Dict[str, Any]: return { "vector": obj.vector, "limit": obj.limit, @@ -79,16 +79,16 @@ class GraphEmbeddingsResponseTranslator(MessageTranslator): def __init__(self): self.value_translator = ValueTranslator() - def to_pulsar(self, data: Dict[str, Any]) -> GraphEmbeddingsResponse: + def decode(self, data: Dict[str, Any]) -> GraphEmbeddingsResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: GraphEmbeddingsResponse) -> Dict[str, Any]: + def encode(self, obj: GraphEmbeddingsResponse) -> Dict[str, Any]: result = {} if obj.entities is not None: result["entities"] = [ { - "entity": self.value_translator.from_pulsar(match.entity), + "entity": self.value_translator.encode(match.entity), "score": match.score } for match in obj.entities @@ -96,15 +96,15 @@ class 
GraphEmbeddingsResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: GraphEmbeddingsResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: GraphEmbeddingsResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True + return self.encode(obj), True class RowEmbeddingsRequestTranslator(MessageTranslator): """Translator for RowEmbeddingsRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> RowEmbeddingsRequest: + def decode(self, data: Dict[str, Any]) -> RowEmbeddingsRequest: return RowEmbeddingsRequest( vector=data["vector"], limit=int(data.get("limit", 10)), @@ -114,7 +114,7 @@ class RowEmbeddingsRequestTranslator(MessageTranslator): index_name=data.get("index_name") ) - def from_pulsar(self, obj: RowEmbeddingsRequest) -> Dict[str, Any]: + def encode(self, obj: RowEmbeddingsRequest) -> Dict[str, Any]: result = { "vector": obj.vector, "limit": obj.limit, @@ -130,10 +130,10 @@ class RowEmbeddingsRequestTranslator(MessageTranslator): class RowEmbeddingsResponseTranslator(MessageTranslator): """Translator for RowEmbeddingsResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> RowEmbeddingsResponse: + def decode(self, data: Dict[str, Any]) -> RowEmbeddingsResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: RowEmbeddingsResponse) -> Dict[str, Any]: + def encode(self, obj: RowEmbeddingsResponse) -> Dict[str, Any]: result = {} if obj.error is not None: @@ -155,6 +155,6 @@ class RowEmbeddingsResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: RowEmbeddingsResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: RowEmbeddingsResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True + return self.encode(obj), True 
diff --git a/trustgraph-base/trustgraph/messaging/translators/flow.py b/trustgraph-base/trustgraph/messaging/translators/flow.py index 542b65ec..2047475e 100644 --- a/trustgraph-base/trustgraph/messaging/translators/flow.py +++ b/trustgraph-base/trustgraph/messaging/translators/flow.py @@ -6,7 +6,7 @@ from .base import MessageTranslator class FlowRequestTranslator(MessageTranslator): """Translator for FlowRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> FlowRequest: + def decode(self, data: Dict[str, Any]) -> FlowRequest: return FlowRequest( operation=data.get("operation"), blueprint_name=data.get("blueprint-name"), @@ -16,7 +16,7 @@ class FlowRequestTranslator(MessageTranslator): parameters=data.get("parameters") ) - def from_pulsar(self, obj: FlowRequest) -> Dict[str, Any]: + def encode(self, obj: FlowRequest) -> Dict[str, Any]: result = {} if obj.operation is not None: @@ -38,10 +38,10 @@ class FlowRequestTranslator(MessageTranslator): class FlowResponseTranslator(MessageTranslator): """Translator for FlowResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> FlowResponse: + def decode(self, data: Dict[str, Any]) -> FlowResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: FlowResponse) -> Dict[str, Any]: + def encode(self, obj: FlowResponse) -> Dict[str, Any]: result = {} if obj.blueprint_names is not None: @@ -59,6 +59,6 @@ class FlowResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: FlowResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: FlowResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True + return self.encode(obj), True diff --git a/trustgraph-base/trustgraph/messaging/translators/knowledge.py b/trustgraph-base/trustgraph/messaging/translators/knowledge.py index 0043d1e4..2f11d75a 100644 --- 
a/trustgraph-base/trustgraph/messaging/translators/knowledge.py +++ b/trustgraph-base/trustgraph/messaging/translators/knowledge.py @@ -14,7 +14,7 @@ class KnowledgeRequestTranslator(MessageTranslator): self.value_translator = ValueTranslator() self.subgraph_translator = SubgraphTranslator() - def to_pulsar(self, data: Dict[str, Any]) -> KnowledgeRequest: + def decode(self, data: Dict[str, Any]) -> KnowledgeRequest: triples = None if "triples" in data: triples = Triples( @@ -24,7 +24,7 @@ class KnowledgeRequestTranslator(MessageTranslator): user=data["triples"]["metadata"]["user"], collection=data["triples"]["metadata"]["collection"] ), - triples=self.subgraph_translator.to_pulsar(data["triples"]["triples"]), + triples=self.subgraph_translator.decode(data["triples"]["triples"]), ) graph_embeddings = None @@ -38,7 +38,7 @@ class KnowledgeRequestTranslator(MessageTranslator): ), entities=[ EntityEmbeddings( - entity=self.value_translator.to_pulsar(ent["entity"]), + entity=self.value_translator.decode(ent["entity"]), vectors=ent["vectors"], ) for ent in data["graph-embeddings"]["entities"] @@ -55,7 +55,7 @@ class KnowledgeRequestTranslator(MessageTranslator): graph_embeddings=graph_embeddings, ) - def from_pulsar(self, obj: KnowledgeRequest) -> Dict[str, Any]: + def encode(self, obj: KnowledgeRequest) -> Dict[str, Any]: result = {} if obj.operation: @@ -77,7 +77,7 @@ class KnowledgeRequestTranslator(MessageTranslator): "user": obj.triples.metadata.user, "collection": obj.triples.metadata.collection, }, - "triples": self.subgraph_translator.from_pulsar(obj.triples.triples), + "triples": self.subgraph_translator.encode(obj.triples.triples), } if obj.graph_embeddings: @@ -91,7 +91,7 @@ class KnowledgeRequestTranslator(MessageTranslator): "entities": [ { "vector": entity.vector, - "entity": self.value_translator.from_pulsar(entity.entity), + "entity": self.value_translator.encode(entity.entity), } for entity in obj.graph_embeddings.entities ], @@ -107,10 +107,10 @@ class 
KnowledgeResponseTranslator(MessageTranslator): self.value_translator = ValueTranslator() self.subgraph_translator = SubgraphTranslator() - def to_pulsar(self, data: Dict[str, Any]) -> KnowledgeResponse: + def decode(self, data: Dict[str, Any]) -> KnowledgeResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: KnowledgeResponse) -> Dict[str, Any]: + def encode(self, obj: KnowledgeResponse) -> Dict[str, Any]: # Response to list operation if obj.ids is not None: return {"ids": obj.ids} @@ -125,7 +125,7 @@ class KnowledgeResponseTranslator(MessageTranslator): "user": obj.triples.metadata.user, "collection": obj.triples.metadata.collection, }, - "triples": self.subgraph_translator.from_pulsar(obj.triples.triples), + "triples": self.subgraph_translator.encode(obj.triples.triples), } } @@ -142,7 +142,7 @@ class KnowledgeResponseTranslator(MessageTranslator): "entities": [ { "vector": entity.vector, - "entity": self.value_translator.from_pulsar(entity.entity), + "entity": self.value_translator.encode(entity.entity), } for entity in obj.graph_embeddings.entities ], @@ -156,9 +156,9 @@ class KnowledgeResponseTranslator(MessageTranslator): # Empty response (successful delete) return {} - def from_response_with_completion(self, obj: KnowledgeResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: KnowledgeResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - response = self.from_pulsar(obj) + response = self.encode(obj) # Check if this is a final response is_final = ( diff --git a/trustgraph-base/trustgraph/messaging/translators/library.py b/trustgraph-base/trustgraph/messaging/translators/library.py index c7e849aa..7c77c39c 100644 --- a/trustgraph-base/trustgraph/messaging/translators/library.py +++ b/trustgraph-base/trustgraph/messaging/translators/library.py @@ -11,16 +11,16 @@ class LibraryRequestTranslator(MessageTranslator): 
self.doc_metadata_translator = DocumentMetadataTranslator() self.proc_metadata_translator = ProcessingMetadataTranslator() - def to_pulsar(self, data: Dict[str, Any]) -> LibrarianRequest: + def decode(self, data: Dict[str, Any]) -> LibrarianRequest: # Document metadata doc_metadata = None if "document-metadata" in data: - doc_metadata = self.doc_metadata_translator.to_pulsar(data["document-metadata"]) + doc_metadata = self.doc_metadata_translator.decode(data["document-metadata"]) # Processing metadata proc_metadata = None if "processing-metadata" in data: - proc_metadata = self.proc_metadata_translator.to_pulsar(data["processing-metadata"]) + proc_metadata = self.proc_metadata_translator.decode(data["processing-metadata"]) # Criteria criteria = [] @@ -61,7 +61,7 @@ class LibraryRequestTranslator(MessageTranslator): include_children=data.get("include-children", False), ) - def from_pulsar(self, obj: LibrarianRequest) -> Dict[str, Any]: + def encode(self, obj: LibrarianRequest) -> Dict[str, Any]: result = {} if obj.operation: @@ -71,9 +71,9 @@ class LibraryRequestTranslator(MessageTranslator): if obj.processing_id: result["processing-id"] = obj.processing_id if obj.document_metadata: - result["document-metadata"] = self.doc_metadata_translator.from_pulsar(obj.document_metadata) + result["document-metadata"] = self.doc_metadata_translator.encode(obj.document_metadata) if obj.processing_metadata: - result["processing-metadata"] = self.proc_metadata_translator.from_pulsar(obj.processing_metadata) + result["processing-metadata"] = self.proc_metadata_translator.encode(obj.processing_metadata) if obj.content: result["content"] = obj.content.decode("utf-8") if isinstance(obj.content, bytes) else obj.content if obj.user: @@ -100,10 +100,10 @@ class LibraryResponseTranslator(MessageTranslator): self.doc_metadata_translator = DocumentMetadataTranslator() self.proc_metadata_translator = ProcessingMetadataTranslator() - def to_pulsar(self, data: Dict[str, Any]) -> 
LibrarianResponse: + def decode(self, data: Dict[str, Any]) -> LibrarianResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: LibrarianResponse) -> Dict[str, Any]: + def encode(self, obj: LibrarianResponse) -> Dict[str, Any]: result = {} if obj.error: @@ -113,20 +113,20 @@ class LibraryResponseTranslator(MessageTranslator): } if obj.document_metadata: - result["document-metadata"] = self.doc_metadata_translator.from_pulsar(obj.document_metadata) + result["document-metadata"] = self.doc_metadata_translator.encode(obj.document_metadata) if obj.content: result["content"] = obj.content.decode("utf-8") if isinstance(obj.content, bytes) else obj.content if obj.document_metadatas is not None: result["document-metadatas"] = [ - self.doc_metadata_translator.from_pulsar(dm) + self.doc_metadata_translator.encode(dm) for dm in obj.document_metadatas ] if obj.processing_metadatas is not None: result["processing-metadatas"] = [ - self.proc_metadata_translator.from_pulsar(pm) + self.proc_metadata_translator.encode(pm) for pm in obj.processing_metadatas ] @@ -172,6 +172,6 @@ class LibraryResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: LibrarianResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: LibrarianResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), obj.is_final + return self.encode(obj), obj.is_final diff --git a/trustgraph-base/trustgraph/messaging/translators/metadata.py b/trustgraph-base/trustgraph/messaging/translators/metadata.py index 46a28d0a..3e141c19 100644 --- a/trustgraph-base/trustgraph/messaging/translators/metadata.py +++ b/trustgraph-base/trustgraph/messaging/translators/metadata.py @@ -10,7 +10,7 @@ class DocumentMetadataTranslator(Translator): def __init__(self): self.subgraph_translator = SubgraphTranslator() - def to_pulsar(self, data: Dict[str, 
Any]) -> DocumentMetadata: + def decode(self, data: Dict[str, Any]) -> DocumentMetadata: metadata = data.get("metadata", []) return DocumentMetadata( id=data.get("id"), @@ -18,14 +18,14 @@ class DocumentMetadataTranslator(Translator): kind=data.get("kind"), title=data.get("title"), comments=data.get("comments"), - metadata=self.subgraph_translator.to_pulsar(metadata) if metadata is not None else [], + metadata=self.subgraph_translator.decode(metadata) if metadata is not None else [], user=data.get("user"), tags=data.get("tags"), parent_id=data.get("parent-id", ""), document_type=data.get("document-type", "source"), ) - def from_pulsar(self, obj: DocumentMetadata) -> Dict[str, Any]: + def encode(self, obj: DocumentMetadata) -> Dict[str, Any]: result = {} if obj.id: @@ -39,7 +39,7 @@ class DocumentMetadataTranslator(Translator): if obj.comments: result["comments"] = obj.comments if obj.metadata is not None: - result["metadata"] = self.subgraph_translator.from_pulsar(obj.metadata) + result["metadata"] = self.subgraph_translator.encode(obj.metadata) if obj.user: result["user"] = obj.user if obj.tags is not None: @@ -55,7 +55,7 @@ class DocumentMetadataTranslator(Translator): class ProcessingMetadataTranslator(Translator): """Translator for ProcessingMetadata schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> ProcessingMetadata: + def decode(self, data: Dict[str, Any]) -> ProcessingMetadata: return ProcessingMetadata( id=data.get("id"), document_id=data.get("document-id"), @@ -66,7 +66,7 @@ class ProcessingMetadataTranslator(Translator): tags=data.get("tags") ) - def from_pulsar(self, obj: ProcessingMetadata) -> Dict[str, Any]: + def encode(self, obj: ProcessingMetadata) -> Dict[str, Any]: result = {} if obj.id: diff --git a/trustgraph-base/trustgraph/messaging/translators/nlp_query.py b/trustgraph-base/trustgraph/messaging/translators/nlp_query.py index 2c445579..c1f016b8 100644 --- a/trustgraph-base/trustgraph/messaging/translators/nlp_query.py +++ 
b/trustgraph-base/trustgraph/messaging/translators/nlp_query.py @@ -6,13 +6,13 @@ from .base import MessageTranslator class QuestionToStructuredQueryRequestTranslator(MessageTranslator): """Translator for QuestionToStructuredQueryRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> QuestionToStructuredQueryRequest: + def decode(self, data: Dict[str, Any]) -> QuestionToStructuredQueryRequest: return QuestionToStructuredQueryRequest( question=data.get("question", ""), max_results=data.get("max_results", 100) ) - def from_pulsar(self, obj: QuestionToStructuredQueryRequest) -> Dict[str, Any]: + def encode(self, obj: QuestionToStructuredQueryRequest) -> Dict[str, Any]: return { "question": obj.question, "max_results": obj.max_results @@ -22,10 +22,10 @@ class QuestionToStructuredQueryRequestTranslator(MessageTranslator): class QuestionToStructuredQueryResponseTranslator(MessageTranslator): """Translator for QuestionToStructuredQueryResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> QuestionToStructuredQueryResponse: + def decode(self, data: Dict[str, Any]) -> QuestionToStructuredQueryResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: QuestionToStructuredQueryResponse) -> Dict[str, Any]: + def encode(self, obj: QuestionToStructuredQueryResponse) -> Dict[str, Any]: result = { "graphql_query": obj.graphql_query, "variables": dict(obj.variables) if obj.variables else {}, @@ -42,6 +42,6 @@ class QuestionToStructuredQueryResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: QuestionToStructuredQueryResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: QuestionToStructuredQueryResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True \ No newline at end of file + return self.encode(obj), True \ No newline at end of file diff --git 
a/trustgraph-base/trustgraph/messaging/translators/primitives.py b/trustgraph-base/trustgraph/messaging/translators/primitives.py index d54efc49..7eb3d285 100644 --- a/trustgraph-base/trustgraph/messaging/translators/primitives.py +++ b/trustgraph-base/trustgraph/messaging/translators/primitives.py @@ -17,7 +17,7 @@ class TermTranslator(Translator): - "tr": triple (for TRIPLE type, nested) """ - def to_pulsar(self, data: Dict[str, Any]) -> Term: + def decode(self, data: Dict[str, Any]) -> Term: term_type = data.get("t", "") if term_type == IRI: @@ -38,7 +38,7 @@ class TermTranslator(Translator): # Nested triple - use TripleTranslator triple_data = data.get("tr") if triple_data: - triple = _triple_translator_to_pulsar(triple_data) + triple = _triple_translator_decode(triple_data) else: triple = None return Term(type=TRIPLE, triple=triple) @@ -47,7 +47,7 @@ class TermTranslator(Translator): # Unknown or empty type return Term(type=term_type) - def from_pulsar(self, obj: Term) -> Dict[str, Any]: + def encode(self, obj: Term) -> Dict[str, Any]: result: Dict[str, Any] = {"t": obj.type} if obj.type == IRI: @@ -65,33 +65,33 @@ class TermTranslator(Translator): elif obj.type == TRIPLE: if obj.triple: - result["tr"] = _triple_translator_from_pulsar(obj.triple) + result["tr"] = _triple_translator_encode(obj.triple) return result # Module-level helper functions to avoid circular instantiation -def _triple_translator_to_pulsar(data: Dict[str, Any]) -> Triple: +def _triple_translator_decode(data: Dict[str, Any]) -> Triple: term_translator = TermTranslator() return Triple( - s=term_translator.to_pulsar(data["s"]) if data.get("s") else None, - p=term_translator.to_pulsar(data["p"]) if data.get("p") else None, - o=term_translator.to_pulsar(data["o"]) if data.get("o") else None, + s=term_translator.decode(data["s"]) if data.get("s") else None, + p=term_translator.decode(data["p"]) if data.get("p") else None, + o=term_translator.decode(data["o"]) if data.get("o") else None, 
g=data.get("g"), ) -def _triple_translator_from_pulsar(obj: Triple) -> Dict[str, Any]: +def _triple_translator_encode(obj: Triple) -> Dict[str, Any]: """Convert Triple object to wire format dict.""" term_translator = TermTranslator() result: Dict[str, Any] = {} if obj.s: - result["s"] = term_translator.from_pulsar(obj.s) + result["s"] = term_translator.encode(obj.s) if obj.p: - result["p"] = term_translator.from_pulsar(obj.p) + result["p"] = term_translator.encode(obj.p) if obj.o: - result["o"] = term_translator.from_pulsar(obj.o) + result["o"] = term_translator.encode(obj.o) if obj.g: result["g"] = obj.g @@ -104,23 +104,23 @@ class TripleTranslator(Translator): def __init__(self): self.term_translator = TermTranslator() - def to_pulsar(self, data: Dict[str, Any]) -> Triple: + def decode(self, data: Dict[str, Any]) -> Triple: return Triple( - s=self.term_translator.to_pulsar(data["s"]) if data.get("s") else None, - p=self.term_translator.to_pulsar(data["p"]) if data.get("p") else None, - o=self.term_translator.to_pulsar(data["o"]) if data.get("o") else None, + s=self.term_translator.decode(data["s"]) if data.get("s") else None, + p=self.term_translator.decode(data["p"]) if data.get("p") else None, + o=self.term_translator.decode(data["o"]) if data.get("o") else None, g=data.get("g"), ) - def from_pulsar(self, obj: Triple) -> Dict[str, Any]: + def encode(self, obj: Triple) -> Dict[str, Any]: result: Dict[str, Any] = {} if obj.s: - result["s"] = self.term_translator.from_pulsar(obj.s) + result["s"] = self.term_translator.encode(obj.s) if obj.p: - result["p"] = self.term_translator.from_pulsar(obj.p) + result["p"] = self.term_translator.encode(obj.p) if obj.o: - result["o"] = self.term_translator.from_pulsar(obj.o) + result["o"] = self.term_translator.encode(obj.o) if obj.g: result["g"] = obj.g @@ -137,17 +137,17 @@ class SubgraphTranslator(Translator): def __init__(self): self.triple_translator = TripleTranslator() - def to_pulsar(self, data: List[Dict[str, Any]]) -> 
List[Triple]: - return [self.triple_translator.to_pulsar(t) for t in data] + def decode(self, data: List[Dict[str, Any]]) -> List[Triple]: + return [self.triple_translator.decode(t) for t in data] - def from_pulsar(self, obj: List[Triple]) -> List[Dict[str, Any]]: - return [self.triple_translator.from_pulsar(t) for t in obj] + def encode(self, obj: List[Triple]) -> List[Dict[str, Any]]: + return [self.triple_translator.encode(t) for t in obj] class RowSchemaTranslator(Translator): """Translator for RowSchema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> RowSchema: + def decode(self, data: Dict[str, Any]) -> RowSchema: """Convert dict to RowSchema Pulsar object""" fields = [] for field_data in data.get("fields", []): @@ -169,7 +169,7 @@ class RowSchemaTranslator(Translator): fields=fields ) - def from_pulsar(self, obj: RowSchema) -> Dict[str, Any]: + def encode(self, obj: RowSchema) -> Dict[str, Any]: """Convert RowSchema Pulsar object to JSON-serializable dictionary""" result = { "name": obj.name, @@ -200,7 +200,7 @@ class RowSchemaTranslator(Translator): class FieldTranslator(Translator): """Translator for Field objects""" - def to_pulsar(self, data: Dict[str, Any]) -> Field: + def decode(self, data: Dict[str, Any]) -> Field: """Convert dict to Field Pulsar object""" return Field( name=data.get("name", ""), @@ -213,7 +213,7 @@ class FieldTranslator(Translator): enum_values=data.get("enum_values", []) ) - def from_pulsar(self, obj: Field) -> Dict[str, Any]: + def encode(self, obj: Field) -> Dict[str, Any]: """Convert Field Pulsar object to JSON-serializable dictionary""" result = { "name": obj.name, diff --git a/trustgraph-base/trustgraph/messaging/translators/prompt.py b/trustgraph-base/trustgraph/messaging/translators/prompt.py index 5ff99fdc..4345e6fd 100644 --- a/trustgraph-base/trustgraph/messaging/translators/prompt.py +++ b/trustgraph-base/trustgraph/messaging/translators/prompt.py @@ -7,7 +7,7 @@ from .base import MessageTranslator class 
PromptRequestTranslator(MessageTranslator): """Translator for PromptRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> PromptRequest: + def decode(self, data: Dict[str, Any]) -> PromptRequest: # Handle both "terms" and "variables" input keys terms = data.get("terms", {}) if "variables" in data: @@ -23,7 +23,7 @@ class PromptRequestTranslator(MessageTranslator): streaming=data.get("streaming", False) ) - def from_pulsar(self, obj: PromptRequest) -> Dict[str, Any]: + def encode(self, obj: PromptRequest) -> Dict[str, Any]: result = {} if obj.id: @@ -37,10 +37,10 @@ class PromptRequestTranslator(MessageTranslator): class PromptResponseTranslator(MessageTranslator): """Translator for PromptResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> PromptResponse: + def decode(self, data: Dict[str, Any]) -> PromptResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: PromptResponse) -> Dict[str, Any]: + def encode(self, obj: PromptResponse) -> Dict[str, Any]: result = {} # Include text field if present (even if empty string) @@ -55,8 +55,8 @@ class PromptResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: PromptResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: PromptResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" # Check end_of_stream field to determine if this is the final message is_final = getattr(obj, 'end_of_stream', True) - return self.from_pulsar(obj), is_final \ No newline at end of file + return self.encode(obj), is_final \ No newline at end of file diff --git a/trustgraph-base/trustgraph/messaging/translators/retrieval.py b/trustgraph-base/trustgraph/messaging/translators/retrieval.py index 98473db2..7e2abfa1 100644 --- a/trustgraph-base/trustgraph/messaging/translators/retrieval.py +++ b/trustgraph-base/trustgraph/messaging/translators/retrieval.py 
@@ -6,7 +6,7 @@ from .base import MessageTranslator class DocumentRagRequestTranslator(MessageTranslator): """Translator for DocumentRagQuery schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> DocumentRagQuery: + def decode(self, data: Dict[str, Any]) -> DocumentRagQuery: return DocumentRagQuery( query=data["query"], user=data.get("user", "trustgraph"), @@ -15,7 +15,7 @@ class DocumentRagRequestTranslator(MessageTranslator): streaming=data.get("streaming", False) ) - def from_pulsar(self, obj: DocumentRagQuery) -> Dict[str, Any]: + def encode(self, obj: DocumentRagQuery) -> Dict[str, Any]: return { "query": obj.query, "user": obj.user, @@ -28,10 +28,10 @@ class DocumentRagRequestTranslator(MessageTranslator): class DocumentRagResponseTranslator(MessageTranslator): """Translator for DocumentRagResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> DocumentRagResponse: + def decode(self, data: Dict[str, Any]) -> DocumentRagResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: DocumentRagResponse) -> Dict[str, Any]: + def encode(self, obj: DocumentRagResponse) -> Dict[str, Any]: result = {} # Include message_type for distinguishing chunk vs explain messages @@ -65,17 +65,17 @@ class DocumentRagResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: DocumentRagResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: DocumentRagResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" # Session is complete when end_of_session is True is_final = getattr(obj, 'end_of_session', False) - return self.from_pulsar(obj), is_final + return self.encode(obj), is_final class GraphRagRequestTranslator(MessageTranslator): """Translator for GraphRagQuery schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> GraphRagQuery: + def decode(self, data: Dict[str, Any]) -> GraphRagQuery: 
return GraphRagQuery( query=data["query"], user=data.get("user", "trustgraph"), @@ -89,7 +89,7 @@ class GraphRagRequestTranslator(MessageTranslator): streaming=data.get("streaming", False) ) - def from_pulsar(self, obj: GraphRagQuery) -> Dict[str, Any]: + def encode(self, obj: GraphRagQuery) -> Dict[str, Any]: return { "query": obj.query, "user": obj.user, @@ -107,10 +107,10 @@ class GraphRagRequestTranslator(MessageTranslator): class GraphRagResponseTranslator(MessageTranslator): """Translator for GraphRagResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> GraphRagResponse: + def decode(self, data: Dict[str, Any]) -> GraphRagResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: GraphRagResponse) -> Dict[str, Any]: + def encode(self, obj: GraphRagResponse) -> Dict[str, Any]: result = {} # Include message_type @@ -144,8 +144,8 @@ class GraphRagResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: GraphRagResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: GraphRagResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" # Session is complete when end_of_session is True is_final = getattr(obj, 'end_of_session', False) - return self.from_pulsar(obj), is_final \ No newline at end of file + return self.encode(obj), is_final \ No newline at end of file diff --git a/trustgraph-base/trustgraph/messaging/translators/rows_query.py b/trustgraph-base/trustgraph/messaging/translators/rows_query.py index 6feb75a3..6153901c 100644 --- a/trustgraph-base/trustgraph/messaging/translators/rows_query.py +++ b/trustgraph-base/trustgraph/messaging/translators/rows_query.py @@ -7,7 +7,7 @@ import json class RowsQueryRequestTranslator(MessageTranslator): """Translator for RowsQueryRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> RowsQueryRequest: + def decode(self, data: Dict[str, 
Any]) -> RowsQueryRequest: return RowsQueryRequest( user=data.get("user", "trustgraph"), collection=data.get("collection", "default"), @@ -16,7 +16,7 @@ class RowsQueryRequestTranslator(MessageTranslator): operation_name=data.get("operation_name", None) ) - def from_pulsar(self, obj: RowsQueryRequest) -> Dict[str, Any]: + def encode(self, obj: RowsQueryRequest) -> Dict[str, Any]: result = { "user": obj.user, "collection": obj.collection, @@ -33,10 +33,10 @@ class RowsQueryRequestTranslator(MessageTranslator): class RowsQueryResponseTranslator(MessageTranslator): """Translator for RowsQueryResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> RowsQueryResponse: + def decode(self, data: Dict[str, Any]) -> RowsQueryResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: RowsQueryResponse) -> Dict[str, Any]: + def encode(self, obj: RowsQueryResponse) -> Dict[str, Any]: result = {} # Handle GraphQL response data @@ -74,6 +74,6 @@ class RowsQueryResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: RowsQueryResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: RowsQueryResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True + return self.encode(obj), True diff --git a/trustgraph-base/trustgraph/messaging/translators/structured_query.py b/trustgraph-base/trustgraph/messaging/translators/structured_query.py index cc3ae80c..6b0b38a1 100644 --- a/trustgraph-base/trustgraph/messaging/translators/structured_query.py +++ b/trustgraph-base/trustgraph/messaging/translators/structured_query.py @@ -7,14 +7,14 @@ import json class StructuredQueryRequestTranslator(MessageTranslator): """Translator for StructuredQueryRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> StructuredQueryRequest: + def decode(self, data: Dict[str, Any]) -> 
StructuredQueryRequest: return StructuredQueryRequest( question=data.get("question", ""), user=data.get("user", "trustgraph"), # Default fallback collection=data.get("collection", "default") # Default fallback ) - def from_pulsar(self, obj: StructuredQueryRequest) -> Dict[str, Any]: + def encode(self, obj: StructuredQueryRequest) -> Dict[str, Any]: return { "question": obj.question, "user": obj.user, @@ -25,10 +25,10 @@ class StructuredQueryRequestTranslator(MessageTranslator): class StructuredQueryResponseTranslator(MessageTranslator): """Translator for StructuredQueryResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> StructuredQueryResponse: + def decode(self, data: Dict[str, Any]) -> StructuredQueryResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: StructuredQueryResponse) -> Dict[str, Any]: + def encode(self, obj: StructuredQueryResponse) -> Dict[str, Any]: result = {} # Handle structured query response data @@ -55,6 +55,6 @@ class StructuredQueryResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: StructuredQueryResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: StructuredQueryResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True \ No newline at end of file + return self.encode(obj), True \ No newline at end of file diff --git a/trustgraph-base/trustgraph/messaging/translators/text_completion.py b/trustgraph-base/trustgraph/messaging/translators/text_completion.py index fa3749b5..596ff744 100644 --- a/trustgraph-base/trustgraph/messaging/translators/text_completion.py +++ b/trustgraph-base/trustgraph/messaging/translators/text_completion.py @@ -6,14 +6,14 @@ from .base import MessageTranslator class TextCompletionRequestTranslator(MessageTranslator): """Translator for TextCompletionRequest schema objects""" - def to_pulsar(self, 
data: Dict[str, Any]) -> TextCompletionRequest: + def decode(self, data: Dict[str, Any]) -> TextCompletionRequest: return TextCompletionRequest( system=data["system"], prompt=data["prompt"], streaming=data.get("streaming", False) ) - def from_pulsar(self, obj: TextCompletionRequest) -> Dict[str, Any]: + def encode(self, obj: TextCompletionRequest) -> Dict[str, Any]: return { "system": obj.system, "prompt": obj.prompt @@ -23,10 +23,10 @@ class TextCompletionRequestTranslator(MessageTranslator): class TextCompletionResponseTranslator(MessageTranslator): """Translator for TextCompletionResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> TextCompletionResponse: + def decode(self, data: Dict[str, Any]) -> TextCompletionResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: TextCompletionResponse) -> Dict[str, Any]: + def encode(self, obj: TextCompletionResponse) -> Dict[str, Any]: result = {"response": obj.response} if obj.in_token: @@ -41,8 +41,8 @@ class TextCompletionResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: TextCompletionResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: TextCompletionResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" # Check end_of_stream field to determine if this is the final message is_final = getattr(obj, 'end_of_stream', True) - return self.from_pulsar(obj), is_final \ No newline at end of file + return self.encode(obj), is_final \ No newline at end of file diff --git a/trustgraph-base/trustgraph/messaging/translators/tool.py b/trustgraph-base/trustgraph/messaging/translators/tool.py index 9f4d05cc..0651ca03 100644 --- a/trustgraph-base/trustgraph/messaging/translators/tool.py +++ b/trustgraph-base/trustgraph/messaging/translators/tool.py @@ -6,7 +6,7 @@ from .base import MessageTranslator class ToolRequestTranslator(MessageTranslator): 
"""Translator for ToolRequest schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> ToolRequest: + def decode(self, data: Dict[str, Any]) -> ToolRequest: # Handle both "name" and "parameters" input keys name = data.get("name", "") if "parameters" in data: @@ -19,7 +19,7 @@ class ToolRequestTranslator(MessageTranslator): parameters = parameters, ) - def from_pulsar(self, obj: ToolRequest) -> Dict[str, Any]: + def encode(self, obj: ToolRequest) -> Dict[str, Any]: result = {} if obj.name: @@ -32,10 +32,10 @@ class ToolRequestTranslator(MessageTranslator): class ToolResponseTranslator(MessageTranslator): """Translator for ToolResponse schema objects""" - def to_pulsar(self, data: Dict[str, Any]) -> ToolResponse: + def decode(self, data: Dict[str, Any]) -> ToolResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: ToolResponse) -> Dict[str, Any]: + def encode(self, obj: ToolResponse) -> Dict[str, Any]: result = {} @@ -46,6 +46,6 @@ class ToolResponseTranslator(MessageTranslator): return result - def from_response_with_completion(self, obj: ToolResponse) -> Tuple[Dict[str, Any], bool]: + def encode_with_completion(self, obj: ToolResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), True + return self.encode(obj), True diff --git a/trustgraph-base/trustgraph/messaging/translators/triples.py b/trustgraph-base/trustgraph/messaging/translators/triples.py index 2f29aa56..21d2698f 100644 --- a/trustgraph-base/trustgraph/messaging/translators/triples.py +++ b/trustgraph-base/trustgraph/messaging/translators/triples.py @@ -10,10 +10,10 @@ class TriplesQueryRequestTranslator(MessageTranslator): def __init__(self): self.value_translator = ValueTranslator() - def to_pulsar(self, data: Dict[str, Any]) -> TriplesQueryRequest: - s = self.value_translator.to_pulsar(data["s"]) if "s" in data else None - p = self.value_translator.to_pulsar(data["p"]) if 
"p" in data else None - o = self.value_translator.to_pulsar(data["o"]) if "o" in data else None + def decode(self, data: Dict[str, Any]) -> TriplesQueryRequest: + s = self.value_translator.decode(data["s"]) if "s" in data else None + p = self.value_translator.decode(data["p"]) if "p" in data else None + o = self.value_translator.decode(data["o"]) if "o" in data else None g = data.get("g") # None=default graph, "*"=all graphs return TriplesQueryRequest( @@ -28,7 +28,7 @@ class TriplesQueryRequestTranslator(MessageTranslator): batch_size=int(data.get("batch-size", 20)), ) - def from_pulsar(self, obj: TriplesQueryRequest) -> Dict[str, Any]: + def encode(self, obj: TriplesQueryRequest) -> Dict[str, Any]: result = { "limit": obj.limit, "user": obj.user, @@ -38,11 +38,11 @@ class TriplesQueryRequestTranslator(MessageTranslator): } if obj.s: - result["s"] = self.value_translator.from_pulsar(obj.s) + result["s"] = self.value_translator.encode(obj.s) if obj.p: - result["p"] = self.value_translator.from_pulsar(obj.p) + result["p"] = self.value_translator.encode(obj.p) if obj.o: - result["o"] = self.value_translator.from_pulsar(obj.o) + result["o"] = self.value_translator.encode(obj.o) if obj.g is not None: result["g"] = obj.g @@ -55,14 +55,14 @@ class TriplesQueryResponseTranslator(MessageTranslator): def __init__(self): self.subgraph_translator = SubgraphTranslator() - def to_pulsar(self, data: Dict[str, Any]) -> TriplesQueryResponse: + def decode(self, data: Dict[str, Any]) -> TriplesQueryResponse: raise NotImplementedError("Response translation to Pulsar not typically needed") - def from_pulsar(self, obj: TriplesQueryResponse) -> Dict[str, Any]: + def encode(self, obj: TriplesQueryResponse) -> Dict[str, Any]: return { - "response": self.subgraph_translator.from_pulsar(obj.triples) + "response": self.subgraph_translator.encode(obj.triples) } - def from_response_with_completion(self, obj: TriplesQueryResponse) -> Tuple[Dict[str, Any], bool]: + def 
encode_with_completion(self, obj: TriplesQueryResponse) -> Tuple[Dict[str, Any], bool]: """Returns (response_dict, is_final)""" - return self.from_pulsar(obj), obj.is_final \ No newline at end of file + return self.encode(obj), obj.is_final \ No newline at end of file diff --git a/trustgraph-base/trustgraph/schema/core/topic.py b/trustgraph-base/trustgraph/schema/core/topic.py index 09c633e4..036ea142 100644 --- a/trustgraph-base/trustgraph/schema/core/topic.py +++ b/trustgraph-base/trustgraph/schema/core/topic.py @@ -1,23 +1,26 @@ -def topic(queue_name, qos='q1', tenant='tg', namespace='flow'): +def queue(topic, cls='flow', topicspace='tg'): """ - Create a generic topic identifier that can be mapped by backends. + Create a queue identifier in CLASS:TOPICSPACE:TOPIC format. Args: - queue_name: The queue/topic name - qos: Quality of service - - 'q0' = best-effort (no ack) - - 'q1' = at-least-once (ack required) - - 'q2' = exactly-once (two-phase ack) - tenant: Tenant identifier for multi-tenancy - namespace: Namespace within tenant + topic: The logical queue name (e.g. 
'config', 'librarian') + cls: Queue class determining operational characteristics: + - 'flow' = persistent processing pipeline queue + - 'request' = non-persistent, short TTL request queue + - 'response' = non-persistent, short TTL response queue + - 'state' = persistent, last-value state broadcast + topicspace: Deployment isolation prefix (default: 'tg') Returns: - Generic topic string: qos/tenant/namespace/queue_name + Queue identifier string: cls:topicspace:topic Examples: - topic('my-queue') # q1/tg/flow/my-queue - topic('config', qos='q2', namespace='config') # q2/tg/config/config + queue('text-completion-request') + # flow:tg:text-completion-request + queue('config', cls='request') + # request:tg:config + queue('config', cls='state') + # state:tg:config """ - return f"{qos}/{tenant}/{namespace}/{queue_name}" - + return f"{cls}:{topicspace}:{topic}" diff --git a/trustgraph-base/trustgraph/schema/knowledge/document.py b/trustgraph-base/trustgraph/schema/knowledge/document.py index c75a1227..fc7273ef 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/document.py +++ b/trustgraph-base/trustgraph/schema/knowledge/document.py @@ -1,7 +1,6 @@ from dataclasses import dataclass from ..core.metadata import Metadata -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/knowledge/embeddings.py b/trustgraph-base/trustgraph/schema/knowledge/embeddings.py index a8bae35c..2cfd08cf 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/embeddings.py +++ b/trustgraph-base/trustgraph/schema/knowledge/embeddings.py @@ -2,7 +2,6 @@ from dataclasses import dataclass, field from ..core.metadata import Metadata from ..core.primitives import Term, RowSchema -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/knowledge/graph.py 
b/trustgraph-base/trustgraph/schema/knowledge/graph.py index b4a05084..a15676ab 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/graph.py +++ b/trustgraph-base/trustgraph/schema/knowledge/graph.py @@ -2,7 +2,6 @@ from dataclasses import dataclass, field from ..core.primitives import Term, Triple from ..core.metadata import Metadata -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/knowledge/knowledge.py b/trustgraph-base/trustgraph/schema/knowledge/knowledge.py index cffcbac7..0c4a9f7c 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/knowledge.py +++ b/trustgraph-base/trustgraph/schema/knowledge/knowledge.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from ..core.primitives import Triple, Error -from ..core.topic import topic +from ..core.topic import queue from ..core.metadata import Metadata from .document import Document, TextDocument from .graph import Triples @@ -52,9 +52,5 @@ class KnowledgeResponse: triples: Triples | None = None graph_embeddings: GraphEmbeddings | None = None -knowledge_request_queue = topic( - 'knowledge', qos='q0', namespace='request' -) -knowledge_response_queue = topic( - 'knowledge', qos='q0', namespace='response', -) +knowledge_request_queue = queue('knowledge', cls='request') +knowledge_response_queue = queue('knowledge', cls='response') diff --git a/trustgraph-base/trustgraph/schema/knowledge/nlp.py b/trustgraph-base/trustgraph/schema/knowledge/nlp.py index 10b5f215..84e2f080 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/nlp.py +++ b/trustgraph-base/trustgraph/schema/knowledge/nlp.py @@ -1,6 +1,5 @@ from dataclasses import dataclass -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/knowledge/object.py b/trustgraph-base/trustgraph/schema/knowledge/object.py index 39b0095f..4b51bbe1 
100644 --- a/trustgraph-base/trustgraph/schema/knowledge/object.py +++ b/trustgraph-base/trustgraph/schema/knowledge/object.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field from ..core.metadata import Metadata -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/knowledge/rows.py b/trustgraph-base/trustgraph/schema/knowledge/rows.py index ca2131df..015affe1 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/rows.py +++ b/trustgraph-base/trustgraph/schema/knowledge/rows.py @@ -2,7 +2,6 @@ from dataclasses import dataclass, field from ..core.metadata import Metadata from ..core.primitives import RowSchema -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/knowledge/structured.py b/trustgraph-base/trustgraph/schema/knowledge/structured.py index c227d767..52bfec27 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/structured.py +++ b/trustgraph-base/trustgraph/schema/knowledge/structured.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field from ..core.metadata import Metadata -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/agent.py b/trustgraph-base/trustgraph/schema/services/agent.py index fdb9e391..2a966dd4 100644 --- a/trustgraph-base/trustgraph/schema/services/agent.py +++ b/trustgraph-base/trustgraph/schema/services/agent.py @@ -2,7 +2,6 @@ from dataclasses import dataclass, field from typing import Optional -from ..core.topic import topic from ..core.primitives import Error ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/collection.py b/trustgraph-base/trustgraph/schema/services/collection.py index 74381abb..f4b5fc6e 100644 --- 
a/trustgraph-base/trustgraph/schema/services/collection.py +++ b/trustgraph-base/trustgraph/schema/services/collection.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, field from datetime import datetime from ..core.primitives import Error -from ..core.topic import topic +from ..core.topic import queue ############################################################################ @@ -50,10 +50,6 @@ class CollectionManagementResponse: # Topics -collection_request_queue = topic( - 'collection', qos='q0', namespace='request' -) -collection_response_queue = topic( - 'collection', qos='q0', namespace='response' -) +collection_request_queue = queue('collection', cls='request') +collection_response_queue = queue('collection', cls='response') diff --git a/trustgraph-base/trustgraph/schema/services/config.py b/trustgraph-base/trustgraph/schema/services/config.py index 38bd1cbf..36e55674 100644 --- a/trustgraph-base/trustgraph/schema/services/config.py +++ b/trustgraph-base/trustgraph/schema/services/config.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field -from ..core.topic import topic +from ..core.topic import queue from ..core.primitives import Error ############################################################################ @@ -60,15 +60,9 @@ class ConfigPush: version: int = 0 config: dict[str, dict[str, str]] = field(default_factory=dict) -config_request_queue = topic( - 'config', qos='q0', namespace='request' -) -config_response_queue = topic( - 'config', qos='q0', namespace='response' -) -config_push_queue = topic( - 'config', qos='q2', namespace='config' -) +config_request_queue = queue('config', cls='request') +config_response_queue = queue('config', cls='response') +config_push_queue = queue('config', cls='state') ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/flow.py b/trustgraph-base/trustgraph/schema/services/flow.py index cf62c84d..0d497dd7 100644 --- 
a/trustgraph-base/trustgraph/schema/services/flow.py +++ b/trustgraph-base/trustgraph/schema/services/flow.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field -from ..core.topic import topic +from ..core.topic import queue from ..core.primitives import Error ############################################################################ @@ -61,12 +61,8 @@ class FlowResponse: # Everything error: Error | None = None -flow_request_queue = topic( - 'flow', qos='q0', namespace='request' -) -flow_response_queue = topic( - 'flow', qos='q0', namespace='response' -) +flow_request_queue = queue('flow', cls='request') +flow_response_queue = queue('flow', cls='response') ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/library.py b/trustgraph-base/trustgraph/schema/services/library.py index f1ab360f..51d0d5a5 100644 --- a/trustgraph-base/trustgraph/schema/services/library.py +++ b/trustgraph-base/trustgraph/schema/services/library.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from ..core.primitives import Triple, Error -from ..core.topic import topic +from ..core.topic import queue from ..core.metadata import Metadata # Note: Document imports will be updated after knowledge schemas are converted @@ -220,9 +220,5 @@ class LibrarianResponse: # FIXME: Is this right? 
Using persistence on librarian so that # message chunking works -librarian_request_queue = topic( - 'librarian', qos='q1', namespace='request' -) -librarian_response_queue = topic( - 'librarian', qos='q1', namespace='response', -) +librarian_request_queue = queue('librarian-request', cls='flow') +librarian_response_queue = queue('librarian-response', cls='flow') diff --git a/trustgraph-base/trustgraph/schema/services/llm.py b/trustgraph-base/trustgraph/schema/services/llm.py index 681638c3..0fd6ab90 100644 --- a/trustgraph-base/trustgraph/schema/services/llm.py +++ b/trustgraph-base/trustgraph/schema/services/llm.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field -from ..core.topic import topic from ..core.primitives import Error ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/lookup.py b/trustgraph-base/trustgraph/schema/services/lookup.py index d944fb89..3c661e4c 100644 --- a/trustgraph-base/trustgraph/schema/services/lookup.py +++ b/trustgraph-base/trustgraph/schema/services/lookup.py @@ -1,7 +1,6 @@ from dataclasses import dataclass from ..core.primitives import Error, Term, Triple -from ..core.topic import topic from ..core.metadata import Metadata ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/nlp_query.py b/trustgraph-base/trustgraph/schema/services/nlp_query.py index 6cd65f0e..73780567 100644 --- a/trustgraph-base/trustgraph/schema/services/nlp_query.py +++ b/trustgraph-base/trustgraph/schema/services/nlp_query.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field from ..core.primitives import Error -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/prompt.py b/trustgraph-base/trustgraph/schema/services/prompt.py index f7a31c14..f7388102 100644 --- 
a/trustgraph-base/trustgraph/schema/services/prompt.py +++ b/trustgraph-base/trustgraph/schema/services/prompt.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field from ..core.primitives import Error -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/query.py b/trustgraph-base/trustgraph/schema/services/query.py index 7a65f775..f9f08658 100644 --- a/trustgraph-base/trustgraph/schema/services/query.py +++ b/trustgraph-base/trustgraph/schema/services/query.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from ..core.primitives import Error, Term, Triple -from ..core.topic import topic +from ..core.topic import queue ############################################################################ @@ -69,12 +69,8 @@ class DocumentEmbeddingsResponse: error: Error | None = None chunks: list[ChunkMatch] = field(default_factory=list) -document_embeddings_request_queue = topic( - "document-embeddings-request", qos='q0', tenant='trustgraph', namespace='flow' -) -document_embeddings_response_queue = topic( - "document-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow' -) +document_embeddings_request_queue = queue('document-embeddings', cls='request') +document_embeddings_response_queue = queue('document-embeddings', cls='response') ############################################################################ @@ -104,9 +100,5 @@ class RowEmbeddingsResponse: error: Error | None = None matches: list[RowIndexMatch] = field(default_factory=list) -row_embeddings_request_queue = topic( - "row-embeddings-request", qos='q0', tenant='trustgraph', namespace='flow' -) -row_embeddings_response_queue = topic( - "row-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow' -) \ No newline at end of file +row_embeddings_request_queue = queue('row-embeddings', cls='request') +row_embeddings_response_queue = queue('row-embeddings', 
cls='response') \ No newline at end of file diff --git a/trustgraph-base/trustgraph/schema/services/retrieval.py b/trustgraph-base/trustgraph/schema/services/retrieval.py index f5ac73d3..a4621549 100644 --- a/trustgraph-base/trustgraph/schema/services/retrieval.py +++ b/trustgraph-base/trustgraph/schema/services/retrieval.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from ..core.topic import topic from ..core.primitives import Error, Term ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/rows_query.py b/trustgraph-base/trustgraph/schema/services/rows_query.py index a4818329..e3c4f14c 100644 --- a/trustgraph-base/trustgraph/schema/services/rows_query.py +++ b/trustgraph-base/trustgraph/schema/services/rows_query.py @@ -2,7 +2,6 @@ from dataclasses import dataclass, field from typing import Optional from ..core.primitives import Error -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/structured_query.py b/trustgraph-base/trustgraph/schema/services/structured_query.py index ae1eaa5f..5f54ac16 100644 --- a/trustgraph-base/trustgraph/schema/services/structured_query.py +++ b/trustgraph-base/trustgraph/schema/services/structured_query.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field from ..core.primitives import Error -from ..core.topic import topic ############################################################################ diff --git a/trustgraph-cli/trustgraph/cli/dump_queues.py b/trustgraph-cli/trustgraph/cli/dump_queues.py index 4df61cc3..eb7898c2 100644 --- a/trustgraph-cli/trustgraph/cli/dump_queues.py +++ b/trustgraph-cli/trustgraph/cli/dump_queues.py @@ -8,8 +8,6 @@ message flows, diagnosing stuck services, and understanding system behavior. Uses TrustGraph's Subscriber abstraction for future-proof pub/sub compatibility. 
""" -import pulsar -from pulsar.schema import BytesSchema import sys import json import asyncio @@ -17,7 +15,7 @@ from datetime import datetime import argparse from trustgraph.base.subscriber import Subscriber -from trustgraph.base.pubsub import get_pubsub +from trustgraph.base.pubsub import get_pubsub, add_pubsub_args def decode_json_strings(obj): """Recursively decode JSON-encoded string values within a dict/list.""" @@ -172,15 +170,13 @@ async def log_writer(central_queue, file_handle, shutdown_event, console_output= break -async def async_main(queues, output_file, pulsar_host, listener_name, subscriber_name, append_mode): +async def async_main(queues, output_file, subscriber_name, append_mode, **pubsub_config): """ Main async function to monitor multiple queues concurrently. Args: queues: List of queue names to monitor output_file: Path to output file - pulsar_host: Pulsar connection URL - listener_name: Pulsar listener name subscriber_name: Base name for subscribers append_mode: Whether to append to existing file """ @@ -194,9 +190,9 @@ async def async_main(queues, output_file, pulsar_host, listener_name, subscriber # Create backend connection try: - backend = get_pubsub(pulsar_host=pulsar_host, pulsar_listener=listener_name, pubsub_backend='pulsar') + backend = get_pubsub(**pubsub_config) except Exception as e: - print(f"Error connecting to backend at {pulsar_host}: {e}", file=sys.stderr) + print(f"Error connecting to backend: {e}", file=sys.stderr) sys.exit(1) # Create Subscribers and central queue @@ -291,25 +287,20 @@ def main(): description='Monitor and dump messages from multiple Pulsar queues', epilog=""" Examples: - # Monitor agent and prompt queues - tg-dump-queues non-persistent://tg/request/agent:default \\ - non-persistent://tg/request/prompt:default + # Monitor agent and prompt flow queues + tg-dump-queues flow:tg:agent-request:default \\ + flow:tg:prompt-request:default # Monitor with custom output file - tg-dump-queues 
non-persistent://tg/request/agent:default \\ + tg-dump-queues flow:tg:agent-request:default \\ --output debug.log # Append to existing log file - tg-dump-queues non-persistent://tg/request/agent:default \\ + tg-dump-queues flow:tg:agent-request:default \\ --output queue.log --append -Common queue patterns: - - Agent requests: non-persistent://tg/request/agent:default - - Agent responses: non-persistent://tg/response/agent:default - - Prompt requests: non-persistent://tg/request/prompt:default - - Prompt responses: non-persistent://tg/response/prompt:default - - LLM requests: non-persistent://tg/request/text-completion:default - - LLM responses: non-persistent://tg/response/text-completion:default + # Raw Pulsar URIs also accepted + tg-dump-queues persistent://tg/flow/agent-request:default IMPORTANT: This tool subscribes to queues without a schema (schema-less mode). To avoid @@ -340,17 +331,7 @@ IMPORTANT: help='Append to output file instead of overwriting' ) - parser.add_argument( - '--pulsar-host', - default='pulsar://localhost:6650', - help='Pulsar host URL (default: pulsar://localhost:6650)' - ) - - parser.add_argument( - '--listener-name', - default='localhost', - help='Pulsar listener name (default: localhost)' - ) + add_pubsub_args(parser, standalone=True) parser.add_argument( '--subscriber', @@ -371,10 +352,12 @@ IMPORTANT: asyncio.run(async_main( queues=queues, output_file=args.output, - pulsar_host=args.pulsar_host, - listener_name=args.listener_name, subscriber_name=args.subscriber, - append_mode=args.append + append_mode=args.append, + pubsub_backend=args.pubsub_backend, + pulsar_host=args.pulsar_host, + pulsar_api_key=args.pulsar_api_key, + pulsar_listener=args.pulsar_listener, )) except KeyboardInterrupt: # Already handled in async_main diff --git a/trustgraph-cli/trustgraph/cli/init_trustgraph.py b/trustgraph-cli/trustgraph/cli/init_trustgraph.py index bed56a73..02456b1c 100644 --- a/trustgraph-cli/trustgraph/cli/init_trustgraph.py +++ 
b/trustgraph-cli/trustgraph/cli/init_trustgraph.py @@ -137,7 +137,7 @@ def init( } }) - ensure_namespace(pulsar_admin_url, tenant, "config", { + ensure_namespace(pulsar_admin_url, tenant, "state", { "retention_policies": { "retentionSizeInMB": 10, "retentionTimeInMinutes": -1, diff --git a/trustgraph-cli/trustgraph/cli/monitor_prompts.py b/trustgraph-cli/trustgraph/cli/monitor_prompts.py index 974cfbcd..c3b71afb 100644 --- a/trustgraph-cli/trustgraph/cli/monitor_prompts.py +++ b/trustgraph-cli/trustgraph/cli/monitor_prompts.py @@ -1,7 +1,7 @@ """ Monitor prompt request/response queues and log activity with timing. -Subscribes to prompt request and response Pulsar queues, correlates +Subscribes to prompt request and response queues, correlates them by message ID, and logs a summary of each request/response with elapsed time. Streaming responses are accumulated and shown once at completion. @@ -19,8 +19,7 @@ import argparse from datetime import datetime from collections import OrderedDict -import pulsar -from pulsar.schema import BytesSchema +from trustgraph.base.pubsub import get_pubsub, add_pubsub_args default_flow = "default" @@ -85,7 +84,7 @@ def format_terms(terms, max_lines, max_width): def parse_raw_message(msg): - """Parse a raw Pulsar message into (correlation_id, body_dict).""" + """Parse a raw message into (correlation_id, body_dict).""" try: props = msg.properties() corr_id = props.get("id", "") @@ -94,53 +93,46 @@ def parse_raw_message(msg): try: value = msg.value() - if isinstance(value, bytes): - value = value.decode("utf-8") - body = json.loads(value) if isinstance(value, str) else {} + if isinstance(value, dict): + body = value + elif isinstance(value, bytes): + body = json.loads(value.decode("utf-8")) + elif isinstance(value, str): + body = json.loads(value) + else: + body = {} except Exception: body = {} return corr_id, body -def receive_with_timeout(consumer, timeout_ms=500): - """Receive a message with timeout, returning None on timeout.""" - 
try: - return consumer.receive(timeout_millis=timeout_ms) - except Exception: - return None +async def monitor(flow, queue_type, max_lines, max_width, **config): - -async def monitor(flow, queue_type, max_lines, max_width, - pulsar_host, listener_name): - - request_queue = f"non-persistent://tg/request/{queue_type}:{flow}" - response_queue = f"non-persistent://tg/response/{queue_type}:{flow}" + request_queue = f"request:tg:{queue_type}:{flow}" + response_queue = f"response:tg:{queue_type}:{flow}" print(f"Monitoring prompt queues:") print(f" Request: {request_queue}") print(f" Response: {response_queue}") print(f"Press Ctrl+C to stop\n") - client = pulsar.Client( - pulsar_host, - listener_name=listener_name, + backend = get_pubsub(**config) + + req_consumer = backend.create_consumer( + topic=request_queue, + subscription="prompt-monitor-req", + schema=None, + consumer_type='shared', + initial_position='latest', ) - req_consumer = client.subscribe( - request_queue, - subscription_name="prompt-monitor-req", - consumer_type=pulsar.ConsumerType.Shared, - schema=BytesSchema(), - initial_position=pulsar.InitialPosition.Latest, - ) - - resp_consumer = client.subscribe( - response_queue, - subscription_name="prompt-monitor-resp", - consumer_type=pulsar.ConsumerType.Shared, - schema=BytesSchema(), - initial_position=pulsar.InitialPosition.Latest, + resp_consumer = backend.create_consumer( + topic=response_queue, + subscription="prompt-monitor-resp", + schema=None, + consumer_type='shared', + initial_position='latest', ) # Track in-flight requests: corr_id -> (timestamp, template_id) @@ -156,8 +148,8 @@ async def monitor(flow, queue_type, max_lines, max_width, got_message = False # Poll request queue - msg = receive_with_timeout(req_consumer, 100) - if msg: + try: + msg = req_consumer.receive(timeout_millis=100) got_message = True timestamp = datetime.now() corr_id, body = parse_raw_message(msg) @@ -182,10 +174,12 @@ async def monitor(flow, queue_type, max_lines, max_width, 
print(format_terms(terms, max_lines, max_width)) req_consumer.acknowledge(msg) + except TimeoutError: + pass # Poll response queue - msg = receive_with_timeout(resp_consumer, 100) - if msg: + try: + msg = resp_consumer.receive(timeout_millis=100) got_message = True timestamp = datetime.now() corr_id, body = parse_raw_message(msg) @@ -265,6 +259,8 @@ async def monitor(flow, queue_type, max_lines, max_width, print(f" {truncated}") resp_consumer.acknowledge(msg) + except TimeoutError: + pass if not got_message: await asyncio.sleep(0.05) @@ -274,7 +270,7 @@ async def monitor(flow, queue_type, max_lines, max_width, finally: req_consumer.close() resp_consumer.close() - client.close() + backend.close() def main(): @@ -310,17 +306,7 @@ def main(): help=f"Max width per line (default: {default_max_width})", ) - parser.add_argument( - "--pulsar-host", - default="pulsar://localhost:6650", - help="Pulsar host URL (default: pulsar://localhost:6650)", - ) - - parser.add_argument( - "--listener-name", - default="localhost", - help="Pulsar listener name (default: localhost)", - ) + add_pubsub_args(parser, standalone=True) args = parser.parse_args() @@ -331,7 +317,9 @@ def main(): max_lines=args.max_lines, max_width=args.max_width, pulsar_host=args.pulsar_host, - listener_name=args.listener_name, + pulsar_api_key=args.pulsar_api_key, + pulsar_listener=args.pulsar_listener, + pubsub_backend=args.pubsub_backend, )) except KeyboardInterrupt: pass diff --git a/trustgraph-flow/trustgraph/extract/kg/rows/processor.py b/trustgraph-flow/trustgraph/extract/kg/rows/processor.py index 88e29116..02aa7d78 100644 --- a/trustgraph-flow/trustgraph/extract/kg/rows/processor.py +++ b/trustgraph-flow/trustgraph/extract/kg/rows/processor.py @@ -145,7 +145,7 @@ class Processor(FlowProcessor): try: # Convert Pulsar RowSchema to JSON-serializable dict - schema_dict = row_schema_translator.from_pulsar(schema) + schema_dict = row_schema_translator.encode(schema) # Use prompt client to extract rows based on 
schema objects = await flow("prompt-request").extract_objects( diff --git a/trustgraph-flow/trustgraph/gateway/config/receiver.py b/trustgraph-flow/trustgraph/gateway/config/receiver.py index 4bf39ccd..d956c7c6 100755 --- a/trustgraph-flow/trustgraph/gateway/config/receiver.py +++ b/trustgraph-flow/trustgraph/gateway/config/receiver.py @@ -23,7 +23,6 @@ import uuid logger = logging.getLogger(__name__) import json -import pulsar from prometheus_client import start_http_server from ... schema import ConfigPush, config_push_queue diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/agent.py b/trustgraph-flow/trustgraph/gateway/dispatch/agent.py index 8867956d..5b97b297 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/agent.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/agent.py @@ -25,8 +25,8 @@ class AgentRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("agent") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/collection_management.py b/trustgraph-flow/trustgraph/gateway/dispatch/collection_management.py index 2fa3759d..544a412d 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/collection_management.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/collection_management.py @@ -29,8 +29,8 @@ class CollectionManagementRequestor(ServiceRequestor): def to_request(self, body): print("REQUEST", body, flush=True) - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): print("RESPONSE", message, flush=True) - return self.response_translator.from_response_with_completion(message) + return 
self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/config.py b/trustgraph-flow/trustgraph/gateway/dispatch/config.py index 9d40e8cc..3c591a81 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/config.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/config.py @@ -30,8 +30,8 @@ class ConfigRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("config") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py index bd5f9666..199b5a42 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py @@ -44,7 +44,7 @@ class DocumentEmbeddingsImport: async def receive(self, msg): data = msg.json() - elt = self.translator.to_pulsar(data) + elt = self.translator.decode(data) await self.publisher.send(None, elt) async def run(self): diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_query.py index 650d4f40..80a0935f 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_query.py @@ -25,7 +25,7 @@ class DocumentEmbeddingsQueryRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("document-embeddings-query") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return 
self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/document_load.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_load.py index eb68b0b1..67800f21 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/document_load.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_load.py @@ -23,5 +23,5 @@ class DocumentLoad(ServiceSender): def to_request(self, body): logger.info("Document received") - return self.translator.to_pulsar(body) + return self.translator.decode(body) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py index 83b3cb9a..55e20bfc 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py @@ -25,8 +25,8 @@ class DocumentRagRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("document-rag") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py b/trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py index 6c1b55ba..99994f2a 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py @@ -25,8 +25,8 @@ class EmbeddingsRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("embeddings") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return 
self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/flow.py b/trustgraph-flow/trustgraph/gateway/dispatch/flow.py index be91995d..6f901c1f 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/flow.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/flow.py @@ -30,8 +30,8 @@ class FlowRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("flow") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py index a7bb1bd8..3081ad59 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py @@ -25,8 +25,8 @@ class GraphEmbeddingsQueryRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("graph-embeddings-query") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py b/trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py index a0299a43..9b8feea4 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py 
@@ -25,8 +25,8 @@ class GraphRagRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("graph-rag") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py b/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py index 83aefbd0..90f7f89c 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py @@ -33,8 +33,8 @@ class KnowledgeRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("knowledge") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py b/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py index bbf7190e..8f33f9c1 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py @@ -40,8 +40,8 @@ class LibrarianRequestor(ServiceRequestor): body = body.copy() body["content"] = content - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/mcp_tool.py b/trustgraph-flow/trustgraph/gateway/dispatch/mcp_tool.py index a5f9398e..9be08aff 100644 --- 
a/trustgraph-flow/trustgraph/gateway/dispatch/mcp_tool.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/mcp_tool.py @@ -25,8 +25,8 @@ class McpToolRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("tool") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/nlp_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/nlp_query.py index 3a6314f2..7152d6c0 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/nlp_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/nlp_query.py @@ -24,7 +24,7 @@ class NLPQueryRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("nlp-query") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) \ No newline at end of file + return self.response_translator.encode_with_completion(message) \ No newline at end of file diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/prompt.py b/trustgraph-flow/trustgraph/gateway/dispatch/prompt.py index 23017733..2304cbdb 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/prompt.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/prompt.py @@ -27,8 +27,8 @@ class PromptRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("prompt") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return 
self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/row_embeddings_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/row_embeddings_query.py index 8b139fc2..9a0704ca 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/row_embeddings_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/row_embeddings_query.py @@ -25,7 +25,7 @@ class RowEmbeddingsQueryRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("row-embeddings-query") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/rows_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/rows_query.py index 57435be8..e285d9c8 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/rows_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/rows_query.py @@ -24,7 +24,7 @@ class RowsQueryRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("rows-query") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/serialize.py b/trustgraph-flow/trustgraph/gateway/dispatch/serialize.py index f42eee02..7267e320 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/serialize.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/serialize.py @@ -11,22 +11,22 @@ _triple_translator = TripleTranslator() def to_value(x): """Convert dict to Term. 
Delegates to TermTranslator.""" - return _term_translator.to_pulsar(x) + return _term_translator.decode(x) def to_subgraph(x): """Convert list of dicts to list of Triples. Delegates to TripleTranslator.""" - return [_triple_translator.to_pulsar(t) for t in x] + return [_triple_translator.decode(t) for t in x] def serialize_value(v): """Convert Term to dict. Delegates to TermTranslator.""" - return _term_translator.from_pulsar(v) + return _term_translator.encode(v) def serialize_triple(t): """Convert Triple to dict. Delegates to TripleTranslator.""" - return _triple_translator.from_pulsar(t) + return _triple_translator.encode(t) def serialize_subgraph(sg): diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/structured_diag.py b/trustgraph-flow/trustgraph/gateway/dispatch/structured_diag.py index 895b55be..5bf8f3e5 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/structured_diag.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/structured_diag.py @@ -24,7 +24,7 @@ class StructuredDiagRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("structured-diag") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) \ No newline at end of file + return self.response_translator.encode_with_completion(message) \ No newline at end of file diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/structured_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/structured_query.py index 9a9fbb6a..19508f97 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/structured_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/structured_query.py @@ -24,7 +24,7 @@ class StructuredQueryRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("structured-query") def to_request(self, body): - return 
self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) \ No newline at end of file + return self.response_translator.encode_with_completion(message) \ No newline at end of file diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py b/trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py index 0e77584e..a7c9f6e6 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py @@ -25,8 +25,8 @@ class TextCompletionRequestor(ServiceRequestor): self.response_translator = TranslatorRegistry.get_response_translator("text-completion") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/text_load.py b/trustgraph-flow/trustgraph/gateway/dispatch/text_load.py index b2562938..c21f8261 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/text_load.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/text_load.py @@ -23,5 +23,5 @@ class TextLoad(ServiceSender): def to_request(self, body): logger.info("Text document received") - return self.translator.to_pulsar(body) + return self.translator.decode(body) diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py index 6b306139..c9c66705 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py @@ -25,8 +25,8 @@ class TriplesQueryRequestor(ServiceRequestor): self.response_translator = 
TranslatorRegistry.get_response_translator("triples-query") def to_request(self, body): - return self.request_translator.to_pulsar(body) + return self.request_translator.decode(body) def from_response(self, message): - return self.response_translator.from_response_with_completion(message) + return self.response_translator.encode_with_completion(message) diff --git a/trustgraph-flow/trustgraph/gateway/service.py b/trustgraph-flow/trustgraph/gateway/service.py index aaa6f725..cdf5daba 100755 --- a/trustgraph-flow/trustgraph/gateway/service.py +++ b/trustgraph-flow/trustgraph/gateway/service.py @@ -18,7 +18,6 @@ from . dispatch.manager import DispatcherManager from . endpoint.manager import EndpointManager -import pulsar from prometheus_client import start_http_server # Import default queue names diff --git a/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py b/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py index ab13ccbc..d31d6223 100755 --- a/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py +++ b/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py @@ -3,7 +3,6 @@ Graph writer. Input is graph edge. Writes edges to Cassandra graph. """ -import pulsar import base64 import os import argparse diff --git a/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py b/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py index 210ea53d..ac8d05c4 100755 --- a/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py +++ b/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py @@ -3,7 +3,6 @@ Graph writer. Input is graph edge. Writes edges to FalkorDB graph. 
""" -import pulsar import base64 import os import argparse diff --git a/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py b/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py index 55d4dee1..7864ac80 100755 --- a/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py +++ b/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py @@ -3,7 +3,6 @@ Graph writer. Input is graph edge. Writes edges to Memgraph. """ -import pulsar import base64 import os import argparse diff --git a/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py b/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py index 4a85a273..3db712fb 100755 --- a/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py +++ b/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py @@ -3,7 +3,6 @@ Graph writer. Input is graph edge. Writes edges to Neo4j graph. """ -import pulsar import base64 import os import argparse