trustgraph/trustgraph-base/trustgraph/base/librarian_client.py
cybermaggedon 03cc5ac80f
Per-flow librarian clients and per-workspace response queues (#865)
Replace singleton LibrarianClient with per-flow instances via the new
LibrarianSpec, giving each flow its own librarian tied to the
workspace-scoped request/response queues from the blueprint.

Move all workspace-scoped services (config, flow, librarian, knowledge)
from a single base-queue response producer to per-workspace response
producers created alongside the existing per-workspace request
consumers.  Update the gateway dispatcher and bootstrapper flow client
to subscribe to the matching workspace-scoped response queues.

Fix WorkspaceInit to register workspaces through the IAM
create-workspace API so they appear in __workspaces__ and are visible
to the gateway.  Simplify the bootstrapper gate to only check
config-svc reachability.

Updated tests accordingly.
2026-05-06 12:01:01 +01:00

250 lines
7.9 KiB
Python

"""
Shared librarian client for services that need to communicate
with the librarian via pub/sub.
Provides request-response and streaming operations over the message
broker, with proper support for large documents via stream-document.
Usage:

    self.librarian = LibrarianClient(
        id=id, backend=self.pubsub, taskgroup=self.taskgroup, **params
    )
    await self.librarian.start()
    content = await self.librarian.fetch_document_content(doc_id)
"""
import asyncio
import base64
import logging
import uuid
from .consumer import Consumer
from .producer import Producer
from .metrics import ConsumerMetrics, ProducerMetrics
from ..schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
from ..schema import librarian_request_queue, librarian_response_queue
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class LibrarianClient:
    """Client for librarian request-response over the message broker.

    Requests are published through a ``Producer`` and tagged with a
    unique ``id`` message property.  Responses arrive on a ``Consumer``
    and are matched back to the waiting caller by that same ``id``:
    single-response calls wait on a future, streaming calls read from a
    per-request queue.
    """

    def __init__(self, id, backend, taskgroup, **params):
        """Build the request producer and response consumer.

        Args:
            id: Processor identifier; used for metrics and for the
                default subscriber name (``"{id}-librarian"``).
            backend: Pub/sub backend handed to the producer and consumer.
            taskgroup: Task group handed to the response consumer.
            **params: Optional overrides.  Recognised keys are
                ``librarian_request_queue``, ``librarian_response_queue``,
                ``librarian_subscriber`` and ``flow_name``.
        """
        librarian_request_q = params.get(
            "librarian_request_queue", librarian_request_queue,
        )
        librarian_response_q = params.get(
            "librarian_response_queue", librarian_response_queue,
        )
        subscriber = params.get(
            "librarian_subscriber", f"{id}-librarian",
        )
        flow_name = params.get("flow_name")

        librarian_request_metrics = ProducerMetrics(
            processor=id, flow=flow_name, name="librarian-request",
        )
        self._producer = Producer(
            backend=backend,
            topic=librarian_request_q,
            schema=LibrarianRequest,
            metrics=librarian_request_metrics,
        )

        librarian_response_metrics = ConsumerMetrics(
            processor=id, flow=flow_name, name="librarian-response",
        )
        # No flow object is attached to the response consumer.
        self._consumer = Consumer(
            taskgroup=taskgroup,
            backend=backend,
            flow=None,
            topic=librarian_response_q,
            subscriber=subscriber,
            schema=LibrarianResponse,
            handler=self._on_response,
            metrics=librarian_response_metrics,
        )

        # Single-response requests: request_id -> asyncio.Future
        self._pending = {}
        # Streaming requests: request_id -> asyncio.Queue
        self._streams = {}

    async def start(self):
        """Start the librarian producer and consumer."""
        await self._producer.start()
        await self._consumer.start()

    async def stop(self):
        """Stop the librarian consumer, then the producer."""
        await self._consumer.stop()
        await self._producer.stop()

    async def _on_response(self, msg, consumer, flow):
        """Route a librarian response to the caller waiting on its id.

        Messages without an ``id`` property, or whose id matches no
        outstanding request, are dropped.
        """
        response = msg.value()
        request_id = msg.properties().get("id")
        if not request_id:
            return

        future = self._pending.pop(request_id, None)
        if future is not None:
            # The waiter may already have timed out or been cancelled;
            # set_result() on a finished future raises InvalidStateError,
            # which would otherwise surface inside the consumer handler.
            if not future.done():
                future.set_result(response)
            return

        stream_queue = self._streams.get(request_id)
        if stream_queue is not None:
            await stream_queue.put(response)

    async def request(self, request, timeout=120):
        """Send a request to the librarian and wait for a single response.

        Args:
            request: The request object to publish.
            timeout: Seconds to wait for the response.

        Returns:
            The response object delivered for this request.

        Raises:
            RuntimeError: On timeout, or when the response carries an
                error.
        """
        request_id = str(uuid.uuid4())
        future = asyncio.get_running_loop().create_future()
        self._pending[request_id] = future

        try:
            await self._producer.send(
                request, properties={"id": request_id},
            )
            response = await asyncio.wait_for(future, timeout=timeout)
        except asyncio.TimeoutError:
            raise RuntimeError("Timeout waiting for librarian response")
        finally:
            # Always drop the bookkeeping entry, including when send()
            # fails or the caller is cancelled, so it cannot leak.
            self._pending.pop(request_id, None)

        if response.error:
            raise RuntimeError(
                f"Librarian error: {response.error.type}: "
                f"{response.error.message}"
            )
        return response

    async def stream(self, request, timeout=120):
        """Send a request and collect streamed response chunks.

        Args:
            request: The request object to publish.
            timeout: Seconds to wait for each individual chunk.

        Returns:
            List of response objects, ending with the one whose
            ``is_final`` flag is set.

        Raises:
            RuntimeError: On timeout, or when any chunk carries an error.
        """
        request_id = str(uuid.uuid4())
        chunk_queue = asyncio.Queue()
        self._streams[request_id] = chunk_queue

        try:
            await self._producer.send(
                request, properties={"id": request_id},
            )
            chunks = []
            while True:
                response = await asyncio.wait_for(
                    chunk_queue.get(), timeout=timeout,
                )
                if response.error:
                    raise RuntimeError(
                        f"Librarian error: {response.error.type}: "
                        f"{response.error.message}"
                    )
                chunks.append(response)
                if response.is_final:
                    return chunks
        except asyncio.TimeoutError:
            raise RuntimeError("Timeout waiting for librarian stream")
        finally:
            # Single cleanup point for every exit path.
            self._streams.pop(request_id, None)

    async def fetch_document_content(self, document_id, timeout=120):
        """Fetch document content using streaming.

        Returns the content base64-encoded (as produced by
        ``base64.b64encode``).  Caller is responsible for decoding.
        """
        req = LibrarianRequest(
            operation="stream-document",
            document_id=document_id,
        )
        chunks = await self.stream(req, timeout=timeout)

        # Decode each chunk's base64 to raw bytes, concatenate,
        # re-encode for the caller.  join() avoids the repeated copying
        # of building the payload with "+=".
        raw = b"".join(
            base64.b64decode(
                chunk.content
                if isinstance(chunk.content, bytes)
                else chunk.content.encode("utf-8")
            )
            for chunk in chunks
            if chunk.content
        )
        return base64.b64encode(raw)

    async def fetch_document_text(self, document_id, timeout=120):
        """Fetch document content and decode as UTF-8 text."""
        content = await self.fetch_document_content(
            document_id, timeout=timeout,
        )
        return base64.b64decode(content).decode("utf-8")

    async def fetch_document_metadata(self, document_id, timeout=120):
        """Fetch document metadata from the librarian.

        Returns the ``document_metadata`` field of the response.
        """
        req = LibrarianRequest(
            operation="get-document-metadata",
            document_id=document_id,
        )
        response = await self.request(req, timeout=timeout)
        return response.document_metadata

    async def save_child_document(self, doc_id, parent_id, content,
                                  document_type="chunk", title=None,
                                  kind="text/plain", timeout=120):
        """Save a child document to the librarian.

        Args:
            doc_id: Identifier for the new document.
            parent_id: Identifier of the parent document.
            content: Document body; ``str`` is encoded as UTF-8 first.
            document_type: Value for the metadata ``document_type``.
            title: Title; falls back to ``doc_id`` when empty.
            kind: Value for the metadata ``kind``.
            timeout: Seconds to wait for the librarian's response.

        Returns:
            ``doc_id``.
        """
        if isinstance(content, str):
            content = content.encode("utf-8")

        doc_metadata = DocumentMetadata(
            id=doc_id,
            kind=kind,
            title=title or doc_id,
            parent_id=parent_id,
            document_type=document_type,
        )
        req = LibrarianRequest(
            operation="add-child-document",
            document_metadata=doc_metadata,
            content=base64.b64encode(content).decode("utf-8"),
        )
        await self.request(req, timeout=timeout)
        return doc_id

    async def save_document(self, doc_id, content, title=None,
                            document_type="answer", kind="text/plain",
                            timeout=120):
        """Save a document to the librarian.

        Args:
            doc_id: Identifier for the new document.
            content: Document body; ``str`` is encoded as UTF-8 first.
            title: Title; falls back to ``doc_id`` when empty.
            document_type: Value for the metadata ``document_type``.
            kind: Value for the metadata ``kind``.
            timeout: Seconds to wait for the librarian's response.

        Returns:
            ``doc_id``.
        """
        if isinstance(content, str):
            content = content.encode("utf-8")

        doc_metadata = DocumentMetadata(
            id=doc_id,
            kind=kind,
            title=title or doc_id,
            document_type=document_type,
        )
        req = LibrarianRequest(
            operation="add-document",
            document_id=doc_id,
            document_metadata=doc_metadata,
            content=base64.b64encode(content).decode("utf-8"),
        )
        await self.request(req, timeout=timeout)
        return doc_id