mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-12 16:52:37 +02:00
Per-flow librarian clients and per-workspace response queues (#865)
Replace singleton LibrarianClient with per-flow instances via the new LibrarianSpec, giving each flow its own librarian tied to the workspace-scoped request/response queues from the blueprint. Move all workspace-scoped services (config, flow, librarian, knowledge) from a single base-queue response producer to per-workspace response producers created alongside the existing per-workspace request consumers. Update the gateway dispatcher and bootstrapper flow client to subscribe to the matching workspace-scoped response queues. Fix WorkspaceInit to register workspaces through the IAM create-workspace API so they appear in __workspaces__ and are visible to the gateway. Simplify the bootstrapper gate to only check config-svc reachability. Updated tests accordingly.
This commit is contained in:
parent
01bf1d89d5
commit
03cc5ac80f
30 changed files with 405 additions and 735 deletions
|
|
@ -16,6 +16,7 @@ from . subscriber_spec import SubscriberSpec
|
|||
from . request_response_spec import RequestResponseSpec
|
||||
from . llm_service import LlmService, LlmResult, LlmChunk
|
||||
from . librarian_client import LibrarianClient
|
||||
from . librarian_spec import LibrarianSpec
|
||||
from . chunking_service import ChunkingService
|
||||
from . embeddings_service import EmbeddingsService
|
||||
from . embeddings_client import EmbeddingsClientSpec
|
||||
|
|
|
|||
|
|
@ -4,13 +4,11 @@ for chunk-size and chunk-overlap parameters, and librarian client for
|
|||
fetching large document content.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import logging
|
||||
|
||||
from .flow_processor import FlowProcessor
|
||||
from .parameter_spec import ParameterSpec
|
||||
from .librarian_client import LibrarianClient
|
||||
from .librarian_spec import LibrarianSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -35,35 +33,27 @@ class ChunkingService(FlowProcessor):
|
|||
ParameterSpec(name="chunk-overlap")
|
||||
)
|
||||
|
||||
# Librarian client
|
||||
self.librarian = LibrarianClient(
|
||||
id=id,
|
||||
backend=self.pubsub,
|
||||
taskgroup=self.taskgroup,
|
||||
self.register_specification(
|
||||
LibrarianSpec()
|
||||
)
|
||||
|
||||
logger.debug("ChunkingService initialized with parameter specifications")
|
||||
|
||||
async def start(self):
|
||||
await super(ChunkingService, self).start()
|
||||
await self.librarian.start()
|
||||
|
||||
async def get_document_text(self, doc, workspace):
|
||||
async def get_document_text(self, doc, flow):
|
||||
"""
|
||||
Get text content from a TextDocument, fetching from librarian if needed.
|
||||
|
||||
Args:
|
||||
doc: TextDocument with either inline text or document_id
|
||||
workspace: Workspace for librarian lookup (from flow.workspace)
|
||||
flow: Flow object with librarian client
|
||||
|
||||
Returns:
|
||||
str: The document text content
|
||||
"""
|
||||
if doc.document_id and not doc.text:
|
||||
logger.info(f"Fetching document {doc.document_id} from librarian...")
|
||||
text = await self.librarian.fetch_document_text(
|
||||
text = await flow.librarian.fetch_document_text(
|
||||
document_id=doc.document_id,
|
||||
workspace=workspace,
|
||||
)
|
||||
logger.info(f"Fetched {len(text)} characters from librarian")
|
||||
return text
|
||||
|
|
|
|||
|
|
@ -1,6 +1,4 @@
|
|||
|
||||
import asyncio
|
||||
|
||||
class Flow:
|
||||
"""
|
||||
Runtime representation of a deployed flow process.
|
||||
|
|
@ -22,16 +20,22 @@ class Flow:
|
|||
|
||||
self.parameter = {}
|
||||
|
||||
self.librarian = None
|
||||
|
||||
for spec in processor.specifications:
|
||||
spec.add(self, processor, defn)
|
||||
|
||||
async def start(self):
|
||||
if self.librarian:
|
||||
await self.librarian.start()
|
||||
for c in self.consumer.values():
|
||||
await c.start()
|
||||
|
||||
async def stop(self):
|
||||
for c in self.consumer.values():
|
||||
await c.stop()
|
||||
if self.librarian:
|
||||
await self.librarian.stop()
|
||||
|
||||
def __call__(self, key):
|
||||
if key in self.producer: return self.producer[key]
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ Usage:
|
|||
id=id, backend=self.pubsub, taskgroup=self.taskgroup, **params
|
||||
)
|
||||
await self.librarian.start()
|
||||
content = await self.librarian.fetch_document_content(doc_id, workspace)
|
||||
content = await self.librarian.fetch_document_content(doc_id)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
|
@ -39,9 +39,14 @@ class LibrarianClient:
|
|||
librarian_response_q = params.get(
|
||||
"librarian_response_queue", librarian_response_queue,
|
||||
)
|
||||
subscriber = params.get(
|
||||
"librarian_subscriber", f"{id}-librarian",
|
||||
)
|
||||
|
||||
flow_name = params.get("flow_name")
|
||||
|
||||
librarian_request_metrics = ProducerMetrics(
|
||||
processor=id, flow=None, name="librarian-request",
|
||||
processor=id, flow=flow_name, name="librarian-request",
|
||||
)
|
||||
|
||||
self._producer = Producer(
|
||||
|
|
@ -52,7 +57,7 @@ class LibrarianClient:
|
|||
)
|
||||
|
||||
librarian_response_metrics = ConsumerMetrics(
|
||||
processor=id, flow=None, name="librarian-response",
|
||||
processor=id, flow=flow_name, name="librarian-response",
|
||||
)
|
||||
|
||||
self._consumer = Consumer(
|
||||
|
|
@ -60,7 +65,7 @@ class LibrarianClient:
|
|||
backend=backend,
|
||||
flow=None,
|
||||
topic=librarian_response_q,
|
||||
subscriber=f"{id}-librarian",
|
||||
subscriber=subscriber,
|
||||
schema=LibrarianResponse,
|
||||
handler=self._on_response,
|
||||
metrics=librarian_response_metrics,
|
||||
|
|
@ -76,6 +81,11 @@ class LibrarianClient:
|
|||
await self._producer.start()
|
||||
await self._consumer.start()
|
||||
|
||||
async def stop(self):
|
||||
"""Stop the librarian producer and consumer."""
|
||||
await self._consumer.stop()
|
||||
await self._producer.stop()
|
||||
|
||||
async def _on_response(self, msg, consumer, flow):
|
||||
"""Route librarian responses to the right waiter."""
|
||||
response = msg.value()
|
||||
|
|
@ -150,7 +160,7 @@ class LibrarianClient:
|
|||
finally:
|
||||
self._streams.pop(request_id, None)
|
||||
|
||||
async def fetch_document_content(self, document_id, workspace, timeout=120):
|
||||
async def fetch_document_content(self, document_id, timeout=120):
|
||||
"""Fetch document content using streaming.
|
||||
|
||||
Returns base64-encoded content. Caller is responsible for decoding.
|
||||
|
|
@ -158,7 +168,6 @@ class LibrarianClient:
|
|||
req = LibrarianRequest(
|
||||
operation="stream-document",
|
||||
document_id=document_id,
|
||||
workspace=workspace,
|
||||
)
|
||||
chunks = await self.stream(req, timeout=timeout)
|
||||
|
||||
|
|
@ -176,24 +185,23 @@ class LibrarianClient:
|
|||
|
||||
return base64.b64encode(raw)
|
||||
|
||||
async def fetch_document_text(self, document_id, workspace, timeout=120):
|
||||
async def fetch_document_text(self, document_id, timeout=120):
|
||||
"""Fetch document content and decode as UTF-8 text."""
|
||||
content = await self.fetch_document_content(
|
||||
document_id, workspace, timeout=timeout,
|
||||
document_id, timeout=timeout,
|
||||
)
|
||||
return base64.b64decode(content).decode("utf-8")
|
||||
|
||||
async def fetch_document_metadata(self, document_id, workspace, timeout=120):
|
||||
async def fetch_document_metadata(self, document_id, timeout=120):
|
||||
"""Fetch document metadata from the librarian."""
|
||||
req = LibrarianRequest(
|
||||
operation="get-document-metadata",
|
||||
document_id=document_id,
|
||||
workspace=workspace,
|
||||
)
|
||||
response = await self.request(req, timeout=timeout)
|
||||
return response.document_metadata
|
||||
|
||||
async def save_child_document(self, doc_id, parent_id, workspace, content,
|
||||
async def save_child_document(self, doc_id, parent_id, content,
|
||||
document_type="chunk", title=None,
|
||||
kind="text/plain", timeout=120):
|
||||
"""Save a child document to the librarian."""
|
||||
|
|
@ -217,7 +225,7 @@ class LibrarianClient:
|
|||
await self.request(req, timeout=timeout)
|
||||
return doc_id
|
||||
|
||||
async def save_document(self, doc_id, workspace, content, title=None,
|
||||
async def save_document(self, doc_id, content, title=None,
|
||||
document_type="answer", kind="text/plain",
|
||||
timeout=120):
|
||||
"""Save a document to the librarian."""
|
||||
|
|
@ -236,7 +244,6 @@ class LibrarianClient:
|
|||
document_id=doc_id,
|
||||
document_metadata=doc_metadata,
|
||||
content=base64.b64encode(content).decode("utf-8"),
|
||||
workspace=workspace,
|
||||
)
|
||||
|
||||
await self.request(req, timeout=timeout)
|
||||
|
|
|
|||
31
trustgraph-base/trustgraph/base/librarian_spec.py
Normal file
31
trustgraph-base/trustgraph/base/librarian_spec.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from typing import Any
|
||||
|
||||
from . spec import Spec
|
||||
from . librarian_client import LibrarianClient
|
||||
|
||||
|
||||
class LibrarianSpec(Spec):
    """Specification that attaches a per-flow ``LibrarianClient``.

    When applied to a flow, this spec constructs a ``LibrarianClient``
    wired to the workspace-scoped request/response topics named in the
    flow definition, and stores it on the flow as ``flow.librarian``.
    """

    def __init__(self, request_name="librarian-request",
                 response_name="librarian-response"):
        # Keys under definition["topics"] naming the librarian request
        # and response queues for this flow.
        self.request_name = request_name
        self.response_name = response_name

    def add(self, flow: Any, processor: Any, definition: dict[str, Any]) -> None:
        """Create a librarian client for *flow* and attach it.

        Args:
            flow: runtime flow object; gains a ``librarian`` attribute.
            processor: owning processor supplying the pubsub backend and
                taskgroup the client runs under.
            definition: flow definition whose ``topics`` map resolves the
                queue names configured on this spec.
        """
        topics = definition["topics"]

        # Unique subscriber name per client instance, so every flow holds
        # its own subscription on the shared response queue.
        subscriber_name = (
            f"{processor.id}--{flow.workspace}--{flow.name}"
            f"--librarian--{uuid.uuid4()}"
        )

        flow.librarian = LibrarianClient(
            id=flow.id,
            backend=processor.pubsub,
            taskgroup=processor.taskgroup,
            librarian_request_queue=topics[self.request_name],
            librarian_response_queue=topics[self.response_name],
            librarian_subscriber=subscriber_name,
            flow_name=flow.name,
        )
|
||||
Loading…
Add table
Add a link
Reference in a new issue