mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-12 16:52:37 +02:00
Per-flow librarian clients and per-workspace response queues (#865)
Replace singleton LibrarianClient with per-flow instances via the new LibrarianSpec, giving each flow its own librarian tied to the workspace-scoped request/response queues from the blueprint. Move all workspace-scoped services (config, flow, librarian, knowledge) from a single base-queue response producer to per-workspace response producers created alongside the existing per-workspace request consumers. Update the gateway dispatcher and bootstrapper flow client to subscribe to the matching workspace-scoped response queues. Fix WorkspaceInit to register workspaces through the IAM create-workspace API so they appear in __workspaces__ and are visible to the gateway. Simplify the bootstrapper gate to only check config-svc reachability. Updated tests accordingly.
This commit is contained in:
parent
01bf1d89d5
commit
03cc5ac80f
30 changed files with 405 additions and 735 deletions
|
|
@ -61,6 +61,10 @@ class FlowContext:
|
|||
def __call__(self, service_name):
|
||||
return self._flow(service_name)
|
||||
|
||||
@property
def librarian(self):
    """Return the ``librarian`` attribute of the wrapped flow object."""
    # Read-only pass-through so callers can write ``ctx.librarian``
    # exactly as they would on the underlying flow.
    wrapped_flow = self._flow
    return wrapped_flow.librarian
|
||||
|
||||
|
||||
class UsageTracker:
|
||||
"""Accumulates token usage across multiple prompt calls."""
|
||||
|
|
@ -320,9 +324,9 @@ class PatternBase:
|
|||
f"urn:trustgraph:agent:{session_id}/i{iteration_num}/thought"
|
||||
)
|
||||
try:
|
||||
await self.processor.save_answer_content(
|
||||
await flow.librarian.save_document(
|
||||
doc_id=thought_doc_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
content=act.thought,
|
||||
title=f"Agent Thought: {act.name}",
|
||||
)
|
||||
|
|
@ -389,9 +393,9 @@ class PatternBase:
|
|||
f"urn:trustgraph:agent:{session_id}/i{iteration_num}/observation"
|
||||
)
|
||||
try:
|
||||
await self.processor.save_answer_content(
|
||||
await flow.librarian.save_document(
|
||||
doc_id=observation_doc_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
content=observation_text,
|
||||
title=f"Agent Observation",
|
||||
)
|
||||
|
|
@ -445,9 +449,9 @@ class PatternBase:
|
|||
if answer_text:
|
||||
answer_doc_id = f"urn:trustgraph:agent:{session_id}/answer"
|
||||
try:
|
||||
await self.processor.save_answer_content(
|
||||
await flow.librarian.save_document(
|
||||
doc_id=answer_doc_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
content=answer_text,
|
||||
title=f"Agent Answer: {request.question[:50]}...",
|
||||
)
|
||||
|
|
@ -521,8 +525,8 @@ class PatternBase:
|
|||
|
||||
doc_id = f"urn:trustgraph:agent:{session_id}/finding/{index}/doc"
|
||||
try:
|
||||
await self.processor.save_answer_content(
|
||||
doc_id=doc_id, workspace=flow.workspace,
|
||||
await flow.librarian.save_document(
|
||||
doc_id=doc_id,
|
||||
content=answer_text,
|
||||
title=f"Finding: {goal[:60]}",
|
||||
)
|
||||
|
|
@ -574,8 +578,8 @@ class PatternBase:
|
|||
|
||||
doc_id = f"urn:trustgraph:agent:{session_id}/step/{index}/doc"
|
||||
try:
|
||||
await self.processor.save_answer_content(
|
||||
doc_id=doc_id, workspace=flow.workspace,
|
||||
await flow.librarian.save_document(
|
||||
doc_id=doc_id,
|
||||
content=answer_text,
|
||||
title=f"Step result: {goal[:60]}",
|
||||
)
|
||||
|
|
@ -606,8 +610,8 @@ class PatternBase:
|
|||
|
||||
doc_id = f"urn:trustgraph:agent:{session_id}/synthesis/doc"
|
||||
try:
|
||||
await self.processor.save_answer_content(
|
||||
doc_id=doc_id, workspace=flow.workspace,
|
||||
await flow.librarian.save_document(
|
||||
doc_id=doc_id,
|
||||
content=answer_text,
|
||||
title="Synthesis",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -7,26 +7,17 @@ to select between ReactPattern, PlanThenExecutePattern, and
|
|||
SupervisorPattern at runtime.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import functools
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from ... base import AgentService, TextCompletionClientSpec, PromptClientSpec
|
||||
from ... base import GraphRagClientSpec, ToolClientSpec, StructuredQueryClientSpec
|
||||
from ... base import RowEmbeddingsQueryClientSpec, EmbeddingsClientSpec
|
||||
from ... base import ProducerSpec
|
||||
from ... base import Consumer, Producer
|
||||
from ... base import ConsumerMetrics, ProducerMetrics
|
||||
from ... base import ProducerSpec, LibrarianSpec
|
||||
|
||||
from ... schema import AgentRequest, AgentResponse, AgentStep, Error
|
||||
from ..orchestrator.pattern_base import UsageTracker, PatternBase
|
||||
from ... schema import Triples, Metadata
|
||||
from ... schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
|
||||
from ... schema import librarian_request_queue, librarian_response_queue
|
||||
|
||||
from trustgraph.provenance import (
|
||||
agent_session_uri,
|
||||
|
|
@ -52,8 +43,6 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
default_ident = "agent-manager"
|
||||
default_max_iterations = 10
|
||||
default_librarian_request_queue = librarian_request_queue
|
||||
default_librarian_response_queue = librarian_response_queue
|
||||
|
||||
|
||||
class Processor(AgentService):
|
||||
|
|
@ -151,94 +140,9 @@ class Processor(AgentService):
|
|||
)
|
||||
)
|
||||
|
||||
# Librarian client
|
||||
librarian_request_q = params.get(
|
||||
"librarian_request_queue", default_librarian_request_queue
|
||||
self.register_specification(
|
||||
LibrarianSpec()
|
||||
)
|
||||
librarian_response_q = params.get(
|
||||
"librarian_response_queue", default_librarian_response_queue
|
||||
)
|
||||
|
||||
librarian_request_metrics = ProducerMetrics(
|
||||
processor=id, flow=None, name="librarian-request"
|
||||
)
|
||||
|
||||
self.librarian_request_producer = Producer(
|
||||
backend=self.pubsub,
|
||||
topic=librarian_request_q,
|
||||
schema=LibrarianRequest,
|
||||
metrics=librarian_request_metrics,
|
||||
)
|
||||
|
||||
librarian_response_metrics = ConsumerMetrics(
|
||||
processor=id, flow=None, name="librarian-response"
|
||||
)
|
||||
|
||||
self.librarian_response_consumer = Consumer(
|
||||
taskgroup=self.taskgroup,
|
||||
backend=self.pubsub,
|
||||
flow=None,
|
||||
topic=librarian_response_q,
|
||||
subscriber=f"{id}-librarian",
|
||||
schema=LibrarianResponse,
|
||||
handler=self.on_librarian_response,
|
||||
metrics=librarian_response_metrics,
|
||||
)
|
||||
|
||||
self.pending_librarian_requests = {}
|
||||
|
||||
async def start(self):
|
||||
await super(Processor, self).start()
|
||||
await self.librarian_request_producer.start()
|
||||
await self.librarian_response_consumer.start()
|
||||
|
||||
async def on_librarian_response(self, msg, consumer, flow):
    """Complete the pending future that matches a librarian response.

    The message's ``id`` property is looked up in
    ``self.pending_librarian_requests``. A matching entry is removed and
    its future is resolved with the message value. Responses carrying an
    unknown id are ignored.

    Args:
        msg: Incoming message; must provide ``value()`` and
            ``properties()``.
        consumer: Not used by this handler.
        flow: Not used by this handler.
    """
    response = msg.value()
    request_id = msg.properties().get("id")

    future = self.pending_librarian_requests.pop(request_id, None)
    if future is None:
        return

    # A waiter whose wait_for() timed out (or that was cancelled) leaves
    # a finished future behind until it removes the entry itself.
    # Calling set_result() on it would raise InvalidStateError.
    if not future.done():
        future.set_result(response)
|
||||
|
||||
async def save_answer_content(self, doc_id, workspace, content, title=None,
                              timeout=120):
    """Store text content in the librarian and wait for the reply.

    Builds an ``add-document`` ``LibrarianRequest`` whose content is the
    UTF-8 text encoded as base64, sends it with a fresh request id, and
    waits for the response future registered under that id.

    Args:
        doc_id: Identifier used for the document and its metadata.
        workspace: Workspace set on both the metadata and the request.
        content: Text to store.
        title: Document title; ``"Agent Answer"`` is used when falsy.
        timeout: Seconds to wait for the librarian response.

    Returns:
        ``doc_id`` once a response without an error has arrived.

    Raises:
        RuntimeError: If no response arrives within ``timeout`` or the
            response carries an error.
    """
    request_id = str(uuid.uuid4())

    doc_metadata = DocumentMetadata(
        id=doc_id,
        workspace=workspace,
        kind="text/plain",
        title=title or "Agent Answer",
        document_type="answer",
    )

    request = LibrarianRequest(
        operation="add-document",
        document_id=doc_id,
        document_metadata=doc_metadata,
        content=base64.b64encode(content.encode("utf-8")).decode("utf-8"),
        workspace=workspace,
    )

    # get_running_loop() is the supported way to reach the loop from
    # inside a coroutine.
    future = asyncio.get_running_loop().create_future()
    self.pending_librarian_requests[request_id] = future

    try:
        await self.librarian_request_producer.send(
            request, properties={"id": request_id}
        )
        response = await asyncio.wait_for(future, timeout=timeout)
    except asyncio.TimeoutError:
        raise RuntimeError(f"Timeout saving answer document {doc_id}")
    finally:
        # Always drop the bookkeeping entry. Without this, a failed
        # send() or a cancelled caller would leave the future in the
        # dict forever.
        self.pending_librarian_requests.pop(request_id, None)

    if response.error:
        raise RuntimeError(
            f"Librarian error saving answer: "
            f"{response.error.type}: {response.error.message}"
        )

    return doc_id
|
||||
|
||||
def provenance_session_uri(self, session_id):
|
||||
return agent_session_uri(session_id)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ Simple agent infrastructure broadly implements the ReAct flow.
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
|
|
@ -19,14 +18,10 @@ logger = logging.getLogger(__name__)
|
|||
from ... base import AgentService, TextCompletionClientSpec, PromptClientSpec
|
||||
from ... base import GraphRagClientSpec, ToolClientSpec, StructuredQueryClientSpec
|
||||
from ... base import RowEmbeddingsQueryClientSpec, EmbeddingsClientSpec
|
||||
from ... base import ProducerSpec
|
||||
from ... base import Consumer, Producer
|
||||
from ... base import ConsumerMetrics, ProducerMetrics
|
||||
from ... base import ProducerSpec, LibrarianSpec
|
||||
|
||||
from ... schema import AgentRequest, AgentResponse, AgentStep, Error
|
||||
from ... schema import Triples, Metadata
|
||||
from ... schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
|
||||
from ... schema import librarian_request_queue, librarian_response_queue
|
||||
|
||||
# Provenance imports for agent explainability
|
||||
from trustgraph.provenance import (
|
||||
|
|
@ -51,8 +46,6 @@ from . types import Final, Action, Tool, Argument
|
|||
|
||||
default_ident = "agent-manager"
|
||||
default_max_iterations = 10
|
||||
default_librarian_request_queue = librarian_request_queue
|
||||
default_librarian_response_queue = librarian_response_queue
|
||||
|
||||
class Processor(AgentService):
|
||||
|
||||
|
|
@ -141,112 +134,9 @@ class Processor(AgentService):
|
|||
)
|
||||
)
|
||||
|
||||
# Librarian client for storing answer content
|
||||
librarian_request_q = params.get(
|
||||
"librarian_request_queue", default_librarian_request_queue
|
||||
self.register_specification(
|
||||
LibrarianSpec()
|
||||
)
|
||||
librarian_response_q = params.get(
|
||||
"librarian_response_queue", default_librarian_response_queue
|
||||
)
|
||||
|
||||
librarian_request_metrics = ProducerMetrics(
|
||||
processor=id, flow=None, name="librarian-request"
|
||||
)
|
||||
|
||||
self.librarian_request_producer = Producer(
|
||||
backend=self.pubsub,
|
||||
topic=librarian_request_q,
|
||||
schema=LibrarianRequest,
|
||||
metrics=librarian_request_metrics,
|
||||
)
|
||||
|
||||
librarian_response_metrics = ConsumerMetrics(
|
||||
processor=id, flow=None, name="librarian-response"
|
||||
)
|
||||
|
||||
self.librarian_response_consumer = Consumer(
|
||||
taskgroup=self.taskgroup,
|
||||
backend=self.pubsub,
|
||||
flow=None,
|
||||
topic=librarian_response_q,
|
||||
subscriber=f"{id}-librarian",
|
||||
schema=LibrarianResponse,
|
||||
handler=self.on_librarian_response,
|
||||
metrics=librarian_response_metrics,
|
||||
)
|
||||
|
||||
# Pending librarian requests: request_id -> asyncio.Future
|
||||
self.pending_librarian_requests = {}
|
||||
|
||||
async def start(self):
|
||||
await super(Processor, self).start()
|
||||
await self.librarian_request_producer.start()
|
||||
await self.librarian_response_consumer.start()
|
||||
|
||||
async def on_librarian_response(self, msg, consumer, flow):
    """Handle responses from the librarian service.

    The message's ``id`` property selects an entry in
    ``self.pending_librarian_requests``. The entry is removed and its
    future resolved with the message value. Unknown ids are ignored.

    Args:
        msg: Incoming message; must provide ``value()`` and
            ``properties()``.
        consumer: Not used by this handler.
        flow: Not used by this handler.
    """
    response = msg.value()
    request_id = msg.properties().get("id")

    future = self.pending_librarian_requests.pop(request_id, None)
    if future is None:
        return

    # The waiter may already have timed out or been cancelled. Its
    # future is then finished, and set_result() would raise
    # InvalidStateError.
    if not future.done():
        future.set_result(response)
|
||||
|
||||
async def save_answer_content(self, doc_id, workspace, content, title=None, timeout=120):
    """
    Save answer content to the librarian.

    Sends an ``add-document`` request carrying the UTF-8 text as base64
    and waits for the response registered under a fresh request id.

    Args:
        doc_id: ID for the answer document
        workspace: Workspace for isolation
        content: Answer text content
        title: Optional title (``"Agent Answer"`` when falsy)
        timeout: Request timeout in seconds

    Returns:
        The document ID on success

    Raises:
        RuntimeError: On timeout, or when the response carries an error
    """
    request_id = str(uuid.uuid4())

    doc_metadata = DocumentMetadata(
        id=doc_id,
        workspace=workspace,
        kind="text/plain",
        title=title or "Agent Answer",
        document_type="answer",
    )

    request = LibrarianRequest(
        operation="add-document",
        document_id=doc_id,
        document_metadata=doc_metadata,
        content=base64.b64encode(content.encode("utf-8")).decode("utf-8"),
        workspace=workspace,
    )

    # Create future for response. get_running_loop() is the supported
    # call from inside a coroutine.
    future = asyncio.get_running_loop().create_future()
    self.pending_librarian_requests[request_id] = future

    try:
        # Send request
        await self.librarian_request_producer.send(
            request, properties={"id": request_id}
        )

        # Wait for response
        response = await asyncio.wait_for(future, timeout=timeout)

    except asyncio.TimeoutError:
        raise RuntimeError(f"Timeout saving answer document {doc_id}")

    finally:
        # Remove the bookkeeping entry on every exit path. A failed
        # send() or a cancelled caller would otherwise leak the future.
        self.pending_librarian_requests.pop(request_id, None)

    if response.error:
        raise RuntimeError(
            f"Librarian error saving answer: {response.error.type}: {response.error.message}"
        )

    return doc_id
|
||||
|
||||
async def on_tools_config(self, workspace, config, version):
|
||||
|
||||
|
|
@ -611,9 +501,9 @@ class Processor(AgentService):
|
|||
if act_decision.thought:
|
||||
t_doc_id = f"urn:trustgraph:agent:{session_id}/i{iteration_num}/thought"
|
||||
try:
|
||||
await self.save_answer_content(
|
||||
await flow.librarian.save_document(
|
||||
doc_id=t_doc_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
content=act_decision.thought,
|
||||
title=f"Agent Thought: {act_decision.name}",
|
||||
)
|
||||
|
|
@ -691,9 +581,9 @@ class Processor(AgentService):
|
|||
if f:
|
||||
answer_doc_id = f"urn:trustgraph:agent:{session_id}/answer"
|
||||
try:
|
||||
await self.save_answer_content(
|
||||
await flow.librarian.save_document(
|
||||
doc_id=answer_doc_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
content=f,
|
||||
title=f"Agent Answer: {request.question[:50]}...",
|
||||
)
|
||||
|
|
@ -768,9 +658,8 @@ class Processor(AgentService):
|
|||
if act.observation:
|
||||
observation_doc_id = f"urn:trustgraph:agent:{session_id}/i{iteration_num}/observation"
|
||||
try:
|
||||
await self.save_answer_content(
|
||||
await flow.librarian.save_document(
|
||||
doc_id=observation_doc_id,
|
||||
workspace=flow.workspace,
|
||||
content=act.observation,
|
||||
title=f"Agent Observation",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ class InitContext:
|
|||
logger: logging.Logger
|
||||
config: Any # ConfigClient
|
||||
make_flow_client: Any # callable(workspace) -> RequestResponse
|
||||
make_iam_client: Any # callable() -> RequestResponse
|
||||
|
||||
|
||||
class Initialiser:
|
||||
|
|
@ -35,7 +36,7 @@ class Initialiser:
|
|||
|
||||
* ``wait_for_services`` (bool, default ``True``): when ``True`` the
|
||||
initialiser only runs after the bootstrapper's service gate has
|
||||
passed (config-svc and flow-svc reachable). Set ``False`` for
|
||||
passed (config-svc reachable). Set ``False`` for
|
||||
initialisers that bring up infrastructure the gate itself
|
||||
depends on — principally Pulsar topology, without which
|
||||
config-svc cannot come online.
|
||||
|
|
|
|||
|
|
@ -28,6 +28,10 @@ from trustgraph.schema import (
|
|||
FlowRequest, FlowResponse,
|
||||
flow_request_queue, flow_response_queue,
|
||||
)
|
||||
from trustgraph.schema import (
|
||||
IamRequest, IamResponse,
|
||||
iam_request_queue, iam_response_queue,
|
||||
)
|
||||
|
||||
from .. base import Initialiser, InitContext
|
||||
|
||||
|
|
@ -189,13 +193,31 @@ class Processor(AsyncProcessor):
|
|||
request_metrics=ProducerMetrics(
|
||||
processor=self.id, flow=None, name="flow-request",
|
||||
),
|
||||
response_topic=flow_response_queue,
|
||||
response_topic=f"{flow_response_queue}:{workspace}",
|
||||
response_schema=FlowResponse,
|
||||
response_metrics=SubscriberMetrics(
|
||||
processor=self.id, flow=None, name="flow-response",
|
||||
),
|
||||
)
|
||||
|
||||
def _make_iam_client(self):
    """Build a ``RequestResponse`` client for the IAM request/response queues.

    Each call produces a client with its own subscription name (the
    processor id plus a random UUID). The client is returned without
    being started.
    """
    unique_suffix = str(uuid.uuid4())

    producer_metrics = ProducerMetrics(
        processor=self.id, flow=None, name="iam-request",
    )
    subscriber_metrics = SubscriberMetrics(
        processor=self.id, flow=None, name="iam-response",
    )

    client = RequestResponse(
        backend=self.pubsub_backend,
        subscription=f"{self.id}--iam--{unique_suffix}",
        consumer_name=self.id,
        request_topic=iam_request_queue,
        request_schema=IamRequest,
        request_metrics=producer_metrics,
        response_topic=iam_response_queue,
        response_schema=IamResponse,
        response_metrics=subscriber_metrics,
    )
    return client
|
||||
|
||||
async def _open_clients(self):
|
||||
config = self._make_config_client()
|
||||
await config.start()
|
||||
|
|
@ -211,13 +233,6 @@ class Processor(AsyncProcessor):
|
|||
# Service gate.
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _gate_workspace(self):
|
||||
for spec in self.specs:
|
||||
ws = getattr(spec.instance, "workspace", None)
|
||||
if ws and not ws.startswith("_"):
|
||||
return ws
|
||||
return None
|
||||
|
||||
async def _gate_ready(self, config):
|
||||
try:
|
||||
await config.keys(SYSTEM_WORKSPACE, INIT_STATE_TYPE)
|
||||
|
|
@ -227,33 +242,6 @@ class Processor(AsyncProcessor):
|
|||
)
|
||||
return False
|
||||
|
||||
workspace = self._gate_workspace()
|
||||
if workspace is None:
|
||||
return True
|
||||
|
||||
flow = self._make_flow_client(workspace)
|
||||
try:
|
||||
await flow.start()
|
||||
resp = await flow.request(
|
||||
FlowRequest(
|
||||
operation="list-blueprints",
|
||||
),
|
||||
timeout=5,
|
||||
)
|
||||
if resp.error:
|
||||
logger.info(
|
||||
f"Gate: flow-svc error: "
|
||||
f"{resp.error.type}: {resp.error.message}"
|
||||
)
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.info(
|
||||
f"Gate: flow-svc not ready ({type(e).__name__}: {e})"
|
||||
)
|
||||
return False
|
||||
finally:
|
||||
await self._safe_stop(flow)
|
||||
|
||||
return True
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
|
@ -307,6 +295,7 @@ class Processor(AsyncProcessor):
|
|||
logger=child_logger,
|
||||
config=config,
|
||||
make_flow_client=self._make_flow_client,
|
||||
make_iam_client=self._make_iam_client,
|
||||
)
|
||||
|
||||
child_logger.info(
|
||||
|
|
|
|||
|
|
@ -39,8 +39,6 @@ TEMPLATE_WORKSPACE = "__template__"
|
|||
|
||||
class TemplateSeed(Initialiser):
|
||||
|
||||
wait_for_services = False
|
||||
|
||||
def __init__(self, config_file, overwrite=False, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
if not config_file:
|
||||
|
|
|
|||
|
|
@ -26,6 +26,8 @@ the next cycle once the prerequisite is satisfied.
|
|||
|
||||
import json
|
||||
|
||||
from trustgraph.schema import IamRequest, WorkspaceInput
|
||||
|
||||
from .. base import Initialiser
|
||||
|
||||
TEMPLATE_WORKSPACE = "__template__"
|
||||
|
|
@ -33,8 +35,6 @@ TEMPLATE_WORKSPACE = "__template__"
|
|||
|
||||
class WorkspaceInit(Initialiser):
|
||||
|
||||
wait_for_services = False
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
workspace="default",
|
||||
|
|
@ -61,6 +61,8 @@ class WorkspaceInit(Initialiser):
|
|||
self.overwrite = overwrite
|
||||
|
||||
async def run(self, ctx, old_flag, new_flag):
|
||||
await self._create_workspace(ctx)
|
||||
|
||||
if self.source == "seed-file":
|
||||
tree = self._load_seed_file()
|
||||
else:
|
||||
|
|
@ -107,6 +109,39 @@ class WorkspaceInit(Initialiser):
|
|||
)
|
||||
return tree
|
||||
|
||||
async def _create_workspace(self, ctx):
    """Register the workspace via the IAM create-workspace API.

    Obtains a client from ``ctx.make_iam_client()``, starts it, issues a
    ``create-workspace`` request for ``self.workspace`` and stops the
    client again whatever the outcome.

    A ``duplicate`` error from IAM is logged and treated as success.

    Raises:
        RuntimeError: If IAM answers with any other error type.
    """
    iam = ctx.make_iam_client()
    await iam.start()

    try:
        record = WorkspaceInput(
            id=self.workspace,
            name=self.workspace.title(),
            enabled=True,
        )
        resp = await iam.request(
            IamRequest(
                operation="create-workspace",
                workspace_record=record,
            ),
            timeout=10,
        )

        failure = resp.error

        if not failure:
            ctx.logger.info(
                f"Workspace {self.workspace!r} created via IAM"
            )
            return

        if failure.type != "duplicate":
            raise RuntimeError(
                f"IAM create-workspace failed: "
                f"{resp.error.type}: {resp.error.message}"
            )

        # Only the "duplicate" case reaches this point.
        ctx.logger.info(
            f"Workspace {self.workspace!r} already exists in IAM"
        )

    finally:
        # Runs on success, early return and error alike.
        await iam.stop()
|
||||
|
||||
async def _write_all(self, ctx, tree):
|
||||
values = []
|
||||
for type_name, entries in tree.items():
|
||||
|
|
@ -114,6 +149,7 @@ class WorkspaceInit(Initialiser):
|
|||
values.append((type_name, key, json.dumps(value)))
|
||||
if values:
|
||||
await ctx.config.put_many(self.workspace, values)
|
||||
|
||||
ctx.logger.info(
|
||||
f"Workspace {self.workspace!r} populated with "
|
||||
f"{len(values)} entries"
|
||||
|
|
@ -134,6 +170,7 @@ class WorkspaceInit(Initialiser):
|
|||
if values:
|
||||
await ctx.config.put_many(self.workspace, values)
|
||||
written += len(values)
|
||||
|
||||
ctx.logger.info(
|
||||
f"Workspace {self.workspace!r} upsert-missing: "
|
||||
f"{written} new entries"
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ class Processor(ChunkingService):
|
|||
logger.info(f"Chunking document {v.metadata.id}...")
|
||||
|
||||
# Get text content (fetches from librarian if needed)
|
||||
text = await self.get_document_text(v, flow.workspace)
|
||||
text = await self.get_document_text(v, flow)
|
||||
|
||||
# Extract chunk parameters from flow (allows runtime override)
|
||||
chunk_size, chunk_overlap = await self.chunk_document(
|
||||
|
|
@ -141,10 +141,9 @@ class Processor(ChunkingService):
|
|||
chunk_length = len(chunk.page_content)
|
||||
|
||||
# Save chunk to librarian as child document
|
||||
await self.librarian.save_child_document(
|
||||
await flow.librarian.save_child_document(
|
||||
doc_id=chunk_doc_id,
|
||||
parent_id=parent_doc_id,
|
||||
workspace=flow.workspace,
|
||||
content=chunk_content,
|
||||
document_type="chunk",
|
||||
title=f"Chunk {chunk_index}",
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ class Processor(ChunkingService):
|
|||
logger.info(f"Chunking document {v.metadata.id}...")
|
||||
|
||||
# Get text content (fetches from librarian if needed)
|
||||
text = await self.get_document_text(v, flow.workspace)
|
||||
text = await self.get_document_text(v, flow)
|
||||
|
||||
# Extract chunk parameters from flow (allows runtime override)
|
||||
chunk_size, chunk_overlap = await self.chunk_document(
|
||||
|
|
@ -137,10 +137,9 @@ class Processor(ChunkingService):
|
|||
chunk_length = len(chunk.page_content)
|
||||
|
||||
# Save chunk to librarian as child document
|
||||
await self.librarian.save_child_document(
|
||||
await flow.librarian.save_child_document(
|
||||
doc_id=chunk_doc_id,
|
||||
parent_id=parent_doc_id,
|
||||
workspace=flow.workspace,
|
||||
content=chunk_content,
|
||||
document_type="chunk",
|
||||
title=f"Chunk {chunk_index}",
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ class Processor(AsyncProcessor):
|
|||
config_request_queue = params.get(
|
||||
"config_request_queue", default_config_request_queue
|
||||
)
|
||||
config_response_queue = params.get(
|
||||
self.config_response_queue_base = params.get(
|
||||
"config_response_queue", default_config_response_queue
|
||||
)
|
||||
config_push_queue = params.get(
|
||||
|
|
@ -130,7 +130,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
self.config_response_producer = Producer(
|
||||
backend = self.pubsub,
|
||||
topic = config_response_queue,
|
||||
topic = self.config_response_queue_base,
|
||||
schema = ConfigResponse,
|
||||
metrics = config_response_metrics,
|
||||
)
|
||||
|
|
@ -208,17 +208,31 @@ class Processor(AsyncProcessor):
|
|||
)
|
||||
|
||||
async def _add_workspace_consumer(self, workspace_id):
|
||||
queue = workspace_queue(
|
||||
req_queue = workspace_queue(
|
||||
self.config_request_queue_base, workspace_id,
|
||||
)
|
||||
resp_queue = workspace_queue(
|
||||
self.config_response_queue_base, workspace_id,
|
||||
)
|
||||
|
||||
await self.pubsub.ensure_topic(queue)
|
||||
await self.pubsub.ensure_topic(req_queue)
|
||||
await self.pubsub.ensure_topic(resp_queue)
|
||||
|
||||
response_producer = Producer(
|
||||
backend=self.pubsub,
|
||||
topic=resp_queue,
|
||||
schema=ConfigResponse,
|
||||
metrics=ProducerMetrics(
|
||||
processor=self.id, flow=None,
|
||||
name=f"config-response-{workspace_id}",
|
||||
),
|
||||
)
|
||||
|
||||
consumer = Consumer(
|
||||
taskgroup=self.taskgroup,
|
||||
backend=self.pubsub,
|
||||
flow=None,
|
||||
topic=queue,
|
||||
topic=req_queue,
|
||||
subscriber=self.id,
|
||||
schema=ConfigRequest,
|
||||
handler=partial(
|
||||
|
|
@ -231,17 +245,23 @@ class Processor(AsyncProcessor):
|
|||
),
|
||||
)
|
||||
|
||||
await response_producer.start()
|
||||
await consumer.start()
|
||||
self.workspace_consumers[workspace_id] = consumer
|
||||
|
||||
self.workspace_consumers[workspace_id] = {
|
||||
"consumer": consumer,
|
||||
"response": response_producer,
|
||||
}
|
||||
|
||||
logger.info(
|
||||
f"Subscribed to workspace config queue: {workspace_id}"
|
||||
)
|
||||
|
||||
async def _remove_workspace_consumer(self, workspace_id):
|
||||
consumer = self.workspace_consumers.pop(workspace_id, None)
|
||||
if consumer:
|
||||
await consumer.stop()
|
||||
clients = self.workspace_consumers.pop(workspace_id, None)
|
||||
if clients:
|
||||
for client in clients.values():
|
||||
await client.stop()
|
||||
logger.info(
|
||||
f"Unsubscribed from workspace config queue: {workspace_id}"
|
||||
)
|
||||
|
|
@ -249,6 +269,7 @@ class Processor(AsyncProcessor):
|
|||
async def start(self):
|
||||
|
||||
await self.pubsub.ensure_topic(self.config_request_queue_base)
|
||||
await self.config_response_producer.start()
|
||||
await self.push() # Startup poke: empty types = everything
|
||||
await self.system_consumer.start()
|
||||
|
||||
|
|
@ -307,9 +328,11 @@ class Processor(AsyncProcessor):
|
|||
f"workspace={workspace}..."
|
||||
)
|
||||
|
||||
producer = self.workspace_consumers[workspace]["response"]
|
||||
|
||||
resp = await self.config.handle_workspace(v, workspace)
|
||||
|
||||
await self.config_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
|
|
@ -322,7 +345,7 @@ class Processor(AsyncProcessor):
|
|||
),
|
||||
)
|
||||
|
||||
await self.config_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ class Processor(WorkspaceProcessor):
|
|||
"knowledge_request_queue", default_knowledge_request_queue
|
||||
)
|
||||
|
||||
knowledge_response_queue = params.get(
|
||||
self.knowledge_response_queue_base = params.get(
|
||||
"knowledge_response_queue", default_knowledge_response_queue
|
||||
)
|
||||
|
||||
|
|
@ -70,24 +70,13 @@ class Processor(WorkspaceProcessor):
|
|||
super(Processor, self).__init__(
|
||||
**params | {
|
||||
"knowledge_request_queue": self.knowledge_request_queue_base,
|
||||
"knowledge_response_queue": knowledge_response_queue,
|
||||
"knowledge_response_queue": self.knowledge_response_queue_base,
|
||||
"cassandra_host": self.cassandra_host,
|
||||
"cassandra_username": self.cassandra_username,
|
||||
"cassandra_password": self.cassandra_password,
|
||||
}
|
||||
)
|
||||
|
||||
knowledge_response_metrics = ProducerMetrics(
|
||||
processor = self.id, flow = None, name = "knowledge-response"
|
||||
)
|
||||
|
||||
self.knowledge_response_producer = Producer(
|
||||
backend = self.pubsub,
|
||||
topic = knowledge_response_queue,
|
||||
schema = KnowledgeResponse,
|
||||
metrics = knowledge_response_metrics,
|
||||
)
|
||||
|
||||
self.knowledge = KnowledgeManager(
|
||||
cassandra_host = self.cassandra_host,
|
||||
cassandra_username = self.cassandra_username,
|
||||
|
|
@ -109,17 +98,31 @@ class Processor(WorkspaceProcessor):
|
|||
if workspace in self.workspace_consumers:
|
||||
return
|
||||
|
||||
queue = workspace_queue(
|
||||
req_queue = workspace_queue(
|
||||
self.knowledge_request_queue_base, workspace,
|
||||
)
|
||||
resp_queue = workspace_queue(
|
||||
self.knowledge_response_queue_base, workspace,
|
||||
)
|
||||
|
||||
await self.pubsub.ensure_topic(queue)
|
||||
await self.pubsub.ensure_topic(req_queue)
|
||||
await self.pubsub.ensure_topic(resp_queue)
|
||||
|
||||
response_producer = Producer(
|
||||
backend=self.pubsub,
|
||||
topic=resp_queue,
|
||||
schema=KnowledgeResponse,
|
||||
metrics=ProducerMetrics(
|
||||
processor=self.id, flow=None,
|
||||
name=f"knowledge-response-{workspace}",
|
||||
),
|
||||
)
|
||||
|
||||
consumer = Consumer(
|
||||
taskgroup=self.taskgroup,
|
||||
backend=self.pubsub,
|
||||
flow=None,
|
||||
topic=queue,
|
||||
topic=req_queue,
|
||||
subscriber=self.id,
|
||||
schema=KnowledgeRequest,
|
||||
handler=partial(
|
||||
|
|
@ -131,22 +134,27 @@ class Processor(WorkspaceProcessor):
|
|||
),
|
||||
)
|
||||
|
||||
await response_producer.start()
|
||||
await consumer.start()
|
||||
self.workspace_consumers[workspace] = consumer
|
||||
|
||||
self.workspace_consumers[workspace] = {
|
||||
"consumer": consumer,
|
||||
"response": response_producer,
|
||||
}
|
||||
|
||||
logger.info(f"Subscribed to workspace queue: {workspace}")
|
||||
|
||||
async def on_workspace_deleted(self, workspace):
|
||||
|
||||
consumer = self.workspace_consumers.pop(workspace, None)
|
||||
if consumer:
|
||||
await consumer.stop()
|
||||
clients = self.workspace_consumers.pop(workspace, None)
|
||||
if clients:
|
||||
for client in clients.values():
|
||||
await client.stop()
|
||||
logger.info(f"Unsubscribed from workspace queue: {workspace}")
|
||||
|
||||
async def start(self):
|
||||
|
||||
await super(Processor, self).start()
|
||||
await self.knowledge_response_producer.start()
|
||||
|
||||
async def on_knowledge_config(self, workspace, config, version):
|
||||
|
||||
|
|
@ -164,7 +172,7 @@ class Processor(WorkspaceProcessor):
|
|||
|
||||
logger.debug(f"Flows for {workspace}: {self.flows[workspace]}")
|
||||
|
||||
async def process_request(self, v, id, workspace):
|
||||
async def process_request(self, v, id, workspace, producer):
|
||||
|
||||
if v.operation is None:
|
||||
raise RequestError("Null operation")
|
||||
|
|
@ -184,7 +192,7 @@ class Processor(WorkspaceProcessor):
|
|||
raise RequestError(f"Invalid operation: {v.operation}")
|
||||
|
||||
async def respond(x):
|
||||
await self.knowledge_response_producer.send(
|
||||
await producer.send(
|
||||
x, { "id": id }
|
||||
)
|
||||
return await impls[v.operation](v, respond, workspace)
|
||||
|
|
@ -199,11 +207,13 @@ class Processor(WorkspaceProcessor):
|
|||
|
||||
logger.info(f"Handling knowledge input {id}...")
|
||||
|
||||
producer = self.workspace_consumers[workspace]["response"]
|
||||
|
||||
try:
|
||||
|
||||
# We don't send a response back here, the processing
|
||||
# implementation sends whatever it needs to send.
|
||||
await self.process_request(v, id, workspace)
|
||||
await self.process_request(v, id, workspace, producer)
|
||||
|
||||
return
|
||||
|
||||
|
|
@ -215,7 +225,7 @@ class Processor(WorkspaceProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
await self.knowledge_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
|
|
@ -228,7 +238,7 @@ class Processor(WorkspaceProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
await self.knowledge_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -16,9 +16,8 @@ import os
|
|||
from mistralai import Mistral
|
||||
|
||||
from ... schema import Document, TextDocument, Metadata
|
||||
from ... schema import librarian_request_queue, librarian_response_queue
|
||||
from ... schema import Triples
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec, LibrarianClient
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec, LibrarianSpec
|
||||
|
||||
from ... provenance import (
|
||||
document_uri, page_uri as make_page_uri, derived_entity_triples,
|
||||
|
|
@ -36,9 +35,6 @@ COMPONENT_VERSION = "1.0.0"
|
|||
default_ident = "document-decoder"
|
||||
default_api_key = os.getenv("MISTRAL_TOKEN")
|
||||
|
||||
default_librarian_request_queue = librarian_request_queue
|
||||
default_librarian_response_queue = librarian_response_queue
|
||||
|
||||
pages_per_chunk = 5
|
||||
|
||||
def chunks(lst, n):
|
||||
|
|
@ -98,9 +94,8 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
# Librarian client
|
||||
self.librarian = LibrarianClient(
|
||||
id=id, backend=self.pubsub, taskgroup=self.taskgroup,
|
||||
self.register_specification(
|
||||
LibrarianSpec()
|
||||
)
|
||||
|
||||
if api_key is None:
|
||||
|
|
@ -113,10 +108,6 @@ class Processor(FlowProcessor):
|
|||
|
||||
logger.info("Mistral OCR processor initialized")
|
||||
|
||||
async def start(self):
|
||||
await super(Processor, self).start()
|
||||
await self.librarian.start()
|
||||
|
||||
def ocr(self, blob):
|
||||
"""
|
||||
Run Mistral OCR on a PDF blob, returning per-page markdown strings.
|
||||
|
|
@ -198,9 +189,9 @@ class Processor(FlowProcessor):
|
|||
|
||||
# Check MIME type if fetching from librarian
|
||||
if v.document_id:
|
||||
doc_meta = await self.librarian.fetch_document_metadata(
|
||||
doc_meta = await flow.librarian.fetch_document_metadata(
|
||||
document_id=v.document_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
)
|
||||
if doc_meta and doc_meta.kind and doc_meta.kind != "application/pdf":
|
||||
logger.error(
|
||||
|
|
@ -213,9 +204,9 @@ class Processor(FlowProcessor):
|
|||
# Get PDF content - fetch from librarian or use inline data
|
||||
if v.document_id:
|
||||
logger.info(f"Fetching document {v.document_id} from librarian...")
|
||||
content = await self.librarian.fetch_document_content(
|
||||
content = await flow.librarian.fetch_document_content(
|
||||
document_id=v.document_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
)
|
||||
if isinstance(content, str):
|
||||
content = content.encode('utf-8')
|
||||
|
|
@ -240,10 +231,10 @@ class Processor(FlowProcessor):
|
|||
page_content = markdown.encode("utf-8")
|
||||
|
||||
# Save page as child document in librarian
|
||||
await self.librarian.save_child_document(
|
||||
await flow.librarian.save_child_document(
|
||||
doc_id=page_doc_id,
|
||||
parent_id=source_doc_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
content=page_content,
|
||||
document_type="page",
|
||||
title=f"Page {page_num}",
|
||||
|
|
@ -297,18 +288,6 @@ class Processor(FlowProcessor):
|
|||
help=f'Mistral API Key'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--librarian-request-queue',
|
||||
default=default_librarian_request_queue,
|
||||
help=f'Librarian request queue (default: {default_librarian_request_queue})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--librarian-response-queue',
|
||||
default=default_librarian_response_queue,
|
||||
help=f'Librarian response queue (default: {default_librarian_response_queue})',
|
||||
)
|
||||
|
||||
def run():
|
||||
|
||||
Processor.launch(default_ident, __doc__)
|
||||
|
|
|
|||
|
|
@ -12,9 +12,8 @@ import tempfile
|
|||
import base64
|
||||
import logging
|
||||
from ... schema import Document, TextDocument, Metadata
|
||||
from ... schema import librarian_request_queue, librarian_response_queue
|
||||
from ... schema import Triples
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec, LibrarianClient
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec, LibrarianSpec
|
||||
|
||||
PyPDFLoader = None
|
||||
|
||||
|
|
@ -32,9 +31,6 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
default_ident = "document-decoder"
|
||||
|
||||
default_librarian_request_queue = librarian_request_queue
|
||||
default_librarian_response_queue = librarian_response_queue
|
||||
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
|
||||
|
|
@ -70,17 +66,12 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
# Librarian client
|
||||
self.librarian = LibrarianClient(
|
||||
id=id, backend=self.pubsub, taskgroup=self.taskgroup,
|
||||
self.register_specification(
|
||||
LibrarianSpec()
|
||||
)
|
||||
|
||||
logger.info("PDF decoder initialized")
|
||||
|
||||
async def start(self):
|
||||
await super(Processor, self).start()
|
||||
await self.librarian.start()
|
||||
|
||||
async def on_message(self, msg, consumer, flow):
|
||||
|
||||
logger.debug("PDF message received")
|
||||
|
|
@ -91,9 +82,9 @@ class Processor(FlowProcessor):
|
|||
|
||||
# Check MIME type if fetching from librarian
|
||||
if v.document_id:
|
||||
doc_meta = await self.librarian.fetch_document_metadata(
|
||||
doc_meta = await flow.librarian.fetch_document_metadata(
|
||||
document_id=v.document_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
)
|
||||
if doc_meta and doc_meta.kind and doc_meta.kind != "application/pdf":
|
||||
logger.error(
|
||||
|
|
@ -112,9 +103,9 @@ class Processor(FlowProcessor):
|
|||
logger.info(f"Fetching document {v.document_id} from librarian...")
|
||||
fp.close()
|
||||
|
||||
content = await self.librarian.fetch_document_content(
|
||||
content = await flow.librarian.fetch_document_content(
|
||||
document_id=v.document_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
)
|
||||
|
||||
# Content is base64 encoded
|
||||
|
|
@ -154,10 +145,10 @@ class Processor(FlowProcessor):
|
|||
page_content = page.page_content.encode("utf-8")
|
||||
|
||||
# Save page as child document in librarian
|
||||
await self.librarian.save_child_document(
|
||||
await flow.librarian.save_child_document(
|
||||
doc_id=page_doc_id,
|
||||
parent_id=source_doc_id,
|
||||
workspace=flow.workspace,
|
||||
|
||||
content=page_content,
|
||||
document_type="page",
|
||||
title=f"Page {page_num}",
|
||||
|
|
@ -210,18 +201,6 @@ class Processor(FlowProcessor):
|
|||
def add_args(parser):
|
||||
FlowProcessor.add_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
'--librarian-request-queue',
|
||||
default=default_librarian_request_queue,
|
||||
help=f'Librarian request queue (default: {default_librarian_request_queue})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--librarian-response-queue',
|
||||
default=default_librarian_response_queue,
|
||||
help=f'Librarian response queue (default: {default_librarian_response_queue})',
|
||||
)
|
||||
|
||||
def run():
|
||||
|
||||
Processor.launch(default_ident, __doc__)
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ class Processor(WorkspaceProcessor):
|
|||
self.flow_request_queue_base = params.get(
|
||||
"flow_request_queue", default_flow_request_queue
|
||||
)
|
||||
flow_response_queue = params.get(
|
||||
self.flow_response_queue_base = params.get(
|
||||
"flow_response_queue", default_flow_response_queue
|
||||
)
|
||||
|
||||
|
|
@ -54,17 +54,6 @@ class Processor(WorkspaceProcessor):
|
|||
}
|
||||
)
|
||||
|
||||
flow_response_metrics = ProducerMetrics(
|
||||
processor = self.id, flow = None, name = "flow-response"
|
||||
)
|
||||
|
||||
self.flow_response_producer = Producer(
|
||||
backend = self.pubsub,
|
||||
topic = flow_response_queue,
|
||||
schema = FlowResponse,
|
||||
metrics = flow_response_metrics,
|
||||
)
|
||||
|
||||
config_req_metrics = ProducerMetrics(
|
||||
processor=self.id, flow=None, name="config-request",
|
||||
)
|
||||
|
|
@ -96,17 +85,31 @@ class Processor(WorkspaceProcessor):
|
|||
if workspace in self.workspace_consumers:
|
||||
return
|
||||
|
||||
queue = workspace_queue(
|
||||
req_queue = workspace_queue(
|
||||
self.flow_request_queue_base, workspace,
|
||||
)
|
||||
resp_queue = workspace_queue(
|
||||
self.flow_response_queue_base, workspace,
|
||||
)
|
||||
|
||||
await self.pubsub.ensure_topic(queue)
|
||||
await self.pubsub.ensure_topic(req_queue)
|
||||
await self.pubsub.ensure_topic(resp_queue)
|
||||
|
||||
response_producer = Producer(
|
||||
backend=self.pubsub,
|
||||
topic=resp_queue,
|
||||
schema=FlowResponse,
|
||||
metrics=ProducerMetrics(
|
||||
processor=self.id, flow=None,
|
||||
name=f"flow-response-{workspace}",
|
||||
),
|
||||
)
|
||||
|
||||
consumer = Consumer(
|
||||
taskgroup=self.taskgroup,
|
||||
backend=self.pubsub,
|
||||
flow=None,
|
||||
topic=queue,
|
||||
topic=req_queue,
|
||||
subscriber=self.id,
|
||||
schema=FlowRequest,
|
||||
handler=partial(
|
||||
|
|
@ -118,16 +121,22 @@ class Processor(WorkspaceProcessor):
|
|||
),
|
||||
)
|
||||
|
||||
await response_producer.start()
|
||||
await consumer.start()
|
||||
self.workspace_consumers[workspace] = consumer
|
||||
|
||||
self.workspace_consumers[workspace] = {
|
||||
"consumer": consumer,
|
||||
"response": response_producer,
|
||||
}
|
||||
|
||||
logger.info(f"Subscribed to workspace queue: {workspace}")
|
||||
|
||||
async def on_workspace_deleted(self, workspace):
|
||||
|
||||
consumer = self.workspace_consumers.pop(workspace, None)
|
||||
if consumer:
|
||||
await consumer.stop()
|
||||
clients = self.workspace_consumers.pop(workspace, None)
|
||||
if clients:
|
||||
for client in clients.values():
|
||||
await client.stop()
|
||||
logger.info(f"Unsubscribed from workspace queue: {workspace}")
|
||||
|
||||
async def start(self):
|
||||
|
|
@ -149,9 +158,11 @@ class Processor(WorkspaceProcessor):
|
|||
|
||||
logger.debug(f"Handling flow request {id}...")
|
||||
|
||||
producer = self.workspace_consumers[workspace]["response"]
|
||||
|
||||
resp = await self.flow.handle(v, workspace)
|
||||
|
||||
await self.flow_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
|
|
@ -166,7 +177,7 @@ class Processor(WorkspaceProcessor):
|
|||
),
|
||||
)
|
||||
|
||||
await self.flow_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,11 +7,11 @@ import logging
|
|||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from ... schema import flow_request_queue
|
||||
from ... schema import librarian_request_queue
|
||||
from ... schema import knowledge_request_queue
|
||||
from ... schema import collection_request_queue
|
||||
from ... schema import config_request_queue
|
||||
from ... schema import flow_request_queue, flow_response_queue
|
||||
from ... schema import librarian_request_queue, librarian_response_queue
|
||||
from ... schema import knowledge_request_queue, knowledge_response_queue
|
||||
from ... schema import collection_request_queue, collection_response_queue
|
||||
from ... schema import config_request_queue, config_response_queue
|
||||
|
||||
from . config import ConfigRequestor
|
||||
from . flow import FlowRequestor
|
||||
|
|
@ -96,6 +96,14 @@ workspace_default_request_queues = {
|
|||
"collection-management": collection_request_queue,
|
||||
}
|
||||
|
||||
workspace_default_response_queues = {
|
||||
"config": config_response_queue,
|
||||
"flow": flow_response_queue,
|
||||
"librarian": librarian_response_queue,
|
||||
"knowledge": knowledge_response_queue,
|
||||
"collection-management": collection_response_queue,
|
||||
}
|
||||
|
||||
global_dispatchers = {**system_dispatchers, **workspace_dispatchers}
|
||||
|
||||
sender_dispatchers = {
|
||||
|
|
@ -267,11 +275,16 @@ class DispatcherManager:
|
|||
response_queue = self.queue_overrides[kind].get("response")
|
||||
|
||||
if kind in workspace_dispatchers and workspace:
|
||||
base_queue = (
|
||||
base_req_queue = (
|
||||
request_queue
|
||||
or workspace_default_request_queues[kind]
|
||||
)
|
||||
request_queue = f"{base_queue}:{workspace}"
|
||||
request_queue = f"{base_req_queue}:{workspace}"
|
||||
base_resp_queue = (
|
||||
response_queue
|
||||
or workspace_default_response_queues[kind]
|
||||
)
|
||||
response_queue = f"{base_resp_queue}:{workspace}"
|
||||
consumer_name = f"{self.prefix}-{kind}-{workspace}"
|
||||
else:
|
||||
consumer_name = f"{self.prefix}-{kind}-request"
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ class Processor(WorkspaceProcessor):
|
|||
"librarian_request_queue", default_librarian_request_queue
|
||||
)
|
||||
|
||||
librarian_response_queue = params.get(
|
||||
self.librarian_response_queue_base = params.get(
|
||||
"librarian_response_queue", default_librarian_response_queue
|
||||
)
|
||||
|
||||
|
|
@ -77,7 +77,7 @@ class Processor(WorkspaceProcessor):
|
|||
"collection_request_queue", default_collection_request_queue
|
||||
)
|
||||
|
||||
collection_response_queue = params.get(
|
||||
self.collection_response_queue_base = params.get(
|
||||
"collection_response_queue", default_collection_response_queue
|
||||
)
|
||||
|
||||
|
|
@ -132,9 +132,9 @@ class Processor(WorkspaceProcessor):
|
|||
super(Processor, self).__init__(
|
||||
**params | {
|
||||
"librarian_request_queue": self.librarian_request_queue_base,
|
||||
"librarian_response_queue": librarian_response_queue,
|
||||
"librarian_response_queue": self.librarian_response_queue_base,
|
||||
"collection_request_queue": self.collection_request_queue_base,
|
||||
"collection_response_queue": collection_response_queue,
|
||||
"collection_response_queue": self.collection_response_queue_base,
|
||||
"object_store_endpoint": object_store_endpoint,
|
||||
"object_store_access_key": object_store_access_key,
|
||||
"cassandra_host": self.cassandra_host,
|
||||
|
|
@ -143,28 +143,6 @@ class Processor(WorkspaceProcessor):
|
|||
}
|
||||
)
|
||||
|
||||
librarian_response_metrics = ProducerMetrics(
|
||||
processor = self.id, flow = None, name = "librarian-response"
|
||||
)
|
||||
|
||||
collection_response_metrics = ProducerMetrics(
|
||||
processor = self.id, flow = None, name = "collection-response"
|
||||
)
|
||||
|
||||
self.librarian_response_producer = Producer(
|
||||
backend = self.pubsub,
|
||||
topic = librarian_response_queue,
|
||||
schema = LibrarianResponse,
|
||||
metrics = librarian_response_metrics,
|
||||
)
|
||||
|
||||
self.collection_response_producer = Producer(
|
||||
backend = self.pubsub,
|
||||
topic = collection_response_queue,
|
||||
schema = CollectionManagementResponse,
|
||||
metrics = collection_response_metrics,
|
||||
)
|
||||
|
||||
# Config service client for collection management
|
||||
config_request_metrics = ProducerMetrics(
|
||||
processor = id, flow = None, name = "config-request"
|
||||
|
|
@ -230,21 +208,49 @@ class Processor(WorkspaceProcessor):
|
|||
if workspace in self.workspace_consumers:
|
||||
return
|
||||
|
||||
lib_queue = workspace_queue(
|
||||
lib_req_queue = workspace_queue(
|
||||
self.librarian_request_queue_base, workspace,
|
||||
)
|
||||
col_queue = workspace_queue(
|
||||
lib_resp_queue = workspace_queue(
|
||||
self.librarian_response_queue_base, workspace,
|
||||
)
|
||||
col_req_queue = workspace_queue(
|
||||
self.collection_request_queue_base, workspace,
|
||||
)
|
||||
col_resp_queue = workspace_queue(
|
||||
self.collection_response_queue_base, workspace,
|
||||
)
|
||||
|
||||
await self.pubsub.ensure_topic(lib_queue)
|
||||
await self.pubsub.ensure_topic(col_queue)
|
||||
await self.pubsub.ensure_topic(lib_req_queue)
|
||||
await self.pubsub.ensure_topic(lib_resp_queue)
|
||||
await self.pubsub.ensure_topic(col_req_queue)
|
||||
await self.pubsub.ensure_topic(col_resp_queue)
|
||||
|
||||
lib_response_producer = Producer(
|
||||
backend=self.pubsub,
|
||||
topic=lib_resp_queue,
|
||||
schema=LibrarianResponse,
|
||||
metrics=ProducerMetrics(
|
||||
processor=self.id, flow=None,
|
||||
name=f"librarian-response-{workspace}",
|
||||
),
|
||||
)
|
||||
|
||||
col_response_producer = Producer(
|
||||
backend=self.pubsub,
|
||||
topic=col_resp_queue,
|
||||
schema=CollectionManagementResponse,
|
||||
metrics=ProducerMetrics(
|
||||
processor=self.id, flow=None,
|
||||
name=f"collection-response-{workspace}",
|
||||
),
|
||||
)
|
||||
|
||||
lib_consumer = Consumer(
|
||||
taskgroup=self.taskgroup,
|
||||
backend=self.pubsub,
|
||||
flow=None,
|
||||
topic=lib_queue,
|
||||
topic=lib_req_queue,
|
||||
subscriber=self.id,
|
||||
schema=LibrarianRequest,
|
||||
handler=partial(
|
||||
|
|
@ -260,7 +266,7 @@ class Processor(WorkspaceProcessor):
|
|||
taskgroup=self.taskgroup,
|
||||
backend=self.pubsub,
|
||||
flow=None,
|
||||
topic=col_queue,
|
||||
topic=col_req_queue,
|
||||
subscriber=self.id,
|
||||
schema=CollectionManagementRequest,
|
||||
handler=partial(
|
||||
|
|
@ -272,29 +278,31 @@ class Processor(WorkspaceProcessor):
|
|||
),
|
||||
)
|
||||
|
||||
await lib_response_producer.start()
|
||||
await col_response_producer.start()
|
||||
await lib_consumer.start()
|
||||
await col_consumer.start()
|
||||
|
||||
self.workspace_consumers[workspace] = {
|
||||
"librarian": lib_consumer,
|
||||
"librarian-response": lib_response_producer,
|
||||
"collection": col_consumer,
|
||||
"collection-response": col_response_producer,
|
||||
}
|
||||
|
||||
logger.info(f"Subscribed to workspace queues: {workspace}")
|
||||
|
||||
async def on_workspace_deleted(self, workspace):
|
||||
|
||||
consumers = self.workspace_consumers.pop(workspace, None)
|
||||
if consumers:
|
||||
for consumer in consumers.values():
|
||||
await consumer.stop()
|
||||
clients = self.workspace_consumers.pop(workspace, None)
|
||||
if clients:
|
||||
for client in clients.values():
|
||||
await client.stop()
|
||||
logger.info(f"Unsubscribed from workspace queues: {workspace}")
|
||||
|
||||
async def start(self):
|
||||
|
||||
await super(Processor, self).start()
|
||||
await self.librarian_response_producer.start()
|
||||
await self.collection_response_producer.start()
|
||||
await self.config_request_producer.start()
|
||||
await self.config_response_consumer.start()
|
||||
|
||||
|
|
@ -505,12 +513,14 @@ class Processor(WorkspaceProcessor):
|
|||
|
||||
logger.info(f"Handling librarian input {id}...")
|
||||
|
||||
producer = self.workspace_consumers[workspace]["librarian-response"]
|
||||
|
||||
try:
|
||||
|
||||
# Handle streaming operations specially
|
||||
if v.operation == "stream-document":
|
||||
async for resp in self.librarian.stream_document(v, workspace):
|
||||
await self.librarian_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
return
|
||||
|
|
@ -518,7 +528,7 @@ class Processor(WorkspaceProcessor):
|
|||
# Non-streaming operations
|
||||
resp = await self.process_request(v, workspace)
|
||||
|
||||
await self.librarian_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
|
|
@ -532,7 +542,7 @@ class Processor(WorkspaceProcessor):
|
|||
),
|
||||
)
|
||||
|
||||
await self.librarian_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
|
|
@ -545,7 +555,7 @@ class Processor(WorkspaceProcessor):
|
|||
),
|
||||
)
|
||||
|
||||
await self.librarian_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
|
|
@ -576,9 +586,11 @@ class Processor(WorkspaceProcessor):
|
|||
|
||||
logger.info(f"Handling collection request {id}...")
|
||||
|
||||
producer = self.workspace_consumers[workspace]["collection-response"]
|
||||
|
||||
try:
|
||||
resp = await self.process_collection_request(v, workspace)
|
||||
await self.collection_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
except RequestError as e:
|
||||
|
|
@ -589,7 +601,7 @@ class Processor(WorkspaceProcessor):
|
|||
),
|
||||
timestamp=datetime.now().isoformat()
|
||||
)
|
||||
await self.collection_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
except Exception as e:
|
||||
|
|
@ -600,7 +612,7 @@ class Processor(WorkspaceProcessor):
|
|||
),
|
||||
timestamp=datetime.now().isoformat()
|
||||
)
|
||||
await self.collection_response_producer.send(
|
||||
await producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,21 +4,16 @@ Simple RAG service, performs query using document RAG and an LLM.
|
|||
Input is query, output is response.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import logging
|
||||
|
||||
import uuid
|
||||
|
||||
from ... schema import DocumentRagQuery, DocumentRagResponse, Error
|
||||
from ... schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
|
||||
from ... schema import Triples, Metadata
|
||||
from ... provenance import GRAPH_RETRIEVAL
|
||||
from . document_rag import DocumentRag
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
from ... base import PromptClientSpec, EmbeddingsClientSpec
|
||||
from ... base import DocumentEmbeddingsClientSpec
|
||||
from ... base import LibrarianClient
|
||||
from ... base import LibrarianSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -85,58 +80,14 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
# Librarian client
|
||||
self.librarian = LibrarianClient(
|
||||
id=id,
|
||||
backend=self.pubsub,
|
||||
taskgroup=self.taskgroup,
|
||||
self.register_specification(
|
||||
LibrarianSpec()
|
||||
)
|
||||
|
||||
async def start(self):
|
||||
await super(Processor, self).start()
|
||||
await self.librarian.start()
|
||||
|
||||
async def fetch_chunk_content(self, chunk_id, workspace, timeout=120):
|
||||
"""Fetch chunk content from librarian. Chunks are small so
|
||||
single request-response is fine."""
|
||||
return await self.librarian.fetch_document_text(
|
||||
document_id=chunk_id, workspace=workspace, timeout=timeout,
|
||||
)
|
||||
|
||||
async def save_answer_content(self, doc_id, workspace, content, title=None, timeout=120):
|
||||
"""Save answer content to the librarian."""
|
||||
|
||||
doc_metadata = DocumentMetadata(
|
||||
id=doc_id,
|
||||
workspace=workspace,
|
||||
kind="text/plain",
|
||||
title=title or "DocumentRAG Answer",
|
||||
document_type="answer",
|
||||
)
|
||||
|
||||
request = LibrarianRequest(
|
||||
operation="add-document",
|
||||
document_id=doc_id,
|
||||
document_metadata=doc_metadata,
|
||||
content=base64.b64encode(content.encode("utf-8")).decode("utf-8"),
|
||||
workspace=workspace,
|
||||
)
|
||||
|
||||
await self.librarian.request(request, timeout=timeout)
|
||||
return doc_id
|
||||
|
||||
async def on_request(self, msg, consumer, flow):
|
||||
|
||||
try:
|
||||
|
||||
self.rag = DocumentRag(
|
||||
embeddings_client = flow("embeddings-request"),
|
||||
doc_embeddings_client = flow("document-embeddings-request"),
|
||||
prompt_client = flow("prompt-request"),
|
||||
fetch_chunk = self.fetch_chunk_content,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
v = msg.value()
|
||||
|
||||
# Sender-produced ID
|
||||
|
|
@ -144,15 +95,25 @@ class Processor(FlowProcessor):
|
|||
|
||||
logger.info(f"Handling input {id}...")
|
||||
|
||||
async def fetch_chunk(chunk_id, timeout=120):
|
||||
return await flow.librarian.fetch_document_text(
|
||||
document_id=chunk_id, timeout=timeout,
|
||||
)
|
||||
|
||||
self.rag = DocumentRag(
|
||||
embeddings_client = flow("embeddings-request"),
|
||||
doc_embeddings_client = flow("document-embeddings-request"),
|
||||
prompt_client = flow("prompt-request"),
|
||||
fetch_chunk = fetch_chunk,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
if v.doc_limit:
|
||||
doc_limit = v.doc_limit
|
||||
else:
|
||||
doc_limit = self.doc_limit
|
||||
|
||||
# Real-time explainability callback - emits triples and IDs as they're generated
|
||||
# Triples are stored in the request's collection with a named graph (urn:graph:retrieval)
|
||||
async def send_explainability(triples, explain_id):
|
||||
# Send triples to explainability queue - stores in same collection with named graph
|
||||
await flow("explainability").send(Triples(
|
||||
metadata=Metadata(
|
||||
id=explain_id,
|
||||
|
|
@ -161,7 +122,6 @@ class Processor(FlowProcessor):
|
|||
triples=triples,
|
||||
))
|
||||
|
||||
# Send explain data to response queue
|
||||
await flow("response").send(
|
||||
DocumentRagResponse(
|
||||
response=None,
|
||||
|
|
@ -173,13 +133,12 @@ class Processor(FlowProcessor):
|
|||
properties={"id": id}
|
||||
)
|
||||
|
||||
# Callback to save answer content to librarian
|
||||
async def save_answer(doc_id, answer_text):
|
||||
await self.save_answer_content(
|
||||
await flow.librarian.save_document(
|
||||
doc_id=doc_id,
|
||||
workspace=flow.workspace,
|
||||
content=answer_text,
|
||||
title=f"DocumentRAG Answer: {v.query[:50]}...",
|
||||
document_type="answer",
|
||||
)
|
||||
|
||||
# Check if streaming is requested
|
||||
|
|
|
|||
|
|
@ -4,29 +4,22 @@ Simple RAG service, performs query using graph RAG and an LLM.
|
|||
Input is query, output is response.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
from ... schema import GraphRagQuery, GraphRagResponse, Error
|
||||
from ... schema import Triples, Metadata
|
||||
from ... schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
|
||||
from ... schema import librarian_request_queue, librarian_response_queue
|
||||
from ... provenance import GRAPH_RETRIEVAL
|
||||
from . graph_rag import GraphRag
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
from ... base import PromptClientSpec, EmbeddingsClientSpec
|
||||
from ... base import GraphEmbeddingsClientSpec, TriplesClientSpec
|
||||
from ... base import Consumer, Producer, ConsumerMetrics, ProducerMetrics
|
||||
from ... base import LibrarianSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "graph-rag"
|
||||
default_concurrency = 1
|
||||
default_librarian_request_queue = librarian_request_queue
|
||||
default_librarian_response_queue = librarian_response_queue
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
|
||||
|
|
@ -117,115 +110,12 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
# Librarian client for storing answer content
|
||||
librarian_request_q = params.get(
|
||||
"librarian_request_queue", default_librarian_request_queue
|
||||
self.register_specification(
|
||||
LibrarianSpec()
|
||||
)
|
||||
librarian_response_q = params.get(
|
||||
"librarian_response_queue", default_librarian_response_queue
|
||||
)
|
||||
|
||||
librarian_request_metrics = ProducerMetrics(
|
||||
processor=id, flow=None, name="librarian-request"
|
||||
)
|
||||
|
||||
self.librarian_request_producer = Producer(
|
||||
backend=self.pubsub,
|
||||
topic=librarian_request_q,
|
||||
schema=LibrarianRequest,
|
||||
metrics=librarian_request_metrics,
|
||||
)
|
||||
|
||||
librarian_response_metrics = ConsumerMetrics(
|
||||
processor=id, flow=None, name="librarian-response"
|
||||
)
|
||||
|
||||
self.librarian_response_consumer = Consumer(
|
||||
taskgroup=self.taskgroup,
|
||||
backend=self.pubsub,
|
||||
flow=None,
|
||||
topic=librarian_response_q,
|
||||
subscriber=f"{id}-librarian",
|
||||
schema=LibrarianResponse,
|
||||
handler=self.on_librarian_response,
|
||||
metrics=librarian_response_metrics,
|
||||
)
|
||||
|
||||
# Pending librarian requests: request_id -> asyncio.Future
|
||||
self.pending_librarian_requests = {}
|
||||
|
||||
logger.info("Graph RAG service initialized")
|
||||
|
||||
async def start(self):
|
||||
await super(Processor, self).start()
|
||||
await self.librarian_request_producer.start()
|
||||
await self.librarian_response_consumer.start()
|
||||
|
||||
async def on_librarian_response(self, msg, consumer, flow):
|
||||
"""Handle responses from the librarian service."""
|
||||
response = msg.value()
|
||||
request_id = msg.properties().get("id")
|
||||
|
||||
if request_id and request_id in self.pending_librarian_requests:
|
||||
future = self.pending_librarian_requests.pop(request_id)
|
||||
future.set_result(response)
|
||||
|
||||
async def save_answer_content(self, doc_id, workspace, content, title=None, timeout=120):
|
||||
"""
|
||||
Save answer content to the librarian.
|
||||
|
||||
Args:
|
||||
doc_id: ID for the answer document
|
||||
workspace: Workspace for isolation
|
||||
content: Answer text content
|
||||
title: Optional title
|
||||
timeout: Request timeout in seconds
|
||||
|
||||
Returns:
|
||||
The document ID on success
|
||||
"""
|
||||
request_id = str(uuid.uuid4())
|
||||
|
||||
doc_metadata = DocumentMetadata(
|
||||
id=doc_id,
|
||||
workspace=workspace,
|
||||
kind="text/plain",
|
||||
title=title or "GraphRAG Answer",
|
||||
document_type="answer",
|
||||
)
|
||||
|
||||
request = LibrarianRequest(
|
||||
operation="add-document",
|
||||
document_id=doc_id,
|
||||
document_metadata=doc_metadata,
|
||||
content=base64.b64encode(content.encode("utf-8")).decode("utf-8"),
|
||||
workspace=workspace,
|
||||
)
|
||||
|
||||
# Create future for response
|
||||
future = asyncio.get_event_loop().create_future()
|
||||
self.pending_librarian_requests[request_id] = future
|
||||
|
||||
try:
|
||||
# Send request
|
||||
await self.librarian_request_producer.send(
|
||||
request, properties={"id": request_id}
|
||||
)
|
||||
|
||||
# Wait for response
|
||||
response = await asyncio.wait_for(future, timeout=timeout)
|
||||
|
||||
if response.error:
|
||||
raise RuntimeError(
|
||||
f"Librarian error saving answer: {response.error.type}: {response.error.message}"
|
||||
)
|
||||
|
||||
return doc_id
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
self.pending_librarian_requests.pop(request_id, None)
|
||||
raise RuntimeError(f"Timeout saving answer document {doc_id}")
|
||||
|
||||
async def on_request(self, msg, consumer, flow):
|
||||
|
||||
try:
|
||||
|
|
@ -306,13 +196,12 @@ class Processor(FlowProcessor):
|
|||
else:
|
||||
edge_limit = self.default_edge_limit
|
||||
|
||||
# Callback to save answer content to librarian
|
||||
async def save_answer(doc_id, answer_text):
|
||||
await self.save_answer_content(
|
||||
await flow.librarian.save_document(
|
||||
doc_id=doc_id,
|
||||
workspace=flow.workspace,
|
||||
content=answer_text,
|
||||
title=f"GraphRAG Answer: {v.query[:50]}...",
|
||||
document_type="answer",
|
||||
)
|
||||
|
||||
# Check if streaming is requested
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue