Merge branch 'release/v2.3'

Cyber MacGeddon 2026-04-16 09:13:52 +01:00
commit 1f30a3bcea
155 changed files with 6526 additions and 1885 deletions

View file

@ -15,6 +15,7 @@ dependencies = [
"requests",
"python-logging-loki",
"pika",
"pyyaml",
]
classifiers = [
"Programming Language :: Python :: 3",
@ -24,6 +25,9 @@ classifiers = [
[project.urls]
Homepage = "https://github.com/trustgraph-ai/trustgraph"
[project.scripts]
processor-group = "trustgraph.base.processor_group:run"
[tool.setuptools.packages.find]
include = ["trustgraph*"]
@ -31,4 +35,4 @@ include = ["trustgraph*"]
"trustgraph.i18n.packs" = ["*.json"]
[tool.setuptools.dynamic]
version = {attr = "trustgraph.base_version.__version__"}
version = {attr = "trustgraph.base_version.__version__"}

View file

@ -107,6 +107,7 @@ from .types import (
AgentObservation,
AgentAnswer,
RAGChunk,
TextCompletionResult,
ProvenanceEvent,
)
@ -185,6 +186,7 @@ __all__ = [
"AgentObservation",
"AgentAnswer",
"RAGChunk",
"TextCompletionResult",
"ProvenanceEvent",
# Exceptions

View file

@ -14,6 +14,8 @@ import aiohttp
import json
from typing import Optional, Dict, Any, List
from . types import TextCompletionResult
from . exceptions import ProtocolException, ApplicationException
@ -434,12 +436,11 @@ class AsyncFlowInstance:
return await self.request("agent", request_data)
async def text_completion(self, system: str, prompt: str, **kwargs: Any) -> str:
async def text_completion(self, system: str, prompt: str, **kwargs: Any) -> TextCompletionResult:
"""
Generate text completion (non-streaming).
Generates a text response from an LLM given a system prompt and user prompt.
Returns the complete response text.
Note: This method does not support streaming. For streaming text generation,
use AsyncSocketFlowInstance.text_completion() instead.
@ -450,19 +451,19 @@ class AsyncFlowInstance:
**kwargs: Additional service-specific parameters
Returns:
str: Complete generated text response
TextCompletionResult: Result with text, in_token, out_token, model
Example:
```python
async_flow = await api.async_flow()
flow = async_flow.id("default")
# Generate text
response = await flow.text_completion(
result = await flow.text_completion(
system="You are a helpful assistant.",
prompt="Explain quantum computing in simple terms."
)
print(response)
print(result.text)
print(f"Tokens: {result.in_token} in, {result.out_token} out")
```
"""
request_data = {
@ -473,7 +474,12 @@ class AsyncFlowInstance:
request_data.update(kwargs)
result = await self.request("text-completion", request_data)
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
async def graph_rag(self, query: str, user: str, collection: str,
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,

View file

@ -4,7 +4,7 @@ import asyncio
import websockets
from typing import Optional, Dict, Any, AsyncIterator, Union
from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk
from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk, TextCompletionResult
from . exceptions import ProtocolException, ApplicationException
@ -178,30 +178,32 @@ class AsyncSocketClient:
def _parse_chunk(self, resp: Dict[str, Any]):
"""Parse response chunk into appropriate type. Returns None for non-content messages."""
chunk_type = resp.get("chunk_type")
message_type = resp.get("message_type")
# Handle new GraphRAG message format with message_type
if message_type == "provenance":
return None
if chunk_type == "thought":
if message_type == "thought":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
)
elif chunk_type == "observation":
elif message_type == "observation":
return AgentObservation(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
)
elif chunk_type == "answer" or chunk_type == "final-answer":
elif message_type == "answer" or message_type == "final-answer":
return AgentAnswer(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
end_of_dialog=resp.get("end_of_dialog", False)
end_of_dialog=resp.get("end_of_dialog", False),
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
elif chunk_type == "action":
elif message_type == "action":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
@ -211,7 +213,10 @@ class AsyncSocketClient:
return RAGChunk(
content=content,
end_of_stream=resp.get("end_of_stream", False),
error=None
error=None,
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
async def aclose(self):
@ -269,7 +274,11 @@ class AsyncSocketFlowInstance:
return await self.client._send_request("agent", self.flow_id, request)
async def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs):
"""Text completion with optional streaming"""
"""Text completion with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an async iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"system": system,
"prompt": prompt,
@ -281,13 +290,18 @@ class AsyncSocketFlowInstance:
return self._text_completion_streaming(request)
else:
result = await self.client._send_request("text-completion", self.flow_id, request)
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
async def _text_completion_streaming(self, request):
"""Helper for streaming text completion"""
"""Helper for streaming text completion. Yields RAGChunk objects."""
async for chunk in self.client._send_request_streaming("text-completion", self.flow_id, request):
if hasattr(chunk, 'content'):
yield chunk.content
if isinstance(chunk, RAGChunk):
yield chunk
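
For callers, the streaming path now yields RAGChunk objects rather than bare strings, with token counts populated only on the final chunk. A minimal consumption sketch, assuming `flow` is an AsyncSocketFlowInstance obtained from the client:

```python
# Sketch: consuming the streaming variant; `flow` is assumed to be an
# AsyncSocketFlowInstance. Each yielded item is a RAGChunk.
async def stream_completion(flow):
    chunks = await flow.text_completion(
        system="You are a helpful assistant.",
        prompt="Explain quantum computing in simple terms.",
        streaming=True,
    )
    parts = []
    async for chunk in chunks:
        parts.append(chunk.content)
        if chunk.end_of_stream:
            # Token counts and model arrive only on the final chunk.
            print(f"Tokens: {chunk.in_token} in, {chunk.out_token} out ({chunk.model})")
    return "".join(parts)
```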
async def graph_rag(self, query: str, user: str, collection: str,
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,

View file

@ -11,7 +11,7 @@ import base64
from .. knowledge import hash, Uri, Literal, QuotedTriple
from .. schema import IRI, LITERAL, TRIPLE
from . types import Triple
from . types import Triple, TextCompletionResult
from . exceptions import ProtocolException
@ -360,16 +360,17 @@ class FlowInstance:
prompt: User prompt/question
Returns:
str: Generated response text
TextCompletionResult: Result with text, in_token, out_token, model
Example:
```python
flow = api.flow().id("default")
response = flow.text_completion(
result = flow.text_completion(
system="You are a helpful assistant",
prompt="What is quantum computing?"
)
print(response)
print(result.text)
print(f"Tokens: {result.in_token} in, {result.out_token} out")
```
"""
@ -379,10 +380,17 @@ class FlowInstance:
"prompt": prompt
}
return self.request(
result = self.request(
"service/text-completion",
input
)["response"]
)
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def agent(self, question, user="trustgraph", state=None, group=None, history=None):
"""
@ -498,10 +506,17 @@ class FlowInstance:
"edge-limit": edge_limit,
}
return self.request(
result = self.request(
"service/graph-rag",
input
)["response"]
)
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def document_rag(
self, query, user="trustgraph", collection="default",
@ -543,10 +558,17 @@ class FlowInstance:
"doc-limit": doc_limit,
}
return self.request(
result = self.request(
"service/document-rag",
input
)["response"]
)
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
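
Callers of the synchronous FlowInstance see the same shape change for graph_rag and document_rag: the return value is a TextCompletionResult rather than the response string, so existing code needs `.text`. A hedged sketch, with the query text purely illustrative:

```python
# Sketch: document_rag now returns a TextCompletionResult; `api` is
# assumed to be an already-constructed API client.
flow = api.flow().id("default")
result = flow.document_rag(query="What is quantum computing?")

print(result.text)                    # previously the whole return value
if result.in_token is not None:
    print(f"Tokens: {result.in_token} in, {result.out_token} out ({result.model})")
```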
def embeddings(self, texts):
"""

View file

@ -14,7 +14,7 @@ import websockets
from typing import Optional, Dict, Any, Iterator, Union, List
from threading import Lock
from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk, StreamingChunk, ProvenanceEvent
from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk, StreamingChunk, ProvenanceEvent, TextCompletionResult
from . exceptions import ProtocolException, raise_from_error_dict
@ -360,41 +360,36 @@ class SocketClient:
def _parse_chunk(self, resp: Dict[str, Any], include_provenance: bool = False) -> Optional[StreamingChunk]:
"""Parse response chunk into appropriate type. Returns None for non-content messages."""
chunk_type = resp.get("chunk_type")
message_type = resp.get("message_type")
# Handle GraphRAG/DocRAG message format with message_type
if message_type == "explain":
if include_provenance:
return self._build_provenance_event(resp)
return None
# Handle Agent message format with chunk_type="explain"
if chunk_type == "explain":
if include_provenance:
return self._build_provenance_event(resp)
return None
if chunk_type == "thought":
if message_type == "thought":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
message_id=resp.get("message_id", ""),
)
elif chunk_type == "observation":
elif message_type == "observation":
return AgentObservation(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
message_id=resp.get("message_id", ""),
)
elif chunk_type == "answer" or chunk_type == "final-answer":
elif message_type == "answer" or message_type == "final-answer":
return AgentAnswer(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
end_of_dialog=resp.get("end_of_dialog", False),
message_id=resp.get("message_id", ""),
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
elif chunk_type == "action":
elif message_type == "action":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
@ -404,7 +399,10 @@ class SocketClient:
return RAGChunk(
content=content,
end_of_stream=resp.get("end_of_stream", False),
error=None
error=None,
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
def _build_provenance_event(self, resp: Dict[str, Any]) -> ProvenanceEvent:
@ -543,8 +541,12 @@ class SocketFlowInstance:
streaming=True, include_provenance=True
)
def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs) -> Union[str, Iterator[str]]:
"""Execute text completion with optional streaming."""
def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute text completion with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"system": system,
"prompt": prompt,
@ -557,12 +559,17 @@ class SocketFlowInstance:
if streaming:
return self._text_completion_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]:
def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
for chunk in result:
if hasattr(chunk, 'content'):
yield chunk.content
if isinstance(chunk, RAGChunk):
yield chunk
def graph_rag(
self,
@ -577,8 +584,12 @@ class SocketFlowInstance:
edge_limit: int = 25,
streaming: bool = False,
**kwargs: Any
) -> Union[str, Iterator[str]]:
"""Execute graph-based RAG query with optional streaming."""
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute graph-based RAG query with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"query": query,
"user": user,
@ -598,7 +609,12 @@ class SocketFlowInstance:
if streaming:
return self._rag_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def graph_rag_explain(
self,
@ -642,8 +658,12 @@ class SocketFlowInstance:
doc_limit: int = 10,
streaming: bool = False,
**kwargs: Any
) -> Union[str, Iterator[str]]:
"""Execute document-based RAG query with optional streaming."""
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute document-based RAG query with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"query": query,
"user": user,
@ -658,7 +678,12 @@ class SocketFlowInstance:
if streaming:
return self._rag_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def document_rag_explain(
self,
@ -684,10 +709,10 @@ class SocketFlowInstance:
streaming=True, include_provenance=True
)
def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]:
def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
for chunk in result:
if hasattr(chunk, 'content'):
yield chunk.content
if isinstance(chunk, RAGChunk):
yield chunk
def prompt(
self,
@ -695,8 +720,12 @@ class SocketFlowInstance:
variables: Dict[str, str],
streaming: bool = False,
**kwargs: Any
) -> Union[str, Iterator[str]]:
"""Execute a prompt template with optional streaming."""
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute a prompt template with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"id": id,
"variables": variables,
@ -709,7 +738,12 @@ class SocketFlowInstance:
if streaming:
return self._rag_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("text", result.get("response", "")),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def graph_embeddings_query(
self,

View file

@ -149,10 +149,10 @@ class AgentThought(StreamingChunk):
Attributes:
content: Agent's thought text
end_of_message: True if this completes the current thought
chunk_type: Always "thought"
message_type: Always "thought"
message_id: Provenance URI of the entity being built
"""
chunk_type: str = "thought"
message_type: str = "thought"
message_id: str = ""
@dataclasses.dataclass
@ -166,10 +166,10 @@ class AgentObservation(StreamingChunk):
Attributes:
content: Observation text describing tool results
end_of_message: True if this completes the current observation
chunk_type: Always "observation"
message_type: Always "observation"
message_id: Provenance URI of the entity being built
"""
chunk_type: str = "observation"
message_type: str = "observation"
message_id: str = ""
@dataclasses.dataclass
@ -184,11 +184,14 @@ class AgentAnswer(StreamingChunk):
content: Answer text
end_of_message: True if this completes the current answer segment
end_of_dialog: True if this completes the entire agent interaction
chunk_type: Always "final-answer"
message_type: Always "final-answer"
"""
chunk_type: str = "final-answer"
message_type: str = "final-answer"
end_of_dialog: bool = False
message_id: str = ""
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
@dataclasses.dataclass
class RAGChunk(StreamingChunk):
@ -202,11 +205,37 @@ class RAGChunk(StreamingChunk):
content: Generated text content
end_of_stream: True if this is the final chunk of the stream
error: Optional error information if an error occurred
chunk_type: Always "rag"
in_token: Input token count (populated on the final chunk, None otherwise)
out_token: Output token count (populated on the final chunk, None otherwise)
model: Model identifier (populated on the final chunk, None otherwise)
message_type: Always "rag"
"""
chunk_type: str = "rag"
message_type: str = "rag"
end_of_stream: bool = False
error: Optional[Dict[str, str]] = None
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
@dataclasses.dataclass
class TextCompletionResult:
"""
Result from a text completion request.
Returned by text_completion() in both streaming and non-streaming modes.
In streaming mode, text is None (chunks are delivered via the iterator).
In non-streaming mode, text contains the complete response.
Attributes:
text: Complete response text (None in streaming mode)
in_token: Input token count (None if not available)
out_token: Output token count (None if not available)
model: Model identifier (None if not available)
"""
text: Optional[str]
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
@dataclasses.dataclass
class ProvenanceEvent:

View file

@ -18,8 +18,10 @@ from . librarian_client import LibrarianClient
from . chunking_service import ChunkingService
from . embeddings_service import EmbeddingsService
from . embeddings_client import EmbeddingsClientSpec
from . text_completion_client import TextCompletionClientSpec
from . prompt_client import PromptClientSpec
from . text_completion_client import (
TextCompletionClientSpec, TextCompletionClient, TextCompletionResult,
)
from . prompt_client import PromptClientSpec, PromptClient, PromptResult
from . triples_store_service import TriplesStoreService
from . graph_embeddings_store_service import GraphEmbeddingsStoreService
from . document_embeddings_store_service import DocumentEmbeddingsStoreService

View file

@ -30,19 +30,19 @@ class AgentClient(RequestResponse):
raise RuntimeError(resp.error.message)
# Handle thought chunks
if resp.chunk_type == 'thought':
if resp.message_type == 'thought':
if think:
await think(resp.content, resp.end_of_message)
return False # Continue receiving
# Handle observation chunks
if resp.chunk_type == 'observation':
if resp.message_type == 'observation':
if observe:
await observe(resp.content, resp.end_of_message)
return False # Continue receiving
# Handle answer chunks
if resp.chunk_type == 'answer':
if resp.message_type == 'answer':
if resp.content:
accumulated_answer.append(resp.content)
if answer_callback:

View file

@ -58,6 +58,18 @@ class BackendProducer(Protocol):
class BackendConsumer(Protocol):
"""Protocol for backend-specific consumer."""
def ensure_connected(self) -> None:
"""
Eagerly establish the underlying connection and bind the queue.
Backends that lazily connect on first receive() must implement this
so that callers can guarantee the consumer is fully bound and
therefore able to receive responses before any related request is
published. Backends that connect at construction time may make this
a no-op.
"""
...
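
A backend that connects lazily would typically make this method idempotent: connect and bind if the consumer is not already live, otherwise do nothing. A minimal self-contained sketch mirroring the RabbitMQ consumer later in this diff; the `_is_alive()`/`_connect()` helpers stand in for backend-specific plumbing:

```python
# Sketch of ensure_connected() for a lazily-connecting backend.
class LazyBackendConsumer:
    """Sketch only: other BackendConsumer methods omitted."""

    def __init__(self):
        self._connected = False

    def _is_alive(self) -> bool:
        return self._connected

    def _connect(self) -> None:
        # A real backend would open the connection and declare/bind the
        # queue here.
        self._connected = True

    def ensure_connected(self) -> None:
        # Idempotent readiness barrier: after this returns, the queue is
        # declared and bound, so a reply published immediately afterwards
        # has somewhere to land.
        if not self._is_alive():
            self._connect()
```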
def receive(self, timeout_millis: int = 2000) -> Message:
"""
Receive a message from the topic.

View file

@ -88,14 +88,14 @@ class ChunkingService(FlowProcessor):
chunk_overlap = default_chunk_overlap
try:
cs = flow.parameters.get("chunk-size")
cs = flow("chunk-size")
if cs is not None:
chunk_size = int(cs)
except Exception as e:
logger.warning(f"Could not parse chunk-size parameter: {e}")
try:
co = flow.parameters.get("chunk-overlap")
co = flow("chunk-overlap")
if co is not None:
chunk_overlap = int(co)
except Exception as e:

View file

@ -8,12 +8,51 @@ ensuring consistent log formats, levels, and command-line arguments.
Supports dual output to console and Loki for centralized log aggregation.
"""
import contextvars
import logging
import logging.handlers
from queue import Queue
import os
# The current processor id for this task context. Read by
# _ProcessorIdFilter to stamp every LogRecord with its owning
# processor, and read by logging_loki's emitter via record.tags
# to label log lines in Loki. ContextVar so asyncio subtasks
# inherit their parent supervisor's processor id automatically.
current_processor_id = contextvars.ContextVar(
"current_processor_id", default="unknown"
)
def set_processor_id(pid):
"""Set the processor id for the current task context.
All subsequent log records emitted from this task and any
asyncio tasks spawned from it will be tagged with this id
in the console format and in Loki labels.
"""
current_processor_id.set(pid)
class _ProcessorIdFilter(logging.Filter):
"""Stamps every LogRecord with processor_id from the contextvar.
Attaches two fields to each record:
record.processor_id used by the console format string
record.tags merged into Loki labels by logging_loki's
emitter (it reads record.tags and combines
with the handler's static tags)
"""
def filter(self, record):
pid = current_processor_id.get()
record.processor_id = pid
existing = getattr(record, "tags", None) or {}
record.tags = {**existing, "processor": pid}
return True
def add_logging_args(parser):
"""
Add standard logging arguments to an argument parser.
@ -87,12 +126,15 @@ def setup_logging(args):
loki_url = args.get('loki_url', 'http://loki:3100/loki/api/v1/push')
loki_username = args.get('loki_username')
loki_password = args.get('loki_password')
processor_id = args.get('id') # Processor identity (e.g., "config-svc", "text-completion")
try:
from logging_loki import LokiHandler
# Create Loki handler with optional authentication and processor label
# Create Loki handler with optional authentication. The
# processor label is NOT baked in here — it's stamped onto
# each record by _ProcessorIdFilter reading the task-local
# contextvar, and logging_loki's emitter reads record.tags
# to build per-record Loki labels.
loki_handler_kwargs = {
'url': loki_url,
'version': "1",
@ -101,10 +143,6 @@ def setup_logging(args):
if loki_username and loki_password:
loki_handler_kwargs['auth'] = (loki_username, loki_password)
# Add processor label if available (for consistency with Prometheus metrics)
if processor_id:
loki_handler_kwargs['tags'] = {'processor': processor_id}
loki_handler = LokiHandler(**loki_handler_kwargs)
# Wrap in QueueHandler for non-blocking operation
@ -133,23 +171,44 @@ def setup_logging(args):
print(f"WARNING: Failed to setup Loki logging: {e}")
print("Continuing with console-only logging")
# Get processor ID for log formatting (use 'unknown' if not available)
processor_id = args.get('id', 'unknown')
# Configure logging with all handlers
# Use processor ID as the primary identifier in logs
# Configure logging with all handlers. The processor id comes
# from _ProcessorIdFilter (via contextvar) and is injected into
# each record as record.processor_id. The format string reads
# that attribute on every emit.
logging.basicConfig(
level=getattr(logging, log_level.upper()),
format=f'%(asctime)s - {processor_id} - %(levelname)s - %(message)s',
format='%(asctime)s - %(processor_id)s - %(levelname)s - %(message)s',
handlers=handlers,
force=True # Force reconfiguration if already configured
)
# Prevent recursive logging from Loki's HTTP client
if loki_enabled and queue_listener:
# Disable urllib3 logging to prevent infinite loop
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('urllib3.connectionpool').setLevel(logging.WARNING)
# Attach the processor-id filter to every handler so all records
# passing through any sink get stamped (console, queue→loki,
# future handlers). Filters on handlers run regardless of which
# logger originated the record, so logs from pika, cassandra,
# processor code, etc. all pass through it.
processor_filter = _ProcessorIdFilter()
for h in handlers:
h.addFilter(processor_filter)
# Seed the contextvar from --id if one was supplied. In group
# mode --id isn't present; the processor_group supervisor sets
# it per task. In standalone mode AsyncProcessor.launch provides
# it via argparse default.
if args.get('id'):
set_processor_id(args['id'])
# Silence noisy third-party library loggers. These emit INFO-level
# chatter (connection churn, channel open/close, driver warnings) that
# drowns the useful signal and can't be attributed to a specific
# processor anyway. WARNING and above still propagate.
for noisy in (
'pika',
'cassandra',
'urllib3',
'urllib3.connectionpool',
):
logging.getLogger(noisy).setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
logger.info(f"Logging configured with level: {log_level}")

View file

@ -0,0 +1,204 @@
# Multi-processor group runner. Runs multiple AsyncProcessor descendants
# as concurrent tasks inside a single process, sharing one event loop,
# one Prometheus HTTP server, and one pub/sub backend pool.
#
# Intended for dev and resource-constrained deployments. Scale deployments
# should continue to use per-processor endpoints.
#
# Group config is a YAML or JSON file with shape:
#
# processors:
# - class: trustgraph.extract.kg.definitions.extract.Processor
# params:
# id: kg-extract-definitions
# triples_batch_size: 1000
# - class: trustgraph.chunking.recursive.Processor
# params:
# id: chunker-recursive
#
# Each entry's params are passed directly to the class constructor alongside
# the shared taskgroup. Defaults live inside each processor class.
import argparse
import asyncio
import importlib
import json
import logging
import time
from prometheus_client import start_http_server
from . logging import add_logging_args, setup_logging, set_processor_id
logger = logging.getLogger(__name__)
def _load_config(path):
with open(path) as f:
text = f.read()
if path.endswith((".yaml", ".yml")):
import yaml
return yaml.safe_load(text)
return json.loads(text)
def _resolve_class(dotted):
module_path, _, class_name = dotted.rpartition(".")
if not module_path:
raise ValueError(
f"Processor class must be a dotted path, got {dotted!r}"
)
module = importlib.import_module(module_path)
return getattr(module, class_name)
RESTART_DELAY_SECONDS = 4
async def _supervise(entry):
"""Run one processor with its own nested TaskGroup, restarting on any
failure. Each processor is isolated from its siblings: a crash here
does not propagate to the outer group."""
pid = entry["params"]["id"]
class_path = entry["class"]
# Stamp the contextvar for this supervisor task. Every log
# record emitted from this task — and from any inner TaskGroup
# child created by the processor — inherits this id via
# contextvar propagation. Siblings in the outer group set
# their own id in their own task context and do not interfere.
set_processor_id(pid)
while True:
try:
async with asyncio.TaskGroup() as inner_tg:
cls = _resolve_class(class_path)
params = dict(entry.get("params", {}))
params["taskgroup"] = inner_tg
logger.info(f"Starting {class_path} as {pid}")
p = cls(**params)
await p.start()
inner_tg.create_task(p.run())
# Clean exit — processor's run() returned without raising.
# Treat as a transient shutdown and restart, matching the
# behaviour of per-container `restart: on-failure`.
logger.warning(
f"Processor {pid} exited cleanly, will restart"
)
except asyncio.CancelledError:
logger.info(f"Processor {pid} cancelled")
raise
except BaseExceptionGroup as eg:
for e in eg.exceptions:
logger.error(
f"Processor {pid} failure: {type(e).__name__}: {e}",
exc_info=e,
)
except Exception as e:
logger.error(
f"Processor {pid} failure: {type(e).__name__}: {e}",
exc_info=True,
)
logger.info(
f"Restarting {pid} in {RESTART_DELAY_SECONDS}s..."
)
await asyncio.sleep(RESTART_DELAY_SECONDS)
async def run_group(config):
entries = config.get("processors", [])
if not entries:
raise RuntimeError("Group config has no processors")
seen_ids = set()
for entry in entries:
pid = entry.get("params", {}).get("id")
if pid is None:
raise RuntimeError(
f"Entry {entry.get('class')!r} missing params.id — "
f"required for metrics labelling"
)
if pid in seen_ids:
raise RuntimeError(f"Duplicate processor id {pid!r} in group")
seen_ids.add(pid)
async with asyncio.TaskGroup() as outer_tg:
for entry in entries:
outer_tg.create_task(_supervise(entry))
def run():
parser = argparse.ArgumentParser(
prog="processor-group",
description="Run multiple processors as tasks in one process",
)
parser.add_argument(
"-c", "--config",
required=True,
help="Path to group config file (JSON or YAML)",
)
parser.add_argument(
"--metrics",
action=argparse.BooleanOptionalAction,
default=True,
help="Metrics enabled (default: true)",
)
parser.add_argument(
"-P", "--metrics-port",
type=int,
default=8000,
help="Prometheus metrics port (default: 8000)",
)
add_logging_args(parser)
args = vars(parser.parse_args())
setup_logging(args)
config = _load_config(args["config"])
if args["metrics"]:
start_http_server(args["metrics_port"])
while True:
logger.info("Starting group...")
try:
asyncio.run(run_group(config))
except KeyboardInterrupt:
logger.info("Keyboard interrupt.")
return
except ExceptionGroup as e:
logger.error("Exception group:")
for se in e.exceptions:
logger.error(f" Type: {type(se)}")
logger.error(f" Exception: {se}", exc_info=se)
except Exception as e:
logger.error(f"Type: {type(e)}")
logger.error(f"Exception: {e}", exc_info=True)
logger.warning("Will retry...")
time.sleep(4)
logger.info("Retrying...")

View file

@ -1,10 +1,22 @@
import json
import asyncio
from dataclasses import dataclass
from typing import Optional, Any
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import PromptRequest, PromptResponse
@dataclass
class PromptResult:
response_type: str # "text", "json", or "jsonl"
text: Optional[str] = None # populated for "text"
object: Any = None # populated for "json"
objects: Optional[list] = None # populated for "jsonl"
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
class PromptClient(RequestResponse):
async def prompt(self, id, variables, timeout=600, streaming=False, chunk_callback=None):
@ -26,17 +38,40 @@ class PromptClient(RequestResponse):
if resp.error:
raise RuntimeError(resp.error.message)
if resp.text: return resp.text
if resp.text:
return PromptResult(
response_type="text",
text=resp.text,
in_token=resp.in_token,
out_token=resp.out_token,
model=resp.model,
)
return json.loads(resp.object)
parsed = json.loads(resp.object)
if isinstance(parsed, list):
return PromptResult(
response_type="jsonl",
objects=parsed,
in_token=resp.in_token,
out_token=resp.out_token,
model=resp.model,
)
return PromptResult(
response_type="json",
object=parsed,
in_token=resp.in_token,
out_token=resp.out_token,
model=resp.model,
)
else:
last_text = ""
last_object = None
last_resp = None
async def forward_chunks(resp):
nonlocal last_text, last_object
nonlocal last_resp
if resp.error:
raise RuntimeError(resp.error.message)
@ -44,14 +79,13 @@ class PromptClient(RequestResponse):
end_stream = getattr(resp, 'end_of_stream', False)
if resp.text is not None:
last_text = resp.text
if chunk_callback:
if asyncio.iscoroutinefunction(chunk_callback):
await chunk_callback(resp.text, end_stream)
else:
chunk_callback(resp.text, end_stream)
elif resp.object:
last_object = resp.object
last_resp = resp
return end_stream
@ -70,10 +104,36 @@ class PromptClient(RequestResponse):
timeout=timeout
)
if last_text:
return last_text
if last_resp is None:
return PromptResult(response_type="text")
return json.loads(last_object) if last_object else None
if last_resp.object:
parsed = json.loads(last_resp.object)
if isinstance(parsed, list):
return PromptResult(
response_type="jsonl",
objects=parsed,
in_token=last_resp.in_token,
out_token=last_resp.out_token,
model=last_resp.model,
)
return PromptResult(
response_type="json",
object=parsed,
in_token=last_resp.in_token,
out_token=last_resp.out_token,
model=last_resp.model,
)
return PromptResult(
response_type="text",
text=last_resp.text,
in_token=last_resp.in_token,
out_token=last_resp.out_token,
model=last_resp.model,
)
async def extract_definitions(self, text, timeout=600):
return await self.prompt(
@ -152,4 +212,3 @@ class PromptClientSpec(RequestResponseSpec):
response_schema = PromptResponse,
impl = PromptClient,
)
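
Callers that previously received a str or a parsed JSON object from PromptClient.prompt() now branch on PromptResult.response_type. A sketch; the template id and variables are illustrative:

```python
# Sketch: consuming a PromptResult. `client` is assumed to be a
# PromptClient; the "question" template id is illustrative.
async def run_prompt(client):
    result = await client.prompt(
        id="question",
        variables={"question": "What is TrustGraph?"},
    )

    if result.response_type == "text":
        print(result.text)
    elif result.response_type == "json":
        print(result.object)            # parsed JSON object
    else:                               # "jsonl"
        for obj in result.objects:
            print(obj)

    if result.in_token is not None:
        print(f"Tokens: {result.in_token} in, {result.out_token} out ({result.model})")
```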

View file

@ -72,6 +72,16 @@ class PulsarBackendConsumer:
self._consumer = pulsar_consumer
self._schema_cls = schema_cls
def ensure_connected(self) -> None:
"""No-op for Pulsar.
PulsarBackend.create_consumer() calls client.subscribe() which is
synchronous and returns a fully-subscribed consumer, so the
consumer is already ready by the time this object is constructed.
Defined for parity with the BackendConsumer protocol used by
Subscriber.start()'s readiness barrier."""
pass
def receive(self, timeout_millis: int = 2000) -> Message:
"""Receive a message. Raises TimeoutError if no message available."""
try:

View file

@ -214,16 +214,43 @@ class RabbitMQBackendConsumer:
and self._channel.is_open
)
def ensure_connected(self) -> None:
"""Eagerly declare and bind the queue.
Without this, the queue is only declared lazily on the first
receive() call. For request/response with ephemeral per-subscriber
response queues that is a race: a request published before the
response queue is bound will have its reply silently dropped by
the broker. Subscriber.start() calls this so callers get a hard
readiness barrier."""
if not self._is_alive():
self._connect()
def receive(self, timeout_millis: int = 2000) -> Message:
"""Receive a message. Raises TimeoutError if none available."""
"""Receive a message. Raises TimeoutError if none available.
Loop ordering matters: check _incoming at the TOP of each
iteration, not as the loop condition. process_data_events
may dispatch a message via the _on_message callback during
the pump; we must re-check _incoming on the next iteration
before giving up on the deadline. The previous control
flow (`while deadline: check; pump`) could lose a wakeup if
the pump consumed the remainder of the window: the
`while` check would fail before `_incoming` was re-read,
leaving a just-dispatched message stranded until the next
receive() call one full poll cycle later.
"""
if not self._is_alive():
self._connect()
timeout_seconds = timeout_millis / 1000.0
deadline = time.monotonic() + timeout_seconds
while time.monotonic() < deadline:
# Check if a message was already delivered
while True:
# Check if a message has been dispatched to our queue.
# This catches both (a) messages dispatched before this
# receive() was called and (b) messages dispatched
# during the previous iteration's process_data_events.
try:
method, properties, body = self._incoming.get_nowait()
return RabbitMQMessage(
@ -232,14 +259,16 @@ class RabbitMQBackendConsumer:
except queue.Empty:
pass
# Drive pika's I/O — delivers messages and processes heartbeats
remaining = deadline - time.monotonic()
if remaining > 0:
self._connection.process_data_events(
time_limit=min(0.1, remaining),
)
if remaining <= 0:
raise TimeoutError("No message received within timeout")
raise TimeoutError("No message received within timeout")
# Drive pika's I/O. Any messages delivered during this
# call land in _incoming via _on_message; the next
# iteration of this loop catches them at the top.
self._connection.process_data_events(
time_limit=min(0.1, remaining),
)
def acknowledge(self, message: Message) -> None:
if isinstance(message, RabbitMQMessage) and message._method:

View file

@ -41,14 +41,55 @@ class Subscriber:
self.consumer = None
self.executor = None
# Readiness barrier — completed by run() once the underlying
# backend consumer is fully connected and bound. start() awaits
# this so callers know any subsequently published request will
# have a queue ready to receive its response. Without this,
# ephemeral per-subscriber response queues (RabbitMQ auto-delete
# exclusive queues) would race the request and lose the reply.
# A Future is used (rather than an Event) so that a first-attempt
# connection failure can be propagated to start() as an exception.
self._ready = None # created in start() so we have a running loop
def __del__(self):
self.running = False
async def start(self):
self._ready = asyncio.get_event_loop().create_future()
self.task = asyncio.create_task(self.run())
# Block until run() signals readiness OR exits. The future
# carries the outcome of the first connect attempt: a value on
# success, an exception on first-attempt failure. If run() exits
# without ever signalling (e.g. cancelled, or a code path bug),
# we surface that as a clear RuntimeError rather than hanging
# forever waiting on the future.
ready_wait = asyncio.ensure_future(
asyncio.shield(self._ready)
)
try:
await asyncio.wait(
{self.task, ready_wait},
return_when=asyncio.FIRST_COMPLETED,
)
finally:
ready_wait.cancel()
if self._ready.done():
# Re-raise first-attempt connect failure if any.
self._ready.result()
return
# run() exited before _ready was settled. Propagate its exception
# if it had one, otherwise raise a generic readiness error.
if self.task.done() and self.task.exception() is not None:
raise self.task.exception()
raise RuntimeError(
"Subscriber.run() exited before signalling readiness"
)
async def stop(self):
"""Initiate graceful shutdown with draining"""
self.running = False
@ -66,6 +107,7 @@ class Subscriber:
async def run(self):
"""Enhanced run method with integrated draining logic"""
first_attempt = True
while self.running or self.draining:
if self.metrics:
@ -87,10 +129,27 @@ class Subscriber:
),
)
# Eagerly bind the queue. For backends that connect
# lazily on first receive (RabbitMQ), this is what
# closes the request/response setup race — without
# it the response queue is not bound until later and
# any reply published in the meantime is dropped.
await loop.run_in_executor(
self.executor,
lambda: self.consumer.ensure_connected(),
)
if self.metrics:
self.metrics.state("running")
logger.info("Subscriber running...")
# Signal start() that the consumer is ready. This must
# happen AFTER ensure_connected() above so callers can
# safely publish requests immediately after start() returns.
if first_attempt and not self._ready.done():
self._ready.set_result(None)
first_attempt = False
drain_end_time = None
while self.running or self.draining:
@ -162,6 +221,16 @@ class Subscriber:
except Exception as e:
logger.error(f"Subscriber exception: {e}", exc_info=True)
# First-attempt connection failure: propagate to start()
# so the caller can decide what to do (retry, give up).
# Subsequent failures use the existing retry-with-backoff
# path so a long-lived subscriber survives broker blips.
if first_attempt and not self._ready.done():
self._ready.set_exception(e)
first_attempt = False
# Falls through into finally for cleanup, then the
# outer return below ends run() so start() unblocks.
finally:
# Negative acknowledge any pending messages
for msg in self.pending_acks.values():
@ -193,6 +262,11 @@ class Subscriber:
if not self.running and not self.draining:
return
# If start() has already returned with an exception there is
# nothing more to do — exit run() rather than busy-retry.
if self._ready.done() and self._ready.exception() is not None:
return
# Sleep before retry
await asyncio.sleep(1)
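
The practical upshot for callers: awaiting start() is now a readiness barrier, so a request published immediately afterwards cannot race the binding of its response queue, and a broker that is down on the first attempt surfaces as an exception instead of a hang. A sketch of the intended ordering; the subscriber construction and the publish call are assumed to come from the surrounding pub/sub wiring:

```python
# Sketch of the start()-then-publish ordering this change guarantees.
# `subscriber` and `publish_request` are assumptions standing in for the
# surrounding request/response plumbing.
async def request_with_ready_subscriber(subscriber, publish_request):
    # Blocks until the backend consumer is connected and the response
    # queue is bound, or re-raises the first-attempt connection failure.
    await subscriber.start()

    # Safe to publish: the response queue exists before the request goes
    # out, so the broker cannot drop the reply.
    await publish_request()
```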

View file

@ -1,47 +1,71 @@
from dataclasses import dataclass
from typing import Optional
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import TextCompletionRequest, TextCompletionResponse
@dataclass
class TextCompletionResult:
text: Optional[str]
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
class TextCompletionClient(RequestResponse):
async def text_completion(self, system, prompt, streaming=False, timeout=600):
# If not streaming, use original behavior
if not streaming:
resp = await self.request(
TextCompletionRequest(
system = system, prompt = prompt, streaming = False
),
timeout=timeout
)
if resp.error:
raise RuntimeError(resp.error.message)
async def text_completion(self, system, prompt, timeout=600):
return resp.response
# For streaming: collect all chunks and return complete response
full_response = ""
async def collect_chunks(resp):
nonlocal full_response
if resp.error:
raise RuntimeError(resp.error.message)
if resp.response:
full_response += resp.response
# Return True when end_of_stream is reached
return getattr(resp, 'end_of_stream', False)
await self.request(
resp = await self.request(
TextCompletionRequest(
system = system, prompt = prompt, streaming = True
system = system, prompt = prompt, streaming = False
),
recipient=collect_chunks,
timeout=timeout
)
return full_response
if resp.error:
raise RuntimeError(resp.error.message)
return TextCompletionResult(
text = resp.response,
in_token = resp.in_token,
out_token = resp.out_token,
model = resp.model,
)
async def text_completion_stream(
self, system, prompt, handler, timeout=600,
):
"""
Streaming text completion. `handler` is an async callable invoked
once per chunk with the chunk's TextCompletionResponse. Returns a
TextCompletionResult with text=None and token counts / model taken
from the end_of_stream message.
"""
async def on_chunk(resp):
if resp.error:
raise RuntimeError(resp.error.message)
await handler(resp)
return getattr(resp, "end_of_stream", False)
final = await self.request(
TextCompletionRequest(
system = system, prompt = prompt, streaming = True
),
recipient=on_chunk,
timeout=timeout,
)
return TextCompletionResult(
text = None,
in_token = final.in_token,
out_token = final.out_token,
model = final.model,
)
class TextCompletionClientSpec(RequestResponseSpec):
def __init__(
@ -54,4 +78,3 @@ class TextCompletionClientSpec(RequestResponseSpec):
response_schema = TextCompletionResponse,
impl = TextCompletionClient,
)
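
The client API is now split: text_completion() does the one-shot call, text_completion_stream() drives a handler per chunk and returns the token totals from the end-of-stream message. A usage sketch, assuming `client` is a TextCompletionClient wired up via TextCompletionClientSpec:

```python
# Sketch: one-shot vs streaming use of the split client API.
async def demo(client):
    # Non-streaming: a single TextCompletionResult with the full text.
    result = await client.text_completion(
        system="You are a helpful assistant.",
        prompt="Explain quantum computing in simple terms.",
    )
    print(result.text, result.in_token, result.out_token, result.model)

    # Streaming: the handler sees every TextCompletionResponse chunk; the
    # returned result has text=None plus end-of-stream token counts.
    async def on_chunk(resp):
        if resp.response:
            print(resp.response, end="", flush=True)

    final = await client.text_completion_stream(
        system="You are a helpful assistant.",
        prompt="Explain quantum computing in simple terms.",
        handler=on_chunk,
    )
    print(f"\nTokens: {final.in_token} in, {final.out_token} out")
```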

View file

@ -58,23 +58,23 @@ class AgentClient(BaseClient):
def inspect(x):
# Handle errors
if x.chunk_type == 'error' or x.error:
if x.message_type == 'error' or x.error:
if error_callback:
error_callback(x.content or (x.error.message if x.error else ""))
# Continue to check end_of_dialog
# Handle thought chunks
elif x.chunk_type == 'thought':
elif x.message_type == 'thought':
if think:
think(x.content, x.end_of_message)
# Handle observation chunks
elif x.chunk_type == 'observation':
elif x.message_type == 'observation':
if observe:
observe(x.content, x.end_of_message)
# Handle answer chunks
elif x.chunk_type == 'answer':
elif x.message_type == 'answer':
if x.content:
accumulated_answer.append(x.content)
if answer_callback:

View file

@ -60,8 +60,8 @@ class AgentResponseTranslator(MessageTranslator):
def encode(self, obj: AgentResponse) -> Dict[str, Any]:
result = {}
if obj.chunk_type:
result["chunk_type"] = obj.chunk_type
if obj.message_type:
result["message_type"] = obj.message_type
if obj.content:
result["content"] = obj.content
result["end_of_message"] = getattr(obj, "end_of_message", False)
@ -90,6 +90,13 @@ class AgentResponseTranslator(MessageTranslator):
if hasattr(obj, 'error') and obj.error and obj.error.message:
result["error"] = {"message": obj.error.message, "code": obj.error.code}
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model
return result
def encode_with_completion(self, obj: AgentResponse) -> Tuple[Dict[str, Any], bool]:

View file

@ -151,7 +151,7 @@ class DocumentEmbeddingsTranslator(SendTranslator):
chunks = [
ChunkEmbeddings(
chunk_id=chunk["chunk_id"],
vectors=chunk["vectors"]
vector=chunk["vector"]
)
for chunk in data.get("chunks", [])
]

View file

@ -39,7 +39,7 @@ class KnowledgeRequestTranslator(MessageTranslator):
entities=[
EntityEmbeddings(
entity=self.value_translator.decode(ent["entity"]),
vectors=ent["vectors"],
vector=ent["vector"],
)
for ent in data["graph-embeddings"]["entities"]
]

View file

@ -53,6 +53,13 @@ class PromptResponseTranslator(MessageTranslator):
# Always include end_of_stream flag for streaming support
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model
return result
def encode_with_completion(self, obj: PromptResponse) -> Tuple[Dict[str, Any], bool]:

View file

@ -74,6 +74,13 @@ class DocumentRagResponseTranslator(MessageTranslator):
if hasattr(obj, 'error') and obj.error and obj.error.message:
result["error"] = {"message": obj.error.message, "type": obj.error.type}
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model
return result
def encode_with_completion(self, obj: DocumentRagResponse) -> Tuple[Dict[str, Any], bool]:
@ -163,6 +170,13 @@ class GraphRagResponseTranslator(MessageTranslator):
if hasattr(obj, 'error') and obj.error and obj.error.message:
result["error"] = {"message": obj.error.message, "type": obj.error.type}
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model
return result
def encode_with_completion(self, obj: GraphRagResponse) -> Tuple[Dict[str, Any], bool]:

View file

@ -29,11 +29,11 @@ class TextCompletionResponseTranslator(MessageTranslator):
def encode(self, obj: TextCompletionResponse) -> Dict[str, Any]:
result = {"response": obj.response}
if obj.in_token:
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token:
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model:
if obj.model is not None:
result["model"] = obj.model
# Always include end_of_stream flag for streaming support
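
The switch from truthiness to `is not None` matters because a token count of 0 is real data, whereas only None means "not available". A minimal illustration:

```python
# Minimal illustration of the old vs new check.
def encode_old(in_token):
    result = {}
    if in_token:                 # silently drops a legitimate 0
        result["in_token"] = in_token
    return result

def encode_new(in_token):
    result = {}
    if in_token is not None:     # keeps 0, omits only None
        result["in_token"] = in_token
    return result

assert encode_old(0) == {}                      # 0 lost
assert encode_new(0) == {"in_token": 0}         # 0 preserved
assert encode_new(None) == {}                   # missing value still omitted
```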

View file

@ -59,6 +59,7 @@ from . uris import (
agent_plan_uri,
agent_step_result_uri,
agent_synthesis_uri,
agent_pattern_decision_uri,
# Document RAG provenance URIs
docrag_question_uri,
docrag_grounding_uri,
@ -102,6 +103,11 @@ from . namespaces import (
# Agent provenance predicates
TG_THOUGHT, TG_ACTION, TG_ARGUMENTS, TG_OBSERVATION,
TG_SUBAGENT_GOAL, TG_PLAN_STEP,
TG_TOOL_CANDIDATE, TG_TERMINATION_REASON,
TG_STEP_NUMBER, TG_PATTERN_DECISION, TG_PATTERN, TG_TASK_TYPE,
TG_LLM_DURATION_MS, TG_TOOL_DURATION_MS, TG_TOOL_ERROR,
TG_IN_TOKEN, TG_OUT_TOKEN,
TG_ERROR_TYPE,
# Orchestrator entity types
TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
# Document reference predicate
@ -141,6 +147,7 @@ from . agent import (
agent_plan_triples,
agent_step_result_triples,
agent_synthesis_triples,
agent_pattern_decision_triples,
)
# Vocabulary bootstrap
@ -182,6 +189,7 @@ __all__ = [
"agent_plan_uri",
"agent_step_result_uri",
"agent_synthesis_uri",
"agent_pattern_decision_uri",
# Document RAG provenance URIs
"docrag_question_uri",
"docrag_grounding_uri",
@ -218,6 +226,11 @@ __all__ = [
# Agent provenance predicates
"TG_THOUGHT", "TG_ACTION", "TG_ARGUMENTS", "TG_OBSERVATION",
"TG_SUBAGENT_GOAL", "TG_PLAN_STEP",
"TG_TOOL_CANDIDATE", "TG_TERMINATION_REASON",
"TG_STEP_NUMBER", "TG_PATTERN_DECISION", "TG_PATTERN", "TG_TASK_TYPE",
"TG_LLM_DURATION_MS", "TG_TOOL_DURATION_MS", "TG_TOOL_ERROR",
"TG_IN_TOKEN", "TG_OUT_TOKEN",
"TG_ERROR_TYPE",
# Orchestrator entity types
"TG_DECOMPOSITION", "TG_FINDING", "TG_PLAN_TYPE", "TG_STEP_RESULT",
# Document reference predicate
@ -249,6 +262,7 @@ __all__ = [
"agent_plan_triples",
"agent_step_result_triples",
"agent_synthesis_triples",
"agent_pattern_decision_triples",
# Utility
"set_graph",
# Vocabulary

View file

@ -29,6 +29,11 @@ from . namespaces import (
TG_AGENT_QUESTION,
TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
TG_SYNTHESIS, TG_SUBAGENT_GOAL, TG_PLAN_STEP,
TG_TOOL_CANDIDATE, TG_TERMINATION_REASON,
TG_STEP_NUMBER, TG_PATTERN_DECISION, TG_PATTERN, TG_TASK_TYPE,
TG_LLM_DURATION_MS, TG_TOOL_DURATION_MS, TG_TOOL_ERROR,
TG_ERROR_TYPE,
TG_IN_TOKEN, TG_OUT_TOKEN, TG_LLM_MODEL,
)
@ -47,6 +52,17 @@ def _triple(s: str, p: str, o_term: Term) -> Triple:
return Triple(s=_iri(s), p=_iri(p), o=o_term)
def _append_token_triples(triples, uri, in_token=None, out_token=None,
model=None):
"""Append in_token/out_token/model triples when values are present."""
if in_token is not None:
triples.append(_triple(uri, TG_IN_TOKEN, _literal(str(in_token))))
if out_token is not None:
triples.append(_triple(uri, TG_OUT_TOKEN, _literal(str(out_token))))
if model is not None:
triples.append(_triple(uri, TG_LLM_MODEL, _literal(model)))
def agent_session_triples(
session_uri: str,
query: str,
@ -90,6 +106,43 @@ def agent_session_triples(
return triples
def agent_pattern_decision_triples(
uri: str,
session_uri: str,
pattern: str,
task_type: str = "",
) -> List[Triple]:
"""
Build triples for a meta-router pattern decision.
Creates:
- Entity declaration with tg:PatternDecision type
- wasDerivedFrom link to session
- Pattern and task type predicates
Args:
uri: URI of this decision (from agent_pattern_decision_uri)
session_uri: URI of the parent session
pattern: Selected execution pattern (e.g. "react", "plan-then-execute")
task_type: Identified task type (e.g. "general", "research")
Returns:
List of Triple objects
"""
triples = [
_triple(uri, RDF_TYPE, _iri(PROV_ENTITY)),
_triple(uri, RDF_TYPE, _iri(TG_PATTERN_DECISION)),
_triple(uri, RDFS_LABEL, _literal(f"Pattern: {pattern}")),
_triple(uri, TG_PATTERN, _literal(pattern)),
_triple(uri, PROV_WAS_DERIVED_FROM, _iri(session_uri)),
]
if task_type:
triples.append(_triple(uri, TG_TASK_TYPE, _literal(task_type)))
return triples
def agent_iteration_triples(
iteration_uri: str,
question_uri: Optional[str] = None,
@ -98,6 +151,12 @@ def agent_iteration_triples(
arguments: Dict[str, Any] = None,
thought_uri: Optional[str] = None,
thought_document_id: Optional[str] = None,
tool_candidates: Optional[List[str]] = None,
step_number: Optional[int] = None,
llm_duration_ms: Optional[int] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for one agent iteration (Analysis+ToolUse).
@ -106,6 +165,7 @@ def agent_iteration_triples(
- Entity declaration with tg:Analysis and tg:ToolUse types
- wasDerivedFrom link to question (if first iteration) or previous
- Action and arguments metadata
- Tool candidates (names of tools visible to the LLM)
- Thought sub-entity (tg:Reflection, tg:Thought) with librarian document
Args:
@ -116,6 +176,7 @@ def agent_iteration_triples(
arguments: Arguments passed to the tool (will be JSON-encoded)
thought_uri: URI for the thought sub-entity
thought_document_id: Document URI for thought in librarian
tool_candidates: List of tool names available to the LLM
Returns:
List of Triple objects
@ -132,6 +193,23 @@ def agent_iteration_triples(
_triple(iteration_uri, TG_ARGUMENTS, _literal(json.dumps(arguments))),
]
if tool_candidates:
for name in tool_candidates:
triples.append(
_triple(iteration_uri, TG_TOOL_CANDIDATE, _literal(name))
)
if step_number is not None:
triples.append(
_triple(iteration_uri, TG_STEP_NUMBER, _literal(str(step_number)))
)
if llm_duration_ms is not None:
triples.append(
_triple(iteration_uri, TG_LLM_DURATION_MS,
_literal(str(llm_duration_ms)))
)
if question_uri:
triples.append(
_triple(iteration_uri, PROV_WAS_DERIVED_FROM, _iri(question_uri))
@ -155,6 +233,8 @@ def agent_iteration_triples(
_triple(thought_uri, TG_DOCUMENT, _iri(thought_document_id))
)
_append_token_triples(triples, iteration_uri, in_token, out_token, model)
return triples
@ -162,6 +242,8 @@ def agent_observation_triples(
observation_uri: str,
iteration_uri: str,
document_id: Optional[str] = None,
tool_duration_ms: Optional[int] = None,
tool_error: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for an agent observation (standalone entity).
@ -170,11 +252,15 @@ def agent_observation_triples(
- Entity declaration with prov:Entity and tg:Observation types
- wasDerivedFrom link to the iteration (Analysis+ToolUse)
- Document reference to librarian (if provided)
- Tool execution duration (if provided)
- Tool error message (if the tool failed)
Args:
observation_uri: URI of the observation entity
iteration_uri: URI of the iteration this observation derives from
document_id: Librarian document ID for the observation content
tool_duration_ms: Tool execution time in milliseconds
tool_error: Error message if the tool failed
Returns:
List of Triple objects
@ -191,6 +277,20 @@ def agent_observation_triples(
_triple(observation_uri, TG_DOCUMENT, _iri(document_id))
)
if tool_duration_ms is not None:
triples.append(
_triple(observation_uri, TG_TOOL_DURATION_MS,
_literal(str(tool_duration_ms)))
)
if tool_error:
triples.append(
_triple(observation_uri, TG_TOOL_ERROR, _literal(tool_error))
)
triples.append(
_triple(observation_uri, RDF_TYPE, _iri(TG_ERROR_TYPE))
)
return triples
@ -199,6 +299,10 @@ def agent_final_triples(
question_uri: Optional[str] = None,
previous_uri: Optional[str] = None,
document_id: Optional[str] = None,
termination_reason: Optional[str] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for an agent final answer (Conclusion).
@ -208,12 +312,15 @@ def agent_final_triples(
- wasGeneratedBy link to question (if no iterations)
- wasDerivedFrom link to last iteration (if iterations exist)
- Document reference to librarian
- Termination reason (why the agent loop stopped)
Args:
final_uri: URI of the final answer (from agent_final_uri)
question_uri: URI of the question activity (if no iterations)
previous_uri: URI of the last iteration (if iterations exist)
document_id: Librarian document ID for the answer content
termination_reason: Why the loop stopped, e.g. "final-answer",
"max-iterations", "error"
Returns:
List of Triple objects
@ -237,6 +344,14 @@ def agent_final_triples(
if document_id:
triples.append(_triple(final_uri, TG_DOCUMENT, _iri(document_id)))
if termination_reason:
triples.append(
_triple(final_uri, TG_TERMINATION_REASON,
_literal(termination_reason))
)
_append_token_triples(triples, final_uri, in_token, out_token, model)
return triples
@ -244,6 +359,9 @@ def agent_decomposition_triples(
uri: str,
session_uri: str,
goals: List[str],
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""Build triples for a supervisor decomposition step."""
triples = [
@ -255,6 +373,7 @@ def agent_decomposition_triples(
]
for goal in goals:
triples.append(_triple(uri, TG_SUBAGENT_GOAL, _literal(goal)))
_append_token_triples(triples, uri, in_token, out_token, model)
return triples
@ -282,6 +401,9 @@ def agent_plan_triples(
uri: str,
session_uri: str,
steps: List[str],
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""Build triples for a plan-then-execute plan."""
triples = [
@ -293,6 +415,7 @@ def agent_plan_triples(
]
for step in steps:
triples.append(_triple(uri, TG_PLAN_STEP, _literal(step)))
_append_token_triples(triples, uri, in_token, out_token, model)
return triples
@ -301,6 +424,9 @@ def agent_step_result_triples(
plan_uri: str,
goal: str,
document_id: Optional[str] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""Build triples for a plan step result."""
triples = [
@ -313,6 +439,7 @@ def agent_step_result_triples(
]
if document_id:
triples.append(_triple(uri, TG_DOCUMENT, _iri(document_id)))
_append_token_triples(triples, uri, in_token, out_token, model)
return triples
@ -320,6 +447,10 @@ def agent_synthesis_triples(
uri: str,
previous_uris,
document_id: Optional[str] = None,
termination_reason: Optional[str] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""Build triples for a synthesis answer.
@ -327,6 +458,8 @@ def agent_synthesis_triples(
uri: URI of the synthesis entity
previous_uris: Single URI string or list of URIs to derive from
document_id: Librarian document ID for the answer content
termination_reason: Why the agent loop stopped
in_token/out_token/model: Token usage for the synthesis LLM call
"""
triples = [
_triple(uri, RDF_TYPE, _iri(PROV_ENTITY)),
@ -342,4 +475,12 @@ def agent_synthesis_triples(
if document_id:
triples.append(_triple(uri, TG_DOCUMENT, _iri(document_id)))
if termination_reason:
triples.append(
_triple(uri, TG_TERMINATION_REASON, _literal(termination_reason))
)
_append_token_triples(triples, uri, in_token, out_token, model)
return triples
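
The token arguments on these builders all funnel through _append_token_triples, so token usage, pattern decisions and termination reasons end up as ordinary triples on the relevant entity. A sketch of building the new pattern-decision and final-answer triples; the import path assumes these helpers are re-exported from trustgraph.knowledge as the __init__ changes above suggest, and the session URI, final-answer URI, token counts and model name are all illustrative:

```python
# Sketch: recording token usage in agent provenance. URIs and counts
# are made up; real code would use the corresponding URI helpers.
from trustgraph.knowledge import (
    agent_pattern_decision_uri,
    agent_pattern_decision_triples,
    agent_final_triples,
)

session_id = "abc123"
session_uri = f"urn:trustgraph:agent:{session_id}"        # assumed shape

triples = agent_pattern_decision_triples(
    agent_pattern_decision_uri(session_id),
    session_uri,
    pattern="react",
    task_type="general",
)

triples += agent_final_triples(
    f"{session_uri}/final",                                # illustrative
    previous_uri=f"{session_uri}/iteration/3",
    termination_reason="final-answer",
    in_token=1200,
    out_token=350,
    model="example-model",
)
```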

View file

@ -119,6 +119,18 @@ TG_ARGUMENTS = TG + "arguments"
TG_OBSERVATION = TG + "observation" # Links iteration to observation sub-entity
TG_SUBAGENT_GOAL = TG + "subagentGoal" # Goal string on Decomposition/Finding
TG_PLAN_STEP = TG + "planStep" # Step goal string on Plan/StepResult
TG_TOOL_CANDIDATE = TG + "toolCandidate" # Tool name on Analysis events
TG_TERMINATION_REASON = TG + "terminationReason" # Why the agent loop stopped
TG_STEP_NUMBER = TG + "stepNumber" # Explicit step counter on iteration events
TG_PATTERN_DECISION = TG + "PatternDecision" # Meta-router routing decision entity type
TG_PATTERN = TG + "pattern" # Selected execution pattern
TG_TASK_TYPE = TG + "taskType" # Identified task type
TG_LLM_DURATION_MS = TG + "llmDurationMs" # LLM call duration in milliseconds
TG_TOOL_DURATION_MS = TG + "toolDurationMs" # Tool execution duration in milliseconds
TG_TOOL_ERROR = TG + "toolError" # Error message from a failed tool execution
TG_ERROR_TYPE = TG + "Error" # Mixin type for failure events
TG_IN_TOKEN = TG + "inToken" # Input token count for an LLM call
TG_OUT_TOKEN = TG + "outToken" # Output token count for an LLM call
# Named graph URIs for RDF datasets
# These separate different types of data while keeping them in the same collection

View file

@ -34,6 +34,8 @@ from . namespaces import (
TG_ANSWER_TYPE,
# Question subtypes
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION,
# Token usage
TG_IN_TOKEN, TG_OUT_TOKEN,
)
from . uris import activity_uri, agent_uri, subgraph_uri, edge_selection_uri
@ -74,6 +76,17 @@ def _triple(s: str, p: str, o_term: Term) -> Triple:
return Triple(s=_iri(s), p=_iri(p), o=o_term)
def _append_token_triples(triples, uri, in_token=None, out_token=None,
model=None):
"""Append in_token/out_token/model triples when values are present."""
if in_token is not None:
triples.append(_triple(uri, TG_IN_TOKEN, _literal(str(in_token))))
if out_token is not None:
triples.append(_triple(uri, TG_OUT_TOKEN, _literal(str(out_token))))
if model is not None:
triples.append(_triple(uri, TG_LLM_MODEL, _literal(model)))
def document_triples(
doc_uri: str,
title: Optional[str] = None,
@ -396,6 +409,9 @@ def grounding_triples(
grounding_uri: str,
question_uri: str,
concepts: List[str],
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for a grounding entity (concept decomposition of query).
@ -423,6 +439,8 @@ def grounding_triples(
for concept in concepts:
triples.append(_triple(grounding_uri, TG_CONCEPT, _literal(concept)))
_append_token_triples(triples, grounding_uri, in_token, out_token, model)
return triples
@ -485,6 +503,9 @@ def focus_triples(
exploration_uri: str,
selected_edges_with_reasoning: List[dict],
session_id: str = "",
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for a focus entity (selected edges with reasoning).
@ -543,6 +564,8 @@ def focus_triples(
_triple(edge_sel_uri, TG_REASONING, _literal(reasoning))
)
_append_token_triples(triples, focus_uri, in_token, out_token, model)
return triples
@ -550,6 +573,9 @@ def synthesis_triples(
synthesis_uri: str,
focus_uri: str,
document_id: Optional[str] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for a synthesis entity (final answer).
@ -578,6 +604,8 @@ def synthesis_triples(
if document_id:
triples.append(_triple(synthesis_uri, TG_DOCUMENT, _iri(document_id)))
_append_token_triples(triples, synthesis_uri, in_token, out_token, model)
return triples
@ -674,6 +702,9 @@ def docrag_synthesis_triples(
synthesis_uri: str,
exploration_uri: str,
document_id: Optional[str] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for a document RAG synthesis entity (final answer).
@ -702,4 +733,6 @@ def docrag_synthesis_triples(
if document_id:
triples.append(_triple(synthesis_uri, TG_DOCUMENT, _iri(document_id)))
_append_token_triples(triples, synthesis_uri, in_token, out_token, model)
return triples

View file

@ -259,6 +259,11 @@ def agent_synthesis_uri(session_id: str) -> str:
return f"urn:trustgraph:agent:{session_id}/synthesis"
def agent_pattern_decision_uri(session_id: str) -> str:
"""Generate URI for a meta-router pattern decision."""
return f"urn:trustgraph:agent:{session_id}/pattern-decision"
# Document RAG provenance URIs
# These URIs use the urn:trustgraph:docrag: namespace to distinguish
# document RAG provenance from graph RAG provenance

View file

@ -51,8 +51,8 @@ class AgentRequest:
@dataclass
class AgentResponse:
# Streaming-first design
chunk_type: str = "" # "thought", "action", "observation", "answer", "explain", "error"
content: str = "" # The actual content (interpretation depends on chunk_type)
message_type: str = "" # "thought", "action", "observation", "answer", "explain", "error"
content: str = "" # The actual content (interpretation depends on message_type)
end_of_message: bool = False # Current message type (thought/action/etc.) is complete
end_of_dialog: bool = False # Entire agent dialog is complete
@ -66,5 +66,10 @@ class AgentResponse:
error: Error | None = None
# Token usage (populated on end_of_dialog message)
in_token: int | None = None
out_token: int | None = None
model: str | None = None
############################################################################

View file

@ -17,9 +17,9 @@ class TextCompletionRequest:
class TextCompletionResponse:
error: Error | None = None
response: str = ""
in_token: int = 0
out_token: int = 0
model: str = ""
in_token: int | None = None
out_token: int | None = None
model: str | None = None
end_of_stream: bool = False # Indicates final message in stream
############################################################################

View file

@ -41,4 +41,9 @@ class PromptResponse:
# Indicates final message in stream
end_of_stream: bool = False
# Token usage from the underlying text completion
in_token: int | None = None
out_token: int | None = None
model: str | None = None
############################################################################

View file

@ -29,6 +29,9 @@ class GraphRagResponse:
explain_triples: list[Triple] = field(default_factory=list) # Provenance triples for this step
message_type: str = "" # "chunk" or "explain"
end_of_session: bool = False # Entire session complete
in_token: int | None = None
out_token: int | None = None
model: str | None = None
############################################################################
@ -52,3 +55,6 @@ class DocumentRagResponse:
explain_triples: list[Triple] = field(default_factory=list) # Provenance triples for this step
message_type: str = "" # "chunk" or "explain"
end_of_session: bool = False # Entire session complete
in_token: int | None = None
out_token: int | None = None
model: str | None = None