Mirror of https://github.com/trustgraph-ai/trustgraph.git
Synced 2026-04-25 00:16:23 +02:00

Commit 1f30a3bcea: Merge branch 'release/v2.3'
155 changed files with 6526 additions and 1885 deletions
@@ -15,6 +15,7 @@ dependencies = [
"requests",
"python-logging-loki",
"pika",
"pyyaml",
]
classifiers = [
"Programming Language :: Python :: 3",

@@ -24,6 +25,9 @@ classifiers = [
[project.urls]
Homepage = "https://github.com/trustgraph-ai/trustgraph"

[project.scripts]
processor-group = "trustgraph.base.processor_group:run"

[tool.setuptools.packages.find]
include = ["trustgraph*"]

@@ -31,4 +35,4 @@ include = ["trustgraph*"]
"trustgraph.i18n.packs" = ["*.json"]

[tool.setuptools.dynamic]
version = {attr = "trustgraph.base_version.__version__"}
version = {attr = "trustgraph.base_version.__version__"}
@@ -107,6 +107,7 @@ from .types import (
AgentObservation,
AgentAnswer,
RAGChunk,
TextCompletionResult,
ProvenanceEvent,
)

@@ -185,6 +186,7 @@ __all__ = [
"AgentObservation",
"AgentAnswer",
"RAGChunk",
"TextCompletionResult",
"ProvenanceEvent",

# Exceptions
@@ -14,6 +14,8 @@ import aiohttp
import json
from typing import Optional, Dict, Any, List

from . types import TextCompletionResult

from . exceptions import ProtocolException, ApplicationException

@@ -434,12 +436,11 @@ class AsyncFlowInstance:
return await self.request("agent", request_data)

async def text_completion(self, system: str, prompt: str, **kwargs: Any) -> str:
async def text_completion(self, system: str, prompt: str, **kwargs: Any) -> TextCompletionResult:
"""
Generate text completion (non-streaming).

Generates a text response from an LLM given a system prompt and user prompt.
Returns the complete response text.

Note: This method does not support streaming. For streaming text generation,
use AsyncSocketFlowInstance.text_completion() instead.

@@ -450,19 +451,19 @@ class AsyncFlowInstance:
**kwargs: Additional service-specific parameters

Returns:
str: Complete generated text response
TextCompletionResult: Result with text, in_token, out_token, model

Example:
```python
async_flow = await api.async_flow()
flow = async_flow.id("default")

# Generate text
response = await flow.text_completion(
result = await flow.text_completion(
system="You are a helpful assistant.",
prompt="Explain quantum computing in simple terms."
)
print(response)
print(result.text)
print(f"Tokens: {result.in_token} in, {result.out_token} out")
```
"""
request_data = {

@@ -473,7 +474,12 @@ class AsyncFlowInstance:
request_data.update(kwargs)

result = await self.request("text-completion", request_data)
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)

async def graph_rag(self, query: str, user: str, collection: str,
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,
@@ -4,7 +4,7 @@ import asyncio
import websockets
from typing import Optional, Dict, Any, AsyncIterator, Union

from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk
from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk, TextCompletionResult
from . exceptions import ProtocolException, ApplicationException

@@ -178,30 +178,32 @@ class AsyncSocketClient:
def _parse_chunk(self, resp: Dict[str, Any]):
"""Parse response chunk into appropriate type. Returns None for non-content messages."""
chunk_type = resp.get("chunk_type")
message_type = resp.get("message_type")

# Handle new GraphRAG message format with message_type
if message_type == "provenance":
return None

if chunk_type == "thought":
if message_type == "thought":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
)
elif chunk_type == "observation":
elif message_type == "observation":
return AgentObservation(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
)
elif chunk_type == "answer" or chunk_type == "final-answer":
elif message_type == "answer" or message_type == "final-answer":
return AgentAnswer(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
end_of_dialog=resp.get("end_of_dialog", False)
end_of_dialog=resp.get("end_of_dialog", False),
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
elif chunk_type == "action":
elif message_type == "action":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)

@@ -211,7 +213,10 @@ class AsyncSocketClient:
return RAGChunk(
content=content,
end_of_stream=resp.get("end_of_stream", False),
error=None
error=None,
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)

async def aclose(self):

@@ -269,7 +274,11 @@ class AsyncSocketFlowInstance:
return await self.client._send_request("agent", self.flow_id, request)

async def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs):
"""Text completion with optional streaming"""
"""Text completion with optional streaming.

Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an async iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"system": system,
"prompt": prompt,

@@ -281,13 +290,18 @@ class AsyncSocketFlowInstance:
return self._text_completion_streaming(request)
else:
result = await self.client._send_request("text-completion", self.flow_id, request)
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)

async def _text_completion_streaming(self, request):
"""Helper for streaming text completion"""
"""Helper for streaming text completion. Yields RAGChunk objects."""
async for chunk in self.client._send_request_streaming("text-completion", self.flow_id, request):
if hasattr(chunk, 'content'):
yield chunk.content
if isinstance(chunk, RAGChunk):
yield chunk

async def graph_rag(self, query: str, user: str, collection: str,
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,
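With this change the streaming path yields RAGChunk objects rather than bare strings, so a caller can accumulate text and read the token counts off the final chunk. A minimal consumption sketch, assuming a flow object obtained as in the docstring examples above (the prompt text and helper name are illustrative, not part of this commit):

```python
# Sketch only: collect streamed text and pick up token counts from the
# end_of_stream chunk. `flow` is an AsyncSocketFlowInstance.
async def stream_completion(flow):
    parts = []
    in_tok = out_tok = None
    async for chunk in await flow.text_completion(
        system="You are a helpful assistant.",
        prompt="Summarise the release in one sentence.",
        streaming=True,
    ):
        parts.append(chunk.content)
        if chunk.end_of_stream:
            in_tok, out_tok = chunk.in_token, chunk.out_token
    return "".join(parts), in_tok, out_tok
```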
@@ -11,7 +11,7 @@ import base64
from .. knowledge import hash, Uri, Literal, QuotedTriple
from .. schema import IRI, LITERAL, TRIPLE
from . types import Triple
from . types import Triple, TextCompletionResult
from . exceptions import ProtocolException

@@ -360,16 +360,17 @@ class FlowInstance:
prompt: User prompt/question

Returns:
str: Generated response text
TextCompletionResult: Result with text, in_token, out_token, model

Example:
```python
flow = api.flow().id("default")
response = flow.text_completion(
result = flow.text_completion(
system="You are a helpful assistant",
prompt="What is quantum computing?"
)
print(response)
print(result.text)
print(f"Tokens: {result.in_token} in, {result.out_token} out")
```
"""

@@ -379,10 +380,17 @@ class FlowInstance:
"prompt": prompt
}

return self.request(
result = self.request(
"service/text-completion",
input
)["response"]
)

return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)

def agent(self, question, user="trustgraph", state=None, group=None, history=None):
"""

@@ -498,10 +506,17 @@ class FlowInstance:
"edge-limit": edge_limit,
}

return self.request(
result = self.request(
"service/graph-rag",
input
)["response"]
)

return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)

def document_rag(
self, query, user="trustgraph", collection="default",

@@ -543,10 +558,17 @@ class FlowInstance:
"doc-limit": doc_limit,
}

return self.request(
result = self.request(
"service/document-rag",
input
)["response"]
)

return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)

def embeddings(self, texts):
"""
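The same change applies to the synchronous RAG helpers: graph_rag() and document_rag() now hand back a TextCompletionResult instead of a plain string. A hedged sketch of the non-streaming call path, using the parameter names visible in the docstring example and request fields above (default values are assumptions):

```python
# Sketch: RAG helpers now return a result object, not a bare string.
flow = api.flow().id("default")

result = flow.graph_rag(
    query="What is TrustGraph?",
    user="trustgraph",
    collection="default",
)

print(result.text)
if result.in_token is not None:
    print(f"Tokens: {result.in_token} in, {result.out_token} out ({result.model})")
```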
@@ -14,7 +14,7 @@ import websockets
from typing import Optional, Dict, Any, Iterator, Union, List
from threading import Lock

from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk, StreamingChunk, ProvenanceEvent
from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk, StreamingChunk, ProvenanceEvent, TextCompletionResult
from . exceptions import ProtocolException, raise_from_error_dict

@@ -360,41 +360,36 @@ class SocketClient:
def _parse_chunk(self, resp: Dict[str, Any], include_provenance: bool = False) -> Optional[StreamingChunk]:
"""Parse response chunk into appropriate type. Returns None for non-content messages."""
chunk_type = resp.get("chunk_type")
message_type = resp.get("message_type")

# Handle GraphRAG/DocRAG message format with message_type
if message_type == "explain":
if include_provenance:
return self._build_provenance_event(resp)
return None

# Handle Agent message format with chunk_type="explain"
if chunk_type == "explain":
if include_provenance:
return self._build_provenance_event(resp)
return None

if chunk_type == "thought":
if message_type == "thought":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
message_id=resp.get("message_id", ""),
)
elif chunk_type == "observation":
elif message_type == "observation":
return AgentObservation(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
message_id=resp.get("message_id", ""),
)
elif chunk_type == "answer" or chunk_type == "final-answer":
elif message_type == "answer" or message_type == "final-answer":
return AgentAnswer(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
end_of_dialog=resp.get("end_of_dialog", False),
message_id=resp.get("message_id", ""),
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
elif chunk_type == "action":
elif message_type == "action":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)

@@ -404,7 +399,10 @@ class SocketClient:
return RAGChunk(
content=content,
end_of_stream=resp.get("end_of_stream", False),
error=None
error=None,
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)

def _build_provenance_event(self, resp: Dict[str, Any]) -> ProvenanceEvent:

@@ -543,8 +541,12 @@ class SocketFlowInstance:
streaming=True, include_provenance=True
)

def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs) -> Union[str, Iterator[str]]:
"""Execute text completion with optional streaming."""
def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute text completion with optional streaming.

Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"system": system,
"prompt": prompt,

@@ -557,12 +559,17 @@ class SocketFlowInstance:
if streaming:
return self._text_completion_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)

def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]:
def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
for chunk in result:
if hasattr(chunk, 'content'):
yield chunk.content
if isinstance(chunk, RAGChunk):
yield chunk

def graph_rag(
self,

@@ -577,8 +584,12 @@ class SocketFlowInstance:
edge_limit: int = 25,
streaming: bool = False,
**kwargs: Any
) -> Union[str, Iterator[str]]:
"""Execute graph-based RAG query with optional streaming."""
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute graph-based RAG query with optional streaming.

Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"query": query,
"user": user,

@@ -598,7 +609,12 @@ class SocketFlowInstance:
if streaming:
return self._rag_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)

def graph_rag_explain(
self,

@@ -642,8 +658,12 @@ class SocketFlowInstance:
doc_limit: int = 10,
streaming: bool = False,
**kwargs: Any
) -> Union[str, Iterator[str]]:
"""Execute document-based RAG query with optional streaming."""
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute document-based RAG query with optional streaming.

Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"query": query,
"user": user,

@@ -658,7 +678,12 @@ class SocketFlowInstance:
if streaming:
return self._rag_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)

def document_rag_explain(
self,

@@ -684,10 +709,10 @@ class SocketFlowInstance:
streaming=True, include_provenance=True
)

def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]:
def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
for chunk in result:
if hasattr(chunk, 'content'):
yield chunk.content
if isinstance(chunk, RAGChunk):
yield chunk

def prompt(
self,

@@ -695,8 +720,12 @@ class SocketFlowInstance:
variables: Dict[str, str],
streaming: bool = False,
**kwargs: Any
) -> Union[str, Iterator[str]]:
"""Execute a prompt template with optional streaming."""
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute a prompt template with optional streaming.

Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"id": id,
"variables": variables,

@@ -709,7 +738,12 @@ class SocketFlowInstance:
if streaming:
return self._rag_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("text", result.get("response", "")),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)

def graph_embeddings_query(
self,
@@ -149,10 +149,10 @@ class AgentThought(StreamingChunk):
Attributes:
content: Agent's thought text
end_of_message: True if this completes the current thought
chunk_type: Always "thought"
message_type: Always "thought"
message_id: Provenance URI of the entity being built
"""
chunk_type: str = "thought"
message_type: str = "thought"
message_id: str = ""

@dataclasses.dataclass

@@ -166,10 +166,10 @@ class AgentObservation(StreamingChunk):
Attributes:
content: Observation text describing tool results
end_of_message: True if this completes the current observation
chunk_type: Always "observation"
message_type: Always "observation"
message_id: Provenance URI of the entity being built
"""
chunk_type: str = "observation"
message_type: str = "observation"
message_id: str = ""

@dataclasses.dataclass

@@ -184,11 +184,14 @@ class AgentAnswer(StreamingChunk):
content: Answer text
end_of_message: True if this completes the current answer segment
end_of_dialog: True if this completes the entire agent interaction
chunk_type: Always "final-answer"
message_type: Always "final-answer"
"""
chunk_type: str = "final-answer"
message_type: str = "final-answer"
end_of_dialog: bool = False
message_id: str = ""
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None

@dataclasses.dataclass
class RAGChunk(StreamingChunk):

@@ -202,11 +205,37 @@ class RAGChunk(StreamingChunk):
content: Generated text content
end_of_stream: True if this is the final chunk of the stream
error: Optional error information if an error occurred
chunk_type: Always "rag"
in_token: Input token count (populated on the final chunk, 0 otherwise)
out_token: Output token count (populated on the final chunk, 0 otherwise)
model: Model identifier (populated on the final chunk, empty otherwise)
message_type: Always "rag"
"""
chunk_type: str = "rag"
message_type: str = "rag"
end_of_stream: bool = False
error: Optional[Dict[str, str]] = None
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None

@dataclasses.dataclass
class TextCompletionResult:
"""
Result from a text completion request.

Returned by text_completion() in both streaming and non-streaming modes.
In streaming mode, text is None (chunks are delivered via the iterator).
In non-streaming mode, text contains the complete response.

Attributes:
text: Complete response text (None in streaming mode)
in_token: Input token count (None if not available)
out_token: Output token count (None if not available)
model: Model identifier (None if not available)
"""
text: Optional[str]
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None

@dataclasses.dataclass
class ProvenanceEvent:
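Because text_completion() now returns either a TextCompletionResult or an iterator of RAGChunk depending on the streaming flag, a caller that accepts both modes can normalise on the type. A small sketch, assuming the two dataclasses defined above are in scope (helper name is illustrative):

```python
from typing import Iterable, Union

# Sketch: reduce both return shapes to (text, in_token, out_token).
def collect(result: Union["TextCompletionResult", Iterable["RAGChunk"]]):
    if isinstance(result, TextCompletionResult):
        return result.text, result.in_token, result.out_token
    parts, in_tok, out_tok = [], None, None
    for chunk in result:              # streaming mode: RAGChunk objects
        parts.append(chunk.content)
        if chunk.end_of_stream:       # token counts ride on the final chunk
            in_tok, out_tok = chunk.in_token, chunk.out_token
    return "".join(parts), in_tok, out_tok
```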
@@ -18,8 +18,10 @@ from . librarian_client import LibrarianClient
from . chunking_service import ChunkingService
from . embeddings_service import EmbeddingsService
from . embeddings_client import EmbeddingsClientSpec
from . text_completion_client import TextCompletionClientSpec
from . prompt_client import PromptClientSpec
from . text_completion_client import (
TextCompletionClientSpec, TextCompletionClient, TextCompletionResult,
)
from . prompt_client import PromptClientSpec, PromptClient, PromptResult
from . triples_store_service import TriplesStoreService
from . graph_embeddings_store_service import GraphEmbeddingsStoreService
from . document_embeddings_store_service import DocumentEmbeddingsStoreService
@@ -30,19 +30,19 @@ class AgentClient(RequestResponse):
raise RuntimeError(resp.error.message)

# Handle thought chunks
if resp.chunk_type == 'thought':
if resp.message_type == 'thought':
if think:
await think(resp.content, resp.end_of_message)
return False  # Continue receiving

# Handle observation chunks
if resp.chunk_type == 'observation':
if resp.message_type == 'observation':
if observe:
await observe(resp.content, resp.end_of_message)
return False  # Continue receiving

# Handle answer chunks
if resp.chunk_type == 'answer':
if resp.message_type == 'answer':
if resp.content:
accumulated_answer.append(resp.content)
if answer_callback:
@@ -58,6 +58,18 @@ class BackendProducer(Protocol):
class BackendConsumer(Protocol):
"""Protocol for backend-specific consumer."""

def ensure_connected(self) -> None:
"""
Eagerly establish the underlying connection and bind the queue.

Backends that lazily connect on first receive() must implement this
so that callers can guarantee the consumer is fully bound — and
therefore able to receive responses — before any related request is
published. Backends that connect at construction time may make this
a no-op.
"""
...

def receive(self, timeout_millis: int = 2000) -> Message:
"""
Receive a message from the topic.
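A backend that already connects in its constructor can make ensure_connected() a no-op, while a lazily-connecting backend does its declare/bind work there. A toy, in-memory consumer illustrating the protocol shape (hypothetical, not part of this commit):

```python
import queue

class InMemoryBackendConsumer:
    """Sketch: minimal object satisfying the BackendConsumer protocol."""

    def __init__(self):
        self._queue = queue.Queue()
        self._bound = False

    def ensure_connected(self) -> None:
        # Lazy backends declare/bind here so a response queue exists
        # before any related request is published.
        self._bound = True

    def receive(self, timeout_millis: int = 2000):
        try:
            return self._queue.get(timeout=timeout_millis / 1000.0)
        except queue.Empty:
            raise TimeoutError("No message received within timeout")
```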
@@ -88,14 +88,14 @@ class ChunkingService(FlowProcessor):
chunk_overlap = default_chunk_overlap

try:
cs = flow.parameters.get("chunk-size")
cs = flow("chunk-size")
if cs is not None:
chunk_size = int(cs)
except Exception as e:
logger.warning(f"Could not parse chunk-size parameter: {e}")

try:
co = flow.parameters.get("chunk-overlap")
co = flow("chunk-overlap")
if co is not None:
chunk_overlap = int(co)
except Exception as e:
@@ -8,12 +8,51 @@ ensuring consistent log formats, levels, and command-line arguments.
Supports dual output to console and Loki for centralized log aggregation.
"""

import contextvars
import logging
import logging.handlers
from queue import Queue
import os

# The current processor id for this task context. Read by
# _ProcessorIdFilter to stamp every LogRecord with its owning
# processor, and read by logging_loki's emitter via record.tags
# to label log lines in Loki. ContextVar so asyncio subtasks
# inherit their parent supervisor's processor id automatically.
current_processor_id = contextvars.ContextVar(
"current_processor_id", default="unknown"
)

def set_processor_id(pid):
"""Set the processor id for the current task context.

All subsequent log records emitted from this task — and any
asyncio tasks spawned from it — will be tagged with this id
in the console format and in Loki labels.
"""
current_processor_id.set(pid)

class _ProcessorIdFilter(logging.Filter):
"""Stamps every LogRecord with processor_id from the contextvar.

Attaches two fields to each record:
record.processor_id — used by the console format string
record.tags — merged into Loki labels by logging_loki's
emitter (it reads record.tags and combines
with the handler's static tags)
"""

def filter(self, record):
pid = current_processor_id.get()
record.processor_id = pid
existing = getattr(record, "tags", None) or {}
record.tags = {**existing, "processor": pid}
return True

def add_logging_args(parser):
"""
Add standard logging arguments to an argument parser.

@@ -87,12 +126,15 @@ def setup_logging(args):
loki_url = args.get('loki_url', 'http://loki:3100/loki/api/v1/push')
loki_username = args.get('loki_username')
loki_password = args.get('loki_password')
processor_id = args.get('id')  # Processor identity (e.g., "config-svc", "text-completion")

try:
from logging_loki import LokiHandler

# Create Loki handler with optional authentication and processor label
# Create Loki handler with optional authentication. The
# processor label is NOT baked in here — it's stamped onto
# each record by _ProcessorIdFilter reading the task-local
# contextvar, and logging_loki's emitter reads record.tags
# to build per-record Loki labels.
loki_handler_kwargs = {
'url': loki_url,
'version': "1",

@@ -101,10 +143,6 @@ def setup_logging(args):
if loki_username and loki_password:
loki_handler_kwargs['auth'] = (loki_username, loki_password)

# Add processor label if available (for consistency with Prometheus metrics)
if processor_id:
loki_handler_kwargs['tags'] = {'processor': processor_id}

loki_handler = LokiHandler(**loki_handler_kwargs)

# Wrap in QueueHandler for non-blocking operation

@@ -133,23 +171,44 @@ def setup_logging(args):
print(f"WARNING: Failed to setup Loki logging: {e}")
print("Continuing with console-only logging")

# Get processor ID for log formatting (use 'unknown' if not available)
processor_id = args.get('id', 'unknown')

# Configure logging with all handlers
# Use processor ID as the primary identifier in logs
# Configure logging with all handlers. The processor id comes
# from _ProcessorIdFilter (via contextvar) and is injected into
# each record as record.processor_id. The format string reads
# that attribute on every emit.
logging.basicConfig(
level=getattr(logging, log_level.upper()),
format=f'%(asctime)s - {processor_id} - %(levelname)s - %(message)s',
format='%(asctime)s - %(processor_id)s - %(levelname)s - %(message)s',
handlers=handlers,
force=True  # Force reconfiguration if already configured
)

# Prevent recursive logging from Loki's HTTP client
if loki_enabled and queue_listener:
# Disable urllib3 logging to prevent infinite loop
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('urllib3.connectionpool').setLevel(logging.WARNING)
# Attach the processor-id filter to every handler so all records
# passing through any sink get stamped (console, queue→loki,
# future handlers). Filters on handlers run regardless of which
# logger originated the record, so logs from pika, cassandra,
# processor code, etc. all pass through it.
processor_filter = _ProcessorIdFilter()
for h in handlers:
h.addFilter(processor_filter)

# Seed the contextvar from --id if one was supplied. In group
# mode --id isn't present; the processor_group supervisor sets
# it per task. In standalone mode AsyncProcessor.launch provides
# it via argparse default.
if args.get('id'):
set_processor_id(args['id'])

# Silence noisy third-party library loggers. These emit INFO-level
# chatter (connection churn, channel open/close, driver warnings) that
# drowns the useful signal and can't be attributed to a specific
# processor anyway. WARNING and above still propagate.
for noisy in (
'pika',
'cassandra',
'urllib3',
'urllib3.connectionpool',
):
logging.getLogger(noisy).setLevel(logging.WARNING)

logger = logging.getLogger(__name__)
logger.info(f"Logging configured with level: {log_level}")
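The mechanism is standard library only: a ContextVar holds the current processor id and a logging.Filter copies it onto each record, so the console format string (and the Loki tags) can read it per record rather than per process. A stripped-down, standalone sketch of the same idea:

```python
import contextvars
import logging

current_id = contextvars.ContextVar("current_id", default="unknown")

class IdFilter(logging.Filter):
    def filter(self, record):
        # Stamp every record with the task-local id.
        record.processor_id = current_id.get()
        return True

handler = logging.StreamHandler()
handler.addFilter(IdFilter())
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(processor_id)s - %(levelname)s - %(message)s",
    handlers=[handler],
    force=True,
)

current_id.set("demo-processor")
logging.getLogger(__name__).info("tagged with the task-local id")
```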
trustgraph-base/trustgraph/base/processor_group.py (new file, 204 lines)

@@ -0,0 +1,204 @@
# Multi-processor group runner. Runs multiple AsyncProcessor descendants
# as concurrent tasks inside a single process, sharing one event loop,
# one Prometheus HTTP server, and one pub/sub backend pool.
#
# Intended for dev and resource-constrained deployments. Scale deployments
# should continue to use per-processor endpoints.
#
# Group config is a YAML or JSON file with shape:
#
# processors:
#   - class: trustgraph.extract.kg.definitions.extract.Processor
#     params:
#       id: kg-extract-definitions
#       triples_batch_size: 1000
#   - class: trustgraph.chunking.recursive.Processor
#     params:
#       id: chunker-recursive
#
# Each entry's params are passed directly to the class constructor alongside
# the shared taskgroup. Defaults live inside each processor class.

import argparse
import asyncio
import importlib
import json
import logging
import time

from prometheus_client import start_http_server

from . logging import add_logging_args, setup_logging, set_processor_id

logger = logging.getLogger(__name__)

def _load_config(path):
with open(path) as f:
text = f.read()
if path.endswith((".yaml", ".yml")):
import yaml
return yaml.safe_load(text)
return json.loads(text)

def _resolve_class(dotted):
module_path, _, class_name = dotted.rpartition(".")
if not module_path:
raise ValueError(
f"Processor class must be a dotted path, got {dotted!r}"
)
module = importlib.import_module(module_path)
return getattr(module, class_name)

RESTART_DELAY_SECONDS = 4

async def _supervise(entry):
"""Run one processor with its own nested TaskGroup, restarting on any
failure. Each processor is isolated from its siblings — a crash here
does not propagate to the outer group."""

pid = entry["params"]["id"]
class_path = entry["class"]

# Stamp the contextvar for this supervisor task. Every log
# record emitted from this task — and from any inner TaskGroup
# child created by the processor — inherits this id via
# contextvar propagation. Siblings in the outer group set
# their own id in their own task context and do not interfere.
set_processor_id(pid)

while True:

try:

async with asyncio.TaskGroup() as inner_tg:

cls = _resolve_class(class_path)
params = dict(entry.get("params", {}))
params["taskgroup"] = inner_tg

logger.info(f"Starting {class_path} as {pid}")

p = cls(**params)
await p.start()
inner_tg.create_task(p.run())

# Clean exit — processor's run() returned without raising.
# Treat as a transient shutdown and restart, matching the
# behaviour of per-container `restart: on-failure`.
logger.warning(
f"Processor {pid} exited cleanly, will restart"
)

except asyncio.CancelledError:
logger.info(f"Processor {pid} cancelled")
raise

except BaseExceptionGroup as eg:
for e in eg.exceptions:
logger.error(
f"Processor {pid} failure: {type(e).__name__}: {e}",
exc_info=e,
)

except Exception as e:
logger.error(
f"Processor {pid} failure: {type(e).__name__}: {e}",
exc_info=True,
)

logger.info(
f"Restarting {pid} in {RESTART_DELAY_SECONDS}s..."
)
await asyncio.sleep(RESTART_DELAY_SECONDS)

async def run_group(config):

entries = config.get("processors", [])
if not entries:
raise RuntimeError("Group config has no processors")

seen_ids = set()
for entry in entries:
pid = entry.get("params", {}).get("id")
if pid is None:
raise RuntimeError(
f"Entry {entry.get('class')!r} missing params.id — "
f"required for metrics labelling"
)
if pid in seen_ids:
raise RuntimeError(f"Duplicate processor id {pid!r} in group")
seen_ids.add(pid)

async with asyncio.TaskGroup() as outer_tg:
for entry in entries:
outer_tg.create_task(_supervise(entry))

def run():

parser = argparse.ArgumentParser(
prog="processor-group",
description="Run multiple processors as tasks in one process",
)

parser.add_argument(
"-c", "--config",
required=True,
help="Path to group config file (JSON or YAML)",
)

parser.add_argument(
"--metrics",
action=argparse.BooleanOptionalAction,
default=True,
help="Metrics enabled (default: true)",
)

parser.add_argument(
"-P", "--metrics-port",
type=int,
default=8000,
help="Prometheus metrics port (default: 8000)",
)

add_logging_args(parser)

args = vars(parser.parse_args())

setup_logging(args)

config = _load_config(args["config"])

if args["metrics"]:
start_http_server(args["metrics_port"])

while True:

logger.info("Starting group...")

try:
asyncio.run(run_group(config))

except KeyboardInterrupt:
logger.info("Keyboard interrupt.")
return

except ExceptionGroup as e:
logger.error("Exception group:")
for se in e.exceptions:
logger.error(f" Type: {type(se)}")
logger.error(f" Exception: {se}", exc_info=se)

except Exception as e:
logger.error(f"Type: {type(e)}")
logger.error(f"Exception: {e}", exc_info=True)

logger.warning("Will retry...")
time.sleep(4)
logger.info("Retrying...")
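run_group() can also be driven programmatically with the same structure the module comment documents for the YAML/JSON config. A hedged sketch; the processor class named below is the one cited in the module comment and is assumed to be installed:

```python
import asyncio

# Sketch: same shape as the config file, built in code.
config = {
    "processors": [
        {
            "class": "trustgraph.chunking.recursive.Processor",
            "params": {"id": "chunker-recursive"},
        },
    ],
}

# Each entry gets its own supervisor task and is restarted on failure.
# asyncio.run(run_group(config))
```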
@@ -1,10 +1,22 @@
import json
import asyncio
from dataclasses import dataclass
from typing import Optional, Any

from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import PromptRequest, PromptResponse

@dataclass
class PromptResult:
response_type: str              # "text", "json", or "jsonl"
text: Optional[str] = None      # populated for "text"
object: Any = None              # populated for "json"
objects: Optional[list] = None  # populated for "jsonl"
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None

class PromptClient(RequestResponse):

async def prompt(self, id, variables, timeout=600, streaming=False, chunk_callback=None):

@@ -26,17 +38,40 @@ class PromptClient(RequestResponse):
if resp.error:
raise RuntimeError(resp.error.message)

if resp.text: return resp.text
if resp.text:
return PromptResult(
response_type="text",
text=resp.text,
in_token=resp.in_token,
out_token=resp.out_token,
model=resp.model,
)

return json.loads(resp.object)
parsed = json.loads(resp.object)

if isinstance(parsed, list):
return PromptResult(
response_type="jsonl",
objects=parsed,
in_token=resp.in_token,
out_token=resp.out_token,
model=resp.model,
)

return PromptResult(
response_type="json",
object=parsed,
in_token=resp.in_token,
out_token=resp.out_token,
model=resp.model,
)

else:

last_text = ""
last_object = None
last_resp = None

async def forward_chunks(resp):
nonlocal last_text, last_object
nonlocal last_resp

if resp.error:
raise RuntimeError(resp.error.message)

@@ -44,14 +79,13 @@ class PromptClient(RequestResponse):
end_stream = getattr(resp, 'end_of_stream', False)

if resp.text is not None:
last_text = resp.text
if chunk_callback:
if asyncio.iscoroutinefunction(chunk_callback):
await chunk_callback(resp.text, end_stream)
else:
chunk_callback(resp.text, end_stream)
elif resp.object:
last_object = resp.object

last_resp = resp

return end_stream

@@ -70,10 +104,36 @@ class PromptClient(RequestResponse):
timeout=timeout
)

if last_text:
return last_text
if last_resp is None:
return PromptResult(response_type="text")

return json.loads(last_object) if last_object else None
if last_resp.object:
parsed = json.loads(last_resp.object)

if isinstance(parsed, list):
return PromptResult(
response_type="jsonl",
objects=parsed,
in_token=last_resp.in_token,
out_token=last_resp.out_token,
model=last_resp.model,
)

return PromptResult(
response_type="json",
object=parsed,
in_token=last_resp.in_token,
out_token=last_resp.out_token,
model=last_resp.model,
)

return PromptResult(
response_type="text",
text=last_resp.text,
in_token=last_resp.in_token,
out_token=last_resp.out_token,
model=last_resp.model,
)

async def extract_definitions(self, text, timeout=600):
return await self.prompt(

@@ -152,4 +212,3 @@ class PromptClientSpec(RequestResponseSpec):
response_schema = PromptResponse,
impl = PromptClient,
)
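Callers of PromptClient.prompt() now receive a PromptResult and can dispatch on response_type instead of guessing whether a str or parsed JSON came back. A hedged usage sketch; the template id and variable names are illustrative, and client construction is elided:

```python
async def show(prompt_client):
    # "question" and its variables are assumed template ids, not from this diff
    result = await prompt_client.prompt("question", {"text": "What is TrustGraph?"})

    if result.response_type == "text":
        print(result.text)
    elif result.response_type == "json":
        print(result.object)
    else:  # "jsonl"
        for obj in result.objects:
            print(obj)

    print(result.in_token, result.out_token, result.model)
```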
@@ -72,6 +72,16 @@ class PulsarBackendConsumer:
self._consumer = pulsar_consumer
self._schema_cls = schema_cls

def ensure_connected(self) -> None:
"""No-op for Pulsar.

PulsarBackend.create_consumer() calls client.subscribe() which is
synchronous and returns a fully-subscribed consumer, so the
consumer is already ready by the time this object is constructed.
Defined for parity with the BackendConsumer protocol used by
Subscriber.start()'s readiness barrier."""
pass

def receive(self, timeout_millis: int = 2000) -> Message:
"""Receive a message. Raises TimeoutError if no message available."""
try:
@@ -214,16 +214,43 @@ class RabbitMQBackendConsumer:
and self._channel.is_open
)

def ensure_connected(self) -> None:
"""Eagerly declare and bind the queue.

Without this, the queue is only declared lazily on the first
receive() call. For request/response with ephemeral per-subscriber
response queues that is a race: a request published before the
response queue is bound will have its reply silently dropped by
the broker. Subscriber.start() calls this so callers get a hard
readiness barrier."""
if not self._is_alive():
self._connect()

def receive(self, timeout_millis: int = 2000) -> Message:
"""Receive a message. Raises TimeoutError if none available."""
"""Receive a message. Raises TimeoutError if none available.

Loop ordering matters: check _incoming at the TOP of each
iteration, not as the loop condition. process_data_events
may dispatch a message via the _on_message callback during
the pump; we must re-check _incoming on the next iteration
before giving up on the deadline. The previous control
flow (`while deadline: check; pump`) could lose a wakeup if
the pump consumed the remainder of the window — the
`while` check would fail before `_incoming` was re-read,
leaving a just-dispatched message stranded until the next
receive() call one full poll cycle later.
"""
if not self._is_alive():
self._connect()

timeout_seconds = timeout_millis / 1000.0
deadline = time.monotonic() + timeout_seconds

while time.monotonic() < deadline:
# Check if a message was already delivered
while True:
# Check if a message has been dispatched to our queue.
# This catches both (a) messages dispatched before this
# receive() was called and (b) messages dispatched
# during the previous iteration's process_data_events.
try:
method, properties, body = self._incoming.get_nowait()
return RabbitMQMessage(

@@ -232,14 +259,16 @@ class RabbitMQBackendConsumer:
except queue.Empty:
pass

# Drive pika's I/O — delivers messages and processes heartbeats
remaining = deadline - time.monotonic()
if remaining > 0:
self._connection.process_data_events(
time_limit=min(0.1, remaining),
)
if remaining <= 0:
raise TimeoutError("No message received within timeout")

raise TimeoutError("No message received within timeout")
# Drive pika's I/O. Any messages delivered during this
# call land in _incoming via _on_message; the next
# iteration of this loop catches them at the top.
self._connection.process_data_events(
time_limit=min(0.1, remaining),
)

def acknowledge(self, message: Message) -> None:
if isinstance(message, RabbitMQMessage) and message._method:
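The fix described in the docstring is a general pattern: inside the deadline loop, check the in-process queue first, then pump the I/O, so anything delivered during the pump is picked up on the next pass rather than lost to the loop condition. A backend-agnostic sketch of that ordering (the `pump` callable stands in for process_data_events):

```python
import queue
import time

def receive(incoming: "queue.Queue", pump, timeout_millis: int = 2000):
    """Check-then-pump loop: never give up between a pump and the re-check."""
    deadline = time.monotonic() + timeout_millis / 1000.0
    while True:
        try:
            return incoming.get_nowait()      # catch anything already delivered
        except queue.Empty:
            pass
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError("No message received within timeout")
        pump(min(0.1, remaining))             # drive I/O; may fill `incoming`
```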
@@ -41,14 +41,55 @@ class Subscriber:
self.consumer = None
self.executor = None

# Readiness barrier — completed by run() once the underlying
# backend consumer is fully connected and bound. start() awaits
# this so callers know any subsequently published request will
# have a queue ready to receive its response. Without this,
# ephemeral per-subscriber response queues (RabbitMQ auto-delete
# exclusive queues) would race the request and lose the reply.
# A Future is used (rather than an Event) so that a first-attempt
# connection failure can be propagated to start() as an exception.
self._ready = None  # created in start() so we have a running loop

def __del__(self):

self.running = False

async def start(self):

self._ready = asyncio.get_event_loop().create_future()
self.task = asyncio.create_task(self.run())

# Block until run() signals readiness OR exits. The future
# carries the outcome of the first connect attempt: a value on
# success, an exception on first-attempt failure. If run() exits
# without ever signalling (e.g. cancelled, or a code path bug),
# we surface that as a clear RuntimeError rather than hanging
# forever waiting on the future.
ready_wait = asyncio.ensure_future(
asyncio.shield(self._ready)
)
try:
await asyncio.wait(
{self.task, ready_wait},
return_when=asyncio.FIRST_COMPLETED,
)
finally:
ready_wait.cancel()

if self._ready.done():
# Re-raise first-attempt connect failure if any.
self._ready.result()
return

# run() exited before _ready was settled. Propagate its exception
# if it had one, otherwise raise a generic readiness error.
if self.task.done() and self.task.exception() is not None:
raise self.task.exception()
raise RuntimeError(
"Subscriber.run() exited before signalling readiness"
)

async def stop(self):
"""Initiate graceful shutdown with draining"""
self.running = False

@@ -66,6 +107,7 @@ class Subscriber:
async def run(self):
"""Enhanced run method with integrated draining logic"""
first_attempt = True
while self.running or self.draining:

if self.metrics:

@@ -87,10 +129,27 @@ class Subscriber:
),
)

# Eagerly bind the queue. For backends that connect
# lazily on first receive (RabbitMQ), this is what
# closes the request/response setup race — without
# it the response queue is not bound until later and
# any reply published in the meantime is dropped.
await loop.run_in_executor(
self.executor,
lambda: self.consumer.ensure_connected(),
)

if self.metrics:
self.metrics.state("running")

logger.info("Subscriber running...")

# Signal start() that the consumer is ready. This must
# happen AFTER ensure_connected() above so callers can
# safely publish requests immediately after start() returns.
if first_attempt and not self._ready.done():
self._ready.set_result(None)
first_attempt = False
drain_end_time = None

while self.running or self.draining:

@@ -162,6 +221,16 @@ class Subscriber:
except Exception as e:
logger.error(f"Subscriber exception: {e}", exc_info=True)

# First-attempt connection failure: propagate to start()
# so the caller can decide what to do (retry, give up).
# Subsequent failures use the existing retry-with-backoff
# path so a long-lived subscriber survives broker blips.
if first_attempt and not self._ready.done():
self._ready.set_exception(e)
first_attempt = False
# Falls through into finally for cleanup, then the
# outer return below ends run() so start() unblocks.

finally:
# Negative acknowledge any pending messages
for msg in self.pending_acks.values():

@@ -193,6 +262,11 @@ class Subscriber:
if not self.running and not self.draining:
return

# If start() has already returned with an exception there is
# nothing more to do — exit run() rather than busy-retry.
if self._ready.done() and self._ready.exception() is not None:
return

# Sleep before retry
await asyncio.sleep(1)
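The readiness barrier is an asyncio Future settled by the background task once the consumer is bound; start() waits on both the future and the task, so a first-attempt failure surfaces as an exception instead of a hang. A condensed, standalone sketch of the same pattern (class and method bodies are illustrative, not the real Subscriber):

```python
import asyncio

class ReadinessDemo:
    """Sketch of the start()/run() readiness-barrier handshake."""

    async def start(self):
        self._ready = asyncio.get_event_loop().create_future()
        self.task = asyncio.create_task(self.run())
        ready_wait = asyncio.ensure_future(asyncio.shield(self._ready))
        try:
            await asyncio.wait({self.task, ready_wait},
                               return_when=asyncio.FIRST_COMPLETED)
        finally:
            ready_wait.cancel()
        if self._ready.done():
            self._ready.result()      # value on success, raises on failure
            return
        raise RuntimeError("run() exited before signalling readiness")

    async def run(self):
        try:
            await asyncio.sleep(0)    # stand-in for connect + ensure_connected()
            if not self._ready.done():
                self._ready.set_result(None)
        except Exception as e:
            if not self._ready.done():
                self._ready.set_exception(e)

# asyncio.run(ReadinessDemo().start())
```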
@@ -1,47 +1,71 @@
from dataclasses import dataclass
from typing import Optional

from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import TextCompletionRequest, TextCompletionResponse

@dataclass
class TextCompletionResult:
text: Optional[str]
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None

class TextCompletionClient(RequestResponse):
async def text_completion(self, system, prompt, streaming=False, timeout=600):
# If not streaming, use original behavior
if not streaming:
resp = await self.request(
TextCompletionRequest(
system = system, prompt = prompt, streaming = False
),
timeout=timeout
)

if resp.error:
raise RuntimeError(resp.error.message)
async def text_completion(self, system, prompt, timeout=600):

return resp.response

# For streaming: collect all chunks and return complete response
full_response = ""

async def collect_chunks(resp):
nonlocal full_response

if resp.error:
raise RuntimeError(resp.error.message)

if resp.response:
full_response += resp.response

# Return True when end_of_stream is reached
return getattr(resp, 'end_of_stream', False)

await self.request(
resp = await self.request(
TextCompletionRequest(
system = system, prompt = prompt, streaming = True
system = system, prompt = prompt, streaming = False
),
recipient=collect_chunks,
timeout=timeout
)

return full_response
if resp.error:
raise RuntimeError(resp.error.message)

return TextCompletionResult(
text = resp.response,
in_token = resp.in_token,
out_token = resp.out_token,
model = resp.model,
)

async def text_completion_stream(
self, system, prompt, handler, timeout=600,
):
"""
Streaming text completion. `handler` is an async callable invoked
once per chunk with the chunk's TextCompletionResponse. Returns a
TextCompletionResult with text=None and token counts / model taken
from the end_of_stream message.
"""

async def on_chunk(resp):

if resp.error:
raise RuntimeError(resp.error.message)

await handler(resp)

return getattr(resp, "end_of_stream", False)

final = await self.request(
TextCompletionRequest(
system = system, prompt = prompt, streaming = True
),
recipient=on_chunk,
timeout=timeout,
)

return TextCompletionResult(
text = None,
in_token = final.in_token,
out_token = final.out_token,
model = final.model,
)

class TextCompletionClientSpec(RequestResponseSpec):
def __init__(

@@ -54,4 +78,3 @@ class TextCompletionClientSpec(RequestResponseSpec):
response_schema = TextCompletionResponse,
impl = TextCompletionClient,
)
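The new text_completion_stream() pushes each TextCompletionResponse chunk to an async handler and returns a TextCompletionResult whose token counts come from the end_of_stream message. A hedged usage sketch; client construction is elided and the prompts are illustrative:

```python
async def stream_demo(client):
    parts = []

    async def on_chunk(resp):
        # Each chunk is a TextCompletionResponse; text arrives in resp.response.
        if resp.response:
            parts.append(resp.response)

    final = await client.text_completion_stream(
        system="You are a helpful assistant.",
        prompt="Explain the release in one sentence.",
        handler=on_chunk,
    )
    return "".join(parts), final.in_token, final.out_token, final.model
```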
@@ -58,23 +58,23 @@ class AgentClient(BaseClient):
def inspect(x):
# Handle errors
if x.chunk_type == 'error' or x.error:
if x.message_type == 'error' or x.error:
if error_callback:
error_callback(x.content or (x.error.message if x.error else ""))
# Continue to check end_of_dialog

# Handle thought chunks
elif x.chunk_type == 'thought':
elif x.message_type == 'thought':
if think:
think(x.content, x.end_of_message)

# Handle observation chunks
elif x.chunk_type == 'observation':
elif x.message_type == 'observation':
if observe:
observe(x.content, x.end_of_message)

# Handle answer chunks
elif x.chunk_type == 'answer':
elif x.message_type == 'answer':
if x.content:
accumulated_answer.append(x.content)
if answer_callback:
@@ -60,8 +60,8 @@ class AgentResponseTranslator(MessageTranslator):
def encode(self, obj: AgentResponse) -> Dict[str, Any]:
result = {}

if obj.chunk_type:
result["chunk_type"] = obj.chunk_type
if obj.message_type:
result["message_type"] = obj.message_type
if obj.content:
result["content"] = obj.content
result["end_of_message"] = getattr(obj, "end_of_message", False)

@@ -90,6 +90,13 @@ class AgentResponseTranslator(MessageTranslator):
if hasattr(obj, 'error') and obj.error and obj.error.message:
result["error"] = {"message": obj.error.message, "code": obj.error.code}

if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model

return result

def encode_with_completion(self, obj: AgentResponse) -> Tuple[Dict[str, Any], bool]:
@@ -151,7 +151,7 @@ class DocumentEmbeddingsTranslator(SendTranslator):
chunks = [
ChunkEmbeddings(
chunk_id=chunk["chunk_id"],
vectors=chunk["vectors"]
vector=chunk["vector"]
)
for chunk in data.get("chunks", [])
]
@@ -39,7 +39,7 @@ class KnowledgeRequestTranslator(MessageTranslator):
entities=[
EntityEmbeddings(
entity=self.value_translator.decode(ent["entity"]),
vectors=ent["vectors"],
vector=ent["vector"],
)
for ent in data["graph-embeddings"]["entities"]
]
@@ -53,6 +53,13 @@ class PromptResponseTranslator(MessageTranslator):
# Always include end_of_stream flag for streaming support
result["end_of_stream"] = getattr(obj, "end_of_stream", False)

if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model

return result

def encode_with_completion(self, obj: PromptResponse) -> Tuple[Dict[str, Any], bool]:
@@ -74,6 +74,13 @@ class DocumentRagResponseTranslator(MessageTranslator):
if hasattr(obj, 'error') and obj.error and obj.error.message:
result["error"] = {"message": obj.error.message, "type": obj.error.type}

if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model

return result

def encode_with_completion(self, obj: DocumentRagResponse) -> Tuple[Dict[str, Any], bool]:

@@ -163,6 +170,13 @@ class GraphRagResponseTranslator(MessageTranslator):
if hasattr(obj, 'error') and obj.error and obj.error.message:
result["error"] = {"message": obj.error.message, "type": obj.error.type}

if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model

return result

def encode_with_completion(self, obj: GraphRagResponse) -> Tuple[Dict[str, Any], bool]:
@@ -29,11 +29,11 @@ class TextCompletionResponseTranslator(MessageTranslator):
    def encode(self, obj: TextCompletionResponse) -> Dict[str, Any]:
        result = {"response": obj.response}

        if obj.in_token:
        if obj.in_token is not None:
            result["in_token"] = obj.in_token
        if obj.out_token:
        if obj.out_token is not None:
            result["out_token"] = obj.out_token
        if obj.model:
        if obj.model is not None:
            result["model"] = obj.model

        # Always include end_of_stream flag for streaming support
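The switch from truthiness checks to `is not None` matters when a provider reports a zero token count: `if obj.in_token:` silently drops a legitimate 0, while `is not None` keeps it. A minimal sketch of the difference (the sample value is hypothetical):

# Hypothetical response with a zero input-token count (e.g. a fully cached prompt).
in_token = 0

result = {}
if in_token:                  # old check: 0 is falsy, field silently dropped
    result["in_token"] = in_token
assert "in_token" not in result

result = {}
if in_token is not None:      # new check: 0 is a real value and is preserved
    result["in_token"] = in_token
assert result["in_token"] == 0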
@@ -59,6 +59,7 @@ from . uris import (
    agent_plan_uri,
    agent_step_result_uri,
    agent_synthesis_uri,
    agent_pattern_decision_uri,
    # Document RAG provenance URIs
    docrag_question_uri,
    docrag_grounding_uri,

@@ -102,6 +103,11 @@ from . namespaces import (
    # Agent provenance predicates
    TG_THOUGHT, TG_ACTION, TG_ARGUMENTS, TG_OBSERVATION,
    TG_SUBAGENT_GOAL, TG_PLAN_STEP,
    TG_TOOL_CANDIDATE, TG_TERMINATION_REASON,
    TG_STEP_NUMBER, TG_PATTERN_DECISION, TG_PATTERN, TG_TASK_TYPE,
    TG_LLM_DURATION_MS, TG_TOOL_DURATION_MS, TG_TOOL_ERROR,
    TG_IN_TOKEN, TG_OUT_TOKEN,
    TG_ERROR_TYPE,
    # Orchestrator entity types
    TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
    # Document reference predicate

@@ -141,6 +147,7 @@ from . agent import (
    agent_plan_triples,
    agent_step_result_triples,
    agent_synthesis_triples,
    agent_pattern_decision_triples,
)

# Vocabulary bootstrap

@@ -182,6 +189,7 @@ __all__ = [
    "agent_plan_uri",
    "agent_step_result_uri",
    "agent_synthesis_uri",
    "agent_pattern_decision_uri",
    # Document RAG provenance URIs
    "docrag_question_uri",
    "docrag_grounding_uri",

@@ -218,6 +226,11 @@ __all__ = [
    # Agent provenance predicates
    "TG_THOUGHT", "TG_ACTION", "TG_ARGUMENTS", "TG_OBSERVATION",
    "TG_SUBAGENT_GOAL", "TG_PLAN_STEP",
    "TG_TOOL_CANDIDATE", "TG_TERMINATION_REASON",
    "TG_STEP_NUMBER", "TG_PATTERN_DECISION", "TG_PATTERN", "TG_TASK_TYPE",
    "TG_LLM_DURATION_MS", "TG_TOOL_DURATION_MS", "TG_TOOL_ERROR",
    "TG_IN_TOKEN", "TG_OUT_TOKEN",
    "TG_ERROR_TYPE",
    # Orchestrator entity types
    "TG_DECOMPOSITION", "TG_FINDING", "TG_PLAN_TYPE", "TG_STEP_RESULT",
    # Document reference predicate

@@ -249,6 +262,7 @@ __all__ = [
    "agent_plan_triples",
    "agent_step_result_triples",
    "agent_synthesis_triples",
    "agent_pattern_decision_triples",
    # Utility
    "set_graph",
    # Vocabulary
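These __init__ hunks re-export the new pattern-decision helpers and token predicates at the package root, so downstream code does not have to reach into the submodules. A minimal sketch, assuming for illustration that the package is importable as trustgraph.provenance (the real module path is not visible in this hunk):

# Assumed import path; the __init__ shown above re-exports these names.
from trustgraph.provenance import (
    agent_pattern_decision_uri,
    agent_pattern_decision_triples,
    TG_IN_TOKEN, TG_OUT_TOKEN,
)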
@@ -29,6 +29,11 @@ from . namespaces import (
    TG_AGENT_QUESTION,
    TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
    TG_SYNTHESIS, TG_SUBAGENT_GOAL, TG_PLAN_STEP,
    TG_TOOL_CANDIDATE, TG_TERMINATION_REASON,
    TG_STEP_NUMBER, TG_PATTERN_DECISION, TG_PATTERN, TG_TASK_TYPE,
    TG_LLM_DURATION_MS, TG_TOOL_DURATION_MS, TG_TOOL_ERROR,
    TG_ERROR_TYPE,
    TG_IN_TOKEN, TG_OUT_TOKEN, TG_LLM_MODEL,
)

@@ -47,6 +52,17 @@ def _triple(s: str, p: str, o_term: Term) -> Triple:
    return Triple(s=_iri(s), p=_iri(p), o=o_term)


def _append_token_triples(triples, uri, in_token=None, out_token=None,
                          model=None):
    """Append in_token/out_token/model triples when values are present."""
    if in_token is not None:
        triples.append(_triple(uri, TG_IN_TOKEN, _literal(str(in_token))))
    if out_token is not None:
        triples.append(_triple(uri, TG_OUT_TOKEN, _literal(str(out_token))))
    if model is not None:
        triples.append(_triple(uri, TG_LLM_MODEL, _literal(model)))


def agent_session_triples(
    session_uri: str,
    query: str,
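Every *_triples builder in this module now funnels optional token accounting through _append_token_triples, so the None-checking lives in one place. A small usage sketch of the helper (the URI, counts, and model label are made-up values for illustration):

triples = []
# Hypothetical entity URI and token counts.
_append_token_triples(
    triples,
    "urn:trustgraph:agent:session-123/final",
    in_token=842,
    out_token=117,
    model="gemini-2.0-flash",
)
# Three triples are appended: tg:inToken "842", tg:outToken "117", and the
# model label; passing None for any argument simply skips that triple.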
@@ -90,6 +106,43 @@ def agent_session_triples(
    return triples


def agent_pattern_decision_triples(
    uri: str,
    session_uri: str,
    pattern: str,
    task_type: str = "",
) -> List[Triple]:
    """
    Build triples for a meta-router pattern decision.

    Creates:
    - Entity declaration with tg:PatternDecision type
    - wasDerivedFrom link to session
    - Pattern and task type predicates

    Args:
        uri: URI of this decision (from agent_pattern_decision_uri)
        session_uri: URI of the parent session
        pattern: Selected execution pattern (e.g. "react", "plan-then-execute")
        task_type: Identified task type (e.g. "general", "research")

    Returns:
        List of Triple objects
    """
    triples = [
        _triple(uri, RDF_TYPE, _iri(PROV_ENTITY)),
        _triple(uri, RDF_TYPE, _iri(TG_PATTERN_DECISION)),
        _triple(uri, RDFS_LABEL, _literal(f"Pattern: {pattern}")),
        _triple(uri, TG_PATTERN, _literal(pattern)),
        _triple(uri, PROV_WAS_DERIVED_FROM, _iri(session_uri)),
    ]

    if task_type:
        triples.append(_triple(uri, TG_TASK_TYPE, _literal(task_type)))

    return triples


def agent_iteration_triples(
    iteration_uri: str,
    question_uri: Optional[str] = None,
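agent_pattern_decision_triples pairs naturally with agent_pattern_decision_uri, added to the uris module later in this diff. A hedged usage sketch; the session id, pattern, and the assumed shape of the session URI are illustrative only:

session_id = "session-123"                    # hypothetical session id
uri = agent_pattern_decision_uri(session_id)  # urn:trustgraph:agent:session-123/pattern-decision

triples = agent_pattern_decision_triples(
    uri=uri,
    session_uri=f"urn:trustgraph:agent:{session_id}",  # assumed session URI shape
    pattern="plan-then-execute",
    task_type="research",
)
# Yields prov:Entity / tg:PatternDecision typing, an rdfs:label
# "Pattern: plan-then-execute", tg:pattern and tg:taskType literals, and a
# prov:wasDerivedFrom link back to the session entity.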
@@ -98,6 +151,12 @@ def agent_iteration_triples(
    arguments: Dict[str, Any] = None,
    thought_uri: Optional[str] = None,
    thought_document_id: Optional[str] = None,
    tool_candidates: Optional[List[str]] = None,
    step_number: Optional[int] = None,
    llm_duration_ms: Optional[int] = None,
    in_token: Optional[int] = None,
    out_token: Optional[int] = None,
    model: Optional[str] = None,
) -> List[Triple]:
    """
    Build triples for one agent iteration (Analysis+ToolUse).

@@ -106,6 +165,7 @@ def agent_iteration_triples(
    - Entity declaration with tg:Analysis and tg:ToolUse types
    - wasDerivedFrom link to question (if first iteration) or previous
    - Action and arguments metadata
    - Tool candidates (names of tools visible to the LLM)
    - Thought sub-entity (tg:Reflection, tg:Thought) with librarian document

    Args:

@@ -116,6 +176,7 @@ def agent_iteration_triples(
        arguments: Arguments passed to the tool (will be JSON-encoded)
        thought_uri: URI for the thought sub-entity
        thought_document_id: Document URI for thought in librarian
        tool_candidates: List of tool names available to the LLM

    Returns:
        List of Triple objects

@@ -132,6 +193,23 @@ def agent_iteration_triples(
        _triple(iteration_uri, TG_ARGUMENTS, _literal(json.dumps(arguments))),
    ]

    if tool_candidates:
        for name in tool_candidates:
            triples.append(
                _triple(iteration_uri, TG_TOOL_CANDIDATE, _literal(name))
            )

    if step_number is not None:
        triples.append(
            _triple(iteration_uri, TG_STEP_NUMBER, _literal(str(step_number)))
        )

    if llm_duration_ms is not None:
        triples.append(
            _triple(iteration_uri, TG_LLM_DURATION_MS,
                    _literal(str(llm_duration_ms)))
        )

    if question_uri:
        triples.append(
            _triple(iteration_uri, PROV_WAS_DERIVED_FROM, _iri(question_uri))

@@ -155,6 +233,8 @@ def agent_iteration_triples(
            _triple(thought_uri, TG_DOCUMENT, _iri(thought_document_id))
        )

    _append_token_triples(triples, iteration_uri, in_token, out_token, model)

    return triples
@@ -162,6 +242,8 @@ def agent_observation_triples(
    observation_uri: str,
    iteration_uri: str,
    document_id: Optional[str] = None,
    tool_duration_ms: Optional[int] = None,
    tool_error: Optional[str] = None,
) -> List[Triple]:
    """
    Build triples for an agent observation (standalone entity).

@@ -170,11 +252,15 @@ def agent_observation_triples(
    - Entity declaration with prov:Entity and tg:Observation types
    - wasDerivedFrom link to the iteration (Analysis+ToolUse)
    - Document reference to librarian (if provided)
    - Tool execution duration (if provided)
    - Tool error message (if the tool failed)

    Args:
        observation_uri: URI of the observation entity
        iteration_uri: URI of the iteration this observation derives from
        document_id: Librarian document ID for the observation content
        tool_duration_ms: Tool execution time in milliseconds
        tool_error: Error message if the tool failed

    Returns:
        List of Triple objects

@@ -191,6 +277,20 @@ def agent_observation_triples(
            _triple(observation_uri, TG_DOCUMENT, _iri(document_id))
        )

    if tool_duration_ms is not None:
        triples.append(
            _triple(observation_uri, TG_TOOL_DURATION_MS,
                    _literal(str(tool_duration_ms)))
        )

    if tool_error:
        triples.append(
            _triple(observation_uri, TG_TOOL_ERROR, _literal(tool_error))
        )
        triples.append(
            _triple(observation_uri, RDF_TYPE, _iri(TG_ERROR_TYPE))
        )

    return triples
@@ -199,6 +299,10 @@ def agent_final_triples(
    question_uri: Optional[str] = None,
    previous_uri: Optional[str] = None,
    document_id: Optional[str] = None,
    termination_reason: Optional[str] = None,
    in_token: Optional[int] = None,
    out_token: Optional[int] = None,
    model: Optional[str] = None,
) -> List[Triple]:
    """
    Build triples for an agent final answer (Conclusion).

@@ -208,12 +312,15 @@ def agent_final_triples(
    - wasGeneratedBy link to question (if no iterations)
    - wasDerivedFrom link to last iteration (if iterations exist)
    - Document reference to librarian
    - Termination reason (why the agent loop stopped)

    Args:
        final_uri: URI of the final answer (from agent_final_uri)
        question_uri: URI of the question activity (if no iterations)
        previous_uri: URI of the last iteration (if iterations exist)
        document_id: Librarian document ID for the answer content
        termination_reason: Why the loop stopped, e.g. "final-answer",
            "max-iterations", "error"

    Returns:
        List of Triple objects

@@ -237,6 +344,14 @@ def agent_final_triples(
    if document_id:
        triples.append(_triple(final_uri, TG_DOCUMENT, _iri(document_id)))

    if termination_reason:
        triples.append(
            _triple(final_uri, TG_TERMINATION_REASON,
                    _literal(termination_reason))
        )

    _append_token_triples(triples, final_uri, in_token, out_token, model)

    return triples
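Putting the new parameters together, a final-answer record can now carry both the stop reason and the token usage of the closing LLM call. A hedged sketch; the URIs, counts, and model label are invented for illustration:

final_uri = "urn:trustgraph:agent:session-123/final"                  # made-up
triples = agent_final_triples(
    final_uri,
    previous_uri="urn:trustgraph:agent:session-123/iteration/3",      # made-up
    document_id="urn:trustgraph:document:answer-0001",                # made-up
    termination_reason="final-answer",
    in_token=1840,
    out_token=212,
    model="claude-sonnet",   # illustrative label only
)
# The list includes tg:terminationReason "final-answer" plus the
# tg:inToken / tg:outToken / model literals appended by _append_token_triples.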
@@ -244,6 +359,9 @@ def agent_decomposition_triples(
    uri: str,
    session_uri: str,
    goals: List[str],
    in_token: Optional[int] = None,
    out_token: Optional[int] = None,
    model: Optional[str] = None,
) -> List[Triple]:
    """Build triples for a supervisor decomposition step."""
    triples = [

@@ -255,6 +373,7 @@ def agent_decomposition_triples(
    ]
    for goal in goals:
        triples.append(_triple(uri, TG_SUBAGENT_GOAL, _literal(goal)))
    _append_token_triples(triples, uri, in_token, out_token, model)
    return triples

@@ -282,6 +401,9 @@ def agent_plan_triples(
    uri: str,
    session_uri: str,
    steps: List[str],
    in_token: Optional[int] = None,
    out_token: Optional[int] = None,
    model: Optional[str] = None,
) -> List[Triple]:
    """Build triples for a plan-then-execute plan."""
    triples = [

@@ -293,6 +415,7 @@ def agent_plan_triples(
    ]
    for step in steps:
        triples.append(_triple(uri, TG_PLAN_STEP, _literal(step)))
    _append_token_triples(triples, uri, in_token, out_token, model)
    return triples

@@ -301,6 +424,9 @@ def agent_step_result_triples(
    plan_uri: str,
    goal: str,
    document_id: Optional[str] = None,
    in_token: Optional[int] = None,
    out_token: Optional[int] = None,
    model: Optional[str] = None,
) -> List[Triple]:
    """Build triples for a plan step result."""
    triples = [

@@ -313,6 +439,7 @@ def agent_step_result_triples(
    ]
    if document_id:
        triples.append(_triple(uri, TG_DOCUMENT, _iri(document_id)))
    _append_token_triples(triples, uri, in_token, out_token, model)
    return triples

@@ -320,6 +447,10 @@ def agent_synthesis_triples(
    uri: str,
    previous_uris,
    document_id: Optional[str] = None,
    termination_reason: Optional[str] = None,
    in_token: Optional[int] = None,
    out_token: Optional[int] = None,
    model: Optional[str] = None,
) -> List[Triple]:
    """Build triples for a synthesis answer.

@@ -327,6 +458,8 @@ def agent_synthesis_triples(
        uri: URI of the synthesis entity
        previous_uris: Single URI string or list of URIs to derive from
        document_id: Librarian document ID for the answer content
        termination_reason: Why the agent loop stopped
        in_token/out_token/model: Token usage for the synthesis LLM call
    """
    triples = [
        _triple(uri, RDF_TYPE, _iri(PROV_ENTITY)),

@@ -342,4 +475,12 @@ def agent_synthesis_triples(
    if document_id:
        triples.append(_triple(uri, TG_DOCUMENT, _iri(document_id)))

    if termination_reason:
        triples.append(
            _triple(uri, TG_TERMINATION_REASON, _literal(termination_reason))
        )

    _append_token_triples(triples, uri, in_token, out_token, model)

    return triples
@@ -119,6 +119,18 @@ TG_ARGUMENTS = TG + "arguments"
TG_OBSERVATION = TG + "observation" # Links iteration to observation sub-entity
TG_SUBAGENT_GOAL = TG + "subagentGoal" # Goal string on Decomposition/Finding
TG_PLAN_STEP = TG + "planStep" # Step goal string on Plan/StepResult
TG_TOOL_CANDIDATE = TG + "toolCandidate" # Tool name on Analysis events
TG_TERMINATION_REASON = TG + "terminationReason" # Why the agent loop stopped
TG_STEP_NUMBER = TG + "stepNumber" # Explicit step counter on iteration events
TG_PATTERN_DECISION = TG + "PatternDecision" # Meta-router routing decision entity type
TG_PATTERN = TG + "pattern" # Selected execution pattern
TG_TASK_TYPE = TG + "taskType" # Identified task type
TG_LLM_DURATION_MS = TG + "llmDurationMs" # LLM call duration in milliseconds
TG_TOOL_DURATION_MS = TG + "toolDurationMs" # Tool execution duration in milliseconds
TG_TOOL_ERROR = TG + "toolError" # Error message from a failed tool execution
TG_ERROR_TYPE = TG + "Error" # Mixin type for failure events
TG_IN_TOKEN = TG + "inToken" # Input token count for an LLM call
TG_OUT_TOKEN = TG + "outToken" # Output token count for an LLM call

# Named graph URIs for RDF datasets
# These separate different types of data while keeping them in the same collection
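These constants are plain string concatenations onto the TG namespace prefix defined earlier in namespaces.py, so a predicate IRI is simply the base plus a local name. For illustration only, with an assumed base (the real value of TG is not shown in this hunk):

# Hypothetical base; the actual TG value comes from earlier in namespaces.py.
TG = "http://trustgraph.ai/ns/"

TG_IN_TOKEN = TG + "inToken"
print(TG_IN_TOKEN)   # -> http://trustgraph.ai/ns/inToken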
@@ -34,6 +34,8 @@ from . namespaces import (
    TG_ANSWER_TYPE,
    # Question subtypes
    TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION,
    # Token usage
    TG_IN_TOKEN, TG_OUT_TOKEN,
)

from . uris import activity_uri, agent_uri, subgraph_uri, edge_selection_uri

@@ -74,6 +76,17 @@ def _triple(s: str, p: str, o_term: Term) -> Triple:
    return Triple(s=_iri(s), p=_iri(p), o=o_term)


def _append_token_triples(triples, uri, in_token=None, out_token=None,
                          model=None):
    """Append in_token/out_token/model triples when values are present."""
    if in_token is not None:
        triples.append(_triple(uri, TG_IN_TOKEN, _literal(str(in_token))))
    if out_token is not None:
        triples.append(_triple(uri, TG_OUT_TOKEN, _literal(str(out_token))))
    if model is not None:
        triples.append(_triple(uri, TG_LLM_MODEL, _literal(model)))


def document_triples(
    doc_uri: str,
    title: Optional[str] = None,

@@ -396,6 +409,9 @@ def grounding_triples(
    grounding_uri: str,
    question_uri: str,
    concepts: List[str],
    in_token: Optional[int] = None,
    out_token: Optional[int] = None,
    model: Optional[str] = None,
) -> List[Triple]:
    """
    Build triples for a grounding entity (concept decomposition of query).

@@ -423,6 +439,8 @@ def grounding_triples(
    for concept in concepts:
        triples.append(_triple(grounding_uri, TG_CONCEPT, _literal(concept)))

    _append_token_triples(triples, grounding_uri, in_token, out_token, model)

    return triples

@@ -485,6 +503,9 @@ def focus_triples(
    exploration_uri: str,
    selected_edges_with_reasoning: List[dict],
    session_id: str = "",
    in_token: Optional[int] = None,
    out_token: Optional[int] = None,
    model: Optional[str] = None,
) -> List[Triple]:
    """
    Build triples for a focus entity (selected edges with reasoning).

@@ -543,6 +564,8 @@ def focus_triples(
            _triple(edge_sel_uri, TG_REASONING, _literal(reasoning))
        )

    _append_token_triples(triples, focus_uri, in_token, out_token, model)

    return triples

@@ -550,6 +573,9 @@ def synthesis_triples(
    synthesis_uri: str,
    focus_uri: str,
    document_id: Optional[str] = None,
    in_token: Optional[int] = None,
    out_token: Optional[int] = None,
    model: Optional[str] = None,
) -> List[Triple]:
    """
    Build triples for a synthesis entity (final answer).

@@ -578,6 +604,8 @@ def synthesis_triples(
    if document_id:
        triples.append(_triple(synthesis_uri, TG_DOCUMENT, _iri(document_id)))

    _append_token_triples(triples, synthesis_uri, in_token, out_token, model)

    return triples

@@ -674,6 +702,9 @@ def docrag_synthesis_triples(
    synthesis_uri: str,
    exploration_uri: str,
    document_id: Optional[str] = None,
    in_token: Optional[int] = None,
    out_token: Optional[int] = None,
    model: Optional[str] = None,
) -> List[Triple]:
    """
    Build triples for a document RAG synthesis entity (final answer).

@@ -702,4 +733,6 @@ def docrag_synthesis_triples(
    if document_id:
        triples.append(_triple(synthesis_uri, TG_DOCUMENT, _iri(document_id)))

    _append_token_triples(triples, synthesis_uri, in_token, out_token, model)

    return triples
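With the extra keyword arguments, graph RAG provenance can attribute token spend to the synthesis step that produced the final answer. A hedged usage sketch; the URIs, counts, and model label are invented for illustration:

triples = synthesis_triples(
    synthesis_uri="urn:trustgraph:graphrag:session-123/synthesis",   # made-up
    focus_uri="urn:trustgraph:graphrag:session-123/focus",           # made-up
    document_id="urn:trustgraph:document:answer-0002",               # made-up
    in_token=2310,
    out_token=187,
    model="mistral-small",   # illustrative label only
)
# The token kwargs are optional; omitting them produces exactly the same
# triples as the previous version of this function.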
@@ -259,6 +259,11 @@ def agent_synthesis_uri(session_id: str) -> str:
    return f"urn:trustgraph:agent:{session_id}/synthesis"


def agent_pattern_decision_uri(session_id: str) -> str:
    """Generate URI for a meta-router pattern decision."""
    return f"urn:trustgraph:agent:{session_id}/pattern-decision"


# Document RAG provenance URIs
# These URIs use the urn:trustgraph:docrag: namespace to distinguish
# document RAG provenance from graph RAG provenance
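The new generator follows the same urn:trustgraph:agent: scheme as its siblings, so a session's pattern decision is addressable alongside its synthesis. For example (the session id is arbitrary):

agent_pattern_decision_uri("9f3b")
# -> 'urn:trustgraph:agent:9f3b/pattern-decision'
agent_synthesis_uri("9f3b")
# -> 'urn:trustgraph:agent:9f3b/synthesis'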
@@ -51,8 +51,8 @@ class AgentRequest:
@dataclass
class AgentResponse:
    # Streaming-first design
    chunk_type: str = "" # "thought", "action", "observation", "answer", "explain", "error"
    content: str = "" # The actual content (interpretation depends on chunk_type)
    message_type: str = "" # "thought", "action", "observation", "answer", "explain", "error"
    content: str = "" # The actual content (interpretation depends on message_type)
    end_of_message: bool = False # Current chunk type (thought/action/etc.) is complete
    end_of_dialog: bool = False # Entire agent dialog is complete

@@ -66,5 +66,10 @@ class AgentResponse:
    error: Error | None = None

    # Token usage (populated on end_of_dialog message)
    in_token: int | None = None
    out_token: int | None = None
    model: str | None = None

############################################################################
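The rename from chunk_type to message_type plus the new token fields changes how a streaming consumer reads AgentResponse messages: token usage only shows up on the message carrying end_of_dialog. A minimal consumer sketch under that assumption (the responses iterable is hypothetical):

def consume(responses):
    """Print agent output and report token usage at end of dialog."""
    for r in responses:                        # r: AgentResponse
        if r.message_type == "answer":
            print(r.content, end="")
        if r.end_of_dialog:
            # in_token / out_token / model may still be None if the
            # underlying model did not report usage.
            if r.in_token is not None and r.out_token is not None:
                print(f"\n[{r.model}] {r.in_token} in / {r.out_token} out")
            break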
@@ -17,9 +17,9 @@ class TextCompletionRequest:
class TextCompletionResponse:
    error: Error | None = None
    response: str = ""
    in_token: int = 0
    out_token: int = 0
    model: str = ""
    in_token: int | None = None
    out_token: int | None = None
    model: str | None = None
    end_of_stream: bool = False # Indicates final message in stream

############################################################################
@@ -41,4 +41,9 @@ class PromptResponse:
    # Indicates final message in stream
    end_of_stream: bool = False

    # Token usage from the underlying text completion
    in_token: int | None = None
    out_token: int | None = None
    model: str | None = None

############################################################################
@@ -29,6 +29,9 @@ class GraphRagResponse:
    explain_triples: list[Triple] = field(default_factory=list) # Provenance triples for this step
    message_type: str = "" # "chunk" or "explain"
    end_of_session: bool = False # Entire session complete
    in_token: int | None = None
    out_token: int | None = None
    model: str | None = None

############################################################################

@@ -52,3 +55,6 @@ class DocumentRagResponse:
    explain_triples: list[Triple] = field(default_factory=list) # Provenance triples for this step
    message_type: str = "" # "chunk" or "explain"
    end_of_session: bool = False # Entire session complete
    in_token: int | None = None
    out_token: int | None = None
    model: str | None = None