mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-29 02:23:44 +02:00
feat: workspace-based multi-tenancy, replacing user as tenancy axis (#840)
Introduces `workspace` as the isolation boundary for config, flows,
library, and knowledge data. Removes `user` as a schema-level field
throughout the code, API specs, and tests; workspace provides the
same separation more cleanly at the trusted flow.workspace layer
rather than through client-supplied message fields.
Design
------
- IAM tech spec (docs/tech-specs/iam.md) documents current state,
proposed auth/access model, and migration direction.
- Data ownership model (docs/tech-specs/data-ownership-model.md)
captures the workspace/collection/flow hierarchy.
Schema + messaging
------------------
- Drop `user` field from AgentRequest/Step, GraphRagQuery,
DocumentRagQuery, Triples/Graph/Document/Row EmbeddingsRequest,
Sparql/Rows/Structured QueryRequest, ToolServiceRequest.
- Keep collection/workspace routing via flow.workspace at the
service layer.
- Translators updated to not serialise/deserialise user.
API specs
---------
- OpenAPI schemas and path examples cleaned of user fields.
- Websocket async-api messages updated.
- Removed the unused parameters/User.yaml.
Services + base
---------------
- Librarian, collection manager, knowledge, config: all operations
scoped by workspace. Config client API takes workspace as first
positional arg.
- `flow.workspace` is set at flow start time by the infrastructure;
it is no longer passed through from clients.
- Tool service drops user-personalisation passthrough.
CLI + SDK
---------
- tg-init-workspace and workspace-aware import/export.
- All tg-* commands drop user args; accept --workspace.
- Python API/SDK (flow, socket_client, async_*, explainability,
library) drop user kwargs from every method signature.
MCP server
----------
- All tool endpoints drop user parameters; socket_manager no longer
keyed per user.
Flow service
------------
- Closure-based topic cleanup on flow stop: only delete topics
whose blueprint template was parameterised AND no remaining
live flow (across all workspaces) still resolves to that topic.
Four scopes fall out naturally from template analysis:
* {id} -> per-flow, deleted on stop
* {blueprint} -> per-blueprint, kept while any flow of the
same blueprint exists
* {workspace} -> per-workspace, kept while any flow in the
workspace exists
* literal -> global, never deleted (e.g. tg.request.librarian)
Fixes a bug where stopping a flow silently destroyed the global
librarian exchange, wedging all library operations until manual
restart.
RabbitMQ backend
----------------
- heartbeat=60, blocked_connection_timeout=300. Catches silently
dead connections (broker restart, orphaned channels, network
partitions) within ~2 heartbeat windows, so the consumer
reconnects and re-binds its queue rather than sitting forever
on a zombie connection.
Tests
-----
- Full test refresh: unit, integration, contract, provenance.
- Dropped user-field assertions and constructor kwargs across
~100 test files.
- Renamed user-collection isolation tests to workspace-collection.
This commit is contained in:
parent
9332089b3d
commit
d35473f7f7
377 changed files with 6868 additions and 5785 deletions
|
|
@ -27,7 +27,6 @@ Quick Start:
|
|||
# Execute a graph RAG query
|
||||
response = flow.graph_rag(
|
||||
query="What are the main topics?",
|
||||
user="trustgraph",
|
||||
collection="default"
|
||||
)
|
||||
```
|
||||
|
|
@ -38,7 +37,7 @@ For streaming and async operations:
|
|||
socket = api.socket()
|
||||
flow = socket.flow("default")
|
||||
|
||||
for chunk in flow.agent(question="Hello", user="trustgraph"):
|
||||
for chunk in flow.agent(question="Hello"):
|
||||
print(chunk.content)
|
||||
|
||||
# Async operations
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ class Api:
|
|||
token: Optional bearer token for authentication
|
||||
"""
|
||||
|
||||
def __init__(self, url="http://localhost:8088/", timeout=60, token: Optional[str] = None):
|
||||
def __init__(self, url="http://localhost:8088/", timeout=60, token: Optional[str] = None, workspace: str = "default"):
|
||||
"""
|
||||
Initialize the TrustGraph API client.
|
||||
|
||||
|
|
@ -82,6 +82,7 @@ class Api:
|
|||
|
||||
self.timeout = timeout
|
||||
self.token = token
|
||||
self.workspace = workspace
|
||||
|
||||
# Lazy initialization for new clients
|
||||
self._socket_client = None
|
||||
|
|
@ -137,7 +138,7 @@ class Api:
|
|||
config.put([ConfigValue(type="llm", key="model", value="gpt-4")])
|
||||
```
|
||||
"""
|
||||
return Config(api=self)
|
||||
return Config(api=self, workspace=self.workspace)
|
||||
|
||||
def knowledge(self):
|
||||
"""
|
||||
|
|
@ -151,10 +152,10 @@ class Api:
|
|||
knowledge = api.knowledge()
|
||||
|
||||
# List available KG cores
|
||||
cores = knowledge.list_kg_cores(user="trustgraph")
|
||||
cores = knowledge.list_kg_cores()
|
||||
|
||||
# Load a KG core
|
||||
knowledge.load_kg_core(id="core-123", user="trustgraph")
|
||||
knowledge.load_kg_core(id="core-123")
|
||||
```
|
||||
"""
|
||||
return Knowledge(api=self)
|
||||
|
|
@ -191,6 +192,12 @@ class Api:
|
|||
if self.token:
|
||||
headers["Authorization"] = f"Bearer {self.token}"
|
||||
|
||||
# Ensure every REST request carries the workspace so services can
|
||||
# scope their behaviour. Callers that already set workspace in the
|
||||
# payload (e.g. Library client) take precedence.
|
||||
if isinstance(request, dict) and "workspace" not in request:
|
||||
request = {**request, "workspace": self.workspace}
|
||||
|
||||
# Invoke the API, input is passed as JSON
|
||||
resp = requests.post(url, json=request, timeout=self.timeout, headers=headers)
|
||||
|
||||
|
|
@ -227,13 +234,12 @@ class Api:
|
|||
document=b"Document content",
|
||||
id="doc-123",
|
||||
metadata=[],
|
||||
user="trustgraph",
|
||||
title="My Document",
|
||||
comments="Test document"
|
||||
)
|
||||
|
||||
# List documents
|
||||
docs = library.get_documents(user="trustgraph")
|
||||
docs = library.get_documents()
|
||||
```
|
||||
"""
|
||||
return Library(self)
|
||||
|
|
@ -253,11 +259,10 @@ class Api:
|
|||
collection = api.collection()
|
||||
|
||||
# List collections
|
||||
colls = collection.list_collections(user="trustgraph")
|
||||
colls = collection.list_collections()
|
||||
|
||||
# Update collection metadata
|
||||
collection.update_collection(
|
||||
user="trustgraph",
|
||||
collection="default",
|
||||
name="Default Collection",
|
||||
description="Main data collection"
|
||||
|
|
@ -286,7 +291,6 @@ class Api:
|
|||
# Stream agent responses
|
||||
for chunk in flow.agent(
|
||||
question="Explain quantum computing",
|
||||
user="trustgraph",
|
||||
streaming=True
|
||||
):
|
||||
if hasattr(chunk, 'content'):
|
||||
|
|
@ -297,7 +301,10 @@ class Api:
|
|||
from . socket_client import SocketClient
|
||||
# Extract base URL (remove api/v1/ suffix)
|
||||
base_url = self.url.rsplit("api/v1/", 1)[0].rstrip("/")
|
||||
self._socket_client = SocketClient(base_url, self.timeout, self.token)
|
||||
self._socket_client = SocketClient(
|
||||
base_url, self.timeout, self.token,
|
||||
workspace=self.workspace,
|
||||
)
|
||||
return self._socket_client
|
||||
|
||||
def bulk(self):
|
||||
|
|
@ -406,7 +413,6 @@ class Api:
|
|||
# Stream agent responses
|
||||
async for chunk in flow.agent(
|
||||
question="Explain quantum computing",
|
||||
user="trustgraph",
|
||||
streaming=True
|
||||
):
|
||||
if hasattr(chunk, 'content'):
|
||||
|
|
@ -417,7 +423,10 @@ class Api:
|
|||
from . async_socket_client import AsyncSocketClient
|
||||
# Extract base URL (remove api/v1/ suffix)
|
||||
base_url = self.url.rsplit("api/v1/", 1)[0].rstrip("/")
|
||||
self._async_socket_client = AsyncSocketClient(base_url, self.timeout, self.token)
|
||||
self._async_socket_client = AsyncSocketClient(
|
||||
base_url, self.timeout, self.token,
|
||||
workspace=self.workspace,
|
||||
)
|
||||
return self._async_socket_client
|
||||
|
||||
def async_bulk(self):
|
||||
|
|
|
|||
|
|
@ -326,9 +326,7 @@ class AsyncFlow:
|
|||
|
||||
# Use flow services
|
||||
result = await flow.graph_rag(
|
||||
query="What is TrustGraph?",
|
||||
user="trustgraph",
|
||||
collection="default"
|
||||
query="What is TrustGraph?", collection="default"
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
|
@ -385,7 +383,7 @@ class AsyncFlowInstance:
|
|||
"""
|
||||
return await self.flow.request(f"flow/{self.flow_id}/service/{service}", request_data)
|
||||
|
||||
async def agent(self, question: str, user: str, state: Optional[Dict] = None,
|
||||
async def agent(self, question: str, state: Optional[Dict] = None,
|
||||
group: Optional[str] = None, history: Optional[List] = None, **kwargs: Any) -> Dict[str, Any]:
|
||||
"""
|
||||
Execute an agent operation (non-streaming).
|
||||
|
|
@ -399,7 +397,6 @@ class AsyncFlowInstance:
|
|||
|
||||
Args:
|
||||
question: User question or instruction
|
||||
user: User identifier
|
||||
state: Optional state dictionary for conversation context
|
||||
group: Optional group identifier for session management
|
||||
history: Optional conversation history list
|
||||
|
|
@ -416,14 +413,12 @@ class AsyncFlowInstance:
|
|||
# Execute agent
|
||||
result = await flow.agent(
|
||||
question="What is the capital of France?",
|
||||
user="trustgraph"
|
||||
)
|
||||
)
|
||||
print(f"Answer: {result.get('response')}")
|
||||
```
|
||||
"""
|
||||
request_data = {
|
||||
"question": question,
|
||||
"user": user,
|
||||
"streaming": False # REST doesn't support streaming
|
||||
}
|
||||
if state is not None:
|
||||
|
|
@ -481,7 +476,7 @@ class AsyncFlowInstance:
|
|||
model=result.get("model"),
|
||||
)
|
||||
|
||||
async def graph_rag(self, query: str, user: str, collection: str,
|
||||
async def graph_rag(self, query: str, collection: str,
|
||||
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,
|
||||
max_entity_distance: int = 3, **kwargs: Any) -> str:
|
||||
"""
|
||||
|
|
@ -496,7 +491,6 @@ class AsyncFlowInstance:
|
|||
|
||||
Args:
|
||||
query: User query text
|
||||
user: User identifier
|
||||
collection: Collection identifier containing the knowledge graph
|
||||
max_subgraph_size: Maximum number of triples per subgraph (default: 1000)
|
||||
max_subgraph_count: Maximum number of subgraphs to retrieve (default: 5)
|
||||
|
|
@ -513,9 +507,7 @@ class AsyncFlowInstance:
|
|||
|
||||
# Query knowledge graph
|
||||
response = await flow.graph_rag(
|
||||
query="What are the relationships between these entities?",
|
||||
user="trustgraph",
|
||||
collection="medical-kb",
|
||||
query="What are the relationships between these entities?", collection="medical-kb",
|
||||
max_subgraph_count=3
|
||||
)
|
||||
print(response)
|
||||
|
|
@ -523,7 +515,6 @@ class AsyncFlowInstance:
|
|||
"""
|
||||
request_data = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
"max-subgraph-count": max_subgraph_count,
|
||||
|
|
@ -535,7 +526,7 @@ class AsyncFlowInstance:
|
|||
result = await self.request("graph-rag", request_data)
|
||||
return result.get("response", "")
|
||||
|
||||
async def document_rag(self, query: str, user: str, collection: str,
|
||||
async def document_rag(self, query: str, collection: str,
|
||||
doc_limit: int = 10, **kwargs: Any) -> str:
|
||||
"""
|
||||
Execute document-based RAG query (non-streaming).
|
||||
|
|
@ -549,7 +540,6 @@ class AsyncFlowInstance:
|
|||
|
||||
Args:
|
||||
query: User query text
|
||||
user: User identifier
|
||||
collection: Collection identifier containing documents
|
||||
doc_limit: Maximum number of document chunks to retrieve (default: 10)
|
||||
**kwargs: Additional service-specific parameters
|
||||
|
|
@ -564,9 +554,7 @@ class AsyncFlowInstance:
|
|||
|
||||
# Query documents
|
||||
response = await flow.document_rag(
|
||||
query="What does the documentation say about authentication?",
|
||||
user="trustgraph",
|
||||
collection="docs",
|
||||
query="What does the documentation say about authentication?", collection="docs",
|
||||
doc_limit=5
|
||||
)
|
||||
print(response)
|
||||
|
|
@ -574,7 +562,6 @@ class AsyncFlowInstance:
|
|||
"""
|
||||
request_data = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"doc-limit": doc_limit,
|
||||
"streaming": False
|
||||
|
|
@ -584,7 +571,7 @@ class AsyncFlowInstance:
|
|||
result = await self.request("document-rag", request_data)
|
||||
return result.get("response", "")
|
||||
|
||||
async def graph_embeddings_query(self, text: str, user: str, collection: str, limit: int = 10, **kwargs: Any):
|
||||
async def graph_embeddings_query(self, text: str, collection: str, limit: int = 10, **kwargs: Any):
|
||||
"""
|
||||
Query graph embeddings for semantic entity search.
|
||||
|
||||
|
|
@ -593,7 +580,6 @@ class AsyncFlowInstance:
|
|||
|
||||
Args:
|
||||
text: Query text for semantic search
|
||||
user: User identifier
|
||||
collection: Collection identifier containing graph embeddings
|
||||
limit: Maximum number of results to return (default: 10)
|
||||
**kwargs: Additional service-specific parameters
|
||||
|
|
@ -608,9 +594,7 @@ class AsyncFlowInstance:
|
|||
|
||||
# Find related entities
|
||||
results = await flow.graph_embeddings_query(
|
||||
text="machine learning algorithms",
|
||||
user="trustgraph",
|
||||
collection="tech-kb",
|
||||
text="machine learning algorithms", collection="tech-kb",
|
||||
limit=5
|
||||
)
|
||||
|
||||
|
|
@ -624,7 +608,6 @@ class AsyncFlowInstance:
|
|||
|
||||
request_data = {
|
||||
"vector": vector,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
|
@ -663,7 +646,7 @@ class AsyncFlowInstance:
|
|||
|
||||
return await self.request("embeddings", request_data)
|
||||
|
||||
async def triples_query(self, s=None, p=None, o=None, user=None, collection=None, limit=100, **kwargs: Any):
|
||||
async def triples_query(self, s=None, p=None, o=None, collection=None, limit=100, **kwargs: Any):
|
||||
"""
|
||||
Query RDF triples using pattern matching.
|
||||
|
||||
|
|
@ -674,7 +657,6 @@ class AsyncFlowInstance:
|
|||
s: Subject pattern (None for wildcard)
|
||||
p: Predicate pattern (None for wildcard)
|
||||
o: Object pattern (None for wildcard)
|
||||
user: User identifier (None for all users)
|
||||
collection: Collection identifier (None for all collections)
|
||||
limit: Maximum number of triples to return (default: 100)
|
||||
**kwargs: Additional service-specific parameters
|
||||
|
|
@ -689,9 +671,7 @@ class AsyncFlowInstance:
|
|||
|
||||
# Find all triples with a specific predicate
|
||||
results = await flow.triples_query(
|
||||
p="knows",
|
||||
user="trustgraph",
|
||||
collection="social",
|
||||
p="knows", collection="social",
|
||||
limit=50
|
||||
)
|
||||
|
||||
|
|
@ -706,15 +686,13 @@ class AsyncFlowInstance:
|
|||
request_data["p"] = str(p)
|
||||
if o is not None:
|
||||
request_data["o"] = str(o)
|
||||
if user is not None:
|
||||
request_data["user"] = user
|
||||
if collection is not None:
|
||||
request_data["collection"] = collection
|
||||
request_data.update(kwargs)
|
||||
|
||||
return await self.request("triples", request_data)
|
||||
|
||||
async def rows_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
|
||||
async def rows_query(self, query: str, collection: str, variables: Optional[Dict] = None,
|
||||
operation_name: Optional[str] = None, **kwargs: Any):
|
||||
"""
|
||||
Execute a GraphQL query on stored rows.
|
||||
|
|
@ -724,7 +702,6 @@ class AsyncFlowInstance:
|
|||
|
||||
Args:
|
||||
query: GraphQL query string
|
||||
user: User identifier
|
||||
collection: Collection identifier containing rows
|
||||
variables: Optional GraphQL query variables
|
||||
operation_name: Optional operation name for multi-operation queries
|
||||
|
|
@ -750,9 +727,7 @@ class AsyncFlowInstance:
|
|||
'''
|
||||
|
||||
result = await flow.rows_query(
|
||||
query=query,
|
||||
user="trustgraph",
|
||||
collection="users",
|
||||
query=query, collection="users",
|
||||
variables={"status": "active"}
|
||||
)
|
||||
|
||||
|
|
@ -762,7 +737,6 @@ class AsyncFlowInstance:
|
|||
"""
|
||||
request_data = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection
|
||||
}
|
||||
if variables:
|
||||
|
|
@ -774,7 +748,7 @@ class AsyncFlowInstance:
|
|||
return await self.request("rows", request_data)
|
||||
|
||||
async def row_embeddings_query(
|
||||
self, text: str, schema_name: str, user: str = "trustgraph",
|
||||
self, text: str, schema_name: str,
|
||||
collection: str = "default", index_name: Optional[str] = None,
|
||||
limit: int = 10, **kwargs: Any
|
||||
):
|
||||
|
|
@ -788,7 +762,6 @@ class AsyncFlowInstance:
|
|||
Args:
|
||||
text: Query text for semantic search
|
||||
schema_name: Schema name to search within
|
||||
user: User identifier (default: "trustgraph")
|
||||
collection: Collection identifier (default: "default")
|
||||
index_name: Optional index name to filter search to specific index
|
||||
limit: Maximum number of results to return (default: 10)
|
||||
|
|
@ -806,9 +779,7 @@ class AsyncFlowInstance:
|
|||
# Search for customers by name similarity
|
||||
results = await flow.row_embeddings_query(
|
||||
text="John Smith",
|
||||
schema_name="customers",
|
||||
user="trustgraph",
|
||||
collection="sales",
|
||||
schema_name="customers", collection="sales",
|
||||
limit=5
|
||||
)
|
||||
|
||||
|
|
@ -823,7 +794,6 @@ class AsyncFlowInstance:
|
|||
request_data = {
|
||||
"vector": vector,
|
||||
"schema_name": schema_name,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,10 +22,14 @@ class AsyncSocketClient:
|
|||
Or call connect()/aclose() manually.
|
||||
"""
|
||||
|
||||
def __init__(self, url: str, timeout: int, token: Optional[str]):
|
||||
def __init__(
|
||||
self, url: str, timeout: int, token: Optional[str],
|
||||
workspace: str = "default",
|
||||
):
|
||||
self.url = self._convert_to_ws_url(url)
|
||||
self.timeout = timeout
|
||||
self.token = token
|
||||
self.workspace = workspace
|
||||
self._request_counter = 0
|
||||
self._socket = None
|
||||
self._connect_cm = None
|
||||
|
|
@ -117,6 +121,7 @@ class AsyncSocketClient:
|
|||
try:
|
||||
message = {
|
||||
"id": request_id,
|
||||
"workspace": self.workspace,
|
||||
"service": service,
|
||||
"request": request
|
||||
}
|
||||
|
|
@ -149,6 +154,7 @@ class AsyncSocketClient:
|
|||
try:
|
||||
message = {
|
||||
"id": request_id,
|
||||
"workspace": self.workspace,
|
||||
"service": service,
|
||||
"request": request
|
||||
}
|
||||
|
|
@ -251,13 +257,12 @@ class AsyncSocketFlowInstance:
|
|||
self.client = client
|
||||
self.flow_id = flow_id
|
||||
|
||||
async def agent(self, question: str, user: str, state: Optional[Dict[str, Any]] = None,
|
||||
async def agent(self, question: str, state: Optional[Dict[str, Any]] = None,
|
||||
group: Optional[str] = None, history: Optional[list] = None,
|
||||
streaming: bool = False, **kwargs) -> Union[Dict[str, Any], AsyncIterator]:
|
||||
"""Agent with optional streaming"""
|
||||
request = {
|
||||
"question": question,
|
||||
"user": user,
|
||||
"streaming": streaming
|
||||
}
|
||||
if state is not None:
|
||||
|
|
@ -303,13 +308,12 @@ class AsyncSocketFlowInstance:
|
|||
if isinstance(chunk, RAGChunk):
|
||||
yield chunk
|
||||
|
||||
async def graph_rag(self, query: str, user: str, collection: str,
|
||||
async def graph_rag(self, query: str, collection: str,
|
||||
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,
|
||||
max_entity_distance: int = 3, streaming: bool = False, **kwargs):
|
||||
"""Graph RAG with optional streaming"""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
"max-subgraph-count": max_subgraph_count,
|
||||
|
|
@ -330,12 +334,11 @@ class AsyncSocketFlowInstance:
|
|||
if hasattr(chunk, 'content'):
|
||||
yield chunk.content
|
||||
|
||||
async def document_rag(self, query: str, user: str, collection: str,
|
||||
async def document_rag(self, query: str, collection: str,
|
||||
doc_limit: int = 10, streaming: bool = False, **kwargs):
|
||||
"""Document RAG with optional streaming"""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"doc-limit": doc_limit,
|
||||
"streaming": streaming
|
||||
|
|
@ -375,14 +378,13 @@ class AsyncSocketFlowInstance:
|
|||
if hasattr(chunk, 'content'):
|
||||
yield chunk.content
|
||||
|
||||
async def graph_embeddings_query(self, text: str, user: str, collection: str, limit: int = 10, **kwargs):
|
||||
async def graph_embeddings_query(self, text: str, collection: str, limit: int = 10, **kwargs):
|
||||
"""Query graph embeddings for semantic search"""
|
||||
emb_result = await self.embeddings(texts=[text])
|
||||
vector = emb_result.get("vectors", [[]])[0]
|
||||
|
||||
request = {
|
||||
"vector": vector,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
|
@ -397,7 +399,7 @@ class AsyncSocketFlowInstance:
|
|||
|
||||
return await self.client._send_request("embeddings", self.flow_id, request)
|
||||
|
||||
async def triples_query(self, s=None, p=None, o=None, user=None, collection=None, limit=100, **kwargs):
|
||||
async def triples_query(self, s=None, p=None, o=None, collection=None, limit=100, **kwargs):
|
||||
"""Triple pattern query"""
|
||||
request = {"limit": limit}
|
||||
if s is not None:
|
||||
|
|
@ -406,20 +408,17 @@ class AsyncSocketFlowInstance:
|
|||
request["p"] = str(p)
|
||||
if o is not None:
|
||||
request["o"] = str(o)
|
||||
if user is not None:
|
||||
request["user"] = user
|
||||
if collection is not None:
|
||||
request["collection"] = collection
|
||||
request.update(kwargs)
|
||||
|
||||
return await self.client._send_request("triples", self.flow_id, request)
|
||||
|
||||
async def rows_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
|
||||
async def rows_query(self, query: str, collection: str, variables: Optional[Dict] = None,
|
||||
operation_name: Optional[str] = None, **kwargs):
|
||||
"""GraphQL query against structured rows"""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection
|
||||
}
|
||||
if variables:
|
||||
|
|
@ -441,7 +440,7 @@ class AsyncSocketFlowInstance:
|
|||
return await self.client._send_request("mcp-tool", self.flow_id, request)
|
||||
|
||||
async def row_embeddings_query(
|
||||
self, text: str, schema_name: str, user: str = "trustgraph",
|
||||
self, text: str, schema_name: str,
|
||||
collection: str = "default", index_name: Optional[str] = None,
|
||||
limit: int = 10, **kwargs
|
||||
):
|
||||
|
|
@ -452,7 +451,6 @@ class AsyncSocketFlowInstance:
|
|||
request = {
|
||||
"vector": vector,
|
||||
"schema_name": schema_name,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ class BulkClient:
|
|||
Args:
|
||||
flow: Flow identifier
|
||||
triples: Iterator yielding Triple objects
|
||||
metadata: Metadata dict with id, metadata, user, collection
|
||||
metadata: Metadata dict with id, metadata, collection
|
||||
batch_size: Number of triples per batch (default 100)
|
||||
**kwargs: Additional parameters (reserved for future use)
|
||||
|
||||
|
|
@ -105,7 +105,7 @@ class BulkClient:
|
|||
bulk.import_triples(
|
||||
flow="default",
|
||||
triples=triple_generator(),
|
||||
metadata={"id": "doc1", "metadata": [], "user": "user1", "collection": "default"}
|
||||
metadata={"id": "doc1", "metadata": [], "collection": "default"}
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
|
@ -121,7 +121,7 @@ class BulkClient:
|
|||
ws_url = f"{ws_url}?token={self.token}"
|
||||
|
||||
if metadata is None:
|
||||
metadata = {"id": "", "metadata": [], "user": "trustgraph", "collection": "default"}
|
||||
metadata = {"id": "", "metadata": [], "collection": "default"}
|
||||
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
|
||||
batch = []
|
||||
|
|
@ -418,7 +418,7 @@ class BulkClient:
|
|||
Args:
|
||||
flow: Flow identifier
|
||||
contexts: Iterator yielding context dictionaries
|
||||
metadata: Metadata dict with id, metadata, user, collection
|
||||
metadata: Metadata dict with id, metadata, collection
|
||||
batch_size: Number of contexts per batch (default 100)
|
||||
**kwargs: Additional parameters (reserved for future use)
|
||||
|
||||
|
|
@ -435,7 +435,7 @@ class BulkClient:
|
|||
bulk.import_entity_contexts(
|
||||
flow="default",
|
||||
contexts=context_generator(),
|
||||
metadata={"id": "doc1", "metadata": [], "user": "user1", "collection": "default"}
|
||||
metadata={"id": "doc1", "metadata": [], "collection": "default"}
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
|
@ -451,7 +451,7 @@ class BulkClient:
|
|||
ws_url = f"{ws_url}?token={self.token}"
|
||||
|
||||
if metadata is None:
|
||||
metadata = {"id": "", "metadata": [], "user": "trustgraph", "collection": "default"}
|
||||
metadata = {"id": "", "metadata": [], "collection": "default"}
|
||||
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
|
||||
batch = []
|
||||
|
|
|
|||
|
|
@ -2,11 +2,9 @@
|
|||
TrustGraph Collection Management
|
||||
|
||||
This module provides interfaces for managing data collections in TrustGraph.
|
||||
Collections provide logical grouping and isolation for documents and knowledge
|
||||
graph data.
|
||||
Collections provide logical grouping within a workspace.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
|
||||
from . types import CollectionMetadata
|
||||
|
|
@ -18,10 +16,9 @@ class Collection:
|
|||
"""
|
||||
Collection management client.
|
||||
|
||||
Provides methods for managing data collections, including listing,
|
||||
updating metadata, and deleting collections. Collections organize
|
||||
documents and knowledge graph data into logical groupings for
|
||||
isolation and access control.
|
||||
Provides methods for managing data collections within the configured
|
||||
workspace, including listing, updating metadata, and deleting
|
||||
collections.
|
||||
"""
|
||||
|
||||
def __init__(self, api):
|
||||
|
|
@ -45,45 +42,20 @@ class Collection:
|
|||
"""
|
||||
return self.api.request(f"collection-management", request)
|
||||
|
||||
def list_collections(self, user, tag_filter=None):
|
||||
def list_collections(self, tag_filter=None):
|
||||
"""
|
||||
List all collections for a user.
|
||||
|
||||
Retrieves metadata for all collections owned by the specified user,
|
||||
with optional filtering by tags.
|
||||
List all collections in this workspace.
|
||||
|
||||
Args:
|
||||
user: User identifier
|
||||
tag_filter: Optional list of tags to filter collections (default: None)
|
||||
tag_filter: Optional list of tags to filter collections
|
||||
|
||||
Returns:
|
||||
list[CollectionMetadata]: List of collection metadata objects
|
||||
|
||||
Raises:
|
||||
ProtocolException: If response format is invalid
|
||||
|
||||
Example:
|
||||
```python
|
||||
collection = api.collection()
|
||||
|
||||
# List all collections
|
||||
all_colls = collection.list_collections(user="trustgraph")
|
||||
for coll in all_colls:
|
||||
print(f"{coll.collection}: {coll.name}")
|
||||
print(f" Description: {coll.description}")
|
||||
print(f" Tags: {', '.join(coll.tags)}")
|
||||
|
||||
# List collections with specific tags
|
||||
research_colls = collection.list_collections(
|
||||
user="trustgraph",
|
||||
tag_filter=["research", "published"]
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
input = {
|
||||
"operation": "list-collections",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
}
|
||||
|
||||
if tag_filter:
|
||||
|
|
@ -92,7 +64,6 @@ class Collection:
|
|||
object = self.request(input)
|
||||
|
||||
try:
|
||||
# Handle case where collections might be None or missing
|
||||
if object is None or "collections" not in object:
|
||||
return []
|
||||
|
||||
|
|
@ -102,7 +73,6 @@ class Collection:
|
|||
|
||||
return [
|
||||
CollectionMetadata(
|
||||
user = v["user"],
|
||||
collection = v["collection"],
|
||||
name = v["name"],
|
||||
description = v["description"],
|
||||
|
|
@ -114,15 +84,11 @@ class Collection:
|
|||
logger.error("Failed to parse collection list response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def update_collection(self, user, collection, name=None, description=None, tags=None):
|
||||
def update_collection(self, collection, name=None, description=None, tags=None):
|
||||
"""
|
||||
Update collection metadata.
|
||||
|
||||
Updates the name, description, and/or tags for an existing collection.
|
||||
Only provided fields are updated; others remain unchanged.
|
||||
|
||||
Args:
|
||||
user: User identifier
|
||||
collection: Collection identifier
|
||||
name: New collection name (optional)
|
||||
description: New collection description (optional)
|
||||
|
|
@ -130,35 +96,11 @@ class Collection:
|
|||
|
||||
Returns:
|
||||
CollectionMetadata: Updated collection metadata, or None if not found
|
||||
|
||||
Raises:
|
||||
ProtocolException: If response format is invalid
|
||||
|
||||
Example:
|
||||
```python
|
||||
collection_api = api.collection()
|
||||
|
||||
# Update collection metadata
|
||||
updated = collection_api.update_collection(
|
||||
user="trustgraph",
|
||||
collection="default",
|
||||
name="Default Collection",
|
||||
description="Main data collection for general use",
|
||||
tags=["default", "production"]
|
||||
)
|
||||
|
||||
# Update only specific fields
|
||||
updated = collection_api.update_collection(
|
||||
user="trustgraph",
|
||||
collection="research",
|
||||
description="Updated description"
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
input = {
|
||||
"operation": "update-collection",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"collection": collection,
|
||||
}
|
||||
|
||||
|
|
@ -175,7 +117,6 @@ class Collection:
|
|||
if "collections" in object and object["collections"]:
|
||||
v = object["collections"][0]
|
||||
return CollectionMetadata(
|
||||
user = v["user"],
|
||||
collection = v["collection"],
|
||||
name = v["name"],
|
||||
description = v["description"],
|
||||
|
|
@ -186,37 +127,23 @@ class Collection:
|
|||
logger.error("Failed to parse collection update response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def delete_collection(self, user, collection):
|
||||
def delete_collection(self, collection):
|
||||
"""
|
||||
Delete a collection.
|
||||
|
||||
Removes a collection and all its associated data from the system.
|
||||
|
||||
Args:
|
||||
user: User identifier
|
||||
collection: Collection identifier to delete
|
||||
|
||||
Returns:
|
||||
dict: Empty response object
|
||||
|
||||
Example:
|
||||
```python
|
||||
collection_api = api.collection()
|
||||
|
||||
# Delete a collection
|
||||
collection_api.delete_collection(
|
||||
user="trustgraph",
|
||||
collection="old-collection"
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
input = {
|
||||
"operation": "delete-collection",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"collection": collection,
|
||||
}
|
||||
|
||||
object = self.request(input)
|
||||
self.request(input)
|
||||
|
||||
return {}
|
||||
return {}
|
||||
|
|
|
|||
|
|
@ -21,14 +21,16 @@ class Config:
|
|||
and list operations.
|
||||
"""
|
||||
|
||||
def __init__(self, api):
|
||||
def __init__(self, api, workspace="default"):
|
||||
"""
|
||||
Initialize Config client.
|
||||
|
||||
Args:
|
||||
api: Parent Api instance for making requests
|
||||
workspace: Workspace to scope all config operations to
|
||||
"""
|
||||
self.api = api
|
||||
self.workspace = workspace
|
||||
|
||||
def request(self, request):
|
||||
"""
|
||||
|
|
@ -75,9 +77,9 @@ class Config:
|
|||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "get",
|
||||
"workspace": self.workspace,
|
||||
"keys": [
|
||||
{ "type": k.type, "key": k.key }
|
||||
for k in keys
|
||||
|
|
@ -123,9 +125,9 @@ class Config:
|
|||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "put",
|
||||
"workspace": self.workspace,
|
||||
"values": [
|
||||
{ "type": v.type, "key": v.key, "value": v.value }
|
||||
for v in values
|
||||
|
|
@ -157,9 +159,9 @@ class Config:
|
|||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "delete",
|
||||
"workspace": self.workspace,
|
||||
"keys": [
|
||||
{ "type": v.type, "key": v.key }
|
||||
for v in keys
|
||||
|
|
@ -195,9 +197,9 @@ class Config:
|
|||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "list",
|
||||
"workspace": self.workspace,
|
||||
"type": type,
|
||||
}
|
||||
|
||||
|
|
@ -235,9 +237,9 @@ class Config:
|
|||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "getvalues",
|
||||
"workspace": self.workspace,
|
||||
"type": type,
|
||||
}
|
||||
|
||||
|
|
@ -255,6 +257,46 @@ class Config:
|
|||
except:
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def get_values_all_workspaces(self, type):
|
||||
"""
|
||||
Get all configuration values of a given type across all workspaces.
|
||||
|
||||
Unlike get_values(), this is not scoped to a single workspace —
|
||||
it returns every entry of the given type in the system. Each
|
||||
returned ConfigValue includes its workspace field. Used by
|
||||
shared processors to load type-scoped config at startup.
|
||||
|
||||
Args:
|
||||
type: Configuration type (e.g. "prompt", "schema")
|
||||
|
||||
Returns:
|
||||
list[ConfigValue]: Values across all workspaces; each has
|
||||
its workspace field populated.
|
||||
|
||||
Raises:
|
||||
ProtocolException: If response format is invalid
|
||||
"""
|
||||
|
||||
input = {
|
||||
"operation": "getvalues-all-ws",
|
||||
"type": type,
|
||||
}
|
||||
|
||||
object = self.request(input)
|
||||
|
||||
try:
|
||||
return [
|
||||
ConfigValue(
|
||||
type = v["type"],
|
||||
key = v["key"],
|
||||
value = v["value"],
|
||||
workspace = v.get("workspace", ""),
|
||||
)
|
||||
for v in object["values"]
|
||||
]
|
||||
except Exception:
|
||||
raise ProtocolException("Response not formatted correctly")
|
||||
|
||||
def all(self):
|
||||
"""
|
||||
Get complete configuration and version.
|
||||
|
|
@ -279,9 +321,9 @@ class Config:
|
|||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "config"
|
||||
"operation": "config",
|
||||
"workspace": self.workspace,
|
||||
}
|
||||
|
||||
object = self.request(input)
|
||||
|
|
|
|||
|
|
@ -486,7 +486,6 @@ class ExplainabilityClient:
|
|||
self,
|
||||
uri: str,
|
||||
graph: Optional[str] = None,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None
|
||||
) -> Optional[ExplainEntity]:
|
||||
"""
|
||||
|
|
@ -502,7 +501,6 @@ class ExplainabilityClient:
|
|||
Args:
|
||||
uri: The entity URI to fetch
|
||||
graph: Named graph to query (e.g., "urn:graph:retrieval")
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
|
||||
Returns:
|
||||
|
|
@ -515,7 +513,6 @@ class ExplainabilityClient:
|
|||
wire_triples = self.flow.triples_query(
|
||||
s=uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=100
|
||||
)
|
||||
|
|
@ -548,7 +545,7 @@ class ExplainabilityClient:
|
|||
if prev_triples:
|
||||
# Re-fetch and parse
|
||||
wire_triples = self.flow.triples_query(
|
||||
s=uri, g=graph, user=user, collection=collection, limit=100
|
||||
s=uri, g=graph, collection=collection, limit=100
|
||||
)
|
||||
if wire_triples:
|
||||
triples = wire_triples_to_tuples(wire_triples)
|
||||
|
|
@ -560,7 +557,6 @@ class ExplainabilityClient:
|
|||
self,
|
||||
uri: str,
|
||||
graph: Optional[str] = None,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None
|
||||
) -> Optional[EdgeSelection]:
|
||||
"""
|
||||
|
|
@ -569,7 +565,6 @@ class ExplainabilityClient:
|
|||
Args:
|
||||
uri: The edge selection URI
|
||||
graph: Named graph to query
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
|
||||
Returns:
|
||||
|
|
@ -578,7 +573,6 @@ class ExplainabilityClient:
|
|||
wire_triples = self.flow.triples_query(
|
||||
s=uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=100
|
||||
)
|
||||
|
|
@ -593,7 +587,6 @@ class ExplainabilityClient:
|
|||
self,
|
||||
uri: str,
|
||||
graph: Optional[str] = None,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None
|
||||
) -> Optional[Focus]:
|
||||
"""
|
||||
|
|
@ -602,20 +595,19 @@ class ExplainabilityClient:
|
|||
Args:
|
||||
uri: The Focus entity URI
|
||||
graph: Named graph to query
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
|
||||
Returns:
|
||||
Focus with populated edge_selections, or None
|
||||
"""
|
||||
entity = self.fetch_entity(uri, graph, user, collection)
|
||||
entity = self.fetch_entity(uri, graph, collection)
|
||||
|
||||
if not isinstance(entity, Focus):
|
||||
return None
|
||||
|
||||
# Fetch each edge selection
|
||||
for edge_uri in entity.selected_edge_uris:
|
||||
edge_sel = self.fetch_edge_selection(edge_uri, graph, user, collection)
|
||||
edge_sel = self.fetch_edge_selection(edge_uri, graph, collection)
|
||||
if edge_sel:
|
||||
entity.edge_selections.append(edge_sel)
|
||||
|
||||
|
|
@ -624,7 +616,6 @@ class ExplainabilityClient:
|
|||
def resolve_label(
|
||||
self,
|
||||
uri: str,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None
|
||||
) -> str:
|
||||
"""
|
||||
|
|
@ -632,7 +623,6 @@ class ExplainabilityClient:
|
|||
|
||||
Args:
|
||||
uri: The URI to get label for
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
|
||||
Returns:
|
||||
|
|
@ -647,7 +637,6 @@ class ExplainabilityClient:
|
|||
wire_triples = self.flow.triples_query(
|
||||
s=uri,
|
||||
p=RDFS_LABEL,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=1
|
||||
)
|
||||
|
|
@ -665,7 +654,6 @@ class ExplainabilityClient:
|
|||
def resolve_edge_labels(
|
||||
self,
|
||||
edge: Dict[str, str],
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None
|
||||
) -> Tuple[str, str, str]:
|
||||
"""
|
||||
|
|
@ -673,22 +661,20 @@ class ExplainabilityClient:
|
|||
|
||||
Args:
|
||||
edge: Dict with "s", "p", "o" keys
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
|
||||
Returns:
|
||||
Tuple of (s_label, p_label, o_label)
|
||||
"""
|
||||
s_label = self.resolve_label(edge.get("s", ""), user, collection)
|
||||
p_label = self.resolve_label(edge.get("p", ""), user, collection)
|
||||
o_label = self.resolve_label(edge.get("o", ""), user, collection)
|
||||
s_label = self.resolve_label(edge.get("s", ""), collection)
|
||||
p_label = self.resolve_label(edge.get("p", ""), collection)
|
||||
o_label = self.resolve_label(edge.get("o", ""), collection)
|
||||
return (s_label, p_label, o_label)
|
||||
|
||||
def fetch_document_content(
|
||||
self,
|
||||
document_uri: str,
|
||||
api: Any,
|
||||
user: Optional[str] = None,
|
||||
max_content: int = 10000
|
||||
) -> str:
|
||||
"""
|
||||
|
|
@ -697,7 +683,6 @@ class ExplainabilityClient:
|
|||
Args:
|
||||
document_uri: The document URI in the librarian
|
||||
api: TrustGraph Api instance for librarian access
|
||||
user: User identifier for librarian
|
||||
max_content: Maximum content length to return
|
||||
|
||||
Returns:
|
||||
|
|
@ -712,7 +697,7 @@ class ExplainabilityClient:
|
|||
for attempt in range(self.max_retries):
|
||||
try:
|
||||
library = api.library()
|
||||
content_bytes = library.get_document_content(user=user, id=doc_id)
|
||||
content_bytes = library.get_document_content(id=doc_id)
|
||||
|
||||
# Decode as text
|
||||
try:
|
||||
|
|
@ -736,7 +721,6 @@ class ExplainabilityClient:
|
|||
self,
|
||||
question_uri: str,
|
||||
graph: Optional[str] = None,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None,
|
||||
api: Any = None,
|
||||
max_content: int = 10000
|
||||
|
|
@ -749,7 +733,6 @@ class ExplainabilityClient:
|
|||
Args:
|
||||
question_uri: The question entity URI
|
||||
graph: Named graph (default: urn:graph:retrieval)
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
api: TrustGraph Api instance for librarian access (optional)
|
||||
max_content: Maximum content length for synthesis
|
||||
|
|
@ -769,7 +752,7 @@ class ExplainabilityClient:
|
|||
}
|
||||
|
||||
# Fetch question
|
||||
question = self.fetch_entity(question_uri, graph, user, collection)
|
||||
question = self.fetch_entity(question_uri, graph, collection)
|
||||
if not isinstance(question, Question):
|
||||
return trace
|
||||
trace["question"] = question
|
||||
|
|
@ -779,7 +762,6 @@ class ExplainabilityClient:
|
|||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=question_uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
|
|
@ -790,7 +772,7 @@ class ExplainabilityClient:
|
|||
for t in grounding_triples
|
||||
]
|
||||
for gnd_uri in grounding_uris:
|
||||
grounding = self.fetch_entity(gnd_uri, graph, user, collection)
|
||||
grounding = self.fetch_entity(gnd_uri, graph, collection)
|
||||
if isinstance(grounding, Grounding):
|
||||
trace["grounding"] = grounding
|
||||
break
|
||||
|
|
@ -803,7 +785,6 @@ class ExplainabilityClient:
|
|||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=trace["grounding"].uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
|
|
@ -814,7 +795,7 @@ class ExplainabilityClient:
|
|||
for t in exploration_triples
|
||||
]
|
||||
for exp_uri in exploration_uris:
|
||||
exploration = self.fetch_entity(exp_uri, graph, user, collection)
|
||||
exploration = self.fetch_entity(exp_uri, graph, collection)
|
||||
if isinstance(exploration, Exploration):
|
||||
trace["exploration"] = exploration
|
||||
break
|
||||
|
|
@ -827,7 +808,6 @@ class ExplainabilityClient:
|
|||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=trace["exploration"].uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
|
|
@ -838,7 +818,7 @@ class ExplainabilityClient:
|
|||
for t in focus_triples
|
||||
]
|
||||
for focus_uri in focus_uris:
|
||||
focus = self.fetch_focus_with_edges(focus_uri, graph, user, collection)
|
||||
focus = self.fetch_focus_with_edges(focus_uri, graph, collection)
|
||||
if focus:
|
||||
trace["focus"] = focus
|
||||
break
|
||||
|
|
@ -851,7 +831,6 @@ class ExplainabilityClient:
|
|||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=trace["focus"].uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
|
|
@ -862,7 +841,7 @@ class ExplainabilityClient:
|
|||
for t in synthesis_triples
|
||||
]
|
||||
for synth_uri in synthesis_uris:
|
||||
synthesis = self.fetch_entity(synth_uri, graph, user, collection)
|
||||
synthesis = self.fetch_entity(synth_uri, graph, collection)
|
||||
if isinstance(synthesis, Synthesis):
|
||||
trace["synthesis"] = synthesis
|
||||
break
|
||||
|
|
@ -873,7 +852,6 @@ class ExplainabilityClient:
|
|||
self,
|
||||
question_uri: str,
|
||||
graph: Optional[str] = None,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None,
|
||||
api: Any = None,
|
||||
max_content: int = 10000
|
||||
|
|
@ -887,7 +865,6 @@ class ExplainabilityClient:
|
|||
Args:
|
||||
question_uri: The question entity URI
|
||||
graph: Named graph (default: urn:graph:retrieval)
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
api: TrustGraph Api instance for librarian access (optional)
|
||||
max_content: Maximum content length for synthesis
|
||||
|
|
@ -906,7 +883,7 @@ class ExplainabilityClient:
|
|||
}
|
||||
|
||||
# Fetch question
|
||||
question = self.fetch_entity(question_uri, graph, user, collection)
|
||||
question = self.fetch_entity(question_uri, graph, collection)
|
||||
if not isinstance(question, Question):
|
||||
return trace
|
||||
trace["question"] = question
|
||||
|
|
@ -916,7 +893,6 @@ class ExplainabilityClient:
|
|||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=question_uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
|
|
@ -927,7 +903,7 @@ class ExplainabilityClient:
|
|||
for t in grounding_triples
|
||||
]
|
||||
for gnd_uri in grounding_uris:
|
||||
grounding = self.fetch_entity(gnd_uri, graph, user, collection)
|
||||
grounding = self.fetch_entity(gnd_uri, graph, collection)
|
||||
if isinstance(grounding, Grounding):
|
||||
trace["grounding"] = grounding
|
||||
break
|
||||
|
|
@ -940,7 +916,6 @@ class ExplainabilityClient:
|
|||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=trace["grounding"].uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
|
|
@ -951,7 +926,7 @@ class ExplainabilityClient:
|
|||
for t in exploration_triples
|
||||
]
|
||||
for exp_uri in exploration_uris:
|
||||
exploration = self.fetch_entity(exp_uri, graph, user, collection)
|
||||
exploration = self.fetch_entity(exp_uri, graph, collection)
|
||||
if isinstance(exploration, Exploration):
|
||||
trace["exploration"] = exploration
|
||||
break
|
||||
|
|
@ -964,7 +939,6 @@ class ExplainabilityClient:
|
|||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=trace["exploration"].uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
|
|
@ -975,7 +949,7 @@ class ExplainabilityClient:
|
|||
for t in synthesis_triples
|
||||
]
|
||||
for synth_uri in synthesis_uris:
|
||||
synthesis = self.fetch_entity(synth_uri, graph, user, collection)
|
||||
synthesis = self.fetch_entity(synth_uri, graph, collection)
|
||||
if isinstance(synthesis, Synthesis):
|
||||
trace["synthesis"] = synthesis
|
||||
break
|
||||
|
|
@ -986,7 +960,6 @@ class ExplainabilityClient:
|
|||
self,
|
||||
session_uri: str,
|
||||
graph: Optional[str] = None,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None,
|
||||
api: Any = None,
|
||||
max_content: int = 10000
|
||||
|
|
@ -1002,7 +975,6 @@ class ExplainabilityClient:
|
|||
Args:
|
||||
session_uri: The agent session/question URI
|
||||
graph: Named graph (default: urn:graph:retrieval)
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
api: TrustGraph Api instance for librarian access (optional)
|
||||
max_content: Maximum content length for conclusion
|
||||
|
|
@ -1019,21 +991,21 @@ class ExplainabilityClient:
|
|||
}
|
||||
|
||||
# Fetch question/session
|
||||
question = self.fetch_entity(session_uri, graph, user, collection)
|
||||
question = self.fetch_entity(session_uri, graph, collection)
|
||||
if not isinstance(question, Question):
|
||||
return trace
|
||||
trace["question"] = question
|
||||
|
||||
# Follow the provenance chain from the question
|
||||
self._follow_provenance_chain(
|
||||
session_uri, trace, graph, user, collection,
|
||||
session_uri, trace, graph, collection,
|
||||
max_depth=50,
|
||||
)
|
||||
|
||||
return trace
|
||||
|
||||
def _follow_provenance_chain(
|
||||
self, current_uri, trace, graph, user, collection,
|
||||
self, current_uri, trace, graph, collection,
|
||||
max_depth=50,
|
||||
):
|
||||
"""Recursively follow the provenance chain, handling branches."""
|
||||
|
|
@ -1044,7 +1016,7 @@ class ExplainabilityClient:
|
|||
derived_triples = self.flow.triples_query(
|
||||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=current_uri,
|
||||
g=graph, user=user, collection=collection,
|
||||
g=graph, collection=collection,
|
||||
limit=20
|
||||
)
|
||||
|
||||
|
|
@ -1060,7 +1032,7 @@ class ExplainabilityClient:
|
|||
if not derived_uri:
|
||||
continue
|
||||
|
||||
entity = self.fetch_entity(derived_uri, graph, user, collection)
|
||||
entity = self.fetch_entity(derived_uri, graph, collection)
|
||||
if entity is None:
|
||||
continue
|
||||
|
||||
|
|
@ -1070,7 +1042,7 @@ class ExplainabilityClient:
|
|||
|
||||
# Continue following from this entity
|
||||
self._follow_provenance_chain(
|
||||
derived_uri, trace, graph, user, collection,
|
||||
derived_uri, trace, graph, collection,
|
||||
max_depth=max_depth - 1,
|
||||
)
|
||||
|
||||
|
|
@ -1079,11 +1051,11 @@ class ExplainabilityClient:
|
|||
# Fetch the full sub-trace and embed it.
|
||||
if entity.question_type == "graph-rag":
|
||||
sub_trace = self.fetch_graphrag_trace(
|
||||
derived_uri, graph, user, collection,
|
||||
derived_uri, graph, collection,
|
||||
)
|
||||
elif entity.question_type == "document-rag":
|
||||
sub_trace = self.fetch_docrag_trace(
|
||||
derived_uri, graph, user, collection,
|
||||
derived_uri, graph, collection,
|
||||
)
|
||||
else:
|
||||
sub_trace = None
|
||||
|
|
@ -1100,7 +1072,7 @@ class ExplainabilityClient:
|
|||
terminal = sub_trace.get("synthesis")
|
||||
if terminal:
|
||||
self._follow_provenance_chain(
|
||||
terminal.uri, trace, graph, user, collection,
|
||||
terminal.uri, trace, graph, collection,
|
||||
max_depth=max_depth - 1,
|
||||
)
|
||||
|
||||
|
|
@ -1110,7 +1082,6 @@ class ExplainabilityClient:
|
|||
def list_sessions(
|
||||
self,
|
||||
graph: Optional[str] = None,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None,
|
||||
limit: int = 50
|
||||
) -> List[Question]:
|
||||
|
|
@ -1119,7 +1090,6 @@ class ExplainabilityClient:
|
|||
|
||||
Args:
|
||||
graph: Named graph (default: urn:graph:retrieval)
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
limit: Maximum number of sessions to return
|
||||
|
||||
|
|
@ -1133,7 +1103,6 @@ class ExplainabilityClient:
|
|||
query_triples = self.flow.triples_query(
|
||||
p=TG_QUERY,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=limit
|
||||
)
|
||||
|
|
@ -1142,7 +1111,7 @@ class ExplainabilityClient:
|
|||
for t in query_triples:
|
||||
question_uri = extract_term_value(t.get("s", {}))
|
||||
if question_uri:
|
||||
entity = self.fetch_entity(question_uri, graph, user, collection)
|
||||
entity = self.fetch_entity(question_uri, graph, collection)
|
||||
if isinstance(entity, Question):
|
||||
questions.append(entity)
|
||||
|
||||
|
|
@ -1154,7 +1123,6 @@ class ExplainabilityClient:
|
|||
s=q.uri,
|
||||
p=PROV_WAS_DERIVED_FROM,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=1
|
||||
)
|
||||
|
|
@ -1170,7 +1138,6 @@ class ExplainabilityClient:
|
|||
self,
|
||||
session_uri: str,
|
||||
graph: Optional[str] = None,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None
|
||||
) -> str:
|
||||
"""
|
||||
|
|
@ -1179,7 +1146,6 @@ class ExplainabilityClient:
|
|||
Args:
|
||||
session_uri: The session/question URI
|
||||
graph: Named graph
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
|
||||
Returns:
|
||||
|
|
@ -1201,7 +1167,6 @@ class ExplainabilityClient:
|
|||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=session_uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=5
|
||||
)
|
||||
|
|
@ -1212,7 +1177,7 @@ class ExplainabilityClient:
|
|||
]
|
||||
|
||||
for child_uri in all_child_uris:
|
||||
entity = self.fetch_entity(child_uri, graph, user, collection)
|
||||
entity = self.fetch_entity(child_uri, graph, collection)
|
||||
if isinstance(entity, (Analysis, Decomposition, Plan)):
|
||||
return "agent"
|
||||
if isinstance(entity, Exploration):
|
||||
|
|
|
|||
|
|
@ -115,72 +115,32 @@ class Flow:
|
|||
return FlowInstance(api=self, id=id)
|
||||
|
||||
def list_blueprints(self):
|
||||
"""
|
||||
List all available flow blueprints.
|
||||
"""List blueprints in the current workspace."""
|
||||
|
||||
Returns:
|
||||
list[str]: List of blueprint names
|
||||
|
||||
Example:
|
||||
```python
|
||||
blueprints = api.flow().list_blueprints()
|
||||
print(blueprints) # ['default', 'custom-flow', ...]
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "list-blueprints",
|
||||
"workspace": self.api.workspace,
|
||||
}
|
||||
|
||||
return self.request(request = input)["blueprint-names"]
|
||||
|
||||
def get_blueprint(self, blueprint_name):
|
||||
"""
|
||||
Get a flow blueprint definition by name.
|
||||
"""Get a flow blueprint definition by name."""
|
||||
|
||||
Args:
|
||||
blueprint_name: Name of the blueprint to retrieve
|
||||
|
||||
Returns:
|
||||
dict: Blueprint definition as a dictionary
|
||||
|
||||
Example:
|
||||
```python
|
||||
blueprint = api.flow().get_blueprint("default")
|
||||
print(blueprint) # Blueprint configuration
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "get-blueprint",
|
||||
"workspace": self.api.workspace,
|
||||
"blueprint-name": blueprint_name,
|
||||
}
|
||||
|
||||
return json.loads(self.request(request = input)["blueprint-definition"])
|
||||
|
||||
def put_blueprint(self, blueprint_name, definition):
|
||||
"""
|
||||
Create or update a flow blueprint.
|
||||
"""Create or update a flow blueprint."""
|
||||
|
||||
Args:
|
||||
blueprint_name: Name for the blueprint
|
||||
definition: Blueprint definition dictionary
|
||||
|
||||
Example:
|
||||
```python
|
||||
definition = {
|
||||
"services": ["text-completion", "graph-rag"],
|
||||
"parameters": {"model": "gpt-4"}
|
||||
}
|
||||
api.flow().put_blueprint("my-blueprint", definition)
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "put-blueprint",
|
||||
"workspace": self.api.workspace,
|
||||
"blueprint-name": blueprint_name,
|
||||
"blueprint-definition": json.dumps(definition),
|
||||
}
|
||||
|
|
@ -188,96 +148,43 @@ class Flow:
|
|||
self.request(request = input)
|
||||
|
||||
def delete_blueprint(self, blueprint_name):
|
||||
"""
|
||||
Delete a flow blueprint.
|
||||
"""Delete a flow blueprint."""
|
||||
|
||||
Args:
|
||||
blueprint_name: Name of the blueprint to delete
|
||||
|
||||
Example:
|
||||
```python
|
||||
api.flow().delete_blueprint("old-blueprint")
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "delete-blueprint",
|
||||
"workspace": self.api.workspace,
|
||||
"blueprint-name": blueprint_name,
|
||||
}
|
||||
|
||||
self.request(request = input)
|
||||
|
||||
def list(self):
|
||||
"""
|
||||
List all active flow instances.
|
||||
"""List flow instances in the current workspace."""
|
||||
|
||||
Returns:
|
||||
list[str]: List of flow instance IDs
|
||||
|
||||
Example:
|
||||
```python
|
||||
flows = api.flow().list()
|
||||
print(flows) # ['default', 'flow-1', 'flow-2', ...]
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "list-flows",
|
||||
"workspace": self.api.workspace,
|
||||
}
|
||||
|
||||
return self.request(request = input)["flow-ids"]
|
||||
|
||||
def get(self, id):
|
||||
"""
|
||||
Get the definition of a running flow instance.
|
||||
"""Get the definition of a flow instance."""
|
||||
|
||||
Args:
|
||||
id: Flow instance ID
|
||||
|
||||
Returns:
|
||||
dict: Flow instance definition
|
||||
|
||||
Example:
|
||||
```python
|
||||
flow_def = api.flow().get("default")
|
||||
print(flow_def)
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "get-flow",
|
||||
"workspace": self.api.workspace,
|
||||
"flow-id": id,
|
||||
}
|
||||
|
||||
return json.loads(self.request(request = input)["flow"])
|
||||
|
||||
def start(self, blueprint_name, id, description, parameters=None):
|
||||
"""
|
||||
Start a new flow instance from a blueprint.
|
||||
"""Start a new flow instance from a blueprint."""
|
||||
|
||||
Args:
|
||||
blueprint_name: Name of the blueprint to instantiate
|
||||
id: Unique identifier for the flow instance
|
||||
description: Human-readable description
|
||||
parameters: Optional parameters dictionary
|
||||
|
||||
Example:
|
||||
```python
|
||||
api.flow().start(
|
||||
blueprint_name="default",
|
||||
id="my-flow",
|
||||
description="My custom flow",
|
||||
parameters={"model": "gpt-4"}
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "start-flow",
|
||||
"workspace": self.api.workspace,
|
||||
"flow-id": id,
|
||||
"blueprint-name": blueprint_name,
|
||||
"description": description,
|
||||
|
|
@ -289,21 +196,11 @@ class Flow:
|
|||
self.request(request = input)
|
||||
|
||||
def stop(self, id):
|
||||
"""
|
||||
Stop a running flow instance.
|
||||
"""Stop a running flow instance."""
|
||||
|
||||
Args:
|
||||
id: Flow instance ID to stop
|
||||
|
||||
Example:
|
||||
```python
|
||||
api.flow().stop("my-flow")
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "stop-flow",
|
||||
"workspace": self.api.workspace,
|
||||
"flow-id": id,
|
||||
}
|
||||
|
||||
|
|
@ -349,6 +246,13 @@ class FlowInstance:
|
|||
Returns:
|
||||
dict: Service response
|
||||
"""
|
||||
# Inject workspace so the gateway can route to the right
|
||||
# workspace's flow. If already present, keep the caller's value.
|
||||
if isinstance(request, dict) and "workspace" not in request:
|
||||
request = {
|
||||
"workspace": self.api.api.workspace,
|
||||
**request,
|
||||
}
|
||||
return self.api.request(path = f"{self.id}/{path}", request = request)
|
||||
|
||||
def text_completion(self, system, prompt):
|
||||
|
|
@ -392,7 +296,7 @@ class FlowInstance:
|
|||
model=result.get("model"),
|
||||
)
|
||||
|
||||
def agent(self, question, user="trustgraph", state=None, group=None, history=None):
|
||||
def agent(self, question,state=None, group=None, history=None):
|
||||
"""
|
||||
Execute an agent operation with reasoning and tool use capabilities.
|
||||
|
||||
|
|
@ -401,7 +305,6 @@ class FlowInstance:
|
|||
|
||||
Args:
|
||||
question: User question or instruction
|
||||
user: User identifier (default: "trustgraph")
|
||||
state: Optional state dictionary for stateful conversations
|
||||
group: Optional group identifier for multi-user contexts
|
||||
history: Optional conversation history as list of message dicts
|
||||
|
|
@ -416,8 +319,7 @@ class FlowInstance:
|
|||
# Simple question
|
||||
answer = flow.agent(
|
||||
question="What is the capital of France?",
|
||||
user="trustgraph"
|
||||
)
|
||||
)
|
||||
|
||||
# With conversation history
|
||||
history = [
|
||||
|
|
@ -425,9 +327,7 @@ class FlowInstance:
|
|||
{"role": "assistant", "content": "Hi! How can I help?"}
|
||||
]
|
||||
answer = flow.agent(
|
||||
question="Tell me about Paris",
|
||||
user="trustgraph",
|
||||
history=history
|
||||
question="Tell me about Paris",history=history
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
|
@ -435,7 +335,6 @@ class FlowInstance:
|
|||
# The input consists of a question and optional context
|
||||
input = {
|
||||
"question": question,
|
||||
"user": user,
|
||||
}
|
||||
|
||||
# Only include state if it has a value
|
||||
|
|
@ -455,7 +354,7 @@ class FlowInstance:
|
|||
)["answer"]
|
||||
|
||||
def graph_rag(
|
||||
self, query, user="trustgraph", collection="default",
|
||||
self, query,collection="default",
|
||||
entity_limit=50, triple_limit=30, max_subgraph_size=150,
|
||||
max_path_length=2, edge_score_limit=30, edge_limit=25,
|
||||
):
|
||||
|
|
@ -467,7 +366,6 @@ class FlowInstance:
|
|||
|
||||
Args:
|
||||
query: Natural language query
|
||||
user: User/keyspace identifier (default: "trustgraph")
|
||||
collection: Collection identifier (default: "default")
|
||||
entity_limit: Maximum entities to retrieve (default: 50)
|
||||
triple_limit: Maximum triples per entity (default: 30)
|
||||
|
|
@ -483,9 +381,7 @@ class FlowInstance:
|
|||
```python
|
||||
flow = api.flow().id("default")
|
||||
response = flow.graph_rag(
|
||||
query="Tell me about Marie Curie's discoveries",
|
||||
user="trustgraph",
|
||||
collection="scientists",
|
||||
query="Tell me about Marie Curie's discoveries",collection="scientists",
|
||||
entity_limit=20,
|
||||
max_path_length=3
|
||||
)
|
||||
|
|
@ -496,7 +392,6 @@ class FlowInstance:
|
|||
# The input consists of a question
|
||||
input = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"entity-limit": entity_limit,
|
||||
"triple-limit": triple_limit,
|
||||
|
|
@ -519,7 +414,7 @@ class FlowInstance:
|
|||
)
|
||||
|
||||
def document_rag(
|
||||
self, query, user="trustgraph", collection="default",
|
||||
self, query,collection="default",
|
||||
doc_limit=10,
|
||||
):
|
||||
"""
|
||||
|
|
@ -530,7 +425,6 @@ class FlowInstance:
|
|||
|
||||
Args:
|
||||
query: Natural language query
|
||||
user: User/keyspace identifier (default: "trustgraph")
|
||||
collection: Collection identifier (default: "default")
|
||||
doc_limit: Maximum document chunks to retrieve (default: 10)
|
||||
|
||||
|
|
@ -541,9 +435,7 @@ class FlowInstance:
|
|||
```python
|
||||
flow = api.flow().id("default")
|
||||
response = flow.document_rag(
|
||||
query="Summarize the key findings",
|
||||
user="trustgraph",
|
||||
collection="research-papers",
|
||||
query="Summarize the key findings",collection="research-papers",
|
||||
doc_limit=5
|
||||
)
|
||||
print(response)
|
||||
|
|
@ -553,7 +445,6 @@ class FlowInstance:
|
|||
# The input consists of a question
|
||||
input = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"doc-limit": doc_limit,
|
||||
}
|
||||
|
|
@ -600,7 +491,7 @@ class FlowInstance:
|
|||
input
|
||||
)["vectors"]
|
||||
|
||||
def graph_embeddings_query(self, text, user, collection, limit=10):
|
||||
def graph_embeddings_query(self, text, collection, limit=10):
|
||||
"""
|
||||
Query knowledge graph entities using semantic similarity.
|
||||
|
||||
|
|
@ -609,7 +500,6 @@ class FlowInstance:
|
|||
|
||||
Args:
|
||||
text: Query text for semantic search
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
limit: Maximum number of results (default: 10)
|
||||
|
||||
|
|
@ -620,9 +510,7 @@ class FlowInstance:
|
|||
```python
|
||||
flow = api.flow().id("default")
|
||||
results = flow.graph_embeddings_query(
|
||||
text="physicist who discovered radioactivity",
|
||||
user="trustgraph",
|
||||
collection="scientists",
|
||||
text="physicist who discovered radioactivity",collection="scientists",
|
||||
limit=5
|
||||
)
|
||||
# results contains {"entities": [{"entity": {...}, "score": 0.95}, ...]}
|
||||
|
|
@ -636,7 +524,6 @@ class FlowInstance:
|
|||
# Query graph embeddings for semantic search
|
||||
input = {
|
||||
"vector": vector,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
|
@ -646,7 +533,7 @@ class FlowInstance:
|
|||
input
|
||||
)
|
||||
|
||||
def document_embeddings_query(self, text, user, collection, limit=10):
|
||||
def document_embeddings_query(self, text, collection, limit=10):
|
||||
"""
|
||||
Query document chunks using semantic similarity.
|
||||
|
||||
|
|
@ -655,7 +542,6 @@ class FlowInstance:
|
|||
|
||||
Args:
|
||||
text: Query text for semantic search
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
limit: Maximum number of results (default: 10)
|
||||
|
||||
|
|
@ -666,9 +552,7 @@ class FlowInstance:
|
|||
```python
|
||||
flow = api.flow().id("default")
|
||||
results = flow.document_embeddings_query(
|
||||
text="machine learning algorithms",
|
||||
user="trustgraph",
|
||||
collection="research-papers",
|
||||
text="machine learning algorithms",collection="research-papers",
|
||||
limit=5
|
||||
)
|
||||
# results contains {"chunks": [{"chunk_id": "doc1/p0/c0", "score": 0.95}, ...]}
|
||||
|
|
@ -682,7 +566,6 @@ class FlowInstance:
|
|||
# Query document embeddings for semantic search
|
||||
input = {
|
||||
"vector": vector,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
|
@ -805,7 +688,7 @@ class FlowInstance:
|
|||
|
||||
def triples_query(
|
||||
self, s=None, p=None, o=None,
|
||||
user=None, collection=None, limit=10000
|
||||
collection=None, limit=10000
|
||||
):
|
||||
"""
|
||||
Query knowledge graph triples using pattern matching.
|
||||
|
|
@ -817,7 +700,6 @@ class FlowInstance:
|
|||
s: Subject URI (optional, use None for wildcard)
|
||||
p: Predicate URI (optional, use None for wildcard)
|
||||
o: Object URI or Literal (optional, use None for wildcard)
|
||||
user: User/keyspace identifier (optional)
|
||||
collection: Collection identifier (optional)
|
||||
limit: Maximum results to return (default: 10000)
|
||||
|
||||
|
|
@ -835,9 +717,7 @@ class FlowInstance:
|
|||
|
||||
# Find all triples about a specific subject
|
||||
triples = flow.triples_query(
|
||||
s=Uri("http://example.org/person/marie-curie"),
|
||||
user="trustgraph",
|
||||
collection="scientists"
|
||||
s=Uri("http://example.org/person/marie-curie"),collection="scientists"
|
||||
)
|
||||
|
||||
# Find all instances of a specific relationship
|
||||
|
|
@ -851,10 +731,6 @@ class FlowInstance:
|
|||
input = {
|
||||
"limit": limit
|
||||
}
|
||||
|
||||
if user:
|
||||
input["user"] = user
|
||||
|
||||
if collection:
|
||||
input["collection"] = collection
|
||||
|
||||
|
|
@ -888,7 +764,7 @@ class FlowInstance:
|
|||
]
|
||||
|
||||
def load_document(
|
||||
self, document, id=None, metadata=None, user=None,
|
||||
self, document, id=None, metadata=None,
|
||||
collection=None,
|
||||
):
|
||||
"""
|
||||
|
|
@ -901,7 +777,6 @@ class FlowInstance:
|
|||
document: Document content as bytes
|
||||
id: Optional document identifier (auto-generated if None)
|
||||
metadata: Optional metadata (list of Triples or object with emit method)
|
||||
user: User/keyspace identifier (optional)
|
||||
collection: Collection identifier (optional)
|
||||
|
||||
Returns:
|
||||
|
|
@ -918,9 +793,7 @@ class FlowInstance:
|
|||
with open("research.pdf", "rb") as f:
|
||||
result = flow.load_document(
|
||||
document=f.read(),
|
||||
id="research-001",
|
||||
user="trustgraph",
|
||||
collection="papers"
|
||||
id="research-001",collection="papers"
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
|
@ -955,10 +828,6 @@ class FlowInstance:
|
|||
"metadata": triples,
|
||||
"data": base64.b64encode(document).decode("utf-8"),
|
||||
}
|
||||
|
||||
if user:
|
||||
input["user"] = user
|
||||
|
||||
if collection:
|
||||
input["collection"] = collection
|
||||
|
||||
|
|
@ -969,7 +838,7 @@ class FlowInstance:
|
|||
|
||||
def load_text(
|
||||
self, text, id=None, metadata=None, charset="utf-8",
|
||||
user=None, collection=None,
|
||||
collection=None,
|
||||
):
|
||||
"""
|
||||
Load text content for processing.
|
||||
|
|
@ -982,7 +851,6 @@ class FlowInstance:
|
|||
id: Optional document identifier (auto-generated if None)
|
||||
metadata: Optional metadata (list of Triples or object with emit method)
|
||||
charset: Character encoding (default: "utf-8")
|
||||
user: User/keyspace identifier (optional)
|
||||
collection: Collection identifier (optional)
|
||||
|
||||
Returns:
|
||||
|
|
@ -1000,9 +868,7 @@ class FlowInstance:
|
|||
result = flow.load_text(
|
||||
text=text_content,
|
||||
id="text-001",
|
||||
charset="utf-8",
|
||||
user="trustgraph",
|
||||
collection="documents"
|
||||
charset="utf-8",collection="documents"
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
|
@ -1035,10 +901,6 @@ class FlowInstance:
|
|||
"charset": charset,
|
||||
"text": base64.b64encode(text).decode("utf-8"),
|
||||
}
|
||||
|
||||
if user:
|
||||
input["user"] = user
|
||||
|
||||
if collection:
|
||||
input["collection"] = collection
|
||||
|
||||
|
|
@ -1048,7 +910,7 @@ class FlowInstance:
|
|||
)
|
||||
|
||||
def rows_query(
|
||||
self, query, user="trustgraph", collection="default",
|
||||
self, query,collection="default",
|
||||
variables=None, operation_name=None
|
||||
):
|
||||
"""
|
||||
|
|
@ -1059,7 +921,6 @@ class FlowInstance:
|
|||
|
||||
Args:
|
||||
query: GraphQL query string
|
||||
user: User/keyspace identifier (default: "trustgraph")
|
||||
collection: Collection identifier (default: "default")
|
||||
variables: Optional query variables dictionary
|
||||
operation_name: Optional operation name for multi-operation documents
|
||||
|
|
@ -1085,9 +946,7 @@ class FlowInstance:
|
|||
}
|
||||
'''
|
||||
result = flow.rows_query(
|
||||
query=query,
|
||||
user="trustgraph",
|
||||
collection="scientists"
|
||||
query=query,collection="scientists"
|
||||
)
|
||||
|
||||
# Query with variables
|
||||
|
|
@ -1109,7 +968,6 @@ class FlowInstance:
|
|||
# The input consists of a GraphQL query and optional variables
|
||||
input = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
}
|
||||
|
||||
|
|
@ -1145,7 +1003,7 @@ class FlowInstance:
|
|||
return result
|
||||
|
||||
def sparql_query(
|
||||
self, query, user="trustgraph", collection="default",
|
||||
self, query,collection="default",
|
||||
limit=10000
|
||||
):
|
||||
"""
|
||||
|
|
@ -1153,7 +1011,6 @@ class FlowInstance:
|
|||
|
||||
Args:
|
||||
query: SPARQL 1.1 query string
|
||||
user: User/keyspace identifier (default: "trustgraph")
|
||||
collection: Collection identifier (default: "default")
|
||||
limit: Safety limit on results (default: 10000)
|
||||
|
||||
|
|
@ -1169,7 +1026,6 @@ class FlowInstance:
|
|||
|
||||
input = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit,
|
||||
}
|
||||
|
|
@ -1213,14 +1069,13 @@ class FlowInstance:
|
|||
|
||||
return response
|
||||
|
||||
def structured_query(self, question, user="trustgraph", collection="default"):
|
||||
def structured_query(self, question,collection="default"):
|
||||
"""
|
||||
Execute a natural language question against structured data.
|
||||
Combines NLP query conversion and GraphQL execution.
|
||||
|
||||
Args:
|
||||
question: Natural language question
|
||||
user: Cassandra keyspace identifier (default: "trustgraph")
|
||||
collection: Data collection identifier (default: "default")
|
||||
|
||||
Returns:
|
||||
|
|
@ -1229,7 +1084,6 @@ class FlowInstance:
|
|||
|
||||
input = {
|
||||
"question": question,
|
||||
"user": user,
|
||||
"collection": collection
|
||||
}
|
||||
|
||||
|
|
@ -1383,7 +1237,7 @@ class FlowInstance:
|
|||
return response["schema-matches"]
|
||||
|
||||
def row_embeddings_query(
|
||||
self, text, schema_name, user="trustgraph", collection="default",
|
||||
self, text, schema_name,collection="default",
|
||||
index_name=None, limit=10
|
||||
):
|
||||
"""
|
||||
|
|
@ -1396,7 +1250,6 @@ class FlowInstance:
|
|||
Args:
|
||||
text: Query text for semantic search
|
||||
schema_name: Schema name to search within
|
||||
user: User/keyspace identifier (default: "trustgraph")
|
||||
collection: Collection identifier (default: "default")
|
||||
index_name: Optional index name to filter search to specific index
|
||||
limit: Maximum number of results (default: 10)
|
||||
|
|
@ -1412,9 +1265,7 @@ class FlowInstance:
|
|||
# Search for customers by name similarity
|
||||
results = flow.row_embeddings_query(
|
||||
text="John Smith",
|
||||
schema_name="customers",
|
||||
user="trustgraph",
|
||||
collection="sales",
|
||||
schema_name="customers",collection="sales",
|
||||
limit=5
|
||||
)
|
||||
|
||||
|
|
@ -1436,7 +1287,6 @@ class FlowInstance:
|
|||
input = {
|
||||
"vector": vector,
|
||||
"schema_name": schema_name,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,105 +63,50 @@ class Knowledge:
|
|||
"""
|
||||
return self.api.request(f"knowledge", request)
|
||||
|
||||
def list_kg_cores(self, user="trustgraph"):
|
||||
def list_kg_cores(self):
|
||||
"""
|
||||
List all available knowledge graph cores.
|
||||
|
||||
Retrieves the IDs of all KG cores available for the specified user.
|
||||
|
||||
Args:
|
||||
user: User identifier (default: "trustgraph")
|
||||
List all available knowledge graph cores in this workspace.
|
||||
|
||||
Returns:
|
||||
list[str]: List of KG core identifiers
|
||||
|
||||
Example:
|
||||
```python
|
||||
knowledge = api.knowledge()
|
||||
|
||||
# List available KG cores
|
||||
cores = knowledge.list_kg_cores(user="trustgraph")
|
||||
print(f"Available KG cores: {cores}")
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "list-kg-cores",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
}
|
||||
|
||||
return self.request(request = input)["ids"]
|
||||
|
||||
def delete_kg_core(self, id, user="trustgraph"):
|
||||
def delete_kg_core(self, id):
|
||||
"""
|
||||
Delete a knowledge graph core.
|
||||
|
||||
Removes a KG core from storage. This does not affect currently loaded
|
||||
cores in flows.
|
||||
Delete a knowledge graph core in this workspace.
|
||||
|
||||
Args:
|
||||
id: KG core identifier to delete
|
||||
user: User identifier (default: "trustgraph")
|
||||
|
||||
Example:
|
||||
```python
|
||||
knowledge = api.knowledge()
|
||||
|
||||
# Delete a KG core
|
||||
knowledge.delete_kg_core(id="medical-kb-v1", user="trustgraph")
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "delete-kg-core",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"id": id,
|
||||
}
|
||||
|
||||
self.request(request = input)
|
||||
|
||||
def load_kg_core(self, id, user="trustgraph", flow="default",
|
||||
collection="default"):
|
||||
def load_kg_core(self, id, flow="default", collection="default"):
|
||||
"""
|
||||
Load a knowledge graph core into a flow.
|
||||
|
||||
Makes a KG core available for use in queries and RAG operations within
|
||||
the specified flow and collection.
|
||||
|
||||
Args:
|
||||
id: KG core identifier to load
|
||||
user: User identifier (default: "trustgraph")
|
||||
flow: Flow instance to load into (default: "default")
|
||||
collection: Collection to associate with (default: "default")
|
||||
|
||||
Example:
|
||||
```python
|
||||
knowledge = api.knowledge()
|
||||
|
||||
# Load a medical knowledge base into the default flow
|
||||
knowledge.load_kg_core(
|
||||
id="medical-kb-v1",
|
||||
user="trustgraph",
|
||||
flow="default",
|
||||
collection="medical"
|
||||
)
|
||||
|
||||
# Now the flow can use this KG core for RAG queries
|
||||
flow = api.flow().id("default")
|
||||
response = flow.graph_rag(
|
||||
query="What are the symptoms of diabetes?",
|
||||
user="trustgraph",
|
||||
collection="medical"
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "load-kg-core",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"id": id,
|
||||
"flow": flow,
|
||||
"collection": collection,
|
||||
|
|
@ -169,35 +114,18 @@ class Knowledge:
|
|||
|
||||
self.request(request = input)
|
||||
|
||||
def unload_kg_core(self, id, user="trustgraph", flow="default"):
|
||||
def unload_kg_core(self, id, flow="default"):
|
||||
"""
|
||||
Unload a knowledge graph core from a flow.
|
||||
|
||||
Removes a KG core from active use in the specified flow, freeing
|
||||
resources while keeping the core available in storage.
|
||||
|
||||
Args:
|
||||
id: KG core identifier to unload
|
||||
user: User identifier (default: "trustgraph")
|
||||
flow: Flow instance to unload from (default: "default")
|
||||
|
||||
Example:
|
||||
```python
|
||||
knowledge = api.knowledge()
|
||||
|
||||
# Unload a KG core when no longer needed
|
||||
knowledge.unload_kg_core(
|
||||
id="medical-kb-v1",
|
||||
user="trustgraph",
|
||||
flow="default"
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"operation": "unload-kg-core",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"id": id,
|
||||
"flow": flow,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ class Library:
|
|||
return self.api.request(f"librarian", request)
|
||||
|
||||
def add_document(
|
||||
self, document, id, metadata, user, title, comments,
|
||||
self, document, id, metadata, title, comments,
|
||||
kind="text/plain", tags=[], on_progress=None,
|
||||
):
|
||||
"""
|
||||
|
|
@ -108,7 +108,6 @@ class Library:
|
|||
document: Document content as bytes
|
||||
id: Document identifier (auto-generated if None)
|
||||
metadata: Document metadata as list of Triple objects or object with emit method
|
||||
user: User/owner identifier
|
||||
title: Document title
|
||||
comments: Document description or comments
|
||||
kind: MIME type of the document (default: "text/plain")
|
||||
|
|
@ -131,7 +130,6 @@ class Library:
|
|||
document=f.read(),
|
||||
id="research-001",
|
||||
metadata=[],
|
||||
user="trustgraph",
|
||||
title="Research Paper",
|
||||
comments="Key findings in quantum computing",
|
||||
kind="application/pdf",
|
||||
|
|
@ -147,7 +145,6 @@ class Library:
|
|||
document=f.read(),
|
||||
id="large-doc-001",
|
||||
metadata=[],
|
||||
user="trustgraph",
|
||||
title="Large Document",
|
||||
comments="A very large document",
|
||||
kind="application/pdf",
|
||||
|
|
@ -176,7 +173,6 @@ class Library:
|
|||
document=document,
|
||||
id=id,
|
||||
metadata=metadata,
|
||||
user=user,
|
||||
title=title,
|
||||
comments=comments,
|
||||
kind=kind,
|
||||
|
|
@ -213,6 +209,7 @@ class Library:
|
|||
|
||||
input = {
|
||||
"operation": "add-document",
|
||||
"workspace": self.api.workspace,
|
||||
"document-metadata": {
|
||||
"id": id,
|
||||
"time": int(time.time()),
|
||||
|
|
@ -220,7 +217,7 @@ class Library:
|
|||
"title": title,
|
||||
"comments": comments,
|
||||
"metadata": triples,
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"tags": tags
|
||||
},
|
||||
"content": base64.b64encode(document).decode("utf-8"),
|
||||
|
|
@ -229,7 +226,7 @@ class Library:
|
|||
return self.request(input)
|
||||
|
||||
def _add_document_chunked(
|
||||
self, document, id, metadata, user, title, comments,
|
||||
self, document, id, metadata, title, comments,
|
||||
kind, tags, on_progress=None,
|
||||
):
|
||||
"""
|
||||
|
|
@ -245,13 +242,14 @@ class Library:
|
|||
# Begin upload session
|
||||
begin_request = {
|
||||
"operation": "begin-upload",
|
||||
"workspace": self.api.workspace,
|
||||
"document-metadata": {
|
||||
"id": id,
|
||||
"time": int(time.time()),
|
||||
"kind": kind,
|
||||
"title": title,
|
||||
"comments": comments,
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"tags": tags,
|
||||
},
|
||||
"total-size": total_size,
|
||||
|
|
@ -279,10 +277,10 @@ class Library:
|
|||
|
||||
chunk_request = {
|
||||
"operation": "upload-chunk",
|
||||
"workspace": self.api.workspace,
|
||||
"upload-id": upload_id,
|
||||
"chunk-index": chunk_index,
|
||||
"content": base64.b64encode(chunk_data).decode("utf-8"),
|
||||
"user": user,
|
||||
}
|
||||
|
||||
chunk_response = self.request(chunk_request)
|
||||
|
|
@ -298,8 +296,8 @@ class Library:
|
|||
# Complete upload
|
||||
complete_request = {
|
||||
"operation": "complete-upload",
|
||||
"workspace": self.api.workspace,
|
||||
"upload-id": upload_id,
|
||||
"user": user,
|
||||
}
|
||||
|
||||
complete_response = self.request(complete_request)
|
||||
|
|
@ -314,8 +312,8 @@ class Library:
|
|||
try:
|
||||
abort_request = {
|
||||
"operation": "abort-upload",
|
||||
"workspace": self.api.workspace,
|
||||
"upload-id": upload_id,
|
||||
"user": user,
|
||||
}
|
||||
self.request(abort_request)
|
||||
logger.info(f"Aborted failed upload {upload_id}")
|
||||
|
|
@ -323,15 +321,13 @@ class Library:
|
|||
logger.warning(f"Failed to abort upload: {abort_error}")
|
||||
raise
|
||||
|
||||
def get_documents(self, user, include_children=False):
|
||||
def get_documents(self, include_children=False):
|
||||
"""
|
||||
List all documents for a user.
|
||||
List all documents in the current workspace.
|
||||
|
||||
Retrieves metadata for all documents owned by the specified user.
|
||||
By default, only returns top-level documents (not child/extracted documents).
|
||||
|
||||
Args:
|
||||
user: User identifier
|
||||
include_children: If True, also include child documents (default: False)
|
||||
|
||||
Returns:
|
||||
|
|
@ -345,7 +341,7 @@ class Library:
|
|||
library = api.library()
|
||||
|
||||
# Get only top-level documents
|
||||
docs = library.get_documents(user="trustgraph")
|
||||
docs = library.get_documents()
|
||||
|
||||
for doc in docs:
|
||||
print(f"{doc.id}: {doc.title} ({doc.kind})")
|
||||
|
|
@ -353,13 +349,13 @@ class Library:
|
|||
print(f" Tags: {', '.join(doc.tags)}")
|
||||
|
||||
# Get all documents including extracted pages
|
||||
all_docs = library.get_documents(user="trustgraph", include_children=True)
|
||||
all_docs = library.get_documents(include_children=True)
|
||||
```
|
||||
"""
|
||||
|
||||
input = {
|
||||
"operation": "list-documents",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"include-children": include_children,
|
||||
}
|
||||
|
||||
|
|
@ -381,7 +377,7 @@ class Library:
|
|||
)
|
||||
for w in v["metadata"]
|
||||
],
|
||||
user = v["user"],
|
||||
workspace = v.get("workspace", ""),
|
||||
tags = v["tags"],
|
||||
parent_id = v.get("parent-id", ""),
|
||||
document_type = v.get("document-type", "source"),
|
||||
|
|
@ -392,14 +388,13 @@ class Library:
|
|||
logger.error("Failed to parse document list response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def get_document(self, user, id):
|
||||
def get_document(self, id):
|
||||
"""
|
||||
Get metadata for a specific document.
|
||||
|
||||
Retrieves the metadata for a single document by ID.
|
||||
|
||||
Args:
|
||||
user: User identifier
|
||||
id: Document identifier
|
||||
|
||||
Returns:
|
||||
|
|
@ -411,7 +406,7 @@ class Library:
|
|||
Example:
|
||||
```python
|
||||
library = api.library()
|
||||
doc = library.get_document(user="trustgraph", id="doc-123")
|
||||
doc = library.get_document(id="doc-123")
|
||||
print(f"Title: {doc.title}")
|
||||
print(f"Comments: {doc.comments}")
|
||||
```
|
||||
|
|
@ -419,7 +414,7 @@ class Library:
|
|||
|
||||
input = {
|
||||
"operation": "get-document",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"document-id": id,
|
||||
}
|
||||
|
||||
|
|
@ -441,7 +436,7 @@ class Library:
|
|||
)
|
||||
for w in doc["metadata"]
|
||||
],
|
||||
user = doc["user"],
|
||||
workspace = doc.get("workspace", ""),
|
||||
tags = doc["tags"],
|
||||
parent_id = doc.get("parent-id", ""),
|
||||
document_type = doc.get("document-type", "source"),
|
||||
|
|
@ -450,14 +445,13 @@ class Library:
|
|||
logger.error("Failed to parse document response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def update_document(self, user, id, metadata):
|
||||
def update_document(self, id, metadata):
|
||||
"""
|
||||
Update document metadata.
|
||||
|
||||
Updates the metadata for an existing document in the library.
|
||||
|
||||
Args:
|
||||
user: User identifier
|
||||
id: Document identifier
|
||||
metadata: Updated DocumentMetadata object
|
||||
|
||||
|
|
@ -472,7 +466,7 @@ class Library:
|
|||
library = api.library()
|
||||
|
||||
# Get existing document
|
||||
doc = library.get_document(user="trustgraph", id="doc-123")
|
||||
doc = library.get_document(id="doc-123")
|
||||
|
||||
# Update metadata
|
||||
doc.title = "Updated Title"
|
||||
|
|
@ -481,7 +475,6 @@ class Library:
|
|||
|
||||
# Save changes
|
||||
updated_doc = library.update_document(
|
||||
user="trustgraph",
|
||||
id="doc-123",
|
||||
metadata=doc
|
||||
)
|
||||
|
|
@ -490,8 +483,9 @@ class Library:
|
|||
|
||||
input = {
|
||||
"operation": "update-document",
|
||||
"workspace": self.api.workspace,
|
||||
"document-metadata": {
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"document-id": id,
|
||||
"time": metadata.time,
|
||||
"title": metadata.title,
|
||||
|
|
@ -526,21 +520,20 @@ class Library:
|
|||
)
|
||||
for w in doc["metadata"]
|
||||
],
|
||||
user = doc["user"],
|
||||
workspace = doc.get("workspace", ""),
|
||||
tags = doc["tags"]
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Failed to parse document update response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def remove_document(self, user, id):
|
||||
def remove_document(self, id):
|
||||
"""
|
||||
Remove a document from the library.
|
||||
|
||||
Deletes a document and its metadata from the library.
|
||||
|
||||
Args:
|
||||
user: User identifier
|
||||
id: Document identifier to remove
|
||||
|
||||
Returns:
|
||||
|
|
@ -549,13 +542,13 @@ class Library:
|
|||
Example:
|
||||
```python
|
||||
library = api.library()
|
||||
library.remove_document(user="trustgraph", id="doc-123")
|
||||
library.remove_document(id="doc-123")
|
||||
```
|
||||
"""
|
||||
|
||||
input = {
|
||||
"operation": "remove-document",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"document-id": id,
|
||||
}
|
||||
|
||||
|
|
@ -565,7 +558,7 @@ class Library:
|
|||
|
||||
def start_processing(
|
||||
self, id, document_id, flow="default",
|
||||
user="trustgraph", collection="default", tags=[],
|
||||
collection="default", tags=[],
|
||||
):
|
||||
"""
|
||||
Start a document processing workflow.
|
||||
|
|
@ -577,7 +570,6 @@ class Library:
|
|||
id: Unique processing job identifier
|
||||
document_id: ID of the document to process
|
||||
flow: Flow instance to use for processing (default: "default")
|
||||
user: User identifier (default: "trustgraph")
|
||||
collection: Target collection for processed data (default: "default")
|
||||
tags: List of tags for the processing job (default: [])
|
||||
|
||||
|
|
@ -593,7 +585,6 @@ class Library:
|
|||
id="proc-001",
|
||||
document_id="doc-123",
|
||||
flow="default",
|
||||
user="trustgraph",
|
||||
collection="research",
|
||||
tags=["automated", "extract"]
|
||||
)
|
||||
|
|
@ -602,12 +593,13 @@ class Library:
|
|||
|
||||
input = {
|
||||
"operation": "add-processing",
|
||||
"workspace": self.api.workspace,
|
||||
"processing-metadata": {
|
||||
"id": id,
|
||||
"document-id": document_id,
|
||||
"time": int(time.time()),
|
||||
"flow": flow,
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"collection": collection,
|
||||
"tags": tags,
|
||||
}
|
||||
|
|
@ -618,7 +610,7 @@ class Library:
|
|||
return {}
|
||||
|
||||
def stop_processing(
|
||||
self, id, user="trustgraph",
|
||||
self, id,
|
||||
):
|
||||
"""
|
||||
Stop a running document processing job.
|
||||
|
|
@ -627,7 +619,6 @@ class Library:
|
|||
|
||||
Args:
|
||||
id: Processing job identifier to stop
|
||||
user: User identifier (default: "trustgraph")
|
||||
|
||||
Returns:
|
||||
dict: Empty response object
|
||||
|
|
@ -635,29 +626,26 @@ class Library:
|
|||
Example:
|
||||
```python
|
||||
library = api.library()
|
||||
library.stop_processing(id="proc-001", user="trustgraph")
|
||||
library.stop_processing(id="proc-001")
|
||||
```
|
||||
"""
|
||||
|
||||
input = {
|
||||
"operation": "remove-processing",
|
||||
"workspace": self.api.workspace,
|
||||
"processing-id": id,
|
||||
"user": user,
|
||||
}
|
||||
|
||||
object = self.request(input)
|
||||
|
||||
return {}
|
||||
|
||||
def get_processings(self, user="trustgraph"):
|
||||
def get_processings(self):
|
||||
"""
|
||||
List all active document processing jobs.
|
||||
|
||||
Retrieves metadata for all currently running document processing workflows
|
||||
for the specified user.
|
||||
|
||||
Args:
|
||||
user: User identifier (default: "trustgraph")
|
||||
in the current workspace.
|
||||
|
||||
Returns:
|
||||
list[ProcessingMetadata]: List of processing job metadata objects
|
||||
|
|
@ -668,7 +656,7 @@ class Library:
|
|||
Example:
|
||||
```python
|
||||
library = api.library()
|
||||
jobs = library.get_processings(user="trustgraph")
|
||||
jobs = library.get_processings()
|
||||
|
||||
for job in jobs:
|
||||
print(f"Job {job.id}:")
|
||||
|
|
@ -681,7 +669,7 @@ class Library:
|
|||
|
||||
input = {
|
||||
"operation": "list-processing",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
}
|
||||
|
||||
object = self.request(input)
|
||||
|
|
@ -693,7 +681,7 @@ class Library:
|
|||
document_id = v["document-id"],
|
||||
time = datetime.datetime.fromtimestamp(v["time"]),
|
||||
flow = v["flow"],
|
||||
user = v["user"],
|
||||
workspace = v.get("workspace", ""),
|
||||
collection = v["collection"],
|
||||
tags = v["tags"],
|
||||
)
|
||||
|
|
@ -705,23 +693,20 @@ class Library:
|
|||
|
||||
# Chunked upload management methods
|
||||
|
||||
def get_pending_uploads(self, user):
|
||||
def get_pending_uploads(self):
|
||||
"""
|
||||
List all pending (in-progress) uploads for a user.
|
||||
List all pending (in-progress) uploads in the current workspace.
|
||||
|
||||
Retrieves information about chunked uploads that have been started
|
||||
but not yet completed.
|
||||
|
||||
Args:
|
||||
user: User identifier
|
||||
|
||||
Returns:
|
||||
list[dict]: List of pending upload information
|
||||
|
||||
Example:
|
||||
```python
|
||||
library = api.library()
|
||||
pending = library.get_pending_uploads(user="trustgraph")
|
||||
pending = library.get_pending_uploads()
|
||||
|
||||
for upload in pending:
|
||||
print(f"Upload {upload['upload_id']}:")
|
||||
|
|
@ -731,14 +716,14 @@ class Library:
|
|||
"""
|
||||
input = {
|
||||
"operation": "list-uploads",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
}
|
||||
|
||||
response = self.request(input)
|
||||
|
||||
return response.get("upload-sessions", [])
|
||||
|
||||
def get_upload_status(self, upload_id, user):
|
||||
def get_upload_status(self, upload_id):
|
||||
"""
|
||||
Get the status of a specific upload.
|
||||
|
||||
|
|
@ -747,7 +732,6 @@ class Library:
|
|||
|
||||
Args:
|
||||
upload_id: Upload session identifier
|
||||
user: User identifier
|
||||
|
||||
Returns:
|
||||
dict: Upload status information including:
|
||||
|
|
@ -763,10 +747,7 @@ class Library:
|
|||
Example:
|
||||
```python
|
||||
library = api.library()
|
||||
status = library.get_upload_status(
|
||||
upload_id="abc-123",
|
||||
user="trustgraph"
|
||||
)
|
||||
status = library.get_upload_status(upload_id="abc-123")
|
||||
|
||||
if status['state'] == 'in-progress':
|
||||
print(f"Missing chunks: {status['missing_chunks']}")
|
||||
|
|
@ -774,13 +755,13 @@ class Library:
|
|||
"""
|
||||
input = {
|
||||
"operation": "get-upload-status",
|
||||
"workspace": self.api.workspace,
|
||||
"upload-id": upload_id,
|
||||
"user": user,
|
||||
}
|
||||
|
||||
return self.request(input)
|
||||
|
||||
def abort_upload(self, upload_id, user):
|
||||
def abort_upload(self, upload_id):
|
||||
"""
|
||||
Abort an in-progress upload.
|
||||
|
||||
|
|
@ -788,7 +769,6 @@ class Library:
|
|||
|
||||
Args:
|
||||
upload_id: Upload session identifier
|
||||
user: User identifier
|
||||
|
||||
Returns:
|
||||
dict: Empty response on success
|
||||
|
|
@ -796,18 +776,18 @@ class Library:
|
|||
Example:
|
||||
```python
|
||||
library = api.library()
|
||||
library.abort_upload(upload_id="abc-123", user="trustgraph")
|
||||
library.abort_upload(upload_id="abc-123")
|
||||
```
|
||||
"""
|
||||
input = {
|
||||
"operation": "abort-upload",
|
||||
"workspace": self.api.workspace,
|
||||
"upload-id": upload_id,
|
||||
"user": user,
|
||||
}
|
||||
|
||||
return self.request(input)
|
||||
|
||||
def resume_upload(self, upload_id, document, user, on_progress=None):
|
||||
def resume_upload(self, upload_id, document, on_progress=None):
|
||||
"""
|
||||
Resume an interrupted upload.
|
||||
|
||||
|
|
@ -817,7 +797,6 @@ class Library:
|
|||
Args:
|
||||
upload_id: Upload session identifier to resume
|
||||
document: Complete document content as bytes
|
||||
user: User identifier
|
||||
on_progress: Optional callback(bytes_sent, total_bytes) for progress updates
|
||||
|
||||
Returns:
|
||||
|
|
@ -828,23 +807,19 @@ class Library:
|
|||
library = api.library()
|
||||
|
||||
# Check what's missing
|
||||
status = library.get_upload_status(
|
||||
upload_id="abc-123",
|
||||
user="trustgraph"
|
||||
)
|
||||
status = library.get_upload_status(upload_id="abc-123")
|
||||
|
||||
if status['state'] == 'in-progress':
|
||||
# Resume with the same document
|
||||
with open("large_document.pdf", "rb") as f:
|
||||
library.resume_upload(
|
||||
upload_id="abc-123",
|
||||
document=f.read(),
|
||||
user="trustgraph"
|
||||
document=f.read()
|
||||
)
|
||||
```
|
||||
"""
|
||||
# Get current status
|
||||
status = self.get_upload_status(upload_id, user)
|
||||
status = self.get_upload_status(upload_id)
|
||||
|
||||
if status.get("upload-state") == "expired":
|
||||
raise RuntimeError("Upload session has expired, please start a new upload")
|
||||
|
|
@ -867,10 +842,10 @@ class Library:
|
|||
|
||||
chunk_request = {
|
||||
"operation": "upload-chunk",
|
||||
"workspace": self.api.workspace,
|
||||
"upload-id": upload_id,
|
||||
"chunk-index": chunk_index,
|
||||
"content": base64.b64encode(chunk_data).decode("utf-8"),
|
||||
"user": user,
|
||||
}
|
||||
|
||||
self.request(chunk_request)
|
||||
|
|
@ -886,8 +861,8 @@ class Library:
|
|||
# Complete upload
|
||||
complete_request = {
|
||||
"operation": "complete-upload",
|
||||
"workspace": self.api.workspace,
|
||||
"upload-id": upload_id,
|
||||
"user": user,
|
||||
}
|
||||
|
||||
return self.request(complete_request)
|
||||
|
|
@ -895,7 +870,7 @@ class Library:
|
|||
# Child document methods
|
||||
|
||||
def add_child_document(
|
||||
self, document, id, parent_id, user, title, comments,
|
||||
self, document, id, parent_id, title, comments,
|
||||
kind="text/plain", tags=[], metadata=None,
|
||||
):
|
||||
"""
|
||||
|
|
@ -909,7 +884,6 @@ class Library:
|
|||
document: Document content as bytes
|
||||
id: Document identifier (auto-generated if None)
|
||||
parent_id: Parent document identifier (required)
|
||||
user: User/owner identifier
|
||||
title: Document title
|
||||
comments: Document description or comments
|
||||
kind: MIME type of the document (default: "text/plain")
|
||||
|
|
@ -931,7 +905,6 @@ class Library:
|
|||
document=page_text.encode('utf-8'),
|
||||
id="doc-123-page-1",
|
||||
parent_id="doc-123",
|
||||
user="trustgraph",
|
||||
title="Page 1 of Research Paper",
|
||||
comments="First page extracted from PDF",
|
||||
kind="text/plain",
|
||||
|
|
@ -964,6 +937,7 @@ class Library:
|
|||
|
||||
input = {
|
||||
"operation": "add-child-document",
|
||||
"workspace": self.api.workspace,
|
||||
"document-metadata": {
|
||||
"id": id,
|
||||
"time": int(time.time()),
|
||||
|
|
@ -971,7 +945,7 @@ class Library:
|
|||
"title": title,
|
||||
"comments": comments,
|
||||
"metadata": triples,
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"tags": tags,
|
||||
"parent-id": parent_id,
|
||||
"document-type": "extracted",
|
||||
|
|
@ -981,13 +955,12 @@ class Library:
|
|||
|
||||
return self.request(input)
|
||||
|
||||
def list_children(self, document_id, user):
|
||||
def list_children(self, document_id):
|
||||
"""
|
||||
List all child documents for a given parent document.
|
||||
|
||||
Args:
|
||||
document_id: Parent document identifier
|
||||
user: User identifier
|
||||
|
||||
Returns:
|
||||
list[DocumentMetadata]: List of child document metadata objects
|
||||
|
|
@ -995,10 +968,7 @@ class Library:
|
|||
Example:
|
||||
```python
|
||||
library = api.library()
|
||||
children = library.list_children(
|
||||
document_id="doc-123",
|
||||
user="trustgraph"
|
||||
)
|
||||
children = library.list_children(document_id="doc-123")
|
||||
|
||||
for child in children:
|
||||
print(f"{child.id}: {child.title}")
|
||||
|
|
@ -1006,8 +976,8 @@ class Library:
|
|||
"""
|
||||
input = {
|
||||
"operation": "list-children",
|
||||
"workspace": self.api.workspace,
|
||||
"document-id": document_id,
|
||||
"user": user,
|
||||
}
|
||||
|
||||
response = self.request(input)
|
||||
|
|
@ -1028,7 +998,7 @@ class Library:
|
|||
)
|
||||
for w in v.get("metadata", [])
|
||||
],
|
||||
user=v["user"],
|
||||
workspace=v.get("workspace", ""),
|
||||
tags=v.get("tags", []),
|
||||
parent_id=v.get("parent-id", ""),
|
||||
document_type=v.get("document-type", "source"),
|
||||
|
|
@ -1039,14 +1009,13 @@ class Library:
|
|||
logger.error("Failed to parse children response", exc_info=True)
|
||||
raise ProtocolException("Response not formatted correctly")
|
||||
|
||||
def get_document_content(self, user, id):
|
||||
def get_document_content(self, id):
|
||||
"""
|
||||
Get the content of a document.
|
||||
|
||||
Retrieves the full content of a document as bytes.
|
||||
|
||||
Args:
|
||||
user: User identifier
|
||||
id: Document identifier
|
||||
|
||||
Returns:
|
||||
|
|
@ -1055,10 +1024,7 @@ class Library:
|
|||
Example:
|
||||
```python
|
||||
library = api.library()
|
||||
content = library.get_document_content(
|
||||
user="trustgraph",
|
||||
id="doc-123"
|
||||
)
|
||||
content = library.get_document_content(id="doc-123")
|
||||
|
||||
# Write to file
|
||||
with open("output.pdf", "wb") as f:
|
||||
|
|
@ -1067,7 +1033,7 @@ class Library:
|
|||
"""
|
||||
input = {
|
||||
"operation": "get-document-content",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"document-id": id,
|
||||
}
|
||||
|
||||
|
|
@ -1076,7 +1042,7 @@ class Library:
|
|||
|
||||
return base64.b64decode(content_b64)
|
||||
|
||||
def stream_document_to_file(self, user, id, file_path, chunk_size=1024*1024, on_progress=None):
|
||||
def stream_document_to_file(self, id, file_path, chunk_size=1024*1024, on_progress=None):
|
||||
"""
|
||||
Stream document content to a file.
|
||||
|
||||
|
|
@ -1084,7 +1050,6 @@ class Library:
|
|||
enabling memory-efficient handling of large documents.
|
||||
|
||||
Args:
|
||||
user: User identifier
|
||||
id: Document identifier
|
||||
file_path: Path to write the document content
|
||||
chunk_size: Size of each chunk to download (default 1MB)
|
||||
|
|
@ -1101,7 +1066,6 @@ class Library:
|
|||
print(f"Downloaded {received}/{total} bytes")
|
||||
|
||||
library.stream_document_to_file(
|
||||
user="trustgraph",
|
||||
id="large-doc-123",
|
||||
file_path="/tmp/document.pdf",
|
||||
on_progress=progress
|
||||
|
|
@ -1116,7 +1080,7 @@ class Library:
|
|||
while True:
|
||||
input = {
|
||||
"operation": "stream-document",
|
||||
"user": user,
|
||||
"workspace": self.api.workspace,
|
||||
"document-id": id,
|
||||
"chunk-index": chunk_index,
|
||||
"chunk-size": chunk_size,
|
||||
|
|
|
|||
|
|
@ -84,10 +84,14 @@ class SocketClient:
|
|||
for streaming responses.
|
||||
"""
|
||||
|
||||
def __init__(self, url: str, timeout: int, token: Optional[str]) -> None:
|
||||
def __init__(
|
||||
self, url: str, timeout: int, token: Optional[str],
|
||||
workspace: str = "default",
|
||||
) -> None:
|
||||
self.url: str = self._convert_to_ws_url(url)
|
||||
self.timeout: int = timeout
|
||||
self.token: Optional[str] = token
|
||||
self.workspace: str = workspace
|
||||
self._request_counter: int = 0
|
||||
self._lock: Lock = Lock()
|
||||
self._loop: Optional[asyncio.AbstractEventLoop] = None
|
||||
|
|
@ -251,6 +255,7 @@ class SocketClient:
|
|||
try:
|
||||
message = {
|
||||
"id": request_id,
|
||||
"workspace": self.workspace,
|
||||
"service": service,
|
||||
"request": request
|
||||
}
|
||||
|
|
@ -290,6 +295,7 @@ class SocketClient:
|
|||
try:
|
||||
message = {
|
||||
"id": request_id,
|
||||
"workspace": self.workspace,
|
||||
"service": service,
|
||||
"request": request
|
||||
}
|
||||
|
|
@ -328,6 +334,7 @@ class SocketClient:
|
|||
try:
|
||||
message = {
|
||||
"id": request_id,
|
||||
"workspace": self.workspace,
|
||||
"service": service,
|
||||
"request": request
|
||||
}
|
||||
|
|
@ -488,7 +495,6 @@ class SocketFlowInstance:
|
|||
def agent(
|
||||
self,
|
||||
question: str,
|
||||
user: str,
|
||||
state: Optional[Dict[str, Any]] = None,
|
||||
group: Optional[str] = None,
|
||||
history: Optional[List[Dict[str, Any]]] = None,
|
||||
|
|
@ -498,7 +504,6 @@ class SocketFlowInstance:
|
|||
"""Execute an agent operation with streaming support."""
|
||||
request = {
|
||||
"question": question,
|
||||
"user": user,
|
||||
"streaming": streaming
|
||||
}
|
||||
if state is not None:
|
||||
|
|
@ -514,7 +519,6 @@ class SocketFlowInstance:
|
|||
def agent_explain(
|
||||
self,
|
||||
question: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
state: Optional[Dict[str, Any]] = None,
|
||||
group: Optional[str] = None,
|
||||
|
|
@ -524,7 +528,6 @@ class SocketFlowInstance:
|
|||
"""Execute an agent operation with explainability support."""
|
||||
request = {
|
||||
"question": question,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"streaming": True
|
||||
}
|
||||
|
|
@ -574,7 +577,6 @@ class SocketFlowInstance:
|
|||
def graph_rag(
|
||||
self,
|
||||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
entity_limit: int = 50,
|
||||
triple_limit: int = 30,
|
||||
|
|
@ -592,7 +594,6 @@ class SocketFlowInstance:
|
|||
"""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"entity-limit": entity_limit,
|
||||
"triple-limit": triple_limit,
|
||||
|
|
@ -619,7 +620,6 @@ class SocketFlowInstance:
|
|||
def graph_rag_explain(
|
||||
self,
|
||||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
entity_limit: int = 50,
|
||||
triple_limit: int = 30,
|
||||
|
|
@ -632,7 +632,6 @@ class SocketFlowInstance:
|
|||
"""Execute graph-based RAG query with explainability support."""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"entity-limit": entity_limit,
|
||||
"triple-limit": triple_limit,
|
||||
|
|
@ -653,7 +652,6 @@ class SocketFlowInstance:
|
|||
def document_rag(
|
||||
self,
|
||||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
doc_limit: int = 10,
|
||||
streaming: bool = False,
|
||||
|
|
@ -666,7 +664,6 @@ class SocketFlowInstance:
|
|||
"""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"doc-limit": doc_limit,
|
||||
"streaming": streaming
|
||||
|
|
@ -688,7 +685,6 @@ class SocketFlowInstance:
|
|||
def document_rag_explain(
|
||||
self,
|
||||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
doc_limit: int = 10,
|
||||
**kwargs: Any
|
||||
|
|
@ -696,7 +692,6 @@ class SocketFlowInstance:
|
|||
"""Execute document-based RAG query with explainability support."""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"doc-limit": doc_limit,
|
||||
"streaming": True,
|
||||
|
|
@ -748,7 +743,6 @@ class SocketFlowInstance:
|
|||
def graph_embeddings_query(
|
||||
self,
|
||||
text: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
limit: int = 10,
|
||||
**kwargs: Any
|
||||
|
|
@ -759,7 +753,6 @@ class SocketFlowInstance:
|
|||
|
||||
request = {
|
||||
"vector": vector,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
|
@ -770,7 +763,6 @@ class SocketFlowInstance:
|
|||
def document_embeddings_query(
|
||||
self,
|
||||
text: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
limit: int = 10,
|
||||
**kwargs: Any
|
||||
|
|
@ -781,7 +773,6 @@ class SocketFlowInstance:
|
|||
|
||||
request = {
|
||||
"vector": vector,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
|
@ -802,7 +793,6 @@ class SocketFlowInstance:
|
|||
p: Optional[Union[str, Dict[str, Any]]] = None,
|
||||
o: Optional[Union[str, Dict[str, Any]]] = None,
|
||||
g: Optional[str] = None,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None,
|
||||
limit: int = 100,
|
||||
**kwargs: Any
|
||||
|
|
@ -822,8 +812,6 @@ class SocketFlowInstance:
|
|||
request["o"] = o_term
|
||||
if g is not None:
|
||||
request["g"] = g
|
||||
if user is not None:
|
||||
request["user"] = user
|
||||
if collection is not None:
|
||||
request["collection"] = collection
|
||||
request.update(kwargs)
|
||||
|
|
@ -839,7 +827,6 @@ class SocketFlowInstance:
|
|||
p: Optional[Union[str, Dict[str, Any]]] = None,
|
||||
o: Optional[Union[str, Dict[str, Any]]] = None,
|
||||
g: Optional[str] = None,
|
||||
user: Optional[str] = None,
|
||||
collection: Optional[str] = None,
|
||||
limit: int = 100,
|
||||
batch_size: int = 20,
|
||||
|
|
@ -864,8 +851,6 @@ class SocketFlowInstance:
|
|||
request["o"] = o_term
|
||||
if g is not None:
|
||||
request["g"] = g
|
||||
if user is not None:
|
||||
request["user"] = user
|
||||
if collection is not None:
|
||||
request["collection"] = collection
|
||||
request.update(kwargs)
|
||||
|
|
@ -879,7 +864,6 @@ class SocketFlowInstance:
|
|||
def sparql_query_stream(
|
||||
self,
|
||||
query: str,
|
||||
user: str = "trustgraph",
|
||||
collection: str = "default",
|
||||
limit: int = 10000,
|
||||
batch_size: int = 20,
|
||||
|
|
@ -888,7 +872,6 @@ class SocketFlowInstance:
|
|||
"""Execute a SPARQL query with streaming batches."""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit,
|
||||
"streaming": True,
|
||||
|
|
@ -904,7 +887,6 @@ class SocketFlowInstance:
|
|||
def rows_query(
|
||||
self,
|
||||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
variables: Optional[Dict[str, Any]] = None,
|
||||
operation_name: Optional[str] = None,
|
||||
|
|
@ -913,7 +895,6 @@ class SocketFlowInstance:
|
|||
"""Execute a GraphQL query against structured rows."""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection
|
||||
}
|
||||
if variables:
|
||||
|
|
@ -943,7 +924,6 @@ class SocketFlowInstance:
|
|||
self,
|
||||
text: str,
|
||||
schema_name: str,
|
||||
user: str = "trustgraph",
|
||||
collection: str = "default",
|
||||
index_name: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
|
|
@ -956,7 +936,6 @@ class SocketFlowInstance:
|
|||
request = {
|
||||
"vector": vector,
|
||||
"schema_name": schema_name,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,10 +45,13 @@ class ConfigValue:
|
|||
type: Configuration type/category
|
||||
key: Specific configuration key
|
||||
value: Configuration value as string
|
||||
workspace: Workspace the value belongs to. Only populated for
|
||||
responses to getvalues-all-ws; empty otherwise.
|
||||
"""
|
||||
type : str
|
||||
key : str
|
||||
value : str
|
||||
workspace : str = ""
|
||||
|
||||
@dataclasses.dataclass
|
||||
class DocumentMetadata:
|
||||
|
|
@ -62,7 +65,7 @@ class DocumentMetadata:
|
|||
title: Document title
|
||||
comments: Additional comments or description
|
||||
metadata: List of RDF triples providing structured metadata
|
||||
user: User/owner identifier
|
||||
workspace: Workspace the document belongs to
|
||||
tags: List of tags for categorization
|
||||
parent_id: Parent document ID for child documents (empty for top-level docs)
|
||||
document_type: "source" for uploaded documents, "extracted" for derived content
|
||||
|
|
@ -73,7 +76,7 @@ class DocumentMetadata:
|
|||
title : str
|
||||
comments : str
|
||||
metadata : List[Triple]
|
||||
user : str
|
||||
workspace : str
|
||||
tags : List[str]
|
||||
parent_id : str = ""
|
||||
document_type : str = "source"
|
||||
|
|
@ -88,7 +91,7 @@ class ProcessingMetadata:
|
|||
document_id: ID of the document being processed
|
||||
time: Processing start timestamp
|
||||
flow: Flow instance handling the processing
|
||||
user: User identifier
|
||||
workspace: Workspace the processing job belongs to
|
||||
collection: Target collection for processed data
|
||||
tags: List of tags for categorization
|
||||
"""
|
||||
|
|
@ -96,7 +99,7 @@ class ProcessingMetadata:
|
|||
document_id : str
|
||||
time : datetime.datetime
|
||||
flow : str
|
||||
user : str
|
||||
workspace : str
|
||||
collection : str
|
||||
tags : List[str]
|
||||
|
||||
|
|
@ -105,17 +108,15 @@ class CollectionMetadata:
|
|||
"""
|
||||
Metadata for a data collection.
|
||||
|
||||
Collections provide logical grouping and isolation for documents and
|
||||
knowledge graph data.
|
||||
Collections provide logical grouping within a workspace for documents
|
||||
and knowledge graph data.
|
||||
|
||||
Attributes:
|
||||
user: User/owner identifier
|
||||
collection: Collection identifier
|
||||
name: Human-readable collection name
|
||||
description: Collection description
|
||||
tags: List of tags for categorization
|
||||
"""
|
||||
user : str
|
||||
collection : str
|
||||
name : str
|
||||
description : str
|
||||
|
|
|
|||
|
|
@ -125,21 +125,39 @@ class AsyncProcessor:
|
|||
response_metrics = config_resp_metrics,
|
||||
)
|
||||
|
||||
async def fetch_config(self):
|
||||
"""Fetch full config from config service using a short-lived
|
||||
request/response client. Returns (config, version) or raises."""
|
||||
client = self._create_config_client()
|
||||
try:
|
||||
await client.start()
|
||||
resp = await client.request(
|
||||
ConfigRequest(operation="config"),
|
||||
timeout=10,
|
||||
)
|
||||
if resp.error:
|
||||
raise RuntimeError(f"Config error: {resp.error.message}")
|
||||
return resp.config, resp.version
|
||||
finally:
|
||||
await client.stop()
|
||||
async def _fetch_type_workspace(self, client, workspace, config_type):
|
||||
"""Fetch config values of a single type within one workspace.
|
||||
Returns dict of {key: value}."""
|
||||
resp = await client.request(
|
||||
ConfigRequest(
|
||||
operation="getvalues",
|
||||
workspace=workspace,
|
||||
type=config_type,
|
||||
),
|
||||
timeout=10,
|
||||
)
|
||||
if resp.error:
|
||||
raise RuntimeError(f"Config error: {resp.error.message}")
|
||||
return {v.key: v.value for v in resp.values}
|
||||
|
||||
async def _fetch_type_all_workspaces(self, client, config_type):
|
||||
"""Fetch config values of a single type across all workspaces.
|
||||
Returns dict of {workspace: {key: value}}."""
|
||||
resp = await client.request(
|
||||
ConfigRequest(
|
||||
operation="getvalues-all-ws",
|
||||
type=config_type,
|
||||
),
|
||||
timeout=10,
|
||||
)
|
||||
if resp.error:
|
||||
raise RuntimeError(f"Config error: {resp.error.message}")
|
||||
|
||||
grouped = {}
|
||||
for v in resp.values:
|
||||
ws = grouped.setdefault(v.workspace, {})
|
||||
ws[v.key] = v.value
|
||||
return grouped, resp.version
|
||||
|
||||
# This is called to start dynamic behaviour.
|
||||
# Implements the subscribe-then-fetch pattern to avoid race conditions.
|
||||
|
|
@ -155,21 +173,51 @@ class AsyncProcessor:
|
|||
# processed by on_config_notify, which does the version check
|
||||
|
||||
async def fetch_and_apply_config(self):
|
||||
"""Fetch full config from config service and apply to all handlers.
|
||||
Retries until successful — config service may not be ready yet."""
|
||||
"""Startup: for each registered handler, fetch config for all its
|
||||
types across all workspaces and invoke the handler once per
|
||||
workspace. Retries until successful — config service may not be
|
||||
ready yet."""
|
||||
|
||||
while self.running:
|
||||
|
||||
try:
|
||||
config, version = await self.fetch_config()
|
||||
client = self._create_config_client()
|
||||
try:
|
||||
await client.start()
|
||||
|
||||
logger.info(f"Fetched config version {version}")
|
||||
version = 0
|
||||
|
||||
self.config_version = version
|
||||
for entry in self.config_handlers:
|
||||
handler_types = entry["types"]
|
||||
|
||||
# Apply to all handlers (startup = invoke all)
|
||||
for entry in self.config_handlers:
|
||||
await entry["handler"](config, version)
|
||||
# Handlers registered without types get nothing
|
||||
# at startup (there is no "all types" fetch).
|
||||
if not handler_types:
|
||||
continue
|
||||
|
||||
# Group all registered types by workspace:
|
||||
# {workspace: {type: {key: value}}}
|
||||
per_ws = {}
|
||||
for t in handler_types:
|
||||
type_data, v = \
|
||||
await self._fetch_type_all_workspaces(
|
||||
client, t,
|
||||
)
|
||||
version = max(version, v)
|
||||
for ws, kv in type_data.items():
|
||||
per_ws.setdefault(ws, {})[t] = kv
|
||||
|
||||
# Call the handler once per workspace
|
||||
for ws, config in per_ws.items():
|
||||
await entry["handler"](ws, config, version)
|
||||
|
||||
logger.info(
|
||||
f"Applied startup config version {version}"
|
||||
)
|
||||
self.config_version = version
|
||||
|
||||
finally:
|
||||
await client.stop()
|
||||
|
||||
return
|
||||
|
||||
|
|
@ -204,8 +252,9 @@ class AsyncProcessor:
|
|||
# Called when a config notify message arrives
|
||||
async def on_config_notify(self, message, consumer, flow):
|
||||
|
||||
notify_version = message.value().version
|
||||
notify_types = set(message.value().types)
|
||||
v = message.value()
|
||||
notify_version = v.version
|
||||
changes = v.changes # dict of type -> [workspaces]
|
||||
|
||||
# Skip if we already have this version or newer
|
||||
if notify_version <= self.config_version:
|
||||
|
|
@ -215,41 +264,60 @@ class AsyncProcessor:
|
|||
)
|
||||
return
|
||||
|
||||
# Check if any handler cares about the affected types
|
||||
if notify_types:
|
||||
any_interested = False
|
||||
for entry in self.config_handlers:
|
||||
handler_types = entry["types"]
|
||||
if handler_types is None or notify_types & handler_types:
|
||||
any_interested = True
|
||||
break
|
||||
notify_types = set(changes.keys())
|
||||
|
||||
if not any_interested:
|
||||
logger.debug(
|
||||
f"Ignoring config notify v{notify_version}, "
|
||||
f"no handlers for types {notify_types}"
|
||||
)
|
||||
self.config_version = notify_version
|
||||
return
|
||||
# Filter out handlers that don't care about any of the changed
|
||||
# types. A handler registered without types never fires on
|
||||
# notifications (nothing to scope to).
|
||||
interested = []
|
||||
for entry in self.config_handlers:
|
||||
handler_types = entry["types"]
|
||||
if handler_types and notify_types & handler_types:
|
||||
interested.append(entry)
|
||||
|
||||
if not interested:
|
||||
logger.debug(
|
||||
f"Ignoring config notify v{notify_version}, "
|
||||
f"no handlers for types {notify_types}"
|
||||
)
|
||||
self.config_version = notify_version
|
||||
return
|
||||
|
||||
logger.info(
|
||||
f"Config notify v{notify_version} types={list(notify_types)}, "
|
||||
f"fetching config..."
|
||||
f"Config notify v{notify_version} "
|
||||
f"types={list(notify_types)}, fetching config..."
|
||||
)
|
||||
|
||||
# Fetch full config using short-lived client
|
||||
try:
|
||||
config, version = await self.fetch_config()
|
||||
client = self._create_config_client()
|
||||
try:
|
||||
await client.start()
|
||||
|
||||
self.config_version = version
|
||||
for entry in interested:
|
||||
handler_types = entry["types"]
|
||||
|
||||
# Invoke handlers that care about the affected types
|
||||
for entry in self.config_handlers:
|
||||
handler_types = entry["types"]
|
||||
if handler_types is None:
|
||||
await entry["handler"](config, version)
|
||||
elif not notify_types or notify_types & handler_types:
|
||||
await entry["handler"](config, version)
|
||||
# Build {workspace: {type: {key: value}}} for types
|
||||
# this handler cares about, where the workspace was
|
||||
# affected for that type.
|
||||
per_ws = {}
|
||||
for t in handler_types:
|
||||
if t not in changes:
|
||||
continue
|
||||
for ws in changes[t]:
|
||||
kv = await self._fetch_type_workspace(
|
||||
client, ws, t,
|
||||
)
|
||||
per_ws.setdefault(ws, {})[t] = kv
|
||||
|
||||
for ws, config in per_ws.items():
|
||||
await entry["handler"](
|
||||
ws, config, notify_version,
|
||||
)
|
||||
|
||||
finally:
|
||||
await client.stop()
|
||||
|
||||
self.config_version = notify_version
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
|
|
|
|||
|
|
@ -48,12 +48,13 @@ class ChunkingService(FlowProcessor):
|
|||
await super(ChunkingService, self).start()
|
||||
await self.librarian.start()
|
||||
|
||||
async def get_document_text(self, doc):
|
||||
async def get_document_text(self, doc, workspace):
|
||||
"""
|
||||
Get text content from a TextDocument, fetching from librarian if needed.
|
||||
|
||||
Args:
|
||||
doc: TextDocument with either inline text or document_id
|
||||
workspace: Workspace for librarian lookup (from flow.workspace)
|
||||
|
||||
Returns:
|
||||
str: The document text content
|
||||
|
|
@ -62,7 +63,7 @@ class ChunkingService(FlowProcessor):
|
|||
logger.info(f"Fetching document {doc.document_id} from librarian...")
|
||||
text = await self.librarian.fetch_document_text(
|
||||
document_id=doc.document_id,
|
||||
user=doc.metadata.user,
|
||||
workspace=workspace,
|
||||
)
|
||||
logger.info(f"Fetched {len(text)} characters from librarian")
|
||||
return text
|
||||
|
|
|
|||
|
|
@ -15,114 +15,139 @@ class CollectionConfigHandler:
|
|||
Storage services should:
|
||||
1. Inherit from this class along with their service base class
|
||||
2. Call register_config_handler(self.on_collection_config) in __init__
|
||||
3. Implement create_collection(user, collection, metadata) method
|
||||
4. Implement delete_collection(user, collection) method
|
||||
3. Implement create_collection(workspace, collection, metadata) method
|
||||
4. Implement delete_collection(workspace, collection) method
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
# Track known collections: {(user, collection): metadata_dict}
|
||||
# Track known collections: {(workspace, collection): metadata_dict}
|
||||
self.known_collections: Dict[tuple, dict] = {}
|
||||
# Pass remaining kwargs up the inheritance chain
|
||||
super().__init__(**kwargs)
|
||||
|
||||
async def on_collection_config(self, config: dict, version: int):
|
||||
async def on_collection_config(
|
||||
self, workspace: str, config: dict, version: int
|
||||
):
|
||||
"""
|
||||
Handle config push messages and extract collection information
|
||||
for a single workspace.
|
||||
|
||||
Args:
|
||||
workspace: Workspace the config applies to
|
||||
config: Configuration dictionary from ConfigPush message
|
||||
version: Configuration version number
|
||||
"""
|
||||
logger.info(f"Processing collection configuration (version {version})")
|
||||
logger.info(
|
||||
f"Processing collection configuration "
|
||||
f"(version {version}, workspace {workspace})"
|
||||
)
|
||||
|
||||
# Extract collections from config (treat missing key as empty)
|
||||
# Extract collections from config (treat missing key as empty).
|
||||
# Each config key IS the collection name — config is already
|
||||
# partitioned by workspace, so no workspace prefix is needed
|
||||
# on the key.
|
||||
collection_config = config.get("collection", {})
|
||||
|
||||
# Track which collections we've seen in this config
|
||||
current_collections: Set[tuple] = set()
|
||||
|
||||
# Process each collection in the config
|
||||
for key, value_json in collection_config.items():
|
||||
for collection, value_json in collection_config.items():
|
||||
try:
|
||||
# Parse user:collection key
|
||||
if ":" not in key:
|
||||
logger.warning(f"Invalid collection key format (expected user:collection): {key}")
|
||||
continue
|
||||
current_collections.add((workspace, collection))
|
||||
|
||||
user, collection = key.split(":", 1)
|
||||
current_collections.add((user, collection))
|
||||
|
||||
# Parse metadata
|
||||
metadata = json.loads(value_json)
|
||||
|
||||
# Check if this is a new collection or updated
|
||||
collection_key = (user, collection)
|
||||
if collection_key not in self.known_collections:
|
||||
logger.info(f"New collection detected: {user}/{collection}")
|
||||
await self.create_collection(user, collection, metadata)
|
||||
self.known_collections[collection_key] = metadata
|
||||
key = (workspace, collection)
|
||||
if key not in self.known_collections:
|
||||
logger.info(
|
||||
f"New collection detected: {workspace}/{collection}"
|
||||
)
|
||||
await self.create_collection(
|
||||
workspace, collection, metadata
|
||||
)
|
||||
self.known_collections[key] = metadata
|
||||
else:
|
||||
# Collection already exists, update metadata if changed
|
||||
if self.known_collections[collection_key] != metadata:
|
||||
logger.info(f"Collection metadata updated: {user}/{collection}")
|
||||
# Most storage services don't need to do anything for metadata updates
|
||||
# They just need to know the collection exists
|
||||
self.known_collections[collection_key] = metadata
|
||||
if self.known_collections[key] != metadata:
|
||||
logger.info(
|
||||
f"Collection metadata updated: "
|
||||
f"{workspace}/{collection}"
|
||||
)
|
||||
self.known_collections[key] = metadata
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing collection config for key {key}: {e}", exc_info=True)
|
||||
logger.error(
|
||||
f"Error processing collection config for "
|
||||
f"{workspace}/{collection}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Find collections that were deleted (in known but not in current)
|
||||
deleted_collections = set(self.known_collections.keys()) - current_collections
|
||||
for user, collection in deleted_collections:
|
||||
logger.info(f"Collection deleted: {user}/{collection}")
|
||||
# Find collections for THIS workspace that were deleted (in
|
||||
# known but not in current). Only compare collections owned by
|
||||
# this workspace — other workspaces' collections are not
|
||||
# affected by this config update.
|
||||
known_for_ws = {
|
||||
(w, c) for (w, c) in self.known_collections.keys()
|
||||
if w == workspace
|
||||
}
|
||||
deleted_collections = known_for_ws - current_collections
|
||||
for ws, collection in deleted_collections:
|
||||
logger.info(f"Collection deleted: {ws}/{collection}")
|
||||
try:
|
||||
# Remove from known_collections FIRST to immediately reject new writes
|
||||
# This eliminates race condition with worker threads
|
||||
del self.known_collections[(user, collection)]
|
||||
# Physical deletion happens after - worker threads already rejecting writes
|
||||
await self.delete_collection(user, collection)
|
||||
# Remove from known_collections FIRST to immediately
|
||||
# reject new writes
|
||||
del self.known_collections[(ws, collection)]
|
||||
await self.delete_collection(ws, collection)
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting collection {user}/{collection}: {e}", exc_info=True)
|
||||
# If physical deletion failed, should we re-add to known_collections?
|
||||
# For now, keep it removed - collection is logically deleted per config
|
||||
logger.error(
|
||||
f"Error deleting collection {ws}/{collection}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
logger.debug(f"Collection config processing complete. Known collections: {len(self.known_collections)}")
|
||||
logger.debug(
|
||||
f"Collection config processing complete. "
|
||||
f"Known collections: {len(self.known_collections)}"
|
||||
)
|
||||
|
||||
async def create_collection(self, user: str, collection: str, metadata: dict):
|
||||
async def create_collection(
|
||||
self, workspace: str, collection: str, metadata: dict,
|
||||
):
|
||||
"""
|
||||
Create a collection in the storage backend.
|
||||
|
||||
Subclasses must implement this method.
|
||||
|
||||
Args:
|
||||
user: User ID
|
||||
workspace: Workspace ID
|
||||
collection: Collection ID
|
||||
metadata: Collection metadata dictionary
|
||||
"""
|
||||
raise NotImplementedError("Storage service must implement create_collection method")
|
||||
raise NotImplementedError(
|
||||
"Storage service must implement create_collection method"
|
||||
)
|
||||
|
||||
async def delete_collection(self, user: str, collection: str):
|
||||
async def delete_collection(self, workspace: str, collection: str):
|
||||
"""
|
||||
Delete a collection from the storage backend.
|
||||
|
||||
Subclasses must implement this method.
|
||||
|
||||
Args:
|
||||
user: User ID
|
||||
workspace: Workspace ID
|
||||
collection: Collection ID
|
||||
"""
|
||||
raise NotImplementedError("Storage service must implement delete_collection method")
|
||||
raise NotImplementedError(
|
||||
"Storage service must implement delete_collection method"
|
||||
)
|
||||
|
||||
def collection_exists(self, user: str, collection: str) -> bool:
|
||||
def collection_exists(self, workspace: str, collection: str) -> bool:
|
||||
"""
|
||||
Check if a collection is known to exist
|
||||
Check if a collection is known to exist.
|
||||
|
||||
Args:
|
||||
user: User ID
|
||||
workspace: Workspace ID
|
||||
collection: Collection ID
|
||||
|
||||
Returns:
|
||||
True if collection exists, False otherwise
|
||||
"""
|
||||
return (user, collection) in self.known_collections
|
||||
return (workspace, collection) in self.known_collections
|
||||
|
|
|
|||
|
|
@ -18,10 +18,11 @@ class ConfigClient(RequestResponse):
|
|||
)
|
||||
return resp
|
||||
|
||||
async def get(self, type, key, timeout=CONFIG_TIMEOUT):
|
||||
async def get(self, workspace, type, key, timeout=CONFIG_TIMEOUT):
|
||||
"""Get a single config value. Returns the value string or None."""
|
||||
resp = await self._request(
|
||||
operation="get",
|
||||
workspace=workspace,
|
||||
keys=[ConfigKey(type=type, key=key)],
|
||||
timeout=timeout,
|
||||
)
|
||||
|
|
@ -29,19 +30,21 @@ class ConfigClient(RequestResponse):
|
|||
return resp.values[0].value
|
||||
return None
|
||||
|
||||
async def put(self, type, key, value, timeout=CONFIG_TIMEOUT):
|
||||
async def put(self, workspace, type, key, value, timeout=CONFIG_TIMEOUT):
|
||||
"""Put a single config value."""
|
||||
await self._request(
|
||||
operation="put",
|
||||
workspace=workspace,
|
||||
values=[ConfigValue(type=type, key=key, value=value)],
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
async def put_many(self, values, timeout=CONFIG_TIMEOUT):
|
||||
"""Put multiple config values in a single request.
|
||||
values is a list of (type, key, value) tuples."""
|
||||
async def put_many(self, workspace, values, timeout=CONFIG_TIMEOUT):
|
||||
"""Put multiple config values in a single request within a
|
||||
single workspace. values is a list of (type, key, value) tuples."""
|
||||
await self._request(
|
||||
operation="put",
|
||||
workspace=workspace,
|
||||
values=[
|
||||
ConfigValue(type=t, key=k, value=v)
|
||||
for t, k, v in values
|
||||
|
|
@ -49,19 +52,21 @@ class ConfigClient(RequestResponse):
|
|||
timeout=timeout,
|
||||
)
|
||||
|
||||
async def delete(self, type, key, timeout=CONFIG_TIMEOUT):
|
||||
async def delete(self, workspace, type, key, timeout=CONFIG_TIMEOUT):
|
||||
"""Delete a single config key."""
|
||||
await self._request(
|
||||
operation="delete",
|
||||
workspace=workspace,
|
||||
keys=[ConfigKey(type=type, key=key)],
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
async def delete_many(self, keys, timeout=CONFIG_TIMEOUT):
|
||||
"""Delete multiple config keys in a single request.
|
||||
keys is a list of (type, key) tuples."""
|
||||
async def delete_many(self, workspace, keys, timeout=CONFIG_TIMEOUT):
|
||||
"""Delete multiple config keys in a single request within a
|
||||
single workspace. keys is a list of (type, key) tuples."""
|
||||
await self._request(
|
||||
operation="delete",
|
||||
workspace=workspace,
|
||||
keys=[
|
||||
ConfigKey(type=t, key=k)
|
||||
for t, k in keys
|
||||
|
|
@ -69,15 +74,26 @@ class ConfigClient(RequestResponse):
|
|||
timeout=timeout,
|
||||
)
|
||||
|
||||
async def keys(self, type, timeout=CONFIG_TIMEOUT):
|
||||
"""List all keys for a config type."""
|
||||
async def keys(self, workspace, type, timeout=CONFIG_TIMEOUT):
|
||||
"""List all keys for a config type within a workspace."""
|
||||
resp = await self._request(
|
||||
operation="list",
|
||||
workspace=workspace,
|
||||
type=type,
|
||||
timeout=timeout,
|
||||
)
|
||||
return resp.directory
|
||||
|
||||
async def workspaces_for_type(self, type, timeout=CONFIG_TIMEOUT):
|
||||
"""Return the set of distinct workspaces with any config of
|
||||
the given type."""
|
||||
resp = await self._request(
|
||||
operation="getvalues-all-ws",
|
||||
type=type,
|
||||
timeout=timeout,
|
||||
)
|
||||
return {v.workspace for v in resp.values if v.workspace}
|
||||
|
||||
|
||||
class ConfigClientSpec(RequestResponseSpec):
|
||||
def __init__(
|
||||
|
|
|
|||
|
|
@ -24,7 +24,10 @@ class ConsumerSpec(Spec):
|
|||
flow = flow,
|
||||
backend = processor.pubsub,
|
||||
topic = definition["topics"][self.name],
|
||||
subscriber = processor.id + "--" + flow.name + "--" + self.name,
|
||||
subscriber = (
|
||||
processor.id + "--" + flow.workspace + "--" +
|
||||
flow.name + "--" + self.name
|
||||
),
|
||||
schema = self.schema,
|
||||
handler = self.handler,
|
||||
metrics = consumer_metrics,
|
||||
|
|
|
|||
|
|
@ -9,14 +9,12 @@ from .. knowledge import Uri, Literal
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentEmbeddingsClient(RequestResponse):
|
||||
async def query(self, vector, limit=20, user="trustgraph",
|
||||
collection="default", timeout=30):
|
||||
async def query(self, vector, limit=20, collection="default", timeout=30):
|
||||
|
||||
resp = await self.request(
|
||||
DocumentEmbeddingsRequest(
|
||||
vector = vector,
|
||||
limit = limit,
|
||||
user = user,
|
||||
collection = collection
|
||||
),
|
||||
timeout=timeout
|
||||
|
|
|
|||
|
|
@ -60,7 +60,9 @@ class DocumentEmbeddingsQueryService(FlowProcessor):
|
|||
|
||||
logger.debug(f"Handling document embeddings query request {id}...")
|
||||
|
||||
docs = await self.query_document_embeddings(request)
|
||||
docs = await self.query_document_embeddings(
|
||||
flow.workspace, request,
|
||||
)
|
||||
|
||||
logger.debug("Sending document embeddings query response...")
|
||||
r = DocumentEmbeddingsResponse(chunks=docs, error=None)
|
||||
|
|
|
|||
|
|
@ -41,7 +41,8 @@ class DocumentEmbeddingsStoreService(FlowProcessor):
|
|||
|
||||
request = msg.value()
|
||||
|
||||
await self.store_document_embeddings(request)
|
||||
# Workspace comes from the flow the message arrived on.
|
||||
await self.store_document_embeddings(flow.workspace, request)
|
||||
|
||||
except TooManyRequests as e:
|
||||
raise e
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
Base class for dynamically pluggable tool services.
|
||||
|
||||
Tool services are Pulsar services that can be invoked as agent tools.
|
||||
They receive a ToolServiceRequest with user, config, and arguments,
|
||||
They receive a ToolServiceRequest with config and arguments,
|
||||
and return a ToolServiceResponse with the result.
|
||||
|
||||
Uses direct Pulsar topics (no flow configuration required):
|
||||
|
|
@ -42,7 +42,6 @@ class DynamicToolService(AsyncProcessor):
|
|||
the tool's logic.
|
||||
|
||||
The invoke method receives:
|
||||
- user: The user context for multi-tenancy
|
||||
- config: Dict of config values from the tool descriptor
|
||||
- arguments: Dict of arguments from the LLM
|
||||
|
||||
|
|
@ -115,14 +114,13 @@ class DynamicToolService(AsyncProcessor):
|
|||
id = msg.properties().get("id", "unknown")
|
||||
|
||||
# Parse the request
|
||||
user = request.user or "trustgraph"
|
||||
config = json.loads(request.config) if request.config else {}
|
||||
arguments = json.loads(request.arguments) if request.arguments else {}
|
||||
|
||||
logger.debug(f"Tool service request: user={user}, config={config}, arguments={arguments}")
|
||||
logger.debug(f"Tool service request: config={config}, arguments={arguments}")
|
||||
|
||||
# Invoke the tool implementation
|
||||
response = await self.invoke(user, config, arguments)
|
||||
response = await self.invoke(config, arguments)
|
||||
|
||||
# Send success response
|
||||
await self.producer.send(
|
||||
|
|
@ -159,14 +157,13 @@ class DynamicToolService(AsyncProcessor):
|
|||
properties={"id": id if id else "unknown"}
|
||||
)
|
||||
|
||||
async def invoke(self, user, config, arguments):
|
||||
async def invoke(self, config, arguments):
|
||||
"""
|
||||
Invoke the tool service.
|
||||
|
||||
Override this method in subclasses to implement the tool's logic.
|
||||
|
||||
Args:
|
||||
user: The user context for multi-tenancy
|
||||
config: Dict of config values from the tool descriptor
|
||||
arguments: Dict of arguments from the LLM
|
||||
|
||||
|
|
|
|||
|
|
@ -4,15 +4,16 @@ import asyncio
|
|||
class Flow:
|
||||
"""
|
||||
Runtime representation of a deployed flow process.
|
||||
|
||||
|
||||
This class maintains internal processor states and orchestrates
|
||||
lifecycles (start, stop) for inputs (consumers) and parameters
|
||||
lifecycles (start, stop) for inputs (consumers) and parameters
|
||||
that drive data flowing across linked nodes.
|
||||
"""
|
||||
def __init__(self, id, flow, processor, defn):
|
||||
def __init__(self, id, flow, workspace, processor, defn):
|
||||
|
||||
self.id = id
|
||||
self.name = flow
|
||||
self.workspace = workspace
|
||||
|
||||
self.producer = {}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ class FlowProcessor(AsyncProcessor):
|
|||
)
|
||||
|
||||
# Initialise flow information state
|
||||
# Keyed by (workspace, flow) tuples; each workspace has its own
|
||||
# set of flow variants for this processor.
|
||||
self.flows = {}
|
||||
|
||||
# These can be overriden by a derived class:
|
||||
|
|
@ -48,23 +50,28 @@ class FlowProcessor(AsyncProcessor):
|
|||
def register_specification(self, spec: Any) -> None:
|
||||
self.specifications.append(spec)
|
||||
|
||||
# Start processing for a new flow
|
||||
async def start_flow(self, flow, defn):
|
||||
self.flows[flow] = Flow(self.id, flow, self, defn)
|
||||
await self.flows[flow].start()
|
||||
logger.info(f"Started flow: {flow}")
|
||||
|
||||
# Stop processing for a new flow
|
||||
async def stop_flow(self, flow):
|
||||
if flow in self.flows:
|
||||
await self.flows[flow].stop()
|
||||
del self.flows[flow]
|
||||
logger.info(f"Stopped flow: {flow}")
|
||||
# Start processing for a new flow within a workspace
|
||||
async def start_flow(self, workspace, flow, defn):
|
||||
key = (workspace, flow)
|
||||
self.flows[key] = Flow(self.id, flow, workspace, self, defn)
|
||||
await self.flows[key].start()
|
||||
logger.info(f"Started flow: {workspace}/{flow}")
|
||||
|
||||
# Event handler - called for a configuration change
|
||||
async def on_configure_flows(self, config, version):
|
||||
# Stop processing for a flow within a workspace
|
||||
async def stop_flow(self, workspace, flow):
|
||||
key = (workspace, flow)
|
||||
if key in self.flows:
|
||||
await self.flows[key].stop()
|
||||
del self.flows[key]
|
||||
logger.info(f"Stopped flow: {workspace}/{flow}")
|
||||
|
||||
logger.info(f"Got config version {version}")
|
||||
# Event handler - called for a configuration change for a single
|
||||
# workspace
|
||||
async def on_configure_flows(self, workspace, config, version):
|
||||
|
||||
logger.info(
|
||||
f"Got config version {version} for workspace {workspace}"
|
||||
)
|
||||
|
||||
config_type = f"processor:{self.id}"
|
||||
|
||||
|
|
@ -76,26 +83,28 @@ class FlowProcessor(AsyncProcessor):
|
|||
for k, v in config[config_type].items()
|
||||
}
|
||||
else:
|
||||
logger.debug("No configuration settings for me.")
|
||||
logger.debug(
|
||||
f"No configuration settings for me in {workspace}."
|
||||
)
|
||||
flow_config = {}
|
||||
|
||||
# Get list of flows which should be running and are currently
|
||||
# running
|
||||
wanted_flows = flow_config.keys()
|
||||
# This takes a copy, needed because dict gets modified by stop_flow
|
||||
current_flows = list(self.flows.keys())
|
||||
# Get list of flows which should be running in this workspace,
|
||||
# and the list currently running in this workspace
|
||||
wanted_flows = set(flow_config.keys())
|
||||
current_flows = {
|
||||
f for (ws, f) in self.flows.keys() if ws == workspace
|
||||
}
|
||||
|
||||
# Start all the flows which arent currently running
|
||||
for flow in wanted_flows:
|
||||
if flow not in current_flows:
|
||||
await self.start_flow(flow, flow_config[flow])
|
||||
# Start all the flows which aren't currently running in this
|
||||
# workspace
|
||||
for flow in wanted_flows - current_flows:
|
||||
await self.start_flow(workspace, flow, flow_config[flow])
|
||||
|
||||
# Stop all the unwanted flows which are due to be stopped
|
||||
for flow in current_flows:
|
||||
if flow not in wanted_flows:
|
||||
await self.stop_flow(flow)
|
||||
# Stop all the unwanted flows in this workspace
|
||||
for flow in current_flows - wanted_flows:
|
||||
await self.stop_flow(workspace, flow)
|
||||
|
||||
logger.info("Handled config update")
|
||||
logger.info(f"Handled config update for workspace {workspace}")
|
||||
|
||||
# Start threads, just call parent
|
||||
async def start(self):
|
||||
|
|
|
|||
|
|
@ -22,14 +22,12 @@ def to_value(x: Any) -> Any:
|
|||
return Literal(x.value or x.iri)
|
||||
|
||||
class GraphEmbeddingsClient(RequestResponse):
|
||||
async def query(self, vector, limit=20, user="trustgraph",
|
||||
collection="default", timeout=30):
|
||||
async def query(self, vector, limit=20, collection="default", timeout=30):
|
||||
|
||||
resp = await self.request(
|
||||
GraphEmbeddingsRequest(
|
||||
vector = vector,
|
||||
limit = limit,
|
||||
user = user,
|
||||
collection = collection
|
||||
),
|
||||
timeout=timeout
|
||||
|
|
|
|||
|
|
@ -60,7 +60,9 @@ class GraphEmbeddingsQueryService(FlowProcessor):
|
|||
|
||||
logger.debug(f"Handling graph embeddings query request {id}...")
|
||||
|
||||
entities = await self.query_graph_embeddings(request)
|
||||
entities = await self.query_graph_embeddings(
|
||||
flow.workspace, request,
|
||||
)
|
||||
|
||||
logger.debug("Sending graph embeddings query response...")
|
||||
r = GraphEmbeddingsResponse(entities=entities, error=None)
|
||||
|
|
|
|||
|
|
@ -41,7 +41,8 @@ class GraphEmbeddingsStoreService(FlowProcessor):
|
|||
|
||||
request = msg.value()
|
||||
|
||||
await self.store_graph_embeddings(request)
|
||||
# Workspace comes from the flow the message arrived on.
|
||||
await self.store_graph_embeddings(flow.workspace, request)
|
||||
|
||||
except TooManyRequests as e:
|
||||
raise e
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from . request_response_spec import RequestResponse, RequestResponseSpec
|
|||
from .. schema import GraphRagQuery, GraphRagResponse
|
||||
|
||||
class GraphRagClient(RequestResponse):
|
||||
async def rag(self, query, user="trustgraph", collection="default",
|
||||
async def rag(self, query, collection="default",
|
||||
chunk_callback=None, explain_callback=None,
|
||||
parent_uri="",
|
||||
timeout=600):
|
||||
|
|
@ -12,7 +12,6 @@ class GraphRagClient(RequestResponse):
|
|||
|
||||
Args:
|
||||
query: The question to ask
|
||||
user: User identifier
|
||||
collection: Collection identifier
|
||||
chunk_callback: Optional async callback(text, end_of_stream) for text chunks
|
||||
explain_callback: Optional async callback(explain_id, explain_graph, explain_triples) for explain notifications
|
||||
|
|
@ -49,7 +48,6 @@ class GraphRagClient(RequestResponse):
|
|||
await self.request(
|
||||
GraphRagQuery(
|
||||
query = query,
|
||||
user = user,
|
||||
collection = collection,
|
||||
parent_uri = parent_uri,
|
||||
),
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ Usage:
|
|||
id=id, backend=self.pubsub, taskgroup=self.taskgroup, **params
|
||||
)
|
||||
await self.librarian.start()
|
||||
content = await self.librarian.fetch_document_content(doc_id, user)
|
||||
content = await self.librarian.fetch_document_content(doc_id, workspace)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
|
@ -150,7 +150,7 @@ class LibrarianClient:
|
|||
finally:
|
||||
self._streams.pop(request_id, None)
|
||||
|
||||
async def fetch_document_content(self, document_id, user, timeout=120):
|
||||
async def fetch_document_content(self, document_id, workspace, timeout=120):
|
||||
"""Fetch document content using streaming.
|
||||
|
||||
Returns base64-encoded content. Caller is responsible for decoding.
|
||||
|
|
@ -158,7 +158,7 @@ class LibrarianClient:
|
|||
req = LibrarianRequest(
|
||||
operation="stream-document",
|
||||
document_id=document_id,
|
||||
user=user,
|
||||
workspace=workspace,
|
||||
)
|
||||
chunks = await self.stream(req, timeout=timeout)
|
||||
|
||||
|
|
@ -176,24 +176,24 @@ class LibrarianClient:
|
|||
|
||||
return base64.b64encode(raw)
|
||||
|
||||
async def fetch_document_text(self, document_id, user, timeout=120):
|
||||
async def fetch_document_text(self, document_id, workspace, timeout=120):
|
||||
"""Fetch document content and decode as UTF-8 text."""
|
||||
content = await self.fetch_document_content(
|
||||
document_id, user, timeout=timeout,
|
||||
document_id, workspace, timeout=timeout,
|
||||
)
|
||||
return base64.b64decode(content).decode("utf-8")
|
||||
|
||||
async def fetch_document_metadata(self, document_id, user, timeout=120):
|
||||
async def fetch_document_metadata(self, document_id, workspace, timeout=120):
|
||||
"""Fetch document metadata from the librarian."""
|
||||
req = LibrarianRequest(
|
||||
operation="get-document-metadata",
|
||||
document_id=document_id,
|
||||
user=user,
|
||||
workspace=workspace,
|
||||
)
|
||||
response = await self.request(req, timeout=timeout)
|
||||
return response.document_metadata
|
||||
|
||||
async def save_child_document(self, doc_id, parent_id, user, content,
|
||||
async def save_child_document(self, doc_id, parent_id, workspace, content,
|
||||
document_type="chunk", title=None,
|
||||
kind="text/plain", timeout=120):
|
||||
"""Save a child document to the librarian."""
|
||||
|
|
@ -202,7 +202,7 @@ class LibrarianClient:
|
|||
|
||||
doc_metadata = DocumentMetadata(
|
||||
id=doc_id,
|
||||
user=user,
|
||||
workspace=workspace,
|
||||
kind=kind,
|
||||
title=title or doc_id,
|
||||
parent_id=parent_id,
|
||||
|
|
@ -218,7 +218,7 @@ class LibrarianClient:
|
|||
await self.request(req, timeout=timeout)
|
||||
return doc_id
|
||||
|
||||
async def save_document(self, doc_id, user, content, title=None,
|
||||
async def save_document(self, doc_id, workspace, content, title=None,
|
||||
document_type="answer", kind="text/plain",
|
||||
timeout=120):
|
||||
"""Save a document to the librarian."""
|
||||
|
|
@ -227,7 +227,7 @@ class LibrarianClient:
|
|||
|
||||
doc_metadata = DocumentMetadata(
|
||||
id=doc_id,
|
||||
user=user,
|
||||
workspace=workspace,
|
||||
kind=kind,
|
||||
title=title or doc_id,
|
||||
document_type=document_type,
|
||||
|
|
@ -238,7 +238,7 @@ class LibrarianClient:
|
|||
document_id=doc_id,
|
||||
document_metadata=doc_metadata,
|
||||
content=base64.b64encode(content).decode("utf-8"),
|
||||
user=user,
|
||||
workspace=workspace,
|
||||
)
|
||||
|
||||
await self.request(req, timeout=timeout)
|
||||
|
|
|
|||
|
|
@ -133,8 +133,9 @@ class RequestResponseSpec(Spec):
|
|||
# Make subscription names unique, so that all subscribers get
|
||||
# to see all response messages
|
||||
subscription = (
|
||||
processor.id + "--" + flow.name + "--" + self.request_name +
|
||||
"--" + str(uuid.uuid4())
|
||||
processor.id + "--" + flow.workspace + "--" +
|
||||
flow.name + "--" + self.request_name + "--" +
|
||||
str(uuid.uuid4())
|
||||
),
|
||||
consumer_name = flow.id,
|
||||
request_topic = definition["topics"][self.request_name],
|
||||
|
|
|
|||
|
|
@ -3,13 +3,12 @@ from .. schema import RowEmbeddingsRequest, RowEmbeddingsResponse
|
|||
|
||||
class RowEmbeddingsQueryClient(RequestResponse):
|
||||
async def row_embeddings_query(
|
||||
self, vector, schema_name, user="trustgraph", collection="default",
|
||||
self, vector, schema_name, collection="default",
|
||||
index_name=None, limit=10, timeout=600
|
||||
):
|
||||
request = RowEmbeddingsRequest(
|
||||
vector=vector,
|
||||
schema_name=schema_name,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=limit
|
||||
)
|
||||
|
|
|
|||
|
|
@ -2,11 +2,10 @@ from . request_response_spec import RequestResponse, RequestResponseSpec
|
|||
from .. schema import StructuredQueryRequest, StructuredQueryResponse
|
||||
|
||||
class StructuredQueryClient(RequestResponse):
|
||||
async def structured_query(self, question, user="trustgraph", collection="default", timeout=600):
|
||||
async def structured_query(self, question, collection="default", timeout=600):
|
||||
resp = await self.request(
|
||||
StructuredQueryRequest(
|
||||
question = question,
|
||||
user = user,
|
||||
collection = collection
|
||||
),
|
||||
timeout=timeout
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ class SubscriberSpec(Spec):
|
|||
subscriber = Subscriber(
|
||||
backend = processor.pubsub,
|
||||
topic = definition["topics"][self.name],
|
||||
subscription = flow.id,
|
||||
subscription = flow.id + "--" + flow.workspace + "--" + flow.name,
|
||||
consumer_name = flow.id,
|
||||
schema = self.schema,
|
||||
metrics = subscriber_metrics,
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ class ToolService(FlowProcessor):
|
|||
id = msg.properties()["id"]
|
||||
|
||||
response = await self.invoke_tool(
|
||||
flow.workspace,
|
||||
request.name,
|
||||
json.loads(request.parameters) if request.parameters else {},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -11,12 +11,11 @@ logger = logging.getLogger(__name__)
|
|||
class ToolServiceClient(RequestResponse):
|
||||
"""Client for invoking dynamically configured tool services."""
|
||||
|
||||
async def call(self, user, config, arguments, timeout=600):
|
||||
async def call(self, config, arguments, timeout=600):
|
||||
"""
|
||||
Call a tool service.
|
||||
|
||||
Args:
|
||||
user: User context for multi-tenancy
|
||||
config: Dict of config values (e.g., {"collection": "customers"})
|
||||
arguments: Dict of arguments from LLM
|
||||
timeout: Request timeout in seconds
|
||||
|
|
@ -26,7 +25,6 @@ class ToolServiceClient(RequestResponse):
|
|||
"""
|
||||
resp = await self.request(
|
||||
ToolServiceRequest(
|
||||
user=user,
|
||||
config=json.dumps(config) if config else "{}",
|
||||
arguments=json.dumps(arguments) if arguments else "{}",
|
||||
),
|
||||
|
|
@ -38,12 +36,11 @@ class ToolServiceClient(RequestResponse):
|
|||
|
||||
return resp.response
|
||||
|
||||
async def call_streaming(self, user, config, arguments, callback, timeout=600):
|
||||
async def call_streaming(self, config, arguments, callback, timeout=600):
|
||||
"""
|
||||
Call a tool service with streaming response.
|
||||
|
||||
Args:
|
||||
user: User context for multi-tenancy
|
||||
config: Dict of config values
|
||||
arguments: Dict of arguments from LLM
|
||||
callback: Async function called with each response chunk
|
||||
|
|
@ -66,7 +63,6 @@ class ToolServiceClient(RequestResponse):
|
|||
|
||||
await self.request(
|
||||
ToolServiceRequest(
|
||||
user=user,
|
||||
config=json.dumps(config) if config else "{}",
|
||||
arguments=json.dumps(arguments) if arguments else "{}",
|
||||
),
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ def from_value(x: Any) -> Any:
|
|||
|
||||
class TriplesClient(RequestResponse):
|
||||
async def query(self, s=None, p=None, o=None, limit=20,
|
||||
user="trustgraph", collection="default",
|
||||
collection="default",
|
||||
timeout=30, g=None):
|
||||
|
||||
resp = await self.request(
|
||||
|
|
@ -54,7 +54,6 @@ class TriplesClient(RequestResponse):
|
|||
p = from_value(p),
|
||||
o = from_value(o),
|
||||
limit = limit,
|
||||
user = user,
|
||||
collection = collection,
|
||||
g = g,
|
||||
),
|
||||
|
|
@ -72,7 +71,7 @@ class TriplesClient(RequestResponse):
|
|||
return triples
|
||||
|
||||
async def query_stream(self, s=None, p=None, o=None, limit=20,
|
||||
user="trustgraph", collection="default",
|
||||
collection="default",
|
||||
batch_size=20, timeout=30,
|
||||
batch_callback=None, g=None):
|
||||
"""
|
||||
|
|
@ -81,7 +80,6 @@ class TriplesClient(RequestResponse):
|
|||
Args:
|
||||
s, p, o: Triple pattern (None for wildcard)
|
||||
limit: Maximum total triples to return
|
||||
user: User/keyspace
|
||||
collection: Collection name
|
||||
batch_size: Triples per batch
|
||||
timeout: Request timeout in seconds
|
||||
|
|
@ -116,7 +114,6 @@ class TriplesClient(RequestResponse):
|
|||
p=from_value(p),
|
||||
o=from_value(o),
|
||||
limit=limit,
|
||||
user=user,
|
||||
collection=collection,
|
||||
streaming=True,
|
||||
batch_size=batch_size,
|
||||
|
|
|
|||
|
|
@ -58,9 +58,13 @@ class TriplesQueryService(FlowProcessor):
|
|||
|
||||
logger.debug(f"Handling triples query request {id}...")
|
||||
|
||||
workspace = flow.workspace
|
||||
|
||||
if request.streaming:
|
||||
# Streaming mode: send batches
|
||||
async for batch, is_final in self.query_triples_stream(request):
|
||||
async for batch, is_final in self.query_triples_stream(
|
||||
workspace, request,
|
||||
):
|
||||
r = TriplesQueryResponse(
|
||||
triples=batch,
|
||||
error=None,
|
||||
|
|
@ -70,7 +74,7 @@ class TriplesQueryService(FlowProcessor):
|
|||
logger.debug("Triples query streaming completed")
|
||||
else:
|
||||
# Non-streaming mode: single response
|
||||
triples = await self.query_triples(request)
|
||||
triples = await self.query_triples(workspace, request)
|
||||
logger.debug("Sending triples query response...")
|
||||
r = TriplesQueryResponse(triples=triples, error=None)
|
||||
await flow("response").send(r, properties={"id": id})
|
||||
|
|
@ -92,13 +96,13 @@ class TriplesQueryService(FlowProcessor):
|
|||
|
||||
await flow("response").send(r, properties={"id": id})
|
||||
|
||||
async def query_triples_stream(self, request):
|
||||
async def query_triples_stream(self, workspace, request):
|
||||
"""
|
||||
Streaming query - yields (batch, is_final) tuples.
|
||||
Default implementation batches results from query_triples.
|
||||
Override for true streaming from backend.
|
||||
"""
|
||||
triples = await self.query_triples(request)
|
||||
triples = await self.query_triples(workspace, request)
|
||||
batch_size = request.batch_size if request.batch_size > 0 else 20
|
||||
|
||||
for i in range(0, len(triples), batch_size):
|
||||
|
|
|
|||
|
|
@ -45,7 +45,10 @@ class TriplesStoreService(FlowProcessor):
|
|||
|
||||
request = msg.value()
|
||||
|
||||
await self.store_triples(request)
|
||||
# Workspace is derived from the flow the message arrived on,
|
||||
# not from fields in the message payload. Topic routing is
|
||||
# the isolation boundary.
|
||||
await self.store_triples(flow.workspace, request)
|
||||
|
||||
except TooManyRequests as e:
|
||||
raise e
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ class ConfigClient(BaseClient):
|
|||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
workspace="default",
|
||||
**pubsub_config,
|
||||
):
|
||||
|
||||
|
|
@ -51,10 +52,13 @@ class ConfigClient(BaseClient):
|
|||
**pubsub_config,
|
||||
)
|
||||
|
||||
self.workspace = workspace
|
||||
|
||||
def get(self, keys, timeout=300):
|
||||
|
||||
resp = self.call(
|
||||
operation="get",
|
||||
workspace=self.workspace,
|
||||
keys=[
|
||||
ConfigKey(
|
||||
type = k["type"],
|
||||
|
|
@ -78,6 +82,7 @@ class ConfigClient(BaseClient):
|
|||
|
||||
resp = self.call(
|
||||
operation="list",
|
||||
workspace=self.workspace,
|
||||
type=type,
|
||||
timeout=timeout
|
||||
)
|
||||
|
|
@ -88,6 +93,7 @@ class ConfigClient(BaseClient):
|
|||
|
||||
resp = self.call(
|
||||
operation="getvalues",
|
||||
workspace=self.workspace,
|
||||
type=type,
|
||||
timeout=timeout
|
||||
)
|
||||
|
|
@ -101,10 +107,31 @@ class ConfigClient(BaseClient):
|
|||
for v in resp.values
|
||||
]
|
||||
|
||||
def getvalues_all_ws(self, type, timeout=300):
|
||||
"""Fetch all values of a given type across all workspaces.
|
||||
Returns a list of dicts including a 'workspace' field."""
|
||||
|
||||
resp = self.call(
|
||||
operation="getvalues-all-ws",
|
||||
type=type,
|
||||
timeout=timeout
|
||||
)
|
||||
|
||||
return [
|
||||
{
|
||||
"workspace": v.workspace,
|
||||
"type": v.type,
|
||||
"key": v.key,
|
||||
"value": v.value,
|
||||
}
|
||||
for v in resp.values
|
||||
]
|
||||
|
||||
def delete(self, keys, timeout=300):
|
||||
|
||||
resp = self.call(
|
||||
operation="delete",
|
||||
workspace=self.workspace,
|
||||
keys=[
|
||||
ConfigKey(
|
||||
type = k["type"],
|
||||
|
|
@ -121,6 +148,7 @@ class ConfigClient(BaseClient):
|
|||
|
||||
resp = self.call(
|
||||
operation="put",
|
||||
workspace=self.workspace,
|
||||
values=[
|
||||
ConfigValue(
|
||||
type = v["type"],
|
||||
|
|
@ -138,6 +166,7 @@ class ConfigClient(BaseClient):
|
|||
|
||||
resp = self.call(
|
||||
operation="config",
|
||||
workspace=self.workspace,
|
||||
timeout=timeout
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -35,11 +35,11 @@ class DocumentEmbeddingsClient(BaseClient):
|
|||
)
|
||||
|
||||
def request(
|
||||
self, vector, user="trustgraph", collection="default",
|
||||
self, vector, collection="default",
|
||||
limit=10, timeout=300
|
||||
):
|
||||
return self.call(
|
||||
user=user, collection=collection,
|
||||
collection=collection,
|
||||
vector=vector, limit=limit, timeout=timeout
|
||||
).chunks
|
||||
|
||||
|
|
|
|||
|
|
@ -33,14 +33,13 @@ class DocumentRagClient(BaseClient):
|
|||
output_schema=DocumentRagResponse,
|
||||
)
|
||||
|
||||
def request(self, query, user="trustgraph", collection="default",
|
||||
def request(self, query, collection="default",
|
||||
chunk_callback=None, explain_callback=None, timeout=300):
|
||||
"""
|
||||
Request a document RAG query with optional streaming callbacks.
|
||||
|
||||
Args:
|
||||
query: The question to ask
|
||||
user: User identifier
|
||||
collection: Collection identifier
|
||||
chunk_callback: Optional callback(text, end_of_stream) for text chunks
|
||||
explain_callback: Optional callback(explain_id, explain_graph, explain_triples) for explain notifications
|
||||
|
|
@ -71,7 +70,7 @@ class DocumentRagClient(BaseClient):
|
|||
return False # Continue receiving
|
||||
|
||||
self.call(
|
||||
query=query, user=user, collection=collection,
|
||||
query=query, collection=collection,
|
||||
inspect=inspect, timeout=timeout
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -35,11 +35,11 @@ class GraphEmbeddingsClient(BaseClient):
|
|||
)
|
||||
|
||||
def request(
|
||||
self, vector, user="trustgraph", collection="default",
|
||||
self, vector, collection="default",
|
||||
limit=10, timeout=300
|
||||
):
|
||||
return self.call(
|
||||
user=user, collection=collection,
|
||||
collection=collection,
|
||||
vector=vector, limit=limit, timeout=timeout
|
||||
).entities
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ class GraphRagClient(BaseClient):
|
|||
)
|
||||
|
||||
def request(
|
||||
self, query, user="trustgraph", collection="default",
|
||||
self, query, collection="default",
|
||||
chunk_callback=None,
|
||||
explain_callback=None,
|
||||
timeout=500
|
||||
|
|
@ -44,7 +44,6 @@ class GraphRagClient(BaseClient):
|
|||
|
||||
Args:
|
||||
query: The question to ask
|
||||
user: User identifier
|
||||
collection: Collection identifier
|
||||
chunk_callback: Optional callback(text, end_of_stream) for text chunks
|
||||
explain_callback: Optional callback(explain_id, explain_graph, explain_triples) for explain notifications
|
||||
|
|
@ -76,7 +75,7 @@ class GraphRagClient(BaseClient):
|
|||
return False # Continue receiving
|
||||
|
||||
self.call(
|
||||
user=user, collection=collection, query=query,
|
||||
collection=collection, query=query,
|
||||
inspect=inspect, timeout=timeout
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -35,11 +35,11 @@ class RowEmbeddingsClient(BaseClient):
|
|||
)
|
||||
|
||||
def request(
|
||||
self, vector, schema_name, user="trustgraph", collection="default",
|
||||
self, vector, schema_name, collection="default",
|
||||
index_name=None, limit=10, timeout=300
|
||||
):
|
||||
kwargs = dict(
|
||||
user=user, collection=collection,
|
||||
collection=collection,
|
||||
vector=vector, schema_name=schema_name,
|
||||
limit=limit, timeout=timeout
|
||||
)
|
||||
|
|
|
|||
|
|
@ -45,16 +45,15 @@ class TriplesQueryClient(BaseClient):
|
|||
return Term(type=LITERAL, value=ent)
|
||||
|
||||
def request(
|
||||
self,
|
||||
self,
|
||||
s, p, o,
|
||||
user="trustgraph", collection="default",
|
||||
collection="default",
|
||||
limit=10, timeout=120,
|
||||
):
|
||||
return self.call(
|
||||
s=self.create_value(s),
|
||||
p=self.create_value(p),
|
||||
o=self.create_value(o),
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=limit,
|
||||
timeout=timeout,
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ class AgentRequestTranslator(MessageTranslator):
|
|||
state=data.get("state", None),
|
||||
group=data.get("group", None),
|
||||
history=data.get("history", []),
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
streaming=data.get("streaming", False),
|
||||
session_id=data.get("session_id", ""),
|
||||
|
|
@ -33,7 +32,6 @@ class AgentRequestTranslator(MessageTranslator):
|
|||
"state": obj.state,
|
||||
"group": obj.group,
|
||||
"history": obj.history,
|
||||
"user": obj.user,
|
||||
"collection": getattr(obj, "collection", "default"),
|
||||
"streaming": getattr(obj, "streaming", False),
|
||||
"session_id": getattr(obj, "session_id", ""),
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ class CollectionManagementRequestTranslator(MessageTranslator):
|
|||
def decode(self, data: Dict[str, Any]) -> CollectionManagementRequest:
|
||||
return CollectionManagementRequest(
|
||||
operation=data.get("operation"),
|
||||
user=data.get("user"),
|
||||
workspace=data.get("workspace", ""),
|
||||
collection=data.get("collection"),
|
||||
timestamp=data.get("timestamp"),
|
||||
name=data.get("name"),
|
||||
|
|
@ -24,8 +24,8 @@ class CollectionManagementRequestTranslator(MessageTranslator):
|
|||
|
||||
if obj.operation is not None:
|
||||
result["operation"] = obj.operation
|
||||
if obj.user is not None:
|
||||
result["user"] = obj.user
|
||||
if obj.workspace:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.collection is not None:
|
||||
result["collection"] = obj.collection
|
||||
if obj.timestamp is not None:
|
||||
|
|
@ -63,7 +63,6 @@ class CollectionManagementResponseTranslator(MessageTranslator):
|
|||
if "collections" in data:
|
||||
for coll_data in data["collections"]:
|
||||
collections.append(CollectionMetadata(
|
||||
user=coll_data.get("user"),
|
||||
collection=coll_data.get("collection"),
|
||||
name=coll_data.get("name"),
|
||||
description=coll_data.get("description"),
|
||||
|
|
@ -91,7 +90,6 @@ class CollectionManagementResponseTranslator(MessageTranslator):
|
|||
result["collections"] = []
|
||||
for coll in obj.collections:
|
||||
result["collections"].append({
|
||||
"user": coll.user,
|
||||
"collection": coll.collection,
|
||||
"name": coll.name,
|
||||
"description": coll.description,
|
||||
|
|
|
|||
|
|
@ -23,13 +23,15 @@ class ConfigRequestTranslator(MessageTranslator):
|
|||
ConfigValue(
|
||||
type=v["type"],
|
||||
key=v["key"],
|
||||
value=v["value"]
|
||||
value=v["value"],
|
||||
workspace=v.get("workspace", ""),
|
||||
)
|
||||
for v in data["values"]
|
||||
]
|
||||
|
||||
return ConfigRequest(
|
||||
operation=data.get("operation"),
|
||||
workspace=data.get("workspace", ""),
|
||||
keys=keys,
|
||||
type=data.get("type"),
|
||||
values=values
|
||||
|
|
@ -37,10 +39,13 @@ class ConfigRequestTranslator(MessageTranslator):
|
|||
|
||||
def encode(self, obj: ConfigRequest) -> Dict[str, Any]:
|
||||
result = {}
|
||||
|
||||
|
||||
if obj.operation is not None:
|
||||
result["operation"] = obj.operation
|
||||
|
||||
if obj.workspace is not None:
|
||||
result["workspace"] = obj.workspace
|
||||
|
||||
if obj.type is not None:
|
||||
result["type"] = obj.type
|
||||
|
||||
|
|
@ -56,13 +61,14 @@ class ConfigRequestTranslator(MessageTranslator):
|
|||
if obj.values is not None:
|
||||
result["values"] = [
|
||||
{
|
||||
**({"workspace": v.workspace} if v.workspace else {}),
|
||||
"type": v.type,
|
||||
"key": v.key,
|
||||
"value": v.value
|
||||
"value": v.value,
|
||||
}
|
||||
for v in obj.values
|
||||
]
|
||||
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -81,13 +87,14 @@ class ConfigResponseTranslator(MessageTranslator):
|
|||
if obj.values is not None:
|
||||
result["values"] = [
|
||||
{
|
||||
**({"workspace": v.workspace} if v.workspace else {}),
|
||||
"type": v.type,
|
||||
"key": v.key,
|
||||
"value": v.value
|
||||
"value": v.value,
|
||||
}
|
||||
for v in obj.values
|
||||
]
|
||||
|
||||
|
||||
if obj.directory is not None:
|
||||
result["directory"] = obj.directory
|
||||
|
||||
|
|
|
|||
|
|
@ -39,7 +39,6 @@ class DocumentTranslator(SendTranslator):
|
|||
metadata=Metadata(
|
||||
id=data.get("id"),
|
||||
root=data.get("root", ""),
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
),
|
||||
data=base64.b64encode(doc).decode("utf-8")
|
||||
|
|
@ -56,8 +55,6 @@ class DocumentTranslator(SendTranslator):
|
|||
metadata_dict["id"] = obj.metadata.id
|
||||
if obj.metadata.root:
|
||||
metadata_dict["root"] = obj.metadata.root
|
||||
if obj.metadata.user:
|
||||
metadata_dict["user"] = obj.metadata.user
|
||||
if obj.metadata.collection:
|
||||
metadata_dict["collection"] = obj.metadata.collection
|
||||
|
||||
|
|
@ -79,7 +76,6 @@ class TextDocumentTranslator(SendTranslator):
|
|||
metadata=Metadata(
|
||||
id=data.get("id"),
|
||||
root=data.get("root", ""),
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
),
|
||||
text=text.encode("utf-8")
|
||||
|
|
@ -96,8 +92,6 @@ class TextDocumentTranslator(SendTranslator):
|
|||
metadata_dict["id"] = obj.metadata.id
|
||||
if obj.metadata.root:
|
||||
metadata_dict["root"] = obj.metadata.root
|
||||
if obj.metadata.user:
|
||||
metadata_dict["user"] = obj.metadata.user
|
||||
if obj.metadata.collection:
|
||||
metadata_dict["collection"] = obj.metadata.collection
|
||||
|
||||
|
|
@ -115,7 +109,6 @@ class ChunkTranslator(SendTranslator):
|
|||
metadata=Metadata(
|
||||
id=data.get("id"),
|
||||
root=data.get("root", ""),
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
),
|
||||
chunk=data["chunk"].encode("utf-8") if isinstance(data["chunk"], str) else data["chunk"]
|
||||
|
|
@ -132,8 +125,6 @@ class ChunkTranslator(SendTranslator):
|
|||
metadata_dict["id"] = obj.metadata.id
|
||||
if obj.metadata.root:
|
||||
metadata_dict["root"] = obj.metadata.root
|
||||
if obj.metadata.user:
|
||||
metadata_dict["user"] = obj.metadata.user
|
||||
if obj.metadata.collection:
|
||||
metadata_dict["collection"] = obj.metadata.collection
|
||||
|
||||
|
|
@ -161,7 +152,6 @@ class DocumentEmbeddingsTranslator(SendTranslator):
|
|||
metadata=Metadata(
|
||||
id=metadata.get("id"),
|
||||
root=metadata.get("root", ""),
|
||||
user=metadata.get("user", "trustgraph"),
|
||||
collection=metadata.get("collection", "default"),
|
||||
),
|
||||
chunks=chunks
|
||||
|
|
@ -184,8 +174,6 @@ class DocumentEmbeddingsTranslator(SendTranslator):
|
|||
metadata_dict["id"] = obj.metadata.id
|
||||
if obj.metadata.root:
|
||||
metadata_dict["root"] = obj.metadata.root
|
||||
if obj.metadata.user:
|
||||
metadata_dict["user"] = obj.metadata.user
|
||||
if obj.metadata.collection:
|
||||
metadata_dict["collection"] = obj.metadata.collection
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ class DocumentEmbeddingsRequestTranslator(MessageTranslator):
|
|||
return DocumentEmbeddingsRequest(
|
||||
vector=data["vector"],
|
||||
limit=int(data.get("limit", 10)),
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default")
|
||||
)
|
||||
|
||||
|
|
@ -23,7 +22,6 @@ class DocumentEmbeddingsRequestTranslator(MessageTranslator):
|
|||
return {
|
||||
"vector": obj.vector,
|
||||
"limit": obj.limit,
|
||||
"user": obj.user,
|
||||
"collection": obj.collection
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +58,6 @@ class GraphEmbeddingsRequestTranslator(MessageTranslator):
|
|||
return GraphEmbeddingsRequest(
|
||||
vector=data["vector"],
|
||||
limit=int(data.get("limit", 10)),
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default")
|
||||
)
|
||||
|
||||
|
|
@ -68,7 +65,6 @@ class GraphEmbeddingsRequestTranslator(MessageTranslator):
|
|||
return {
|
||||
"vector": obj.vector,
|
||||
"limit": obj.limit,
|
||||
"user": obj.user,
|
||||
"collection": obj.collection
|
||||
}
|
||||
|
||||
|
|
@ -108,7 +104,6 @@ class RowEmbeddingsRequestTranslator(MessageTranslator):
|
|||
return RowEmbeddingsRequest(
|
||||
vector=data["vector"],
|
||||
limit=int(data.get("limit", 10)),
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
schema_name=data.get("schema_name", ""),
|
||||
index_name=data.get("index_name")
|
||||
|
|
@ -118,7 +113,6 @@ class RowEmbeddingsRequestTranslator(MessageTranslator):
|
|||
result = {
|
||||
"vector": obj.vector,
|
||||
"limit": obj.limit,
|
||||
"user": obj.user,
|
||||
"collection": obj.collection,
|
||||
"schema_name": obj.schema_name,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,18 +9,21 @@ class FlowRequestTranslator(MessageTranslator):
|
|||
def decode(self, data: Dict[str, Any]) -> FlowRequest:
|
||||
return FlowRequest(
|
||||
operation=data.get("operation"),
|
||||
workspace=data.get("workspace", ""),
|
||||
blueprint_name=data.get("blueprint-name"),
|
||||
blueprint_definition=data.get("blueprint-definition"),
|
||||
description=data.get("description"),
|
||||
flow_id=data.get("flow-id"),
|
||||
parameters=data.get("parameters")
|
||||
)
|
||||
|
||||
|
||||
def encode(self, obj: FlowRequest) -> Dict[str, Any]:
|
||||
result = {}
|
||||
|
||||
if obj.operation is not None:
|
||||
result["operation"] = obj.operation
|
||||
if obj.workspace is not None:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.blueprint_name is not None:
|
||||
result["blueprint-name"] = obj.blueprint_name
|
||||
if obj.blueprint_definition is not None:
|
||||
|
|
|
|||
|
|
@ -21,7 +21,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
metadata=Metadata(
|
||||
id=data["triples"]["metadata"]["id"],
|
||||
root=data["triples"]["metadata"].get("root", ""),
|
||||
user=data["triples"]["metadata"]["user"],
|
||||
collection=data["triples"]["metadata"]["collection"]
|
||||
),
|
||||
triples=self.subgraph_translator.decode(data["triples"]["triples"]),
|
||||
|
|
@ -33,7 +32,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
metadata=Metadata(
|
||||
id=data["graph-embeddings"]["metadata"]["id"],
|
||||
root=data["graph-embeddings"]["metadata"].get("root", ""),
|
||||
user=data["graph-embeddings"]["metadata"]["user"],
|
||||
collection=data["graph-embeddings"]["metadata"]["collection"]
|
||||
),
|
||||
entities=[
|
||||
|
|
@ -47,7 +45,7 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
|
||||
return KnowledgeRequest(
|
||||
operation=data.get("operation"),
|
||||
user=data.get("user"),
|
||||
workspace=data.get("workspace", ""),
|
||||
id=data.get("id"),
|
||||
flow=data.get("flow"),
|
||||
collection=data.get("collection"),
|
||||
|
|
@ -60,8 +58,8 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
|
||||
if obj.operation:
|
||||
result["operation"] = obj.operation
|
||||
if obj.user:
|
||||
result["user"] = obj.user
|
||||
if obj.workspace:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.id:
|
||||
result["id"] = obj.id
|
||||
if obj.flow:
|
||||
|
|
@ -74,7 +72,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
"metadata": {
|
||||
"id": obj.triples.metadata.id,
|
||||
"root": obj.triples.metadata.root,
|
||||
"user": obj.triples.metadata.user,
|
||||
"collection": obj.triples.metadata.collection,
|
||||
},
|
||||
"triples": self.subgraph_translator.encode(obj.triples.triples),
|
||||
|
|
@ -85,7 +82,6 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
"metadata": {
|
||||
"id": obj.graph_embeddings.metadata.id,
|
||||
"root": obj.graph_embeddings.metadata.root,
|
||||
"user": obj.graph_embeddings.metadata.user,
|
||||
"collection": obj.graph_embeddings.metadata.collection,
|
||||
},
|
||||
"entities": [
|
||||
|
|
@ -122,7 +118,6 @@ class KnowledgeResponseTranslator(MessageTranslator):
|
|||
"metadata": {
|
||||
"id": obj.triples.metadata.id,
|
||||
"root": obj.triples.metadata.root,
|
||||
"user": obj.triples.metadata.user,
|
||||
"collection": obj.triples.metadata.collection,
|
||||
},
|
||||
"triples": self.subgraph_translator.encode(obj.triples.triples),
|
||||
|
|
@ -136,7 +131,6 @@ class KnowledgeResponseTranslator(MessageTranslator):
|
|||
"metadata": {
|
||||
"id": obj.graph_embeddings.metadata.id,
|
||||
"root": obj.graph_embeddings.metadata.root,
|
||||
"user": obj.graph_embeddings.metadata.user,
|
||||
"collection": obj.graph_embeddings.metadata.collection,
|
||||
},
|
||||
"entities": [
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ class LibraryRequestTranslator(MessageTranslator):
|
|||
document_metadata=doc_metadata,
|
||||
processing_metadata=proc_metadata,
|
||||
content=content,
|
||||
user=data.get("user", ""),
|
||||
workspace=data.get("workspace", ""),
|
||||
collection=data.get("collection", ""),
|
||||
criteria=criteria,
|
||||
# Chunked upload fields
|
||||
|
|
@ -76,8 +76,8 @@ class LibraryRequestTranslator(MessageTranslator):
|
|||
result["processing-metadata"] = self.proc_metadata_translator.encode(obj.processing_metadata)
|
||||
if obj.content:
|
||||
result["content"] = obj.content.decode("utf-8") if isinstance(obj.content, bytes) else obj.content
|
||||
if obj.user:
|
||||
result["user"] = obj.user
|
||||
if obj.workspace:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.collection:
|
||||
result["collection"] = obj.collection
|
||||
if obj.criteria is not None:
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ class DocumentMetadataTranslator(Translator):
|
|||
title=data.get("title"),
|
||||
comments=data.get("comments"),
|
||||
metadata=self.subgraph_translator.decode(metadata) if metadata is not None else [],
|
||||
user=data.get("user"),
|
||||
workspace=data.get("workspace"),
|
||||
tags=data.get("tags"),
|
||||
parent_id=data.get("parent-id", ""),
|
||||
document_type=data.get("document-type", "source"),
|
||||
|
|
@ -40,8 +40,8 @@ class DocumentMetadataTranslator(Translator):
|
|||
result["comments"] = obj.comments
|
||||
if obj.metadata is not None:
|
||||
result["metadata"] = self.subgraph_translator.encode(obj.metadata)
|
||||
if obj.user:
|
||||
result["user"] = obj.user
|
||||
if obj.workspace:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.tags is not None:
|
||||
result["tags"] = obj.tags
|
||||
if obj.parent_id:
|
||||
|
|
@ -61,7 +61,7 @@ class ProcessingMetadataTranslator(Translator):
|
|||
document_id=data.get("document-id"),
|
||||
time=data.get("time"),
|
||||
flow=data.get("flow"),
|
||||
user=data.get("user"),
|
||||
workspace=data.get("workspace"),
|
||||
collection=data.get("collection"),
|
||||
tags=data.get("tags")
|
||||
)
|
||||
|
|
@ -77,8 +77,8 @@ class ProcessingMetadataTranslator(Translator):
|
|||
result["time"] = obj.time
|
||||
if obj.flow:
|
||||
result["flow"] = obj.flow
|
||||
if obj.user:
|
||||
result["user"] = obj.user
|
||||
if obj.workspace:
|
||||
result["workspace"] = obj.workspace
|
||||
if obj.collection:
|
||||
result["collection"] = obj.collection
|
||||
if obj.tags is not None:
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ class DocumentRagRequestTranslator(MessageTranslator):
|
|||
def decode(self, data: Dict[str, Any]) -> DocumentRagQuery:
|
||||
return DocumentRagQuery(
|
||||
query=data["query"],
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
doc_limit=int(data.get("doc-limit", 20)),
|
||||
streaming=data.get("streaming", False)
|
||||
|
|
@ -19,7 +18,6 @@ class DocumentRagRequestTranslator(MessageTranslator):
|
|||
def encode(self, obj: DocumentRagQuery) -> Dict[str, Any]:
|
||||
return {
|
||||
"query": obj.query,
|
||||
"user": obj.user,
|
||||
"collection": obj.collection,
|
||||
"doc-limit": obj.doc_limit,
|
||||
"streaming": getattr(obj, "streaming", False)
|
||||
|
|
@ -96,7 +94,6 @@ class GraphRagRequestTranslator(MessageTranslator):
|
|||
def decode(self, data: Dict[str, Any]) -> GraphRagQuery:
|
||||
return GraphRagQuery(
|
||||
query=data["query"],
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
entity_limit=int(data.get("entity-limit", 50)),
|
||||
triple_limit=int(data.get("triple-limit", 30)),
|
||||
|
|
@ -110,7 +107,6 @@ class GraphRagRequestTranslator(MessageTranslator):
|
|||
def encode(self, obj: GraphRagQuery) -> Dict[str, Any]:
|
||||
return {
|
||||
"query": obj.query,
|
||||
"user": obj.user,
|
||||
"collection": obj.collection,
|
||||
"entity-limit": obj.entity_limit,
|
||||
"triple-limit": obj.triple_limit,
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ class RowsQueryRequestTranslator(MessageTranslator):
|
|||
|
||||
def decode(self, data: Dict[str, Any]) -> RowsQueryRequest:
|
||||
return RowsQueryRequest(
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
query=data.get("query", ""),
|
||||
variables=data.get("variables", {}),
|
||||
|
|
@ -18,7 +17,6 @@ class RowsQueryRequestTranslator(MessageTranslator):
|
|||
|
||||
def encode(self, obj: RowsQueryRequest) -> Dict[str, Any]:
|
||||
result = {
|
||||
"user": obj.user,
|
||||
"collection": obj.collection,
|
||||
"query": obj.query,
|
||||
"variables": dict(obj.variables) if obj.variables else {}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ class SparqlQueryRequestTranslator(MessageTranslator):
|
|||
|
||||
def decode(self, data: Dict[str, Any]) -> SparqlQueryRequest:
|
||||
return SparqlQueryRequest(
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
query=data.get("query", ""),
|
||||
limit=int(data.get("limit", 10000)),
|
||||
|
|
@ -22,7 +21,6 @@ class SparqlQueryRequestTranslator(MessageTranslator):
|
|||
|
||||
def encode(self, obj: SparqlQueryRequest) -> Dict[str, Any]:
|
||||
return {
|
||||
"user": obj.user,
|
||||
"collection": obj.collection,
|
||||
"query": obj.query,
|
||||
"limit": obj.limit,
|
||||
|
|
|
|||
|
|
@ -10,14 +10,12 @@ class StructuredQueryRequestTranslator(MessageTranslator):
|
|||
def decode(self, data: Dict[str, Any]) -> StructuredQueryRequest:
|
||||
return StructuredQueryRequest(
|
||||
question=data.get("question", ""),
|
||||
user=data.get("user", "trustgraph"), # Default fallback
|
||||
collection=data.get("collection", "default") # Default fallback
|
||||
collection=data.get("collection", "default")
|
||||
)
|
||||
|
||||
|
||||
def encode(self, obj: StructuredQueryRequest) -> Dict[str, Any]:
|
||||
return {
|
||||
"question": obj.question,
|
||||
"user": obj.user,
|
||||
"collection": obj.collection
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,16 +22,14 @@ class TriplesQueryRequestTranslator(MessageTranslator):
|
|||
o=o,
|
||||
g=g,
|
||||
limit=int(data.get("limit", 10000)),
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
streaming=data.get("streaming", False),
|
||||
batch_size=int(data.get("batch-size", 20)),
|
||||
)
|
||||
|
||||
|
||||
def encode(self, obj: TriplesQueryRequest) -> Dict[str, Any]:
|
||||
result = {
|
||||
"limit": obj.limit,
|
||||
"user": obj.user,
|
||||
"collection": obj.collection,
|
||||
"streaming": obj.streaming,
|
||||
"batch-size": obj.batch_size,
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ class Metadata:
|
|||
# Root document identifier (set by librarian, preserved through pipeline)
|
||||
root: str = ""
|
||||
|
||||
# Collection management
|
||||
user: str = ""
|
||||
# Collection the message belongs to. Workspace is NOT carried on the
|
||||
# message — consumers derive it from flow.workspace (the flow the
|
||||
# message arrived on), which is the trusted isolation boundary.
|
||||
collection: str = ""
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from .embeddings import GraphEmbeddings
|
|||
# <- (error)
|
||||
|
||||
# list-kg-cores
|
||||
# -> (user)
|
||||
# -> (workspace)
|
||||
# <- ()
|
||||
# <- (error)
|
||||
|
||||
|
|
@ -27,8 +27,8 @@ class KnowledgeRequest:
|
|||
# load-kg-core, unload-kg-core
|
||||
operation: str = ""
|
||||
|
||||
# list-kg-cores, delete-kg-core, put-kg-core
|
||||
user: str = ""
|
||||
# Workspace the cores belong to. Partition / isolation boundary.
|
||||
workspace: str = ""
|
||||
|
||||
# get-kg-core, list-kg-cores, delete-kg-core, put-kg-core,
|
||||
# load-kg-core, unload-kg-core
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ class AgentStep:
|
|||
action: str = ""
|
||||
arguments: dict[str, str] = field(default_factory=dict)
|
||||
observation: str = ""
|
||||
user: str = "" # User context for the step
|
||||
step_type: str = "" # "react", "plan", "execute", "decompose", "synthesise"
|
||||
plan: list[PlanStep] = field(default_factory=list) # Plan steps (for plan-then-execute)
|
||||
subagent_results: dict[str, str] = field(default_factory=dict) # Subagent results keyed by goal
|
||||
|
|
@ -33,7 +32,6 @@ class AgentRequest:
|
|||
state: str = ""
|
||||
group: list[str] | None = None
|
||||
history: list[AgentStep] = field(default_factory=list)
|
||||
user: str = "" # User context for multi-tenancy
|
||||
collection: str = "default" # Collection for provenance traces
|
||||
streaming: bool = False # Enable streaming response delivery (default false)
|
||||
session_id: str = "" # For provenance tracking across iterations
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ from ..core.topic import queue
|
|||
@dataclass
|
||||
class CollectionMetadata:
|
||||
"""Collection metadata record"""
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
name: str = ""
|
||||
description: str = ""
|
||||
|
|
@ -23,11 +22,17 @@ class CollectionMetadata:
|
|||
|
||||
@dataclass
|
||||
class CollectionManagementRequest:
|
||||
"""Request for collection management operations"""
|
||||
"""Request for collection management operations.
|
||||
|
||||
Collection-management is a global (non-flow-scoped) service, so the
|
||||
workspace has to travel on the wire — it's the isolation boundary
|
||||
for which workspace's collections the request operates on.
|
||||
"""
|
||||
operation: str = "" # e.g., "delete-collection"
|
||||
|
||||
# For 'list-collections'
|
||||
user: str = ""
|
||||
# Workspace the collection belongs to.
|
||||
workspace: str = ""
|
||||
|
||||
collection: str = ""
|
||||
timestamp: str = "" # ISO timestamp
|
||||
name: str = ""
|
||||
|
|
|
|||
|
|
@ -7,12 +7,19 @@ from ..core.primitives import Error
|
|||
############################################################################
|
||||
|
||||
# Config service:
|
||||
# get(keys) -> (version, values)
|
||||
# list(type) -> (version, values)
|
||||
# getvalues(type) -> (version, values)
|
||||
# put(values) -> ()
|
||||
# delete(keys) -> ()
|
||||
# config() -> (version, config)
|
||||
# get(workspace, keys) -> (version, values)
|
||||
# list(workspace, type) -> (version, directory)
|
||||
# getvalues(workspace, type) -> (version, values)
|
||||
# getvalues-all-ws(type) -> (version, values with workspace field)
|
||||
# put(workspace, values) -> ()
|
||||
# delete(workspace, keys) -> ()
|
||||
# config(workspace) -> (version, config)
|
||||
#
|
||||
# Most operations are scoped to a workspace. The workspace field on the
|
||||
# request identifies which workspace's config to read or modify.
|
||||
# getvalues-all-ws returns values across all workspaces for a single
|
||||
# type — used by shared processors to load type-scoped config at startup.
|
||||
|
||||
@dataclass
|
||||
class ConfigKey:
|
||||
type: str = ""
|
||||
|
|
@ -23,16 +30,24 @@ class ConfigValue:
|
|||
type: str = ""
|
||||
key: str = ""
|
||||
value: str = ""
|
||||
# Populated by getvalues-all-ws responses so callers can identify
|
||||
# which workspace each value belongs to. Empty otherwise.
|
||||
workspace: str = ""
|
||||
|
||||
# Prompt services, abstract the prompt generation
|
||||
@dataclass
|
||||
class ConfigRequest:
|
||||
operation: str = "" # get, list, getvalues, delete, put, config
|
||||
# Operations: get, list, getvalues, getvalues-all-ws, delete, put,
|
||||
# config
|
||||
operation: str = ""
|
||||
|
||||
# Workspace scope — required on all operations except
|
||||
# getvalues-all-ws which spans all workspaces.
|
||||
workspace: str = ""
|
||||
|
||||
# get, delete
|
||||
keys: list[ConfigKey] = field(default_factory=list)
|
||||
|
||||
# list, getvalues
|
||||
# list, getvalues, getvalues-all-ws
|
||||
type: str = ""
|
||||
|
||||
# put
|
||||
|
|
@ -58,7 +73,12 @@ class ConfigResponse:
|
|||
@dataclass
|
||||
class ConfigPush:
|
||||
version: int = 0
|
||||
types: list[str] = field(default_factory=list)
|
||||
|
||||
# Dict of config type -> list of affected workspaces.
|
||||
# Handlers look up their registered type and get the list of
|
||||
# workspaces that need refreshing.
|
||||
# e.g. {"prompt": ["workspace-a", "workspace-b"], "schema": ["workspace-a"]}
|
||||
changes: dict[str, list[str]] = field(default_factory=dict)
|
||||
|
||||
config_request_queue = queue('config', cls='request')
|
||||
config_response_queue = queue('config', cls='response')
|
||||
|
|
|
|||
|
|
@ -17,12 +17,14 @@ from ..core.primitives import Error
|
|||
# start_flow(flowid, blueprintname) -> ()
|
||||
# stop_flow(flowid) -> ()
|
||||
|
||||
# Prompt services, abstract the prompt generation
|
||||
@dataclass
|
||||
class FlowRequest:
|
||||
operation: str = "" # list-blueprints, get-blueprint, put-blueprint, delete-blueprint
|
||||
# list-flows, get-flow, start-flow, stop-flow
|
||||
|
||||
# Workspace scope — all operations act within this workspace
|
||||
workspace: str = ""
|
||||
|
||||
# get_blueprint, put_blueprint, delete_blueprint, start_flow
|
||||
blueprint_name: str = ""
|
||||
|
||||
|
|
|
|||
|
|
@ -43,12 +43,12 @@ from ..core.metadata import Metadata
|
|||
# <- (error)
|
||||
|
||||
# list-documents
|
||||
# -> (user, collection?)
|
||||
# -> (workspace, collection?)
|
||||
# <- (document_metadata[])
|
||||
# <- (error)
|
||||
|
||||
# list-processing
|
||||
# -> (user, collection?)
|
||||
# -> (workspace, collection?)
|
||||
# <- (processing_metadata[])
|
||||
# <- (error)
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ from ..core.metadata import Metadata
|
|||
# <- (error)
|
||||
|
||||
# list-uploads
|
||||
# -> (user)
|
||||
# -> (workspace)
|
||||
# <- (uploads[])
|
||||
# <- (error)
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ class DocumentMetadata:
|
|||
title: str = ""
|
||||
comments: str = ""
|
||||
metadata: list[Triple] = field(default_factory=list)
|
||||
user: str = ""
|
||||
workspace: str = ""
|
||||
tags: list[str] = field(default_factory=list)
|
||||
# Child document support
|
||||
parent_id: str = "" # Empty for top-level docs, set for children
|
||||
|
|
@ -107,7 +107,7 @@ class ProcessingMetadata:
|
|||
document_id: str = ""
|
||||
time: int = 0
|
||||
flow: str = ""
|
||||
user: str = ""
|
||||
workspace: str = ""
|
||||
collection: str = ""
|
||||
tags: list[str] = field(default_factory=list)
|
||||
|
||||
|
|
@ -162,8 +162,8 @@ class LibrarianRequest:
|
|||
# add-document, upload-chunk
|
||||
content: bytes = b""
|
||||
|
||||
# list-documents, list-processing, list-uploads
|
||||
user: str = ""
|
||||
# Workspace scopes every library operation.
|
||||
workspace: str = ""
|
||||
|
||||
# list-documents?, list-processing?
|
||||
collection: str = ""
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ from ..core.topic import queue
|
|||
class GraphEmbeddingsRequest:
|
||||
vector: list[float] = field(default_factory=list)
|
||||
limit: int = 0
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
|
||||
@dataclass
|
||||
|
|
@ -31,7 +30,6 @@ class GraphEmbeddingsResponse:
|
|||
|
||||
@dataclass
|
||||
class TriplesQueryRequest:
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
s: Term | None = None
|
||||
p: Term | None = None
|
||||
|
|
@ -55,7 +53,6 @@ class TriplesQueryResponse:
|
|||
class DocumentEmbeddingsRequest:
|
||||
vector: list[float] = field(default_factory=list)
|
||||
limit: int = 0
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
|
||||
@dataclass
|
||||
|
|
@ -89,7 +86,6 @@ class RowEmbeddingsRequest:
|
|||
"""Request for row embeddings semantic search"""
|
||||
vector: list[float] = field(default_factory=list) # Query vector
|
||||
limit: int = 10 # Max results to return
|
||||
user: str = "" # User/keyspace
|
||||
collection: str = "" # Collection name
|
||||
schema_name: str = "" # Schema name to search within
|
||||
index_name: str | None = None # Optional: filter to specific index
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ from ..core.primitives import Error, Term, Triple
|
|||
@dataclass
|
||||
class GraphRagQuery:
|
||||
query: str = ""
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
entity_limit: int = 0
|
||||
triple_limit: int = 0
|
||||
|
|
@ -40,7 +39,6 @@ class GraphRagResponse:
|
|||
@dataclass
|
||||
class DocumentRagQuery:
|
||||
query: str = ""
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
doc_limit: int = 0
|
||||
streaming: bool = False
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ class GraphQLError:
|
|||
|
||||
@dataclass
|
||||
class RowsQueryRequest:
|
||||
user: str = "" # Cassandra keyspace (follows pattern from TriplesQueryRequest)
|
||||
collection: str = "" # Data collection identifier (required for partition key)
|
||||
query: str = "" # GraphQL query string
|
||||
variables: dict[str, str] = field(default_factory=dict) # GraphQL variables
|
||||
|
|
|
|||
|
|
@ -16,7 +16,6 @@ class SparqlBinding:
|
|||
|
||||
@dataclass
|
||||
class SparqlQueryRequest:
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
query: str = "" # SPARQL query string
|
||||
limit: int = 10000 # Safety limit on results
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ from ..core.primitives import Error
|
|||
@dataclass
|
||||
class StructuredQueryRequest:
|
||||
question: str = ""
|
||||
user: str = "" # Cassandra keyspace identifier
|
||||
collection: str = "" # Data collection identifier
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@ from ..core.primitives import Error
|
|||
@dataclass
|
||||
class ToolServiceRequest:
|
||||
"""Request to a dynamically configured tool service."""
|
||||
# User context for multi-tenancy
|
||||
user: str = ""
|
||||
# Config values (collection, etc.) as JSON
|
||||
config: str = ""
|
||||
# Arguments from LLM as JSON
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue