Merge 2.0 to master (#651)

This commit is contained in:
cybermaggedon 2026-02-28 11:03:14 +00:00 committed by GitHub
parent 3666ece2c5
commit b9d7bf9a8b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
212 changed files with 13940 additions and 6180 deletions

View file

@ -73,6 +73,8 @@ from .async_metrics import AsyncMetrics
# Types
from .types import (
Triple,
Uri,
Literal,
ConfigKey,
ConfigValue,
DocumentMetadata,
@ -99,7 +101,7 @@ from .exceptions import (
LoadError,
LookupError,
NLPQueryError,
ObjectsQueryError,
RowsQueryError,
RequestError,
StructuredQueryError,
UnexpectedError,
@ -133,6 +135,8 @@ __all__ = [
# Types
"Triple",
"Uri",
"Literal",
"ConfigKey",
"ConfigValue",
"DocumentMetadata",
@ -157,7 +161,7 @@ __all__ = [
"LoadError",
"LookupError",
"NLPQueryError",
"ObjectsQueryError",
"RowsQueryError",
"RequestError",
"StructuredQueryError",
"UnexpectedError",

View file

@ -115,15 +115,15 @@ class AsyncBulkClient:
async for raw_message in websocket:
yield json.loads(raw_message)
async def import_objects(self, flow: str, objects: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
"""Bulk import objects via WebSocket"""
ws_url = f"{self.url}/api/v1/flow/{flow}/import/objects"
async def import_rows(self, flow: str, rows: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
"""Bulk import rows via WebSocket"""
ws_url = f"{self.url}/api/v1/flow/{flow}/import/rows"
if self.token:
ws_url = f"{ws_url}?token={self.token}"
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
async for obj in objects:
await websocket.send(json.dumps(obj))
async for row in rows:
await websocket.send(json.dumps(row))
async def aclose(self) -> None:
"""Close connections"""

View file

@ -612,8 +612,12 @@ class AsyncFlowInstance:
print(f"{entity['name']}: {entity['score']}")
```
"""
# First convert text to embeddings vectors
emb_result = await self.embeddings(text=text)
vectors = emb_result.get("vectors", [])
request_data = {
"text": text,
"vectors": vectors,
"user": user,
"collection": collection,
"limit": limit
@ -704,18 +708,18 @@ class AsyncFlowInstance:
return await self.request("triples", request_data)
async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
operation_name: Optional[str] = None, **kwargs: Any):
async def rows_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
operation_name: Optional[str] = None, **kwargs: Any):
"""
Execute a GraphQL query on stored objects.
Execute a GraphQL query on stored rows.
Queries structured data objects using GraphQL syntax. Supports complex
Queries structured data rows using GraphQL syntax. Supports complex
queries with variables and named operations.
Args:
query: GraphQL query string
user: User identifier
collection: Collection identifier containing objects
collection: Collection identifier containing rows
variables: Optional GraphQL query variables
operation_name: Optional operation name for multi-operation queries
**kwargs: Additional service-specific parameters
@ -739,7 +743,7 @@ class AsyncFlowInstance:
}
'''
result = await flow.objects_query(
result = await flow.rows_query(
query=query,
user="trustgraph",
collection="users",
@ -761,4 +765,64 @@ class AsyncFlowInstance:
request_data["operationName"] = operation_name
request_data.update(kwargs)
return await self.request("objects", request_data)
return await self.request("rows", request_data)
async def row_embeddings_query(
    self, text: str, schema_name: str, user: str = "trustgraph",
    collection: str = "default", index_name: Optional[str] = None,
    limit: int = 10, **kwargs: Any
):
    """
    Query row embeddings for semantic search on structured data.

    Embeds the query text, then searches row index embeddings for rows
    whose indexed field values are most similar — fuzzy/semantic
    matching over structured data.

    Args:
        text: Query text for semantic search
        schema_name: Schema name to search within
        user: User identifier (default: "trustgraph")
        collection: Collection identifier (default: "default")
        index_name: Optional index name to restrict the search
        limit: Maximum number of results to return (default: 10)
        **kwargs: Additional service-specific parameters

    Returns:
        dict: Response containing matches with index_name, index_value,
        text, and score

    Example:
        ```python
        async_flow = await api.async_flow()
        flow = async_flow.id("default")
        results = await flow.row_embeddings_query(
            text="John Smith",
            schema_name="customers",
            user="trustgraph",
            collection="sales",
            limit=5,
        )
        ```
    """
    # Embed the query text first; the service searches by vector.
    emb = await self.embeddings(text=text)
    payload = {
        "vectors": emb.get("vectors", []),
        "schema_name": schema_name,
        "user": user,
        "collection": collection,
        "limit": limit,
    }
    if index_name:
        payload["index_name"] = index_name
    payload.update(kwargs)
    return await self.request("row-embeddings", payload)

View file

@ -282,8 +282,12 @@ class AsyncSocketFlowInstance:
async def graph_embeddings_query(self, text: str, user: str, collection: str, limit: int = 10, **kwargs):
"""Query graph embeddings for semantic search"""
# First convert text to embeddings vectors
emb_result = await self.embeddings(text=text)
vectors = emb_result.get("vectors", [])
request = {
"text": text,
"vectors": vectors,
"user": user,
"collection": collection,
"limit": limit
@ -316,9 +320,9 @@ class AsyncSocketFlowInstance:
return await self.client._send_request("triples", self.flow_id, request)
async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
operation_name: Optional[str] = None, **kwargs):
"""GraphQL query"""
async def rows_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
operation_name: Optional[str] = None, **kwargs):
"""GraphQL query against structured rows"""
request = {
"query": query,
"user": user,
@ -330,7 +334,7 @@ class AsyncSocketFlowInstance:
request["operationName"] = operation_name
request.update(kwargs)
return await self.client._send_request("objects", self.flow_id, request)
return await self.client._send_request("rows", self.flow_id, request)
async def mcp_tool(self, name: str, parameters: Dict[str, Any], **kwargs):
"""Execute MCP tool"""
@ -341,3 +345,26 @@ class AsyncSocketFlowInstance:
request.update(kwargs)
return await self.client._send_request("mcp-tool", self.flow_id, request)
async def row_embeddings_query(
    self, text: str, schema_name: str, user: str = "trustgraph",
    collection: str = "default", index_name: Optional[str] = None,
    limit: int = 10, **kwargs
):
    """Query row embeddings for semantic search on structured data.

    Embeds the query text, then sends a row-embeddings request over the
    socket for the current flow.
    """
    # Embed the query text first; the service searches by vector.
    emb = await self.embeddings(text=text)
    payload = {
        "vectors": emb.get("vectors", []),
        "schema_name": schema_name,
        "user": user,
        "collection": collection,
        "limit": limit,
    }
    if index_name:
        payload["index_name"] = index_name
    payload.update(kwargs)
    return await self.client._send_request("row-embeddings", self.flow_id, payload)

View file

@ -15,6 +15,15 @@ from . types import Triple
from . exceptions import ProtocolException
def _string_to_term(value: str) -> Dict[str, Any]:
"""Convert a string value to Term format for the gateway."""
# Treat URIs as IRI type, otherwise as literal
if value.startswith("http://") or value.startswith("https://") or "://" in value:
return {"t": "i", "i": value}
else:
return {"t": "l", "v": value}
class BulkClient:
"""
Synchronous bulk operations client for import/export.
@ -62,7 +71,12 @@ class BulkClient:
return loop.run_until_complete(coro)
def import_triples(self, flow: str, triples: Iterator[Triple], **kwargs: Any) -> None:
def import_triples(
self, flow: str, triples: Iterator[Triple],
metadata: Optional[Dict[str, Any]] = None,
batch_size: int = 100,
**kwargs: Any
) -> None:
"""
Bulk import RDF triples into a flow.
@ -71,6 +85,8 @@ class BulkClient:
Args:
flow: Flow identifier
triples: Iterator yielding Triple objects
metadata: Metadata dict with id, metadata, user, collection
batch_size: Number of triples per batch (default 100)
**kwargs: Additional parameters (reserved for future use)
Example:
@ -86,23 +102,47 @@ class BulkClient:
# ... more triples
# Import triples
bulk.import_triples(flow="default", triples=triple_generator())
bulk.import_triples(
flow="default",
triples=triple_generator(),
metadata={"id": "doc1", "metadata": [], "user": "user1", "collection": "default"}
)
```
"""
self._run_async(self._import_triples_async(flow, triples))
self._run_async(self._import_triples_async(flow, triples, metadata, batch_size))
async def _import_triples_async(self, flow: str, triples: Iterator[Triple]) -> None:
async def _import_triples_async(
self, flow: str, triples: Iterator[Triple],
metadata: Optional[Dict[str, Any]], batch_size: int
) -> None:
"""Async implementation of triple import"""
ws_url = f"{self.url}/api/v1/flow/{flow}/import/triples"
if self.token:
ws_url = f"{ws_url}?token={self.token}"
if metadata is None:
metadata = {"id": "", "metadata": [], "user": "trustgraph", "collection": "default"}
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
batch = []
for triple in triples:
batch.append({
"s": _string_to_term(triple.s),
"p": _string_to_term(triple.p),
"o": _string_to_term(triple.o)
})
if len(batch) >= batch_size:
message = {
"metadata": metadata,
"triples": batch
}
await websocket.send(json.dumps(message))
batch = []
# Send remaining items
if batch:
message = {
"s": triple.s,
"p": triple.p,
"o": triple.o
"metadata": metadata,
"triples": batch
}
await websocket.send(json.dumps(message))
@ -362,7 +402,12 @@ class BulkClient:
async for raw_message in websocket:
yield json.loads(raw_message)
def import_entity_contexts(self, flow: str, contexts: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
def import_entity_contexts(
self, flow: str, contexts: Iterator[Dict[str, Any]],
metadata: Optional[Dict[str, Any]] = None,
batch_size: int = 100,
**kwargs: Any
) -> None:
"""
Bulk import entity contexts into a flow.
@ -373,6 +418,8 @@ class BulkClient:
Args:
flow: Flow identifier
contexts: Iterator yielding context dictionaries
metadata: Metadata dict with id, metadata, user, collection
batch_size: Number of contexts per batch (default 100)
**kwargs: Additional parameters (reserved for future use)
Example:
@ -381,27 +428,49 @@ class BulkClient:
# Generate entity contexts to import
def context_generator():
yield {"entity": "entity1", "context": "Description of entity1..."}
yield {"entity": "entity2", "context": "Description of entity2..."}
yield {"entity": {"v": "entity1", "e": True}, "context": "Description..."}
yield {"entity": {"v": "entity2", "e": True}, "context": "Description..."}
# ... more contexts
bulk.import_entity_contexts(
flow="default",
contexts=context_generator()
contexts=context_generator(),
metadata={"id": "doc1", "metadata": [], "user": "user1", "collection": "default"}
)
```
"""
self._run_async(self._import_entity_contexts_async(flow, contexts))
self._run_async(self._import_entity_contexts_async(flow, contexts, metadata, batch_size))
async def _import_entity_contexts_async(self, flow: str, contexts: Iterator[Dict[str, Any]]) -> None:
async def _import_entity_contexts_async(
self, flow: str, contexts: Iterator[Dict[str, Any]],
metadata: Optional[Dict[str, Any]], batch_size: int
) -> None:
"""Async implementation of entity contexts import"""
ws_url = f"{self.url}/api/v1/flow/{flow}/import/entity-contexts"
if self.token:
ws_url = f"{ws_url}?token={self.token}"
if metadata is None:
metadata = {"id": "", "metadata": [], "user": "trustgraph", "collection": "default"}
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
batch = []
for context in contexts:
await websocket.send(json.dumps(context))
batch.append(context)
if len(batch) >= batch_size:
message = {
"metadata": metadata,
"entities": batch
}
await websocket.send(json.dumps(message))
batch = []
# Send remaining items
if batch:
message = {
"metadata": metadata,
"entities": batch
}
await websocket.send(json.dumps(message))
def export_entity_contexts(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]:
"""
@ -461,45 +530,45 @@ class BulkClient:
async for raw_message in websocket:
yield json.loads(raw_message)
def import_objects(self, flow: str, objects: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
def import_rows(self, flow: str, rows: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
"""
Bulk import structured objects into a flow.
Bulk import structured rows into a flow.
Efficiently uploads structured data objects via WebSocket streaming
Efficiently uploads structured data rows via WebSocket streaming
for use in GraphQL queries.
Args:
flow: Flow identifier
objects: Iterator yielding object dictionaries
rows: Iterator yielding row dictionaries
**kwargs: Additional parameters (reserved for future use)
Example:
```python
bulk = api.bulk()
# Generate objects to import
def object_generator():
yield {"id": "obj1", "name": "Object 1", "value": 100}
yield {"id": "obj2", "name": "Object 2", "value": 200}
# ... more objects
# Generate rows to import
def row_generator():
yield {"id": "row1", "name": "Row 1", "value": 100}
yield {"id": "row2", "name": "Row 2", "value": 200}
# ... more rows
bulk.import_objects(
bulk.import_rows(
flow="default",
objects=object_generator()
rows=row_generator()
)
```
"""
self._run_async(self._import_objects_async(flow, objects))
self._run_async(self._import_rows_async(flow, rows))
async def _import_objects_async(self, flow: str, objects: Iterator[Dict[str, Any]]) -> None:
"""Async implementation of objects import"""
ws_url = f"{self.url}/api/v1/flow/{flow}/import/objects"
async def _import_rows_async(self, flow: str, rows: Iterator[Dict[str, Any]]) -> None:
"""Async implementation of rows import"""
ws_url = f"{self.url}/api/v1/flow/{flow}/import/rows"
if self.token:
ws_url = f"{ws_url}?token={self.token}"
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
for obj in objects:
await websocket.send(json.dumps(obj))
for row in rows:
await websocket.send(json.dumps(row))
def close(self) -> None:
"""Close connections"""

View file

@ -71,8 +71,8 @@ class NLPQueryError(TrustGraphException):
pass
class ObjectsQueryError(TrustGraphException):
"""Objects query service error"""
class RowsQueryError(TrustGraphException):
"""Rows query service error"""
pass
@ -103,7 +103,7 @@ ERROR_TYPE_MAPPING = {
"load-error": LoadError,
"lookup-error": LookupError,
"nlp-query-error": NLPQueryError,
"objects-query-error": ObjectsQueryError,
"rows-query-error": RowsQueryError,
"request-error": RequestError,
"structured-query-error": StructuredQueryError,
"unexpected-error": UnexpectedError,

View file

@ -10,12 +10,27 @@ import json
import base64
from .. knowledge import hash, Uri, Literal
from .. schema import IRI, LITERAL
from . types import Triple
from . exceptions import ProtocolException
def to_value(x):
if x["e"]: return Uri(x["v"])
return Literal(x["v"])
"""Convert wire format to Uri or Literal."""
if x.get("t") == IRI:
return Uri(x.get("i", ""))
elif x.get("t") == LITERAL:
return Literal(x.get("v", ""))
# Fallback for any other type
return Literal(x.get("v", x.get("i", "")))
def from_value(v):
    """Convert a Uri or Literal into the gateway Term wire format.

    Uri values map to IRI terms ({"t": IRI, "i": ...}); any other value
    is serialized as a literal term ({"t": LITERAL, "v": ...}).
    """
    if isinstance(v, Uri):
        tag, key = IRI, "i"
    else:
        tag, key = LITERAL, "v"
    return {"t": tag, key: str(v)}
class Flow:
"""
@ -569,9 +584,13 @@ class FlowInstance:
```
"""
# First convert text to embeddings vectors
emb_result = self.embeddings(text=text)
vectors = emb_result.get("vectors", [])
# Query graph embeddings for semantic search
input = {
"text": text,
"vectors": vectors,
"user": user,
"collection": collection,
"limit": limit
@ -582,6 +601,51 @@ class FlowInstance:
input
)
def document_embeddings_query(self, text, user, collection, limit=10):
    """
    Query document chunks using semantic similarity.

    Converts the query text to embedding vectors first, then searches
    the document-embeddings service for chunks whose vectors are most
    similar to the query.

    Args:
        text: Query text for semantic search
        user: User/keyspace identifier
        collection: Collection identifier
        limit: Maximum number of results (default: 10)

    Returns:
        dict: Query results with similar document chunks

    Example:
        ```python
        flow = api.flow().id("default")
        results = flow.document_embeddings_query(
            text="machine learning algorithms",
            user="trustgraph",
            collection="research-papers",
            limit=5,
        )
        ```
    """
    # Embed the query text; the service searches by vector, not text.
    vectors = self.embeddings(text=text).get("vectors", [])
    payload = {
        "vectors": vectors,
        "user": user,
        "collection": collection,
        "limit": limit,
    }
    return self.request("service/document-embeddings", payload)
def prompt(self, id, variables):
"""
Execute a prompt template with variable substitution.
@ -751,17 +815,17 @@ class FlowInstance:
if s:
if not isinstance(s, Uri):
raise RuntimeError("s must be Uri")
input["s"] = { "v": str(s), "e": isinstance(s, Uri), }
input["s"] = from_value(s)
if p:
if not isinstance(p, Uri):
raise RuntimeError("p must be Uri")
input["p"] = { "v": str(p), "e": isinstance(p, Uri), }
input["p"] = from_value(p)
if o:
if not isinstance(o, Uri) and not isinstance(o, Literal):
raise RuntimeError("o must be Uri or Literal")
input["o"] = { "v": str(o), "e": isinstance(o, Uri), }
input["o"] = from_value(o)
object = self.request(
"service/triples",
@ -834,9 +898,9 @@ class FlowInstance:
if metadata:
metadata.emit(
lambda t: triples.append({
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
"s": from_value(t["s"]),
"p": from_value(t["p"]),
"o": from_value(t["o"]),
})
)
@ -913,9 +977,9 @@ class FlowInstance:
if metadata:
metadata.emit(
lambda t: triples.append({
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
"s": from_value(t["s"]),
"p": from_value(t["p"]),
"o": from_value(t["o"]),
})
)
@ -937,12 +1001,12 @@ class FlowInstance:
input
)
def objects_query(
def rows_query(
self, query, user="trustgraph", collection="default",
variables=None, operation_name=None
):
"""
Execute a GraphQL query against structured objects in the knowledge graph.
Execute a GraphQL query against structured rows in the knowledge graph.
Queries structured data using GraphQL syntax, allowing complex queries
with filtering, aggregation, and relationship traversal.
@ -974,7 +1038,7 @@ class FlowInstance:
}
}
'''
result = flow.objects_query(
result = flow.rows_query(
query=query,
user="trustgraph",
collection="scientists"
@ -989,7 +1053,7 @@ class FlowInstance:
}
}
'''
result = flow.objects_query(
result = flow.rows_query(
query=query,
variables={"name": "Marie Curie"}
)
@ -1010,7 +1074,7 @@ class FlowInstance:
input["operation_name"] = operation_name
response = self.request(
"service/objects",
"service/rows",
input
)
@ -1233,3 +1297,78 @@ class FlowInstance:
return response["schema-matches"]
def row_embeddings_query(
    self, text, schema_name, user="trustgraph", collection="default",
    index_name=None, limit=10
):
    """
    Query row data using semantic similarity on indexed fields.

    Embeds the query text and searches the row-embeddings service for
    rows whose indexed field values are most similar, enabling
    fuzzy/semantic matching on structured data.

    Args:
        text: Query text for semantic search
        schema_name: Schema name to search within
        user: User/keyspace identifier (default: "trustgraph")
        collection: Collection identifier (default: "default")
        index_name: Optional index name to restrict the search
        limit: Maximum number of results (default: 10)

    Returns:
        dict: Query results with matches containing index_name,
        index_value, text, and score

    Raises:
        ProtocolException: if the service reports a system-level error.

    Example:
        ```python
        flow = api.flow().id("default")
        results = flow.row_embeddings_query(
            text="John Smith",
            schema_name="customers",
            user="trustgraph",
            collection="sales",
            limit=5,
        )
        ```
    """
    # Embed the query text; the service searches by vector.
    vectors = self.embeddings(text=text).get("vectors", [])
    payload = {
        "vectors": vectors,
        "schema_name": schema_name,
        "user": user,
        "collection": collection,
        "limit": limit,
    }
    if index_name:
        payload["index_name"] = index_name
    response = self.request("service/row-embeddings", payload)
    # Surface system-level errors as exceptions.
    err = response["error"] if "error" in response else None
    if err:
        err_type = err.get("type", "unknown")
        err_message = err.get("message", "Unknown error")
        raise ProtocolException(f"{err_type}: {err_message}")
    return response

View file

@ -10,11 +10,18 @@ import json
import base64
from .. knowledge import hash, Uri, Literal
from .. schema import IRI, LITERAL
from . types import Triple
def to_value(x):
if x["e"]: return Uri(x["v"])
return Literal(x["v"])
"""Convert wire format to Uri or Literal."""
if x.get("t") == IRI:
return Uri(x.get("i", ""))
elif x.get("t") == LITERAL:
return Literal(x.get("v", ""))
# Fallback for any other type
return Literal(x.get("v", x.get("i", "")))
class Knowledge:
"""

View file

@ -12,13 +12,28 @@ import logging
from . types import DocumentMetadata, ProcessingMetadata, Triple
from .. knowledge import hash, Uri, Literal
from .. schema import IRI, LITERAL
from . exceptions import *
logger = logging.getLogger(__name__)
def to_value(x):
if x["e"]: return Uri(x["v"])
return Literal(x["v"])
"""Convert wire format to Uri or Literal."""
if x.get("t") == IRI:
return Uri(x.get("i", ""))
elif x.get("t") == LITERAL:
return Literal(x.get("v", ""))
# Fallback for any other type
return Literal(x.get("v", x.get("i", "")))
def from_value(v):
    """Convert a Uri or Literal into the wire-format Term dict.

    A Uri becomes an IRI term ({"t": IRI, "i": ...}); any other value
    becomes a literal term ({"t": LITERAL, "v": ...}).
    """
    if isinstance(v, Uri):
        return {"t": IRI, "i": str(v)}
    return {"t": LITERAL, "v": str(v)}
class Library:
"""
@ -118,18 +133,18 @@ class Library:
if isinstance(metadata, list):
triples = [
{
"s": { "v": t.s, "e": isinstance(t.s, Uri) },
"p": { "v": t.p, "e": isinstance(t.p, Uri) },
"o": { "v": t.o, "e": isinstance(t.o, Uri) }
"s": from_value(t.s),
"p": from_value(t.p),
"o": from_value(t.o),
}
for t in metadata
]
elif hasattr(metadata, "emit"):
metadata.emit(
lambda t: triples.append({
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
"s": from_value(t["s"]),
"p": from_value(t["p"]),
"o": from_value(t["o"]),
})
)
else:
@ -315,9 +330,9 @@ class Library:
"comments": metadata.comments,
"metadata": [
{
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
"s": from_value(t["s"]),
"p": from_value(t["p"]),
"o": from_value(t["o"]),
}
for t in metadata.metadata
],

View file

@ -649,8 +649,12 @@ class SocketFlowInstance:
)
```
"""
# First convert text to embeddings vectors
emb_result = self.embeddings(text=text)
vectors = emb_result.get("vectors", [])
request = {
"text": text,
"vectors": vectors,
"user": user,
"collection": collection,
"limit": limit
@ -659,6 +663,54 @@ class SocketFlowInstance:
return self.client._send_request_sync("graph-embeddings", self.flow_id, request, False)
def document_embeddings_query(
    self,
    text: str,
    user: str,
    collection: str,
    limit: int = 10,
    **kwargs: Any
) -> Dict[str, Any]:
    """
    Query document chunks using semantic similarity.

    Embeds the query text, then sends a document-embeddings request
    over the socket for the current flow.

    Args:
        text: Query text for semantic search
        user: User/keyspace identifier
        collection: Collection identifier
        limit: Maximum number of results (default: 10)
        **kwargs: Additional parameters passed to the service

    Returns:
        dict: Query results with similar document chunks

    Example:
        ```python
        socket = api.socket()
        flow = socket.flow("default")
        results = flow.document_embeddings_query(
            text="machine learning algorithms",
            user="trustgraph",
            collection="research-papers",
            limit=5,
        )
        ```
    """
    # Embed the query text first; the service searches by vector.
    emb = self.embeddings(text=text)
    payload = {
        "vectors": emb.get("vectors", []),
        "user": user,
        "collection": collection,
        "limit": limit,
    }
    payload.update(kwargs)
    return self.client._send_request_sync(
        "document-embeddings", self.flow_id, payload, False
    )
def embeddings(self, text: str, **kwargs: Any) -> Dict[str, Any]:
"""
Generate vector embeddings for text.
@ -737,7 +789,7 @@ class SocketFlowInstance:
return self.client._send_request_sync("triples", self.flow_id, request, False)
def objects_query(
def rows_query(
self,
query: str,
user: str,
@ -747,7 +799,7 @@ class SocketFlowInstance:
**kwargs: Any
) -> Dict[str, Any]:
"""
Execute a GraphQL query against structured objects.
Execute a GraphQL query against structured rows.
Args:
query: GraphQL query string
@ -774,7 +826,7 @@ class SocketFlowInstance:
}
}
'''
result = flow.objects_query(
result = flow.rows_query(
query=query,
user="trustgraph",
collection="scientists"
@ -792,7 +844,7 @@ class SocketFlowInstance:
request["operationName"] = operation_name
request.update(kwargs)
return self.client._send_request_sync("objects", self.flow_id, request, False)
return self.client._send_request_sync("rows", self.flow_id, request, False)
def mcp_tool(
self,
@ -829,3 +881,73 @@ class SocketFlowInstance:
request.update(kwargs)
return self.client._send_request_sync("mcp-tool", self.flow_id, request, False)
def row_embeddings_query(
    self,
    text: str,
    schema_name: str,
    user: str = "trustgraph",
    collection: str = "default",
    index_name: Optional[str] = None,
    limit: int = 10,
    **kwargs: Any
) -> Dict[str, Any]:
    """
    Query row data using semantic similarity on indexed fields.

    Embeds the query text, then sends a row-embeddings request over the
    socket for the current flow — fuzzy/semantic matching over
    structured data.

    Args:
        text: Query text for semantic search
        schema_name: Schema name to search within
        user: User/keyspace identifier (default: "trustgraph")
        collection: Collection identifier (default: "default")
        index_name: Optional index name to restrict the search
        limit: Maximum number of results (default: 10)
        **kwargs: Additional parameters passed to the service

    Returns:
        dict: Query results with matches containing index_name,
        index_value, text, and score

    Example:
        ```python
        socket = api.socket()
        flow = socket.flow("default")
        results = flow.row_embeddings_query(
            text="John Smith",
            schema_name="customers",
            user="trustgraph",
            collection="sales",
            limit=5,
        )
        ```
    """
    # Embed the query text first; the service searches by vector.
    emb = self.embeddings(text=text)
    payload = {
        "vectors": emb.get("vectors", []),
        "schema_name": schema_name,
        "user": user,
        "collection": collection,
        "limit": limit,
    }
    if index_name:
        payload["index_name"] = index_name
    payload.update(kwargs)
    return self.client._send_request_sync(
        "row-embeddings", self.flow_id, payload, False
    )

View file

@ -34,5 +34,6 @@ from . tool_service import ToolService
from . tool_client import ToolClientSpec
from . agent_client import AgentClientSpec
from . structured_query_client import StructuredQueryClientSpec
from . row_embeddings_query_client import RowEmbeddingsQueryClientSpec
from . collection_config_handler import CollectionConfigHandler

View file

@ -7,7 +7,7 @@ embeddings.
import logging
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
from .. schema import Error, Value
from .. schema import Error, Term
from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec
@ -16,7 +16,7 @@ from . producer_spec import ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "ge-query"
default_ident = "doc-embeddings-query"
class DocumentEmbeddingsQueryService(FlowProcessor):

View file

@ -2,15 +2,21 @@
import logging
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse, IRI, LITERAL
from .. knowledge import Uri, Literal
# Module logger
logger = logging.getLogger(__name__)
def to_value(x):
if x.is_uri: return Uri(x.value)
return Literal(x.value)
"""Convert schema Term to Uri or Literal."""
if x.type == IRI:
return Uri(x.iri)
elif x.type == LITERAL:
return Literal(x.value)
# Fallback
return Literal(x.value or x.iri)
class GraphEmbeddingsClient(RequestResponse):
async def query(self, vectors, limit=20, user="trustgraph",

View file

@ -7,7 +7,7 @@ embeddings.
import logging
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
from .. schema import Error, Value
from .. schema import Error, Term
from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec
@ -16,7 +16,7 @@ from . producer_spec import ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "ge-query"
default_ident = "graph-embeddings-query"
class GraphEmbeddingsQueryService(FlowProcessor):

View file

@ -0,0 +1,45 @@
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import RowEmbeddingsRequest, RowEmbeddingsResponse
class RowEmbeddingsQueryClient(RequestResponse):
    """Request/response client for the row-embeddings query service."""

    async def row_embeddings_query(
        self, vectors, schema_name, user="trustgraph", collection="default",
        index_name=None, limit=10, timeout=600
    ):
        """Run a row-embeddings query and return matches as plain dicts.

        Args:
            vectors: Query embedding vectors
            schema_name: Schema name to search within
            user: User identifier (default: "trustgraph")
            collection: Collection identifier (default: "default")
            index_name: Optional index name to restrict the search
            limit: Maximum number of results (default: 10)
            timeout: Request timeout in seconds (default: 600)

        Returns:
            list[dict]: matches with index_name, index_value, text, score

        Raises:
            RuntimeError: if the service responds with an error.
        """
        req = RowEmbeddingsRequest(
            vectors=vectors,
            schema_name=schema_name,
            user=user,
            collection=collection,
            limit=limit,
        )
        if index_name:
            req.index_name = index_name

        resp = await self.request(req, timeout=timeout)
        if resp.error:
            raise RuntimeError(resp.error.message)

        # Flatten schema match records into plain dicts for callers.
        return [
            {
                "index_name": m.index_name,
                "index_value": m.index_value,
                "text": m.text,
                "score": m.score,
            }
            for m in (resp.matches or [])
        ]
class RowEmbeddingsQueryClientSpec(RequestResponseSpec):
    """Spec wiring RowEmbeddingsQueryClient to its request/response queues."""

    def __init__(self, request_name, response_name):
        super(RowEmbeddingsQueryClientSpec, self).__init__(
            request_name=request_name,
            request_schema=RowEmbeddingsRequest,
            response_name=response_name,
            response_schema=RowEmbeddingsResponse,
            impl=RowEmbeddingsQueryClient,
        )

View file

@ -222,35 +222,50 @@ class Subscriber:
# Store message for later acknowledgment
msg_id = str(uuid.uuid4())
self.pending_acks[msg_id] = msg
try:
id = msg.properties()["id"]
except:
id = None
value = msg.value()
delivery_success = False
has_matching_waiter = False
async with self.lock:
# Deliver to specific subscribers
if id in self.q:
has_matching_waiter = True
delivery_success = await self._deliver_to_queue(
self.q[id], value
)
# Deliver to all subscribers
for q in self.full.values():
has_matching_waiter = True
if await self._deliver_to_queue(q, value):
delivery_success = True
# Acknowledge only on successful delivery
if delivery_success:
self.consumer.acknowledge(msg)
del self.pending_acks[msg_id]
else:
# Negative acknowledge for retry
self.consumer.negative_acknowledge(msg)
del self.pending_acks[msg_id]
# Always acknowledge the message to prevent redelivery storms
# on shared topics. Negative acknowledging orphaned messages
# (no matching waiter) causes immediate redelivery to all
# subscribers, none of whom can handle it either.
self.consumer.acknowledge(msg)
del self.pending_acks[msg_id]
if not delivery_success:
if not has_matching_waiter:
# Message arrived for a waiter that no longer exists
# (likely due to client disconnect or timeout)
logger.debug(
f"Discarding orphaned message with id={id} - "
"no matching waiter"
)
else:
# Delivery failed (e.g., queue full with drop_new strategy)
logger.debug(
f"Message with id={id} dropped due to backpressure"
)
async def _deliver_to_queue(self, queue, value):
"""Deliver message to queue with backpressure handling"""

View file

@ -1,24 +1,34 @@
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL
from .. knowledge import Uri, Literal
class Triple:
def __init__(self, s, p, o):
self.s = s
self.p = p
self.o = o
def to_value(x):
if x.is_uri: return Uri(x.value)
return Literal(x.value)
"""Convert schema Term to Uri or Literal."""
if x.type == IRI:
return Uri(x.iri)
elif x.type == LITERAL:
return Literal(x.value)
# Fallback
return Literal(x.value or x.iri)
def from_value(x):
if x is None: return None
"""Convert Uri or Literal to schema Term."""
if x is None:
return None
if isinstance(x, Uri):
return Value(value=str(x), is_uri=True)
return Term(type=IRI, iri=str(x))
else:
return Value(value=str(x), is_uri=False)
return Term(type=LITERAL, value=str(x))
class TriplesClient(RequestResponse):
async def query(self, s=None, p=None, o=None, limit=20,

View file

@ -7,7 +7,7 @@ null. Output is a list of triples.
import logging
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .. schema import Value, Triple
from .. schema import Term, Triple
from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec

View file

@ -0,0 +1,60 @@
import _pulsar
from .. schema import RowEmbeddingsRequest, RowEmbeddingsResponse
from .. schema import row_embeddings_request_queue
from .. schema import row_embeddings_response_queue
from . base import BaseClient
# Ugly
ERROR=_pulsar.LoggerLevel.Error
WARN=_pulsar.LoggerLevel.Warn
INFO=_pulsar.LoggerLevel.Info
DEBUG=_pulsar.LoggerLevel.Debug
class RowEmbeddingsClient(BaseClient):
def __init__(
self, log_level=ERROR,
subscriber=None,
input_queue=None,
output_queue=None,
pulsar_host="pulsar://pulsar:6650",
pulsar_api_key=None,
):
if input_queue == None:
input_queue = row_embeddings_request_queue
if output_queue == None:
output_queue = row_embeddings_response_queue
super(RowEmbeddingsClient, self).__init__(
log_level=log_level,
subscriber=subscriber,
input_queue=input_queue,
output_queue=output_queue,
pulsar_host=pulsar_host,
pulsar_api_key=pulsar_api_key,
input_schema=RowEmbeddingsRequest,
output_schema=RowEmbeddingsResponse,
)
def request(
self, vectors, schema_name, user="trustgraph", collection="default",
index_name=None, limit=10, timeout=300
):
kwargs = dict(
user=user, collection=collection,
vectors=vectors, schema_name=schema_name,
limit=limit, timeout=timeout
)
if index_name:
kwargs["index_name"] = index_name
response = self.call(**kwargs)
if response.error:
raise RuntimeError(f"{response.error.type}: {response.error.message}")
return response.matches

View file

@ -2,7 +2,7 @@
import _pulsar
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL
from .. schema import triples_request_queue
from .. schema import triples_response_queue
from . base import BaseClient
@ -46,9 +46,9 @@ class TriplesQueryClient(BaseClient):
if ent == None: return None
if ent.startswith("http://") or ent.startswith("https://"):
return Value(value=ent, is_uri=True)
return Term(type=IRI, iri=ent)
return Value(value=ent, is_uri=False)
return Term(type=LITERAL, value=ent)
def request(
self,

View file

@ -19,9 +19,10 @@ from .translators.prompt import PromptRequestTranslator, PromptResponseTranslato
from .translators.tool import ToolRequestTranslator, ToolResponseTranslator
from .translators.embeddings_query import (
DocumentEmbeddingsRequestTranslator, DocumentEmbeddingsResponseTranslator,
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator,
RowEmbeddingsRequestTranslator, RowEmbeddingsResponseTranslator
)
from .translators.objects_query import ObjectsQueryRequestTranslator, ObjectsQueryResponseTranslator
from .translators.rows_query import RowsQueryRequestTranslator, RowsQueryResponseTranslator
from .translators.nlp_query import QuestionToStructuredQueryRequestTranslator, QuestionToStructuredQueryResponseTranslator
from .translators.structured_query import StructuredQueryRequestTranslator, StructuredQueryResponseTranslator
from .translators.diagnosis import StructuredDataDiagnosisRequestTranslator, StructuredDataDiagnosisResponseTranslator
@ -107,15 +108,21 @@ TranslatorRegistry.register_service(
)
TranslatorRegistry.register_service(
"graph-embeddings-query",
GraphEmbeddingsRequestTranslator(),
"graph-embeddings-query",
GraphEmbeddingsRequestTranslator(),
GraphEmbeddingsResponseTranslator()
)
TranslatorRegistry.register_service(
"objects-query",
ObjectsQueryRequestTranslator(),
ObjectsQueryResponseTranslator()
"row-embeddings-query",
RowEmbeddingsRequestTranslator(),
RowEmbeddingsResponseTranslator()
)
TranslatorRegistry.register_service(
"rows-query",
RowsQueryRequestTranslator(),
RowsQueryResponseTranslator()
)
TranslatorRegistry.register_service(

View file

@ -1,5 +1,5 @@
from .base import Translator, MessageTranslator
from .primitives import ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator
from .primitives import TermTranslator, ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator
from .metadata import DocumentMetadataTranslator, ProcessingMetadataTranslator
from .agent import AgentRequestTranslator, AgentResponseTranslator
from .embeddings import EmbeddingsRequestTranslator, EmbeddingsResponseTranslator
@ -15,7 +15,8 @@ from .flow import FlowRequestTranslator, FlowResponseTranslator
from .prompt import PromptRequestTranslator, PromptResponseTranslator
from .embeddings_query import (
DocumentEmbeddingsRequestTranslator, DocumentEmbeddingsResponseTranslator,
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator,
RowEmbeddingsRequestTranslator, RowEmbeddingsResponseTranslator
)
from .objects_query import ObjectsQueryRequestTranslator, ObjectsQueryResponseTranslator
from .rows_query import RowsQueryRequestTranslator, RowsQueryResponseTranslator
from .diagnosis import StructuredDataDiagnosisRequestTranslator, StructuredDataDiagnosisResponseTranslator

View file

@ -1,7 +1,8 @@
from typing import Dict, Any, Tuple
from ...schema import (
DocumentEmbeddingsRequest, DocumentEmbeddingsResponse,
GraphEmbeddingsRequest, GraphEmbeddingsResponse
GraphEmbeddingsRequest, GraphEmbeddingsResponse,
RowEmbeddingsRequest, RowEmbeddingsResponse, RowIndexMatch
)
from .base import MessageTranslator
from .primitives import ValueTranslator
@ -92,3 +93,62 @@ class GraphEmbeddingsResponseTranslator(MessageTranslator):
def from_response_with_completion(self, obj: GraphEmbeddingsResponse) -> Tuple[Dict[str, Any], bool]:
"""Returns (response_dict, is_final)"""
return self.from_pulsar(obj), True
class RowEmbeddingsRequestTranslator(MessageTranslator):
"""Translator for RowEmbeddingsRequest schema objects"""
def to_pulsar(self, data: Dict[str, Any]) -> RowEmbeddingsRequest:
return RowEmbeddingsRequest(
vectors=data["vectors"],
limit=int(data.get("limit", 10)),
user=data.get("user", "trustgraph"),
collection=data.get("collection", "default"),
schema_name=data.get("schema_name", ""),
index_name=data.get("index_name")
)
def from_pulsar(self, obj: RowEmbeddingsRequest) -> Dict[str, Any]:
result = {
"vectors": obj.vectors,
"limit": obj.limit,
"user": obj.user,
"collection": obj.collection,
"schema_name": obj.schema_name,
}
if obj.index_name:
result["index_name"] = obj.index_name
return result
class RowEmbeddingsResponseTranslator(MessageTranslator):
"""Translator for RowEmbeddingsResponse schema objects"""
def to_pulsar(self, data: Dict[str, Any]) -> RowEmbeddingsResponse:
raise NotImplementedError("Response translation to Pulsar not typically needed")
def from_pulsar(self, obj: RowEmbeddingsResponse) -> Dict[str, Any]:
result = {}
if obj.error is not None:
result["error"] = {
"type": obj.error.type,
"message": obj.error.message
}
if obj.matches is not None:
result["matches"] = [
{
"index_name": match.index_name,
"index_value": match.index_value,
"text": match.text,
"score": match.score
}
for match in obj.matches
]
return result
def from_response_with_completion(self, obj: RowEmbeddingsResponse) -> Tuple[Dict[str, Any], bool]:
"""Returns (response_dict, is_final)"""
return self.from_pulsar(obj), True

View file

@ -1,37 +1,133 @@
from typing import Dict, Any, List
from ...schema import Value, Triple, RowSchema, Field
from ...schema import Term, Triple, RowSchema, Field, IRI, BLANK, LITERAL, TRIPLE
from .base import Translator
class ValueTranslator(Translator):
"""Translator for Value schema objects"""
def to_pulsar(self, data: Dict[str, Any]) -> Value:
return Value(value=data["v"], is_uri=data["e"])
def from_pulsar(self, obj: Value) -> Dict[str, Any]:
return {"v": obj.value, "e": obj.is_uri}
class TermTranslator(Translator):
"""
Translator for Term schema objects.
Wire format (compact keys):
- "t": type (i/b/l/t)
- "i": iri (for IRI type)
- "d": id (for BLANK type)
- "v": value (for LITERAL type)
- "dt": datatype (for LITERAL type)
- "ln": language (for LITERAL type)
- "tr": triple (for TRIPLE type, nested)
"""
def to_pulsar(self, data: Dict[str, Any]) -> Term:
term_type = data.get("t", "")
if term_type == IRI:
return Term(type=IRI, iri=data.get("i", ""))
elif term_type == BLANK:
return Term(type=BLANK, id=data.get("d", ""))
elif term_type == LITERAL:
return Term(
type=LITERAL,
value=data.get("v", ""),
datatype=data.get("dt", ""),
language=data.get("ln", ""),
)
elif term_type == TRIPLE:
# Nested triple - use TripleTranslator
triple_data = data.get("tr")
if triple_data:
triple = _triple_translator_to_pulsar(triple_data)
else:
triple = None
return Term(type=TRIPLE, triple=triple)
else:
# Unknown or empty type
return Term(type=term_type)
def from_pulsar(self, obj: Term) -> Dict[str, Any]:
result: Dict[str, Any] = {"t": obj.type}
if obj.type == IRI:
result["i"] = obj.iri
elif obj.type == BLANK:
result["d"] = obj.id
elif obj.type == LITERAL:
result["v"] = obj.value
if obj.datatype:
result["dt"] = obj.datatype
if obj.language:
result["ln"] = obj.language
elif obj.type == TRIPLE:
if obj.triple:
result["tr"] = _triple_translator_from_pulsar(obj.triple)
return result
# Module-level helper functions to avoid circular instantiation
def _triple_translator_to_pulsar(data: Dict[str, Any]) -> Triple:
term_translator = TermTranslator()
return Triple(
s=term_translator.to_pulsar(data["s"]) if data.get("s") else None,
p=term_translator.to_pulsar(data["p"]) if data.get("p") else None,
o=term_translator.to_pulsar(data["o"]) if data.get("o") else None,
g=data.get("g"),
)
def _triple_translator_from_pulsar(obj: Triple) -> Dict[str, Any]:
term_translator = TermTranslator()
result: Dict[str, Any] = {}
if obj.s:
result["s"] = term_translator.from_pulsar(obj.s)
if obj.p:
result["p"] = term_translator.from_pulsar(obj.p)
if obj.o:
result["o"] = term_translator.from_pulsar(obj.o)
if obj.g:
result["g"] = obj.g
return result
class TripleTranslator(Translator):
"""Translator for Triple schema objects"""
"""Translator for Triple schema objects (quads with optional graph)"""
def __init__(self):
self.value_translator = ValueTranslator()
self.term_translator = TermTranslator()
def to_pulsar(self, data: Dict[str, Any]) -> Triple:
return Triple(
s=self.value_translator.to_pulsar(data["s"]),
p=self.value_translator.to_pulsar(data["p"]),
o=self.value_translator.to_pulsar(data["o"])
s=self.term_translator.to_pulsar(data["s"]) if data.get("s") else None,
p=self.term_translator.to_pulsar(data["p"]) if data.get("p") else None,
o=self.term_translator.to_pulsar(data["o"]) if data.get("o") else None,
g=data.get("g"),
)
def from_pulsar(self, obj: Triple) -> Dict[str, Any]:
return {
"s": self.value_translator.from_pulsar(obj.s),
"p": self.value_translator.from_pulsar(obj.p),
"o": self.value_translator.from_pulsar(obj.o)
}
result: Dict[str, Any] = {}
if obj.s:
result["s"] = self.term_translator.from_pulsar(obj.s)
if obj.p:
result["p"] = self.term_translator.from_pulsar(obj.p)
if obj.o:
result["o"] = self.term_translator.from_pulsar(obj.o)
if obj.g:
result["g"] = obj.g
return result
# Backward compatibility alias
ValueTranslator = TermTranslator
class SubgraphTranslator(Translator):

View file

@ -1,44 +1,44 @@
from typing import Dict, Any, Tuple, Optional
from ...schema import ObjectsQueryRequest, ObjectsQueryResponse
from ...schema import RowsQueryRequest, RowsQueryResponse
from .base import MessageTranslator
import json
class ObjectsQueryRequestTranslator(MessageTranslator):
"""Translator for ObjectsQueryRequest schema objects"""
def to_pulsar(self, data: Dict[str, Any]) -> ObjectsQueryRequest:
return ObjectsQueryRequest(
class RowsQueryRequestTranslator(MessageTranslator):
"""Translator for RowsQueryRequest schema objects"""
def to_pulsar(self, data: Dict[str, Any]) -> RowsQueryRequest:
return RowsQueryRequest(
user=data.get("user", "trustgraph"),
collection=data.get("collection", "default"),
query=data.get("query", ""),
variables=data.get("variables", {}),
operation_name=data.get("operation_name", None)
)
def from_pulsar(self, obj: ObjectsQueryRequest) -> Dict[str, Any]:
def from_pulsar(self, obj: RowsQueryRequest) -> Dict[str, Any]:
result = {
"user": obj.user,
"collection": obj.collection,
"query": obj.query,
"variables": dict(obj.variables) if obj.variables else {}
}
if obj.operation_name:
result["operation_name"] = obj.operation_name
return result
class ObjectsQueryResponseTranslator(MessageTranslator):
"""Translator for ObjectsQueryResponse schema objects"""
def to_pulsar(self, data: Dict[str, Any]) -> ObjectsQueryResponse:
class RowsQueryResponseTranslator(MessageTranslator):
"""Translator for RowsQueryResponse schema objects"""
def to_pulsar(self, data: Dict[str, Any]) -> RowsQueryResponse:
raise NotImplementedError("Response translation to Pulsar not typically needed")
def from_pulsar(self, obj: ObjectsQueryResponse) -> Dict[str, Any]:
def from_pulsar(self, obj: RowsQueryResponse) -> Dict[str, Any]:
result = {}
# Handle GraphQL response data
if obj.data:
try:
@ -47,7 +47,7 @@ class ObjectsQueryResponseTranslator(MessageTranslator):
result["data"] = obj.data
else:
result["data"] = None
# Handle GraphQL errors
if obj.errors:
result["errors"] = []
@ -60,20 +60,20 @@ class ObjectsQueryResponseTranslator(MessageTranslator):
if error.extensions:
error_dict["extensions"] = dict(error.extensions)
result["errors"].append(error_dict)
# Handle extensions
if obj.extensions:
result["extensions"] = dict(obj.extensions)
# Handle system-level error
if obj.error:
result["error"] = {
"type": obj.error.type,
"message": obj.error.message
}
return result
def from_response_with_completion(self, obj: ObjectsQueryResponse) -> Tuple[Dict[str, Any], bool]:
def from_response_with_completion(self, obj: RowsQueryResponse) -> Tuple[Dict[str, Any], bool]:
"""Returns (response_dict, is_final)"""
return self.from_pulsar(obj), True
return self.from_pulsar(obj), True

View file

@ -14,11 +14,13 @@ class TriplesQueryRequestTranslator(MessageTranslator):
s = self.value_translator.to_pulsar(data["s"]) if "s" in data else None
p = self.value_translator.to_pulsar(data["p"]) if "p" in data else None
o = self.value_translator.to_pulsar(data["o"]) if "o" in data else None
g = data.get("g") # None=default graph, "*"=all graphs
return TriplesQueryRequest(
s=s,
p=p,
o=o,
g=g,
limit=int(data.get("limit", 10000)),
user=data.get("user", "trustgraph"),
collection=data.get("collection", "default")
@ -30,14 +32,16 @@ class TriplesQueryRequestTranslator(MessageTranslator):
"user": obj.user,
"collection": obj.collection
}
if obj.s:
result["s"] = self.value_translator.from_pulsar(obj.s)
if obj.p:
result["p"] = self.value_translator.from_pulsar(obj.p)
if obj.o:
result["o"] = self.value_translator.from_pulsar(obj.o)
if obj.g is not None:
result["g"] = obj.g
return result

View file

@ -1,22 +1,57 @@
from dataclasses import dataclass, field
# Term type constants
IRI = "i" # IRI/URI node
BLANK = "b" # Blank node
LITERAL = "l" # Literal value
TRIPLE = "t" # Quoted triple (RDF-star)
@dataclass
class Error:
type: str = ""
message: str = ""
@dataclass
class Value:
class Term:
"""
RDF Term - can represent an IRI, blank node, literal, or quoted triple.
The 'type' field determines which other fields are relevant:
- IRI: use 'iri' field
- BLANK: use 'id' field
- LITERAL: use 'value', 'datatype', 'language' fields
- TRIPLE: use 'triple' field
"""
type: str = "" # One of: IRI, BLANK, LITERAL, TRIPLE
# For IRI terms (type == IRI)
iri: str = ""
# For blank nodes (type == BLANK)
id: str = ""
# For literals (type == LITERAL)
value: str = ""
is_uri: bool = False
type: str = ""
datatype: str = "" # XSD datatype URI (mutually exclusive with language)
language: str = "" # Language tag (mutually exclusive with datatype)
# For quoted triples (type == TRIPLE)
triple: "Triple | None" = None
@dataclass
class Triple:
s: Value | None = None
p: Value | None = None
o: Value | None = None
"""
RDF Triple / Quad.
The optional 'g' field specifies the named graph (None = default graph).
"""
s: Term | None = None # Subject
p: Term | None = None # Predicate
o: Term | None = None # Object
g: str | None = None # Graph name (IRI), None = default graph
@dataclass
class Field:

View file

@ -1,7 +1,7 @@
from dataclasses import dataclass, field
from ..core.metadata import Metadata
from ..core.primitives import Value, RowSchema
from ..core.primitives import Term, RowSchema
from ..core.topic import topic
############################################################################
@ -10,7 +10,7 @@ from ..core.topic import topic
@dataclass
class EntityEmbeddings:
entity: Value | None = None
entity: Term | None = None
vectors: list[list[float]] = field(default_factory=list)
# This is a 'batching' mechanism for the above data
@ -60,3 +60,23 @@ class StructuredObjectEmbedding:
field_embeddings: dict[str, list[float]] = field(default_factory=dict) # Per-field embeddings
############################################################################
# Row embeddings are embeddings associated with indexed field values
# in structured row data. Each index gets embedded separately.
@dataclass
class RowIndexEmbedding:
"""Single row's embedding for one index"""
index_name: str = "" # The indexed field name(s)
index_value: list[str] = field(default_factory=list) # The field value(s)
text: str = "" # Text that was embedded
vectors: list[list[float]] = field(default_factory=list)
@dataclass
class RowEmbeddings:
"""Batched row embeddings for a schema"""
metadata: Metadata | None = None
schema_name: str = ""
embeddings: list[RowIndexEmbedding] = field(default_factory=list)
############################################################################

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass, field
from ..core.primitives import Value, Triple
from ..core.primitives import Term, Triple
from ..core.metadata import Metadata
from ..core.topic import topic
@ -10,7 +10,7 @@ from ..core.topic import topic
@dataclass
class EntityContext:
entity: Value | None = None
entity: Term | None = None
context: str = ""
# This is a 'batching' mechanism for the above data

View file

@ -9,7 +9,7 @@ from .library import *
from .lookup import *
from .nlp_query import *
from .structured_query import *
from .objects_query import *
from .rows_query import *
from .diagnosis import *
from .collection import *
from .storage import *

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass
from ..core.primitives import Error, Value, Triple
from ..core.primitives import Error, Term, Triple
from ..core.topic import topic
from ..core.metadata import Metadata

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass, field
from ..core.primitives import Error, Value, Triple
from ..core.primitives import Error, Term, Triple
from ..core.topic import topic
############################################################################
@ -17,7 +17,7 @@ class GraphEmbeddingsRequest:
@dataclass
class GraphEmbeddingsResponse:
error: Error | None = None
entities: list[Value] = field(default_factory=list)
entities: list[Term] = field(default_factory=list)
############################################################################
@ -27,9 +27,10 @@ class GraphEmbeddingsResponse:
class TriplesQueryRequest:
user: str = ""
collection: str = ""
s: Value | None = None
p: Value | None = None
o: Value | None = None
s: Term | None = None
p: Term | None = None
o: Term | None = None
g: str | None = None # Graph IRI. None=default graph, "*"=all graphs
limit: int = 0
@dataclass
@ -58,4 +59,39 @@ document_embeddings_request_queue = topic(
)
document_embeddings_response_queue = topic(
"document-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
)
############################################################################
# Row embeddings query - for semantic/fuzzy matching on row index values
@dataclass
class RowIndexMatch:
"""A single matching row index from a semantic search"""
index_name: str = "" # The indexed field(s)
index_value: list[str] = field(default_factory=list) # The index values
text: str = "" # The text that was embedded
score: float = 0.0 # Similarity score
@dataclass
class RowEmbeddingsRequest:
"""Request for row embeddings semantic search"""
vectors: list[list[float]] = field(default_factory=list) # Query vectors
limit: int = 10 # Max results to return
user: str = "" # User/keyspace
collection: str = "" # Collection name
schema_name: str = "" # Schema name to search within
index_name: str | None = None # Optional: filter to specific index
@dataclass
class RowEmbeddingsResponse:
"""Response from row embeddings semantic search"""
error: Error | None = None
matches: list[RowIndexMatch] = field(default_factory=list)
row_embeddings_request_queue = topic(
"row-embeddings-request", qos='q0', tenant='trustgraph', namespace='flow'
)
row_embeddings_response_queue = topic(
"row-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
)

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass
from ..core.topic import topic
from ..core.primitives import Error, Value
from ..core.primitives import Error, Term
############################################################################

View file

@ -6,7 +6,7 @@ from ..core.topic import topic
############################################################################
# Objects Query Service - executes GraphQL queries against structured data
# Rows Query Service - executes GraphQL queries against structured data
@dataclass
class GraphQLError:
@ -15,7 +15,7 @@ class GraphQLError:
extensions: dict[str, str] = field(default_factory=dict) # Additional error metadata
@dataclass
class ObjectsQueryRequest:
class RowsQueryRequest:
user: str = "" # Cassandra keyspace (follows pattern from TriplesQueryRequest)
collection: str = "" # Data collection identifier (required for partition key)
query: str = "" # GraphQL query string
@ -23,7 +23,7 @@ class ObjectsQueryRequest:
operation_name: Optional[str] = None # Operation to execute for multi-operation documents
@dataclass
class ObjectsQueryResponse:
class RowsQueryResponse:
error: Error | None = None # System-level error (connection, timeout, etc.)
data: str = "" # JSON-encoded GraphQL response data
errors: list[GraphQLError] = field(default_factory=list) # GraphQL field-level errors