mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 00:46:22 +02:00
Merge 2.0 to master (#651)
This commit is contained in:
parent
3666ece2c5
commit
b9d7bf9a8b
212 changed files with 13940 additions and 6180 deletions
|
|
@ -73,6 +73,8 @@ from .async_metrics import AsyncMetrics
|
|||
# Types
|
||||
from .types import (
|
||||
Triple,
|
||||
Uri,
|
||||
Literal,
|
||||
ConfigKey,
|
||||
ConfigValue,
|
||||
DocumentMetadata,
|
||||
|
|
@ -99,7 +101,7 @@ from .exceptions import (
|
|||
LoadError,
|
||||
LookupError,
|
||||
NLPQueryError,
|
||||
ObjectsQueryError,
|
||||
RowsQueryError,
|
||||
RequestError,
|
||||
StructuredQueryError,
|
||||
UnexpectedError,
|
||||
|
|
@ -133,6 +135,8 @@ __all__ = [
|
|||
|
||||
# Types
|
||||
"Triple",
|
||||
"Uri",
|
||||
"Literal",
|
||||
"ConfigKey",
|
||||
"ConfigValue",
|
||||
"DocumentMetadata",
|
||||
|
|
@ -157,7 +161,7 @@ __all__ = [
|
|||
"LoadError",
|
||||
"LookupError",
|
||||
"NLPQueryError",
|
||||
"ObjectsQueryError",
|
||||
"RowsQueryError",
|
||||
"RequestError",
|
||||
"StructuredQueryError",
|
||||
"UnexpectedError",
|
||||
|
|
|
|||
|
|
@ -115,15 +115,15 @@ class AsyncBulkClient:
|
|||
async for raw_message in websocket:
|
||||
yield json.loads(raw_message)
|
||||
|
||||
async def import_objects(self, flow: str, objects: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
|
||||
"""Bulk import objects via WebSocket"""
|
||||
ws_url = f"{self.url}/api/v1/flow/{flow}/import/objects"
|
||||
async def import_rows(self, flow: str, rows: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
|
||||
"""Bulk import rows via WebSocket"""
|
||||
ws_url = f"{self.url}/api/v1/flow/{flow}/import/rows"
|
||||
if self.token:
|
||||
ws_url = f"{ws_url}?token={self.token}"
|
||||
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
|
||||
async for obj in objects:
|
||||
await websocket.send(json.dumps(obj))
|
||||
async for row in rows:
|
||||
await websocket.send(json.dumps(row))
|
||||
|
||||
async def aclose(self) -> None:
|
||||
"""Close connections"""
|
||||
|
|
|
|||
|
|
@ -612,8 +612,12 @@ class AsyncFlowInstance:
|
|||
print(f"{entity['name']}: {entity['score']}")
|
||||
```
|
||||
"""
|
||||
# First convert text to embeddings vectors
|
||||
emb_result = await self.embeddings(text=text)
|
||||
vectors = emb_result.get("vectors", [])
|
||||
|
||||
request_data = {
|
||||
"text": text,
|
||||
"vectors": vectors,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
|
|
@ -704,18 +708,18 @@ class AsyncFlowInstance:
|
|||
|
||||
return await self.request("triples", request_data)
|
||||
|
||||
async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
|
||||
operation_name: Optional[str] = None, **kwargs: Any):
|
||||
async def rows_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
|
||||
operation_name: Optional[str] = None, **kwargs: Any):
|
||||
"""
|
||||
Execute a GraphQL query on stored objects.
|
||||
Execute a GraphQL query on stored rows.
|
||||
|
||||
Queries structured data objects using GraphQL syntax. Supports complex
|
||||
Queries structured data rows using GraphQL syntax. Supports complex
|
||||
queries with variables and named operations.
|
||||
|
||||
Args:
|
||||
query: GraphQL query string
|
||||
user: User identifier
|
||||
collection: Collection identifier containing objects
|
||||
collection: Collection identifier containing rows
|
||||
variables: Optional GraphQL query variables
|
||||
operation_name: Optional operation name for multi-operation queries
|
||||
**kwargs: Additional service-specific parameters
|
||||
|
|
@ -739,7 +743,7 @@ class AsyncFlowInstance:
|
|||
}
|
||||
'''
|
||||
|
||||
result = await flow.objects_query(
|
||||
result = await flow.rows_query(
|
||||
query=query,
|
||||
user="trustgraph",
|
||||
collection="users",
|
||||
|
|
@ -761,4 +765,64 @@ class AsyncFlowInstance:
|
|||
request_data["operationName"] = operation_name
|
||||
request_data.update(kwargs)
|
||||
|
||||
return await self.request("objects", request_data)
|
||||
return await self.request("rows", request_data)
|
||||
|
||||
async def row_embeddings_query(
|
||||
self, text: str, schema_name: str, user: str = "trustgraph",
|
||||
collection: str = "default", index_name: Optional[str] = None,
|
||||
limit: int = 10, **kwargs: Any
|
||||
):
|
||||
"""
|
||||
Query row embeddings for semantic search on structured data.
|
||||
|
||||
Performs semantic search over row index embeddings to find rows whose
|
||||
indexed field values are most similar to the input text. Enables
|
||||
fuzzy/semantic matching on structured data.
|
||||
|
||||
Args:
|
||||
text: Query text for semantic search
|
||||
schema_name: Schema name to search within
|
||||
user: User identifier (default: "trustgraph")
|
||||
collection: Collection identifier (default: "default")
|
||||
index_name: Optional index name to filter search to specific index
|
||||
limit: Maximum number of results to return (default: 10)
|
||||
**kwargs: Additional service-specific parameters
|
||||
|
||||
Returns:
|
||||
dict: Response containing matches with index_name, index_value,
|
||||
text, and score
|
||||
|
||||
Example:
|
||||
```python
|
||||
async_flow = await api.async_flow()
|
||||
flow = async_flow.id("default")
|
||||
|
||||
# Search for customers by name similarity
|
||||
results = await flow.row_embeddings_query(
|
||||
text="John Smith",
|
||||
schema_name="customers",
|
||||
user="trustgraph",
|
||||
collection="sales",
|
||||
limit=5
|
||||
)
|
||||
|
||||
for match in results.get("matches", []):
|
||||
print(f"{match['index_name']}: {match['index_value']} (score: {match['score']})")
|
||||
```
|
||||
"""
|
||||
# First convert text to embeddings vectors
|
||||
emb_result = await self.embeddings(text=text)
|
||||
vectors = emb_result.get("vectors", [])
|
||||
|
||||
request_data = {
|
||||
"vectors": vectors,
|
||||
"schema_name": schema_name,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
if index_name:
|
||||
request_data["index_name"] = index_name
|
||||
request_data.update(kwargs)
|
||||
|
||||
return await self.request("row-embeddings", request_data)
|
||||
|
|
|
|||
|
|
@ -282,8 +282,12 @@ class AsyncSocketFlowInstance:
|
|||
|
||||
async def graph_embeddings_query(self, text: str, user: str, collection: str, limit: int = 10, **kwargs):
|
||||
"""Query graph embeddings for semantic search"""
|
||||
# First convert text to embeddings vectors
|
||||
emb_result = await self.embeddings(text=text)
|
||||
vectors = emb_result.get("vectors", [])
|
||||
|
||||
request = {
|
||||
"text": text,
|
||||
"vectors": vectors,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
|
|
@ -316,9 +320,9 @@ class AsyncSocketFlowInstance:
|
|||
|
||||
return await self.client._send_request("triples", self.flow_id, request)
|
||||
|
||||
async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
|
||||
operation_name: Optional[str] = None, **kwargs):
|
||||
"""GraphQL query"""
|
||||
async def rows_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
|
||||
operation_name: Optional[str] = None, **kwargs):
|
||||
"""GraphQL query against structured rows"""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
|
|
@ -330,7 +334,7 @@ class AsyncSocketFlowInstance:
|
|||
request["operationName"] = operation_name
|
||||
request.update(kwargs)
|
||||
|
||||
return await self.client._send_request("objects", self.flow_id, request)
|
||||
return await self.client._send_request("rows", self.flow_id, request)
|
||||
|
||||
async def mcp_tool(self, name: str, parameters: Dict[str, Any], **kwargs):
|
||||
"""Execute MCP tool"""
|
||||
|
|
@ -341,3 +345,26 @@ class AsyncSocketFlowInstance:
|
|||
request.update(kwargs)
|
||||
|
||||
return await self.client._send_request("mcp-tool", self.flow_id, request)
|
||||
|
||||
async def row_embeddings_query(
|
||||
self, text: str, schema_name: str, user: str = "trustgraph",
|
||||
collection: str = "default", index_name: Optional[str] = None,
|
||||
limit: int = 10, **kwargs
|
||||
):
|
||||
"""Query row embeddings for semantic search on structured data"""
|
||||
# First convert text to embeddings vectors
|
||||
emb_result = await self.embeddings(text=text)
|
||||
vectors = emb_result.get("vectors", [])
|
||||
|
||||
request = {
|
||||
"vectors": vectors,
|
||||
"schema_name": schema_name,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
if index_name:
|
||||
request["index_name"] = index_name
|
||||
request.update(kwargs)
|
||||
|
||||
return await self.client._send_request("row-embeddings", self.flow_id, request)
|
||||
|
|
|
|||
|
|
@ -15,6 +15,15 @@ from . types import Triple
|
|||
from . exceptions import ProtocolException
|
||||
|
||||
|
||||
def _string_to_term(value: str) -> Dict[str, Any]:
|
||||
"""Convert a string value to Term format for the gateway."""
|
||||
# Treat URIs as IRI type, otherwise as literal
|
||||
if value.startswith("http://") or value.startswith("https://") or "://" in value:
|
||||
return {"t": "i", "i": value}
|
||||
else:
|
||||
return {"t": "l", "v": value}
|
||||
|
||||
|
||||
class BulkClient:
|
||||
"""
|
||||
Synchronous bulk operations client for import/export.
|
||||
|
|
@ -62,7 +71,12 @@ class BulkClient:
|
|||
|
||||
return loop.run_until_complete(coro)
|
||||
|
||||
def import_triples(self, flow: str, triples: Iterator[Triple], **kwargs: Any) -> None:
|
||||
def import_triples(
|
||||
self, flow: str, triples: Iterator[Triple],
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
batch_size: int = 100,
|
||||
**kwargs: Any
|
||||
) -> None:
|
||||
"""
|
||||
Bulk import RDF triples into a flow.
|
||||
|
||||
|
|
@ -71,6 +85,8 @@ class BulkClient:
|
|||
Args:
|
||||
flow: Flow identifier
|
||||
triples: Iterator yielding Triple objects
|
||||
metadata: Metadata dict with id, metadata, user, collection
|
||||
batch_size: Number of triples per batch (default 100)
|
||||
**kwargs: Additional parameters (reserved for future use)
|
||||
|
||||
Example:
|
||||
|
|
@ -86,23 +102,47 @@ class BulkClient:
|
|||
# ... more triples
|
||||
|
||||
# Import triples
|
||||
bulk.import_triples(flow="default", triples=triple_generator())
|
||||
bulk.import_triples(
|
||||
flow="default",
|
||||
triples=triple_generator(),
|
||||
metadata={"id": "doc1", "metadata": [], "user": "user1", "collection": "default"}
|
||||
)
|
||||
```
|
||||
"""
|
||||
self._run_async(self._import_triples_async(flow, triples))
|
||||
self._run_async(self._import_triples_async(flow, triples, metadata, batch_size))
|
||||
|
||||
async def _import_triples_async(self, flow: str, triples: Iterator[Triple]) -> None:
|
||||
async def _import_triples_async(
|
||||
self, flow: str, triples: Iterator[Triple],
|
||||
metadata: Optional[Dict[str, Any]], batch_size: int
|
||||
) -> None:
|
||||
"""Async implementation of triple import"""
|
||||
ws_url = f"{self.url}/api/v1/flow/{flow}/import/triples"
|
||||
if self.token:
|
||||
ws_url = f"{ws_url}?token={self.token}"
|
||||
|
||||
if metadata is None:
|
||||
metadata = {"id": "", "metadata": [], "user": "trustgraph", "collection": "default"}
|
||||
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
|
||||
batch = []
|
||||
for triple in triples:
|
||||
batch.append({
|
||||
"s": _string_to_term(triple.s),
|
||||
"p": _string_to_term(triple.p),
|
||||
"o": _string_to_term(triple.o)
|
||||
})
|
||||
if len(batch) >= batch_size:
|
||||
message = {
|
||||
"metadata": metadata,
|
||||
"triples": batch
|
||||
}
|
||||
await websocket.send(json.dumps(message))
|
||||
batch = []
|
||||
# Send remaining items
|
||||
if batch:
|
||||
message = {
|
||||
"s": triple.s,
|
||||
"p": triple.p,
|
||||
"o": triple.o
|
||||
"metadata": metadata,
|
||||
"triples": batch
|
||||
}
|
||||
await websocket.send(json.dumps(message))
|
||||
|
||||
|
|
@ -362,7 +402,12 @@ class BulkClient:
|
|||
async for raw_message in websocket:
|
||||
yield json.loads(raw_message)
|
||||
|
||||
def import_entity_contexts(self, flow: str, contexts: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
|
||||
def import_entity_contexts(
|
||||
self, flow: str, contexts: Iterator[Dict[str, Any]],
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
batch_size: int = 100,
|
||||
**kwargs: Any
|
||||
) -> None:
|
||||
"""
|
||||
Bulk import entity contexts into a flow.
|
||||
|
||||
|
|
@ -373,6 +418,8 @@ class BulkClient:
|
|||
Args:
|
||||
flow: Flow identifier
|
||||
contexts: Iterator yielding context dictionaries
|
||||
metadata: Metadata dict with id, metadata, user, collection
|
||||
batch_size: Number of contexts per batch (default 100)
|
||||
**kwargs: Additional parameters (reserved for future use)
|
||||
|
||||
Example:
|
||||
|
|
@ -381,27 +428,49 @@ class BulkClient:
|
|||
|
||||
# Generate entity contexts to import
|
||||
def context_generator():
|
||||
yield {"entity": "entity1", "context": "Description of entity1..."}
|
||||
yield {"entity": "entity2", "context": "Description of entity2..."}
|
||||
yield {"entity": {"v": "entity1", "e": True}, "context": "Description..."}
|
||||
yield {"entity": {"v": "entity2", "e": True}, "context": "Description..."}
|
||||
# ... more contexts
|
||||
|
||||
bulk.import_entity_contexts(
|
||||
flow="default",
|
||||
contexts=context_generator()
|
||||
contexts=context_generator(),
|
||||
metadata={"id": "doc1", "metadata": [], "user": "user1", "collection": "default"}
|
||||
)
|
||||
```
|
||||
"""
|
||||
self._run_async(self._import_entity_contexts_async(flow, contexts))
|
||||
self._run_async(self._import_entity_contexts_async(flow, contexts, metadata, batch_size))
|
||||
|
||||
async def _import_entity_contexts_async(self, flow: str, contexts: Iterator[Dict[str, Any]]) -> None:
|
||||
async def _import_entity_contexts_async(
|
||||
self, flow: str, contexts: Iterator[Dict[str, Any]],
|
||||
metadata: Optional[Dict[str, Any]], batch_size: int
|
||||
) -> None:
|
||||
"""Async implementation of entity contexts import"""
|
||||
ws_url = f"{self.url}/api/v1/flow/{flow}/import/entity-contexts"
|
||||
if self.token:
|
||||
ws_url = f"{ws_url}?token={self.token}"
|
||||
|
||||
if metadata is None:
|
||||
metadata = {"id": "", "metadata": [], "user": "trustgraph", "collection": "default"}
|
||||
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
|
||||
batch = []
|
||||
for context in contexts:
|
||||
await websocket.send(json.dumps(context))
|
||||
batch.append(context)
|
||||
if len(batch) >= batch_size:
|
||||
message = {
|
||||
"metadata": metadata,
|
||||
"entities": batch
|
||||
}
|
||||
await websocket.send(json.dumps(message))
|
||||
batch = []
|
||||
# Send remaining items
|
||||
if batch:
|
||||
message = {
|
||||
"metadata": metadata,
|
||||
"entities": batch
|
||||
}
|
||||
await websocket.send(json.dumps(message))
|
||||
|
||||
def export_entity_contexts(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]:
|
||||
"""
|
||||
|
|
@ -461,45 +530,45 @@ class BulkClient:
|
|||
async for raw_message in websocket:
|
||||
yield json.loads(raw_message)
|
||||
|
||||
def import_objects(self, flow: str, objects: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
|
||||
def import_rows(self, flow: str, rows: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
|
||||
"""
|
||||
Bulk import structured objects into a flow.
|
||||
Bulk import structured rows into a flow.
|
||||
|
||||
Efficiently uploads structured data objects via WebSocket streaming
|
||||
Efficiently uploads structured data rows via WebSocket streaming
|
||||
for use in GraphQL queries.
|
||||
|
||||
Args:
|
||||
flow: Flow identifier
|
||||
objects: Iterator yielding object dictionaries
|
||||
rows: Iterator yielding row dictionaries
|
||||
**kwargs: Additional parameters (reserved for future use)
|
||||
|
||||
Example:
|
||||
```python
|
||||
bulk = api.bulk()
|
||||
|
||||
# Generate objects to import
|
||||
def object_generator():
|
||||
yield {"id": "obj1", "name": "Object 1", "value": 100}
|
||||
yield {"id": "obj2", "name": "Object 2", "value": 200}
|
||||
# ... more objects
|
||||
# Generate rows to import
|
||||
def row_generator():
|
||||
yield {"id": "row1", "name": "Row 1", "value": 100}
|
||||
yield {"id": "row2", "name": "Row 2", "value": 200}
|
||||
# ... more rows
|
||||
|
||||
bulk.import_objects(
|
||||
bulk.import_rows(
|
||||
flow="default",
|
||||
objects=object_generator()
|
||||
rows=row_generator()
|
||||
)
|
||||
```
|
||||
"""
|
||||
self._run_async(self._import_objects_async(flow, objects))
|
||||
self._run_async(self._import_rows_async(flow, rows))
|
||||
|
||||
async def _import_objects_async(self, flow: str, objects: Iterator[Dict[str, Any]]) -> None:
|
||||
"""Async implementation of objects import"""
|
||||
ws_url = f"{self.url}/api/v1/flow/{flow}/import/objects"
|
||||
async def _import_rows_async(self, flow: str, rows: Iterator[Dict[str, Any]]) -> None:
|
||||
"""Async implementation of rows import"""
|
||||
ws_url = f"{self.url}/api/v1/flow/{flow}/import/rows"
|
||||
if self.token:
|
||||
ws_url = f"{ws_url}?token={self.token}"
|
||||
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
|
||||
for obj in objects:
|
||||
await websocket.send(json.dumps(obj))
|
||||
for row in rows:
|
||||
await websocket.send(json.dumps(row))
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close connections"""
|
||||
|
|
|
|||
|
|
@ -71,8 +71,8 @@ class NLPQueryError(TrustGraphException):
|
|||
pass
|
||||
|
||||
|
||||
class ObjectsQueryError(TrustGraphException):
|
||||
"""Objects query service error"""
|
||||
class RowsQueryError(TrustGraphException):
|
||||
"""Rows query service error"""
|
||||
pass
|
||||
|
||||
|
||||
|
|
@ -103,7 +103,7 @@ ERROR_TYPE_MAPPING = {
|
|||
"load-error": LoadError,
|
||||
"lookup-error": LookupError,
|
||||
"nlp-query-error": NLPQueryError,
|
||||
"objects-query-error": ObjectsQueryError,
|
||||
"rows-query-error": RowsQueryError,
|
||||
"request-error": RequestError,
|
||||
"structured-query-error": StructuredQueryError,
|
||||
"unexpected-error": UnexpectedError,
|
||||
|
|
|
|||
|
|
@ -10,12 +10,27 @@ import json
|
|||
import base64
|
||||
|
||||
from .. knowledge import hash, Uri, Literal
|
||||
from .. schema import IRI, LITERAL
|
||||
from . types import Triple
|
||||
from . exceptions import ProtocolException
|
||||
|
||||
|
||||
def to_value(x):
|
||||
if x["e"]: return Uri(x["v"])
|
||||
return Literal(x["v"])
|
||||
"""Convert wire format to Uri or Literal."""
|
||||
if x.get("t") == IRI:
|
||||
return Uri(x.get("i", ""))
|
||||
elif x.get("t") == LITERAL:
|
||||
return Literal(x.get("v", ""))
|
||||
# Fallback for any other type
|
||||
return Literal(x.get("v", x.get("i", "")))
|
||||
|
||||
|
||||
def from_value(v):
|
||||
"""Convert Uri or Literal to wire format."""
|
||||
if isinstance(v, Uri):
|
||||
return {"t": IRI, "i": str(v)}
|
||||
else:
|
||||
return {"t": LITERAL, "v": str(v)}
|
||||
|
||||
class Flow:
|
||||
"""
|
||||
|
|
@ -569,9 +584,13 @@ class FlowInstance:
|
|||
```
|
||||
"""
|
||||
|
||||
# First convert text to embeddings vectors
|
||||
emb_result = self.embeddings(text=text)
|
||||
vectors = emb_result.get("vectors", [])
|
||||
|
||||
# Query graph embeddings for semantic search
|
||||
input = {
|
||||
"text": text,
|
||||
"vectors": vectors,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
|
|
@ -582,6 +601,51 @@ class FlowInstance:
|
|||
input
|
||||
)
|
||||
|
||||
def document_embeddings_query(self, text, user, collection, limit=10):
|
||||
"""
|
||||
Query document chunks using semantic similarity.
|
||||
|
||||
Finds document chunks whose content is semantically similar to the
|
||||
input text, using vector embeddings.
|
||||
|
||||
Args:
|
||||
text: Query text for semantic search
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
limit: Maximum number of results (default: 10)
|
||||
|
||||
Returns:
|
||||
dict: Query results with similar document chunks
|
||||
|
||||
Example:
|
||||
```python
|
||||
flow = api.flow().id("default")
|
||||
results = flow.document_embeddings_query(
|
||||
text="machine learning algorithms",
|
||||
user="trustgraph",
|
||||
collection="research-papers",
|
||||
limit=5
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
# First convert text to embeddings vectors
|
||||
emb_result = self.embeddings(text=text)
|
||||
vectors = emb_result.get("vectors", [])
|
||||
|
||||
# Query document embeddings for semantic search
|
||||
input = {
|
||||
"vectors": vectors,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
||||
return self.request(
|
||||
"service/document-embeddings",
|
||||
input
|
||||
)
|
||||
|
||||
def prompt(self, id, variables):
|
||||
"""
|
||||
Execute a prompt template with variable substitution.
|
||||
|
|
@ -751,17 +815,17 @@ class FlowInstance:
|
|||
if s:
|
||||
if not isinstance(s, Uri):
|
||||
raise RuntimeError("s must be Uri")
|
||||
input["s"] = { "v": str(s), "e": isinstance(s, Uri), }
|
||||
|
||||
input["s"] = from_value(s)
|
||||
|
||||
if p:
|
||||
if not isinstance(p, Uri):
|
||||
raise RuntimeError("p must be Uri")
|
||||
input["p"] = { "v": str(p), "e": isinstance(p, Uri), }
|
||||
input["p"] = from_value(p)
|
||||
|
||||
if o:
|
||||
if not isinstance(o, Uri) and not isinstance(o, Literal):
|
||||
raise RuntimeError("o must be Uri or Literal")
|
||||
input["o"] = { "v": str(o), "e": isinstance(o, Uri), }
|
||||
input["o"] = from_value(o)
|
||||
|
||||
object = self.request(
|
||||
"service/triples",
|
||||
|
|
@ -834,9 +898,9 @@ class FlowInstance:
|
|||
if metadata:
|
||||
metadata.emit(
|
||||
lambda t: triples.append({
|
||||
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
|
||||
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
|
||||
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
|
||||
"s": from_value(t["s"]),
|
||||
"p": from_value(t["p"]),
|
||||
"o": from_value(t["o"]),
|
||||
})
|
||||
)
|
||||
|
||||
|
|
@ -913,9 +977,9 @@ class FlowInstance:
|
|||
if metadata:
|
||||
metadata.emit(
|
||||
lambda t: triples.append({
|
||||
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
|
||||
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
|
||||
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
|
||||
"s": from_value(t["s"]),
|
||||
"p": from_value(t["p"]),
|
||||
"o": from_value(t["o"]),
|
||||
})
|
||||
)
|
||||
|
||||
|
|
@ -937,12 +1001,12 @@ class FlowInstance:
|
|||
input
|
||||
)
|
||||
|
||||
def objects_query(
|
||||
def rows_query(
|
||||
self, query, user="trustgraph", collection="default",
|
||||
variables=None, operation_name=None
|
||||
):
|
||||
"""
|
||||
Execute a GraphQL query against structured objects in the knowledge graph.
|
||||
Execute a GraphQL query against structured rows in the knowledge graph.
|
||||
|
||||
Queries structured data using GraphQL syntax, allowing complex queries
|
||||
with filtering, aggregation, and relationship traversal.
|
||||
|
|
@ -974,7 +1038,7 @@ class FlowInstance:
|
|||
}
|
||||
}
|
||||
'''
|
||||
result = flow.objects_query(
|
||||
result = flow.rows_query(
|
||||
query=query,
|
||||
user="trustgraph",
|
||||
collection="scientists"
|
||||
|
|
@ -989,7 +1053,7 @@ class FlowInstance:
|
|||
}
|
||||
}
|
||||
'''
|
||||
result = flow.objects_query(
|
||||
result = flow.rows_query(
|
||||
query=query,
|
||||
variables={"name": "Marie Curie"}
|
||||
)
|
||||
|
|
@ -1010,7 +1074,7 @@ class FlowInstance:
|
|||
input["operation_name"] = operation_name
|
||||
|
||||
response = self.request(
|
||||
"service/objects",
|
||||
"service/rows",
|
||||
input
|
||||
)
|
||||
|
||||
|
|
@ -1233,3 +1297,78 @@ class FlowInstance:
|
|||
|
||||
return response["schema-matches"]
|
||||
|
||||
def row_embeddings_query(
|
||||
self, text, schema_name, user="trustgraph", collection="default",
|
||||
index_name=None, limit=10
|
||||
):
|
||||
"""
|
||||
Query row data using semantic similarity on indexed fields.
|
||||
|
||||
Finds rows whose indexed field values are semantically similar to the
|
||||
input text, using vector embeddings. This enables fuzzy/semantic matching
|
||||
on structured data.
|
||||
|
||||
Args:
|
||||
text: Query text for semantic search
|
||||
schema_name: Schema name to search within
|
||||
user: User/keyspace identifier (default: "trustgraph")
|
||||
collection: Collection identifier (default: "default")
|
||||
index_name: Optional index name to filter search to specific index
|
||||
limit: Maximum number of results (default: 10)
|
||||
|
||||
Returns:
|
||||
dict: Query results with matches containing index_name, index_value,
|
||||
text, and score
|
||||
|
||||
Example:
|
||||
```python
|
||||
flow = api.flow().id("default")
|
||||
|
||||
# Search for customers by name similarity
|
||||
results = flow.row_embeddings_query(
|
||||
text="John Smith",
|
||||
schema_name="customers",
|
||||
user="trustgraph",
|
||||
collection="sales",
|
||||
limit=5
|
||||
)
|
||||
|
||||
# Filter to specific index
|
||||
results = flow.row_embeddings_query(
|
||||
text="machine learning engineer",
|
||||
schema_name="employees",
|
||||
index_name="job_title",
|
||||
limit=10
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
# First convert text to embeddings vectors
|
||||
emb_result = self.embeddings(text=text)
|
||||
vectors = emb_result.get("vectors", [])
|
||||
|
||||
# Query row embeddings for semantic search
|
||||
input = {
|
||||
"vectors": vectors,
|
||||
"schema_name": schema_name,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
|
||||
if index_name:
|
||||
input["index_name"] = index_name
|
||||
|
||||
response = self.request(
|
||||
"service/row-embeddings",
|
||||
input
|
||||
)
|
||||
|
||||
# Check for system-level error
|
||||
if "error" in response and response["error"]:
|
||||
error_type = response["error"].get("type", "unknown")
|
||||
error_message = response["error"].get("message", "Unknown error")
|
||||
raise ProtocolException(f"{error_type}: {error_message}")
|
||||
|
||||
return response
|
||||
|
||||
|
|
|
|||
|
|
@ -10,11 +10,18 @@ import json
|
|||
import base64
|
||||
|
||||
from .. knowledge import hash, Uri, Literal
|
||||
from .. schema import IRI, LITERAL
|
||||
from . types import Triple
|
||||
|
||||
|
||||
def to_value(x):
|
||||
if x["e"]: return Uri(x["v"])
|
||||
return Literal(x["v"])
|
||||
"""Convert wire format to Uri or Literal."""
|
||||
if x.get("t") == IRI:
|
||||
return Uri(x.get("i", ""))
|
||||
elif x.get("t") == LITERAL:
|
||||
return Literal(x.get("v", ""))
|
||||
# Fallback for any other type
|
||||
return Literal(x.get("v", x.get("i", "")))
|
||||
|
||||
class Knowledge:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -12,13 +12,28 @@ import logging
|
|||
|
||||
from . types import DocumentMetadata, ProcessingMetadata, Triple
|
||||
from .. knowledge import hash, Uri, Literal
|
||||
from .. schema import IRI, LITERAL
|
||||
from . exceptions import *
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def to_value(x):
|
||||
if x["e"]: return Uri(x["v"])
|
||||
return Literal(x["v"])
|
||||
"""Convert wire format to Uri or Literal."""
|
||||
if x.get("t") == IRI:
|
||||
return Uri(x.get("i", ""))
|
||||
elif x.get("t") == LITERAL:
|
||||
return Literal(x.get("v", ""))
|
||||
# Fallback for any other type
|
||||
return Literal(x.get("v", x.get("i", "")))
|
||||
|
||||
|
||||
def from_value(v):
|
||||
"""Convert Uri or Literal to wire format."""
|
||||
if isinstance(v, Uri):
|
||||
return {"t": IRI, "i": str(v)}
|
||||
else:
|
||||
return {"t": LITERAL, "v": str(v)}
|
||||
|
||||
class Library:
|
||||
"""
|
||||
|
|
@ -118,18 +133,18 @@ class Library:
|
|||
if isinstance(metadata, list):
|
||||
triples = [
|
||||
{
|
||||
"s": { "v": t.s, "e": isinstance(t.s, Uri) },
|
||||
"p": { "v": t.p, "e": isinstance(t.p, Uri) },
|
||||
"o": { "v": t.o, "e": isinstance(t.o, Uri) }
|
||||
"s": from_value(t.s),
|
||||
"p": from_value(t.p),
|
||||
"o": from_value(t.o),
|
||||
}
|
||||
for t in metadata
|
||||
]
|
||||
elif hasattr(metadata, "emit"):
|
||||
metadata.emit(
|
||||
lambda t: triples.append({
|
||||
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
|
||||
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
|
||||
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
|
||||
"s": from_value(t["s"]),
|
||||
"p": from_value(t["p"]),
|
||||
"o": from_value(t["o"]),
|
||||
})
|
||||
)
|
||||
else:
|
||||
|
|
@ -315,9 +330,9 @@ class Library:
|
|||
"comments": metadata.comments,
|
||||
"metadata": [
|
||||
{
|
||||
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
|
||||
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
|
||||
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
|
||||
"s": from_value(t["s"]),
|
||||
"p": from_value(t["p"]),
|
||||
"o": from_value(t["o"]),
|
||||
}
|
||||
for t in metadata.metadata
|
||||
],
|
||||
|
|
|
|||
|
|
@ -649,8 +649,12 @@ class SocketFlowInstance:
|
|||
)
|
||||
```
|
||||
"""
|
||||
# First convert text to embeddings vectors
|
||||
emb_result = self.embeddings(text=text)
|
||||
vectors = emb_result.get("vectors", [])
|
||||
|
||||
request = {
|
||||
"text": text,
|
||||
"vectors": vectors,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
|
|
@ -659,6 +663,54 @@ class SocketFlowInstance:
|
|||
|
||||
return self.client._send_request_sync("graph-embeddings", self.flow_id, request, False)
|
||||
|
||||
def document_embeddings_query(
|
||||
self,
|
||||
text: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
limit: int = 10,
|
||||
**kwargs: Any
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Query document chunks using semantic similarity.
|
||||
|
||||
Args:
|
||||
text: Query text for semantic search
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
limit: Maximum number of results (default: 10)
|
||||
**kwargs: Additional parameters passed to the service
|
||||
|
||||
Returns:
|
||||
dict: Query results with similar document chunks
|
||||
|
||||
Example:
|
||||
```python
|
||||
socket = api.socket()
|
||||
flow = socket.flow("default")
|
||||
|
||||
results = flow.document_embeddings_query(
|
||||
text="machine learning algorithms",
|
||||
user="trustgraph",
|
||||
collection="research-papers",
|
||||
limit=5
|
||||
)
|
||||
```
|
||||
"""
|
||||
# First convert text to embeddings vectors
|
||||
emb_result = self.embeddings(text=text)
|
||||
vectors = emb_result.get("vectors", [])
|
||||
|
||||
request = {
|
||||
"vectors": vectors,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
request.update(kwargs)
|
||||
|
||||
return self.client._send_request_sync("document-embeddings", self.flow_id, request, False)
|
||||
|
||||
def embeddings(self, text: str, **kwargs: Any) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate vector embeddings for text.
|
||||
|
|
@ -737,7 +789,7 @@ class SocketFlowInstance:
|
|||
|
||||
return self.client._send_request_sync("triples", self.flow_id, request, False)
|
||||
|
||||
def objects_query(
|
||||
def rows_query(
|
||||
self,
|
||||
query: str,
|
||||
user: str,
|
||||
|
|
@ -747,7 +799,7 @@ class SocketFlowInstance:
|
|||
**kwargs: Any
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Execute a GraphQL query against structured objects.
|
||||
Execute a GraphQL query against structured rows.
|
||||
|
||||
Args:
|
||||
query: GraphQL query string
|
||||
|
|
@ -774,7 +826,7 @@ class SocketFlowInstance:
|
|||
}
|
||||
}
|
||||
'''
|
||||
result = flow.objects_query(
|
||||
result = flow.rows_query(
|
||||
query=query,
|
||||
user="trustgraph",
|
||||
collection="scientists"
|
||||
|
|
@ -792,7 +844,7 @@ class SocketFlowInstance:
|
|||
request["operationName"] = operation_name
|
||||
request.update(kwargs)
|
||||
|
||||
return self.client._send_request_sync("objects", self.flow_id, request, False)
|
||||
return self.client._send_request_sync("rows", self.flow_id, request, False)
|
||||
|
||||
def mcp_tool(
|
||||
self,
|
||||
|
|
@ -829,3 +881,73 @@ class SocketFlowInstance:
|
|||
request.update(kwargs)
|
||||
|
||||
return self.client._send_request_sync("mcp-tool", self.flow_id, request, False)
|
||||
|
||||
def row_embeddings_query(
|
||||
self,
|
||||
text: str,
|
||||
schema_name: str,
|
||||
user: str = "trustgraph",
|
||||
collection: str = "default",
|
||||
index_name: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
**kwargs: Any
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Query row data using semantic similarity on indexed fields.
|
||||
|
||||
Finds rows whose indexed field values are semantically similar to the
|
||||
input text, using vector embeddings. This enables fuzzy/semantic matching
|
||||
on structured data.
|
||||
|
||||
Args:
|
||||
text: Query text for semantic search
|
||||
schema_name: Schema name to search within
|
||||
user: User/keyspace identifier (default: "trustgraph")
|
||||
collection: Collection identifier (default: "default")
|
||||
index_name: Optional index name to filter search to specific index
|
||||
limit: Maximum number of results (default: 10)
|
||||
**kwargs: Additional parameters passed to the service
|
||||
|
||||
Returns:
|
||||
dict: Query results with matches containing index_name, index_value,
|
||||
text, and score
|
||||
|
||||
Example:
|
||||
```python
|
||||
socket = api.socket()
|
||||
flow = socket.flow("default")
|
||||
|
||||
# Search for customers by name similarity
|
||||
results = flow.row_embeddings_query(
|
||||
text="John Smith",
|
||||
schema_name="customers",
|
||||
user="trustgraph",
|
||||
collection="sales",
|
||||
limit=5
|
||||
)
|
||||
|
||||
# Filter to specific index
|
||||
results = flow.row_embeddings_query(
|
||||
text="machine learning engineer",
|
||||
schema_name="employees",
|
||||
index_name="job_title",
|
||||
limit=10
|
||||
)
|
||||
```
|
||||
"""
|
||||
# First convert text to embeddings vectors
|
||||
emb_result = self.embeddings(text=text)
|
||||
vectors = emb_result.get("vectors", [])
|
||||
|
||||
request = {
|
||||
"vectors": vectors,
|
||||
"schema_name": schema_name,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"limit": limit
|
||||
}
|
||||
if index_name:
|
||||
request["index_name"] = index_name
|
||||
request.update(kwargs)
|
||||
|
||||
return self.client._send_request_sync("row-embeddings", self.flow_id, request, False)
|
||||
|
|
|
|||
|
|
@ -34,5 +34,6 @@ from . tool_service import ToolService
|
|||
from . tool_client import ToolClientSpec
|
||||
from . agent_client import AgentClientSpec
|
||||
from . structured_query_client import StructuredQueryClientSpec
|
||||
from . row_embeddings_query_client import RowEmbeddingsQueryClientSpec
|
||||
from . collection_config_handler import CollectionConfigHandler
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ embeddings.
|
|||
import logging
|
||||
|
||||
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
|
||||
from .. schema import Error, Value
|
||||
from .. schema import Error, Term
|
||||
|
||||
from . flow_processor import FlowProcessor
|
||||
from . consumer_spec import ConsumerSpec
|
||||
|
|
@ -16,7 +16,7 @@ from . producer_spec import ProducerSpec
|
|||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "ge-query"
|
||||
default_ident = "doc-embeddings-query"
|
||||
|
||||
class DocumentEmbeddingsQueryService(FlowProcessor):
|
||||
|
||||
|
|
|
|||
|
|
@ -2,15 +2,21 @@
|
|||
import logging
|
||||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse, IRI, LITERAL
|
||||
from .. knowledge import Uri, Literal
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def to_value(x):
    """Convert a schema Term into a knowledge Uri or Literal."""
    if x.type == IRI:
        return Uri(x.iri)
    if x.type == LITERAL:
        return Literal(x.value)
    # Fallback for any other term type: degrade to a literal.
    return Literal(x.value or x.iri)
|
||||
|
||||
class GraphEmbeddingsClient(RequestResponse):
|
||||
async def query(self, vectors, limit=20, user="trustgraph",
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ embeddings.
|
|||
import logging
|
||||
|
||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
||||
from .. schema import Error, Value
|
||||
from .. schema import Error, Term
|
||||
|
||||
from . flow_processor import FlowProcessor
|
||||
from . consumer_spec import ConsumerSpec
|
||||
|
|
@ -16,7 +16,7 @@ from . producer_spec import ProducerSpec
|
|||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "ge-query"
|
||||
default_ident = "graph-embeddings-query"
|
||||
|
||||
class GraphEmbeddingsQueryService(FlowProcessor):
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,45 @@
|
|||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
from .. schema import RowEmbeddingsRequest, RowEmbeddingsResponse
|
||||
|
||||
class RowEmbeddingsQueryClient(RequestResponse):
    """Request/response client for the row-embeddings similarity service."""

    async def row_embeddings_query(
        self, vectors, schema_name, user="trustgraph", collection="default",
        index_name=None, limit=10, timeout=600
    ):
        """Run a row-embeddings query; returns matches as plain dicts."""
        req = RowEmbeddingsRequest(
            vectors=vectors,
            schema_name=schema_name,
            user=user,
            collection=collection,
            limit=limit,
        )
        # Only attach the index filter when one was requested.
        if index_name:
            req.index_name = index_name

        response = await self.request(req, timeout=timeout)

        if response.error:
            raise RuntimeError(response.error.message)

        # Flatten matches into plain dicts for callers.
        matches = response.matches or []
        return [
            {
                "index_name": m.index_name,
                "index_value": m.index_value,
                "text": m.text,
                "score": m.score,
            }
            for m in matches
        ]
|
||||
|
||||
class RowEmbeddingsQueryClientSpec(RequestResponseSpec):
    """Wires the row-embeddings request/response queues to the client impl."""

    def __init__(self, request_name, response_name):
        super().__init__(
            request_name=request_name,
            request_schema=RowEmbeddingsRequest,
            response_name=response_name,
            response_schema=RowEmbeddingsResponse,
            impl=RowEmbeddingsQueryClient,
        )
|
||||
|
|
@ -222,35 +222,50 @@ class Subscriber:
|
|||
# Store message for later acknowledgment
|
||||
msg_id = str(uuid.uuid4())
|
||||
self.pending_acks[msg_id] = msg
|
||||
|
||||
|
||||
try:
|
||||
id = msg.properties()["id"]
|
||||
except:
|
||||
id = None
|
||||
|
||||
|
||||
value = msg.value()
|
||||
delivery_success = False
|
||||
|
||||
has_matching_waiter = False
|
||||
|
||||
async with self.lock:
|
||||
# Deliver to specific subscribers
|
||||
if id in self.q:
|
||||
has_matching_waiter = True
|
||||
delivery_success = await self._deliver_to_queue(
|
||||
self.q[id], value
|
||||
)
|
||||
|
||||
|
||||
# Deliver to all subscribers
|
||||
for q in self.full.values():
|
||||
has_matching_waiter = True
|
||||
if await self._deliver_to_queue(q, value):
|
||||
delivery_success = True
|
||||
|
||||
# Acknowledge only on successful delivery
|
||||
if delivery_success:
|
||||
self.consumer.acknowledge(msg)
|
||||
del self.pending_acks[msg_id]
|
||||
else:
|
||||
# Negative acknowledge for retry
|
||||
self.consumer.negative_acknowledge(msg)
|
||||
del self.pending_acks[msg_id]
|
||||
|
||||
# Always acknowledge the message to prevent redelivery storms
|
||||
# on shared topics. Negative acknowledging orphaned messages
|
||||
# (no matching waiter) causes immediate redelivery to all
|
||||
# subscribers, none of whom can handle it either.
|
||||
self.consumer.acknowledge(msg)
|
||||
del self.pending_acks[msg_id]
|
||||
|
||||
if not delivery_success:
|
||||
if not has_matching_waiter:
|
||||
# Message arrived for a waiter that no longer exists
|
||||
# (likely due to client disconnect or timeout)
|
||||
logger.debug(
|
||||
f"Discarding orphaned message with id={id} - "
|
||||
"no matching waiter"
|
||||
)
|
||||
else:
|
||||
# Delivery failed (e.g., queue full with drop_new strategy)
|
||||
logger.debug(
|
||||
f"Message with id={id} dropped due to backpressure"
|
||||
)
|
||||
|
||||
async def _deliver_to_queue(self, queue, value):
|
||||
"""Deliver message to queue with backpressure handling"""
|
||||
|
|
|
|||
|
|
@ -1,24 +1,34 @@
|
|||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL
|
||||
from .. knowledge import Uri, Literal
|
||||
|
||||
|
||||
class Triple:
    """Simple subject/predicate/object container returned by triples queries.

    Attributes:
        s: subject term
        p: predicate term
        o: object term
    """

    def __init__(self, s, p, o):
        self.s = s
        self.p = p
        self.o = o

    def __repr__(self):
        # Debug-friendly representation; does not affect equality or hashing.
        return f"Triple(s={self.s!r}, p={self.p!r}, o={self.o!r})"
|
||||
|
||||
|
||||
def to_value(x):
    """Convert a schema Term into a Uri or Literal."""
    term_type = x.type
    if term_type == IRI:
        return Uri(x.iri)
    elif term_type == LITERAL:
        return Literal(x.value)
    # Anything else degrades to a literal of whichever field is populated.
    return Literal(x.value or x.iri)
|
||||
|
||||
|
||||
def from_value(x):
    """Convert a Uri or Literal into a schema Term (None passes through)."""
    if x is None:
        return None
    if isinstance(x, Uri):
        return Term(type=IRI, iri=str(x))
    return Term(type=LITERAL, value=str(x))
|
||||
|
||||
class TriplesClient(RequestResponse):
|
||||
async def query(self, s=None, p=None, o=None, limit=20,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ null. Output is a list of triples.
|
|||
import logging
|
||||
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .. schema import Value, Triple
|
||||
from .. schema import Term, Triple
|
||||
|
||||
from . flow_processor import FlowProcessor
|
||||
from . consumer_spec import ConsumerSpec
|
||||
|
|
|
|||
60
trustgraph-base/trustgraph/clients/row_embeddings_client.py
Normal file
60
trustgraph-base/trustgraph/clients/row_embeddings_client.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import RowEmbeddingsRequest, RowEmbeddingsResponse
|
||||
from .. schema import row_embeddings_request_queue
|
||||
from .. schema import row_embeddings_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class RowEmbeddingsClient(BaseClient):
    """Pulsar client for the row-embeddings similarity query service."""

    def __init__(
        self, log_level=ERROR,
        subscriber=None,
        input_queue=None,
        output_queue=None,
        pulsar_host="pulsar://pulsar:6650",
        pulsar_api_key=None,
    ):
        # Fall back to the well-known queues when none are supplied.
        # (was `== None`; identity comparison is the correct idiom)
        if input_queue is None:
            input_queue = row_embeddings_request_queue

        if output_queue is None:
            output_queue = row_embeddings_response_queue

        super().__init__(
            log_level=log_level,
            subscriber=subscriber,
            input_queue=input_queue,
            output_queue=output_queue,
            pulsar_host=pulsar_host,
            pulsar_api_key=pulsar_api_key,
            input_schema=RowEmbeddingsRequest,
            output_schema=RowEmbeddingsResponse,
        )

    def request(
        self, vectors, schema_name, user="trustgraph", collection="default",
        index_name=None, limit=10, timeout=300
    ):
        """Issue a row-embeddings query.

        Args:
            vectors: embedding vectors to match against indexed row values
            schema_name: schema to search within
            user: user/keyspace identifier
            collection: collection identifier
            index_name: optional index to restrict the search
            limit: maximum number of matches
            timeout: request timeout passed to the underlying call

        Returns:
            The response's matches.

        Raises:
            RuntimeError: if the service returned an error.
        """
        kwargs = dict(
            user=user, collection=collection,
            vectors=vectors, schema_name=schema_name,
            limit=limit, timeout=timeout,
        )
        # Only include the index filter when one was requested.
        if index_name:
            kwargs["index_name"] = index_name

        response = self.call(**kwargs)

        if response.error:
            raise RuntimeError(f"{response.error.type}: {response.error.message}")

        return response.matches
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL
|
||||
from .. schema import triples_request_queue
|
||||
from .. schema import triples_response_queue
|
||||
from . base import BaseClient
|
||||
|
|
@ -46,9 +46,9 @@ class TriplesQueryClient(BaseClient):
|
|||
if ent == None: return None
|
||||
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
return Value(value=ent, is_uri=True)
|
||||
return Term(type=IRI, iri=ent)
|
||||
|
||||
return Value(value=ent, is_uri=False)
|
||||
return Term(type=LITERAL, value=ent)
|
||||
|
||||
def request(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -19,9 +19,10 @@ from .translators.prompt import PromptRequestTranslator, PromptResponseTranslato
|
|||
from .translators.tool import ToolRequestTranslator, ToolResponseTranslator
|
||||
from .translators.embeddings_query import (
|
||||
DocumentEmbeddingsRequestTranslator, DocumentEmbeddingsResponseTranslator,
|
||||
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator
|
||||
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator,
|
||||
RowEmbeddingsRequestTranslator, RowEmbeddingsResponseTranslator
|
||||
)
|
||||
from .translators.objects_query import ObjectsQueryRequestTranslator, ObjectsQueryResponseTranslator
|
||||
from .translators.rows_query import RowsQueryRequestTranslator, RowsQueryResponseTranslator
|
||||
from .translators.nlp_query import QuestionToStructuredQueryRequestTranslator, QuestionToStructuredQueryResponseTranslator
|
||||
from .translators.structured_query import StructuredQueryRequestTranslator, StructuredQueryResponseTranslator
|
||||
from .translators.diagnosis import StructuredDataDiagnosisRequestTranslator, StructuredDataDiagnosisResponseTranslator
|
||||
|
|
@ -107,15 +108,21 @@ TranslatorRegistry.register_service(
|
|||
)
|
||||
|
||||
TranslatorRegistry.register_service(
|
||||
"graph-embeddings-query",
|
||||
GraphEmbeddingsRequestTranslator(),
|
||||
"graph-embeddings-query",
|
||||
GraphEmbeddingsRequestTranslator(),
|
||||
GraphEmbeddingsResponseTranslator()
|
||||
)
|
||||
|
||||
TranslatorRegistry.register_service(
|
||||
"objects-query",
|
||||
ObjectsQueryRequestTranslator(),
|
||||
ObjectsQueryResponseTranslator()
|
||||
"row-embeddings-query",
|
||||
RowEmbeddingsRequestTranslator(),
|
||||
RowEmbeddingsResponseTranslator()
|
||||
)
|
||||
|
||||
TranslatorRegistry.register_service(
|
||||
"rows-query",
|
||||
RowsQueryRequestTranslator(),
|
||||
RowsQueryResponseTranslator()
|
||||
)
|
||||
|
||||
TranslatorRegistry.register_service(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from .base import Translator, MessageTranslator
|
||||
from .primitives import ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator
|
||||
from .primitives import TermTranslator, ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator
|
||||
from .metadata import DocumentMetadataTranslator, ProcessingMetadataTranslator
|
||||
from .agent import AgentRequestTranslator, AgentResponseTranslator
|
||||
from .embeddings import EmbeddingsRequestTranslator, EmbeddingsResponseTranslator
|
||||
|
|
@ -15,7 +15,8 @@ from .flow import FlowRequestTranslator, FlowResponseTranslator
|
|||
from .prompt import PromptRequestTranslator, PromptResponseTranslator
|
||||
from .embeddings_query import (
|
||||
DocumentEmbeddingsRequestTranslator, DocumentEmbeddingsResponseTranslator,
|
||||
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator
|
||||
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator,
|
||||
RowEmbeddingsRequestTranslator, RowEmbeddingsResponseTranslator
|
||||
)
|
||||
from .objects_query import ObjectsQueryRequestTranslator, ObjectsQueryResponseTranslator
|
||||
from .rows_query import RowsQueryRequestTranslator, RowsQueryResponseTranslator
|
||||
from .diagnosis import StructuredDataDiagnosisRequestTranslator, StructuredDataDiagnosisResponseTranslator
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
from typing import Dict, Any, Tuple
|
||||
from ...schema import (
|
||||
DocumentEmbeddingsRequest, DocumentEmbeddingsResponse,
|
||||
GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
||||
GraphEmbeddingsRequest, GraphEmbeddingsResponse,
|
||||
RowEmbeddingsRequest, RowEmbeddingsResponse, RowIndexMatch
|
||||
)
|
||||
from .base import MessageTranslator
|
||||
from .primitives import ValueTranslator
|
||||
|
|
@ -92,3 +93,62 @@ class GraphEmbeddingsResponseTranslator(MessageTranslator):
|
|||
def from_response_with_completion(self, obj: GraphEmbeddingsResponse) -> Tuple[Dict[str, Any], bool]:
|
||||
"""Returns (response_dict, is_final)"""
|
||||
return self.from_pulsar(obj), True
|
||||
|
||||
|
||||
class RowEmbeddingsRequestTranslator(MessageTranslator):
    """Translator for RowEmbeddingsRequest schema objects."""

    def to_pulsar(self, data: Dict[str, Any]) -> RowEmbeddingsRequest:
        """Build the Pulsar request from a plain dict, applying defaults."""
        return RowEmbeddingsRequest(
            vectors=data["vectors"],
            limit=int(data.get("limit", 10)),
            user=data.get("user", "trustgraph"),
            collection=data.get("collection", "default"),
            schema_name=data.get("schema_name", ""),
            index_name=data.get("index_name")
        )

    def from_pulsar(self, obj: RowEmbeddingsRequest) -> Dict[str, Any]:
        """Serialise the Pulsar request back to a plain dict."""
        out: Dict[str, Any] = {
            "vectors": obj.vectors,
            "limit": obj.limit,
            "user": obj.user,
            "collection": obj.collection,
            "schema_name": obj.schema_name,
        }
        # index_name is optional on the wire; omit when unset.
        if obj.index_name:
            out["index_name"] = obj.index_name
        return out
|
||||
|
||||
|
||||
class RowEmbeddingsResponseTranslator(MessageTranslator):
    """Translator for RowEmbeddingsResponse schema objects."""

    def to_pulsar(self, data: Dict[str, Any]) -> RowEmbeddingsResponse:
        raise NotImplementedError("Response translation to Pulsar not typically needed")

    def from_pulsar(self, obj: RowEmbeddingsResponse) -> Dict[str, Any]:
        """Serialise the response to a plain dict; error and matches optional."""
        out: Dict[str, Any] = {}

        if obj.error is not None:
            out["error"] = {
                "type": obj.error.type,
                "message": obj.error.message,
            }

        if obj.matches is not None:
            out["matches"] = [
                {
                    "index_name": m.index_name,
                    "index_value": m.index_value,
                    "text": m.text,
                    "score": m.score,
                }
                for m in obj.matches
            ]

        return out

    def from_response_with_completion(self, obj: RowEmbeddingsResponse) -> Tuple[Dict[str, Any], bool]:
        """Returns (response_dict, is_final) — row-embeddings responses are single-shot."""
        return self.from_pulsar(obj), True
|
||||
|
|
|
|||
|
|
@ -1,37 +1,133 @@
|
|||
from typing import Dict, Any, List
|
||||
from ...schema import Value, Triple, RowSchema, Field
|
||||
from ...schema import Term, Triple, RowSchema, Field, IRI, BLANK, LITERAL, TRIPLE
|
||||
from .base import Translator
|
||||
|
||||
|
||||
class ValueTranslator(Translator):
|
||||
"""Translator for Value schema objects"""
|
||||
|
||||
def to_pulsar(self, data: Dict[str, Any]) -> Value:
|
||||
return Value(value=data["v"], is_uri=data["e"])
|
||||
|
||||
def from_pulsar(self, obj: Value) -> Dict[str, Any]:
|
||||
return {"v": obj.value, "e": obj.is_uri}
|
||||
class TermTranslator(Translator):
    """
    Translator for Term schema objects.

    Wire format (compact keys):
    - "t": type (i/b/l/t)
    - "i": iri (for IRI type)
    - "d": id (for BLANK type)
    - "v": value (for LITERAL type)
    - "dt": datatype (for LITERAL type)
    - "ln": language (for LITERAL type)
    - "tr": triple (for TRIPLE type, nested)
    """

    def to_pulsar(self, data: Dict[str, Any]) -> Term:
        """Build a Term from its compact wire-format dict."""
        term_type = data.get("t", "")

        if term_type == IRI:
            return Term(type=IRI, iri=data.get("i", ""))

        elif term_type == BLANK:
            return Term(type=BLANK, id=data.get("d", ""))

        elif term_type == LITERAL:
            # datatype and language default to "" when absent from the wire.
            return Term(
                type=LITERAL,
                value=data.get("v", ""),
                datatype=data.get("dt", ""),
                language=data.get("ln", ""),
            )

        elif term_type == TRIPLE:
            # Nested (quoted) triple - delegate to the module-level helper
            # rather than TripleTranslator to avoid circular instantiation.
            triple_data = data.get("tr")
            if triple_data:
                triple = _triple_translator_to_pulsar(triple_data)
            else:
                triple = None
            return Term(type=TRIPLE, triple=triple)

        else:
            # Unknown or empty type: preserve it as-is rather than failing.
            return Term(type=term_type)

    def from_pulsar(self, obj: Term) -> Dict[str, Any]:
        """Serialise a Term to the compact wire-format dict (only relevant keys emitted)."""
        result: Dict[str, Any] = {"t": obj.type}

        if obj.type == IRI:
            result["i"] = obj.iri

        elif obj.type == BLANK:
            result["d"] = obj.id

        elif obj.type == LITERAL:
            result["v"] = obj.value
            # datatype/language are optional; omit empty values from the wire.
            if obj.datatype:
                result["dt"] = obj.datatype
            if obj.language:
                result["ln"] = obj.language

        elif obj.type == TRIPLE:
            if obj.triple:
                result["tr"] = _triple_translator_from_pulsar(obj.triple)

        return result
|
||||
|
||||
|
||||
# Module-level helper functions to avoid circular instantiation
|
||||
def _triple_translator_to_pulsar(data: Dict[str, Any]) -> Triple:
    """Build a Triple from wire-format (module-level to avoid circular instantiation)."""
    convert = TermTranslator().to_pulsar
    return Triple(
        s=convert(data["s"]) if data.get("s") else None,
        p=convert(data["p"]) if data.get("p") else None,
        o=convert(data["o"]) if data.get("o") else None,
        g=data.get("g"),
    )
|
||||
|
||||
|
||||
def _triple_translator_from_pulsar(obj: Triple) -> Dict[str, Any]:
    """Serialise a Triple to wire-format (module-level to avoid circular instantiation)."""
    serialise = TermTranslator().from_pulsar
    out: Dict[str, Any] = {}

    # Emit only the populated positions; insertion order matches s, p, o, g.
    for key, term in (("s", obj.s), ("p", obj.p), ("o", obj.o)):
        if term:
            out[key] = serialise(term)
    if obj.g:
        out["g"] = obj.g

    return out
|
||||
|
||||
|
||||
class TripleTranslator(Translator):
    """Translator for Triple schema objects (quads with optional graph)."""

    def __init__(self):
        self.term_translator = TermTranslator()

    def to_pulsar(self, data: Dict[str, Any]) -> Triple:
        """Build a Triple; missing s/p/o keys become None, g is optional."""
        convert = self.term_translator.to_pulsar
        return Triple(
            s=convert(data["s"]) if data.get("s") else None,
            p=convert(data["p"]) if data.get("p") else None,
            o=convert(data["o"]) if data.get("o") else None,
            g=data.get("g"),
        )

    def from_pulsar(self, obj: Triple) -> Dict[str, Any]:
        """Serialise a Triple; only populated positions are emitted."""
        serialise = self.term_translator.from_pulsar
        out: Dict[str, Any] = {}

        for key, term in (("s", obj.s), ("p", obj.p), ("o", obj.o)):
            if term:
                out[key] = serialise(term)
        if obj.g:
            out["g"] = obj.g

        return out
|
||||
|
||||
|
||||
# Backward compatibility alias
|
||||
ValueTranslator = TermTranslator
|
||||
|
||||
|
||||
class SubgraphTranslator(Translator):
|
||||
|
|
|
|||
|
|
@ -1,44 +1,44 @@
|
|||
from typing import Dict, Any, Tuple, Optional
|
||||
from ...schema import ObjectsQueryRequest, ObjectsQueryResponse
|
||||
from ...schema import RowsQueryRequest, RowsQueryResponse
|
||||
from .base import MessageTranslator
|
||||
import json
|
||||
|
||||
|
||||
class ObjectsQueryRequestTranslator(MessageTranslator):
|
||||
"""Translator for ObjectsQueryRequest schema objects"""
|
||||
|
||||
def to_pulsar(self, data: Dict[str, Any]) -> ObjectsQueryRequest:
|
||||
return ObjectsQueryRequest(
|
||||
class RowsQueryRequestTranslator(MessageTranslator):
    """Translator for RowsQueryRequest schema objects."""

    def to_pulsar(self, data: Dict[str, Any]) -> RowsQueryRequest:
        """Build the GraphQL rows-query request, applying defaults."""
        return RowsQueryRequest(
            user=data.get("user", "trustgraph"),
            collection=data.get("collection", "default"),
            query=data.get("query", ""),
            variables=data.get("variables", {}),
            operation_name=data.get("operation_name", None)
        )

    def from_pulsar(self, obj: RowsQueryRequest) -> Dict[str, Any]:
        """Serialise the request back to a plain dict."""
        payload: Dict[str, Any] = {
            "user": obj.user,
            "collection": obj.collection,
            "query": obj.query,
            "variables": dict(obj.variables) if obj.variables else {},
        }

        # operation_name is optional; omit when unset.
        if obj.operation_name:
            payload["operation_name"] = obj.operation_name

        return payload
|
||||
|
||||
|
||||
class ObjectsQueryResponseTranslator(MessageTranslator):
|
||||
"""Translator for ObjectsQueryResponse schema objects"""
|
||||
|
||||
def to_pulsar(self, data: Dict[str, Any]) -> ObjectsQueryResponse:
|
||||
class RowsQueryResponseTranslator(MessageTranslator):
|
||||
"""Translator for RowsQueryResponse schema objects"""
|
||||
|
||||
def to_pulsar(self, data: Dict[str, Any]) -> RowsQueryResponse:
|
||||
raise NotImplementedError("Response translation to Pulsar not typically needed")
|
||||
|
||||
def from_pulsar(self, obj: ObjectsQueryResponse) -> Dict[str, Any]:
|
||||
|
||||
def from_pulsar(self, obj: RowsQueryResponse) -> Dict[str, Any]:
|
||||
result = {}
|
||||
|
||||
|
||||
# Handle GraphQL response data
|
||||
if obj.data:
|
||||
try:
|
||||
|
|
@ -47,7 +47,7 @@ class ObjectsQueryResponseTranslator(MessageTranslator):
|
|||
result["data"] = obj.data
|
||||
else:
|
||||
result["data"] = None
|
||||
|
||||
|
||||
# Handle GraphQL errors
|
||||
if obj.errors:
|
||||
result["errors"] = []
|
||||
|
|
@ -60,20 +60,20 @@ class ObjectsQueryResponseTranslator(MessageTranslator):
|
|||
if error.extensions:
|
||||
error_dict["extensions"] = dict(error.extensions)
|
||||
result["errors"].append(error_dict)
|
||||
|
||||
|
||||
# Handle extensions
|
||||
if obj.extensions:
|
||||
result["extensions"] = dict(obj.extensions)
|
||||
|
||||
|
||||
# Handle system-level error
|
||||
if obj.error:
|
||||
result["error"] = {
|
||||
"type": obj.error.type,
|
||||
"message": obj.error.message
|
||||
}
|
||||
|
||||
|
||||
return result
|
||||
|
||||
def from_response_with_completion(self, obj: ObjectsQueryResponse) -> Tuple[Dict[str, Any], bool]:
|
||||
|
||||
def from_response_with_completion(self, obj: RowsQueryResponse) -> Tuple[Dict[str, Any], bool]:
|
||||
"""Returns (response_dict, is_final)"""
|
||||
return self.from_pulsar(obj), True
|
||||
return self.from_pulsar(obj), True
|
||||
|
|
@ -14,11 +14,13 @@ class TriplesQueryRequestTranslator(MessageTranslator):
|
|||
s = self.value_translator.to_pulsar(data["s"]) if "s" in data else None
|
||||
p = self.value_translator.to_pulsar(data["p"]) if "p" in data else None
|
||||
o = self.value_translator.to_pulsar(data["o"]) if "o" in data else None
|
||||
|
||||
g = data.get("g") # None=default graph, "*"=all graphs
|
||||
|
||||
return TriplesQueryRequest(
|
||||
s=s,
|
||||
p=p,
|
||||
o=o,
|
||||
g=g,
|
||||
limit=int(data.get("limit", 10000)),
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default")
|
||||
|
|
@ -30,14 +32,16 @@ class TriplesQueryRequestTranslator(MessageTranslator):
|
|||
"user": obj.user,
|
||||
"collection": obj.collection
|
||||
}
|
||||
|
||||
|
||||
if obj.s:
|
||||
result["s"] = self.value_translator.from_pulsar(obj.s)
|
||||
if obj.p:
|
||||
result["p"] = self.value_translator.from_pulsar(obj.p)
|
||||
if obj.o:
|
||||
result["o"] = self.value_translator.from_pulsar(obj.o)
|
||||
|
||||
if obj.g is not None:
|
||||
result["g"] = obj.g
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,22 +1,57 @@
|
|||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Term type constants
|
||||
IRI = "i" # IRI/URI node
|
||||
BLANK = "b" # Blank node
|
||||
LITERAL = "l" # Literal value
|
||||
TRIPLE = "t" # Quoted triple (RDF-star)
|
||||
|
||||
|
||||
@dataclass
class Error:
    """Service-level error payload carried in responses."""
    # Error category; client code renders errors as "{type}: {message}".
    type: str = ""
    # Human-readable error description.
    message: str = ""
||||
|
||||
|
||||
@dataclass
class Term:
    """
    RDF Term - can represent an IRI, blank node, literal, or quoted triple.

    The 'type' field determines which other fields are relevant:
    - IRI: use 'iri' field
    - BLANK: use 'id' field
    - LITERAL: use 'value', 'datatype', 'language' fields
    - TRIPLE: use 'triple' field
    """
    type: str = ""  # One of: IRI, BLANK, LITERAL, TRIPLE (compact single-char codes)

    # For IRI terms (type == IRI)
    iri: str = ""

    # For blank nodes (type == BLANK)
    id: str = ""

    # For literals (type == LITERAL)
    value: str = ""
    datatype: str = ""  # XSD datatype URI (mutually exclusive with language)
    language: str = ""  # Language tag (mutually exclusive with datatype)

    # For quoted triples (type == TRIPLE); forward ref since Triple is defined below
    triple: "Triple | None" = None
||||
|
||||
|
||||
@dataclass
class Triple:
    """
    RDF Triple / Quad.

    The optional 'g' field specifies the named graph (None = default graph).
    """
    s: Term | None = None  # Subject
    p: Term | None = None  # Predicate
    o: Term | None = None  # Object
    g: str | None = None  # Graph name (IRI), None = default graph
|
||||
|
||||
@dataclass
|
||||
class Field:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.primitives import Value, RowSchema
|
||||
from ..core.primitives import Term, RowSchema
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
|
@ -10,7 +10,7 @@ from ..core.topic import topic
|
|||
|
||||
@dataclass
|
||||
class EntityEmbeddings:
|
||||
entity: Value | None = None
|
||||
entity: Term | None = None
|
||||
vectors: list[list[float]] = field(default_factory=list)
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
|
|
@ -60,3 +60,23 @@ class StructuredObjectEmbedding:
|
|||
field_embeddings: dict[str, list[float]] = field(default_factory=dict) # Per-field embeddings
|
||||
|
||||
############################################################################
|
||||
|
||||
# Row embeddings are embeddings associated with indexed field values
|
||||
# in structured row data. Each index gets embedded separately.
|
||||
|
||||
@dataclass
class RowIndexEmbedding:
    """Single row's embedding for one index"""
    index_name: str = ""  # The indexed field name(s)
    index_value: list[str] = field(default_factory=list)  # The field value(s)
    text: str = ""  # Text that was embedded
    vectors: list[list[float]] = field(default_factory=list)  # Embedding vectors for 'text'
|
||||
|
||||
@dataclass
|
||||
class RowEmbeddings:
|
||||
"""Batched row embeddings for a schema"""
|
||||
metadata: Metadata | None = None
|
||||
schema_name: str = ""
|
||||
embeddings: list[RowIndexEmbedding] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.primitives import Value, Triple
|
||||
from ..core.primitives import Term, Triple
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.topic import topic
|
||||
|
||||
|
|
@ -10,7 +10,7 @@ from ..core.topic import topic
|
|||
|
||||
@dataclass
|
||||
class EntityContext:
|
||||
entity: Value | None = None
|
||||
entity: Term | None = None
|
||||
context: str = ""
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from .library import *
|
|||
from .lookup import *
|
||||
from .nlp_query import *
|
||||
from .structured_query import *
|
||||
from .objects_query import *
|
||||
from .rows_query import *
|
||||
from .diagnosis import *
|
||||
from .collection import *
|
||||
from .storage import *
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
from dataclasses import dataclass
|
||||
|
||||
from ..core.primitives import Error, Value, Triple
|
||||
from ..core.primitives import Error, Term, Triple
|
||||
from ..core.topic import topic
|
||||
from ..core.metadata import Metadata
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from dataclasses import dataclass, field
|
||||
|
||||
from ..core.primitives import Error, Value, Triple
|
||||
from ..core.primitives import Error, Term, Triple
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
|
@ -17,7 +17,7 @@ class GraphEmbeddingsRequest:
|
|||
@dataclass
|
||||
class GraphEmbeddingsResponse:
|
||||
error: Error | None = None
|
||||
entities: list[Value] = field(default_factory=list)
|
||||
entities: list[Term] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -27,9 +27,10 @@ class GraphEmbeddingsResponse:
|
|||
class TriplesQueryRequest:
|
||||
user: str = ""
|
||||
collection: str = ""
|
||||
s: Value | None = None
|
||||
p: Value | None = None
|
||||
o: Value | None = None
|
||||
s: Term | None = None
|
||||
p: Term | None = None
|
||||
o: Term | None = None
|
||||
g: str | None = None # Graph IRI. None=default graph, "*"=all graphs
|
||||
limit: int = 0
|
||||
|
||||
@dataclass
|
||||
|
|
@ -58,4 +59,39 @@ document_embeddings_request_queue = topic(
|
|||
)
|
||||
document_embeddings_response_queue = topic(
|
||||
"document-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
|
||||
)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Row embeddings query - for semantic/fuzzy matching on row index values
|
||||
|
||||
@dataclass
|
||||
class RowIndexMatch:
|
||||
"""A single matching row index from a semantic search"""
|
||||
index_name: str = "" # The indexed field(s)
|
||||
index_value: list[str] = field(default_factory=list) # The index values
|
||||
text: str = "" # The text that was embedded
|
||||
score: float = 0.0 # Similarity score
|
||||
|
||||
@dataclass
|
||||
class RowEmbeddingsRequest:
|
||||
"""Request for row embeddings semantic search"""
|
||||
vectors: list[list[float]] = field(default_factory=list) # Query vectors
|
||||
limit: int = 10 # Max results to return
|
||||
user: str = "" # User/keyspace
|
||||
collection: str = "" # Collection name
|
||||
schema_name: str = "" # Schema name to search within
|
||||
index_name: str | None = None # Optional: filter to specific index
|
||||
|
||||
@dataclass
|
||||
class RowEmbeddingsResponse:
|
||||
"""Response from row embeddings semantic search"""
|
||||
error: Error | None = None
|
||||
matches: list[RowIndexMatch] = field(default_factory=list)
|
||||
|
||||
row_embeddings_request_queue = topic(
|
||||
"row-embeddings-request", qos='q0', tenant='trustgraph', namespace='flow'
|
||||
)
|
||||
row_embeddings_response_queue = topic(
|
||||
"row-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
|
||||
)
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
from dataclasses import dataclass
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error, Value
|
||||
from ..core.primitives import Error, Term
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from ..core.topic import topic
|
|||
|
||||
############################################################################
|
||||
|
||||
# Objects Query Service - executes GraphQL queries against structured data
|
||||
# Rows Query Service - executes GraphQL queries against structured data
|
||||
|
||||
@dataclass
|
||||
class GraphQLError:
|
||||
|
|
@ -15,7 +15,7 @@ class GraphQLError:
|
|||
extensions: dict[str, str] = field(default_factory=dict) # Additional error metadata
|
||||
|
||||
@dataclass
|
||||
class ObjectsQueryRequest:
|
||||
class RowsQueryRequest:
|
||||
user: str = "" # Cassandra keyspace (follows pattern from TriplesQueryRequest)
|
||||
collection: str = "" # Data collection identifier (required for partition key)
|
||||
query: str = "" # GraphQL query string
|
||||
|
|
@ -23,7 +23,7 @@ class ObjectsQueryRequest:
|
|||
operation_name: Optional[str] = None # Operation to execute for multi-operation documents
|
||||
|
||||
@dataclass
|
||||
class ObjectsQueryResponse:
|
||||
class RowsQueryResponse:
|
||||
error: Error | None = None # System-level error (connection, timeout, etc.)
|
||||
data: str = "" # JSON-encoded GraphQL response data
|
||||
errors: list[GraphQLError] = field(default_factory=list) # GraphQL field-level errors
|
||||
Loading…
Add table
Add a link
Reference in a new issue