mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-05 13:22:37 +02:00
Structured data 2 (#645)
* Structured data refactor - multi-index tables, remove need for manual mods to the Cassandra tables
* Tech spec updated to track implementation
This commit is contained in:
parent
5ffad92345
commit
1809c1f56d
87 changed files with 5233 additions and 3235 deletions
|
|
@ -101,7 +101,7 @@ from .exceptions import (
|
|||
LoadError,
|
||||
LookupError,
|
||||
NLPQueryError,
|
||||
ObjectsQueryError,
|
||||
RowsQueryError,
|
||||
RequestError,
|
||||
StructuredQueryError,
|
||||
UnexpectedError,
|
||||
|
|
@ -161,7 +161,7 @@ __all__ = [
|
|||
"LoadError",
|
||||
"LookupError",
|
||||
"NLPQueryError",
|
||||
"ObjectsQueryError",
|
||||
"RowsQueryError",
|
||||
"RequestError",
|
||||
"StructuredQueryError",
|
||||
"UnexpectedError",
|
||||
|
|
|
|||
|
|
@ -115,15 +115,15 @@ class AsyncBulkClient:
|
|||
async for raw_message in websocket:
|
||||
yield json.loads(raw_message)
|
||||
|
||||
async def import_objects(self, flow: str, objects: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
|
||||
"""Bulk import objects via WebSocket"""
|
||||
ws_url = f"{self.url}/api/v1/flow/{flow}/import/objects"
|
||||
async def import_rows(self, flow: str, rows: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
|
||||
"""Bulk import rows via WebSocket"""
|
||||
ws_url = f"{self.url}/api/v1/flow/{flow}/import/rows"
|
||||
if self.token:
|
||||
ws_url = f"{ws_url}?token={self.token}"
|
||||
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
|
||||
async for obj in objects:
|
||||
await websocket.send(json.dumps(obj))
|
||||
async for row in rows:
|
||||
await websocket.send(json.dumps(row))
|
||||
|
||||
async def aclose(self) -> None:
|
||||
"""Close connections"""
|
||||
|
|
|
|||
|
|
@ -708,18 +708,18 @@ class AsyncFlowInstance:
|
|||
|
||||
return await self.request("triples", request_data)
|
||||
|
||||
async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
|
||||
operation_name: Optional[str] = None, **kwargs: Any):
|
||||
async def rows_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
|
||||
operation_name: Optional[str] = None, **kwargs: Any):
|
||||
"""
|
||||
Execute a GraphQL query on stored objects.
|
||||
Execute a GraphQL query on stored rows.
|
||||
|
||||
Queries structured data objects using GraphQL syntax. Supports complex
|
||||
Queries structured data rows using GraphQL syntax. Supports complex
|
||||
queries with variables and named operations.
|
||||
|
||||
Args:
|
||||
query: GraphQL query string
|
||||
user: User identifier
|
||||
collection: Collection identifier containing objects
|
||||
collection: Collection identifier containing rows
|
||||
variables: Optional GraphQL query variables
|
||||
operation_name: Optional operation name for multi-operation queries
|
||||
**kwargs: Additional service-specific parameters
|
||||
|
|
@ -743,7 +743,7 @@ class AsyncFlowInstance:
|
|||
}
|
||||
'''
|
||||
|
||||
result = await flow.objects_query(
|
||||
result = await flow.rows_query(
|
||||
query=query,
|
||||
user="trustgraph",
|
||||
collection="users",
|
||||
|
|
@ -765,4 +765,4 @@ class AsyncFlowInstance:
|
|||
request_data["operationName"] = operation_name
|
||||
request_data.update(kwargs)
|
||||
|
||||
return await self.request("objects", request_data)
|
||||
return await self.request("rows", request_data)
|
||||
|
|
|
|||
|
|
@ -320,9 +320,9 @@ class AsyncSocketFlowInstance:
|
|||
|
||||
return await self.client._send_request("triples", self.flow_id, request)
|
||||
|
||||
async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
|
||||
operation_name: Optional[str] = None, **kwargs):
|
||||
"""GraphQL query"""
|
||||
async def rows_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
|
||||
operation_name: Optional[str] = None, **kwargs):
|
||||
"""GraphQL query against structured rows"""
|
||||
request = {
|
||||
"query": query,
|
||||
"user": user,
|
||||
|
|
@ -334,7 +334,7 @@ class AsyncSocketFlowInstance:
|
|||
request["operationName"] = operation_name
|
||||
request.update(kwargs)
|
||||
|
||||
return await self.client._send_request("objects", self.flow_id, request)
|
||||
return await self.client._send_request("rows", self.flow_id, request)
|
||||
|
||||
async def mcp_tool(self, name: str, parameters: Dict[str, Any], **kwargs):
|
||||
"""Execute MCP tool"""
|
||||
|
|
|
|||
|
|
@ -530,45 +530,45 @@ class BulkClient:
|
|||
async for raw_message in websocket:
|
||||
yield json.loads(raw_message)
|
||||
|
||||
def import_objects(self, flow: str, objects: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
|
||||
def import_rows(self, flow: str, rows: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
|
||||
"""
|
||||
Bulk import structured objects into a flow.
|
||||
Bulk import structured rows into a flow.
|
||||
|
||||
Efficiently uploads structured data objects via WebSocket streaming
|
||||
Efficiently uploads structured data rows via WebSocket streaming
|
||||
for use in GraphQL queries.
|
||||
|
||||
Args:
|
||||
flow: Flow identifier
|
||||
objects: Iterator yielding object dictionaries
|
||||
rows: Iterator yielding row dictionaries
|
||||
**kwargs: Additional parameters (reserved for future use)
|
||||
|
||||
Example:
|
||||
```python
|
||||
bulk = api.bulk()
|
||||
|
||||
# Generate objects to import
|
||||
def object_generator():
|
||||
yield {"id": "obj1", "name": "Object 1", "value": 100}
|
||||
yield {"id": "obj2", "name": "Object 2", "value": 200}
|
||||
# ... more objects
|
||||
# Generate rows to import
|
||||
def row_generator():
|
||||
yield {"id": "row1", "name": "Row 1", "value": 100}
|
||||
yield {"id": "row2", "name": "Row 2", "value": 200}
|
||||
# ... more rows
|
||||
|
||||
bulk.import_objects(
|
||||
bulk.import_rows(
|
||||
flow="default",
|
||||
objects=object_generator()
|
||||
rows=row_generator()
|
||||
)
|
||||
```
|
||||
"""
|
||||
self._run_async(self._import_objects_async(flow, objects))
|
||||
self._run_async(self._import_rows_async(flow, rows))
|
||||
|
||||
async def _import_objects_async(self, flow: str, objects: Iterator[Dict[str, Any]]) -> None:
|
||||
"""Async implementation of objects import"""
|
||||
ws_url = f"{self.url}/api/v1/flow/{flow}/import/objects"
|
||||
async def _import_rows_async(self, flow: str, rows: Iterator[Dict[str, Any]]) -> None:
|
||||
"""Async implementation of rows import"""
|
||||
ws_url = f"{self.url}/api/v1/flow/{flow}/import/rows"
|
||||
if self.token:
|
||||
ws_url = f"{ws_url}?token={self.token}"
|
||||
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
|
||||
for obj in objects:
|
||||
await websocket.send(json.dumps(obj))
|
||||
for row in rows:
|
||||
await websocket.send(json.dumps(row))
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close connections"""
|
||||
|
|
|
|||
|
|
@ -71,8 +71,8 @@ class NLPQueryError(TrustGraphException):
|
|||
pass
|
||||
|
||||
|
||||
class ObjectsQueryError(TrustGraphException):
|
||||
"""Objects query service error"""
|
||||
class RowsQueryError(TrustGraphException):
|
||||
"""Rows query service error"""
|
||||
pass
|
||||
|
||||
|
||||
|
|
@ -103,7 +103,7 @@ ERROR_TYPE_MAPPING = {
|
|||
"load-error": LoadError,
|
||||
"lookup-error": LookupError,
|
||||
"nlp-query-error": NLPQueryError,
|
||||
"objects-query-error": ObjectsQueryError,
|
||||
"rows-query-error": RowsQueryError,
|
||||
"request-error": RequestError,
|
||||
"structured-query-error": StructuredQueryError,
|
||||
"unexpected-error": UnexpectedError,
|
||||
|
|
|
|||
|
|
@ -1001,12 +1001,12 @@ class FlowInstance:
|
|||
input
|
||||
)
|
||||
|
||||
def objects_query(
|
||||
def rows_query(
|
||||
self, query, user="trustgraph", collection="default",
|
||||
variables=None, operation_name=None
|
||||
):
|
||||
"""
|
||||
Execute a GraphQL query against structured objects in the knowledge graph.
|
||||
Execute a GraphQL query against structured rows in the knowledge graph.
|
||||
|
||||
Queries structured data using GraphQL syntax, allowing complex queries
|
||||
with filtering, aggregation, and relationship traversal.
|
||||
|
|
@ -1038,7 +1038,7 @@ class FlowInstance:
|
|||
}
|
||||
}
|
||||
'''
|
||||
result = flow.objects_query(
|
||||
result = flow.rows_query(
|
||||
query=query,
|
||||
user="trustgraph",
|
||||
collection="scientists"
|
||||
|
|
@ -1053,7 +1053,7 @@ class FlowInstance:
|
|||
}
|
||||
}
|
||||
'''
|
||||
result = flow.objects_query(
|
||||
result = flow.rows_query(
|
||||
query=query,
|
||||
variables={"name": "Marie Curie"}
|
||||
)
|
||||
|
|
@ -1074,7 +1074,7 @@ class FlowInstance:
|
|||
input["operation_name"] = operation_name
|
||||
|
||||
response = self.request(
|
||||
"service/objects",
|
||||
"service/rows",
|
||||
input
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -789,7 +789,7 @@ class SocketFlowInstance:
|
|||
|
||||
return self.client._send_request_sync("triples", self.flow_id, request, False)
|
||||
|
||||
def objects_query(
|
||||
def rows_query(
|
||||
self,
|
||||
query: str,
|
||||
user: str,
|
||||
|
|
@ -799,7 +799,7 @@ class SocketFlowInstance:
|
|||
**kwargs: Any
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Execute a GraphQL query against structured objects.
|
||||
Execute a GraphQL query against structured rows.
|
||||
|
||||
Args:
|
||||
query: GraphQL query string
|
||||
|
|
@ -826,7 +826,7 @@ class SocketFlowInstance:
|
|||
}
|
||||
}
|
||||
'''
|
||||
result = flow.objects_query(
|
||||
result = flow.rows_query(
|
||||
query=query,
|
||||
user="trustgraph",
|
||||
collection="scientists"
|
||||
|
|
@ -844,7 +844,7 @@ class SocketFlowInstance:
|
|||
request["operationName"] = operation_name
|
||||
request.update(kwargs)
|
||||
|
||||
return self.client._send_request_sync("objects", self.flow_id, request, False)
|
||||
return self.client._send_request_sync("rows", self.flow_id, request, False)
|
||||
|
||||
def mcp_tool(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ from .translators.embeddings_query import (
|
|||
DocumentEmbeddingsRequestTranslator, DocumentEmbeddingsResponseTranslator,
|
||||
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator
|
||||
)
|
||||
from .translators.objects_query import ObjectsQueryRequestTranslator, ObjectsQueryResponseTranslator
|
||||
from .translators.rows_query import RowsQueryRequestTranslator, RowsQueryResponseTranslator
|
||||
from .translators.nlp_query import QuestionToStructuredQueryRequestTranslator, QuestionToStructuredQueryResponseTranslator
|
||||
from .translators.structured_query import StructuredQueryRequestTranslator, StructuredQueryResponseTranslator
|
||||
from .translators.diagnosis import StructuredDataDiagnosisRequestTranslator, StructuredDataDiagnosisResponseTranslator
|
||||
|
|
@ -113,9 +113,9 @@ TranslatorRegistry.register_service(
|
|||
)
|
||||
|
||||
TranslatorRegistry.register_service(
|
||||
"objects-query",
|
||||
ObjectsQueryRequestTranslator(),
|
||||
ObjectsQueryResponseTranslator()
|
||||
"rows-query",
|
||||
RowsQueryRequestTranslator(),
|
||||
RowsQueryResponseTranslator()
|
||||
)
|
||||
|
||||
TranslatorRegistry.register_service(
|
||||
|
|
|
|||
|
|
@ -17,5 +17,5 @@ from .embeddings_query import (
|
|||
DocumentEmbeddingsRequestTranslator, DocumentEmbeddingsResponseTranslator,
|
||||
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator
|
||||
)
|
||||
from .objects_query import ObjectsQueryRequestTranslator, ObjectsQueryResponseTranslator
|
||||
from .rows_query import RowsQueryRequestTranslator, RowsQueryResponseTranslator
|
||||
from .diagnosis import StructuredDataDiagnosisRequestTranslator, StructuredDataDiagnosisResponseTranslator
|
||||
|
|
|
|||
|
|
@ -1,44 +1,44 @@
|
|||
from typing import Dict, Any, Tuple, Optional
|
||||
from ...schema import ObjectsQueryRequest, ObjectsQueryResponse
|
||||
from ...schema import RowsQueryRequest, RowsQueryResponse
|
||||
from .base import MessageTranslator
|
||||
import json
|
||||
|
||||
|
||||
class ObjectsQueryRequestTranslator(MessageTranslator):
|
||||
"""Translator for ObjectsQueryRequest schema objects"""
|
||||
|
||||
def to_pulsar(self, data: Dict[str, Any]) -> ObjectsQueryRequest:
|
||||
return ObjectsQueryRequest(
|
||||
class RowsQueryRequestTranslator(MessageTranslator):
|
||||
"""Translator for RowsQueryRequest schema objects"""
|
||||
|
||||
def to_pulsar(self, data: Dict[str, Any]) -> RowsQueryRequest:
|
||||
return RowsQueryRequest(
|
||||
user=data.get("user", "trustgraph"),
|
||||
collection=data.get("collection", "default"),
|
||||
query=data.get("query", ""),
|
||||
variables=data.get("variables", {}),
|
||||
operation_name=data.get("operation_name", None)
|
||||
)
|
||||
|
||||
def from_pulsar(self, obj: ObjectsQueryRequest) -> Dict[str, Any]:
|
||||
|
||||
def from_pulsar(self, obj: RowsQueryRequest) -> Dict[str, Any]:
|
||||
result = {
|
||||
"user": obj.user,
|
||||
"collection": obj.collection,
|
||||
"query": obj.query,
|
||||
"variables": dict(obj.variables) if obj.variables else {}
|
||||
}
|
||||
|
||||
|
||||
if obj.operation_name:
|
||||
result["operation_name"] = obj.operation_name
|
||||
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class ObjectsQueryResponseTranslator(MessageTranslator):
|
||||
"""Translator for ObjectsQueryResponse schema objects"""
|
||||
|
||||
def to_pulsar(self, data: Dict[str, Any]) -> ObjectsQueryResponse:
|
||||
class RowsQueryResponseTranslator(MessageTranslator):
|
||||
"""Translator for RowsQueryResponse schema objects"""
|
||||
|
||||
def to_pulsar(self, data: Dict[str, Any]) -> RowsQueryResponse:
|
||||
raise NotImplementedError("Response translation to Pulsar not typically needed")
|
||||
|
||||
def from_pulsar(self, obj: ObjectsQueryResponse) -> Dict[str, Any]:
|
||||
|
||||
def from_pulsar(self, obj: RowsQueryResponse) -> Dict[str, Any]:
|
||||
result = {}
|
||||
|
||||
|
||||
# Handle GraphQL response data
|
||||
if obj.data:
|
||||
try:
|
||||
|
|
@ -47,7 +47,7 @@ class ObjectsQueryResponseTranslator(MessageTranslator):
|
|||
result["data"] = obj.data
|
||||
else:
|
||||
result["data"] = None
|
||||
|
||||
|
||||
# Handle GraphQL errors
|
||||
if obj.errors:
|
||||
result["errors"] = []
|
||||
|
|
@ -60,20 +60,20 @@ class ObjectsQueryResponseTranslator(MessageTranslator):
|
|||
if error.extensions:
|
||||
error_dict["extensions"] = dict(error.extensions)
|
||||
result["errors"].append(error_dict)
|
||||
|
||||
|
||||
# Handle extensions
|
||||
if obj.extensions:
|
||||
result["extensions"] = dict(obj.extensions)
|
||||
|
||||
|
||||
# Handle system-level error
|
||||
if obj.error:
|
||||
result["error"] = {
|
||||
"type": obj.error.type,
|
||||
"message": obj.error.message
|
||||
}
|
||||
|
||||
|
||||
return result
|
||||
|
||||
def from_response_with_completion(self, obj: ObjectsQueryResponse) -> Tuple[Dict[str, Any], bool]:
|
||||
|
||||
def from_response_with_completion(self, obj: RowsQueryResponse) -> Tuple[Dict[str, Any], bool]:
|
||||
"""Returns (response_dict, is_final)"""
|
||||
return self.from_pulsar(obj), True
|
||||
return self.from_pulsar(obj), True
|
||||
|
|
@ -60,3 +60,23 @@ class StructuredObjectEmbedding:
|
|||
field_embeddings: dict[str, list[float]] = field(default_factory=dict) # Per-field embeddings
|
||||
|
||||
############################################################################
|
||||
|
||||
# Row embeddings are embeddings associated with indexed field values
|
||||
# in structured row data. Each index gets embedded separately.
|
||||
|
||||
@dataclass
|
||||
class RowIndexEmbedding:
|
||||
"""Single row's embedding for one index"""
|
||||
index_name: str = "" # The indexed field name(s)
|
||||
index_value: list[str] = field(default_factory=list) # The field value(s)
|
||||
text: str = "" # Text that was embedded
|
||||
vectors: list[list[float]] = field(default_factory=list)
|
||||
|
||||
@dataclass
|
||||
class RowEmbeddings:
|
||||
"""Batched row embeddings for a schema"""
|
||||
metadata: Metadata | None = None
|
||||
schema_name: str = ""
|
||||
embeddings: list[RowIndexEmbedding] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from .library import *
|
|||
from .lookup import *
|
||||
from .nlp_query import *
|
||||
from .structured_query import *
|
||||
from .objects_query import *
|
||||
from .rows_query import *
|
||||
from .diagnosis import *
|
||||
from .collection import *
|
||||
from .storage import *
|
||||
|
|
@ -59,4 +59,39 @@ document_embeddings_request_queue = topic(
|
|||
)
|
||||
document_embeddings_response_queue = topic(
|
||||
"document-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
|
||||
)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Row embeddings query - for semantic/fuzzy matching on row index values
|
||||
|
||||
@dataclass
|
||||
class RowIndexMatch:
|
||||
"""A single matching row index from a semantic search"""
|
||||
index_name: str = "" # The indexed field(s)
|
||||
index_value: list[str] = field(default_factory=list) # The index values
|
||||
text: str = "" # The text that was embedded
|
||||
score: float = 0.0 # Similarity score
|
||||
|
||||
@dataclass
|
||||
class RowEmbeddingsRequest:
|
||||
"""Request for row embeddings semantic search"""
|
||||
vectors: list[list[float]] = field(default_factory=list) # Query vectors
|
||||
limit: int = 10 # Max results to return
|
||||
user: str = "" # User/keyspace
|
||||
collection: str = "" # Collection name
|
||||
schema_name: str = "" # Schema name to search within
|
||||
index_name: str | None = None # Optional: filter to specific index
|
||||
|
||||
@dataclass
|
||||
class RowEmbeddingsResponse:
|
||||
"""Response from row embeddings semantic search"""
|
||||
error: Error | None = None
|
||||
matches: list[RowIndexMatch] = field(default_factory=list)
|
||||
|
||||
row_embeddings_request_queue = topic(
|
||||
"row-embeddings-request", qos='q0', tenant='trustgraph', namespace='flow'
|
||||
)
|
||||
row_embeddings_response_queue = topic(
|
||||
"row-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
|
||||
)
|
||||
|
|
@ -6,7 +6,7 @@ from ..core.topic import topic
|
|||
|
||||
############################################################################
|
||||
|
||||
# Objects Query Service - executes GraphQL queries against structured data
|
||||
# Rows Query Service - executes GraphQL queries against structured data
|
||||
|
||||
@dataclass
|
||||
class GraphQLError:
|
||||
|
|
@ -15,7 +15,7 @@ class GraphQLError:
|
|||
extensions: dict[str, str] = field(default_factory=dict) # Additional error metadata
|
||||
|
||||
@dataclass
|
||||
class ObjectsQueryRequest:
|
||||
class RowsQueryRequest:
|
||||
user: str = "" # Cassandra keyspace (follows pattern from TriplesQueryRequest)
|
||||
collection: str = "" # Data collection identifier (required for partition key)
|
||||
query: str = "" # GraphQL query string
|
||||
|
|
@ -23,7 +23,7 @@ class ObjectsQueryRequest:
|
|||
operation_name: Optional[str] = None # Operation to execute for multi-operation documents
|
||||
|
||||
@dataclass
|
||||
class ObjectsQueryResponse:
|
||||
class RowsQueryResponse:
|
||||
error: Error | None = None # System-level error (connection, timeout, etc.)
|
||||
data: str = "" # JSON-encoded GraphQL response data
|
||||
errors: list[GraphQLError] = field(default_factory=list) # GraphQL field-level errors
|
||||
Loading…
Add table
Add a link
Reference in a new issue