Structured data 2 (#645)

* Structured data refactor - multi-index tables, remove need for manual mods to the Cassandra tables

* Tech spec updated to track implementation
This commit is contained in:
cybermaggedon 2026-02-23 15:56:29 +00:00 committed by GitHub
parent 5ffad92345
commit 1809c1f56d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
87 changed files with 5233 additions and 3235 deletions

View file

@ -101,7 +101,7 @@ from .exceptions import (
LoadError,
LookupError,
NLPQueryError,
ObjectsQueryError,
RowsQueryError,
RequestError,
StructuredQueryError,
UnexpectedError,
@ -161,7 +161,7 @@ __all__ = [
"LoadError",
"LookupError",
"NLPQueryError",
"ObjectsQueryError",
"RowsQueryError",
"RequestError",
"StructuredQueryError",
"UnexpectedError",

View file

@ -115,15 +115,15 @@ class AsyncBulkClient:
async for raw_message in websocket:
yield json.loads(raw_message)
# NOTE(review): this SOURCE is a rendered commit diff whose +/- markers and
# indentation were stripped. The next three lines appear to be the pre-rename
# form of this method (import_objects -> import_rows); the import_rows lines
# after them look like their replacements -- confirm against the repository.
async def import_objects(self, flow: str, objects: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
"""Bulk import objects via WebSocket"""
ws_url = f"{self.url}/api/v1/flow/{flow}/import/objects"
# Streams every item of the async iterator `rows` to the flow's row-import
# WebSocket endpoint. `kwargs` is accepted but unused in the visible body.
async def import_rows(self, flow: str, rows: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
"""Bulk import rows via WebSocket"""
# Endpoint is derived from self.url and the flow identifier.
ws_url = f"{self.url}/api/v1/flow/{flow}/import/rows"
# A configured token is appended verbatim as the `token` query parameter
# (no URL-encoding is applied here).
if self.token:
ws_url = f"{ws_url}?token={self.token}"
# ping_interval is fixed at 20; self.timeout is passed as ping_timeout.
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
# NOTE(review): presumably the removed loop over `objects`; superseded below.
async for obj in objects:
await websocket.send(json.dumps(obj))
# Each row is JSON-encoded and sent as its own message, in iteration order.
async for row in rows:
await websocket.send(json.dumps(row))
async def aclose(self) -> None:
"""Close connections"""

View file

@ -708,18 +708,18 @@ class AsyncFlowInstance:
return await self.request("triples", request_data)
async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
operation_name: Optional[str] = None, **kwargs: Any):
async def rows_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
operation_name: Optional[str] = None, **kwargs: Any):
"""
Execute a GraphQL query on stored objects.
Execute a GraphQL query on stored rows.
Queries structured data objects using GraphQL syntax. Supports complex
Queries structured data rows using GraphQL syntax. Supports complex
queries with variables and named operations.
Args:
query: GraphQL query string
user: User identifier
collection: Collection identifier containing objects
collection: Collection identifier containing rows
variables: Optional GraphQL query variables
operation_name: Optional operation name for multi-operation queries
**kwargs: Additional service-specific parameters
@ -743,7 +743,7 @@ class AsyncFlowInstance:
}
'''
result = await flow.objects_query(
result = await flow.rows_query(
query=query,
user="trustgraph",
collection="users",
@ -765,4 +765,4 @@ class AsyncFlowInstance:
request_data["operationName"] = operation_name
request_data.update(kwargs)
return await self.request("objects", request_data)
return await self.request("rows", request_data)

View file

@ -320,9 +320,9 @@ class AsyncSocketFlowInstance:
return await self.client._send_request("triples", self.flow_id, request)
async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
operation_name: Optional[str] = None, **kwargs):
"""GraphQL query"""
async def rows_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
operation_name: Optional[str] = None, **kwargs):
"""GraphQL query against structured rows"""
request = {
"query": query,
"user": user,
@ -334,7 +334,7 @@ class AsyncSocketFlowInstance:
request["operationName"] = operation_name
request.update(kwargs)
return await self.client._send_request("objects", self.flow_id, request)
return await self.client._send_request("rows", self.flow_id, request)
async def mcp_tool(self, name: str, parameters: Dict[str, Any], **kwargs):
"""Execute MCP tool"""

View file

@ -530,45 +530,45 @@ class BulkClient:
async for raw_message in websocket:
yield json.loads(raw_message)
# NOTE(review): rendered diff without +/- markers or indentation. Lines in this
# hunk come in old/new pairs (objects -> rows), including inside the docstring;
# the first line of each pair appears to be the removed one -- confirm against
# the repository.
def import_objects(self, flow: str, objects: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
# Synchronous entry point: builds the coroutine from _import_rows_async and
# hands it to self._run_async. `kwargs` is accepted but not forwarded.
def import_rows(self, flow: str, rows: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
"""
Bulk import structured objects into a flow.
Bulk import structured rows into a flow.
Efficiently uploads structured data objects via WebSocket streaming
Efficiently uploads structured data rows via WebSocket streaming
for use in GraphQL queries.
Args:
flow: Flow identifier
objects: Iterator yielding object dictionaries
rows: Iterator yielding row dictionaries
**kwargs: Additional parameters (reserved for future use)
Example:
```python
bulk = api.bulk()
# Generate objects to import
def object_generator():
yield {"id": "obj1", "name": "Object 1", "value": 100}
yield {"id": "obj2", "name": "Object 2", "value": 200}
# ... more objects
# Generate rows to import
def row_generator():
yield {"id": "row1", "name": "Row 1", "value": 100}
yield {"id": "row2", "name": "Row 2", "value": 200}
# ... more rows
bulk.import_objects(
bulk.import_rows(
flow="default",
objects=object_generator()
rows=row_generator()
)
```
"""
# NOTE(review): presumably the removed call to the old helper name.
self._run_async(self._import_objects_async(flow, objects))
# Only `flow` and `rows` are passed on; nothing is returned to the caller.
self._run_async(self._import_rows_async(flow, rows))
# NOTE(review): rendered diff without +/- markers or indentation. The next
# three lines appear to be the pre-rename helper (_import_objects_async); the
# _import_rows_async lines after them look like their replacements -- confirm.
async def _import_objects_async(self, flow: str, objects: Iterator[Dict[str, Any]]) -> None:
"""Async implementation of objects import"""
ws_url = f"{self.url}/api/v1/flow/{flow}/import/objects"
# Coroutine behind import_rows: opens one WebSocket connection to the flow's
# row-import endpoint and sends every row over it.
async def _import_rows_async(self, flow: str, rows: Iterator[Dict[str, Any]]) -> None:
"""Async implementation of rows import"""
# Endpoint is derived from self.url and the flow identifier.
ws_url = f"{self.url}/api/v1/flow/{flow}/import/rows"
# A configured token is appended verbatim as the `token` query parameter
# (no URL-encoding is applied here).
if self.token:
ws_url = f"{ws_url}?token={self.token}"
# ping_interval is fixed at 20; self.timeout is passed as ping_timeout.
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket:
# NOTE(review): presumably the removed loop over `objects`; superseded below.
for obj in objects:
await websocket.send(json.dumps(obj))
# `rows` is a plain (synchronous) iterator, so fetching the next row is not
# awaited; only the send of each JSON-encoded row is.
for row in rows:
await websocket.send(json.dumps(row))
def close(self) -> None:
"""Close connections"""

View file

@ -71,8 +71,8 @@ class NLPQueryError(TrustGraphException):
pass
# NOTE(review): rendered diff without +/- markers or indentation -- the next two
# lines appear to be the pre-rename class (ObjectsQueryError), replaced by the
# RowsQueryError lines that follow. Confirm against the repository.
class ObjectsQueryError(TrustGraphException):
"""Objects query service error"""
# Subclasses TrustGraphException and adds no attributes or methods of its own.
# ERROR_TYPE_MAPPING in the same file keys this class under "rows-query-error".
class RowsQueryError(TrustGraphException):
"""Rows query service error"""
pass
@ -103,7 +103,7 @@ ERROR_TYPE_MAPPING = {
"load-error": LoadError,
"lookup-error": LookupError,
"nlp-query-error": NLPQueryError,
"objects-query-error": ObjectsQueryError,
"rows-query-error": RowsQueryError,
"request-error": RequestError,
"structured-query-error": StructuredQueryError,
"unexpected-error": UnexpectedError,

View file

@ -1001,12 +1001,12 @@ class FlowInstance:
input
)
def objects_query(
def rows_query(
self, query, user="trustgraph", collection="default",
variables=None, operation_name=None
):
"""
Execute a GraphQL query against structured objects in the knowledge graph.
Execute a GraphQL query against structured rows in the knowledge graph.
Queries structured data using GraphQL syntax, allowing complex queries
with filtering, aggregation, and relationship traversal.
@ -1038,7 +1038,7 @@ class FlowInstance:
}
}
'''
result = flow.objects_query(
result = flow.rows_query(
query=query,
user="trustgraph",
collection="scientists"
@ -1053,7 +1053,7 @@ class FlowInstance:
}
}
'''
result = flow.objects_query(
result = flow.rows_query(
query=query,
variables={"name": "Marie Curie"}
)
@ -1074,7 +1074,7 @@ class FlowInstance:
input["operation_name"] = operation_name
response = self.request(
"service/objects",
"service/rows",
input
)

View file

@ -789,7 +789,7 @@ class SocketFlowInstance:
return self.client._send_request_sync("triples", self.flow_id, request, False)
def objects_query(
def rows_query(
self,
query: str,
user: str,
@ -799,7 +799,7 @@ class SocketFlowInstance:
**kwargs: Any
) -> Dict[str, Any]:
"""
Execute a GraphQL query against structured objects.
Execute a GraphQL query against structured rows.
Args:
query: GraphQL query string
@ -826,7 +826,7 @@ class SocketFlowInstance:
}
}
'''
result = flow.objects_query(
result = flow.rows_query(
query=query,
user="trustgraph",
collection="scientists"
@ -844,7 +844,7 @@ class SocketFlowInstance:
request["operationName"] = operation_name
request.update(kwargs)
return self.client._send_request_sync("objects", self.flow_id, request, False)
return self.client._send_request_sync("rows", self.flow_id, request, False)
def mcp_tool(
self,