Structured data 2 (#645)

* Structured data refactor: multi-index tables; removes the need for manual modifications to the Cassandra tables

* Tech spec updated to track implementation
This commit is contained in:
cybermaggedon 2026-02-23 15:56:29 +00:00 committed by GitHub
parent 5ffad92345
commit 1809c1f56d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
87 changed files with 5233 additions and 3235 deletions

View file

@ -9,7 +9,7 @@ from .library import *
from .lookup import *
from .nlp_query import *
from .structured_query import *
from .objects_query import *
from .rows_query import *
from .diagnosis import *
from .collection import *
from .storage import *

View file

@ -59,4 +59,39 @@ document_embeddings_request_queue = topic(
)
# Topic handle for the "document-embeddings-response" queue
document_embeddings_response_queue = topic(
    "document-embeddings-response",
    qos='q0',
    tenant='trustgraph',
    namespace='flow',
)
############################################################################
# Row embeddings query - for semantic/fuzzy matching on row index values
@dataclass
class RowIndexMatch:
    """A single matching row index from a semantic search.

    Every field has a default, so an instance can be built with no
    arguments and populated afterwards.
    """

    # The indexed field(s)
    index_name: str = ""

    # The index values; default_factory gives each instance its own list
    index_value: list[str] = field(default_factory=list)

    # The text that was embedded
    text: str = ""

    # Similarity score
    score: float = 0.0
@dataclass
class RowEmbeddingsRequest:
"""Request for row embeddings semantic search"""
vectors: list[list[float]] = field(default_factory=list) # Query vectors
limit: int = 10 # Max results to return
user: str = "" # User/keyspace
collection: str = "" # Collection name
schema_name: str = "" # Schema name to search within
index_name: str | None = None # Optional: filter to specific index
@dataclass
class RowEmbeddingsResponse:
    """Response from row embeddings semantic search."""

    # Error value, or None when no error is set (the default)
    error: Error | None = None

    # Matching row indexes; default_factory gives each instance its own list
    matches: list[RowIndexMatch] = field(default_factory=list)
# Topic handles for the row embeddings request and response queues
row_embeddings_request_queue = topic(
    "row-embeddings-request",
    qos='q0',
    tenant='trustgraph',
    namespace='flow',
)
row_embeddings_response_queue = topic(
    "row-embeddings-response",
    qos='q0',
    tenant='trustgraph',
    namespace='flow',
)

View file

@ -6,7 +6,7 @@ from ..core.topic import topic
############################################################################
# Objects Query Service - executes GraphQL queries against structured data
# Rows Query Service - executes GraphQL queries against structured data
@dataclass
class GraphQLError:
@ -15,7 +15,7 @@ class GraphQLError:
extensions: dict[str, str] = field(default_factory=dict) # Additional error metadata
@dataclass
class ObjectsQueryRequest:
class RowsQueryRequest:
user: str = "" # Cassandra keyspace (follows pattern from TriplesQueryRequest)
collection: str = "" # Data collection identifier (required for partition key)
query: str = "" # GraphQL query string
@ -23,7 +23,7 @@ class ObjectsQueryRequest:
operation_name: Optional[str] = None # Operation to execute for multi-operation documents
@dataclass
class ObjectsQueryResponse:
class RowsQueryResponse:
error: Error | None = None # System-level error (connection, timeout, etc.)
data: str = "" # JSON-encoded GraphQL response data
errors: list[GraphQLError] = field(default_factory=list) # GraphQL field-level errors