mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-18 03:45:12 +02:00
Structured data 2 (#645)
* Structured data refactor - multi-index tables, remove need for manual mods to the Cassandra tables * Tech spec updated to track implementation
This commit is contained in:
parent
5ffad92345
commit
1809c1f56d
87 changed files with 5233 additions and 3235 deletions
|
|
@ -60,3 +60,23 @@ class StructuredObjectEmbedding:
|
|||
field_embeddings: dict[str, list[float]] = field(default_factory=dict) # Per-field embeddings
|
||||
|
||||
############################################################################
|
||||
|
||||
# Row embeddings are embeddings associated with indexed field values
|
||||
# in structured row data. Each index gets embedded separately.
|
||||
|
||||
@dataclass
class RowIndexEmbedding:
    """Single row's embedding for one index.

    Attributes:
        index_name: The indexed field name(s).
        index_value: The field value(s) for that index.
        text: The text that was embedded.
        vectors: Presumably the embedding vector(s) produced for ``text``
            (TODO confirm against the producer).
    """

    index_name: str = ""  # The indexed field name(s)
    # Mutable defaults go through default_factory so instances never share
    # the same list object.
    index_value: list[str] = field(default_factory=list)  # The field value(s)
    text: str = ""  # Text that was embedded
    vectors: list[list[float]] = field(default_factory=list)
|
||||
|
||||
@dataclass
class RowEmbeddings:
    """Batched row embeddings for a schema.

    Attributes:
        metadata: Optional metadata object; ``None`` when not supplied.
        schema_name: Name of the schema the embeddings belong to.
        embeddings: The batch of per-index embeddings.
    """

    metadata: Metadata | None = None
    schema_name: str = ""
    # default_factory gives every instance its own list.
    embeddings: list[RowIndexEmbedding] = field(default_factory=list)
|
||||
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from .library import *
|
|||
from .lookup import *
|
||||
from .nlp_query import *
|
||||
from .structured_query import *
|
||||
from .objects_query import *
|
||||
from .rows_query import *
|
||||
from .diagnosis import *
|
||||
from .collection import *
|
||||
from .storage import *
|
||||
|
|
@ -59,4 +59,39 @@ document_embeddings_request_queue = topic(
|
|||
)
|
||||
# Topic for document embeddings responses.
document_embeddings_response_queue = topic(
    "document-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Row embeddings query - for semantic/fuzzy matching on row index values
|
||||
|
||||
@dataclass
class RowIndexMatch:
    """A single matching row index from a semantic search.

    Attributes:
        index_name: The indexed field(s).
        index_value: The index values.
        text: The text that was embedded.
        score: Similarity score of the match.
    """

    index_name: str = ""  # The indexed field(s)
    # default_factory gives every instance its own list.
    index_value: list[str] = field(default_factory=list)  # The index values
    text: str = ""  # The text that was embedded
    score: float = 0.0  # Similarity score
|
||||
|
||||
@dataclass
|
||||
class RowEmbeddingsRequest:
|
||||
"""Request for row embeddings semantic search"""
|
||||
vectors: list[list[float]] = field(default_factory=list) # Query vectors
|
||||
limit: int = 10 # Max results to return
|
||||
user: str = "" # User/keyspace
|
||||
collection: str = "" # Collection name
|
||||
schema_name: str = "" # Schema name to search within
|
||||
index_name: str | None = None # Optional: filter to specific index
|
||||
|
||||
@dataclass
class RowEmbeddingsResponse:
    """Response from row embeddings semantic search.

    Attributes:
        error: Error object, or ``None`` when no error is set.
        matches: The matching row indexes; empty by default.
    """

    error: Error | None = None
    # default_factory gives every instance its own list.
    matches: list[RowIndexMatch] = field(default_factory=list)
|
||||
|
||||
# Topic for row embeddings requests.
row_embeddings_request_queue = topic(
    "row-embeddings-request", qos='q0', tenant='trustgraph', namespace='flow'
)
# Topic for row embeddings responses.
row_embeddings_response_queue = topic(
    "row-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
)
|
||||
|
|
@ -6,7 +6,7 @@ from ..core.topic import topic
|
|||
|
||||
############################################################################
|
||||
|
||||
# Objects Query Service - executes GraphQL queries against structured data
|
||||
# Rows Query Service - executes GraphQL queries against structured data
|
||||
|
||||
@dataclass
|
||||
class GraphQLError:
|
||||
|
|
@ -15,7 +15,7 @@ class GraphQLError:
|
|||
extensions: dict[str, str] = field(default_factory=dict) # Additional error metadata
|
||||
|
||||
@dataclass
|
||||
class ObjectsQueryRequest:
|
||||
class RowsQueryRequest:
|
||||
user: str = "" # Cassandra keyspace (follows pattern from TriplesQueryRequest)
|
||||
collection: str = "" # Data collection identifier (required for partition key)
|
||||
query: str = "" # GraphQL query string
|
||||
|
|
@ -23,7 +23,7 @@ class ObjectsQueryRequest:
|
|||
operation_name: Optional[str] = None # Operation to execute for multi-operation documents
|
||||
|
||||
@dataclass
|
||||
class ObjectsQueryResponse:
|
||||
class RowsQueryResponse:
|
||||
error: Error | None = None # System-level error (connection, timeout, etc.)
|
||||
data: str = "" # JSON-encoded GraphQL response data
|
||||
errors: list[GraphQLError] = field(default_factory=list) # GraphQL field-level errors
|
||||
Loading…
Add table
Add a link
Reference in a new issue