trustgraph/trustgraph-base/trustgraph/schema/services/query.py
cybermaggedon 24bbe94136
Document chunks not stored in vector store (#665)
- Schema - ChunkEmbeddings now uses chunk_id: str instead of chunk: bytes
- Schema - DocumentEmbeddingsResponse now returns chunk_ids: list[str]
  instead of chunks
- Translators - Updated to serialize/deserialize chunk_id
- Clients - DocumentEmbeddingsClient.query() returns chunk_ids
- SDK/API - flow.py, socket_client.py, bulk_client.py updated
- Document embeddings service - Stores chunk_id (document ID) instead
  of chunk text
- Storage writers - Qdrant, Milvus, Pinecone store chunk_id in payload
- Query services - Return chunk_id from vector store searches
- Gateway dispatchers - Serialize chunk_id in API responses
- Document RAG - Added librarian client to fetch chunk content from
  Garage using chunk_ids
- CLI tools - Updated all three tools:
  - invoke_document_embeddings.py - displays chunk_ids, removed
    max_chunk_length
  - save_doc_embeds.py - exports chunk_id
  - load_doc_embeds.py - imports chunk_id
2026-03-07 23:10:45 +00:00

97 lines
No EOL
3.1 KiB
Python

from dataclasses import dataclass, field
from ..core.primitives import Error, Term, Triple
from ..core.topic import topic
############################################################################
# Graph embeddings query
@dataclass
class GraphEmbeddingsRequest:
    """Request message for a graph embeddings query.

    Every field has a default, so the message can be created with no
    arguments and filled in afterwards.
    """

    # Query vectors; each inner list is one vector of floats.
    vectors: list[list[float]] = field(default_factory=list)
    # Result limit. NOTE(review): the default is 0 -- whether 0 means
    # "no limit" is decided by the consuming service; confirm.
    limit: int = 0
    # User identifier (empty string by default).
    user: str = ""
    # Collection name (empty string by default).
    collection: str = ""
@dataclass
class GraphEmbeddingsResponse:
    """Response message for a graph embeddings query."""

    # Error details. None by default; presumably populated only when the
    # query failed -- confirm with the producing service.
    error: Error | None = None
    # Entities returned by the query, as Term values.
    entities: list[Term] = field(default_factory=list)
############################################################################
# Graph triples query
@dataclass
class TriplesQueryRequest:
    """Request message for a graph triples query.

    Every field has a default, so an empty request can be constructed
    and then narrowed by setting individual fields.
    """

    # User identifier (empty string by default).
    user: str = ""
    # Collection name (empty string by default).
    collection: str = ""
    # Triple terms to match on. NOTE(review): presumably subject,
    # predicate and object, with None meaning "unconstrained" -- confirm
    # against the query services.
    s: Term | None = None
    p: Term | None = None
    o: Term | None = None
    # Graph IRI. None=default graph, "*"=all graphs
    g: str | None = None
    # Result limit. NOTE(review): the default is 0 -- whether 0 means
    # "no limit" is decided by the consuming service; confirm.
    limit: int = 0
@dataclass
class TriplesQueryResponse:
    """Response message for a graph triples query."""

    # Error details. None by default; presumably populated only when the
    # query failed -- confirm with the producing service.
    error: Error | None = None
    # Triples returned by the query.
    triples: list[Triple] = field(default_factory=list)
############################################################################
# Doc embeddings query
@dataclass
class DocumentEmbeddingsRequest:
    """Request message for a document embeddings query.

    Every field has a default, so the message can be created with no
    arguments and filled in afterwards.
    """

    # Query vectors; each inner list is one vector of floats.
    vectors: list[list[float]] = field(default_factory=list)
    # Result limit. NOTE(review): the default is 0 -- whether 0 means
    # "no limit" is decided by the consuming service; confirm.
    limit: int = 0
    # User identifier (empty string by default).
    user: str = ""
    # Collection name (empty string by default).
    collection: str = ""
@dataclass
class DocumentEmbeddingsResponse:
    """Response message for a document embeddings query.

    The response carries chunk identifiers rather than chunk content;
    callers that need the text must look it up by ID.
    """

    # Error details. None by default; presumably populated only when the
    # query failed -- confirm with the producing service.
    error: Error | None = None
    # Identifiers of the matching chunks, as strings.
    chunk_ids: list[str] = field(default_factory=list)
# Request-side topic for document embeddings queries, built with the
# project's `topic` helper (qos 'q0', tenant 'trustgraph', namespace 'flow').
document_embeddings_request_queue = topic(
    "document-embeddings-request",
    qos='q0',
    tenant='trustgraph',
    namespace='flow',
)

# Response-side counterpart, using the same qos, tenant and namespace.
document_embeddings_response_queue = topic(
    "document-embeddings-response",
    qos='q0',
    tenant='trustgraph',
    namespace='flow',
)
############################################################################
# Row embeddings query - for semantic/fuzzy matching on row index values
@dataclass
class RowIndexMatch:
    """A single matching row index from a semantic search.

    Every field has a default, so an empty match can be constructed and
    populated afterwards.
    """

    # The indexed field(s)
    index_name: str = ""
    # The index values
    index_value: list[str] = field(default_factory=list)
    # The text that was embedded
    text: str = ""
    # Similarity score
    score: float = 0.0
@dataclass
class RowEmbeddingsRequest:
    """Request for row embeddings semantic search.

    Every field has a default; `limit` defaults to 10 and `index_name`
    to None (no index filter supplied).
    """

    # Query vectors
    vectors: list[list[float]] = field(default_factory=list)
    # Max results to return
    limit: int = 10
    # User/keyspace
    user: str = ""
    # Collection name
    collection: str = ""
    # Schema name to search within
    schema_name: str = ""
    # Optional: filter to specific index
    index_name: str | None = None
@dataclass
class RowEmbeddingsResponse:
    """Response from row embeddings semantic search."""

    # Error details. None by default; presumably populated only when the
    # search failed -- confirm with the producing service.
    error: Error | None = None
    # Matching row indexes, one RowIndexMatch per hit.
    matches: list[RowIndexMatch] = field(default_factory=list)
# Request-side topic for row embeddings searches, built with the
# project's `topic` helper (qos 'q0', tenant 'trustgraph', namespace 'flow').
row_embeddings_request_queue = topic(
    "row-embeddings-request",
    qos='q0',
    tenant='trustgraph',
    namespace='flow',
)

# Response-side counterpart, using the same qos, tenant and namespace.
row_embeddings_response_queue = topic(
    "row-embeddings-response",
    qos='q0',
    tenant='trustgraph',
    namespace='flow',
)