mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-17 03:15:14 +02:00
release/v2.4 -> master (#924)
* CLI auth migration, document embeddings core lifecycle (#913) Migrate get_kg_core and put_kg_core CLI tools to use Api/SocketClient with first-frame auth (fixes broken raw websocket path). Fix wire format field names (root/vector). Remove ~600 lines of dead raw websocket code from invoke_graph_rag.py. Add document embeddings core lifecycle to the knowledge service: list/get/put/delete/load operations across schema, translator, Cassandra table store, knowledge manager, gateway registry, REST API, socket client, and CLI (tg-get-de-core, tg-put-de-core). Fix delete_kg_core to also clean up document embeddings rows. * Remove spurious workspace parameter from SPARQL algebra evaluator (#915) Fix threading of workspace parameter: - The SPARQL algebra evaluator was threading a workspace parameter through every function and passing it to TriplesClient.query(), which doesn't accept it. Workspace isolation is handled by pub/sub topic routing — the TriplesClient is already scoped to a workspace-specific flow, same as GraphRAG. Passing workspace explicitly was both incorrect and unnecessary. Update tests: - tests/unit/test_query/test_sparql_algebra.py (new) — Tests _query_pattern, _eval_bgp, and evaluate() with various algebra nodes. Key tests assert workspace is never in tc.query() kwargs, plus correctness tests for BGP, JOIN, UNION, SLICE, DISTINCT, and edge cases. - tests/unit/test_retrieval/test_graph_rag.py — Added test_triples_query_never_passes_workspace (checks query()) and test_follow_edges_never_passes_workspace (checks query_stream()). * Make all Cassandra and Qdrant I/O async-safe with proper concurrency controls (#916) Cassandra triples services were using synchronous EntityCentricKnowledgeGraph methods from async contexts, and connection state was managed with threading.local which is wrong for asyncio coroutines sharing a single thread. Qdrant services had no async wrapping at all, blocking the event loop on every network call. 
Rows services had unprotected shared state mutations across concurrent coroutines. - Add async methods to EntityCentricKnowledgeGraph (async_insert, async_get_s/p/o/sp/po/os/spo/all, async_collection_exists, async_create_collection, async_delete_collection) using the existing cassandra_async.async_execute bridge - Rewrite triples write + query services: replace threading.local with asyncio.Lock + dict cache for per-workspace connections, use async ECKG methods for all data operations, keep asyncio.to_thread only for one-time blocking ECKG construction - Wrap all Qdrant calls in asyncio.to_thread across all 6 services (doc/graph/row embeddings write + query), add asyncio.Lock + set cache for collection existence checks - Add asyncio.Lock to rows write + query services to protect shared state (schemas, sessions, config caches) from concurrent mutation - Update all affected tests to match new async patterns * Fixed error only returning a page of results (#921) The root cause: async_execute only materialises the first result page (by design — it says so in its docstring). The streaming query set fetch_size=20 and expected to iterate all results, but only got the first 20 rows back. The fix uses asyncio.to_thread(lambda: list(tg.session.execute(...))) which lets the sync driver iterate all pages in a worker thread — exactly what the pre-async code did. * Optional test warning suppression (#923) * Fix test collection module errors & silence upstream Pytest warnings (#823) * chore: add virtual environment and .env directories to gitignore * test: filter upstream DeprecationWarning and UserWarning messages * fix(namespace): remove empty __init__.py files to fix PEP 420 implicit namespace routing for trustgraph sub-packages * Revert __init__.py deletions * Add .ini changes but commented out, will be useful at times --------- Co-authored-by: Salil M <d2kyt@protonmail.com>
This commit is contained in:
parent
159b1e2824
commit
142dd0231c
42 changed files with 1910 additions and 1492 deletions
|
|
@ -132,3 +132,34 @@ class Knowledge:
|
|||
|
||||
self.request(request = input)
|
||||
|
||||
def list_de_cores(self):
    """Return the IDs of the document-embeddings cores in the workspace.

    Sends a ``list-de-cores`` operation through ``self.request``, scoped to
    ``self.api.workspace``.

    Returns:
        The value stored under the ``"ids"`` key of the response.
    """
    # Named ``payload`` rather than ``input`` so the builtin is not shadowed.
    payload = {
        "operation": "list-de-cores",
        "workspace": self.api.workspace,
    }

    return self.request(request=payload)["ids"]
|
||||
|
||||
def delete_de_core(self, id):
    """Delete a document-embeddings core.

    Sends a ``delete-de-core`` operation through ``self.request``, scoped to
    ``self.api.workspace``. The response is discarded.

    Args:
        id: Identifier of the core to delete.
    """
    # Named ``payload`` rather than ``input`` so the builtin is not shadowed.
    payload = {
        "operation": "delete-de-core",
        "workspace": self.api.workspace,
        "id": id,
    }

    self.request(request=payload)
|
||||
|
||||
def load_de_core(self, id, flow="default", collection="default"):
    """Request that a document-embeddings core be loaded.

    Sends a ``load-de-core`` operation through ``self.request``, scoped to
    ``self.api.workspace``. The response is discarded.

    Args:
        id: Identifier of the core to load.
        flow: Value sent as the ``"flow"`` field (defaults to ``"default"``).
        collection: Value sent as the ``"collection"`` field (defaults to
            ``"default"``).
    """
    # Named ``payload`` rather than ``input`` so the builtin is not shadowed.
    payload = {
        "operation": "load-de-core",
        "workspace": self.api.workspace,
        "id": id,
        "flow": flow,
        "collection": collection,
    }

    self.request(request=payload)
|
||||
|
||||
|
|
|
|||
|
|
@ -491,6 +491,58 @@ class SocketClient:
|
|||
triples=raw_triples,
|
||||
)
|
||||
|
||||
def get_kg_core(self, id: str) -> Iterator[Dict[str, Any]]:
    """Stream the response messages of a ``get-kg-core`` request.

    Sends the request to the ``"knowledge"`` service via
    ``self._send_request_sync`` with ``streaming_raw=True`` and yields each
    response as it is received.

    Args:
        id: Identifier of the knowledge core to fetch.

    Yields:
        Each response dict, up to but not including the first one whose
        ``"eos"`` entry is truthy.
    """
    request = {
        "operation": "get-kg-core",
        "workspace": self.workspace,
        "id": id,
    }

    responses = self._send_request_sync(
        "knowledge", None, request, streaming_raw=True,
    )

    for response in responses:
        # The end-of-stream marker ends iteration and is not passed on.
        if response.get("eos"):
            break
        yield response
|
||||
|
||||
def put_kg_core(
    self, id: str, triples=None, graph_embeddings=None,
) -> Dict[str, Any]:
    """Send a ``put-kg-core`` request to the ``"knowledge"`` service.

    Args:
        id: Identifier of the knowledge core being written.
        triples: Optional payload sent under the ``"triples"`` key; omitted
            from the request when ``None``.
        graph_embeddings: Optional payload sent under the
            ``"graph-embeddings"`` key; omitted from the request when
            ``None``.

    Returns:
        Whatever ``self._send_request_sync`` returns for the request.
    """
    request = {
        "operation": "put-kg-core",
        "workspace": self.workspace,
        "id": id,
    }

    # Only include the optional sections the caller actually supplied.
    if triples is not None:
        request["triples"] = triples
    if graph_embeddings is not None:
        request["graph-embeddings"] = graph_embeddings

    return self._send_request_sync("knowledge", None, request)
|
||||
|
||||
def get_de_core(self, id: str) -> Iterator[Dict[str, Any]]:
    """Stream the response messages of a ``get-de-core`` request.

    Sends the request to the ``"knowledge"`` service via
    ``self._send_request_sync`` with ``streaming_raw=True`` and yields each
    response as it is received.

    Args:
        id: Identifier of the document-embeddings core to fetch.

    Yields:
        Each response dict, up to but not including the first one whose
        ``"eos"`` entry is truthy.
    """
    request = {
        "operation": "get-de-core",
        "workspace": self.workspace,
        "id": id,
    }

    responses = self._send_request_sync(
        "knowledge", None, request, streaming_raw=True,
    )

    for response in responses:
        # The end-of-stream marker ends iteration and is not passed on.
        if response.get("eos"):
            break
        yield response
|
||||
|
||||
def put_de_core(
    self, id: str, document_embeddings=None,
) -> Dict[str, Any]:
    """Send a ``put-de-core`` request to the ``"knowledge"`` service.

    Args:
        id: Identifier of the document-embeddings core being written.
        document_embeddings: Optional payload sent under the
            ``"document-embeddings"`` key; omitted from the request when
            ``None``.

    Returns:
        Whatever ``self._send_request_sync`` returns for the request.
    """
    request = {
        "operation": "put-de-core",
        "workspace": self.workspace,
        "id": id,
    }

    # Only include the optional section when the caller supplied it.
    if document_embeddings is not None:
        request["document-embeddings"] = document_embeddings

    return self._send_request_sync("knowledge", None, request)
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the persistent WebSocket connection."""
|
||||
if self._loop and not self._loop.is_closed():
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from typing import Dict, Any, Tuple, Optional
|
||||
from ...schema import (
|
||||
KnowledgeRequest, KnowledgeResponse, Triples, GraphEmbeddings,
|
||||
DocumentEmbeddings, ChunkEmbeddings,
|
||||
Metadata, EntityEmbeddings
|
||||
)
|
||||
from .base import MessageTranslator
|
||||
|
|
@ -43,6 +44,23 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
]
|
||||
)
|
||||
|
||||
document_embeddings = None
|
||||
if "document-embeddings" in data:
|
||||
document_embeddings = DocumentEmbeddings(
|
||||
metadata=Metadata(
|
||||
id=data["document-embeddings"]["metadata"]["id"],
|
||||
root=data["document-embeddings"]["metadata"].get("root", ""),
|
||||
collection=data["document-embeddings"]["metadata"]["collection"]
|
||||
),
|
||||
chunks=[
|
||||
ChunkEmbeddings(
|
||||
chunk_id=ch["chunk_id"],
|
||||
vector=ch["vector"],
|
||||
)
|
||||
for ch in data["document-embeddings"]["chunks"]
|
||||
]
|
||||
)
|
||||
|
||||
return KnowledgeRequest(
|
||||
operation=data.get("operation"),
|
||||
id=data.get("id"),
|
||||
|
|
@ -50,6 +68,7 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
collection=data.get("collection"),
|
||||
triples=triples,
|
||||
graph_embeddings=graph_embeddings,
|
||||
document_embeddings=document_embeddings,
|
||||
)
|
||||
|
||||
def encode(self, obj: KnowledgeRequest) -> Dict[str, Any]:
|
||||
|
|
@ -90,6 +109,22 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
],
|
||||
}
|
||||
|
||||
if obj.document_embeddings:
|
||||
result["document-embeddings"] = {
|
||||
"metadata": {
|
||||
"id": obj.document_embeddings.metadata.id,
|
||||
"root": obj.document_embeddings.metadata.root,
|
||||
"collection": obj.document_embeddings.metadata.collection,
|
||||
},
|
||||
"chunks": [
|
||||
{
|
||||
"chunk_id": ch.chunk_id,
|
||||
"vector": ch.vector,
|
||||
}
|
||||
for ch in obj.document_embeddings.chunks
|
||||
],
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -140,6 +175,25 @@ class KnowledgeResponseTranslator(MessageTranslator):
|
|||
}
|
||||
}
|
||||
|
||||
# Streaming document embeddings response
|
||||
if obj.document_embeddings:
|
||||
return {
|
||||
"document-embeddings": {
|
||||
"metadata": {
|
||||
"id": obj.document_embeddings.metadata.id,
|
||||
"root": obj.document_embeddings.metadata.root,
|
||||
"collection": obj.document_embeddings.metadata.collection,
|
||||
},
|
||||
"chunks": [
|
||||
{
|
||||
"chunk_id": ch.chunk_id,
|
||||
"vector": ch.vector,
|
||||
}
|
||||
for ch in obj.document_embeddings.chunks
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
# End of stream marker
|
||||
if obj.eos is True:
|
||||
return {"eos": True}
|
||||
|
|
@ -155,7 +209,7 @@ class KnowledgeResponseTranslator(MessageTranslator):
|
|||
is_final = (
|
||||
obj.ids is not None or # List response
|
||||
obj.eos is True or # End of stream
|
||||
(not obj.triples and not obj.graph_embeddings) # Empty response
|
||||
(not obj.triples and not obj.graph_embeddings and not obj.document_embeddings) # Empty response
|
||||
)
|
||||
|
||||
return response, is_final
|
||||
|
|
@ -4,7 +4,7 @@ from ..core.topic import queue
|
|||
from ..core.metadata import Metadata
|
||||
from .document import Document, TextDocument
|
||||
from .graph import Triples
|
||||
from .embeddings import GraphEmbeddings
|
||||
from .embeddings import GraphEmbeddings, DocumentEmbeddings
|
||||
|
||||
# get-kg-core
|
||||
# -> (???)
|
||||
|
|
@ -41,6 +41,9 @@ class KnowledgeRequest:
|
|||
triples: Triples | None = None
|
||||
graph_embeddings: GraphEmbeddings | None = None
|
||||
|
||||
# put-de-core
|
||||
document_embeddings: DocumentEmbeddings | None = None
|
||||
|
||||
@dataclass
|
||||
class KnowledgeResponse:
|
||||
error: Error | None = None
|
||||
|
|
@ -48,6 +51,7 @@ class KnowledgeResponse:
|
|||
eos: bool = False # Indicates end of knowledge core stream
|
||||
triples: Triples | None = None
|
||||
graph_embeddings: GraphEmbeddings | None = None
|
||||
document_embeddings: DocumentEmbeddings | None = None
|
||||
|
||||
knowledge_request_queue = queue('knowledge', cls='request')
|
||||
knowledge_response_queue = queue('knowledge', cls='response')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue