Mirror of https://github.com/trustgraph-ai/trustgraph.git, synced 2026-05-12 08:42:37 +02:00
Workspace identity is now determined by queue infrastructure instead of message body fields, closing a privilege-escalation vector where a caller could spoof the workspace in the request payload.

- Add WorkspaceProcessor base class: discovers workspaces from config at startup, creates per-workspace consumers (queue:workspace), and manages consumer lifecycle on workspace create/delete events
- Roll out to librarian, flow-svc, knowledge cores, and config-svc
- Config service gets a dual-queue regime: a system queue for cross-workspace ops (getvalues-all-ws, bootstrapper writes to __workspaces__) and per-workspace queues for tenant-scoped ops, with workspace discovery from its own Cassandra store
- Remove workspace field from request schemas (FlowRequest, LibrarianRequest, KnowledgeRequest, CollectionManagementRequest) and from DocumentMetadata / ProcessingMetadata; table stores now accept workspace as an explicit parameter
- Strip workspace encode/decode from all message translators and gateway serializers
- Gateway enforces workspace existence: requests targeting non-existent workspaces are rejected instead of being routed to queues with no consumer
- Config service provisions new workspaces from __template__ on creation
- Add workspace lifecycle hooks to AsyncProcessor so any processor can react to workspace create/delete without subclassing WorkspaceProcessor
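The per-workspace consumer pattern the commit describes can be sketched roughly as follows. This is an illustrative sketch only; the class, the bus interface, and every method name here are assumptions for exposition, not the actual WorkspaceProcessor API.

# Illustrative sketch of the pattern described above; all names are
# assumptions, not the real trustgraph implementation.

class WorkspaceConsumerSketch:

    def __init__(self, bus, base_queue, workspaces):
        self.bus = bus                    # hypothetical message-bus client
        self.base_queue = base_queue
        self.consumers = {}
        for ws in workspaces:             # workspaces discovered at startup
            self.on_workspace_created(ws)

    def queue_for(self, ws):
        # The workspace is part of the queue identity (queue:workspace),
        # so it can no longer be spoofed in the request payload.
        return f"{self.base_queue}:{ws}"

    def on_workspace_created(self, ws):
        if ws not in self.consumers:
            self.consumers[ws] = self.bus.subscribe(
                self.queue_for(ws),
                lambda msg, ws=ws: self.handle(ws, msg),
            )

    def on_workspace_deleted(self, ws):
        consumer = self.consumers.pop(ws, None)
        if consumer is not None:
            consumer.close()

    def handle(self, workspace, msg):
        # Workspace arrives as an explicit parameter derived from the queue,
        # matching the schema changes in this commit.
        raise NotImplementedError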
176 lines
4.1 KiB
Python
import base64

# Criteria is assumed to live alongside the other schema records; it is
# needed by to_criteria at the end of this module.
from ... schema import Term, Triple, DocumentMetadata, ProcessingMetadata, Criteria

from ... messaging.translators.primitives import TermTranslator, TripleTranslator

# Singleton translator instances
_term_translator = TermTranslator()
_triple_translator = TripleTranslator()
def to_value(x):
    """Convert dict to Term. Delegates to TermTranslator."""
    return _term_translator.decode(x)


def to_subgraph(x):
    """Convert list of dicts to list of Triples. Delegates to TripleTranslator."""
    return [_triple_translator.decode(t) for t in x]


def serialize_value(v):
    """Convert Term to dict. Delegates to TermTranslator."""
    return _term_translator.encode(v)


def serialize_triple(t):
    """Convert Triple to dict. Delegates to TripleTranslator."""
    return _triple_translator.encode(t)


def serialize_subgraph(sg):
    """Convert list of Triples to list of dicts."""
    return [serialize_triple(t) for t in sg]
def serialize_triples(message):
    return {
        "metadata": {
            "id": message.metadata.id,
            "root": message.metadata.root,
            "collection": message.metadata.collection,
        },
        "triples": serialize_subgraph(message.triples),
    }


def serialize_graph_embeddings(message):
    return {
        "metadata": {
            "id": message.metadata.id,
            "root": message.metadata.root,
            "collection": message.metadata.collection,
        },
        "entities": [
            {
                "vector": entity.vector,
                "entity": serialize_value(entity.entity),
            }
            for entity in message.entities
        ],
    }


def serialize_entity_contexts(message):
    return {
        "metadata": {
            "id": message.metadata.id,
            "root": message.metadata.root,
            "collection": message.metadata.collection,
        },
        "entities": [
            {
                "context": entity.context,
                "entity": serialize_value(entity.entity),
            }
            for entity in message.entities
        ],
    }


def serialize_document_embeddings(message):
    return {
        "metadata": {
            "id": message.metadata.id,
            "root": message.metadata.root,
            "collection": message.metadata.collection,
        },
        "chunks": [
            {
                "vector": chunk.vector,
                "chunk_id": chunk.chunk_id,
            }
            for chunk in message.chunks
        ],
    }
def serialize_document_metadata(message):

    ret = {}

    if message.id:
        ret["id"] = message.id

    if message.time:
        ret["time"] = message.time

    if message.kind:
        ret["kind"] = message.kind

    if message.title:
        ret["title"] = message.title

    if message.comments:
        ret["comments"] = message.comments

    if message.metadata:
        ret["metadata"] = serialize_subgraph(message.metadata)

    if message.tags is not None:
        ret["tags"] = message.tags

    return ret
def serialize_processing_metadata(message):

    ret = {}

    if message.id:
        ret["id"] = message.id

    if message.document_id:
        ret["document-id"] = message.document_id

    if message.time:
        ret["time"] = message.time

    if message.flow:
        ret["flow"] = message.flow

    if message.collection:
        ret["collection"] = message.collection

    if message.tags is not None:
        ret["tags"] = message.tags

    return ret
def to_document_metadata(x):

    return DocumentMetadata(
        id = x.get("id", None),
        time = x.get("time", None),
        kind = x.get("kind", None),
        title = x.get("title", None),
        comments = x.get("comments", None),
        # serialize_document_metadata omits "metadata" when empty, so
        # default to an empty subgraph rather than raising KeyError
        metadata = to_subgraph(x.get("metadata", [])),
        tags = x.get("tags", None),
    )
def to_processing_metadata(x):

    return ProcessingMetadata(
        id = x.get("id", None),
        document_id = x.get("document-id", None),
        time = x.get("time", None),
        flow = x.get("flow", None),
        collection = x.get("collection", None),
        tags = x.get("tags", None),
    )
def to_criteria(x):
    return [
        Criteria(v["key"], v["value"], v["operator"])
        for v in x
    ]
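For reference, a minimal round trip of the processing-metadata helpers. This is illustrative only: it assumes ProcessingMetadata accepts the keyword arguments used in to_processing_metadata above, and the field values are made up.

# Hypothetical values, for illustration only
pm = ProcessingMetadata(
    id = "proc-01",
    document_id = "doc-01",
    time = 1700000000,
    flow = "default",
    collection = "research",
    tags = ["example"],
)

encoded = serialize_processing_metadata(pm)
# e.g. {"id": "proc-01", "document-id": "doc-01", "time": 1700000000, ...}

decoded = to_processing_metadata(encoded)
assert decoded.document_id == pm.document_id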