mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 17:06:22 +02:00
Collection management (#520)
* Tech spec * Refactored Cassandra knowledge graph for single table * Collection management, librarian services to manage metadata and collection deletion
This commit is contained in:
parent
48016d8fb2
commit
13ff7d765d
48 changed files with 2941 additions and 425 deletions
|
|
@ -8,6 +8,7 @@ from . library import Library
|
|||
from . flow import Flow
|
||||
from . config import Config
|
||||
from . knowledge import Knowledge
|
||||
from . collection import Collection
|
||||
from . exceptions import *
|
||||
from . types import *
|
||||
|
||||
|
|
@ -68,3 +69,6 @@ class Api:
|
|||
|
||||
def library(self):
    """Return a Library client constructed around this API object."""
    library_client = Library(self)
    return library_client
|
||||
|
||||
def collection(self):
    """Return a Collection client constructed around this API object."""
    collection_client = Collection(self)
    return collection_client
|
||||
|
|
|
|||
90
trustgraph-base/trustgraph/api/collection.py
Normal file
90
trustgraph-base/trustgraph/api/collection.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
import datetime
|
||||
import logging
|
||||
|
||||
from . types import CollectionMetadata
|
||||
from . exceptions import *
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Collection:
    """Client for the collection-management service.

    Every method builds a request dict, sends it through the parent API
    object's ``request`` method on the ``collection-management`` path and,
    where the reply carries collection entries, converts them into
    ``CollectionMetadata`` values.
    """

    def __init__(self, api):
        """Store the parent API object that is used to send requests."""
        self.api = api

    def request(self, request):
        """Send ``request`` on the collection-management path.

        Returns whatever ``self.api.request`` returns.
        """
        return self.api.request("collection-management", request)

    @staticmethod
    def _to_metadata(entry):
        """Convert one reply entry into a CollectionMetadata.

        Raises KeyError when a required field is absent; callers translate
        that into ProtocolException.
        """
        return CollectionMetadata(
            user=entry["user"],
            collection=entry["collection"],
            name=entry["name"],
            description=entry["description"],
            tags=entry["tags"],
            created_at=entry["created_at"],
            updated_at=entry["updated_at"],
        )

    def list_collections(self, user, tag_filter=None):
        """List the collections belonging to ``user``.

        Args:
            user: user identifier placed in the request.
            tag_filter: optional tag list; only sent when truthy.

        Returns:
            A list of CollectionMetadata, empty when the reply carries no
            collections.

        Raises:
            ProtocolException: the reply could not be parsed.
        """
        payload = {
            "operation": "list-collections",
            "user": user,
        }

        if tag_filter:
            payload["tag_filter"] = tag_filter

        response = self.request(payload)

        try:
            # A reply without a "collections" key means "no collections":
            # empty lists may be dropped while the response is translated,
            # so a missing key must not be reported as a protocol error.
            entries = response.get("collections") or []
            return [self._to_metadata(entry) for entry in entries]
        except Exception as e:
            logger.error(
                "Failed to parse collection list response", exc_info=True
            )
            raise ProtocolException("Response not formatted correctly") from e

    def update_collection(self, user, collection, name=None,
                          description=None, tags=None):
        """Update the metadata of one collection.

        Args:
            user: user identifier placed in the request.
            collection: collection identifier placed in the request.
            name, description, tags: new values; each is only sent when it
                is not None.

        Returns:
            CollectionMetadata built from the first entry of the reply, or
            None when the reply carries no collection entries.

        Raises:
            ProtocolException: the reply could not be parsed.
        """
        payload = {
            "operation": "update-collection",
            "user": user,
            "collection": collection,
        }

        if name is not None:
            payload["name"] = name
        if description is not None:
            payload["description"] = description
        if tags is not None:
            payload["tags"] = tags

        response = self.request(payload)

        try:
            entries = response.get("collections")
            if entries:
                return self._to_metadata(entries[0])
            return None
        except Exception as e:
            logger.error(
                "Failed to parse collection update response", exc_info=True
            )
            raise ProtocolException("Response not formatted correctly") from e

    def delete_collection(self, user, collection):
        """Request deletion of ``collection`` for ``user``.

        The reply body is not inspected; an empty dict is always returned.
        """
        payload = {
            "operation": "delete-collection",
            "user": user,
            "collection": collection,
        }

        self.request(payload)

        return {}
|
||||
|
|
@ -41,3 +41,13 @@ class ProcessingMetadata:
|
|||
user : str
|
||||
collection : str
|
||||
tags : List[str]
|
||||
|
||||
@dataclasses.dataclass
class CollectionMetadata:
    """Metadata for a single collection as exposed by the client API."""

    # Owner and identifier of the collection
    user: str
    collection: str

    # Descriptive fields
    name: str
    description: str
    tags: List[str]

    # Timestamps, kept as the strings delivered by the service
    created_at: str
    updated_at: str
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ from .translators.objects_query import ObjectsQueryRequestTranslator, ObjectsQue
|
|||
from .translators.nlp_query import QuestionToStructuredQueryRequestTranslator, QuestionToStructuredQueryResponseTranslator
|
||||
from .translators.structured_query import StructuredQueryRequestTranslator, StructuredQueryResponseTranslator
|
||||
from .translators.diagnosis import StructuredDataDiagnosisRequestTranslator, StructuredDataDiagnosisResponseTranslator
|
||||
from .translators.collection import CollectionManagementRequestTranslator, CollectionManagementResponseTranslator
|
||||
|
||||
# Register all service translators
|
||||
TranslatorRegistry.register_service(
|
||||
|
|
@ -135,6 +136,12 @@ TranslatorRegistry.register_service(
|
|||
StructuredDataDiagnosisResponseTranslator()
|
||||
)
|
||||
|
||||
# Collection management: one service name with a request and a response
# translator
TranslatorRegistry.register_service(
    "collection-management",
    CollectionManagementRequestTranslator(),
    CollectionManagementResponseTranslator(),
)

# Register single-direction translators for document loading
TranslatorRegistry.register_request("document", DocumentTranslator())
TranslatorRegistry.register_request(
    "text-document", TextDocumentTranslator()
)
|
||||
|
|
|
|||
112
trustgraph-base/trustgraph/messaging/translators/collection.py
Normal file
112
trustgraph-base/trustgraph/messaging/translators/collection.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
from typing import Dict, Any, List
|
||||
from ...schema import CollectionManagementRequest, CollectionManagementResponse, CollectionMetadata, Error
|
||||
from .base import MessageTranslator
|
||||
|
||||
|
||||
class CollectionManagementRequestTranslator(MessageTranslator):
    """Maps collection-management requests between dicts and Pulsar records."""

    def to_pulsar(self, data: Dict[str, Any]) -> CollectionManagementRequest:
        """Build a CollectionManagementRequest from a plain dict.

        Missing string fields default to "", missing list fields to an
        empty list, and a missing ``limit`` to 50.
        """
        fetch = data.get
        return CollectionManagementRequest(
            operation=fetch("operation", ""),
            user=fetch("user", ""),
            collection=fetch("collection", ""),
            timestamp=fetch("timestamp", ""),
            name=fetch("name", ""),
            description=fetch("description", ""),
            tags=fetch("tags", []),
            created_at=fetch("created_at", ""),
            updated_at=fetch("updated_at", ""),
            tag_filter=fetch("tag_filter", []),
            limit=fetch("limit", 50),
        )

    def from_pulsar(self, obj: CollectionManagementRequest) -> Dict[str, Any]:
        """Turn a CollectionManagementRequest into a dict.

        Only truthy fields are emitted; array fields are copied into
        plain lists.
        """
        # (field name, converter) in output order; None copies the value as-is
        layout = (
            ("operation", None),
            ("user", None),
            ("collection", None),
            ("timestamp", None),
            ("name", None),
            ("description", None),
            ("tags", list),
            ("created_at", None),
            ("updated_at", None),
            ("tag_filter", list),
            ("limit", None),
        )

        out: Dict[str, Any] = {}
        for field_name, convert in layout:
            value = getattr(obj, field_name)
            if not value:
                continue
            out[field_name] = value if convert is None else convert(value)

        return out
|
||||
|
||||
|
||||
class CollectionManagementResponseTranslator(MessageTranslator):
    """Maps collection-management responses between dicts and Pulsar records."""

    def to_pulsar(self, data: Dict[str, Any]) -> CollectionManagementResponse:
        """Build a CollectionManagementResponse from a plain dict."""
        # A missing or falsy "error" entry leaves the error unset
        error_info = data.get("error")
        if error_info:
            error = Error(
                type=error_info.get("type", ""),
                message=error_info.get("message", ""),
            )
        else:
            error = None

        # One metadata record per entry; absent fields get empty defaults
        collections = [
            CollectionMetadata(
                user=item.get("user", ""),
                collection=item.get("collection", ""),
                name=item.get("name", ""),
                description=item.get("description", ""),
                tags=item.get("tags", []),
                created_at=item.get("created_at", ""),
                updated_at=item.get("updated_at", ""),
            )
            for item in data.get("collections", [])
        ]

        return CollectionManagementResponse(
            success=data.get("success", ""),
            error=error,
            timestamp=data.get("timestamp", ""),
            collections=collections,
        )

    def from_pulsar(self, obj: CollectionManagementResponse) -> Dict[str, Any]:
        """Turn a CollectionManagementResponse into a dict.

        Falsy fields are left out, so an empty collection list produces
        no "collections" key.
        """
        out: Dict[str, Any] = {}

        if obj.success:
            out["success"] = obj.success

        failure = obj.error
        if failure:
            out["error"] = {
                "type": failure.type,
                "message": failure.message,
            }

        if obj.timestamp:
            out["timestamp"] = obj.timestamp

        if obj.collections:
            out["collections"] = [
                {
                    "user": item.user,
                    "collection": item.collection,
                    "name": item.name,
                    "description": item.description,
                    "tags": list(item.tags) if item.tags else [],
                    "created_at": item.created_at,
                    "updated_at": item.updated_at,
                }
                for item in obj.collections
            ]

        return out
|
||||
|
|
@ -10,4 +10,6 @@ from .lookup import *
|
|||
from .nlp_query import *
|
||||
from .structured_query import *
|
||||
from .objects_query import *
|
||||
from .diagnosis import *
|
||||
from .diagnosis import *
|
||||
from .collection import *
|
||||
from .storage import *
|
||||
60
trustgraph-base/trustgraph/schema/services/collection.py
Normal file
60
trustgraph-base/trustgraph/schema/services/collection.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
from pulsar.schema import Record, String, Integer, Array
|
||||
from datetime import datetime
|
||||
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Collection management operations
|
||||
|
||||
# Collection metadata operations (for librarian service)
|
||||
|
||||
class CollectionMetadata(Record):
    """Pulsar record holding the metadata of one collection."""

    # Owner and identifier
    user = String()
    collection = String()

    # Descriptive fields
    name = String()
    description = String()
    tags = Array(String())

    # ISO timestamps
    created_at = String()
    updated_at = String()
|
||||
|
||||
############################################################################
|
||||
|
||||
class CollectionManagementRequest(Record):
    """Pulsar record for a collection-management request."""

    # Operation name, e.g., "delete-collection"
    operation = String()

    # Target of the operation
    user = String()
    collection = String()

    # ISO timestamp
    timestamp = String()

    # Collection metadata fields
    name = String()
    description = String()
    tags = Array(String())
    created_at = String()  # ISO timestamp
    updated_at = String()  # ISO timestamp

    # Listing options
    tag_filter = Array(String())  # Optional filter by tags
    limit = Integer()
|
||||
|
||||
class CollectionManagementResponse(Record):
    """Pulsar record for a collection-management response."""

    # Outcome flag: "true" or "false"
    success = String()

    # Only populated if success is "false"
    error = Error()

    # ISO timestamp
    timestamp = String()

    # Metadata records carried by the response
    collections = Array(CollectionMetadata())
|
||||
|
||||
|
||||
############################################################################
|
||||
|
||||
# Topics
|
||||
|
||||
# Request and response queues of the collection-management service
collection_request_queue = topic(
    "collection",
    kind="non-persistent",
    namespace="request",
)
collection_response_queue = topic(
    "collection",
    kind="non-persistent",
    namespace="response",
)
|
||||
42
trustgraph-base/trustgraph/schema/services/storage.py
Normal file
42
trustgraph-base/trustgraph/schema/services/storage.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
from pulsar.schema import Record, String
|
||||
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Storage management operations
|
||||
|
||||
class StorageManagementRequest(Record):
    """Storage management request sent to store processors."""

    # Operation name, e.g., "delete-collection"
    operation = String()

    # Target of the operation
    user = String()
    collection = String()
|
||||
|
||||
class StorageManagementResponse(Record):
    """Reply from a storage processor to a management operation."""

    # Populated only when the operation failed; a null error means success
    error = Error()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Storage management topics
|
||||
|
||||
# Request topics, one per storage type, for collection management requests
vector_storage_management_topic = topic(
    "vector-storage-management",
    kind="non-persistent",
    namespace="request",
)
object_storage_management_topic = topic(
    "object-storage-management",
    kind="non-persistent",
    namespace="request",
)
triples_storage_management_topic = topic(
    "triples-storage-management",
    kind="non-persistent",
    namespace="request",
)

# Response topic on which storage processors reply
storage_management_response_topic = topic(
    "storage-management",
    kind="non-persistent",
    namespace="response",
)
|
||||
|
||||
############################################################################
|
||||
Loading…
Add table
Add a link
Reference in a new issue