mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-04 12:52:36 +02:00
Collection management part 2 (#522)
* Plumb collection manager into librarian * Test end-to-end
This commit is contained in:
parent
d378db9370
commit
fcd15d1833
16 changed files with 617 additions and 434 deletions
|
|
@ -8,6 +8,7 @@ import asyncio
|
|||
import base64
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
from .. base import AsyncProcessor, Consumer, Producer, Publisher, Subscriber
|
||||
from .. base import ConsumerMetrics, ProducerMetrics
|
||||
|
|
@ -15,6 +16,11 @@ from .. base.cassandra_config import add_cassandra_args, resolve_cassandra_confi
|
|||
|
||||
from .. schema import LibrarianRequest, LibrarianResponse, Error
|
||||
from .. schema import librarian_request_queue, librarian_response_queue
|
||||
from .. schema import CollectionManagementRequest, CollectionManagementResponse
|
||||
from .. schema import collection_request_queue, collection_response_queue
|
||||
from .. schema import StorageManagementRequest, StorageManagementResponse
|
||||
from .. schema import vector_storage_management_topic, object_storage_management_topic
|
||||
from .. schema import triples_storage_management_topic, storage_management_response_topic
|
||||
|
||||
from .. schema import Document, Metadata
|
||||
from .. schema import TextDocument, Metadata
|
||||
|
|
@ -22,6 +28,7 @@ from .. schema import TextDocument, Metadata
|
|||
from .. exceptions import RequestError
|
||||
|
||||
from . librarian import Librarian
|
||||
from . collection_manager import CollectionManager
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -30,6 +37,8 @@ default_ident = "librarian"
|
|||
|
||||
default_librarian_request_queue = librarian_request_queue
|
||||
default_librarian_response_queue = librarian_response_queue
|
||||
default_collection_request_queue = collection_request_queue
|
||||
default_collection_response_queue = collection_response_queue
|
||||
|
||||
default_minio_host = "minio:9000"
|
||||
default_minio_access_key = "minioadmin"
|
||||
|
|
@ -57,6 +66,14 @@ class Processor(AsyncProcessor):
|
|||
"librarian_response_queue", default_librarian_response_queue
|
||||
)
|
||||
|
||||
collection_request_queue = params.get(
|
||||
"collection_request_queue", default_collection_request_queue
|
||||
)
|
||||
|
||||
collection_response_queue = params.get(
|
||||
"collection_response_queue", default_collection_response_queue
|
||||
)
|
||||
|
||||
minio_host = params.get("minio_host", default_minio_host)
|
||||
minio_access_key = params.get(
|
||||
"minio_access_key",
|
||||
|
|
@ -87,6 +104,8 @@ class Processor(AsyncProcessor):
|
|||
**params | {
|
||||
"librarian_request_queue": librarian_request_queue,
|
||||
"librarian_response_queue": librarian_response_queue,
|
||||
"collection_request_queue": collection_request_queue,
|
||||
"collection_response_queue": collection_response_queue,
|
||||
"minio_host": minio_host,
|
||||
"minio_access_key": minio_access_key,
|
||||
"cassandra_host": self.cassandra_host,
|
||||
|
|
@ -103,6 +122,18 @@ class Processor(AsyncProcessor):
|
|||
processor = self.id, flow = None, name = "librarian-response"
|
||||
)
|
||||
|
||||
collection_request_metrics = ConsumerMetrics(
|
||||
processor = self.id, flow = None, name = "collection-request"
|
||||
)
|
||||
|
||||
collection_response_metrics = ProducerMetrics(
|
||||
processor = self.id, flow = None, name = "collection-response"
|
||||
)
|
||||
|
||||
storage_response_metrics = ConsumerMetrics(
|
||||
processor = self.id, flow = None, name = "storage-response"
|
||||
)
|
||||
|
||||
self.librarian_request_consumer = Consumer(
|
||||
taskgroup = self.taskgroup,
|
||||
client = self.pulsar_client,
|
||||
|
|
@ -121,6 +152,54 @@ class Processor(AsyncProcessor):
|
|||
metrics = librarian_response_metrics,
|
||||
)
|
||||
|
||||
self.collection_request_consumer = Consumer(
|
||||
taskgroup = self.taskgroup,
|
||||
client = self.pulsar_client,
|
||||
flow = None,
|
||||
topic = collection_request_queue,
|
||||
subscriber = id,
|
||||
schema = CollectionManagementRequest,
|
||||
handler = self.on_collection_request,
|
||||
metrics = collection_request_metrics,
|
||||
)
|
||||
|
||||
self.collection_response_producer = Producer(
|
||||
client = self.pulsar_client,
|
||||
topic = collection_response_queue,
|
||||
schema = CollectionManagementResponse,
|
||||
metrics = collection_response_metrics,
|
||||
)
|
||||
|
||||
# Storage management producers for collection deletion
|
||||
self.vector_storage_producer = Producer(
|
||||
client = self.pulsar_client,
|
||||
topic = vector_storage_management_topic,
|
||||
schema = StorageManagementRequest,
|
||||
)
|
||||
|
||||
self.object_storage_producer = Producer(
|
||||
client = self.pulsar_client,
|
||||
topic = object_storage_management_topic,
|
||||
schema = StorageManagementRequest,
|
||||
)
|
||||
|
||||
self.triples_storage_producer = Producer(
|
||||
client = self.pulsar_client,
|
||||
topic = triples_storage_management_topic,
|
||||
schema = StorageManagementRequest,
|
||||
)
|
||||
|
||||
self.storage_response_consumer = Consumer(
|
||||
taskgroup = self.taskgroup,
|
||||
client = self.pulsar_client,
|
||||
flow = None,
|
||||
topic = storage_management_response_topic,
|
||||
subscriber = id,
|
||||
schema = StorageManagementResponse,
|
||||
handler = self.on_storage_response,
|
||||
metrics = storage_response_metrics,
|
||||
)
|
||||
|
||||
self.librarian = Librarian(
|
||||
cassandra_host = self.cassandra_host,
|
||||
cassandra_username = self.cassandra_username,
|
||||
|
|
@ -133,6 +212,17 @@ class Processor(AsyncProcessor):
|
|||
load_document = self.load_document,
|
||||
)
|
||||
|
||||
self.collection_manager = CollectionManager(
|
||||
cassandra_host = self.cassandra_host,
|
||||
cassandra_username = self.cassandra_username,
|
||||
cassandra_password = self.cassandra_password,
|
||||
keyspace = keyspace,
|
||||
vector_storage_producer = self.vector_storage_producer,
|
||||
object_storage_producer = self.object_storage_producer,
|
||||
triples_storage_producer = self.triples_storage_producer,
|
||||
storage_response_consumer = self.storage_response_consumer,
|
||||
)
|
||||
|
||||
self.register_config_handler(self.on_librarian_config)
|
||||
|
||||
self.flows = {}
|
||||
|
|
@ -144,6 +234,12 @@ class Processor(AsyncProcessor):
|
|||
await super(Processor, self).start()
|
||||
await self.librarian_request_consumer.start()
|
||||
await self.librarian_response_producer.start()
|
||||
await self.collection_request_consumer.start()
|
||||
await self.collection_response_producer.start()
|
||||
await self.vector_storage_producer.start()
|
||||
await self.object_storage_producer.start()
|
||||
await self.triples_storage_producer.start()
|
||||
await self.storage_response_consumer.start()
|
||||
|
||||
async def on_librarian_config(self, config, version):
|
||||
|
||||
|
|
@ -223,6 +319,19 @@ class Processor(AsyncProcessor):
|
|||
|
||||
logger.debug("Document submitted")
|
||||
|
||||
async def add_processing_with_collection(self, request):
    """
    Register a processing request, making sure its collection exists.

    When the request carries a truthy processing_metadata attribute, the
    collection manager is asked to ensure the (user, collection) pair
    named by that metadata exists.  The request is then handed to the
    librarian's add_processing, whose result is returned unchanged.
    """
    # A request without the attribute is treated the same as one whose
    # metadata is empty: the collection step is skipped entirely.
    metadata = getattr(request, 'processing_metadata', None)

    if metadata:
        owner = metadata.user
        target = metadata.collection
        await self.collection_manager.ensure_collection_exists(owner, target)

    # Delegate to the librarian's own add_processing implementation
    return await self.librarian.add_processing(request)
|
||||
|
||||
async def process_request(self, v):
|
||||
|
||||
if v.operation is None:
|
||||
|
|
@ -236,7 +345,7 @@ class Processor(AsyncProcessor):
|
|||
"update-document": self.librarian.update_document,
|
||||
"get-document-metadata": self.librarian.get_document_metadata,
|
||||
"get-document-content": self.librarian.get_document_content,
|
||||
"add-processing": self.librarian.add_processing,
|
||||
"add-processing": self.add_processing_with_collection,
|
||||
"remove-processing": self.librarian.remove_processing,
|
||||
"list-documents": self.librarian.list_documents,
|
||||
"list-processing": self.librarian.list_processing,
|
||||
|
|
@ -296,6 +405,73 @@ class Processor(AsyncProcessor):
|
|||
|
||||
logger.debug("Librarian input processing complete")
|
||||
|
||||
async def process_collection_request(self, v):
    """
    Dispatch a collection management request to the collection manager.

    v.operation selects the handler: "list-collections",
    "update-collection" or "delete-collection".  The selected handler is
    awaited with the request itself and its result is returned unchanged.

    Raises RequestError if the operation is None or is not one of the
    names above.
    """
    if v.operation is None:
        raise RequestError("Null operation")

    logger.debug(f"Collection request: {v.operation}")

    manager = self.collection_manager

    # Operation name -> collection manager coroutine
    dispatch = {
        "list-collections": manager.list_collections,
        "update-collection": manager.update_collection,
        "delete-collection": manager.delete_collection,
    }

    handler = dispatch.get(v.operation)

    if handler is None:
        raise RequestError(f"Invalid collection operation: {v.operation}")

    return await handler(v)
|
||||
|
||||
async def on_collection_request(self, msg, consumer, flow):
    """
    Handle one collection management request message.

    The message payload is passed to process_collection_request and the
    result is sent on the collection response producer, tagged with the
    request's "id" message property ("unknown" when the property is
    absent).

    Failures, including a failure to send the successful response, are
    reported back to the requester as a CollectionManagementResponse
    carrying an Error and an ISO-format timestamp:

    - RequestError maps to error type "request-error"
    - any other exception maps to "unexpected-error" and is also logged
      with its traceback, since it indicates a fault in this service
      rather than in the request.

    consumer and flow are accepted as part of the handler signature but
    are not used here.
    """
    request = msg.value()

    # Named request_id rather than id so the builtin is not shadowed
    request_id = msg.properties().get("id", "unknown")

    logger.info(f"Handling collection request {request_id}...")

    error_type = None
    error_message = None

    try:
        resp = await self.process_collection_request(request)
        await self.collection_response_producer.send(
            resp, properties={"id": request_id}
        )
    except RequestError as e:
        error_type = "request-error"
        error_message = str(e)
    except Exception as e:
        # Without this the traceback is lost: the requester only ever
        # sees str(e) and nothing is recorded on the service side.
        logger.error(
            f"Collection request {request_id} failed: {e}",
            exc_info=True,
        )
        error_type = "unexpected-error"
        error_message = str(e)

    if error_type is not None:
        # Single construction/send path shared by both failure kinds
        resp = CollectionManagementResponse(
            error=Error(
                type=error_type,
                message=error_message,
            ),
            timestamp=datetime.now().isoformat()
        )
        await self.collection_response_producer.send(
            resp, properties={"id": request_id}
        )

    logger.debug("Collection request processing complete")
|
||||
|
||||
async def on_storage_response(self, msg, consumer, flow):
    """
    Forward a storage management response to the collection manager.

    The message payload is extracted and passed to the collection
    manager's on_storage_response.  consumer and flow are accepted as
    part of the handler signature but are not used here.
    """
    payload = msg.value()

    logger.debug("Received storage management response")

    await self.collection_manager.on_storage_response(payload)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
|
|
@ -313,6 +489,18 @@ class Processor(AsyncProcessor):
|
|||
help=f'Config response queue {default_librarian_response_queue}',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--collection-request-queue',
|
||||
default=default_collection_request_queue,
|
||||
help=f'Collection request queue (default: {default_collection_request_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--collection-response-queue',
|
||||
default=default_collection_response_queue,
|
||||
help=f'Collection response queue (default: {default_collection_response_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--minio-host',
|
||||
default=default_minio_host,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue