trustgraph/trustgraph-base/trustgraph/base/document_embeddings_query_service.py
cybermaggedon 45e6ad4abc
Fix ontology RAG pipeline + add query concurrency (#691)
- Fix ontology RAG pipeline: embeddings API, chunker provenance, and query concurrency

- Fix ontology embeddings to use correct response shape from embed()
  API (returns list of vectors, not list of list of vectors).
- Simplify chunker URI logic to append /c{index} to parent ID
  instead of parsing page/doc URI structure which was fragile.

- Add provenance tracking and librarian integration to token
  chunker, matching recursive chunker capabilities.

- Add configurable concurrency (default 10) to Cassandra, Qdrant,
  and embeddings query services.
2026-03-12 11:34:42 +00:00

99 lines
2.6 KiB
Python

"""
Document embeddings query service. Input is vectors. Output is list of
embeddings.
"""
import logging
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
from .. schema import Error, Term
from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec
from . producer_spec import ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
# Default processor identity; handed to launch() by run() in this module.
default_ident = "doc-embeddings-query"
# Default for the "concurrency" constructor parameter and the
# -c/--concurrency command-line option.
default_concurrency = 10
class DocumentEmbeddingsQueryService(FlowProcessor):
    """
    Flow processor that answers document embeddings query requests.

    Registers a "request" consumer (DocumentEmbeddingsRequest) whose
    messages are handled by on_message(), and a "response" producer
    (DocumentEmbeddingsResponse).  The lookup itself is delegated to
    ``self.query_document_embeddings(request)``, which is not defined in
    this class -- presumably it is supplied by a concrete subclass.
    """

    def __init__(self, **params):
        """
        Register the request consumer and the response producer.

        All keyword parameters are forwarded to FlowProcessor.  Two are
        also read here: "id" (forwarded as None when absent) and
        "concurrency" (handed to the request ConsumerSpec; defaults to
        default_concurrency).
        """

        # Named `ident` rather than `id` to avoid shadowing the builtin.
        ident = params.get("id")
        concurrency = params.get("concurrency", default_concurrency)

        # The merge guarantees the base class always receives an "id"
        # keyword, even when the caller did not supply one.
        super().__init__(**params | {"id": ident})

        self.register_specification(
            ConsumerSpec(
                name="request",
                schema=DocumentEmbeddingsRequest,
                handler=self.on_message,
                concurrency=concurrency,
            )
        )

        self.register_specification(
            ProducerSpec(
                name="response",
                schema=DocumentEmbeddingsResponse,
            )
        )

    async def on_message(self, msg, consumer, flow):
        """
        Handle one query request and publish the outcome on "response".

        On success a DocumentEmbeddingsResponse carrying the chunks
        returned by query_document_embeddings() is sent.  On any
        exception the error is logged and an error response with an
        empty chunk list is sent instead.  Both responses carry the
        request's "id" property so the sender can match them up.

        Raises:
            Exception: whatever failed while reading the "id" property,
                since no response can be correlated without it; also
                anything raised while sending the error response.
        """

        # Stays None until the sender-produced ID has been read, so the
        # error path can tell whether a response can be correlated.
        request_id = None

        try:

            # Sender-produced ID.  Read before anything else so that a
            # later failure can still be reported against it.
            request_id = msg.properties()["id"]

            request = msg.value()

            logger.debug(
                "Handling document embeddings query request %s...",
                request_id,
            )

            docs = await self.query_document_embeddings(request)

            logger.debug("Sending document embeddings query response...")
            r = DocumentEmbeddingsResponse(chunks=docs, error=None)

            await flow("response").send(r, properties={"id": request_id})

            logger.debug("Document embeddings query request completed")

        except Exception as e:

            logger.error(
                "Exception in document embeddings query service: %s",
                e, exc_info=True,
            )

            if request_id is None:
                # Without an ID there is nothing to address an error
                # response to.  Re-raise the real failure rather than
                # tripping over an unbound local while building the
                # response properties.
                raise

            logger.info("Sending error response...")

            r = DocumentEmbeddingsResponse(
                error=Error(
                    type="document-embeddings-query-error",
                    message=str(e),
                ),
                chunks=[],
            )

            await flow("response").send(r, properties={"id": request_id})

    @staticmethod
    def add_args(parser):
        """
        Add this service's command-line options to `parser`.

        Adds the FlowProcessor arguments first, then -c/--concurrency
        (int, default default_concurrency).
        """

        FlowProcessor.add_args(parser)

        parser.add_argument(
            '-c', '--concurrency',
            type=int,
            default=default_concurrency,
            help=f'Number of concurrent requests (default: {default_concurrency})'
        )
def run():
    """
    Entry point: launch the service under the default identity, using
    the module docstring as its description.
    """

    # The original referenced `Processor`, a name that is neither defined
    # nor imported in this module, so calling run() raised NameError.
    # DocumentEmbeddingsQueryService is the processor class defined here.
    # NOTE(review): launch() is assumed to be inherited via FlowProcessor
    # -- confirm against the base class.
    DocumentEmbeddingsQueryService.launch(default_ident, __doc__)