Fix ontology RAG pipeline + add query concurrency (#691)

- Fix ontology RAG pipeline: embeddings API, chunker provenance, and query concurrency

- Fix ontology embeddings to use the correct response shape from the embed()
  API (it returns a list of vectors, not a list of lists of vectors).
- Simplify chunker URI logic to append /c{index} to parent ID
  instead of parsing page/doc URI structure which was fragile.

- Add provenance tracking and librarian integration to token
  chunker, matching recursive chunker capabilities.

- Add configurable concurrency (default 10) to Cassandra, Qdrant,
  and embeddings query services.
This commit is contained in:
cybermaggedon 2026-03-12 11:34:42 +00:00 committed by GitHub
parent 312174eb88
commit 45e6ad4abc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 148 additions and 50 deletions

View file

@ -17,12 +17,14 @@ from . producer_spec import ProducerSpec
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Identity that run() passes to Processor.launch().
default_ident = "doc-embeddings-query"
# Fallback for the `concurrency` constructor parameter and the default of
# the -c/--concurrency command-line option.
default_concurrency = 10
class DocumentEmbeddingsQueryService(FlowProcessor):
def __init__(self, **params):
id = params.get("id")
concurrency = params.get("concurrency", default_concurrency)
super(DocumentEmbeddingsQueryService, self).__init__(
**params | { "id": id }
@ -32,7 +34,8 @@ class DocumentEmbeddingsQueryService(FlowProcessor):
ConsumerSpec(
name = "request",
schema = DocumentEmbeddingsRequest,
handler = self.on_message
handler = self.on_message,
concurrency = concurrency,
)
)
@ -83,6 +86,13 @@ class DocumentEmbeddingsQueryService(FlowProcessor):
FlowProcessor.add_args(parser)
parser.add_argument(
'-c', '--concurrency',
type=int,
default=default_concurrency,
help=f'Number of concurrent requests (default: {default_concurrency})'
)
def run():
    """Launch the processor using the module's default identity.

    The module docstring (the module-level ``__doc__``) is forwarded as
    the second argument to ``Processor.launch``.
    """
    # NOTE(review): `Processor` is not defined in the visible part of this
    # file -- confirm it is actually in scope where this module is used.
    ident = default_ident
    Processor.launch(ident, __doc__)

View file

@ -17,12 +17,14 @@ from . producer_spec import ProducerSpec
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Identity that run() passes to Processor.launch().
default_ident = "graph-embeddings-query"
# Fallback for the `concurrency` constructor parameter and the default of
# the -c/--concurrency command-line option.
default_concurrency = 10
class GraphEmbeddingsQueryService(FlowProcessor):
def __init__(self, **params):
id = params.get("id")
concurrency = params.get("concurrency", default_concurrency)
super(GraphEmbeddingsQueryService, self).__init__(
**params | { "id": id }
@ -32,7 +34,8 @@ class GraphEmbeddingsQueryService(FlowProcessor):
ConsumerSpec(
name = "request",
schema = GraphEmbeddingsRequest,
handler = self.on_message
handler = self.on_message,
concurrency = concurrency,
)
)
@ -83,6 +86,13 @@ class GraphEmbeddingsQueryService(FlowProcessor):
FlowProcessor.add_args(parser)
parser.add_argument(
'-c', '--concurrency',
type=int,
default=default_concurrency,
help=f'Number of concurrent requests (default: {default_concurrency})'
)
def run():
    """Launch the processor using the module's default identity.

    The module docstring (the module-level ``__doc__``) is forwarded as
    the second argument to ``Processor.launch``.
    """
    # NOTE(review): `Processor` is not defined in the visible part of this
    # file -- confirm it is actually in scope where this module is used.
    ident = default_ident
    Processor.launch(ident, __doc__)