trustgraph/trustgraph-flow/trustgraph/storage/knowledge/store.py
cybermaggedon 08bfec1539
fix: wire replication params through YAML/params path for Cassandra and Qdrant (#976)
resolve_cassandra_config did not accept replication_factor as a kwarg,
so cassandra_replication_factor from YAML params was silently ignored
by all 6 callers. Add the kwarg and pass it from every caller.

Same fix for Qdrant: 3 writers now pass qdrant_replication_factor and
qdrant_shard_number from params.

Add tests covering the params path for both helpers.
2026-06-04 12:36:36 +01:00

87 lines
2.3 KiB
Python

"""
Stores knowledge-cores in Cassandra
"""
import json
import urllib.parse
from ... schema import Triples, GraphEmbeddings
from ... base import FlowProcessor, ConsumerSpec
from ... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
from ... tables.knowledge import KnowledgeTableStore
# Default processor identity; run() hands this to Processor.launch().
default_ident = "kg-store"
# Keyspace name for knowledge-core storage.
# NOTE(review): Processor.__init__ passes the same value to
# resolve_cassandra_config as a string literal instead of reading this
# constant — confirm whether anything outside this module still relies on it.
keyspace = "knowledge"
class Processor(FlowProcessor):
    """Flow processor that writes knowledge-core data to a KnowledgeTableStore.

    Two consumers are registered: "triples-input" (Triples schema) and
    "graph-embeddings-input" (GraphEmbeddings schema). Non-empty messages
    from either one are handed to the table store together with the
    flow's workspace.
    """

    def __init__(self, **params):
        """Resolve Cassandra settings, register consumers, build the store.

        Keys read from ``params``: ``id``, ``cassandra_host``,
        ``cassandra_username``, ``cassandra_password`` and
        ``cassandra_replication_factor``. Any key that is absent is
        forwarded to resolve_cassandra_config as None.
        """
        ident = params.get("id")

        # The helper turns the raw parameters into the effective settings.
        resolved = resolve_cassandra_config(
            host=params.get("cassandra_host"),
            username=params.get("cassandra_username"),
            password=params.get("cassandra_password"),
            default_keyspace='knowledge',
            replication_factor=params.get("cassandra_replication_factor"),
        )
        (
            cassandra_hosts,
            cassandra_user,
            cassandra_secret,
            resolved_keyspace,
            replication,
        ) = resolved

        # The base class receives the caller's params with the id and the
        # resolved host list / username layered on top.
        base_params = {
            **params,
            "id": ident,
            "cassandra_host": ','.join(cassandra_hosts),
            "cassandra_username": cassandra_user,
        }
        super().__init__(**base_params)

        # Registration order is preserved: triples first, then embeddings.
        consumers = (
            ("triples-input", Triples, self.on_triples),
            ("graph-embeddings-input", GraphEmbeddings, self.on_graph_embeddings),
        )
        for spec_name, spec_schema, spec_handler in consumers:
            self.register_specification(
                ConsumerSpec(
                    name=spec_name,
                    schema=spec_schema,
                    handler=spec_handler,
                )
            )

        self.table_store = KnowledgeTableStore(
            cassandra_host=cassandra_hosts,
            cassandra_username=cassandra_user,
            cassandra_password=cassandra_secret,
            keyspace=resolved_keyspace,
            replication_factor=replication,
        )

    async def on_triples(self, msg, consumer, flow):
        """Store a triples message; messages with falsy ``triples`` are skipped."""
        payload = msg.value()
        if not payload.triples:
            return
        await self.table_store.add_triples(flow.workspace, payload)

    async def on_graph_embeddings(self, msg, consumer, flow):
        """Store a graph-embeddings message; falsy ``entities`` are skipped."""
        payload = msg.value()
        if not payload.entities:
            return
        await self.table_store.add_graph_embeddings(flow.workspace, payload)

    @staticmethod
    def add_args(parser):
        """Add the FlowProcessor arguments, then the Cassandra arguments."""
        FlowProcessor.add_args(parser)
        add_cassandra_args(parser)
def run():
    """Launch the processor with the default identity and module docstring."""
    # Inside a function, __doc__ resolves to the module-level docstring.
    description = __doc__
    Processor.launch(default_ident, description)