knowledge service (#367)

* Write knowledge core elements to Cassandra

* Store service works, building management service

* kg-manager
This commit is contained in:
cybermaggedon 2025-05-06 23:44:10 +01:00 committed by GitHub
parent d0da122bed
commit 807c19fd22
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 1196 additions and 243 deletions

View file

@ -0,0 +1,3 @@
from . service import run

View file

@ -0,0 +1,5 @@
from . service import run
# Start the service only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    run()

View file

@ -0,0 +1,104 @@
from .. schema import KnowledgeResponse, Error
from .. knowledge import hash
from .. exceptions import RequestError
from .. tables.knowledge import KnowledgeTableStore
import base64
import uuid
class KnowledgeManager:
    """Handles knowledge-core operations on top of a KnowledgeTableStore.

    Every public coroutine receives the incoming request object and an
    awaitable ``respond`` callback.  Results are reported by awaiting
    ``respond`` with one or more ``KnowledgeResponse`` messages.
    """

    def __init__(
            self, cassandra_host, cassandra_user, cassandra_password,
            keyspace,
    ):
        """Create the backing table store.

        All four arguments are forwarded positionally, in this order, to
        ``KnowledgeTableStore``.
        """
        self.table_store = KnowledgeTableStore(
            cassandra_host, cassandra_user, cassandra_password, keyspace
        )

    @staticmethod
    def _message(ids=None, eos=False, triples=None, graph_embeddings=None):
        """Build an error-free KnowledgeResponse; unset fields are None."""
        return KnowledgeResponse(
            error=None,
            ids=ids,
            eos=eos,
            triples=triples,
            graph_embeddings=graph_embeddings,
        )

    async def delete_kg_core(self, request, respond):
        """Delete the core ``request.id`` of ``request.user``.

        Sends a single response with every payload field set to None and
        ``eos=False`` once the store call has returned.
        """
        print("Deleting core...", flush=True)

        await self.table_store.delete_kg_core(request.user, request.id)

        await respond(self._message())

    async def fetch_kg_core(self, request, respond):
        """Send the contents of core ``request.id`` of ``request.user``.

        The store is asked for triples first and graph embeddings second;
        whatever it hands to the callbacks is forwarded in a response of
        its own (``eos=False``).  A final response with ``eos=True`` and no
        payload is sent after both store calls have returned.
        """
        print("Fetch core...", flush=True)

        async def forward_triples(batch):
            # Relay one triples payload from the store to the requester.
            await respond(self._message(triples=batch))

        async def forward_embeddings(batch):
            # Relay one graph-embeddings payload to the requester.
            await respond(self._message(graph_embeddings=batch))

        # Read the core's triples; the store invokes the callback itself.
        await self.table_store.get_triples(
            request.user, request.id, forward_triples,
        )

        # Read the core's graph embeddings in the same way.
        await self.table_store.get_graph_embeddings(
            request.user, request.id, forward_embeddings,
        )

        print("Fetch complete", flush=True)

        # Closing message: no payload, eos set.
        await respond(self._message(eos=True))

    async def list_kg_cores(self, request, respond):
        """Send the core IDs the store lists for ``request.user``.

        A single response is sent with ``ids`` populated and ``eos=False``.
        """
        core_ids = await self.table_store.list_kg_cores(request.user)

        await respond(self._message(ids=core_ids))

View file

@ -0,0 +1,228 @@
"""
Knowledge core service, manages cores and exports them
"""
from functools import partial
import asyncio
import base64
import json
from .. base import AsyncProcessor, Consumer, Producer, Publisher, Subscriber
from .. base import ConsumerMetrics, ProducerMetrics
from .. schema import KnowledgeRequest, KnowledgeResponse, Error
from .. schema import knowledge_request_queue, knowledge_response_queue
from .. schema import Document, Metadata
from .. schema import TextDocument, Metadata
from .. exceptions import RequestError
from . knowledge import KnowledgeManager
# Default processor identity, passed to Processor.launch() by run().
default_ident = "knowledge"
# Queue defaults come from the schema module; Processor lets the
# "knowledge_request_queue" / "knowledge_response_queue" parameters
# override them.
default_knowledge_request_queue = knowledge_request_queue
default_knowledge_response_queue = knowledge_response_queue
# Default Cassandra host string; Processor splits it on "," before use.
default_cassandra_host = "cassandra"
# FIXME: How to ensure this doesn't conflict with other usage?
# Keyspace name handed to KnowledgeManager by Processor.
keyspace = "knowledge"
class Processor(AsyncProcessor):
    """Knowledge core service processor.

    Consumes ``KnowledgeRequest`` messages, dispatches on their
    ``operation`` field to a ``KnowledgeManager`` and publishes
    ``KnowledgeResponse`` messages carrying the sender's ``id`` property.
    """

    def __init__(self, **params):
        """Wire up the request consumer, response producer and manager.

        Recognised keys in ``params`` (all optional):
            id: used as the consumer's subscriber name.
            knowledge_request_queue: request topic
                (default ``default_knowledge_request_queue``).
            knowledge_response_queue: response topic
                (default ``default_knowledge_response_queue``).
            cassandra_host: comma-separated host string
                (default ``default_cassandra_host``).
            cassandra_user, cassandra_password: store credentials.
        """
        subscriber_id = params.get("id")

        request_queue = params.get(
            "knowledge_request_queue", default_knowledge_request_queue
        )
        response_queue = params.get(
            "knowledge_response_queue", default_knowledge_response_queue
        )

        cassandra_host = params.get("cassandra_host", default_cassandra_host)
        cassandra_user = params.get("cassandra_user")
        cassandra_password = params.get("cassandra_password")

        # Hand the resolved values (defaults applied) to the base class.
        # NOTE(review): cassandra_password is not part of these overrides,
        # as in the original code -- confirm that is intentional.
        super().__init__(
            **params | {
                "knowledge_request_queue": request_queue,
                "knowledge_response_queue": response_queue,
                "cassandra_host": cassandra_host,
                "cassandra_user": cassandra_user,
            }
        )

        request_metrics = ConsumerMetrics(
            processor=self.id, flow=None, name="knowledge-request"
        )
        response_metrics = ProducerMetrics(
            processor=self.id, flow=None, name="knowledge-response"
        )

        self.knowledge_request_consumer = Consumer(
            taskgroup=self.taskgroup,
            client=self.pulsar_client,
            flow=None,
            topic=request_queue,
            subscriber=subscriber_id,
            schema=KnowledgeRequest,
            handler=self.on_knowledge_request,
            metrics=request_metrics,
        )

        self.knowledge_response_producer = Producer(
            client=self.pulsar_client,
            topic=response_queue,
            schema=KnowledgeResponse,
            metrics=response_metrics,
        )

        self.knowledge = KnowledgeManager(
            # The host parameter is a comma-separated string; the manager
            # receives it as a list.
            cassandra_host=cassandra_host.split(","),
            cassandra_user=cassandra_user,
            cassandra_password=cassandra_password,
            keyspace=keyspace,
        )

        self.register_config_handler(self.on_knowledge_config)

        # Flow definitions keyed by flow name, filled by on_knowledge_config.
        self.flows = {}

        print("Initialised.", flush=True)

    async def start(self):
        """Start the base processor, then the consumer and the producer."""
        await super().start()
        await self.knowledge_request_consumer.start()
        await self.knowledge_response_producer.start()

    async def on_knowledge_config(self, config, version):
        """Config handler: reload ``self.flows`` from ``config["flows"]``.

        Each value under ``config["flows"]`` is decoded with
        ``json.loads``.  If the key is absent the current flows are kept.
        """
        print("config version", version)

        if "flows" in config:
            self.flows = {
                k: json.loads(v)
                for k, v in config["flows"].items()
            }

        print(self.flows)

    async def process_request(self, v, id):
        """Dispatch request ``v`` to the matching KnowledgeManager method.

        Args:
            v: request object; ``v.operation`` selects the handler.
            id: sender-produced ID, attached to every response as the
                ``id`` message property.

        Returns:
            Whatever the selected handler returns.

        Raises:
            RequestError: if ``v.operation`` is None or not recognised.
        """
        if v.operation is None:
            raise RequestError("Null operation")

        print("request", v.operation)

        handlers = {
            "list-kg-cores": self.knowledge.list_kg_cores,
            "fetch-kg-core": self.knowledge.fetch_kg_core,
            "delete-kg-core": self.knowledge.delete_kg_core,
        }

        handler = handlers.get(v.operation)
        if handler is None:
            raise RequestError(f"Invalid operation: {v.operation}")

        async def respond(x):
            # Same keyword form as the error path, so both tag responses
            # identically.
            await self.knowledge_response_producer.send(
                x, properties={"id": id}
            )

        return await handler(v, respond)

    async def _send_error(self, error_type, exc, request_id):
        """Publish an error response of ``error_type`` for ``request_id``."""
        await self.knowledge_response_producer.send(
            KnowledgeResponse(
                error=Error(
                    type=error_type,
                    message=str(exc),
                )
            ),
            properties={"id": request_id},
        )

    async def on_knowledge_request(self, msg, consumer, flow):
        """Consumer callback: handle one request message.

        A ``RequestError`` is reported to the sender as ``request-error``;
        any other exception as ``unexpected-error``.  "Done." is printed
        only when the request was processed without raising.
        """
        v = msg.value()

        # Sender-produced ID
        request_id = msg.properties()["id"]

        print(f"Handling input {request_id}...", flush=True)

        try:
            # Responses are sent from within process_request via its
            # respond callback.
            await self.process_request(v, request_id)
        except RequestError as e:
            await self._send_error("request-error", e, request_id)
        except Exception as e:
            # Top-level boundary: report the failure to the sender
            # instead of letting it escape the handler.
            await self._send_error("unexpected-error", e, request_id)
        else:
            print("Done.", flush=True)

    @staticmethod
    def add_args(parser):
        """Register base-processor and knowledge-service CLI arguments."""
        AsyncProcessor.add_args(parser)

        parser.add_argument(
            '--knowledge-request-queue',
            default=default_knowledge_request_queue,
            help='Knowledge request queue '
                 f'(default: {default_knowledge_request_queue})',
        )

        parser.add_argument(
            '--knowledge-response-queue',
            default=default_knowledge_response_queue,
            help='Knowledge response queue '
                 f'(default: {default_knowledge_response_queue})',
        )

        parser.add_argument(
            '--cassandra-host',
            default=default_cassandra_host,
            help='Cassandra host, comma-separated for several '
                 f'(default: {default_cassandra_host})',
        )

        parser.add_argument(
            '--cassandra-user',
            default=None,
            help='Cassandra user',
        )

        parser.add_argument(
            '--cassandra-password',
            default=None,
            help='Cassandra password',
        )
def run():
    """Entry point: start the knowledge service via ``Processor.launch``.

    The default identity and this module's docstring are passed as the
    two positional arguments.
    """
    module_doc = __doc__
    Processor.launch(default_ident, module_doc)