Implement logging strategy (#444)

* Logging strategy: convert all print() calls to logging invocations
This commit is contained in:
cybermaggedon 2025-07-30 23:18:38 +01:00 committed by GitHub
parent 3e0651222b
commit dd70aade11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
117 changed files with 1216 additions and 667 deletions

View file

@ -5,11 +5,15 @@ name + parameters, output is the response, either a string or an object.
"""
import json
import logging
from mcp.client.streamable_http import streamablehttp_client
from mcp import ClientSession
from ... base import ToolService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "mcp-tool"
class Service(ToolService):
@ -26,7 +30,7 @@ class Service(ToolService):
async def on_mcp_config(self, config, version):
print("Got config version", version)
logger.info(f"Got config version {version}")
if "mcp" not in config: return
@ -52,7 +56,7 @@ class Service(ToolService):
else:
remote_name = name
print("Invoking", remote_name, "at", url, flush=True)
logger.info(f"Invoking {remote_name} at {url}")
# Connect to a streamable HTTP server
async with streamablehttp_client(url) as (
@ -86,13 +90,13 @@ class Service(ToolService):
except BaseExceptionGroup as e:
for child in e.exceptions:
print(child)
logger.debug(f"Child: {child}")
raise e.exceptions[0]
except Exception as e:
print(e)
logger.error(f"Error invoking MCP tool: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -164,18 +164,18 @@ class AgentManager:
async def reason(self, question, history, context):
print(f"calling reason: {question}", flush=True)
logger.debug(f"calling reason: {question}")
tools = self.tools
print(f"in reason", flush=True)
print(tools, flush=True)
logger.debug("in reason")
logger.debug(f"tools: {tools}")
tool_names = ",".join([
t for t in self.tools.keys()
])
print("Tool names:", tool_names, flush=True)
logger.debug(f"Tool names: {tool_names}")
variables = {
"question": question,
@ -208,14 +208,14 @@ class AgentManager:
]
}
print(json.dumps(variables, indent=4), flush=True)
logger.debug(f"Variables: {json.dumps(variables, indent=4)}")
logger.info(f"prompt: {variables}")
# Get text response from prompt service
response_text = await context("prompt-request").agent_react(variables)
print(f"Response text:\n{response_text}", flush=True)
logger.debug(f"Response text:\n{response_text}")
logger.info(f"response: {response_text}")
@ -233,7 +233,6 @@ class AgentManager:
async def react(self, question, history, think, observe, context):
logger.info(f"question: {question}")
print(f"question: {question}", flush=True)
act = await self.reason(
question = question,
@ -256,7 +255,7 @@ class AgentManager:
else:
raise RuntimeError(f"No action for {act.name}!")
print("TOOL>>>", act, flush=True)
logger.debug(f"TOOL>>> {act}")
resp = await action.implementation(context).invoke(
**act.arguments

View file

@ -8,7 +8,7 @@ import sys
import functools
import logging
logging.basicConfig(level=logging.DEBUG)
# Module logger
logger = logging.getLogger(__name__)
from ... base import AgentService, TextCompletionClientSpec, PromptClientSpec
@ -81,7 +81,7 @@ class Processor(AgentService):
async def on_tools_config(self, config, version):
print("Loading configuration version", version)
logger.info(f"Loading configuration version {version}")
try:
@ -151,13 +151,13 @@ class Processor(AgentService):
additional_context=additional
)
print(f"Loaded {len(tools)} tools", flush=True)
print("Tool configuration reloaded.", flush=True)
logger.info(f"Loaded {len(tools)} tools")
logger.info("Tool configuration reloaded.")
except Exception as e:
print("on_tools_config Exception:", e, flush=True)
print("Configuration reload failed", flush=True)
logger.error(f"on_tools_config Exception: {e}", exc_info=True)
logger.error("Configuration reload failed")
async def agent_request(self, request, respond, next, flow):
@ -176,16 +176,16 @@ class Processor(AgentService):
else:
history = []
print(f"Question: {request.question}", flush=True)
logger.info(f"Question: {request.question}")
if len(history) >= self.max_iterations:
raise RuntimeError("Too many agent iterations")
print(f"History: {history}", flush=True)
logger.debug(f"History: {history}")
async def think(x):
print(f"Think: {x}", flush=True)
logger.debug(f"Think: {x}")
r = AgentResponse(
answer=None,
@ -198,7 +198,7 @@ class Processor(AgentService):
async def observe(x):
print(f"Observe: {x}", flush=True)
logger.debug(f"Observe: {x}")
r = AgentResponse(
answer=None,
@ -209,7 +209,7 @@ class Processor(AgentService):
await respond(r)
print("Call React", flush=True)
logger.debug("Call React")
act = await self.agent.react(
question = request.question,
@ -219,11 +219,11 @@ class Processor(AgentService):
context = flow,
)
print(f"Action: {act}", flush=True)
logger.debug(f"Action: {act}")
if isinstance(act, Final):
print("Send final response...", flush=True)
logger.debug("Send final response...")
if isinstance(act.final, str):
f = act.final
@ -238,11 +238,11 @@ class Processor(AgentService):
await respond(r)
print("Done.", flush=True)
logger.debug("Done.")
return
print("Send next...", flush=True)
logger.debug("Send next...")
history.append(act)
@ -263,15 +263,15 @@ class Processor(AgentService):
await next(r)
print("Done.", flush=True)
logger.debug("React agent processing complete")
return
except Exception as e:
print(f"agent_request Exception: {e}")
logger.error(f"agent_request Exception: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.debug("Send error response...")
r = AgentResponse(
error=Error(

View file

@ -1,7 +1,11 @@
import json
import logging
from .types import Argument
# Module logger
logger = logging.getLogger(__name__)
# This tool implementation knows how to put a question to the graph RAG
# service
class KnowledgeQueryImpl:
@ -21,7 +25,7 @@ class KnowledgeQueryImpl:
async def invoke(self, **arguments):
client = self.context("graph-rag-request")
print("Graph RAG question...", flush=True)
logger.debug("Graph RAG question...")
return await client.rag(
arguments.get("question")
)
@ -44,7 +48,7 @@ class TextCompletionImpl:
async def invoke(self, **arguments):
client = self.context("prompt-request")
print("Prompt question...", flush=True)
logger.debug("Prompt question...")
return await client.question(
arguments.get("question")
)
@ -67,13 +71,13 @@ class McpToolImpl:
client = self.context("mcp-tool-request")
print(f"MCP tool invocation: {self.mcp_tool_id}...", flush=True)
logger.debug(f"MCP tool invocation: {self.mcp_tool_id}...")
output = await client.invoke(
name = self.mcp_tool_id,
parameters = arguments, # Pass the actual arguments
)
print(output)
logger.debug(f"MCP tool output: {output}")
if isinstance(output, str):
return output
@ -94,7 +98,7 @@ class PromptImpl:
async def invoke(self, **arguments):
client = self.context("prompt-request")
print(f"Prompt template invocation: {self.template_id}...", flush=True)
logger.debug(f"Prompt template invocation: {self.template_id}...")
return await client.prompt(
id=self.template_id,
variables=arguments

View file

@ -4,12 +4,16 @@ Simple decoder, accepts text documents on input, outputs chunks from the
as text as separate output objects.
"""
import logging
from langchain_text_splitters import RecursiveCharacterTextSplitter
from prometheus_client import Histogram
from ... schema import TextDocument, Chunk
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "chunker"
class Processor(FlowProcessor):
@ -54,12 +58,12 @@ class Processor(FlowProcessor):
)
)
print("Chunker initialised", flush=True)
logger.info("Recursive chunker initialized")
async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Chunking {v.metadata.id}...", flush=True)
logger.info(f"Chunking document {v.metadata.id}...")
texts = self.text_splitter.create_documents(
[v.text.decode("utf-8")]
@ -67,7 +71,7 @@ class Processor(FlowProcessor):
for ix, chunk in enumerate(texts):
print("Chunk", len(chunk.page_content), flush=True)
logger.debug(f"Created chunk of size {len(chunk.page_content)}")
r = Chunk(
metadata=v.metadata,
@ -80,7 +84,7 @@ class Processor(FlowProcessor):
await flow("output").send(r)
print("Done.", flush=True)
logger.debug("Document chunking complete")
@staticmethod
def add_args(parser):

View file

@ -4,12 +4,16 @@ Simple decoder, accepts text documents on input, outputs chunks from the
as text as separate output objects.
"""
import logging
from langchain_text_splitters import TokenTextSplitter
from prometheus_client import Histogram
from ... schema import TextDocument, Chunk
from ... base import FlowProcessor
# Module logger
logger = logging.getLogger(__name__)
default_ident = "chunker"
class Processor(FlowProcessor):
@ -53,12 +57,12 @@ class Processor(FlowProcessor):
)
)
print("Chunker initialised", flush=True)
logger.info("Token chunker initialized")
async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Chunking {v.metadata.id}...", flush=True)
logger.info(f"Chunking document {v.metadata.id}...")
texts = self.text_splitter.create_documents(
[v.text.decode("utf-8")]
@ -66,7 +70,7 @@ class Processor(FlowProcessor):
for ix, chunk in enumerate(texts):
print("Chunk", len(chunk.page_content), flush=True)
logger.debug(f"Created chunk of size {len(chunk.page_content)}")
r = Chunk(
metadata=v.metadata,
@ -79,7 +83,7 @@ class Processor(FlowProcessor):
await flow("output").send(r)
print("Done.", flush=True)
logger.debug("Document chunking complete")
@staticmethod
def add_args(parser):

View file

@ -1,9 +1,14 @@
import logging
from trustgraph.schema import ConfigResponse
from trustgraph.schema import ConfigValue, Error
from ... tables.config import ConfigTableStore
# Module logger
logger = logging.getLogger(__name__)
class ConfigurationClass:
async def keys(self):
@ -228,7 +233,7 @@ class Configuration:
async def handle(self, msg):
print("Handle message ", msg.operation)
logger.debug(f"Handling config message: {msg.operation}")
if msg.operation == "get":

View file

@ -1,6 +1,10 @@
from trustgraph.schema import FlowResponse, Error
import json
import logging
# Module logger
logger = logging.getLogger(__name__)
class FlowConfig:
def __init__(self, config):
@ -41,7 +45,7 @@ class FlowConfig:
async def handle_delete_class(self, msg):
print(msg)
logger.debug(f"Flow config message: {msg}")
await self.config.get("flow-classes").delete(msg.class_name)
@ -218,7 +222,7 @@ class FlowConfig:
async def handle(self, msg):
print("Handle message ", msg.operation)
logger.debug(f"Handling flow message: {msg.operation}")
if msg.operation == "list-classes":
resp = await self.handle_list_classes(msg)

View file

@ -3,6 +3,8 @@
Config service. Manages system global configuration state
"""
import logging
from trustgraph.schema import Error
from trustgraph.schema import ConfigRequest, ConfigResponse, ConfigPush
@ -20,6 +22,9 @@ from . flow import FlowConfig
from ... base import ProcessorMetrics, ConsumerMetrics, ProducerMetrics
from ... base import Consumer, Producer
# Module logger
logger = logging.getLogger(__name__)
# FIXME: How to ensure this doesn't conflict with other usage?
keyspace = "config"
@ -146,7 +151,7 @@ class Processor(AsyncProcessor):
self.flow = FlowConfig(self.config)
print("Service initialised.")
logger.info("Config service initialized")
async def start(self):
@ -172,7 +177,7 @@ class Processor(AsyncProcessor):
# Race condition, should make sure version & config sync
print("Pushed version ", await self.config.get_version())
logger.info(f"Pushed configuration version {await self.config.get_version()}")
async def on_config_request(self, msg, consumer, flow):
@ -183,7 +188,7 @@ class Processor(AsyncProcessor):
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling {id}...", flush=True)
logger.info(f"Handling config request {id}...")
resp = await self.config.handle(v)
@ -214,7 +219,7 @@ class Processor(AsyncProcessor):
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling {id}...", flush=True)
logger.info(f"Handling flow request {id}...")
resp = await self.flow.handle(v)

View file

@ -8,6 +8,10 @@ from .. base import Publisher
import base64
import asyncio
import uuid
import logging
# Module logger
logger = logging.getLogger(__name__)
class KnowledgeManager:
@ -26,7 +30,7 @@ class KnowledgeManager:
async def delete_kg_core(self, request, respond):
print("Deleting core...", flush=True)
logger.info("Deleting knowledge core...")
await self.table_store.delete_kg_core(
request.user, request.id
@ -44,7 +48,7 @@ class KnowledgeManager:
async def get_kg_core(self, request, respond):
print("Get core...", flush=True)
logger.info("Getting knowledge core...")
async def publish_triples(t):
await respond(
@ -82,7 +86,7 @@ class KnowledgeManager:
publish_ge,
)
print("Get complete", flush=True)
logger.debug("Knowledge core retrieval complete")
await respond(
KnowledgeResponse(
@ -158,13 +162,13 @@ class KnowledgeManager:
async def core_loader(self):
print("Running...", flush=True)
logger.info("Knowledge background processor running...")
while True:
print("Wait for next load...", flush=True)
logger.debug("Waiting for next load...")
request, respond = await self.loader_queue.get()
print("Loading...", request.id, flush=True)
logger.info(f"Loading knowledge: {request.id}")
try:
@ -204,7 +208,7 @@ class KnowledgeManager:
except Exception as e:
print("Exception:", e, flush=True)
logger.error(f"Knowledge exception: {e}", exc_info=True)
await respond(
KnowledgeResponse(
error = Error(
@ -219,15 +223,15 @@ class KnowledgeManager:
)
print("Going to start loading...", flush=True)
logger.debug("Starting knowledge loading process...")
try:
t_pub = None
ge_pub = None
print(t_q, flush=True)
print(ge_q, flush=True)
logger.debug(f"Triples queue: {t_q}")
logger.debug(f"Graph embeddings queue: {ge_q}")
t_pub = Publisher(
self.flow_config.pulsar_client, t_q,
@ -238,7 +242,7 @@ class KnowledgeManager:
schema=GraphEmbeddings
)
print("Start publishers...", flush=True)
logger.debug("Starting publishers...")
await t_pub.start()
await ge_pub.start()
@ -246,7 +250,7 @@ class KnowledgeManager:
async def publish_triples(t):
await t_pub.send(None, t)
print("Publish triples...", flush=True)
logger.debug("Publishing triples...")
# Remove doc table row
await self.table_store.get_triples(
@ -258,7 +262,7 @@ class KnowledgeManager:
async def publish_ge(g):
await ge_pub.send(None, g)
print("Publish GEs...", flush=True)
logger.debug("Publishing graph embeddings...")
# Remove doc table row
await self.table_store.get_graph_embeddings(
@ -267,19 +271,19 @@ class KnowledgeManager:
publish_ge,
)
print("Completed that.", flush=True)
logger.debug("Knowledge loading completed")
except Exception as e:
print("Exception:", e, flush=True)
logger.error(f"Knowledge exception: {e}", exc_info=True)
finally:
print("Stopping publishers...", flush=True)
logger.debug("Stopping publishers...")
if t_pub: await t_pub.stop()
if ge_pub: await ge_pub.stop()
print("Done", flush=True)
logger.debug("Knowledge processing done")
continue

View file

@ -7,6 +7,7 @@ from functools import partial
import asyncio
import base64
import json
import logging
from .. base import AsyncProcessor, Consumer, Producer, Publisher, Subscriber
from .. base import ConsumerMetrics, ProducerMetrics
@ -21,6 +22,9 @@ from .. exceptions import RequestError
from . knowledge import KnowledgeManager
# Module logger
logger = logging.getLogger(__name__)
default_ident = "knowledge"
default_knowledge_request_queue = knowledge_request_queue
@ -96,7 +100,7 @@ class Processor(AsyncProcessor):
self.flows = {}
print("Initialised.", flush=True)
logger.info("Knowledge service initialized")
async def start(self):
@ -106,7 +110,7 @@ class Processor(AsyncProcessor):
async def on_knowledge_config(self, config, version):
print("config version", version)
logger.info(f"Configuration version: {version}")
if "flows" in config:
@ -115,14 +119,14 @@ class Processor(AsyncProcessor):
for k, v in config["flows"].items()
}
print(self.flows)
logger.debug(f"Flows: {self.flows}")
async def process_request(self, v, id):
if v.operation is None:
raise RequestError("Null operation")
print("request", v.operation)
logger.debug(f"Knowledge request: {v.operation}")
impls = {
"list-kg-cores": self.knowledge.list_kg_cores,
@ -150,7 +154,7 @@ class Processor(AsyncProcessor):
id = msg.properties()["id"]
print(f"Handling input {id}...", flush=True)
logger.info(f"Handling knowledge input {id}...")
try:
@ -187,7 +191,7 @@ class Processor(AsyncProcessor):
return
print("Done.", flush=True)
logger.debug("Knowledge input processing complete")
@staticmethod
def add_args(parser):

View file

@ -19,6 +19,10 @@ from ... schema import document_ingest_queue, text_ingest_queue
from ... log_level import LogLevel
from ... base import InputOutputProcessor
import logging
logger = logging.getLogger(__name__)
module = "ocr"
default_subscriber = module
@ -94,18 +98,18 @@ class Processor(InputOutputProcessor):
# Used with Mistral doc upload
self.unique_id = str(uuid.uuid4())
print("PDF inited")
logger.info("PDF inited")
def ocr(self, blob):
print("Parse PDF...", flush=True)
logger.debug("Parse PDF...")
pdfbuf = BytesIO(blob)
pdf = PdfReader(pdfbuf)
for chunk in chunks(pdf.pages, pages_per_chunk):
print("Get next pages...", flush=True)
logger.debug("Get next pages...")
part = PdfWriter()
for page in chunk:
@ -114,7 +118,7 @@ class Processor(InputOutputProcessor):
buf = BytesIO()
part.write_stream(buf)
print("Upload chunk...", flush=True)
logger.debug("Upload chunk...")
uploaded_file = self.mistral.files.upload(
file={
@ -128,7 +132,7 @@ class Processor(InputOutputProcessor):
file_id=uploaded_file.id, expiry=1
)
print("OCR...", flush=True)
logger.debug("OCR...")
processed = self.mistral.ocr.process(
model="mistral-ocr-latest",
@ -139,21 +143,21 @@ class Processor(InputOutputProcessor):
}
)
print("Extract markdown...", flush=True)
logger.debug("Extract markdown...")
markdown = get_combined_markdown(processed)
print("OCR complete.", flush=True)
logger.info("OCR complete.")
return markdown
async def on_message(self, msg, consumer):
print("PDF message received")
logger.debug("PDF message received")
v = msg.value()
print(f"Decoding {v.metadata.id}...", flush=True)
logger.info(f"Decoding {v.metadata.id}...")
markdown = self.ocr(base64.b64decode(v.data))
@ -164,7 +168,7 @@ class Processor(InputOutputProcessor):
await consumer.q.output.send(r)
print("Done.", flush=True)
logger.info("Done.")
@staticmethod
def add_args(parser):

View file

@ -6,11 +6,15 @@ PDF document as text as separate output objects.
import tempfile
import base64
import logging
from langchain_community.document_loaders import PyPDFLoader
from ... schema import Document, TextDocument, Metadata
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "pdf-decoder"
class Processor(FlowProcessor):
@ -40,15 +44,15 @@ class Processor(FlowProcessor):
)
)
print("PDF inited", flush=True)
logger.info("PDF decoder initialized")
async def on_message(self, msg, consumer, flow):
print("PDF message received", flush=True)
logger.debug("PDF message received")
v = msg.value()
print(f"Decoding {v.metadata.id}...", flush=True)
logger.info(f"Decoding PDF {v.metadata.id}...")
with tempfile.NamedTemporaryFile(delete_on_close=False) as fp:
@ -62,7 +66,7 @@ class Processor(FlowProcessor):
for ix, page in enumerate(pages):
print("page", ix, flush=True)
logger.debug(f"Processing page {ix}")
r = TextDocument(
metadata=v.metadata,
@ -71,7 +75,7 @@ class Processor(FlowProcessor):
await flow("output").send(r)
print("Done.", flush=True)
logger.debug("PDF decoding complete")
@staticmethod
def add_args(parser):

View file

@ -1,6 +1,9 @@
from pymilvus import MilvusClient, CollectionSchema, FieldSchema, DataType
import time
import logging
logger = logging.getLogger(__name__)
class DocVectors:
@ -21,7 +24,7 @@ class DocVectors:
# Next time to reload - this forces a reload at next window
self.next_reload = time.time() + self.reload_time
print("Reload at", self.next_reload)
logger.debug(f"Reload at {self.next_reload}")
def init_collection(self, dimension):
@ -110,12 +113,12 @@ class DocVectors:
}
}
print("Loading...")
logger.debug("Loading...")
self.client.load_collection(
collection_name=coll,
)
print("Searching...")
logger.debug("Searching...")
res = self.client.search(
collection_name=coll,
@ -128,7 +131,7 @@ class DocVectors:
# If reload time has passed, unload collection
if time.time() > self.next_reload:
print("Unloading, reload at", self.next_reload)
logger.debug(f"Unloading, reload at {self.next_reload}")
self.client.release_collection(
collection_name=coll,
)

View file

@ -1,6 +1,9 @@
from pymilvus import MilvusClient, CollectionSchema, FieldSchema, DataType
import time
import logging
logger = logging.getLogger(__name__)
class EntityVectors:
@ -21,7 +24,7 @@ class EntityVectors:
# Next time to reload - this forces a reload at next window
self.next_reload = time.time() + self.reload_time
print("Reload at", self.next_reload)
logger.debug(f"Reload at {self.next_reload}")
def init_collection(self, dimension):
@ -110,12 +113,12 @@ class EntityVectors:
}
}
print("Loading...")
logger.debug("Loading...")
self.client.load_collection(
collection_name=coll,
)
print("Searching...")
logger.debug("Searching...")
res = self.client.search(
collection_name=coll,
@ -128,7 +131,7 @@ class EntityVectors:
# If reload time has passed, unload collection
if time.time() > self.next_reload:
print("Unloading, reload at", self.next_reload)
logger.debug(f"Unloading, reload at {self.next_reload}")
self.client.release_collection(
collection_name=coll,
)

View file

@ -1,6 +1,9 @@
from pymilvus import MilvusClient, CollectionSchema, FieldSchema, DataType
import time
import logging
logger = logging.getLogger(__name__)
class ObjectVectors:
@ -21,7 +24,7 @@ class ObjectVectors:
# Next time to reload - this forces a reload at next window
self.next_reload = time.time() + self.reload_time
print("Reload at", self.next_reload)
logger.debug(f"Reload at {self.next_reload}")
def init_collection(self, dimension, name):
@ -126,12 +129,12 @@ class ObjectVectors:
}
}
print("Loading...")
logger.debug("Loading...")
self.client.load_collection(
collection_name=coll,
)
print("Searching...")
logger.debug("Searching...")
res = self.client.search(
collection_name=coll,
@ -144,7 +147,7 @@ class ObjectVectors:
# If reload time has passed, unload collection
if time.time() > self.next_reload:
print("Unloading, reload at", self.next_reload)
logger.debug(f"Unloading, reload at {self.next_reload}")
self.client.release_collection(
collection_name=coll,
)

View file

@ -11,6 +11,10 @@ from ... schema import EmbeddingsRequest, EmbeddingsResponse
from ... base import FlowProcessor, RequestResponseSpec, ConsumerSpec
from ... base import ProducerSpec
import logging
logger = logging.getLogger(__name__)
default_ident = "document-embeddings"
class Processor(FlowProcessor):
@ -52,7 +56,7 @@ class Processor(FlowProcessor):
async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Indexing {v.metadata.id}...", flush=True)
logger.info(f"Indexing {v.metadata.id}...")
try:
@ -79,12 +83,12 @@ class Processor(FlowProcessor):
await flow("output").send(r)
except Exception as e:
print("Exception:", e, flush=True)
logger.error("Exception occurred", exc_info=True)
# Retry
raise e
print("Done.", flush=True)
logger.info("Done.")
@staticmethod
def add_args(parser):

View file

@ -4,10 +4,15 @@ Embeddings service, applies an embeddings model using fastembed
Input is text, output is embeddings vector.
"""
import logging
from ... base import EmbeddingsService
from fastembed import TextEmbedding
# Module logger
logger = logging.getLogger(__name__)
default_ident = "embeddings"
default_model="sentence-transformers/all-MiniLM-L6-v2"
@ -22,7 +27,7 @@ class Processor(EmbeddingsService):
**params | { "model": model }
)
print("Get model...", flush=True)
logger.info("Loading FastEmbed model...")
self.embeddings = TextEmbedding(model_name = model)
async def on_embeddings(self, text):

View file

@ -11,6 +11,10 @@ from ... schema import EmbeddingsRequest, EmbeddingsResponse
from ... base import FlowProcessor, EmbeddingsClientSpec, ConsumerSpec
from ... base import ProducerSpec
import logging
logger = logging.getLogger(__name__)
default_ident = "graph-embeddings"
class Processor(FlowProcessor):
@ -50,7 +54,7 @@ class Processor(FlowProcessor):
async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Indexing {v.metadata.id}...", flush=True)
logger.info(f"Indexing {v.metadata.id}...")
entities = []
@ -77,12 +81,12 @@ class Processor(FlowProcessor):
await flow("output").send(r)
except Exception as e:
print("Exception:", e, flush=True)
logger.error("Exception occurred", exc_info=True)
# Retry
raise e
print("Done.", flush=True)
logger.info("Done.")
@staticmethod
def add_args(parser):

View file

@ -10,6 +10,9 @@ from trustgraph.schema import encyclopedia_lookup_response_queue
from trustgraph.log_level import LogLevel
from trustgraph.base import ConsumerProducer
import requests
import logging
logger = logging.getLogger(__name__)
module = "wikipedia"
@ -46,7 +49,7 @@ class Processor(ConsumerProducer):
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling {v.kind} / {v.term}...", flush=True)
logger.info(f"Handling {v.kind} / {v.term}...")
try:

View file

@ -1,6 +1,7 @@
import re
import json
import urllib.parse
import logging
from ....schema import Chunk, Triple, Triples, Metadata, Value
from ....schema import EntityContext, EntityContexts
@ -12,6 +13,9 @@ from ....base import AgentClientSpec
from ....template import PromptManager
# Module logger
logger = logging.getLogger(__name__)
default_ident = "kg-extract-agent"
default_concurrency = 1
default_template_id = "agent-kg-extract"
@ -74,10 +78,10 @@ class Processor(FlowProcessor):
async def on_prompt_config(self, config, version):
print("Loading configuration version", version, flush=True)
logger.info(f"Loading configuration version {version}")
if self.config_key not in config:
print(f"No key {self.config_key} in config", flush=True)
logger.warning(f"No key {self.config_key} in config")
return
config = config[self.config_key]
@ -86,12 +90,12 @@ class Processor(FlowProcessor):
self.manager.load_config(config)
print("Prompt configuration reloaded.", flush=True)
logger.info("Prompt configuration reloaded")
except Exception as e:
print("Exception:", e, flush=True)
print("Configuration reload failed", flush=True)
logger.error(f"Configuration reload exception: {e}", exc_info=True)
logger.error("Configuration reload failed")
def to_uri(self, text):
return TRUSTGRAPH_ENTITIES + urllib.parse.quote(text)
@ -142,7 +146,7 @@ class Processor(FlowProcessor):
# Extract chunk text
chunk_text = v.chunk.decode('utf-8')
print("Got chunk", flush=True)
logger.debug("Processing chunk for agent extraction")
prompt = self.manager.render(
self.template_id,
@ -151,11 +155,11 @@ class Processor(FlowProcessor):
}
)
print("Prompt:", prompt, flush=True)
logger.debug(f"Agent prompt: {prompt}")
async def handle(response):
print("Response:", response, flush=True)
logger.debug(f"Agent response: {response}")
if response.error is not None:
if response.error.message:
@ -201,7 +205,7 @@ class Processor(FlowProcessor):
)
except Exception as e:
print(f"Error processing chunk: {e}", flush=True)
logger.error(f"Error processing chunk: {e}", exc_info=True)
raise
def process_extraction_data(self, data, metadata):

View file

@ -7,8 +7,12 @@ entity/context definitions for embedding.
import json
import urllib.parse
import logging
from .... schema import Chunk, Triple, Triples, Metadata, Value
# Module logger
logger = logging.getLogger(__name__)
from .... schema import EntityContext, EntityContexts
from .... schema import PromptRequest, PromptResponse
from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
@ -94,11 +98,11 @@ class Processor(FlowProcessor):
async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Indexing {v.metadata.id}...", flush=True)
logger.info(f"Extracting definitions from {v.metadata.id}...")
chunk = v.chunk.decode("utf-8")
print(chunk, flush=True)
logger.debug(f"Processing chunk: {chunk[:200]}...") # Log first 200 chars
try:
@ -108,13 +112,13 @@ class Processor(FlowProcessor):
text = chunk
)
print("Response", defs, flush=True)
logger.debug(f"Definitions response: {defs}")
if type(defs) != list:
raise RuntimeError("Expecting array in prompt response")
except Exception as e:
print("Prompt exception:", e, flush=True)
logger.error(f"Prompt exception: {e}", exc_info=True)
raise e
triples = []
@ -187,9 +191,9 @@ class Processor(FlowProcessor):
)
except Exception as e:
print("Exception: ", e, flush=True)
logger.error(f"Definitions extraction exception: {e}", exc_info=True)
print("Done.", flush=True)
logger.debug("Definitions extraction complete")
@staticmethod
def add_args(parser):

View file

@ -6,8 +6,12 @@ graph edges.
"""
import json
import logging
import urllib.parse
# Module logger
logger = logging.getLogger(__name__)
from .... schema import Chunk, Triple, Triples
from .... schema import Metadata, Value
from .... schema import PromptRequest, PromptResponse
@ -78,11 +82,11 @@ class Processor(FlowProcessor):
async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Indexing {v.metadata.id}...", flush=True)
logger.info(f"Extracting relationships from {v.metadata.id}...")
chunk = v.chunk.decode("utf-8")
print(chunk, flush=True)
logger.debug(f"Processing chunk: {chunk[:100]}..." if len(chunk) > 100 else f"Processing chunk: {chunk}")
try:
@ -92,13 +96,13 @@ class Processor(FlowProcessor):
text = chunk
)
print("Response", rels, flush=True)
logger.debug(f"Prompt response: {rels}")
if type(rels) != list:
raise RuntimeError("Expecting array in prompt response")
except Exception as e:
print("Prompt exception:", e, flush=True)
logger.error(f"Prompt exception: {e}", exc_info=True)
raise e
triples = []
@ -189,9 +193,9 @@ class Processor(FlowProcessor):
)
except Exception as e:
print("Exception: ", e, flush=True)
logger.error(f"Relationship extraction exception: {e}", exc_info=True)
print("Done.", flush=True)
logger.debug("Relationship extraction complete")
@staticmethod
def add_args(parser):

View file

@ -6,6 +6,10 @@ get topics which are output as graph edges.
import urllib.parse
import json
import logging
# Module logger
logger = logging.getLogger(__name__)
from .... schema import Chunk, Triple, Triples, Metadata, Value
from .... schema import chunk_ingest_queue, triples_store_queue
@ -81,7 +85,7 @@ class Processor(ConsumerProducer):
async def handle(self, msg):
v = msg.value()
print(f"Indexing {v.metadata.id}...", flush=True)
logger.info(f"Extracting topics from {v.metadata.id}...")
chunk = v.chunk.decode("utf-8")
@ -110,9 +114,9 @@ class Processor(ConsumerProducer):
)
except Exception as e:
print("Exception: ", e, flush=True)
logger.error(f"Topic extraction exception: {e}", exc_info=True)
print("Done.", flush=True)
logger.debug("Topic extraction complete")
@staticmethod
def add_args(parser):

View file

@ -6,8 +6,12 @@ out a row of fields. Output as a vector plus object.
import urllib.parse
import os
import logging
from pulsar.schema import JsonSchema
# Module logger
logger = logging.getLogger(__name__)
from .... schema import ChunkEmbeddings, Rows, ObjectEmbeddings, Metadata
from .... schema import RowSchema, Field
from .... schema import chunk_embeddings_ingest_queue, rows_store_queue
@ -75,7 +79,7 @@ class Processor(ConsumerProducer):
flds = __class__.parse_fields(params["field"])
for fld in flds:
print(fld)
logger.debug(f"Field configuration: {fld}")
self.primary = None
@ -142,7 +146,7 @@ class Processor(ConsumerProducer):
async def handle(self, msg):
v = msg.value()
print(f"Indexing {v.metadata.id}...", flush=True)
logger.info(f"Extracting rows from {v.metadata.id}...")
chunk = v.chunk.decode("utf-8")
@ -163,12 +167,12 @@ class Processor(ConsumerProducer):
)
for row in rows:
print(row)
logger.debug(f"Extracted row: {row}")
except Exception as e:
print("Exception: ", e, flush=True)
logger.error(f"Row extraction exception: {e}", exc_info=True)
print("Done.", flush=True)
logger.debug("Row extraction complete")
@staticmethod
def add_args(parser):

View file

@ -18,6 +18,9 @@ import logging
import os
import base64
import uuid
# Module logger
logger = logging.getLogger(__name__)
import json
import pulsar
@ -48,7 +51,7 @@ class ConfigReceiver:
v = msg.value()
print(f"Config version", v.version)
logger.info(f"Config version: {v.version}")
if "flows" in v.config:
@ -68,29 +71,29 @@ class ConfigReceiver:
del self.flows[k]
except Exception as e:
print(f"Exception: {e}", flush=True)
logger.error(f"Config processing exception: {e}", exc_info=True)
async def start_flow(self, id, flow):
print("Start flow", id)
logger.info(f"Starting flow: {id}")
for handler in self.flow_handlers:
try:
await handler.start_flow(id, flow)
except Exception as e:
print(f"Exception: {e}", flush=True)
logger.error(f"Config processing exception: {e}", exc_info=True)
async def stop_flow(self, id, flow):
print("Stop flow", id)
logger.info(f"Stopping flow: {id}")
for handler in self.flow_handlers:
try:
await handler.stop_flow(id, flow)
except Exception as e:
print(f"Exception: {e}", flush=True)
logger.error(f"Config processing exception: {e}", exc_info=True)
async def config_loader(self):
@ -111,9 +114,9 @@ class ConfigReceiver:
await self.config_cons.start()
print("Waiting...")
logger.debug("Waiting for config updates...")
print("Config consumer done. :/")
logger.info("Config consumer finished")
async def start(self):

View file

@ -2,8 +2,12 @@
import asyncio
import uuid
import msgpack
import logging
from . knowledge import KnowledgeRequestor
# Module logger
logger = logging.getLogger(__name__)
class CoreExport:
def __init__(self, pulsar_client):
@ -84,7 +88,7 @@ class CoreExport:
except Exception as e:
print("Exception:", e)
logger.error(f"Core export exception: {e}", exc_info=True)
finally:

View file

@ -3,8 +3,12 @@ import asyncio
import json
import uuid
import msgpack
import logging
from . knowledge import KnowledgeRequestor
# Module logger
logger = logging.getLogger(__name__)
class CoreImport:
def __init__(self, pulsar_client):
@ -80,14 +84,14 @@ class CoreImport:
await kr.process(msg)
except Exception as e:
print("Exception:", e)
logger.error(f"Core import exception: {e}", exc_info=True)
await error(str(e))
finally:
await kr.stop()
print("All done.")
logger.info("Core import completed")
response = await ok()
await response.write_eof()

View file

@ -2,12 +2,16 @@
import asyncio
import queue
import uuid
import logging
from ... schema import DocumentEmbeddings
from ... base import Subscriber
from . serialize import serialize_document_embeddings
# Module logger
logger = logging.getLogger(__name__)
class DocumentEmbeddingsExport:
def __init__(
@ -55,7 +59,7 @@ class DocumentEmbeddingsExport:
continue
except Exception as e:
print(f"Exception: {str(e)}", flush=True)
logger.error(f"Exception: {str(e)}", exc_info=True)
break
await subs.unsubscribe_all(id)

View file

@ -1,11 +1,15 @@
import base64
import logging
from ... schema import Document, Metadata
from ... messaging import TranslatorRegistry
from . sender import ServiceSender
# Module logger
logger = logging.getLogger(__name__)
class DocumentLoad(ServiceSender):
def __init__(self, pulsar_client, queue):
@ -18,6 +22,6 @@ class DocumentLoad(ServiceSender):
self.translator = TranslatorRegistry.get_request_translator("document")
def to_request(self, body):
print("Document received")
logger.info("Document received")
return self.translator.to_pulsar(body)

View file

@ -2,12 +2,16 @@
import asyncio
import queue
import uuid
import logging
from ... schema import EntityContexts
from ... base import Subscriber
from . serialize import serialize_entity_contexts
# Module logger
logger = logging.getLogger(__name__)
class EntityContextsExport:
def __init__(
@ -55,7 +59,7 @@ class EntityContextsExport:
continue
except Exception as e:
print(f"Exception: {str(e)}", flush=True)
logger.error(f"Exception: {str(e)}", exc_info=True)
break
await subs.unsubscribe_all(id)

View file

@ -2,12 +2,16 @@
import asyncio
import queue
import uuid
import logging
from ... schema import GraphEmbeddings
from ... base import Subscriber
from . serialize import serialize_graph_embeddings
# Module logger
logger = logging.getLogger(__name__)
class GraphEmbeddingsExport:
def __init__(
@ -55,7 +59,7 @@ class GraphEmbeddingsExport:
continue
except Exception as e:
print(f"Exception: {str(e)}", flush=True)
logger.error(f"Exception: {str(e)}", exc_info=True)
break
await subs.unsubscribe_all(id)

View file

@ -2,6 +2,10 @@
import asyncio
from aiohttp import web
import uuid
import logging
# Module logger
logger = logging.getLogger(__name__)
from . config import ConfigRequestor
from . flow import FlowRequestor
@ -92,12 +96,12 @@ class DispatcherManager:
self.dispatchers = {}
async def start_flow(self, id, flow):
print("Start flow", id)
logger.info(f"Starting flow {id}")
self.flows[id] = flow
return
async def stop_flow(self, id, flow):
print("Stop flow", id)
logger.info(f"Stopping flow {id}")
del self.flows[id]
return

View file

@ -2,6 +2,10 @@
import asyncio
import queue
import uuid
import logging
# Module logger
logger = logging.getLogger(__name__)
MAX_OUTSTANDING_REQUESTS = 15
WORKER_CLOSE_WAIT = 0.01
@ -46,7 +50,7 @@ class Mux:
))
except Exception as e:
print("receive exception:", str(e), flush=True)
logger.error(f"Receive exception: {str(e)}", exc_info=True)
await self.ws.send_json({"error": str(e)})
async def maybe_tidy_workers(self, workers):
@ -138,7 +142,7 @@ class Mux:
except Exception as e:
# This is an internal working error, may not be recoverable
print("run prepare exception:", e)
logger.error(f"Run prepare exception: {e}", exc_info=True)
await self.ws.send_json({"id": id, "error": str(e)})
self.running.stop()
@ -155,7 +159,7 @@ class Mux:
)
except Exception as e:
print("Exception2:", e)
logger.error(f"Exception in mux: {e}", exc_info=True)
await self.ws.send_json({"error": str(e)})
self.running.stop()

View file

@ -68,7 +68,7 @@ class ServiceRequestor:
q.get(), timeout=self.timeout
)
except Exception as e:
print("Exception", e)
logger.error(f"Request timeout exception: {e}", exc_info=True)
raise RuntimeError("Timeout")
if resp.error:

View file

@ -1,11 +1,15 @@
import base64
import logging
from ... schema import TextDocument, Metadata
from ... messaging import TranslatorRegistry
from . sender import ServiceSender
# Module logger
logger = logging.getLogger(__name__)
class TextLoad(ServiceSender):
def __init__(self, pulsar_client, queue):
@ -18,6 +22,6 @@ class TextLoad(ServiceSender):
self.translator = TranslatorRegistry.get_request_translator("text-document")
def to_request(self, body):
print("Text document received")
logger.info("Text document received")
return self.translator.to_pulsar(body)

View file

@ -2,12 +2,16 @@
import asyncio
import queue
import uuid
import logging
from ... schema import Triples
from ... base import Subscriber
from . serialize import serialize_triples
# Module logger
logger = logging.getLogger(__name__)
class TriplesExport:
def __init__(
@ -55,7 +59,7 @@ class TriplesExport:
continue
except Exception as e:
print(f"Exception: {str(e)}", flush=True)
logger.error(f"Exception: {str(e)}", exc_info=True)
break
await subs.unsubscribe_all(id)

View file

@ -29,7 +29,7 @@ class ConstantEndpoint:
async def handle(self, request):
print(request.path, "...")
logger.debug(f"Processing request: {request.path}")
try:
ht = request.headers["Authorization"]

View file

@ -33,7 +33,7 @@ class MetricsEndpoint:
async def handle(self, request):
print(request.path, "...")
logger.debug(f"Processing metrics request: {request.path}")
try:
ht = request.headers["Authorization"]

View file

@ -74,24 +74,24 @@ class SocketEndpoint:
self.listener(ws, dispatcher, running)
)
print("Created taskgroup, waiting...")
logger.debug("Created task group, waiting for completion...")
# Wait for threads to complete
print("Task group closed")
logger.debug("Task group closed")
# Finally?
await dispatcher.destroy()
except ExceptionGroup as e:
print("Exception group:", flush=True)
logger.error("Exception group occurred:", exc_info=True)
for se in e.exceptions:
print(" Type:", type(se), flush=True)
print(f" Exception: {se}", flush=True)
logger.error(f" Exception type: {type(se)}")
logger.error(f" Exception: {se}")
except Exception as e:
print("Socket exception:", e, flush=True)
logger.error(f"Socket exception: {e}", exc_info=True)
await ws.close()

View file

@ -36,7 +36,7 @@ class StreamEndpoint:
async def handle(self, request):
print(request.path, "...")
logger.debug(f"Processing request: {request.path}")
try:
ht = request.headers["Authorization"]

View file

@ -28,7 +28,7 @@ class VariableEndpoint:
async def handle(self, request):
print(request.path, "...")
logger.debug(f"Processing request: {request.path}")
try:
ht = request.headers["Authorization"]

View file

@ -5,6 +5,10 @@ from .. exceptions import RequestError
from minio import Minio
import time
import io
import logging
# Module logger
logger = logging.getLogger(__name__)
class BlobStore:
@ -23,7 +27,7 @@ class BlobStore:
self.bucket_name = bucket_name
print("Connected to minio", flush=True)
logger.info("Connected to MinIO")
self.ensure_bucket()
@ -33,9 +37,9 @@ class BlobStore:
found = self.minio.bucket_exists(self.bucket_name)
if not found:
self.minio.make_bucket(self.bucket_name)
print("Created bucket", self.bucket_name, flush=True)
logger.info(f"Created bucket {self.bucket_name}")
else:
print("Bucket", self.bucket_name, "already exists", flush=True)
logger.debug(f"Bucket {self.bucket_name} already exists")
async def add(self, object_id, blob, kind):
@ -48,7 +52,7 @@ class BlobStore:
content_type = kind,
)
print("Add blob complete", flush=True)
logger.debug("Add blob complete")
async def remove(self, object_id):
@ -58,7 +62,7 @@ class BlobStore:
object_name = "doc/" + str(object_id),
)
print("Remove blob complete", flush=True)
logger.debug("Remove blob complete")
async def get(self, object_id):

View file

@ -5,9 +5,13 @@ from .. exceptions import RequestError
from .. tables.library import LibraryTableStore
from . blob_store import BlobStore
import base64
import logging
import uuid
# Module logger
logger = logging.getLogger(__name__)
class Librarian:
def __init__(
@ -45,20 +49,20 @@ class Librarian:
# Create object ID for blob
object_id = uuid.uuid4()
print("Add blob...")
logger.debug("Adding blob...")
await self.blob_store.add(
object_id, base64.b64decode(request.content),
request.document_metadata.kind
)
print("Add table...")
logger.debug("Adding to table...")
await self.table_store.add_document(
request.document_metadata, object_id
)
print("Add complete", flush=True)
logger.debug("Add complete")
return LibrarianResponse(
error = None,
@ -70,7 +74,7 @@ class Librarian:
async def remove_document(self, request):
print("Removing doc...")
logger.debug("Removing document...")
if not await self.table_store.document_exists(
request.user,
@ -92,7 +96,7 @@ class Librarian:
request.document_id
)
print("Remove complete", flush=True)
logger.debug("Remove complete")
return LibrarianResponse(
error = None,
@ -104,7 +108,7 @@ class Librarian:
async def update_document(self, request):
print("Updating doc...")
logger.debug("Updating document...")
# You can't update the document ID, user or kind.
@ -116,7 +120,7 @@ class Librarian:
await self.table_store.update_document(request.document_metadata)
print("Update complete", flush=True)
logger.debug("Update complete")
return LibrarianResponse(
error = None,
@ -128,14 +132,14 @@ class Librarian:
async def get_document_metadata(self, request):
print("Get doc...")
logger.debug("Getting document metadata...")
doc = await self.table_store.get_document(
request.user,
request.document_id
)
print("Get complete", flush=True)
logger.debug("Get complete")
return LibrarianResponse(
error = None,
@ -147,7 +151,7 @@ class Librarian:
async def get_document_content(self, request):
print("Get doc content...")
logger.debug("Getting document content...")
object_id = await self.table_store.get_document_object_id(
request.user,
@ -158,7 +162,7 @@ class Librarian:
object_id
)
print("Get complete", flush=True)
logger.debug("Get complete")
return LibrarianResponse(
error = None,
@ -170,7 +174,7 @@ class Librarian:
async def add_processing(self, request):
print("Add processing")
logger.debug("Adding processing metadata...")
if await self.table_store.processing_exists(
request.processing_metadata.user,
@ -192,13 +196,13 @@ class Librarian:
object_id
)
print("Got content")
logger.debug("Retrieved content")
print("Add processing...")
logger.debug("Adding processing to table...")
await self.table_store.add_processing(request.processing_metadata)
print("Invoke document processing...")
logger.debug("Invoking document processing...")
await self.load_document(
document = doc,
@ -206,7 +210,7 @@ class Librarian:
content = content,
)
print("Add complete", flush=True)
logger.debug("Add complete")
return LibrarianResponse(
error = None,
@ -218,7 +222,7 @@ class Librarian:
async def remove_processing(self, request):
print("Removing processing...")
logger.debug("Removing processing metadata...")
if not await self.table_store.processing_exists(
request.user,
@ -232,7 +236,7 @@ class Librarian:
request.processing_id
)
print("Remove complete", flush=True)
logger.debug("Remove complete")
return LibrarianResponse(
error = None,

View file

@ -7,6 +7,7 @@ from functools import partial
import asyncio
import base64
import json
import logging
from .. base import AsyncProcessor, Consumer, Producer, Publisher, Subscriber
from .. base import ConsumerMetrics, ProducerMetrics
@ -21,6 +22,9 @@ from .. exceptions import RequestError
from . librarian import Librarian
# Module logger
logger = logging.getLogger(__name__)
default_ident = "librarian"
default_librarian_request_queue = librarian_request_queue
@ -119,7 +123,7 @@ class Processor(AsyncProcessor):
self.flows = {}
print("Initialised.", flush=True)
logger.info("Librarian service initialized")
async def start(self):
@ -129,7 +133,7 @@ class Processor(AsyncProcessor):
async def on_librarian_config(self, config, version):
print("config version", version)
logger.info(f"Configuration version: {version}")
if "flows" in config:
@ -138,7 +142,7 @@ class Processor(AsyncProcessor):
for k, v in config["flows"].items()
}
print(self.flows)
logger.debug(f"Flows: {self.flows}")
def __del__(self):
@ -146,9 +150,9 @@ class Processor(AsyncProcessor):
async def load_document(self, document, processing, content):
print("Ready for processing...")
logger.debug("Ready for document processing...")
print(document, processing, len(content))
logger.debug(f"Document: {document}, processing: {processing}, content length: {len(content)}")
if processing.flow not in self.flows:
raise RuntimeError("Invalid flow ID")
@ -188,7 +192,7 @@ class Processor(AsyncProcessor):
)
schema = Document
print(f"Submit on queue {q}...")
logger.debug(f"Submitting to queue {q}...")
pub = Publisher(
self.pulsar_client, q, schema=schema
@ -203,14 +207,14 @@ class Processor(AsyncProcessor):
await pub.stop()
print("Document submitted")
logger.debug("Document submitted")
async def process_request(self, v):
if v.operation is None:
raise RequestError("Null operation")
print("request", v.operation)
logger.debug(f"Librarian request: {v.operation}")
impls = {
"add-document": self.librarian.add_document,
@ -237,7 +241,7 @@ class Processor(AsyncProcessor):
id = msg.properties()["id"]
print(f"Handling input {id}...", flush=True)
logger.info(f"Handling librarian input {id}...")
try:
@ -276,7 +280,7 @@ class Processor(AsyncProcessor):
return
print("Done.", flush=True)
logger.debug("Librarian input processing complete")
@staticmethod
def add_args(parser):

View file

@ -4,10 +4,14 @@ Simple token counter for each LLM response.
from prometheus_client import Counter
import json
import logging
from .. schema import TextCompletionResponse, Error
from .. base import FlowProcessor, ConsumerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "metering"
class Processor(FlowProcessor):
@ -59,10 +63,10 @@ class Processor(FlowProcessor):
# Load token costs from the config service
async def on_cost_config(self, config, version):
print("Loading configuration version", version)
logger.info(f"Loading metering configuration version {version}")
if self.config_key not in config:
print(f"No key {self.config_key} in config", flush=True)
logger.warning(f"No key {self.config_key} in config")
return
config = config[self.config_key]
@ -102,9 +106,9 @@ class Processor(FlowProcessor):
__class__.input_cost_metric.inc(cost_in)
__class__.output_cost_metric.inc(cost_out)
print(f"Input Tokens: {num_in}", flush=True)
print(f"Output Tokens: {num_out}", flush=True)
print(f"Cost for call: ${cost_per_call}", flush=True)
logger.info(f"Input Tokens: {num_in}")
logger.info(f"Output Tokens: {num_out}")
logger.info(f"Cost for call: ${cost_per_call}")
@staticmethod
def add_args(parser):

View file

@ -8,10 +8,14 @@ import requests
import json
from prometheus_client import Histogram
import os
import logging
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
# Module logger
logger = logging.getLogger(__name__)
default_ident = "text-completion"
default_temperature = 0.0
@ -111,11 +115,11 @@ class Processor(LlmService):
inputtokens = response['usage']['prompt_tokens']
outputtokens = response['usage']['completion_tokens']
print(resp, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
logger.debug(f"LLM response: {resp}")
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
print("Send response...", flush=True)
logger.debug("Sending response...")
resp = LlmResult(
text = resp,
@ -128,7 +132,7 @@ class Processor(LlmService):
except TooManyRequests:
print("Rate limit...")
logger.warning("Rate limit exceeded")
# Leave rate limit retries to the base handler
raise TooManyRequests()
@ -137,10 +141,10 @@ class Processor(LlmService):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
logger.error(f"Azure LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
print("Done.", flush=True)
logger.debug("Azure LLM processing complete")
@staticmethod
def add_args(parser):

View file

@ -8,6 +8,10 @@ import json
from prometheus_client import Histogram
from openai import AzureOpenAI, RateLimitError
import os
import logging
# Module logger
logger = logging.getLogger(__name__)
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
@ -84,10 +88,10 @@ class Processor(LlmService):
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
print(resp.choices[0].message.content, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
print("Send response...", flush=True)
logger.debug(f"LLM response: {resp.choices[0].message.content}")
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
logger.debug("Sending response...")
r = LlmResult(
text = resp.choices[0].message.content,
@ -100,7 +104,7 @@ class Processor(LlmService):
except RateLimitError:
print("Send rate limit response...", flush=True)
logger.warning("Rate limit exceeded")
# Leave rate limit retries to the base handler
raise TooManyRequests()
@ -108,10 +112,10 @@ class Processor(LlmService):
except Exception as e:
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
logger.error(f"Azure OpenAI LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
print("Done.", flush=True)
logger.debug("Azure OpenAI LLM processing complete")
@staticmethod
def add_args(parser):

View file

@ -6,10 +6,14 @@ Input is prompt, output is response.
import anthropic
import os
import logging
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
# Module logger
logger = logging.getLogger(__name__)
default_ident = "text-completion"
default_model = 'claude-3-5-sonnet-20240620'
@ -42,7 +46,7 @@ class Processor(LlmService):
self.temperature = temperature
self.max_output = max_output
print("Initialised", flush=True)
logger.info("Claude LLM service initialized")
async def generate_content(self, system, prompt):
@ -69,9 +73,9 @@ class Processor(LlmService):
resp = response.content[0].text
inputtokens = response.usage.input_tokens
outputtokens = response.usage.output_tokens
print(resp, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
logger.debug(f"LLM response: {resp}")
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
resp = LlmResult(
text = resp,
@ -91,7 +95,7 @@ class Processor(LlmService):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
logger.error(f"Claude LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
@staticmethod

View file

@ -7,6 +7,10 @@ Input is prompt, output is response.
import cohere
from prometheus_client import Histogram
import os
import logging
# Module logger
logger = logging.getLogger(__name__)
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
@ -39,7 +43,7 @@ class Processor(LlmService):
self.temperature = temperature
self.cohere = cohere.Client(api_key=api_key)
print("Initialised", flush=True)
logger.info("Cohere LLM service initialized")
async def generate_content(self, system, prompt):
@ -59,9 +63,9 @@ class Processor(LlmService):
inputtokens = int(output.meta.billed_units.input_tokens)
outputtokens = int(output.meta.billed_units.output_tokens)
print(resp, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
logger.debug(f"LLM response: {resp}")
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
resp = LlmResult(
text = resp,
@ -83,7 +87,7 @@ class Processor(LlmService):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
logger.error(f"Cohere LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
@staticmethod

View file

@ -17,6 +17,10 @@ from google.genai import types
from google.genai.types import HarmCategory, HarmBlockThreshold
from google.api_core.exceptions import ResourceExhausted
import os
import logging
# Module logger
logger = logging.getLogger(__name__)
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
@ -77,7 +81,7 @@ class Processor(LlmService):
# HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY: block_level,
]
print("Initialised", flush=True)
logger.info("GoogleAIStudio LLM service initialized")
async def generate_content(self, system, prompt):
@ -102,9 +106,9 @@ class Processor(LlmService):
resp = response.text
inputtokens = int(response.usage_metadata.prompt_token_count)
outputtokens = int(response.usage_metadata.candidates_token_count)
print(resp, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
logger.debug(f"LLM response: {resp}")
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
resp = LlmResult(
text = resp,
@ -117,7 +121,7 @@ class Processor(LlmService):
except ResourceExhausted as e:
print("Hit rate limit:", e, flush=True)
logger.warning("Rate limit exceeded")
# Leave rate limit retries to the default handler
raise TooManyRequests()
@ -126,8 +130,7 @@ class Processor(LlmService):
# Apart from rate limits, treat all exceptions as unrecoverable
print(type(e), flush=True)
print(f"Exception: {e}", flush=True)
logger.error(f"GoogleAIStudio LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
@staticmethod

View file

@ -6,6 +6,10 @@ Input is prompt, output is response.
from openai import OpenAI
import os
import logging
# Module logger
logger = logging.getLogger(__name__)
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
@ -44,7 +48,7 @@ class Processor(LlmService):
api_key = "sk-no-key-required",
)
print("Initialised", flush=True)
logger.info("Llamafile LLM service initialized")
async def generate_content(self, system, prompt):
@ -70,9 +74,9 @@ class Processor(LlmService):
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
print(resp.choices[0].message.content, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
logger.debug(f"LLM response: {resp.choices[0].message.content}")
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
resp = LlmResult(
text = resp.choices[0].message.content,
@ -87,7 +91,7 @@ class Processor(LlmService):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Llamafile LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
@staticmethod

View file

@ -6,6 +6,10 @@ Input is prompt, output is response.
from openai import OpenAI
import os
import logging
# Module logger
logger = logging.getLogger(__name__)
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
@ -44,7 +48,7 @@ class Processor(LlmService):
api_key = "sk-no-key-required",
)
print("Initialised", flush=True)
logger.info("LMStudio LLM service initialized")
async def generate_content(self, system, prompt):
@ -52,7 +56,7 @@ class Processor(LlmService):
try:
print(prompt)
logger.debug(f"Prompt: {prompt}")
resp = self.openai.chat.completions.create(
model=self.model,
@ -69,14 +73,14 @@ class Processor(LlmService):
#}
)
print(resp)
logger.debug(f"Full response: {resp}")
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
print(resp.choices[0].message.content, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
logger.debug(f"LLM response: {resp.choices[0].message.content}")
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
resp = LlmResult(
text = resp.choices[0].message.content,
@ -91,7 +95,7 @@ class Processor(LlmService):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"LMStudio LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
@staticmethod

View file

@ -6,6 +6,10 @@ Input is prompt, output is response.
from mistralai import Mistral
import os
import logging
# Module logger
logger = logging.getLogger(__name__)
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
@ -42,7 +46,7 @@ class Processor(LlmService):
self.max_output = max_output
self.mistral = Mistral(api_key=api_key)
print("Initialised", flush=True)
logger.info("Mistral LLM service initialized")
async def generate_content(self, system, prompt):
@ -75,9 +79,9 @@ class Processor(LlmService):
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
print(resp.choices[0].message.content, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
logger.debug(f"LLM response: {resp.choices[0].message.content}")
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
resp = LlmResult(
text = resp.choices[0].message.content,
@ -105,7 +109,7 @@ class Processor(LlmService):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
logger.error(f"Mistral LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
@staticmethod

View file

@ -6,6 +6,10 @@ Input is prompt, output is response.
from ollama import Client
import os
import logging
# Module logger
logger = logging.getLogger(__name__)
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
@ -41,8 +45,8 @@ class Processor(LlmService):
response = self.llm.generate(self.model, prompt)
response_text = response['response']
print("Send response...", flush=True)
print(response_text, flush=True)
logger.debug("Sending response...")
logger.debug(f"LLM response: {response_text}")
inputtokens = int(response['prompt_eval_count'])
outputtokens = int(response['eval_count'])
@ -60,7 +64,7 @@ class Processor(LlmService):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Ollama LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
@staticmethod

View file

@ -6,10 +6,14 @@ Input is prompt, output is response.
from openai import OpenAI, RateLimitError
import os
import logging
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
# Module logger
logger = logging.getLogger(__name__)
default_ident = "text-completion"
default_model = 'gpt-3.5-turbo'
@ -52,7 +56,7 @@ class Processor(LlmService):
else:
self.openai = OpenAI(api_key=api_key)
print("Initialised", flush=True)
logger.info("OpenAI LLM service initialized")
async def generate_content(self, system, prompt):
@ -85,9 +89,9 @@ class Processor(LlmService):
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
print(resp.choices[0].message.content, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
logger.debug(f"LLM response: {resp.choices[0].message.content}")
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
resp = LlmResult(
text = resp.choices[0].message.content,
@ -109,7 +113,7 @@ class Processor(LlmService):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {type(e)} {e}")
logger.error(f"OpenAI LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
@staticmethod

View file

@ -6,6 +6,10 @@ Input is prompt, output is response.
import os
import aiohttp
import logging
# Module logger
logger = logging.getLogger(__name__)
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
@ -41,9 +45,8 @@ class Processor(LlmService):
self.session = aiohttp.ClientSession()
print("Using TGI service at", base_url)
print("Initialised", flush=True)
logger.info(f"Using TGI service at {base_url}")
logger.info("TGI LLM service initialized")
async def generate_content(self, system, prompt):
@ -85,9 +88,9 @@ class Processor(LlmService):
inputtokens = resp["usage"]["prompt_tokens"]
outputtokens = resp["usage"]["completion_tokens"]
ans = resp["choices"][0]["message"]["content"]
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
print(ans, flush=True)
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
logger.debug(f"LLM response: {ans}")
resp = LlmResult(
text = ans,
@ -104,7 +107,7 @@ class Processor(LlmService):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {type(e)} {e}")
logger.error(f"TGI LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
@staticmethod

View file

@ -6,6 +6,10 @@ Input is prompt, output is response.
import os
import aiohttp
import logging
# Module logger
logger = logging.getLogger(__name__)
from .... exceptions import TooManyRequests
from .... base import LlmService, LlmResult
@ -45,9 +49,8 @@ class Processor(LlmService):
self.session = aiohttp.ClientSession()
print("Using vLLM service at", base_url)
print("Initialised", flush=True)
logger.info(f"Using vLLM service at {base_url}")
logger.info("vLLM LLM service initialized")
async def generate_content(self, system, prompt):
@ -80,9 +83,9 @@ class Processor(LlmService):
inputtokens = resp["usage"]["prompt_tokens"]
outputtokens = resp["usage"]["completion_tokens"]
ans = resp["choices"][0]["text"]
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
print(ans, flush=True)
logger.info(f"Input Tokens: {inputtokens}")
logger.info(f"Output Tokens: {outputtokens}")
logger.debug(f"LLM response: {ans}")
resp = LlmResult(
text = ans,
@ -99,7 +102,7 @@ class Processor(LlmService):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {type(e)} {e}")
logger.error(f"vLLM LLM exception ({type(e).__name__}): {e}", exc_info=True)
raise e
@staticmethod

View file

@ -11,9 +11,13 @@ import importlib
from .. log_level import LogLevel
import logging
logger = logging.getLogger(__name__)
def fn(module_name, class_name, params, w):
print(f"Starting {module_name}...")
logger.info(f"Starting {module_name}...")
if "log_level" in params:
params["log_level"] = LogLevel(params["log_level"])
@ -22,7 +26,7 @@ def fn(module_name, class_name, params, w):
try:
print(f"Starting {class_name} using {module_name}...")
logger.info(f"Starting {class_name} using {module_name}...")
module = importlib.import_module(module_name)
class_object = getattr(module, class_name)
@ -30,16 +34,16 @@ def fn(module_name, class_name, params, w):
processor = class_object(**params)
processor.run()
print(f"{module_name} stopped.")
logger.info(f"{module_name} stopped.")
except Exception as e:
print("Exception:", e)
logger.error("Exception occurred", exc_info=True)
print("Restarting in 10...")
logger.info("Restarting in 10...")
time.sleep(10)
print("Closing")
logger.info("Closing")
w.close()
class Processing:
@ -108,7 +112,7 @@ class Processing:
readers.remove(r)
wait_for -= 1
print("All processes exited")
logger.info("All processes exited")
for p in procs:
p.join()
@ -169,13 +173,12 @@ def run():
p.run()
print("Finished.")
logger.info("Finished.")
break
except Exception as e:
print("Exception:", e, flush=True)
print("Will retry...", flush=True)
logger.error("Exception occurred, will retry...", exc_info=True)
time.sleep(10)

View file

@ -6,6 +6,7 @@ Language service abstracts prompt engineering from LLM.
import asyncio
import json
import re
import logging
from ...schema import Definition, Relationship, Triple
from ...schema import Topic
@ -17,6 +18,9 @@ from ...base import ProducerSpec, ConsumerSpec, TextCompletionClientSpec
from ...template import PromptManager
# Module logger
logger = logging.getLogger(__name__)
default_ident = "prompt"
default_concurrency = 1
@ -68,10 +72,10 @@ class Processor(FlowProcessor):
async def on_prompt_config(self, config, version):
print("Loading configuration version", version)
logger.info(f"Loading prompt configuration version {version}")
if self.config_key not in config:
print(f"No key {self.config_key} in config", flush=True)
logger.warning(f"No key {self.config_key} in config")
return
config = config[self.config_key]
@ -80,12 +84,12 @@ class Processor(FlowProcessor):
self.manager.load_config(config)
print("Prompt configuration reloaded.", flush=True)
logger.info("Prompt configuration reloaded")
except Exception as e:
print("Exception:", e, flush=True)
print("Configuration reload failed", flush=True)
logger.error(f"Prompt configuration exception: {e}", exc_info=True)
logger.error("Configuration reload failed")
async def on_request(self, msg, consumer, flow):
@ -99,19 +103,19 @@ class Processor(FlowProcessor):
try:
print(v.terms, flush=True)
logger.debug(f"Prompt terms: {v.terms}")
input = {
k: json.loads(v)
for k, v in v.terms.items()
}
print(f"Handling kind {kind}...", flush=True)
logger.debug(f"Handling prompt kind {kind}...")
async def llm(system, prompt):
print(system, flush=True)
print(prompt, flush=True)
logger.debug(f"System prompt: {system}")
logger.debug(f"User prompt: {prompt}")
resp = await flow("text-completion-request").text_completion(
system = system, prompt = prompt,
@ -120,20 +124,20 @@ class Processor(FlowProcessor):
try:
return resp
except Exception as e:
print("LLM Exception:", e, flush=True)
logger.error(f"LLM Exception: {e}", exc_info=True)
return None
try:
resp = await self.manager.invoke(kind, input, llm)
except Exception as e:
print("Invocation exception:", e, flush=True)
logger.error(f"Prompt invocation exception: {e}", exc_info=True)
raise e
print(resp, flush=True)
logger.debug(f"Prompt response: {resp}")
if isinstance(resp, str):
print("Send text response...", flush=True)
logger.debug("Sending text response...")
r = PromptResponse(
text=resp,
@ -147,8 +151,8 @@ class Processor(FlowProcessor):
else:
print("Send object response...", flush=True)
print(json.dumps(resp, indent=4), flush=True)
logger.debug("Sending object response...")
logger.debug(f"Response object: {json.dumps(resp, indent=4)}")
r = PromptResponse(
text=None,
@ -162,9 +166,9 @@ class Processor(FlowProcessor):
except Exception as e:
print(f"Exception: {e}", flush=True)
logger.error(f"Prompt service exception: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.debug("Sending error response...")
r = PromptResponse(
error=Error(
@ -178,9 +182,9 @@ class Processor(FlowProcessor):
except Exception as e:
print(f"Exception: {e}", flush=True)
logger.error(f"Prompt service exception: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.debug("Sending error response...")
r = PromptResponse(
error=Error(

View file

@ -4,11 +4,16 @@ Document embeddings query service. Input is vector, output is an array
of chunks
"""
import logging
from .... direct.milvus_doc_embeddings import DocVectors
from .... schema import DocumentEmbeddingsResponse
from .... schema import Error, Value
from .... base import DocumentEmbeddingsQueryService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "de-query"
default_store_uri = 'http://localhost:19530'
@ -48,7 +53,7 @@ class Processor(DocumentEmbeddingsQueryService):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception querying document embeddings: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -4,14 +4,18 @@ Document embeddings query service. Input is vector, output is an array
of chunks. Pinecone implementation.
"""
from pinecone import Pinecone, ServerlessSpec
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
import logging
import uuid
import os
from pinecone import Pinecone, ServerlessSpec
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
from .... base import DocumentEmbeddingsQueryService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "de-query"
default_api_key = os.getenv("PINECONE_API_KEY", "not-specified")
@ -78,7 +82,7 @@ class Processor(DocumentEmbeddingsQueryService):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception querying document embeddings: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -4,6 +4,8 @@ Document embeddings query service. Input is vector, output is an array
of chunks
"""
import logging
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
@ -12,6 +14,9 @@ from .... schema import DocumentEmbeddingsResponse
from .... schema import Error, Value
from .... base import DocumentEmbeddingsQueryService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "de-query"
default_store_uri = 'http://localhost:6333'
@ -63,7 +68,7 @@ class Processor(DocumentEmbeddingsQueryService):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception querying document embeddings: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -4,11 +4,16 @@ Graph embeddings query service. Input is vector, output is list of
entities
"""
import logging
from .... direct.milvus_graph_embeddings import EntityVectors
from .... schema import GraphEmbeddingsResponse
from .... schema import Error, Value
from .... base import GraphEmbeddingsQueryService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "ge-query"
default_store_uri = 'http://localhost:19530'
@ -68,14 +73,12 @@ class Processor(GraphEmbeddingsQueryService):
entities = ents2
print("Send response...", flush=True)
logger.debug("Send response...")
return entities
print("Done.", flush=True)
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception querying graph embeddings: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -4,16 +4,20 @@ Graph embeddings query service. Input is vector, output is list of
entities. Pinecone implementation.
"""
from pinecone import Pinecone, ServerlessSpec
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
import logging
import uuid
import os
from pinecone import Pinecone, ServerlessSpec
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
from .... schema import GraphEmbeddingsResponse
from .... schema import Error, Value
from .... base import GraphEmbeddingsQueryService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "ge-query"
default_api_key = os.getenv("PINECONE_API_KEY", "not-specified")
@ -107,7 +111,7 @@ class Processor(GraphEmbeddingsQueryService):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception querying graph embeddings: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -4,6 +4,8 @@ Graph embeddings query service. Input is vector, output is list of
entities
"""
import logging
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
@ -12,6 +14,9 @@ from .... schema import GraphEmbeddingsResponse
from .... schema import Error, Value
from .... base import GraphEmbeddingsQueryService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "ge-query"
default_store_uri = 'http://localhost:6333'
@ -85,14 +90,12 @@ class Processor(GraphEmbeddingsQueryService):
entities = ents2
print("Send response...", flush=True)
logger.debug("Send response...")
return entities
print("Done.", flush=True)
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception querying graph embeddings: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -4,11 +4,16 @@ Triples query service. Input is a (s, p, o) triple, some values may be
null. Output is a list of triples.
"""
import logging
from .... direct.cassandra import TrustGraph
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Value, Triple
from .... base import TriplesQueryService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-query"
default_graph_host='localhost'
@ -135,7 +140,7 @@ class Processor(TriplesQueryService):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception querying triples: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -5,12 +5,17 @@ Input is a (s, p, o) triple, some values may be null. Output is a list of
triples.
"""
import logging
from falkordb import FalkorDB
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Value, Triple
from .... base import TriplesQueryService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-query"
default_graph_url = 'falkor://falkordb:6379'
@ -299,7 +304,7 @@ class Processor(TriplesQueryService):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception querying triples: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -5,12 +5,17 @@ Input is a (s, p, o) triple, some values may be null. Output is a list of
triples.
"""
import logging
from neo4j import GraphDatabase
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Value, Triple
from .... base import TriplesQueryService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-query"
default_graph_host = 'bolt://memgraph:7687'
@ -296,9 +301,7 @@ class Processor(TriplesQueryService):
except Exception as e:
print(f"Exception: {e}")
print(f"Exception: {e}")
logger.error(f"Exception querying triples: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -5,12 +5,17 @@ Input is a (s, p, o) triple, some values may be null. Output is a list of
triples.
"""
import logging
from neo4j import GraphDatabase
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Value, Triple
from .... base import TriplesQueryService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-query"
default_graph_host = 'bolt://neo4j:7687'
@ -280,7 +285,7 @@ class Processor(TriplesQueryService):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception querying triples: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -1,5 +1,9 @@
import asyncio
import logging
# Module logger
logger = logging.getLogger(__name__)
LABEL="http://www.w3.org/2000/01/rdf-schema#label"
@ -18,12 +22,12 @@ class Query:
async def get_vector(self, query):
if self.verbose:
print("Compute embeddings...", flush=True)
logger.debug("Computing embeddings...")
qembeds = await self.rag.embeddings_client.embed(query)
if self.verbose:
print("Done.", flush=True)
logger.debug("Embeddings computed")
return qembeds
@ -32,7 +36,7 @@ class Query:
vectors = await self.get_vector(query)
if self.verbose:
print("Get docs...", flush=True)
logger.debug("Getting documents...")
docs = await self.rag.doc_embeddings_client.query(
vectors, limit=self.doc_limit,
@ -40,9 +44,9 @@ class Query:
)
if self.verbose:
print("Docs:", flush=True)
logger.debug("Documents:")
for doc in docs:
print(doc, flush=True)
logger.debug(f" {doc}")
return docs
@ -60,7 +64,7 @@ class DocumentRag:
self.doc_embeddings_client = doc_embeddings_client
if self.verbose:
print("Initialised", flush=True)
logger.debug("DocumentRag initialized")
async def query(
self, query, user="trustgraph", collection="default",
@ -68,7 +72,7 @@ class DocumentRag:
):
if self.verbose:
print("Construct prompt...", flush=True)
logger.debug("Constructing prompt...")
q = Query(
rag=self, user=user, collection=collection, verbose=self.verbose,
@ -78,9 +82,9 @@ class DocumentRag:
docs = await q.get_docs(query)
if self.verbose:
print("Invoke LLM...", flush=True)
print(docs)
print(query)
logger.debug("Invoking LLM...")
logger.debug(f"Documents: {docs}")
logger.debug(f"Query: {query}")
resp = await self.prompt_client.document_prompt(
query = query,
@ -88,7 +92,7 @@ class DocumentRag:
)
if self.verbose:
print("Done", flush=True)
logger.debug("Query processing complete")
return resp

View file

@ -4,12 +4,16 @@ Simple RAG service, performs query using document RAG an LLM.
Input is query, output is response.
"""
import logging
from ... schema import DocumentRagQuery, DocumentRagResponse, Error
from . document_rag import DocumentRag
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
from ... base import PromptClientSpec, EmbeddingsClientSpec
from ... base import DocumentEmbeddingsClientSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "document-rag"
class Processor(FlowProcessor):
@ -81,7 +85,7 @@ class Processor(FlowProcessor):
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling input {id}...", flush=True)
logger.info(f"Handling input {id}...")
if v.doc_limit:
doc_limit = v.doc_limit
@ -98,13 +102,13 @@ class Processor(FlowProcessor):
properties = {"id": id}
)
print("Done.", flush=True)
logger.info("Request processing complete")
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Document RAG service exception: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.debug("Sending error response...")
await flow("response").send(
DocumentRagResponse(

View file

@ -1,5 +1,9 @@
import asyncio
import logging
# Module logger
logger = logging.getLogger(__name__)
LABEL="http://www.w3.org/2000/01/rdf-schema#label"
@ -22,12 +26,12 @@ class Query:
async def get_vector(self, query):
if self.verbose:
print("Compute embeddings...", flush=True)
logger.debug("Computing embeddings...")
qembeds = await self.rag.embeddings_client.embed(query)
if self.verbose:
print("Done.", flush=True)
logger.debug("Done.")
return qembeds
@ -36,7 +40,7 @@ class Query:
vectors = await self.get_vector(query)
if self.verbose:
print("Get entities...", flush=True)
logger.debug("Getting entities...")
entities = await self.rag.graph_embeddings_client.query(
vectors=vectors, limit=self.entity_limit,
@ -49,9 +53,9 @@ class Query:
]
if self.verbose:
print("Entities:", flush=True)
logger.debug("Entities:")
for ent in entities:
print(" ", ent, flush=True)
logger.debug(f" {ent}")
return entities
@ -126,7 +130,7 @@ class Query:
entities = await self.get_entities(query)
if self.verbose:
print("Get subgraph...", flush=True)
logger.debug("Getting subgraph...")
subgraph = set()
@ -157,12 +161,12 @@ class Query:
sg2 = sg2[0:self.max_subgraph_size]
if self.verbose:
print("Subgraph:", flush=True)
logger.debug("Subgraph:")
for edge in sg2:
print(" ", str(edge), flush=True)
logger.debug(f" {str(edge)}")
if self.verbose:
print("Done.", flush=True)
logger.debug("Done.")
return sg2
@ -183,7 +187,7 @@ class GraphRag:
self.label_cache = {}
if self.verbose:
print("Initialised", flush=True)
logger.debug("GraphRag initialized")
async def query(
self, query, user = "trustgraph", collection = "default",
@ -192,7 +196,7 @@ class GraphRag:
):
if self.verbose:
print("Construct prompt...", flush=True)
logger.debug("Constructing prompt...")
q = Query(
rag = self, user = user, collection = collection,
@ -205,14 +209,14 @@ class GraphRag:
kg = await q.get_labelgraph(query)
if self.verbose:
print("Invoke LLM...", flush=True)
print(kg)
print(query)
logger.debug("Invoking LLM...")
logger.debug(f"Knowledge graph: {kg}")
logger.debug(f"Query: {query}")
resp = await self.prompt_client.kg_prompt(query, kg)
if self.verbose:
print("Done", flush=True)
logger.debug("Query processing complete")
return resp

View file

@ -4,12 +4,16 @@ Simple RAG service, performs query using graph RAG an LLM.
Input is query, output is response.
"""
import logging
from ... schema import GraphRagQuery, GraphRagResponse, Error
from . graph_rag import GraphRag
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
from ... base import PromptClientSpec, EmbeddingsClientSpec
from ... base import GraphEmbeddingsClientSpec, TriplesClientSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "graph-rag"
default_concurrency = 1
@ -102,7 +106,7 @@ class Processor(FlowProcessor):
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling input {id}...", flush=True)
logger.info(f"Handling input {id}...")
if v.entity_limit:
entity_limit = v.entity_limit
@ -139,13 +143,13 @@ class Processor(FlowProcessor):
properties = {"id": id}
)
print("Done.", flush=True)
logger.info("Request processing complete")
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Graph RAG service exception: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.debug("Sending error response...")
await flow("response").send(
GraphRagResponse(

View file

@ -107,7 +107,7 @@ class ReverseGateway:
async def handle_message(self, message: str):
try:
print(f"Received: {message}", flush=True)
logger.debug(f"Received message: {message}")
msg_data = json.loads(message)
response = await self.dispatcher.handle_message(msg_data)
@ -228,15 +228,15 @@ def run():
pulsar_listener=args.pulsar_listener
)
print(f"Starting reverse gateway:")
print(f" WebSocket URI: {gateway.url}")
print(f" Max workers: {args.max_workers}")
print(f" Pulsar host: {gateway.pulsar_host}")
logger.info(f"Starting reverse gateway:")
logger.info(f" WebSocket URI: {gateway.url}")
logger.info(f" Max workers: {args.max_workers}")
logger.info(f" Pulsar host: {gateway.pulsar_host}")
try:
asyncio.run(gateway.run())
except KeyboardInterrupt:
print("\nShutdown requested by user")
logger.info("Shutdown requested by user")
except Exception as e:
print(f"Fatal error: {e}")
logger.error(f"Fatal error: {e}", exc_info=True)
sys.exit(1)

View file

@ -9,9 +9,13 @@ from pinecone.grpc import PineconeGRPC, GRPCClientConfig
import time
import uuid
import os
import logging
from .... base import DocumentEmbeddingsStoreService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "de-write"
default_api_key = os.getenv("PINECONE_API_KEY", "not-specified")
default_cloud = "aws"
@ -104,10 +108,10 @@ class Processor(DocumentEmbeddingsStoreService):
self.create_index(index_name, dim)
except Exception as e:
print("Pinecone index creation failed")
logger.error("Pinecone index creation failed")
raise e
print(f"Index {index_name} created", flush=True)
logger.info(f"Index {index_name} created")
self.last_index_name = index_name

View file

@ -7,9 +7,13 @@ from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
import uuid
import logging
from .... base import DocumentEmbeddingsStoreService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "de-write"
default_store_uri = 'http://localhost:6333'
@ -60,7 +64,7 @@ class Processor(DocumentEmbeddingsStoreService):
),
)
except Exception as e:
print("Qdrant collection creation failed")
logger.error("Qdrant collection creation failed")
raise e
self.last_collection = collection

View file

@ -9,9 +9,13 @@ from pinecone.grpc import PineconeGRPC, GRPCClientConfig
import time
import uuid
import os
import logging
from .... base import GraphEmbeddingsStoreService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "ge-write"
default_api_key = os.getenv("PINECONE_API_KEY", "not-specified")
default_cloud = "aws"
@ -103,10 +107,10 @@ class Processor(GraphEmbeddingsStoreService):
self.create_index(index_name, dim)
except Exception as e:
print("Pinecone index creation failed")
logger.error("Pinecone index creation failed")
raise e
print(f"Index {index_name} created", flush=True)
logger.info(f"Index {index_name} created")
self.last_index_name = index_name

View file

@ -7,9 +7,13 @@ from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
import uuid
import logging
from .... base import GraphEmbeddingsStoreService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "ge-write"
default_store_uri = 'http://localhost:6333'
@ -50,7 +54,7 @@ class Processor(GraphEmbeddingsStoreService):
),
)
except Exception as e:
print("Qdrant collection creation failed")
logger.error("Qdrant collection creation failed")
raise e
self.last_collection = cname

View file

@ -8,6 +8,7 @@ import base64
import os
import argparse
import time
import logging
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from ssl import SSLContext, PROTOCOL_TLSv1_2
@ -17,6 +18,9 @@ from .... schema import rows_store_queue
from .... log_level import LogLevel
from .... base import Consumer
# Module logger
logger = logging.getLogger(__name__)
module = "rows-write"
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
@ -111,7 +115,7 @@ class Processor(Consumer):
except Exception as e:
print("Exception:", str(e), flush=True)
logger.error(f"Exception: {str(e)}", exc_info=True)
# If there's an error make sure to do table creation etc.
self.tables.remove(name)

View file

@ -8,10 +8,14 @@ import base64
import os
import argparse
import time
import logging
from .... direct.cassandra import TrustGraph
from .... base import TriplesStoreService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-write"
default_graph_host='localhost'
@ -61,7 +65,7 @@ class Processor(TriplesStoreService):
table=message.metadata.collection,
)
except Exception as e:
print("Exception", e, flush=True)
logger.error(f"Exception: {e}", exc_info=True)
time.sleep(1)
raise e

View file

@ -8,11 +8,15 @@ import base64
import os
import argparse
import time
import logging
from falkordb import FalkorDB
from .... base import TriplesStoreService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-write"
default_graph_url = 'falkor://falkordb:6379'
@ -38,7 +42,7 @@ class Processor(TriplesStoreService):
def create_node(self, uri):
print("Create node", uri)
logger.debug(f"Create node {uri}")
res = self.io.query(
"MERGE (n:Node {uri: $uri})",
@ -47,14 +51,14 @@ class Processor(TriplesStoreService):
},
)
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=res.nodes_created,
time=res.run_time_ms
))
def create_literal(self, value):
print("Create literal", value)
logger.debug(f"Create literal {value}")
res = self.io.query(
"MERGE (n:Literal {value: $value})",
@ -63,14 +67,14 @@ class Processor(TriplesStoreService):
},
)
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=res.nodes_created,
time=res.run_time_ms
))
def relate_node(self, src, uri, dest):
print("Create node rel", src, uri, dest)
logger.debug(f"Create node rel {src} {uri} {dest}")
res = self.io.query(
"MATCH (src:Node {uri: $src}) "
@ -83,14 +87,14 @@ class Processor(TriplesStoreService):
},
)
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=res.nodes_created,
time=res.run_time_ms
))
def relate_literal(self, src, uri, dest):
print("Create literal rel", src, uri, dest)
logger.debug(f"Create literal rel {src} {uri} {dest}")
res = self.io.query(
"MATCH (src:Node {uri: $src}) "
@ -103,7 +107,7 @@ class Processor(TriplesStoreService):
},
)
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=res.nodes_created,
time=res.run_time_ms
))

View file

@ -8,11 +8,15 @@ import base64
import os
import argparse
import time
import logging
from neo4j import GraphDatabase
from .... base import TriplesStoreService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-write"
default_graph_host = 'bolt://memgraph:7687'
@ -55,49 +59,49 @@ class Processor(TriplesStoreService):
# and this process will restart several times until Pulsar arrives,
# so should be safe
print("Create indexes...", flush=True)
logger.info("Create indexes...")
try:
session.run(
"CREATE INDEX ON :Node",
)
except Exception as e:
print(e, flush=True)
logger.warning(f"Index create failure: {e}")
# Maybe index already exists
print("Index create failure ignored", flush=True)
logger.warning("Index create failure ignored")
try:
session.run(
"CREATE INDEX ON :Node(uri)"
)
except Exception as e:
print(e, flush=True)
logger.warning(f"Index create failure: {e}")
# Maybe index already exists
print("Index create failure ignored", flush=True)
logger.warning("Index create failure ignored")
try:
session.run(
"CREATE INDEX ON :Literal",
)
except Exception as e:
print(e, flush=True)
logger.warning(f"Index create failure: {e}")
# Maybe index already exists
print("Index create failure ignored", flush=True)
logger.warning("Index create failure ignored")
try:
session.run(
"CREATE INDEX ON :Literal(value)"
)
except Exception as e:
print(e, flush=True)
logger.warning(f"Index create failure: {e}")
# Maybe index already exists
print("Index create failure ignored", flush=True)
logger.warning("Index create failure ignored")
print("Index creation done", flush=True)
logger.info("Index creation done")
def create_node(self, uri):
print("Create node", uri)
logger.debug(f"Create node {uri}")
summary = self.io.execute_query(
"MERGE (n:Node {uri: $uri})",
@ -105,14 +109,14 @@ class Processor(TriplesStoreService):
database_=self.db,
).summary
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=summary.counters.nodes_created,
time=summary.result_available_after
))
def create_literal(self, value):
print("Create literal", value)
logger.debug(f"Create literal {value}")
summary = self.io.execute_query(
"MERGE (n:Literal {value: $value})",
@ -120,14 +124,14 @@ class Processor(TriplesStoreService):
database_=self.db,
).summary
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=summary.counters.nodes_created,
time=summary.result_available_after
))
def relate_node(self, src, uri, dest):
print("Create node rel", src, uri, dest)
logger.debug(f"Create node rel {src} {uri} {dest}")
summary = self.io.execute_query(
"MATCH (src:Node {uri: $src}) "
@ -137,14 +141,14 @@ class Processor(TriplesStoreService):
database_=self.db,
).summary
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=summary.counters.nodes_created,
time=summary.result_available_after
))
def relate_literal(self, src, uri, dest):
print("Create literal rel", src, uri, dest)
logger.debug(f"Create literal rel {src} {uri} {dest}")
summary = self.io.execute_query(
"MATCH (src:Node {uri: $src}) "
@ -154,7 +158,7 @@ class Processor(TriplesStoreService):
database_=self.db,
).summary
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=summary.counters.nodes_created,
time=summary.result_available_after
))

View file

@ -8,10 +8,14 @@ import base64
import os
import argparse
import time
import logging
from neo4j import GraphDatabase
from .... base import TriplesStoreService
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-write"
default_graph_host = 'bolt://neo4j:7687'
@ -55,40 +59,40 @@ class Processor(TriplesStoreService):
# and this process will restart several times until Pulsar arrives,
# so should be safe
print("Create indexes...", flush=True)
logger.info("Create indexes...")
try:
session.run(
"CREATE INDEX Node_uri FOR (n:Node) ON (n.uri)",
)
except Exception as e:
print(e, flush=True)
logger.warning(f"Index create failure: {e}")
# Maybe index already exists
print("Index create failure ignored", flush=True)
logger.warning("Index create failure ignored")
try:
session.run(
"CREATE INDEX Literal_value FOR (n:Literal) ON (n.value)",
)
except Exception as e:
print(e, flush=True)
logger.warning(f"Index create failure: {e}")
# Maybe index already exists
print("Index create failure ignored", flush=True)
logger.warning("Index create failure ignored")
try:
session.run(
"CREATE INDEX Rel_uri FOR ()-[r:Rel]-() ON (r.uri)",
)
except Exception as e:
print(e, flush=True)
logger.warning(f"Index create failure: {e}")
# Maybe index already exists
print("Index create failure ignored", flush=True)
logger.warning("Index create failure ignored")
print("Index creation done", flush=True)
logger.info("Index creation done")
def create_node(self, uri):
print("Create node", uri)
logger.debug(f"Create node {uri}")
summary = self.io.execute_query(
"MERGE (n:Node {uri: $uri})",
@ -96,14 +100,14 @@ class Processor(TriplesStoreService):
database_=self.db,
).summary
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=summary.counters.nodes_created,
time=summary.result_available_after
))
def create_literal(self, value):
print("Create literal", value)
logger.debug(f"Create literal {value}")
summary = self.io.execute_query(
"MERGE (n:Literal {value: $value})",
@ -111,14 +115,14 @@ class Processor(TriplesStoreService):
database_=self.db,
).summary
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=summary.counters.nodes_created,
time=summary.result_available_after
))
def relate_node(self, src, uri, dest):
print("Create node rel", src, uri, dest)
logger.debug(f"Create node rel {src} {uri} {dest}")
summary = self.io.execute_query(
"MATCH (src:Node {uri: $src}) "
@ -128,14 +132,14 @@ class Processor(TriplesStoreService):
database_=self.db,
).summary
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=summary.counters.nodes_created,
time=summary.result_available_after
))
def relate_literal(self, src, uri, dest):
print("Create literal rel", src, uri, dest)
logger.debug(f"Create literal rel {src} {uri} {dest}")
summary = self.io.execute_query(
"MATCH (src:Node {uri: $src}) "
@ -145,7 +149,7 @@ class Processor(TriplesStoreService):
database_=self.db,
).summary
print("Created {nodes_created} nodes in {time} ms.".format(
logger.debug("Created {nodes_created} nodes in {time} ms.".format(
nodes_created=summary.counters.nodes_created,
time=summary.result_available_after
))

View file

@ -9,6 +9,9 @@ from ssl import SSLContext, PROTOCOL_TLSv1_2
import uuid
import time
import asyncio
import logging
logger = logging.getLogger(__name__)
class ConfigTableStore:
@ -19,7 +22,7 @@ class ConfigTableStore:
self.keyspace = keyspace
print("Connecting to Cassandra...", flush=True)
logger.info("Connecting to Cassandra...")
if cassandra_user and cassandra_password:
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
@ -36,7 +39,7 @@ class ConfigTableStore:
self.cassandra = self.cluster.connect()
print("Connected.", flush=True)
logger.info("Connected.")
self.ensure_cassandra_schema()
@ -44,9 +47,9 @@ class ConfigTableStore:
def ensure_cassandra_schema(self):
print("Ensure Cassandra schema...", flush=True)
logger.debug("Ensure Cassandra schema...")
print("Keyspace...", flush=True)
logger.debug("Keyspace...")
# FIXME: Replication factor should be configurable
self.cassandra.execute(f"""
@ -59,7 +62,7 @@ class ConfigTableStore:
self.cassandra.set_keyspace(self.keyspace)
print("config table...", flush=True)
logger.debug("config table...")
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS config (
@ -70,7 +73,7 @@ class ConfigTableStore:
);
""");
print("version table...", flush=True)
logger.debug("version table...")
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS version (
@ -84,14 +87,14 @@ class ConfigTableStore:
SELECT version FROM version
""")
print("ensure version...", flush=True)
logger.debug("ensure version...")
self.cassandra.execute("""
UPDATE version set version = version + 0
WHERE id = 'version'
""")
print("Cassandra schema OK.", flush=True)
logger.info("Cassandra schema OK.")
async def inc_version(self):
@ -160,10 +163,8 @@ class ConfigTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
async def get_value(self, cls, key):
@ -180,10 +181,8 @@ class ConfigTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
for row in resp:
return row[0]
@ -205,10 +204,8 @@ class ConfigTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
return [
[row[0], row[1]]
@ -230,10 +227,8 @@ class ConfigTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
return [
row[0] for row in resp
@ -254,10 +249,8 @@ class ConfigTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
return [
(row[0], row[1], row[2])
@ -279,10 +272,8 @@ class ConfigTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
return [
row[0] for row in resp
@ -302,8 +293,6 @@ class ConfigTableStore:
break
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)

View file

@ -9,6 +9,9 @@ from ssl import SSLContext, PROTOCOL_TLSv1_2
import uuid
import time
import asyncio
import logging
logger = logging.getLogger(__name__)
class KnowledgeTableStore:
@ -19,7 +22,7 @@ class KnowledgeTableStore:
self.keyspace = keyspace
print("Connecting to Cassandra...", flush=True)
logger.info("Connecting to Cassandra...")
if cassandra_user and cassandra_password:
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
@ -36,7 +39,7 @@ class KnowledgeTableStore:
self.cassandra = self.cluster.connect()
print("Connected.", flush=True)
logger.info("Connected.")
self.ensure_cassandra_schema()
@ -44,9 +47,9 @@ class KnowledgeTableStore:
def ensure_cassandra_schema(self):
print("Ensure Cassandra schema...", flush=True)
logger.debug("Ensure Cassandra schema...")
print("Keyspace...", flush=True)
logger.debug("Keyspace...")
# FIXME: Replication factor should be configurable
self.cassandra.execute(f"""
@ -59,7 +62,7 @@ class KnowledgeTableStore:
self.cassandra.set_keyspace(self.keyspace)
print("triples table...", flush=True)
logger.debug("triples table...")
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS triples (
@ -77,7 +80,7 @@ class KnowledgeTableStore:
);
""");
print("graph_embeddings table...", flush=True)
logger.debug("graph_embeddings table...")
self.cassandra.execute("""
create table if not exists graph_embeddings (
@ -103,7 +106,7 @@ class KnowledgeTableStore:
graph_embeddings ( user );
""");
print("document_embeddings table...", flush=True)
logger.debug("document_embeddings table...")
self.cassandra.execute("""
create table if not exists document_embeddings (
@ -129,7 +132,7 @@ class KnowledgeTableStore:
document_embeddings ( user );
""");
print("Cassandra schema OK.", flush=True)
logger.info("Cassandra schema OK.")
def prepare_statements(self):
@ -231,10 +234,8 @@ class KnowledgeTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
async def add_graph_embeddings(self, m):
@ -276,10 +277,8 @@ class KnowledgeTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
async def add_document_embeddings(self, m):
@ -321,14 +320,12 @@ class KnowledgeTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
async def list_kg_cores(self, user):
print("List kg cores...")
logger.debug("List kg cores...")
while True:
@ -342,10 +339,8 @@ class KnowledgeTableStore:
break
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
lst = [
@ -353,13 +348,13 @@ class KnowledgeTableStore:
for row in resp
]
print("Done")
logger.debug("Done")
return lst
async def delete_kg_core(self, user, document_id):
print("Delete kg cores...")
logger.debug("Delete kg cores...")
while True:
@ -373,10 +368,8 @@ class KnowledgeTableStore:
break
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
while True:
@ -390,14 +383,12 @@ class KnowledgeTableStore:
break
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
async def get_triples(self, user, document_id, receiver):
print("Get triples...")
logger.debug("Get triples...")
while True:
@ -411,10 +402,8 @@ class KnowledgeTableStore:
break
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
for row in resp:
@ -451,11 +440,11 @@ class KnowledgeTableStore:
)
)
print("Done")
logger.debug("Done")
async def get_graph_embeddings(self, user, document_id, receiver):
print("Get GE...")
logger.debug("Get GE...")
while True:
@ -469,10 +458,8 @@ class KnowledgeTableStore:
break
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
for row in resp:
@ -508,5 +495,5 @@ class KnowledgeTableStore:
)
)
print("Done")
logger.debug("Done")

View file

@ -13,6 +13,9 @@ from ssl import SSLContext, PROTOCOL_TLSv1_2
import uuid
import time
import asyncio
import logging
logger = logging.getLogger(__name__)
class LibraryTableStore:
@ -23,7 +26,7 @@ class LibraryTableStore:
self.keyspace = keyspace
print("Connecting to Cassandra...", flush=True)
logger.info("Connecting to Cassandra...")
if cassandra_user and cassandra_password:
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
@ -40,7 +43,7 @@ class LibraryTableStore:
self.cassandra = self.cluster.connect()
print("Connected.", flush=True)
logger.info("Connected.")
self.ensure_cassandra_schema()
@ -48,9 +51,9 @@ class LibraryTableStore:
def ensure_cassandra_schema(self):
print("Ensure Cassandra schema...", flush=True)
logger.debug("Ensure Cassandra schema...")
print("Keyspace...", flush=True)
logger.debug("Keyspace...")
# FIXME: Replication factor should be configurable
self.cassandra.execute(f"""
@ -63,7 +66,7 @@ class LibraryTableStore:
self.cassandra.set_keyspace(self.keyspace)
print("document table...", flush=True)
logger.debug("document table...")
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS document (
@ -82,14 +85,14 @@ class LibraryTableStore:
);
""");
print("object index...", flush=True)
logger.debug("object index...")
self.cassandra.execute("""
CREATE INDEX IF NOT EXISTS document_object
ON document (object_id)
""");
print("processing table...", flush=True)
logger.debug("processing table...")
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS processing (
@ -104,7 +107,7 @@ class LibraryTableStore:
);
""");
print("Cassandra schema OK.", flush=True)
logger.info("Cassandra schema OK.")
def prepare_statements(self):
@ -204,7 +207,7 @@ class LibraryTableStore:
async def add_document(self, document, object_id):
print("Adding document", document.id, object_id)
logger.info(f"Adding document {document.id} {object_id}")
metadata = [
(
@ -231,16 +234,14 @@ class LibraryTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
print("Add complete", flush=True)
logger.debug("Add complete")
async def update_document(self, document):
print("Updating document", document.id)
logger.info(f"Updating document {document.id}")
metadata = [
(
@ -267,16 +268,14 @@ class LibraryTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
print("Update complete", flush=True)
logger.debug("Update complete")
async def remove_document(self, user, document_id):
print("Removing document", document_id)
logger.info(f"Removing document {document_id}")
while True:
@ -293,16 +292,14 @@ class LibraryTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
print("Delete complete", flush=True)
logger.debug("Delete complete")
async def list_documents(self, user):
print("List documents...")
logger.debug("List documents...")
while True:
@ -316,10 +313,8 @@ class LibraryTableStore:
break
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
lst = [
@ -344,13 +339,13 @@ class LibraryTableStore:
for row in resp
]
print("Done")
logger.debug("Done")
return lst
async def get_document(self, user, id):
print("Get document")
logger.debug("Get document")
while True:
@ -364,10 +359,8 @@ class LibraryTableStore:
break
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
for row in resp:
@ -390,14 +383,14 @@ class LibraryTableStore:
object_id = row[6],
)
print("Done")
logger.debug("Done")
return doc
raise RuntimeError("No such document row?")
async def get_document_object_id(self, user, id):
print("Get document obj ID")
logger.debug("Get document obj ID")
while True:
@ -411,14 +404,12 @@ class LibraryTableStore:
break
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
for row in resp:
print("Done")
logger.debug("Done")
return row[6]
raise RuntimeError("No such document row?")
@ -440,7 +431,7 @@ class LibraryTableStore:
async def add_processing(self, processing):
print("Adding processing", processing.id)
logger.info(f"Adding processing {processing.id}")
while True:
@ -460,16 +451,14 @@ class LibraryTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
print("Add complete", flush=True)
logger.debug("Add complete")
async def remove_processing(self, user, processing_id):
print("Removing processing", processing_id)
logger.info(f"Removing processing {processing_id}")
while True:
@ -486,16 +475,14 @@ class LibraryTableStore:
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
print("Delete complete", flush=True)
logger.debug("Delete complete")
async def list_processing(self, user):
print("List processing objects")
logger.debug("List processing objects")
while True:
@ -509,10 +496,8 @@ class LibraryTableStore:
break
except Exception as e:
print("Exception:", type(e))
logger.error("Exception occurred", exc_info=True)
raise e
print(f"{e}, retry...", flush=True)
await asyncio.sleep(1)
lst = [
@ -528,7 +513,7 @@ class LibraryTableStore:
for row in resp
]
print("Done")
logger.debug("Done")
return lst

View file

@ -3,6 +3,10 @@ import ibis
import json
from jsonschema import validate
import re
import logging
# Module logger
logger = logging.getLogger(__name__)
class PromptConfiguration:
def __init__(self, system_template, global_terms={}, prompts={}):
@ -101,7 +105,7 @@ class PromptManager:
async def invoke(self, id, input, llm):
print("Invoke...", flush=True)
logger.debug("Invoking prompt template...")
terms = self.terms | self.prompts[id].terms | input
@ -123,13 +127,13 @@ class PromptManager:
try:
obj = self.parse_json(resp)
except:
print("Parse fail:", resp, flush=True)
logger.error(f"JSON parse failed: {resp}")
raise RuntimeError("JSON parse fail")
if self.prompts[id].schema:
try:
validate(instance=obj, schema=self.prompts[id].schema)
print("Validated", flush=True)
logger.debug("Schema validation successful")
except Exception as e:
raise RuntimeError(f"Schema validation fail: {e}")