Implement logging strategy (#444)

* Logging strategy: convert all print() calls to logging invocations
This commit is contained in:
cybermaggedon 2025-07-30 23:18:38 +01:00 committed by GitHub
parent 3e0651222b
commit dd70aade11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
117 changed files with 1216 additions and 667 deletions

View file

@ -49,9 +49,6 @@ class Api:
url = f"{self.url}{path}"
# print("uri:", url)
# print(json.dumps(request, indent=4))
# Invoke the API, input is passed as JSON
resp = requests.post(url, json=request, timeout=self.timeout)
@ -59,8 +56,6 @@ class Api:
if resp.status_code != 200:
raise ProtocolException(f"Status code {resp.status_code}")
# print(resp.text)
try:
# Parse the response as JSON
object = resp.json()

View file

@ -1,7 +1,11 @@
import logging
from . exceptions import *
from . types import ConfigValue
logger = logging.getLogger(__name__)
class Config:
def __init__(self, api):
@ -33,7 +37,7 @@ class Config:
for v in object["values"]
]
except Exception as e:
print(e)
logger.error("Failed to parse config get response", exc_info=True)
raise ProtocolException("Response not formatted correctly")
def put(self, values):

View file

@ -2,11 +2,14 @@
import datetime
import time
import base64
import logging
from . types import DocumentMetadata, ProcessingMetadata, Triple
from .. knowledge import hash, Uri, Literal
from . exceptions import *
logger = logging.getLogger(__name__)
def to_value(x):
if x["e"]: return Uri(x["v"])
return Literal(x["v"])
@ -112,7 +115,7 @@ class Library:
for v in object["document-metadatas"]
]
except Exception as e:
print(e)
logger.error("Failed to parse document list response", exc_info=True)
raise ProtocolException(f"Response not formatted correctly")
def get_document(self, user, id):
@ -145,7 +148,7 @@ class Library:
tags = doc["tags"]
)
except Exception as e:
print(e)
logger.error("Failed to parse document response", exc_info=True)
raise ProtocolException(f"Response not formatted correctly")
def update_document(self, user, id, metadata):
@ -192,7 +195,7 @@ class Library:
tags = doc["tags"]
)
except Exception as e:
print(e)
logger.error("Failed to parse document update response", exc_info=True)
raise ProtocolException(f"Response not formatted correctly")
def remove_document(self, user, id):
@ -266,6 +269,6 @@ class Library:
for v in object["processing-metadatas"]
]
except Exception as e:
print(e)
logger.error("Failed to parse processing list response", exc_info=True)
raise ProtocolException(f"Response not formatted correctly")

View file

@ -4,12 +4,16 @@ Agent manager service completion base class
"""
import time
import logging
from prometheus_client import Histogram
from .. schema import AgentRequest, AgentResponse, Error
from .. exceptions import TooManyRequests
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "agent-manager"
class AgentService(FlowProcessor):
@ -76,9 +80,9 @@ class AgentService(FlowProcessor):
except Exception as e:
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"on_request Exception: {e}")
logger.error(f"Exception in agent service on_request: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.info("Sending error response...")
await flow.producer["response"].send(
AgentResponse(

View file

@ -9,6 +9,8 @@ import argparse
import _pulsar
import time
import uuid
import logging
import os
from prometheus_client import start_http_server, Info
from .. schema import ConfigPush, config_push_queue
@ -20,6 +22,9 @@ from . metrics import ProcessorMetrics, ConsumerMetrics
default_config_queue = config_push_queue
# Module logger
logger = logging.getLogger(__name__)
# Async processor
class AsyncProcessor:
@ -113,7 +118,7 @@ class AsyncProcessor:
version = message.value().version
# Invoke message handlers
print("Config change event", version, flush=True)
logger.info(f"Config change event: version={version}")
for ch in self.config_handlers:
await ch(config, version)
@ -156,9 +161,23 @@ class AsyncProcessor:
# This is here to output a debug message, shouldn't be needed.
except Exception as e:
print("Exception, closing taskgroup", flush=True)
logger.error("Exception, closing taskgroup", exc_info=True)
raise e
@classmethod
def setup_logging(cls, log_level='INFO'):
"""Configure logging for the entire application"""
# Support environment variable override
env_log_level = os.environ.get('TRUSTGRAPH_LOG_LEVEL', log_level)
# Configure logging
logging.basicConfig(
level=getattr(logging, env_log_level.upper()),
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[logging.StreamHandler()]
)
logger.info(f"Logging configured with level: {env_log_level}")
# Startup fabric. launch calls launch_async in async mode.
@classmethod
def launch(cls, ident, doc):
@ -183,8 +202,11 @@ class AsyncProcessor:
args = parser.parse_args()
args = vars(args)
# Setup logging before anything else
cls.setup_logging(args.get('log_level', 'INFO').upper())
# Debug
print(args, flush=True)
logger.debug(f"Arguments: {args}")
# Start the Prometheus metrics service if needed
if args["metrics"]:
@ -193,7 +215,7 @@ class AsyncProcessor:
# Loop forever, exception handler
while True:
print("Starting...", flush=True)
logger.info("Starting...")
try:
@ -203,30 +225,30 @@ class AsyncProcessor:
))
except KeyboardInterrupt:
print("Keyboard interrupt.", flush=True)
logger.info("Keyboard interrupt.")
return
except _pulsar.Interrupted:
print("Pulsar Interrupted.", flush=True)
logger.info("Pulsar Interrupted.")
return
# Exceptions from a taskgroup come in as an exception group
except ExceptionGroup as e:
print("Exception group:", flush=True)
logger.error("Exception group:")
for se in e.exceptions:
print(" Type:", type(se), flush=True)
print(f" Exception: {se}", flush=True)
logger.error(f" Type: {type(se)}")
logger.error(f" Exception: {se}", exc_info=se)
except Exception as e:
print("Type:", type(e), flush=True)
print("Exception:", e, flush=True)
logger.error(f"Type: {type(e)}")
logger.error(f"Exception: {e}", exc_info=True)
# Retry occurs here
print("Will retry...", flush=True)
logger.warning("Will retry...")
time.sleep(4)
print("Retrying...", flush=True)
logger.info("Retrying...")
# The command-line arguments are built using a stack of add_args
# invocations
@ -254,3 +276,4 @@ class AsyncProcessor:
default=8000,
help=f'Pulsar host (default: 8000)',
)

View file

@ -14,9 +14,13 @@ import pulsar
import _pulsar
import asyncio
import time
import logging
from .. exceptions import TooManyRequests
# Module logger
logger = logging.getLogger(__name__)
class Consumer:
def __init__(
@ -90,7 +94,7 @@ class Consumer:
try:
print(self.topic, "subscribing...", flush=True)
logger.info(f"Subscribing to topic: {self.topic}")
if self.start_of_messages:
pos = pulsar.InitialPosition.Earliest
@ -108,21 +112,18 @@ class Consumer:
except Exception as e:
print("consumer subs Exception:", e, flush=True)
logger.error(f"Consumer subscription exception: {e}", exc_info=True)
await asyncio.sleep(self.reconnect_time)
continue
print(self.topic, "subscribed", flush=True)
logger.info(f"Successfully subscribed to topic: {self.topic}")
if self.metrics:
self.metrics.state("running")
try:
print(
"Starting", self.concurrency, "receiver threads",
flush=True
)
logger.info(f"Starting {self.concurrency} receiver threads")
async with asyncio.TaskGroup() as tg:
@ -138,7 +139,7 @@ class Consumer:
except Exception as e:
print("consumer loop exception:", e, flush=True)
logger.error(f"Consumer loop exception: {e}", exc_info=True)
self.consumer.unsubscribe()
self.consumer.close()
self.consumer = None
@ -174,7 +175,7 @@ class Consumer:
if time.time() > expiry:
print("Gave up waiting for rate-limit retry", flush=True)
logger.warning("Gave up waiting for rate-limit retry")
# Message failed to be processed, this causes it to
# be retried
@ -188,7 +189,7 @@ class Consumer:
try:
print("Handle...", flush=True)
logger.debug("Processing message...")
if self.metrics:
@ -198,7 +199,7 @@ class Consumer:
else:
await self.handler(msg, self, self.flow)
print("Handled.", flush=True)
logger.debug("Message processed successfully")
# Acknowledge successful processing of the message
self.consumer.acknowledge(msg)
@ -211,7 +212,7 @@ class Consumer:
except TooManyRequests:
print("TooManyRequests: will retry...", flush=True)
logger.warning("Rate limit exceeded, will retry...")
if self.metrics:
self.metrics.rate_limit()
@ -224,7 +225,7 @@ class Consumer:
except Exception as e:
print("consume exception:", e, flush=True)
logger.error(f"Message processing exception: {e}", exc_info=True)
# Message failed to be processed, this causes it to
# be retried

View file

@ -1,8 +1,13 @@
import logging
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
from .. knowledge import Uri, Literal
# Module logger
logger = logging.getLogger(__name__)
class DocumentEmbeddingsClient(RequestResponse):
async def query(self, vectors, limit=20, user="trustgraph",
collection="default", timeout=30):
@ -17,7 +22,7 @@ class DocumentEmbeddingsClient(RequestResponse):
timeout=timeout
)
print(resp, flush=True)
logger.debug(f"Document embeddings response: {resp}")
if resp.error:
raise RuntimeError(resp.error.message)

View file

@ -4,6 +4,8 @@ Document embeddings query service. Input is vectors. Output is list of
embeddings.
"""
import logging
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
from .. schema import Error, Value
@ -11,6 +13,9 @@ from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec
from . producer_spec import ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "ge-query"
class DocumentEmbeddingsQueryService(FlowProcessor):
@ -47,21 +52,21 @@ class DocumentEmbeddingsQueryService(FlowProcessor):
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling input {id}...", flush=True)
logger.debug(f"Handling document embeddings query request {id}...")
docs = await self.query_document_embeddings(request)
print("Send response...", flush=True)
logger.debug("Sending document embeddings query response...")
r = DocumentEmbeddingsResponse(documents=docs, error=None)
await flow("response").send(r, properties={"id": id})
print("Done.", flush=True)
logger.debug("Document embeddings query request completed")
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception in document embeddings query service: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.info("Sending error response...")
r = DocumentEmbeddingsResponse(
error=Error(

View file

@ -3,10 +3,15 @@
Document embeddings store base class
"""
import logging
from .. schema import DocumentEmbeddings
from .. base import FlowProcessor, ConsumerSpec
from .. exceptions import TooManyRequests
# Module logger
logger = logging.getLogger(__name__)
default_ident = "document-embeddings-write"
class DocumentEmbeddingsStoreService(FlowProcessor):
@ -40,7 +45,7 @@ class DocumentEmbeddingsStoreService(FlowProcessor):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception in document embeddings store service: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -4,12 +4,16 @@ Embeddings resolution base class
"""
import time
import logging
from prometheus_client import Histogram
from .. schema import EmbeddingsRequest, EmbeddingsResponse, Error
from .. exceptions import TooManyRequests
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "embeddings"
default_concurrency = 1
@ -51,7 +55,7 @@ class EmbeddingsService(FlowProcessor):
id = msg.properties()["id"]
print("Handling request", id, "...", flush=True)
logger.debug(f"Handling embeddings request {id}...")
vectors = await self.on_embeddings(request.text)
@ -63,7 +67,7 @@ class EmbeddingsService(FlowProcessor):
properties={"id": id}
)
print("Handled.", flush=True)
logger.debug("Embeddings request handled successfully")
except TooManyRequests as e:
raise e
@ -72,9 +76,9 @@ class EmbeddingsService(FlowProcessor):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}", flush=True)
logger.error(f"Exception in embeddings service: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.info("Sending error response...")
await flow.producer["response"].send(
EmbeddingsResponse(

View file

@ -4,6 +4,7 @@
# configuration service which can't manage itself.
import json
import logging
from pulsar.schema import JsonSchema
@ -14,6 +15,9 @@ from .. log_level import LogLevel
from . async_processor import AsyncProcessor
from . flow import Flow
# Module logger
logger = logging.getLogger(__name__)
# Parent class for configurable processors, configured with flows by
# the config service
class FlowProcessor(AsyncProcessor):
@ -34,7 +38,7 @@ class FlowProcessor(AsyncProcessor):
# Array of specifications: ConsumerSpec, ProducerSpec, SettingSpec
self.specifications = []
print("Service initialised.")
logger.info("Service initialised.")
# Register a configuration variable
def register_specification(self, spec):
@ -44,19 +48,19 @@ class FlowProcessor(AsyncProcessor):
async def start_flow(self, flow, defn):
self.flows[flow] = Flow(self.id, flow, self, defn)
await self.flows[flow].start()
print("Started flow: ", flow)
logger.info(f"Started flow: {flow}")
# Stop processing for a new flow
async def stop_flow(self, flow):
if flow in self.flows:
await self.flows[flow].stop()
del self.flows[flow]
print("Stopped flow: ", flow, flush=True)
logger.info(f"Stopped flow: {flow}")
# Event handler - called for a configuration change
async def on_configure_flows(self, config, version):
print("Got config version", version, flush=True)
logger.info(f"Got config version {version}")
# Skip over invalid data
if "flows-active" not in config: return
@ -69,7 +73,7 @@ class FlowProcessor(AsyncProcessor):
else:
print("No configuration settings for me.", flush=True)
logger.debug("No configuration settings for me.")
flow_config = {}
# Get list of flows which should be running and are currently
@ -88,7 +92,7 @@ class FlowProcessor(AsyncProcessor):
if flow not in wanted_flows:
await self.stop_flow(flow)
print("Handled config update")
logger.info("Handled config update")
# Start threads, just call parent
async def start(self):

View file

@ -1,8 +1,13 @@
import logging
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
from .. knowledge import Uri, Literal
# Module logger
logger = logging.getLogger(__name__)
def to_value(x):
if x.is_uri: return Uri(x.value)
return Literal(x.value)
@ -21,7 +26,7 @@ class GraphEmbeddingsClient(RequestResponse):
timeout=timeout
)
print(resp, flush=True)
logger.debug(f"Graph embeddings response: {resp}")
if resp.error:
raise RuntimeError(resp.error.message)

View file

@ -4,6 +4,8 @@ Graph embeddings query service. Input is vectors. Output is list of
embeddings.
"""
import logging
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
from .. schema import Error, Value
@ -11,6 +13,9 @@ from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec
from . producer_spec import ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "ge-query"
class GraphEmbeddingsQueryService(FlowProcessor):
@ -47,21 +52,21 @@ class GraphEmbeddingsQueryService(FlowProcessor):
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling input {id}...", flush=True)
logger.debug(f"Handling graph embeddings query request {id}...")
entities = await self.query_graph_embeddings(request)
print("Send response...", flush=True)
logger.debug("Sending graph embeddings query response...")
r = GraphEmbeddingsResponse(entities=entities, error=None)
await flow("response").send(r, properties={"id": id})
print("Done.", flush=True)
logger.debug("Graph embeddings query request completed")
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception in graph embeddings query service: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.info("Sending error response...")
r = GraphEmbeddingsResponse(
error=Error(

View file

@ -3,10 +3,15 @@
Graph embeddings store base class
"""
import logging
from .. schema import GraphEmbeddings
from .. base import FlowProcessor, ConsumerSpec
from .. exceptions import TooManyRequests
# Module logger
logger = logging.getLogger(__name__)
default_ident = "graph-embeddings-write"
class GraphEmbeddingsStoreService(FlowProcessor):
@ -40,7 +45,7 @@ class GraphEmbeddingsStoreService(FlowProcessor):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception in graph embeddings store service: {e}", exc_info=True)
raise e
@staticmethod

View file

@ -4,12 +4,16 @@ LLM text completion base class
"""
import time
import logging
from prometheus_client import Histogram
from .. schema import TextCompletionRequest, TextCompletionResponse, Error
from .. exceptions import TooManyRequests
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "text-completion"
default_concurrency = 1
@ -103,9 +107,9 @@ class LlmService(FlowProcessor):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
logger.error(f"LLM service exception: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.debug("Sending error response...")
await flow.producer["response"].send(
TextCompletionResponse(

View file

@ -1,6 +1,10 @@
from pulsar.schema import JsonSchema
import asyncio
import logging
# Module logger
logger = logging.getLogger(__name__)
class Producer:
@ -39,15 +43,15 @@ class Producer:
while self.running and self.producer is None:
try:
print("Connect publisher to", self.topic, "...", flush=True)
logger.info(f"Connecting publisher to {self.topic}...")
self.producer = self.client.create_producer(
topic = self.topic,
schema = JsonSchema(self.schema),
chunking_enabled = self.chunking_enabled,
)
print("Connected to", self.topic, flush=True)
logger.info(f"Connected publisher to {self.topic}")
except Exception as e:
print("Exception:", e, flush=True)
logger.error(f"Exception connecting publisher: {e}", exc_info=True)
await asyncio.sleep(2)
if not self.running: break
@ -68,7 +72,7 @@ class Producer:
break
except Exception as e:
print("Exception:", e, flush=True)
logger.error(f"Exception sending message: {e}", exc_info=True)
self.producer.close()
self.producer = None

View file

@ -4,6 +4,10 @@ from pulsar.schema import JsonSchema
import asyncio
import time
import pulsar
import logging
# Module logger
logger = logging.getLogger(__name__)
class Publisher:
@ -62,7 +66,7 @@ class Publisher:
producer.send(item)
except Exception as e:
print("Exception:", e, flush=True)
logger.error(f"Exception in publisher: {e}", exc_info=True)
if not self.running:
return

View file

@ -1,12 +1,16 @@
import uuid
import asyncio
import logging
from . subscriber import Subscriber
from . producer import Producer
from . spec import Spec
from . metrics import ConsumerMetrics, ProducerMetrics, SubscriberMetrics
# Module logger
logger = logging.getLogger(__name__)
class RequestResponse(Subscriber):
def __init__(
@ -45,7 +49,7 @@ class RequestResponse(Subscriber):
id = str(uuid.uuid4())
print("Request", id, "...", flush=True)
logger.debug(f"Sending request {id}...")
q = await self.subscribe(id)
@ -58,7 +62,7 @@ class RequestResponse(Subscriber):
except Exception as e:
print("Exception:", e)
logger.error(f"Exception sending request: {e}", exc_info=True)
raise e
@ -71,7 +75,7 @@ class RequestResponse(Subscriber):
timeout=timeout
)
print("Got response.", flush=True)
logger.debug("Received response")
if recipient is None:
@ -93,7 +97,7 @@ class RequestResponse(Subscriber):
except Exception as e:
print("Exception:", e)
logger.error(f"Exception processing response: {e}", exc_info=True)
raise e
finally:

View file

@ -7,6 +7,10 @@ from pulsar.schema import JsonSchema
import asyncio
import _pulsar
import time
import logging
# Module logger
logger = logging.getLogger(__name__)
class Subscriber:
@ -66,7 +70,7 @@ class Subscriber:
if self.metrics:
self.metrics.state("running")
print("Subscriber running...", flush=True)
logger.info("Subscriber running...")
while self.running:
@ -78,8 +82,7 @@ class Subscriber:
except _pulsar.Timeout:
continue
except Exception as e:
print("Exception:", e, flush=True)
print(type(e))
logger.error(f"Exception in subscriber receive: {e}", exc_info=True)
raise e
if self.metrics:
@ -110,7 +113,7 @@ class Subscriber:
except Exception as e:
self.metrics.dropped()
print("Q Put:", e, flush=True)
logger.warning(f"Failed to put message in queue: {e}")
for q in self.full.values():
try:
@ -121,10 +124,10 @@ class Subscriber:
)
except Exception as e:
self.metrics.dropped()
print("Q Put:", e, flush=True)
logger.warning(f"Failed to put message in full queue: {e}")
except Exception as e:
print("Subscriber exception:", e, flush=True)
logger.error(f"Subscriber exception: {e}", exc_info=True)
finally:

View file

@ -4,12 +4,16 @@ Tool invocation base class
"""
import json
import logging
from prometheus_client import Counter
from .. schema import ToolRequest, ToolResponse, Error
from .. exceptions import TooManyRequests
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_concurrency = 1
class ToolService(FlowProcessor):
@ -91,9 +95,9 @@ class ToolService(FlowProcessor):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
logger.error(f"Exception in tool service: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.info("Sending error response...")
await flow.producer["response"].send(
ToolResponse(

View file

@ -4,6 +4,8 @@ Triples query service. Input is a (s, p, o) triple, some values may be
null. Output is a list of triples.
"""
import logging
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .. schema import Value, Triple
@ -11,6 +13,9 @@ from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec
from . producer_spec import ProducerSpec
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-query"
class TriplesQueryService(FlowProcessor):
@ -45,21 +50,21 @@ class TriplesQueryService(FlowProcessor):
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling input {id}...", flush=True)
logger.debug(f"Handling triples query request {id}...")
triples = await self.query_triples(request)
print("Send response...", flush=True)
logger.debug("Sending triples query response...")
r = TriplesQueryResponse(triples=triples, error=None)
await flow("response").send(r, properties={"id": id})
print("Done.", flush=True)
logger.debug("Triples query request completed")
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception in triples query service: {e}", exc_info=True)
print("Send error response...", flush=True)
logger.info("Sending error response...")
r = TriplesQueryResponse(
error = Error(

View file

@ -3,10 +3,15 @@
Triples store base class
"""
import logging
from .. schema import Triples
from .. base import FlowProcessor, ConsumerSpec
from .. exceptions import TooManyRequests
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-write"
class TriplesStoreService(FlowProcessor):
@ -38,7 +43,7 @@ class TriplesStoreService(FlowProcessor):
except Exception as e:
print(f"Exception: {e}")
logger.error(f"Exception in triples store service: {e}", exc_info=True)
raise e
@staticmethod