Implement logging strategy (#444)

* Logging strategy and convert all prints() to logging invocations
This commit is contained in:
cybermaggedon 2025-07-30 23:18:38 +01:00 committed by GitHub
parent 3e0651222b
commit dd70aade11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
117 changed files with 1216 additions and 667 deletions

View file

@@ -4,12 +4,16 @@ Simple decoder, accepts text documents on input, outputs chunks from the
as text as separate output objects.
"""
import logging
from langchain_text_splitters import TokenTextSplitter
from prometheus_client import Histogram
from ... schema import TextDocument, Chunk
from ... base import FlowProcessor
# Module logger
logger = logging.getLogger(__name__)
default_ident = "chunker"
class Processor(FlowProcessor):
@@ -53,12 +57,12 @@ class Processor(FlowProcessor):
)
)
print("Chunker initialised", flush=True)
logger.info("Token chunker initialized")
async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Chunking {v.metadata.id}...", flush=True)
logger.info(f"Chunking document {v.metadata.id}...")
texts = self.text_splitter.create_documents(
[v.text.decode("utf-8")]
@@ -66,7 +70,7 @@
for ix, chunk in enumerate(texts):
print("Chunk", len(chunk.page_content), flush=True)
logger.debug(f"Created chunk of size {len(chunk.page_content)}")
r = Chunk(
metadata=v.metadata,
@@ -79,7 +83,7 @@
await flow("output").send(r)
print("Done.", flush=True)
logger.debug("Document chunking complete")
@staticmethod
def add_args(parser):