Implement logging strategy (#444)

* Logging strategy and convert all prints() to logging invocations
This commit is contained in:
cybermaggedon 2025-07-30 23:18:38 +01:00 committed by GitHub
parent 3e0651222b
commit dd70aade11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
117 changed files with 1216 additions and 667 deletions

View file

@@ -4,12 +4,16 @@ Simple decoder, accepts text documents on input, outputs chunks from the
as text as separate output objects.
"""
import logging
from langchain_text_splitters import TokenTextSplitter
from prometheus_client import Histogram
from ... schema import TextDocument, Chunk
from ... base import FlowProcessor
# Module logger
logger = logging.getLogger(__name__)
default_ident = "chunker"
class Processor(FlowProcessor):
@@ -53,12 +57,12 @@ class Processor(FlowProcessor):
)
)
print("Chunker initialised", flush=True)
logger.info("Token chunker initialized")
async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Chunking {v.metadata.id}...", flush=True)
logger.info(f"Chunking document {v.metadata.id}...")
texts = self.text_splitter.create_documents(
[v.text.decode("utf-8")]
@@ -66,7 +70,7 @@
for ix, chunk in enumerate(texts):
print("Chunk", len(chunk.page_content), flush=True)
logger.debug(f"Created chunk of size {len(chunk.page_content)}")
r = Chunk(
metadata=v.metadata,
@@ -79,7 +83,7 @@
await flow("output").send(r)
print("Done.", flush=True)
logger.debug("Document chunking complete")
@staticmethod
def add_args(parser):