From c112af0ab0818024be02d2b42f35a996851b2a7c Mon Sep 17 00:00:00 2001 From: Cyber MacGeddon Date: Wed, 29 Apr 2026 18:00:55 +0100 Subject: [PATCH] align chunker + googleaistudio fixes with release/v2.4 Master had a parallel sibling fix for issue #821 (PR #828) using self.RecursiveCharacterTextSplitter / self.TokenTextSplitter; release branches converged on the bare module-level form. Adopt release/v2.4's version so downstream branches don't drift further. --- .../trustgraph/chunking/recursive/chunker.py | 4 ++-- .../trustgraph/chunking/token/chunker.py | 4 ++-- .../text_completion/googleaistudio/llm.py | 18 +++--------------- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/trustgraph-flow/trustgraph/chunking/recursive/chunker.py b/trustgraph-flow/trustgraph/chunking/recursive/chunker.py index 098e6111..a0052c79 100755 --- a/trustgraph-flow/trustgraph/chunking/recursive/chunker.py +++ b/trustgraph-flow/trustgraph/chunking/recursive/chunker.py @@ -58,7 +58,7 @@ class Processor(ChunkingService): 2500, 4000, 6400, 10000, 16000] ) - self.text_splitter = self.RecursiveCharacterTextSplitter( + self.text_splitter = RecursiveCharacterTextSplitter( chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len, @@ -111,7 +111,7 @@ class Processor(ChunkingService): chunk_overlap = int(chunk_overlap) # Create text splitter with effective parameters - text_splitter = self.RecursiveCharacterTextSplitter( + text_splitter = RecursiveCharacterTextSplitter( chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len, diff --git a/trustgraph-flow/trustgraph/chunking/token/chunker.py b/trustgraph-flow/trustgraph/chunking/token/chunker.py index 3bf907a4..c3935e4b 100755 --- a/trustgraph-flow/trustgraph/chunking/token/chunker.py +++ b/trustgraph-flow/trustgraph/chunking/token/chunker.py @@ -56,7 +56,7 @@ class Processor(ChunkingService): 2500, 4000, 6400, 10000, 16000] ) - self.text_splitter = self.TokenTextSplitter( + self.text_splitter = TokenTextSplitter( encoding_name="cl100k_base", chunk_size=chunk_size, chunk_overlap=chunk_overlap, @@ -108,7 +108,7 @@ class Processor(ChunkingService): chunk_overlap = int(chunk_overlap) # Create text splitter with effective parameters - text_splitter = self.TokenTextSplitter( + text_splitter = TokenTextSplitter( encoding_name="cl100k_base", chunk_size=chunk_size, chunk_overlap=chunk_overlap, diff --git a/trustgraph-vertexai/trustgraph/model/text_completion/googleaistudio/llm.py b/trustgraph-vertexai/trustgraph/model/text_completion/googleaistudio/llm.py index 142fc45c..b01ff410 100644 --- a/trustgraph-vertexai/trustgraph/model/text_completion/googleaistudio/llm.py +++ b/trustgraph-vertexai/trustgraph/model/text_completion/googleaistudio/llm.py @@ -43,18 +43,6 @@ class Processor(LlmService): temperature = params.get("temperature", default_temperature) max_output = params.get("max_output", default_max_output) - from google import genai - from google.genai import types - from google.genai.types import HarmCategory, HarmBlockThreshold - from google.genai.errors import ClientError - from google.api_core.exceptions import ResourceExhausted - self.genai = genai - self.types = types - self.HarmCategory = HarmCategory - self.HarmBlockThreshold = HarmBlockThreshold - self.ClientError = ClientError - self.ResourceExhausted = ResourceExhausted - if api_key is None: raise RuntimeError("Google AI Studio API key not specified") @@ -66,7 +54,7 @@ class Processor(LlmService): } ) - self.client = self.genai.Client(api_key=api_key, vertexai=False) + self.client = genai.Client(api_key=api_key, vertexai=False) self.default_model = model self.temperature = temperature self.max_output = max_output @@ -74,7 +62,7 @@ class Processor(LlmService): # Cache for generation configs per model self.generation_configs = {} - block_level = self.HarmBlockThreshold.BLOCK_ONLY_HIGH + block_level = HarmBlockThreshold.BLOCK_ONLY_HIGH self.safety_settings = [ types.SafetySetting( @@ -159,7 +147,7 @@ class Processor(LlmService): return resp - except self.ResourceExhausted as e: + except ResourceExhausted as e: logger.warning("Rate limit exceeded")