align chunker + googleaistudio fixes with release/v2.4

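Master carried a parallel fix for issue #821 (PR #828) that referenced
the splitters as instance attributes (self.RecursiveCharacterTextSplitter
/ self.TokenTextSplitter), whereas the release branches converged on the
bare module-level names.  Adopt release/v2.4's version, for the chunkers
and likewise for the googleaistudio processor (drop the imports stashed
on self and use the bare names), so downstream branches don't drift
further.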
This commit is contained in:
Cyber MacGeddon 2026-04-29 18:00:55 +01:00
parent f3434307c5
commit c112af0ab0
3 changed files with 7 additions and 19 deletions

View file

@ -58,7 +58,7 @@ class Processor(ChunkingService):
2500, 4000, 6400, 10000, 16000]
)
self.text_splitter = self.RecursiveCharacterTextSplitter(
self.text_splitter = RecursiveCharacterTextSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
length_function=len,
@ -111,7 +111,7 @@ class Processor(ChunkingService):
chunk_overlap = int(chunk_overlap)
# Create text splitter with effective parameters
text_splitter = self.RecursiveCharacterTextSplitter(
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
length_function=len,

View file

@ -56,7 +56,7 @@ class Processor(ChunkingService):
2500, 4000, 6400, 10000, 16000]
)
self.text_splitter = self.TokenTextSplitter(
self.text_splitter = TokenTextSplitter(
encoding_name="cl100k_base",
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
@ -108,7 +108,7 @@ class Processor(ChunkingService):
chunk_overlap = int(chunk_overlap)
# Create text splitter with effective parameters
text_splitter = self.TokenTextSplitter(
text_splitter = TokenTextSplitter(
encoding_name="cl100k_base",
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,

View file

@ -43,18 +43,6 @@ class Processor(LlmService):
temperature = params.get("temperature", default_temperature)
max_output = params.get("max_output", default_max_output)
from google import genai
from google.genai import types
from google.genai.types import HarmCategory, HarmBlockThreshold
from google.genai.errors import ClientError
from google.api_core.exceptions import ResourceExhausted
self.genai = genai
self.types = types
self.HarmCategory = HarmCategory
self.HarmBlockThreshold = HarmBlockThreshold
self.ClientError = ClientError
self.ResourceExhausted = ResourceExhausted
if api_key is None:
raise RuntimeError("Google AI Studio API key not specified")
@ -66,7 +54,7 @@ class Processor(LlmService):
}
)
self.client = self.genai.Client(api_key=api_key, vertexai=False)
self.client = genai.Client(api_key=api_key, vertexai=False)
self.default_model = model
self.temperature = temperature
self.max_output = max_output
@ -74,7 +62,7 @@ class Processor(LlmService):
# Cache for generation configs per model
self.generation_configs = {}
block_level = self.HarmBlockThreshold.BLOCK_ONLY_HIGH
block_level = HarmBlockThreshold.BLOCK_ONLY_HIGH
self.safety_settings = [
types.SafetySetting(
@ -159,7 +147,7 @@ class Processor(LlmService):
return resp
except self.ResourceExhausted as e:
except ResourceExhausted as e:
logger.warning("Rate limit exceeded")