mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-22 22:05:13 +02:00
align chunker + googleaistudio fixes with release/v2.4
Master carried a parallel sibling fix for issue #821 (PR #828) that referenced the splitters as instance attributes (self.RecursiveCharacterTextSplitter / self.TokenTextSplitter), whereas the release branches converged on the bare module-level names. Adopt release/v2.4's version so that downstream branches don't drift any further.
This commit is contained in:
parent
f3434307c5
commit
c112af0ab0
3 changed files with 7 additions and 19 deletions
|
|
@ -58,7 +58,7 @@ class Processor(ChunkingService):
|
||||||
2500, 4000, 6400, 10000, 16000]
|
2500, 4000, 6400, 10000, 16000]
|
||||||
)
|
)
|
||||||
|
|
||||||
self.text_splitter = self.RecursiveCharacterTextSplitter(
|
self.text_splitter = RecursiveCharacterTextSplitter(
|
||||||
chunk_size=chunk_size,
|
chunk_size=chunk_size,
|
||||||
chunk_overlap=chunk_overlap,
|
chunk_overlap=chunk_overlap,
|
||||||
length_function=len,
|
length_function=len,
|
||||||
|
|
@ -111,7 +111,7 @@ class Processor(ChunkingService):
|
||||||
chunk_overlap = int(chunk_overlap)
|
chunk_overlap = int(chunk_overlap)
|
||||||
|
|
||||||
# Create text splitter with effective parameters
|
# Create text splitter with effective parameters
|
||||||
text_splitter = self.RecursiveCharacterTextSplitter(
|
text_splitter = RecursiveCharacterTextSplitter(
|
||||||
chunk_size=chunk_size,
|
chunk_size=chunk_size,
|
||||||
chunk_overlap=chunk_overlap,
|
chunk_overlap=chunk_overlap,
|
||||||
length_function=len,
|
length_function=len,
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ class Processor(ChunkingService):
|
||||||
2500, 4000, 6400, 10000, 16000]
|
2500, 4000, 6400, 10000, 16000]
|
||||||
)
|
)
|
||||||
|
|
||||||
self.text_splitter = self.TokenTextSplitter(
|
self.text_splitter = TokenTextSplitter(
|
||||||
encoding_name="cl100k_base",
|
encoding_name="cl100k_base",
|
||||||
chunk_size=chunk_size,
|
chunk_size=chunk_size,
|
||||||
chunk_overlap=chunk_overlap,
|
chunk_overlap=chunk_overlap,
|
||||||
|
|
@ -108,7 +108,7 @@ class Processor(ChunkingService):
|
||||||
chunk_overlap = int(chunk_overlap)
|
chunk_overlap = int(chunk_overlap)
|
||||||
|
|
||||||
# Create text splitter with effective parameters
|
# Create text splitter with effective parameters
|
||||||
text_splitter = self.TokenTextSplitter(
|
text_splitter = TokenTextSplitter(
|
||||||
encoding_name="cl100k_base",
|
encoding_name="cl100k_base",
|
||||||
chunk_size=chunk_size,
|
chunk_size=chunk_size,
|
||||||
chunk_overlap=chunk_overlap,
|
chunk_overlap=chunk_overlap,
|
||||||
|
|
|
||||||
|
|
@ -43,18 +43,6 @@ class Processor(LlmService):
|
||||||
temperature = params.get("temperature", default_temperature)
|
temperature = params.get("temperature", default_temperature)
|
||||||
max_output = params.get("max_output", default_max_output)
|
max_output = params.get("max_output", default_max_output)
|
||||||
|
|
||||||
from google import genai
|
|
||||||
from google.genai import types
|
|
||||||
from google.genai.types import HarmCategory, HarmBlockThreshold
|
|
||||||
from google.genai.errors import ClientError
|
|
||||||
from google.api_core.exceptions import ResourceExhausted
|
|
||||||
self.genai = genai
|
|
||||||
self.types = types
|
|
||||||
self.HarmCategory = HarmCategory
|
|
||||||
self.HarmBlockThreshold = HarmBlockThreshold
|
|
||||||
self.ClientError = ClientError
|
|
||||||
self.ResourceExhausted = ResourceExhausted
|
|
||||||
|
|
||||||
if api_key is None:
|
if api_key is None:
|
||||||
raise RuntimeError("Google AI Studio API key not specified")
|
raise RuntimeError("Google AI Studio API key not specified")
|
||||||
|
|
||||||
|
|
@ -66,7 +54,7 @@ class Processor(LlmService):
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
self.client = self.genai.Client(api_key=api_key, vertexai=False)
|
self.client = genai.Client(api_key=api_key, vertexai=False)
|
||||||
self.default_model = model
|
self.default_model = model
|
||||||
self.temperature = temperature
|
self.temperature = temperature
|
||||||
self.max_output = max_output
|
self.max_output = max_output
|
||||||
|
|
@ -74,7 +62,7 @@ class Processor(LlmService):
|
||||||
# Cache for generation configs per model
|
# Cache for generation configs per model
|
||||||
self.generation_configs = {}
|
self.generation_configs = {}
|
||||||
|
|
||||||
block_level = self.HarmBlockThreshold.BLOCK_ONLY_HIGH
|
block_level = HarmBlockThreshold.BLOCK_ONLY_HIGH
|
||||||
|
|
||||||
self.safety_settings = [
|
self.safety_settings = [
|
||||||
types.SafetySetting(
|
types.SafetySetting(
|
||||||
|
|
@ -159,7 +147,7 @@ class Processor(LlmService):
|
||||||
|
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
except self.ResourceExhausted as e:
|
except ResourceExhausted as e:
|
||||||
|
|
||||||
logger.warning("Rate limit exceeded")
|
logger.warning("Rate limit exceeded")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue