diff --git a/trustgraph-flow/trustgraph/model/text_completion/googleaistudio/llm.py b/trustgraph-flow/trustgraph/model/text_completion/googleaistudio/llm.py
index 1e9160ed..ca55a7e3 100644
--- a/trustgraph-flow/trustgraph/model/text_completion/googleaistudio/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/googleaistudio/llm.py
@@ -15,6 +15,7 @@ Input is prompt, output is response.
 from google import genai
 from google.genai import types
 from google.genai.types import HarmCategory, HarmBlockThreshold
+from google.genai.errors import ClientError
 from google.api_core.exceptions import ResourceExhausted
 import os
 import logging
@@ -152,6 +153,15 @@ class Processor(LlmService):
             # Leave rate limit retries to the default handler
             raise TooManyRequests()
 
+        except ClientError as e:
+            # google-genai SDK throws ClientError for 4xx errors
+            if e.code == 429:
+                logger.warning(f"Rate limit exceeded (ClientError 429): {e}")
+                raise TooManyRequests()
+            # Other client errors are unrecoverable
+            logger.error(f"GoogleAIStudio ClientError: {e}", exc_info=True)
+            raise e
+
         except Exception as e:
 
             # Apart from rate limits, treat all exceptions as unrecoverable
@@ -216,6 +226,15 @@ class Processor(LlmService):
             logger.warning("Rate limit exceeded during streaming")
             raise TooManyRequests()
 
+        except ClientError as e:
+            # google-genai SDK throws ClientError for 4xx errors
+            if e.code == 429:
+                logger.warning(f"Rate limit exceeded during streaming (ClientError 429): {e}")
+                raise TooManyRequests()
+            # Other client errors are unrecoverable
+            logger.error(f"GoogleAIStudio streaming ClientError: {e}", exc_info=True)
+            raise e
+
         except Exception as e:
             logger.error(f"GoogleAIStudio streaming exception ({type(e).__name__}): {e}", exc_info=True)
             raise e
diff --git a/trustgraph-vertexai/trustgraph/model/text_completion/vertexai/llm.py b/trustgraph-vertexai/trustgraph/model/text_completion/vertexai/llm.py
index 59aa5bfe..d7a7dd2a 100755
--- a/trustgraph-vertexai/trustgraph/model/text_completion/vertexai/llm.py
+++ b/trustgraph-vertexai/trustgraph/model/text_completion/vertexai/llm.py
@@ -16,6 +16,7 @@ import logging
 from google import genai
 from google.genai import types
 from google.genai.types import HarmCategory, HarmBlockThreshold
+from google.genai.errors import ClientError
 from google.api_core.exceptions import ResourceExhausted
 
 # Added for Anthropic model support
@@ -229,6 +230,15 @@ class Processor(LlmService):
             # Leave rate limit retries to the base handler
             raise TooManyRequests()
 
+        except ClientError as e:
+            # google-genai SDK throws ClientError for 4xx errors
+            if e.code == 429:
+                logger.warning(f"Hit rate limit (ClientError 429): {e}")
+                raise TooManyRequests()
+            # Other client errors are unrecoverable
+            logger.error(f"VertexAI ClientError: {e}", exc_info=True)
+            raise e
+
         except Exception as e:
             # Apart from rate limits, treat all exceptions as unrecoverable
             logger.error(f"VertexAI LLM exception: {e}", exc_info=True)
@@ -346,6 +356,15 @@ class Processor(LlmService):
             logger.warning(f"Hit rate limit during streaming: {e}")
             raise TooManyRequests()
 
+        except ClientError as e:
+            # google-genai SDK throws ClientError for 4xx errors
+            if e.code == 429:
+                logger.warning(f"Hit rate limit during streaming (ClientError 429): {e}")
+                raise TooManyRequests()
+            # Other client errors are unrecoverable
+            logger.error(f"VertexAI streaming ClientError: {e}", exc_info=True)
+            raise e
+
         except Exception as e:
             logger.error(f"VertexAI streaming exception: {e}", exc_info=True)
             raise e