diff --git a/templates/components/mistral-ocr.jsonnet b/templates/components/mistral-ocr.jsonnet index 8049c514..a70addd5 100644 --- a/templates/components/mistral-ocr.jsonnet +++ b/templates/components/mistral-ocr.jsonnet @@ -43,5 +43,5 @@ local url = import "values/url.jsonnet"; }, -} + prompts +} diff --git a/templates/components/ocr.jsonnet b/templates/components/ocr.jsonnet index 4353b7f9..cdd49583 100644 --- a/templates/components/ocr.jsonnet +++ b/templates/components/ocr.jsonnet @@ -27,12 +27,11 @@ local url = import "values/url.jsonnet"; .with_port(8080, 8080, "metrics"); engine.resources([ - envSecrets, containerSet, service, ]) }, -} + prompts +} diff --git a/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py b/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py index 8130cf8a..45f1311c 100755 --- a/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py +++ b/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py @@ -4,7 +4,7 @@ Simple LLM service, performs text prompt completion using Mistral. Input is prompt, output is response. """ -from mistralai import Mistral, RateLimitError +from mistralai import Mistral from prometheus_client import Histogram import os @@ -130,12 +130,18 @@ class Processor(ConsumerProducer): print("Done.", flush=True) - # FIXME: Wrong exception, don't know what this LLM throws - # for a rate limit - except Mistral.RateLimitError: + # FIXME: Wrong exception. The MistralAI library has retry logic + # so retry-able errors are retried transparently. It means we + # don't get rate limit events. - # Leave rate limit retries to the base handler - raise TooManyRequests() + # We could choose to turn off retry and handle all that here + # or subclass BackoffStrategy to keep the retry logic, but + # get the events out. + +# except Mistral.RateLimitError: + +# # Leave rate limit retries to the base handler +# raise TooManyRequests() except Exception as e: