mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-06-10 23:35:14 +02:00
Merge branch 'release/v0.21'
This commit is contained in:
commit
f8586dd9c3
7 changed files with 149 additions and 11 deletions
|
|
@ -11,6 +11,7 @@
|
|||
"claude": import "components/claude.jsonnet",
|
||||
"cohere": import "components/cohere.jsonnet",
|
||||
"googleaistudio": import "components/googleaistudio.jsonnet",
|
||||
"lmstudio": import "components/lmstudio.jsonnet",
|
||||
"mistral": import "components/mistral.jsonnet",
|
||||
"ollama": import "components/ollama.jsonnet",
|
||||
"openai": import "components/openai.jsonnet",
|
||||
|
|
@ -23,6 +24,7 @@
|
|||
"claude-rag": import "components/claude-rag.jsonnet",
|
||||
"cohere-rag": import "components/cohere-rag.jsonnet",
|
||||
"googleaistudio-rag": import "components/googleaistudio-rag.jsonnet",
|
||||
"lmstudio-rag": import "components/lmstudio-rag.jsonnet",
|
||||
"mistral-rag": import "components/mistral-rag.jsonnet",
|
||||
"ollama-rag": import "components/ollama-rag.jsonnet",
|
||||
"openai-rag": import "components/openai-rag.jsonnet",
|
||||
|
|
|
|||
63
templates/components/lmstudio-rag.jsonnet
Normal file
63
templates/components/lmstudio-rag.jsonnet
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
// LM Studio text-completion component, RAG variant.
//
// Contributes a "text-completion-rag" entry whose create(engine) builds one
// container running the "text-completion-lmstudio" command, wired to the
// text-completion-rag request/response queues, plus its secrets and service.

local base = import "base/base.jsonnet";
local images = import "values/images.jsonnet";
local url = import "values/url.jsonnet";
local prompts = import "prompts/mixtral.jsonnet";

{

    // Hidden tunables; each can be replaced through with(key, value).
    "lmstudio-rag-model":: "GPT-3.5-Turbo",
    "lmstudio-rag-temperature":: 0.0,
    "lmstudio-rag-max-output-tokens":: 4096,

    // Returns this object with the hidden field "lmstudio-rag-<key>" set to
    // value, e.g. with("model", "x") overrides "lmstudio-rag-model".
    with(key, value):: self + {
        ["lmstudio-rag-" + key]:: value,
    },

    "text-completion-rag" +: {

        // Builds the resources for this component using the supplied engine.
        create(engine)::

            // Secret set "lmstudio-credentials" with LMSTUDIO_URL registered
            // against "lmstudio-url" (exact semantics are defined by engine).
            local secrets =
                engine.envSecrets("lmstudio-credentials")
                .with_env_var("LMSTUDIO_URL", "lmstudio-url");

            // Input and output queues used by the RAG flavour of the service.
            local queueArgs = [
                "-i", "non-persistent://tg/request/text-completion-rag",
                "-o", "non-persistent://tg/response/text-completion-rag",
            ];

            // Full command line; tunables are read from the outermost object
            // so with(...) overrides are honoured.
            local command = [
                "text-completion-lmstudio",
                "-p", url.pulsar,
                "-x", std.toString($["lmstudio-rag-max-output-tokens"]),
                "-t", std.format("%0.3f", $["lmstudio-rag-temperature"]),
                "-m", $["lmstudio-rag-model"],
            ] + queueArgs;

            local llm =
                engine.container("text-completion-rag")
                .with_image(images.trustgraph_flow)
                .with_command(command)
                .with_env_var_secrets(secrets)
                .with_limits("0.5", "128M")
                .with_reservations("0.1", "128M");

            local llmSet = engine.containers("text-completion-rag", [ llm ]);

            // Internal service with port 8080 labelled "metrics".
            local llmService =
                engine.internalService(llmSet)
                .with_port(8080, 8080, "metrics");

            engine.resources([ secrets, llmSet, llmService ]),

    },

} + prompts
|
||||
|
||||
59
templates/components/lmstudio.jsonnet
Normal file
59
templates/components/lmstudio.jsonnet
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
// LM Studio text-completion component.
//
// Contributes a "text-completion" entry whose create(engine) builds one
// container running the "text-completion-lmstudio" command, together with
// its secrets and an internal service.

local base = import "base/base.jsonnet";
local images = import "values/images.jsonnet";
local url = import "values/url.jsonnet";
local prompts = import "prompts/mixtral.jsonnet";

{

    // Hidden tunables; each can be replaced through with(key, value).
    "lmstudio-model":: "GPT-3.5-Turbo",
    "lmstudio-temperature":: 0.0,
    "lmstudio-max-output-tokens":: 4096,

    // Returns this object with the hidden field "lmstudio-<key>" set to
    // value, e.g. with("model", "x") overrides "lmstudio-model".
    with(key, value):: self + {
        ["lmstudio-" + key]:: value,
    },

    "text-completion" +: {

        // Builds the resources for this component using the supplied engine.
        create(engine)::

            // Secret set "lmstudio-credentials" with LMSTUDIO_URL registered
            // against "lmstudio-url" (exact semantics are defined by engine).
            local secrets =
                engine.envSecrets("lmstudio-credentials")
                .with_env_var("LMSTUDIO_URL", "lmstudio-url");

            // Full command line; tunables are read from the outermost object
            // so with(...) overrides are honoured.
            local command = [
                "text-completion-lmstudio",
                "-p", url.pulsar,
                "-x", std.toString($["lmstudio-max-output-tokens"]),
                "-t", std.format("%0.3f", $["lmstudio-temperature"]),
                "-m", $["lmstudio-model"],
            ];

            local llm =
                engine.container("text-completion")
                .with_image(images.trustgraph_flow)
                .with_command(command)
                .with_env_var_secrets(secrets)
                .with_limits("0.5", "128M")
                .with_reservations("0.1", "128M");

            local llmSet = engine.containers("text-completion", [ llm ]);

            // Internal service with port 8080 labelled "metrics".
            local llmService =
                engine.internalService(llmSet)
                .with_port(8080, 8080, "metrics");

            engine.resources([ secrets, llmSet, llmService ]),

    },

} + prompts
|
||||
|
||||
|
|
@ -43,5 +43,5 @@ local url = import "values/url.jsonnet";
|
|||
|
||||
},
|
||||
|
||||
} + prompts
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,12 +27,11 @@ local url = import "values/url.jsonnet";
|
|||
.with_port(8080, 8080, "metrics");
|
||||
|
||||
engine.resources([
|
||||
envSecrets,
|
||||
containerSet,
|
||||
service,
|
||||
])
|
||||
|
||||
},
|
||||
|
||||
} + prompts
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ Simple LLM service, performs text prompt completion using Mistral.
|
|||
Input is prompt, output is response.
|
||||
"""
|
||||
|
||||
from mistralai import Mistral, RateLimitError
|
||||
from mistralai import Mistral
|
||||
from prometheus_client import Histogram
|
||||
import os
|
||||
|
||||
|
|
@ -130,12 +130,18 @@ class Processor(ConsumerProducer):
|
|||
|
||||
print("Done.", flush=True)
|
||||
|
||||
# FIXME: Wrong exception, don't know what this LLM throws
|
||||
# for a rate limit
|
||||
except Mistral.RateLimitError:
|
||||
# FIXME: Wrong exception. The MistralAI library has retry logic
|
||||
# so retry-able errors are retried transparently. It means we
|
||||
# don't get rate limit events.
|
||||
|
||||
# Leave rate limit retries to the base handler
|
||||
raise TooManyRequests()
|
||||
# We could choose to turn off retry and handle all that here
|
||||
# or subclass BackoffStrategy to keep the retry logic, but
|
||||
# get the events out.
|
||||
|
||||
# except Mistral.RateLimitError:
|
||||
|
||||
# # Leave rate limit retries to the base handler
|
||||
# raise TooManyRequests()
|
||||
|
||||
except Exception as e:
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ default_model = 'gpt-3.5-turbo'
|
|||
default_temperature = 0.0
|
||||
default_max_output = 4096
|
||||
default_api_key = os.getenv("OPENAI_TOKEN")
|
||||
default_base_url = os.getenv("OPENAI_BASE_URL", None)
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
|
|
@ -34,6 +35,7 @@ class Processor(ConsumerProducer):
|
|||
subscriber = params.get("subscriber", default_subscriber)
|
||||
model = params.get("model", default_model)
|
||||
api_key = params.get("api_key", default_api_key)
|
||||
base_url = params.get("base_url", default_base_url)
|
||||
temperature = params.get("temperature", default_temperature)
|
||||
max_output = params.get("max_output", default_max_output)
|
||||
|
||||
|
|
@ -50,6 +52,7 @@ class Processor(ConsumerProducer):
|
|||
"model": model,
|
||||
"temperature": temperature,
|
||||
"max_output": max_output,
|
||||
"base_url": base_url,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -69,7 +72,7 @@ class Processor(ConsumerProducer):
|
|||
self.model = model
|
||||
self.temperature = temperature
|
||||
self.max_output = max_output
|
||||
self.openai = OpenAI(api_key=api_key)
|
||||
self.openai = OpenAI(base_url=base_url, api_key=api_key)
|
||||
|
||||
print("Initialised", flush=True)
|
||||
|
||||
|
|
@ -132,7 +135,7 @@ class Processor(ConsumerProducer):
|
|||
|
||||
# FIXME: Wrong exception, don't know what this LLM throws
|
||||
# for a rate limit
|
||||
except openai.RateLimitError:
|
||||
except RateLimitError:
|
||||
|
||||
# Leave rate limit retries to the base handler
|
||||
raise TooManyRequests()
|
||||
|
|
@ -180,6 +183,12 @@ class Processor(ConsumerProducer):
|
|||
help=f'OpenAI API key'
|
||||
)
|
||||
|
||||
# Store the option under `base_url`, the key the constructor looks up with
# params.get("base_url", ...). Without an explicit dest, argparse derives
# the attribute name `url` from `--url`, so a URL given on the command line
# would never be seen and only the OPENAI_BASE_URL default would apply.
# NOTE(review): assumes the parsed arguments are handed to the constructor
# as keyword params, as the other options here suggest - confirm.
parser.add_argument(
    '-u', '--url',
    dest='base_url',
    default=default_base_url,
    help='OpenAI service base URL'
)
|
||||
|
||||
parser.add_argument(
|
||||
'-t', '--temperature',
|
||||
type=float,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue