Feature/environment var creds (#116)

- Change templates to interpolate environment variables in docker compose
- Change templates to invoke secrets for environment variable credentials in K8s configuration
- Update LLM processors to fall back to environment-variable credentials when none are passed explicitly (the common pattern is sketched below)
Author: cybermaggedon, 2024-10-15 00:34:52 +01:00 (committed by GitHub)
Parent: 43756d872b · Commit: 86288339cf
20 changed files with 327 additions and 271 deletions
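
Every processor diff below applies the same pattern: read a default from the environment at module import time, let an explicit parameter override it, and fail fast at startup if neither is set. A minimal sketch of that precedence chain (names mirror the OpenAI hunk; the resolve_api_key helper is illustrative, not part of the commit):

    import os

    # Read once, at import time; None if the variable is unset.
    default_api_key = os.getenv("OPENAI_KEY")

    def resolve_api_key(params):
        # An explicit parameter wins; otherwise fall back to the environment.
        # dict.get only falls back when the key is absent, so an explicit
        # api_key=None still trips the error below.
        api_key = params.get("api_key", default_api_key)
        if api_key is None:
            raise RuntimeError("OpenAI API key not specified")
        return api_key

    print(resolve_api_key({"api_key": "explicit-key"}))  # explicit-key

The compose templates interpolate these variables into the container environment; the K8s templates populate them from secrets, so the same processor code serves both deployment paths.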

[File: Azure serverless text completion processor]

@@ -7,6 +7,7 @@ serverless endpoint service. Input is prompt, output is response.
 import requests
 import json
 from prometheus_client import Histogram
+import os
 
 from .... schema import TextCompletionRequest, TextCompletionResponse, Error
 from .... schema import text_completion_request_queue
@@ -23,6 +24,8 @@ default_subscriber = module
 default_temperature = 0.0
 default_max_output = 4192
 default_model = "AzureAI"
+default_endpoint = os.getenv("AZURE_ENDPOINT")
+default_token = os.getenv("AZURE_TOKEN")
 
 class Processor(ConsumerProducer):
@@ -31,12 +34,18 @@ class Processor(ConsumerProducer):
         input_queue = params.get("input_queue", default_input_queue)
         output_queue = params.get("output_queue", default_output_queue)
         subscriber = params.get("subscriber", default_subscriber)
-        endpoint = params.get("endpoint")
-        token = params.get("token")
+        endpoint = params.get("endpoint", default_endpoint)
+        token = params.get("token", default_token)
         temperature = params.get("temperature", default_temperature)
         max_output = params.get("max_output", default_max_output)
         model = default_model
 
+        if endpoint is None:
+            raise RuntimeError("Azure endpoint not specified")
+
+        if token is None:
+            raise RuntimeError("Azure token not specified")
+
         super(Processor, self).__init__(
             **params | {
                 "input_queue": input_queue,

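One subtlety: default_endpoint and default_token are evaluated when the module is imported, so AZURE_ENDPOINT and AZURE_TOKEN must already be present in the process environment, which is exactly what the compose interpolation and K8s secret changes arrange. A small illustration, with hypothetical values:

    import os

    # Simulate what compose/K8s would inject before the module loads.
    os.environ["AZURE_ENDPOINT"] = "https://example.invalid/endpoint"  # illustrative
    os.environ["AZURE_TOKEN"] = "not-a-real-token"                     # illustrative

    # As in the diff above: captured once, at import time.
    default_endpoint = os.getenv("AZURE_ENDPOINT")
    default_token = os.getenv("AZURE_TOKEN")

    assert default_endpoint is not None and default_token is not None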
[File: Azure OpenAI text completion processor]

@@ -8,6 +8,7 @@ import requests
 import json
 from prometheus_client import Histogram
 from openai import AzureOpenAI
+import os
 
 from .... schema import TextCompletionRequest, TextCompletionResponse, Error
 from .... schema import text_completion_request_queue
@@ -24,6 +25,8 @@ default_subscriber = module
 default_temperature = 0.0
 default_max_output = 4192
 default_api = "2024-02-15-preview"
+default_endpoint = os.getenv("AZURE_ENDPOINT")
+default_token = os.getenv("AZURE_TOKEN")
 
 class Processor(ConsumerProducer):
@@ -32,13 +35,19 @@ class Processor(ConsumerProducer):
         input_queue = params.get("input_queue", default_input_queue)
         output_queue = params.get("output_queue", default_output_queue)
         subscriber = params.get("subscriber", default_subscriber)
-        endpoint = params.get("endpoint")
-        token = params.get("token")
+        endpoint = params.get("endpoint", default_endpoint)
+        token = params.get("token", default_token)
         temperature = params.get("temperature", default_temperature)
         max_output = params.get("max_output", default_max_output)
         model = params.get("model")
         api = params.get("api_version", default_api)
 
+        if endpoint is None:
+            raise RuntimeError("Azure endpoint not specified")
+
+        if token is None:
+            raise RuntimeError("Azure token not specified")
+
         super(Processor, self).__init__(
             **params | {
                 "input_queue": input_queue,

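For context, these defaults feed the AzureOpenAI client the processor constructs; api_version maps to the default_api value above. A hedged sketch of the wiring, assuming the standard openai-package constructor arguments and env values injected as in this commit:

    import os
    from openai import AzureOpenAI

    client = AzureOpenAI(
        api_key=os.getenv("AZURE_TOKEN"),
        api_version="2024-02-15-preview",
        azure_endpoint=os.getenv("AZURE_ENDPOINT"),
    )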
[File: Anthropic Claude text completion processor]

@@ -6,6 +6,7 @@ Input is prompt, output is response.
 
 import anthropic
 from prometheus_client import Histogram
+import os
 
 from .... schema import TextCompletionRequest, TextCompletionResponse, Error
 from .... schema import text_completion_request_queue
@@ -22,6 +23,7 @@ default_subscriber = module
 default_model = 'claude-3-5-sonnet-20240620'
 default_temperature = 0.0
 default_max_output = 8192
+default_api_key = os.getenv("CLAUDE_KEY")
 
 class Processor(ConsumerProducer):
@@ -31,10 +33,13 @@ class Processor(ConsumerProducer):
         output_queue = params.get("output_queue", default_output_queue)
         subscriber = params.get("subscriber", default_subscriber)
         model = params.get("model", default_model)
-        api_key = params.get("api_key")
+        api_key = params.get("api_key", default_api_key)
         temperature = params.get("temperature", default_temperature)
         max_output = params.get("max_output", default_max_output)
 
+        if api_key is None:
+            raise RuntimeError("Claude API key not specified")
+
         super(Processor, self).__init__(
             **params | {
                 "input_queue": input_queue,

[File: Cohere text completion processor]

@@ -6,6 +6,7 @@ Input is prompt, output is response.
 
 import cohere
 from prometheus_client import Histogram
+import os
 
 from .... schema import TextCompletionRequest, TextCompletionResponse, Error
 from .... schema import text_completion_request_queue
@@ -21,6 +22,7 @@ default_output_queue = text_completion_response_queue
 default_subscriber = module
 default_model = 'c4ai-aya-23-8b'
 default_temperature = 0.0
+default_api_key = os.getenv("COHERE_KEY")
 
 class Processor(ConsumerProducer):
@@ -30,9 +32,12 @@ class Processor(ConsumerProducer):
         output_queue = params.get("output_queue", default_output_queue)
         subscriber = params.get("subscriber", default_subscriber)
         model = params.get("model", default_model)
-        api_key = params.get("api_key")
+        api_key = params.get("api_key", default_api_key)
         temperature = params.get("temperature", default_temperature)
 
+        if api_key is None:
+            raise RuntimeError("Cohere API key not specified")
+
         super(Processor, self).__init__(
             **params | {
                 "input_queue": input_queue,

[File: Google AI Studio (Gemini) text completion processor]

@@ -7,6 +7,7 @@ Input is prompt, output is response.
 import google.generativeai as genai
 from google.generativeai.types import HarmCategory, HarmBlockThreshold
 from prometheus_client import Histogram
+import os
 
 from .... schema import TextCompletionRequest, TextCompletionResponse, Error
 from .... schema import text_completion_request_queue
@@ -23,6 +24,7 @@ default_subscriber = module
 default_model = 'gemini-1.5-flash-002'
 default_temperature = 0.0
 default_max_output = 8192
+default_api_key = os.getenv("GOOGLE_AI_STUDIO_KEY")
 
 class Processor(ConsumerProducer):
@@ -32,10 +34,13 @@ class Processor(ConsumerProducer):
         output_queue = params.get("output_queue", default_output_queue)
         subscriber = params.get("subscriber", default_subscriber)
         model = params.get("model", default_model)
-        api_key = params.get("api_key")
+        api_key = params.get("api_key", default_api_key)
         temperature = params.get("temperature", default_temperature)
         max_output = params.get("max_output", default_max_output)
 
+        if api_key is None:
+            raise RuntimeError("Google AI Studio API key not specified")
+
         super(Processor, self).__init__(
             **params | {
                 "input_queue": input_queue,
@@ -211,4 +216,4 @@ def run():
 
     Processor.start(module, __doc__)

[File: Llamafile text completion processor]

@@ -20,7 +20,7 @@ default_input_queue = text_completion_request_queue
 default_output_queue = text_completion_response_queue
 default_subscriber = module
 default_model = 'LLaMA_CPP'
-default_llamafile = 'http://localhost:8080/v1'
+default_llamafile = os.getenv("LLAMAFILE_URL", "http://localhost:8080/v1")
 default_temperature = 0.0
 default_max_output = 4096
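
Note the two os.getenv styles in this commit: credentials use the one-argument form, which returns None so the startup checks can fail fast, while service URLs like LLAMAFILE_URL use the two-argument form and quietly fall back to a local default. A quick comparison:

    import os

    required = os.getenv("AZURE_TOKEN")  # None when unset; RuntimeError follows
    optional = os.getenv("LLAMAFILE_URL", "http://localhost:8080/v1")  # always a str

    print(required)  # None unless the secret was injected
    print(optional)  # the env value, or the localhost default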

[File: Ollama text completion processor]

@@ -6,6 +6,7 @@ Input is prompt, output is response.
 
 from ollama import Client
 from prometheus_client import Histogram, Info
+import os
 
 from .... schema import TextCompletionRequest, TextCompletionResponse, Error
 from .... schema import text_completion_request_queue
@@ -19,8 +20,8 @@ module = ".".join(__name__.split(".")[1:-1])
 default_input_queue = text_completion_request_queue
 default_output_queue = text_completion_response_queue
 default_subscriber = module
-default_model = 'gemma2'
-default_ollama = 'http://localhost:11434'
+default_model = 'gemma2:9b'
+default_ollama = os.getenv("OLLAMA_HOST", 'http://localhost:11434')
 
 class Processor(ConsumerProducer):
@@ -152,7 +153,7 @@ class Processor(ConsumerProducer):
         parser.add_argument(
             '-m', '--model',
             default="gemma2",
-            help=f'LLM model (default: gemma2)'
+            help=f'LLM model (default: {default_model})'
         )
 
         parser.add_argument(
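
The hunk above interpolates default_model into the help text in place of a stale literal. A related pattern, sketched here as a suggestion rather than as part of the commit, passes the same variable as the argparse default so the flag's value and its help string cannot drift apart:

    import argparse

    default_model = 'gemma2:9b'  # mirrors the module-level default above

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-m', '--model',
        default=default_model,
        help=f'LLM model (default: {default_model})'
    )

    print(parser.parse_args([]).model)  # gemma2:9b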

[File: OpenAI text completion processor]

@@ -6,6 +6,7 @@ Input is prompt, output is response.
 
 from openai import OpenAI
 from prometheus_client import Histogram
+import os
 
 from .... schema import TextCompletionRequest, TextCompletionResponse, Error
 from .... schema import text_completion_request_queue
@@ -22,6 +23,7 @@ default_subscriber = module
 default_model = 'gpt-3.5-turbo'
 default_temperature = 0.0
 default_max_output = 4096
+default_api_key = os.getenv("OPENAI_KEY")
 
 class Processor(ConsumerProducer):
@@ -31,10 +33,13 @@ class Processor(ConsumerProducer):
         output_queue = params.get("output_queue", default_output_queue)
         subscriber = params.get("subscriber", default_subscriber)
         model = params.get("model", default_model)
-        api_key = params.get("api_key")
+        api_key = params.get("api_key", default_api_key)
         temperature = params.get("temperature", default_temperature)
         max_output = params.get("max_output", default_max_output)
 
+        if api_key is None:
+            raise RuntimeError("OpenAI API key not specified")
+
         super(Processor, self).__init__(
             **params | {
                 "input_queue": input_queue,