Other LLMs

This commit is contained in:
Cyber MacGeddon 2024-07-17 17:18:24 +01:00
parent ab616b2779
commit 96a12efd70
3 changed files with 159 additions and 414 deletions

View file

@ -4,30 +4,21 @@ Simple LLM service, performs text prompt completion using the Azure
serverless endpoint service. Input is prompt, output is response. serverless endpoint service. Input is prompt, output is response.
""" """
import pulsar
from pulsar.schema import JsonSchema
import tempfile
import base64
import os
import argparse
from langchain_community.llms import Ollama
import requests import requests
import time
import json
from ... schema import TextCompletionRequest, TextCompletionResponse from ... schema import TextCompletionRequest, TextCompletionResponse
from ... log_level import LogLevel from ... log_level import LogLevel
from ... base import ConsumerProducer
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
default_input_queue = 'llm-complete-text' default_input_queue = 'llm-complete-text'
default_output_queue = 'llm-complete-text-response' default_output_queue = 'llm-complete-text-response'
default_subscriber = 'llm-azure-text' default_subscriber = 'llm-azure-text'
class Processor: class Processor(ConsumerProducer):
def __init__( def __init__(
self, self,
pulsar_host=default_pulsar_host, pulsar_host=None,
input_queue=default_input_queue, input_queue=default_input_queue,
output_queue=default_output_queue, output_queue=default_output_queue,
subscriber=default_subscriber, subscriber=default_subscriber,
@ -36,21 +27,14 @@ class Processor:
token=None, token=None,
): ):
self.client = None super(Processor, self).__init__(
pulsar_host=pulsar_host,
self.client = pulsar.Client( log_level=log_level,
pulsar_host, input_queue=input_queue,
logger=pulsar.ConsoleLogger(log_level.to_pulsar()) output_queue=output_queue,
) subscriber=subscriber,
input_schema=TextCompletionRequest,
self.consumer = self.client.subscribe( output_schema=TextCompletionResponse,
input_queue, subscriber,
schema=JsonSchema(TextCompletionRequest),
)
self.producer = self.client.create_producer(
topic=output_queue,
schema=JsonSchema(TextCompletionResponse),
) )
self.endpoint = endpoint self.endpoint = endpoint
@ -96,120 +80,47 @@ class Processor:
return message_content return message_content
def run(self): def handle(self, msg):
while True: v = msg.value()
msg = self.consumer.receive() # Sender-produced ID
try: id = msg.properties()["id"]
v = msg.value() print(f"Handling prompt {id}...", flush=True)
# Sender-produced ID prompt = self.build_prompt(
"You are a helpful chatbot",
v.prompt
)
id = msg.properties()["id"] response = self.call_llm(prompt)
print(f"Handling prompt {id}...", flush=True) print("Send response...", flush=True)
r = TextCompletionResponse(response=response)
self.producer.send(r, properties={"id": id})
prompt = self.build_prompt( print("Done.", flush=True)
"You are a helpful chatbot",
v.prompt
)
response = self.call_llm(prompt) @staticmethod
def add_args(parser):
print("Send response...", flush=True) ConsumerProducer.add_args(
r = TextCompletionResponse(response=response) parser, default_input_queue, default_subscriber,
self.producer.send(r, properties={"id": id}) default_output_queue,
)
print("Done.", flush=True) parser.add_argument(
'-e', '--endpoint',
help=f'LLM model endpoint'
)
# Acknowledge successful processing of the message parser.add_argument(
self.consumer.acknowledge(msg) '-k', '--token',
help=f'LLM model token'
except Exception as e: )
print("Exception:", e, flush=True)
# Message failed to be processed
self.consumer.negative_acknowledge(msg)
def __del__(self):
self.client.close()
def run(): def run():
parser = argparse.ArgumentParser( Processor.start("llm-azure-text", __doc__)
prog='llm-ollama-text',
description=__doc__,
)
parser.add_argument(
'-p', '--pulsar-host',
default=default_pulsar_host,
help=f'Pulsar host (default: {default_pulsar_host})',
)
parser.add_argument(
'-i', '--input-queue',
default=default_input_queue,
help=f'Input queue (default: {default_input_queue})'
)
parser.add_argument(
'-s', '--subscriber',
default=default_subscriber,
help=f'Queue subscriber name (default: {default_subscriber})'
)
parser.add_argument(
'-o', '--output-queue',
default=default_output_queue,
help=f'Output queue (default: {default_output_queue})'
)
parser.add_argument(
'-l', '--log-level',
type=LogLevel,
default=LogLevel.INFO,
choices=list(LogLevel),
help=f'Output queue (default: info)'
)
parser.add_argument(
'-e', '--endpoint',
help=f'LLM model endpoint'
)
parser.add_argument(
'-k', '--token',
help=f'LLM model token'
)
args = parser.parse_args()
while True:
try:
p = Processor(
pulsar_host=args.pulsar_host,
input_queue=args.input_queue,
output_queue=args.output_queue,
subscriber=args.subscriber,
log_level=args.log_level,
endpoint=args.endpoint,
token=args.token,
)
p.run()
except Exception as e:
print("Exception:", e, flush=True)
print("Will retry...", flush=True)
time.sleep(10)

View file

@ -4,19 +4,12 @@ Simple LLM service, performs text prompt completion using Claude.
Input is prompt, output is response. Input is prompt, output is response.
""" """
import pulsar
from pulsar.schema import JsonSchema
import tempfile
import base64
import os
import argparse
import anthropic import anthropic
import time
from ... schema import TextCompletionRequest, TextCompletionResponse from ... schema import TextCompletionRequest, TextCompletionResponse
from ... log_level import LogLevel from ... log_level import LogLevel
from ... base import ConsumerProducer
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
default_input_queue = 'llm-complete-text' default_input_queue = 'llm-complete-text'
default_output_queue = 'llm-complete-text-response' default_output_queue = 'llm-complete-text-response'
default_subscriber = 'llm-claude-text' default_subscriber = 'llm-claude-text'
@ -26,30 +19,23 @@ class Processor:
def __init__( def __init__(
self, self,
pulsar_host=default_pulsar_host, pulsar_host=None,
input_queue=default_input_queue, input_queue=default_input_queue,
output_queue=default_output_queue, output_queue=default_output_queue,
subscriber=default_subscriber, subscriber=default_subscriber,
log_level=LogLevel.INFO, log_level=LogLevel.INFO,
model=default_model, model=default_model,
api_key, api_key="",
): ):
self.client = None super(Processor, self).__init__(
pulsar_host=pulsar_host,
self.client = pulsar.Client( log_level=log_level,
pulsar_host, input_queue=input_queue,
logger=pulsar.ConsoleLogger(log_level.to_pulsar()) output_queue=output_queue,
) subscriber=subscriber,
input_schema=TextCompletionRequest,
self.consumer = self.client.subscribe( output_schema=TextCompletionResponse,
input_queue, subscriber,
schema=JsonSchema(TextCompletionRequest),
)
self.producer = self.client.create_producer(
topic=output_queue,
schema=JsonSchema(TextCompletionResponse),
) )
self.model = model self.model = model
@ -58,135 +44,65 @@ class Processor:
print("Initialised", flush=True) print("Initialised", flush=True)
def run(self): def handle(self, msg):
while True: v = msg.value()
msg = self.consumer.receive() # Sender-produced ID
try: id = msg.properties()["id"]
v = msg.value() print(f"Handling prompt {id}...", flush=True)
# Sender-produced ID prompt = v.prompt
response = message = self.claude.messages.create(
id = msg.properties()["id"] model=self.model,
max_tokens=1000,
print(f"Handling prompt {id}...", flush=True) temperature=0.1,
system = "You are a helpful chatbot.",
prompt = v.prompt messages=[
response = message = self.claude.messages.create( {
model=self.model, "role": "user",
max_tokens=1000, "content": [
temperature=0.1,
system = "You are a helpful chatbot.",
messages=[
{ {
"role": "user", "type": "text",
"content": [ "text": prompt
{
"type": "text",
"text": prompt
}
]
} }
] ]
) }
]
)
resp = response.content[0].text resp = response.content[0].text
print(resp, flush=True) print(resp, flush=True)
print("Send response...", flush=True) print("Send response...", flush=True)
r = TextCompletionResponse(response=resp) r = TextCompletionResponse(response=resp)
self.producer.send(r, properties={"id": id}) self.send(r, properties={"id": id})
print("Done.", flush=True) print("Done.", flush=True)
# Acknowledge successful processing of the message @staticmethod
self.consumer.acknowledge(msg) def add_args(parser):
except Exception as e: ConsumerProducer.add_args(
parser, default_input_queue, default_subscriber,
default_output_queue,
)
print("Exception:", e, flush=True) parser.add_argument(
'-m', '--model',
default="claude-3-5-sonnet-20240620",
help=f'LLM model (default: claude-3-5-sonnet-20240620)'
)
# Message failed to be processed parser.add_argument(
self.consumer.negative_acknowledge(msg) '-k', '--api-key',
help=f'Claude API key'
def __del__(self): )
self.client.close()
def run(): def run():
parser = argparse.ArgumentParser( Processor.start("llm-claude-text", __doc__)
prog='llm-ollama-text',
description=__doc__,
)
parser.add_argument(
'-p', '--pulsar-host',
default=default_pulsar_host,
help=f'Pulsar host (default: {default_pulsar_host})',
)
parser.add_argument(
'-i', '--input-queue',
default=default_input_queue,
help=f'Input queue (default: {default_input_queue})'
)
parser.add_argument(
'-s', '--subscriber',
default=default_subscriber,
help=f'Queue subscriber name (default: {default_subscriber})'
)
parser.add_argument(
'-o', '--output-queue',
default=default_output_queue,
help=f'Output queue (default: {default_output_queue})'
)
parser.add_argument(
'-l', '--log-level',
type=LogLevel,
default=LogLevel.INFO,
choices=list(LogLevel),
help=f'Output queue (default: info)'
)
parser.add_argument(
'-m', '--model',
default="claude-3-5-sonnet-20240620",
help=f'LLM model (default: claude-3-5-sonnet-20240620)'
)
parser.add_argument(
'-k', '--api-key',
help=f'Claude API key'
)
args = parser.parse_args()
while True:
try:
p = Processor(
pulsar_host=args.pulsar_host,
input_queue=args.input_queue,
output_queue=args.output_queue,
subscriber=args.subscriber,
log_level=args.log_level,
model=args.model,
api_key=args.api_key,
)
p.run()
except Exception as e:
print("Exception:", e, flush=True)
print("Will retry...", flush=True)
time.sleep(10)

View file

@ -4,12 +4,6 @@ Simple LLM service, performs text prompt completion using VertexAI on
Google Cloud. Input is prompt, output is response. Google Cloud. Input is prompt, output is response.
""" """
import pulsar
from pulsar.schema import JsonSchema
import tempfile
import base64
import os
import argparse
import vertexai import vertexai
import time import time
@ -29,41 +23,34 @@ from vertexai.preview.generative_models import (
from ... schema import TextCompletionRequest, TextCompletionResponse from ... schema import TextCompletionRequest, TextCompletionResponse
from ... log_level import LogLevel from ... log_level import LogLevel
from ... base import ConsumerProducer
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
default_input_queue = 'llm-complete-text' default_input_queue = 'llm-complete-text'
default_output_queue = 'llm-complete-text-response' default_output_queue = 'llm-complete-text-response'
default_subscriber = 'llm-vertexai-text' default_subscriber = 'llm-vertexai-text'
class Processor: class Processor(ConsumerProducer):
def __init__( def __init__(
self, self,
pulsar_host=default_pulsar_host, pulsar_host=None,
input_queue=default_input_queue, input_queue=default_input_queue,
output_queue=default_output_queue, output_queue=default_output_queue,
subscriber=default_subscriber, subscriber=default_subscriber,
log_level=LogLevel.INFO, log_level=LogLevel.INFO,
region="us-west1", region="us-west1",
model="gemini-1.0-pro-001", model="gemini-1.0-pro-001",
credentials, private_key=None,
): ):
self.client = None super(Processor, self).__init__(
pulsar_host=pulsar_host,
self.client = pulsar.Client( log_level=log_level,
pulsar_host, input_queue=input_queue,
logger=pulsar.ConsoleLogger(log_level.to_pulsar()) output_queue=output_queue,
) subscriber=subscriber,
input_schema=TextCompletionRequest,
self.consumer = self.client.subscribe( output_schema=TextCompletionResponse,
input_queue, subscriber,
schema=JsonSchema(TextCompletionRequest),
)
self.producer = self.client.create_producer(
topic=output_queue,
schema=JsonSchema(TextCompletionResponse),
) )
self.parameters = { self.parameters = {
@ -95,6 +82,11 @@ class Processor:
print("Initialise VertexAI...", flush=True) print("Initialise VertexAI...", flush=True)
if private_key:
credentials = service_account.Credentials.from_service_account_file(private_key)
else:
credentials = None
if credentials: if credentials:
vertexai.init( vertexai.init(
location=region, location=region,
@ -111,148 +103,74 @@ class Processor:
print("Initialisation complete", flush=True) print("Initialisation complete", flush=True)
def run(self): def handle(self, msg):
while True:
msg = self.consumer.receive()
try:
v = msg.value()
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling prompt {id}...", flush=True)
prompt = v.prompt
resp = self.llm.generate_content(
prompt, generation_config=self.generation_config,
safety_settings=self.safety_settings
)
resp = resp.text
resp = resp.replace("```json", "")
resp = resp.replace("```", "")
print("Send response...", flush=True)
r = TextCompletionResponse(response=resp)
self.producer.send(r, properties={"id": id})
print("Done.", flush=True)
# Acknowledge successful processing of the message
self.consumer.acknowledge(msg)
except google.api_core.exceptions.ResourceExhausted:
print("429, resource busy, sleeping", flush=True)
time.sleep(15)
self.consumer.negative_acknowledge(msg)
except Exception as e:
print("Exception:", e, flush=True)
# Message failed to be processed
self.consumer.negative_acknowledge(msg)
def __del__(self):
if self.client:
self.client.close()
def run():
parser = argparse.ArgumentParser(
prog='llm-ollama-text',
description=__doc__,
)
parser.add_argument(
'-p', '--pulsar-host',
default=default_pulsar_host,
help=f'Pulsar host (default: {default_pulsar_host})',
)
parser.add_argument(
'-i', '--input-queue',
default=default_input_queue,
help=f'Input queue (default: {default_input_queue})'
)
parser.add_argument(
'-s', '--subscriber',
default=default_subscriber,
help=f'Queue subscriber name (default: {default_subscriber})'
)
parser.add_argument(
'-o', '--output-queue',
default=default_output_queue,
help=f'Output queue (default: {default_output_queue})'
)
parser.add_argument(
'-l', '--log-level',
type=LogLevel,
default=LogLevel.INFO,
choices=list(LogLevel),
help=f'Output queue (default: info)'
)
parser.add_argument(
'-m', '--model',
default="gemini-1.0-pro-001",
help=f'LLM model (default: gemini-1.0-pro-001)'
)
# Also: text-bison-32k
parser.add_argument(
'-k', '--private-key',
help=f'Google Cloud private JSON file'
)
parser.add_argument(
'-r', '--region',
default='us-west1',
help=f'Google Cloud region (default: us-west1)',
)
args = parser.parse_args()
if args.private_key:
credentials = service_account.Credentials.from_service_account_file(
args.private_key
)
else:
credentials = None
while True:
try: try:
p = Processor( v = msg.value()
pulsar_host=args.pulsar_host,
input_queue=args.input_queue, # Sender-produced ID
output_queue=args.output_queue,
subscriber=args.subscriber, id = msg.properties()["id"]
log_level=args.log_level,
credentials=credentials, print(f"Handling prompt {id}...", flush=True)
region=args.region,
model=args.model, prompt = v.prompt
resp = self.llm.generate_content(
prompt, generation_config=self.generation_config,
safety_settings=self.safety_settings
) )
p.run() resp = resp.text
except Exception as e: resp = resp.replace("```json", "")
resp = resp.replace("```", "")
print("Exception:", e, flush=True) print("Send response...", flush=True)
print("Will retry...", flush=True) r = TextCompletionResponse(response=resp)
self.producer.send(r, properties={"id": id})
time.sleep(10) print("Done.", flush=True)
# Acknowledge successful processing of the message
self.consumer.acknowledge(msg)
except google.api_core.exceptions.ResourceExhausted:
print("429, resource busy, sleeping", flush=True)
time.sleep(15)
self.consumer.negative_acknowledge(msg)
# Let other exceptions fall through
@staticmethod
def add_args(parser):
ConsumerProducer.add_args(
parser, default_input_queue, default_subscriber,
default_output_queue,
)
parser.add_argument(
'-m', '--model',
default="gemini-1.0-pro-001",
help=f'LLM model (default: gemini-1.0-pro-001)'
)
# Also: text-bison-32k
parser.add_argument(
'-k', '--private-key',
help=f'Google Cloud private JSON file'
)
parser.add_argument(
'-r', '--region',
default='us-west1',
help=f'Google Cloud region (default: us-west1)',
)
def run():
Processor.start("llm-vertexai-text", __doc__)