trustgraph/trustgraph/llm/vertexai_text/llm.py


"""
Simple LLM service, performs text prompt completion using VertexAI on
Google Cloud.   Input is prompt, output is response.
"""

import vertexai
import time

from google.oauth2 import service_account
import google

from vertexai.preview.generative_models import (
    Content,
    FunctionDeclaration,
    GenerativeModel,
    GenerationConfig,
    HarmCategory,
    HarmBlockThreshold,
    Part,
    Tool,
)

from ... schema import TextCompletionRequest, TextCompletionResponse
from ... log_level import LogLevel
from ... base import ConsumerProducer

# Defaults for this service's queues and subscription name.  They are used
# as the default values of the matching Processor.__init__ parameters and
# are handed to ConsumerProducer.add_args when the command-line arguments
# are registered.
default_input_queue = 'llm-complete-text'
default_output_queue = 'llm-complete-text-response'
default_subscriber = 'llm-vertexai-text'

class Processor(ConsumerProducer):
    """
    Text-completion processor backed by a VertexAI generative model.

    Each incoming TextCompletionRequest carries a prompt; the prompt is
    sent to the configured model and the generated text is published as a
    TextCompletionResponse tagged with the sender's "id" property.
    """

    def __init__(
            self,
            pulsar_host=None,
            input_queue=default_input_queue,
            output_queue=default_output_queue,
            subscriber=default_subscriber,
            log_level=LogLevel.INFO,
            region="us-west1",
            model="gemini-1.0-pro-001",
            private_key=None,
    ):
        """
        Set up the consumer/producer base class and the VertexAI model.

        pulsar_host, input_queue, output_queue, subscriber and log_level
        are forwarded unchanged to ConsumerProducer.__init__.

        region is passed to vertexai.init() as the location.
        model is the model name given to GenerativeModel.
        private_key is an optional path to a service-account file; when
        it is not set, vertexai.init() is called with the location only.
        """

        super().__init__(
            pulsar_host=pulsar_host,
            log_level=log_level,
            input_queue=input_queue,
            output_queue=output_queue,
            subscriber=subscriber,
            input_schema=TextCompletionRequest,
            output_schema=TextCompletionResponse,
        )

        # NOTE: nothing in this class reads self.parameters; requests are
        # governed by self.generation_config below, whose top_k and
        # max_output_tokens values differ from these.  The attribute is
        # kept so that any external reader of it keeps working.
        self.parameters = {
            "temperature": 0.2,
            "top_p": 1.0,
            "top_k": 32,
            "candidate_count": 1,
            "max_output_tokens": 8192,
        }

        # Generation settings applied to every generate_content() call
        self.generation_config = GenerationConfig(
            temperature=0.2,
            top_p=1.0,
            top_k=10,
            candidate_count=1,
            max_output_tokens=8191,
        )

        # BLOCK_NONE doesn't seem to work, so BLOCK_ONLY_HIGH is used
        # for every harm category instead.
        block_level = HarmBlockThreshold.BLOCK_ONLY_HIGH

        self.safety_settings = {
            HarmCategory.HARM_CATEGORY_HARASSMENT: block_level,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: block_level,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: block_level,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: block_level,
        }

        print("Initialise VertexAI...", flush=True)

        if private_key:
            credentials = (
                service_account.Credentials.from_service_account_file(
                    private_key
                )
            )
        else:
            credentials = None

        # A single init call: explicit credentials (and the project they
        # name) are only supplied when a key file was given.
        init_args = {"location": region}
        if credentials:
            init_args["credentials"] = credentials
            init_args["project"] = credentials.project_id

        vertexai.init(**init_args)

        print(f"Initialise model {model}", flush=True)
        self.llm = GenerativeModel(model)

        print("Initialisation complete", flush=True)

    def handle(self, msg):
        """
        Process one request message.

        Reads the prompt from the message value and the sender-produced
        "id" from the message properties, asks the model for a completion,
        strips Markdown code-fence markers from the text, sends the
        response with the same "id" property and acknowledges the message.

        If the model call raises ResourceExhausted, the handler sleeps for
        15 seconds and negatively acknowledges the message.  Any other
        exception propagates to the caller without the message being
        acknowledged here.
        """

        # Imported explicitly: a bare `import google` does not by itself
        # make the `google.api_core.exceptions` submodule available, so the
        # except clause should not depend on some other module having
        # imported it first.
        from google.api_core.exceptions import ResourceExhausted

        try:

            v = msg.value()

            # Sender-produced ID, echoed back on the response
            msg_id = msg.properties()["id"]

            print(f"Handling prompt {msg_id}...", flush=True)

            prompt = v.prompt

            completion = self.llm.generate_content(
                prompt, generation_config=self.generation_config,
                safety_settings=self.safety_settings
            )

            text = completion.text

            # Remove code-fence markers; the "```json" form has to go
            # first, otherwise a stray "json" would be left behind.
            text = text.replace("```json", "")
            text = text.replace("```", "")

            print("Send response...", flush=True)
            r = TextCompletionResponse(response=text)
            self.producer.send(r, properties={"id": msg_id})

            print("Done.", flush=True)

            # Acknowledge successful processing of the message
            self.consumer.acknowledge(msg)

        except ResourceExhausted:

            print("429, resource busy, sleeping", flush=True)
            time.sleep(15)
            self.consumer.negative_acknowledge(msg)

        # Let other exceptions fall through

    @staticmethod
    def add_args(parser):
        """
        Register command-line arguments on parser.

        Delegates to ConsumerProducer.add_args with this service's default
        queues and subscriber, then adds the model (-m), private key file
        (-k) and region (-r) options.
        """

        ConsumerProducer.add_args(
            parser, default_input_queue, default_subscriber,
            default_output_queue,
        )

        parser.add_argument(
            '-m', '--model',
            default="gemini-1.0-pro-001",
            help='LLM model (default: gemini-1.0-pro-001)'
        )
        # Also: text-bison-32k

        parser.add_argument(
            '-k', '--private-key',
            help='Google Cloud private JSON file'
        )

        parser.add_argument(
            '-r', '--region',
            default='us-west1',
            help='Google Cloud region (default: us-west1)',
        )

def run():
    """
    Entry point: start the processor, passing the service name and the
    module docstring to Processor.start.
    """

    service_name = "llm-vertexai-text"
    description = __doc__

    Processor.start(service_name, description)
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
			`"""`
Updated doc strings 2024-07-12 15:12:40 +01:00			`Simple LLM service, performs text prompt completion using VertexAI on`
			`Google Cloud. Input is prompt, output is response.`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00			`"""`

			`import vertexai`
			`import time`

			`from google.oauth2 import service_account`
			`import google`

			`from vertexai.preview.generative_models import (`
			`Content,`
			`FunctionDeclaration,`
			`GenerativeModel,`
			`GenerationConfig,`
			`HarmCategory,`
			`HarmBlockThreshold,`
			`Part,`
			`Tool,`
			`)`

			`from ... schema import TextCompletionRequest, TextCompletionResponse`
			`from ... log_level import LogLevel`
Other LLMs 2024-07-17 17:18:24 +01:00			`from ... base import ConsumerProducer`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Processor model prototype 2024-07-15 17:17:04 +01:00			`default_input_queue = 'llm-complete-text'`
			`default_output_queue = 'llm-complete-text-response'`
			`default_subscriber = 'llm-vertexai-text'`

Other LLMs 2024-07-17 17:18:24 +01:00			`class Processor(ConsumerProducer):`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
			`def __init__(`
			`self,`
Other LLMs 2024-07-17 17:18:24 +01:00			`pulsar_host=None,`
Processor model prototype 2024-07-15 17:17:04 +01:00			`input_queue=default_input_queue,`
			`output_queue=default_output_queue,`
			`subscriber=default_subscriber,`
			`log_level=LogLevel.INFO,`
			`region="us-west1",`
			`model="gemini-1.0-pro-001",`
Other LLMs 2024-07-17 17:18:24 +01:00			`private_key=None,`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00			`):`

Other LLMs 2024-07-17 17:18:24 +01:00			`super(Processor, self).__init__(`
			`pulsar_host=pulsar_host,`
			`log_level=log_level,`
			`input_queue=input_queue,`
			`output_queue=output_queue,`
			`subscriber=subscriber,`
			`input_schema=TextCompletionRequest,`
			`output_schema=TextCompletionResponse,`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00			`)`

			`self.parameters = {`
			`"temperature": 0.2,`
			`"top_p": 1.0,`
			`"top_k": 32,`
			`"candidate_count": 1,`
			`"max_output_tokens": 8192,`
			`}`

			`self.generation_config = GenerationConfig(`
			`temperature=0.2,`
			`top_p=1.0,`
			`top_k=10,`
			`candidate_count=1,`
			`max_output_tokens=8191,`
			`)`

			`# Block none doesn't seem to work`
			`block_level = HarmBlockThreshold.BLOCK_ONLY_HIGH`
			`# block_level = HarmBlockThreshold.BLOCK_NONE`

			`self.safety_settings = {`
			`HarmCategory.HARM_CATEGORY_HARASSMENT: block_level,`
			`HarmCategory.HARM_CATEGORY_HATE_SPEECH: block_level,`
			`HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: block_level,`
			`HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: block_level,`
			`}`

			`print("Initialise VertexAI...", flush=True)`

Other LLMs 2024-07-17 17:18:24 +01:00			`if private_key:`
			`credentials = service_account.Credentials.from_service_account_file(private_key)`
			`else:`
			`credentials = None`

Trustgraph initial code drop 2024-07-10 23:20:06 +01:00			`if credentials:`
			`vertexai.init(`
			`location=region,`
			`credentials=credentials,`
			`project=credentials.project_id,`
			`)`
			`else:`
			`vertexai.init(`
			`location=region`
			`)`

			`print(f"Initialise model {model}", flush=True)`
			`self.llm = GenerativeModel(model)`

			`print("Initialisation complete", flush=True)`

Other LLMs 2024-07-17 17:18:24 +01:00			`def handle(self, msg):`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`try:`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`v = msg.value()`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`# Sender-produced ID`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`id = msg.properties()["id"]`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`print(f"Handling prompt {id}...", flush=True)`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`prompt = v.prompt`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`resp = self.llm.generate_content(`
			`prompt, generation_config=self.generation_config,`
			`safety_settings=self.safety_settings`
			`)`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`resp = resp.text`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			resp = resp.replace("```json", "")
			resp = resp.replace("```", "")
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`print("Send response...", flush=True)`
			`r = TextCompletionResponse(response=resp)`
			`self.producer.send(r, properties={"id": id})`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`print("Done.", flush=True)`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`# Acknowledge successful processing of the message`
			`self.consumer.acknowledge(msg)`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`except google.api_core.exceptions.ResourceExhausted:`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`print("429, resource busy, sleeping", flush=True)`
			`time.sleep(15)`
			`self.consumer.negative_acknowledge(msg)`
Processor model prototype 2024-07-15 17:17:04 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`# Let other exceptions fall through`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`@staticmethod`
			`def add_args(parser):`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`ConsumerProducer.add_args(`
			`parser, default_input_queue, default_subscriber,`
			`default_output_queue,`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00			`)`

Other LLMs 2024-07-17 17:18:24 +01:00			`parser.add_argument(`
			`'-m', '--model',`
			`default="gemini-1.0-pro-001",`
			`help=f'LLM model (default: gemini-1.0-pro-001)'`
			`)`
			`# Also: text-bison-32k`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`parser.add_argument(`
			`'-k', '--private-key',`
			`help=f'Google Cloud private JSON file'`
			`)`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`parser.add_argument(`
			`'-r', '--region',`
			`default='us-west1',`
			`help=f'Google Cloud region (default: us-west1)',`
			`)`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`def run():`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Other LLMs 2024-07-17 17:18:24 +01:00			`Processor.start("llm-vertexai-text", __doc__)`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00