Mirror of https://github.com/trustgraph-ai/trustgraph.git, synced 2026-04-27 01:16:22 +02:00
* Tidy up duplicate tech specs in doc directory
* Streaming LLM text-completion service tech spec
* text-completion and prompt interfaces
* Streaming change applied to all LLMs, so far tested with VertexAI
* Skip Pinecone unit tests while an upstream module issue is affecting things; tests are passing again
* Added agent streaming; not working and has broken tests
263 lines, 7.6 KiB, Python, Executable file
"""
|
|
Language service abstracts prompt engineering from LLM.
|
|
"""
|
|
|
|
import asyncio
import json
import re
import logging

from ...schema import Definition, Relationship, Triple
from ...schema import Topic
from ...schema import PromptRequest, PromptResponse, Error
from ...schema import TextCompletionRequest, TextCompletionResponse

from ...base import FlowProcessor
from ...base import ProducerSpec, ConsumerSpec, TextCompletionClientSpec

from ...template import PromptManager

# Module logger
logger = logging.getLogger(__name__)

default_ident = "prompt"
default_concurrency = 1

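# The prompt service sits between callers and the text-completion
# service: it receives PromptRequest messages, renders the named prompt
# template with the supplied terms via PromptManager, invokes the LLM,
# and replies with PromptResponse messages, either as a single message
# or, when streaming is requested, chunk by chunk.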
class Processor(FlowProcessor):

    def __init__(self, **params):

        id = params.get("id")
        concurrency = params.get("concurrency", default_concurrency)

        # Config key for prompts
        self.config_key = params.get("config_type", "prompt")

        super(Processor, self).__init__(
            **params | {
                "id": id,
                "config_type": self.config_key,
                "concurrency": concurrency,
            }
        )
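
        # Flow wiring: consume PromptRequest messages, call out to the
        # text-completion service, and produce PromptResponse messages.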
        self.register_specification(
            ConsumerSpec(
                name = "request",
                schema = PromptRequest,
                handler = self.on_request,
                concurrency = concurrency,
            )
        )

        self.register_specification(
            TextCompletionClientSpec(
                request_name = "text-completion-request",
                response_name = "text-completion-response",
            )
        )

        self.register_specification(
            ProducerSpec(
                name = "response",
                schema = PromptResponse,
            )
        )

        self.register_config_handler(self.on_prompt_config)

        # Null configuration, should reload quickly
        self.manager = PromptManager()

    async def on_prompt_config(self, config, version):

        logger.info(f"Loading prompt configuration version {version}")

        if self.config_key not in config:
            logger.warning(f"No key {self.config_key} in config")
            return

        config = config[self.config_key]

        try:
            self.manager.load_config(config)
            logger.info("Prompt configuration reloaded")
        except Exception as e:
            logger.error(f"Prompt configuration exception: {e}", exc_info=True)
            logger.error("Configuration reload failed")

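    # Request handler: decode the request, render the named prompt
    # template, invoke the LLM, and send the reply. Streaming requests
    # are answered with a sequence of chunk messages; non-streaming
    # requests with a single message.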
    async def on_request(self, msg, consumer, flow):

        v = msg.value()

        # Sender-produced ID
        id = msg.properties()["id"]

        kind = v.id

        # Check if streaming is requested
        streaming = getattr(v, 'streaming', False)

        try:

            logger.debug(f"Prompt terms: {v.terms}")

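            # Each term value arrives as a JSON-encoded string; decode
            # into native values for template rendering.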
            input = {
                k: json.loads(value)
                for k, value in v.terms.items()
            }

            logger.debug(f"Handling prompt kind {kind}... (streaming={streaming})")

            # If streaming, we need to handle it differently
            if streaming:

                # For streaming, we need to intercept LLM responses
                # and forward them as they arrive

                async def llm_streaming(system, prompt):

                    logger.debug(f"System prompt: {system}")
                    logger.debug(f"User prompt: {prompt}")

                    # Use the text completion client with recipient handler
                    client = flow("text-completion-request")

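                    # The recipient callback runs once per response
                    # message; returning True signals the client that
                    # the stream is complete.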
                    async def forward_chunks(resp):

                        if resp.error:
                            raise RuntimeError(resp.error.message)

                        is_final = getattr(resp, 'end_of_stream', False)

                        # Always send a message if there's content OR
                        # if it's the final message
                        if resp.response or is_final:

                            # Forward each chunk immediately
                            r = PromptResponse(
                                text=resp.response if resp.response else "",
                                object=None,
                                error=None,
                                end_of_stream=is_final,
                            )

                            await flow("response").send(r, properties={"id": id})

                        # Return True when end_of_stream
                        return is_final

                    await client.request(
                        TextCompletionRequest(
                            system=system, prompt=prompt, streaming=True
                        ),
                        recipient=forward_chunks,
                        timeout=600
                    )

                    # Return empty string since we already sent all chunks
                    return ""

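                # Render the template with the streaming shim; the
                # chunks themselves have already been sent above.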
                try:
                    await self.manager.invoke(kind, input, llm_streaming)
                except Exception as e:
                    logger.error(f"Prompt streaming exception: {e}", exc_info=True)
                    raise

                return

            # Non-streaming path (original behavior)
            async def llm(system, prompt):

                logger.debug(f"System prompt: {system}")
                logger.debug(f"User prompt: {prompt}")

                try:
                    return await flow("text-completion-request").text_completion(
                        system = system, prompt = prompt, streaming = False,
                    )
                except Exception as e:
                    logger.error(f"LLM Exception: {e}", exc_info=True)
                    return None

            try:
                resp = await self.manager.invoke(kind, input, llm)
            except Exception as e:
                logger.error(f"Prompt invocation exception: {e}", exc_info=True)
                raise

            logger.debug(f"Prompt response: {resp}")

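            # The template manager returns either a plain string or a
            # structure to be serialized as JSON.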
            if isinstance(resp, str):

                logger.debug("Sending text response...")

                r = PromptResponse(
                    text=resp,
                    object=None,
                    error=None,
                    end_of_stream=True,
                )

                await flow("response").send(r, properties={"id": id})

                return

            else:

                logger.debug("Sending object response...")
                logger.debug(f"Response object: {json.dumps(resp, indent=4)}")

                r = PromptResponse(
                    text=None,
                    object=json.dumps(resp),
                    error=None,
                    end_of_stream=True,
                )

                await flow("response").send(r, properties={"id": id})

                return

        except Exception as e:

            logger.error(f"Prompt service exception: {e}", exc_info=True)

            logger.debug("Sending error response...")

            r = PromptResponse(
                error=Error(
                    type = "llm-error",
                    message = str(e),
                ),
                text=None,
                object=None,
                end_of_stream=True,
            )

            await flow("response").send(r, properties={"id": id})

    @staticmethod
    def add_args(parser):

        parser.add_argument(
            '-c', '--concurrency',
            type=int,
            default=default_concurrency,
            help=f'Concurrent processing threads (default: {default_concurrency})'
        )

        parser.add_argument(
            '--config-type',
            default="prompt",
            help='Configuration key for prompts (default: prompt)',
        )

        FlowProcessor.add_args(parser)

def run():

    Processor.launch(default_ident, __doc__)
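
# A minimal sketch of a round trip through this service, inferred from
# the handler above (the template id below is hypothetical, and the
# construction details are assumptions, not a verified API):
#
#     req = PromptRequest(
#         id="question",                      # names the prompt template
#         terms={"text": json.dumps("...")},  # values must be JSON-encoded
#         streaming=False,
#     )
#
# The reply is a PromptResponse carrying either `text` (a plain string)
# or `object` (a JSON-serialized structure), with end_of_stream=True on
# the final message.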