mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-04-28 09:56:23 +02:00
Add agents with custom swarm implementation
This commit is contained in:
parent
24c4f6e552
commit
a19dedd59f
35 changed files with 3413 additions and 0 deletions
219
apps/agents/src/graph/guardrails.py
Normal file
219
apps/agents/src/graph/guardrails.py
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
# Guardrails
|
||||
from src.utils.common import generate_llm_output
|
||||
import os
|
||||
import copy
|
||||
|
||||
from src.swarm.types import Response, Agent
|
||||
|
||||
from src.utils.common import common_logger, generate_openai_output, update_tokens_used
|
||||
# Module-level alias: the functions below log through the shared
# `common_logger` imported from src.utils.common.
logger = common_logger
|
||||
|
||||
def classify_hallucination(context: str, assistant_response: str, chat_history: list, model: str) -> str:
    """
    Ask an LLM whether an assistant response is supported by the given context.

    The context, the flattened chat history and the response are embedded in a
    single system prompt that defines four verdict classes and gives one worked
    example per class. The prompt is sent through ``generate_llm_output`` and
    the result of that call is returned unchanged (no parsing is done here).

    Args:
        context (str): The context/knowledge base to check the response against
        assistant_response (str): The response from the assistant to validate
        chat_history (list): Previous chat messages, each read as a mapping with
            'role' and 'content' entries. ``None`` is treated as an empty list.
        model (str): Model identifier forwarded to ``generate_llm_output``

    Returns:
        str: The output of ``generate_llm_output``. The prompt asks the model to
            answer with ``verdict: <class>`` where the class is one of:
            'yes-absolute' - completely supported by context
            'yes-common-sensical' - supported with common sense interpretation
            'no-absolute' - not supported by context
            'no-subtle' - not supported but difference is subtle
    """
    # `.get` keeps one malformed history entry (e.g. a message without
    # 'content') from aborting the whole guardrail check; for well-formed
    # messages the rendered text is the same as with plain indexing.
    chat_history_str = "\n".join(
        f"{message.get('role')}: {message.get('content')}"
        for message in (chat_history or [])
    )

    prompt = f"""
    You are a guardrail agent. Your job is to check if the response is hallucinating.

    ------------------------------------------------------------------------
    Here is the context:
    {context}

    ------------------------------------------------------------------------
    Here is the chat history message:
    {chat_history_str}

    ------------------------------------------------------------------------
    Here is the response:
    {assistant_response}

    ------------------------------------------------------------------------
    As a hallucination guardrail, your job is to go through each line of the response and check if it is completely supported by the context. Even if a single line is not supported, the response is no.

    Output a single verdict for the entire response. don't provide any reasoning. The output classes are

    yes-absolute: completely supported by the context
    yes-common-sensical: but with some common sense interpretation
    no-absolute: not supported by the context
    no-subtle: not supported by the context but the difference is subtle

    Output one of the classes:
    verdict : yes-absolute/yes-common-sensical/no-absolute/no-subtle

    Example 1: The response is completely supported by the context.
    User Input:
    Context: "Our airline provides complimentary meals and beverages on all international flights. Passengers are allowed one carry-on bag and one personal item."
    Chat History:
    User: "Do international flights with your airline offer free meals?"
    Response: "Yes, all international flights with our airline offer free meals and beverages."
    Output: verdict: yes-absolute

    Example 2: The response is generally true and could be deduced with common sense interpretation, though not explicitly stated in the context.
    User Input:
    Context: "Flights may experience delays due to weather conditions. In such cases, the airline staff will provide updates at the airport."
    Chat History:
    User: "Will there be announcements if my flight is delayed?"
    Response: "Yes, if your flight is delayed, there will be announcements at the airport."
    Output: verdict: yes-common-sensical

    Example 3: The response is not supported by the context and contains glaring inaccuracies.
    User Input:
    Context: "You can cancel your ticket online up to 24 hours before the flight's departure time and receive a full refund."
    Chat History:
    User: "Can I get a refund if I cancel 12 hours before the flight?"
    Response: "Yes, you can get a refund if you cancel 12 hours before the flight."
    Output: verdict: no-absolute

    Example 4: The response is not supported by the context but the difference is subtle.
    User Input:
    Context: "Our frequent flyer program offers discounts on checked bags for members who have achieved Gold status."
    Chat History:
    User: "As a member, do I get discounts on checked bags?"
    Response: "Yes, members of our frequent flyer program get discounts on checked bags."
    Output: verdict: no-subtle
    """

    # The whole instruction set travels as a single system message.
    messages = [
        {
            "role": "system",
            "content": prompt,
        },
    ]
    return generate_llm_output(messages, model)
|
||||
|
||||
def _post_process_skip_reason(pending_msg: dict, post_process_instructions: str):
    """Return the log line explaining why post processing is skipped, or None if it should run."""
    if pending_msg.get("tool_calls"):
        return "Last message is a tool call, skipping post processing and setting last message to external"
    # `.get` so a message without a 'response_type' is treated as "not internal"
    # instead of raising KeyError.
    if pending_msg.get("response_type") != "internal":
        return "Last message is not internal, skipping post processing and setting last message to external"
    if not pending_msg.get("content"):
        return "Last message has no content, skipping post processing and setting last message to external"
    if not post_process_instructions:
        return "No post process instructions, skipping post processing and setting last message to external"
    return None


def _build_post_process_prompt(post_process_instructions: str, agent_history_str: str, agent_instructions: str, agent_response: str, context: str = None, style_prompt: str = None) -> str:
    """Assemble the system prompt: role, admin instructions, history, optional context/style, agent instructions, agent response."""
    sections = [f"""
    # ROLE

    You are a post processing agent responsible for rewriting a response generated by an agent, according to instructions provided below. Ensure that the response you produce adheres to the instructions provided to you (if any). Further, the response should not violate the instructions provided to the agent, the context that the agent has used, the chat history of the agent, the context and the style provided. Some of these might or might not be provided.

    ------------------------------------------------------------------------

    # ADDITIONAL INSTRUCTIONS

    Here are additional instructions that the admin might have configured for you:
    {post_process_instructions}

    ------------------------------------------------------------------------

    # CHAT HISTORY

    Here is the chat history:
    {agent_history_str}
    """]

    if context:
        sections.append(f"""
        ------------------------------------------------------------------------
        # CONTEXT

        Here is the context:
        {context}
        """)

    if style_prompt:
        sections.append(f"""
        ------------------------------------------------------------------------
        # STYLE PROMPT

        Here is the style prompt:
        {style_prompt}
        """)

    sections.append(f"""

    ------------------------------------------------------------------------
    # AGENT INSTRUCTIONS

    Here are the instructions to the agent generating the response:
    {agent_instructions}

    ------------------------------------------------------------------------
    # AGENT RESPONSE

    Here is the response that the agent has generated:
    {agent_response}

    """)
    return "".join(sections)


def post_process_response(messages: list, post_processing_agent_name: str, post_process_instructions: str, style_prompt: str = None, context: str = None, model: str = "gpt-4o", tokens_used: dict = None, last_agent: Agent = None) -> Response:
    """
    Rewrite the last message of a turn with a post processing LLM call.

    The last entry of ``messages`` is deep-copied and, unless a skip condition
    applies, its content is sent (together with the admin instructions, the
    agent's history and instructions, and the optional context/style) to
    ``generate_openai_output``. The rewritten copy is returned inside a
    ``Response``; the caller's ``messages`` list is never mutated.

    Post processing is skipped, and a ``Response`` with no messages is
    returned, when ``messages`` is empty, when the last message carries tool
    calls, is not marked ``response_type == "internal"``, has no content, or
    when ``post_process_instructions`` is empty.

    Args:
        messages (list): Conversation messages; only the last one is processed.
        post_processing_agent_name (str): Name appended to the message's
            'sender' as ``"<sender> >> <name>"``.
        post_process_instructions (str): Admin-configured rewrite instructions.
        style_prompt (str, optional): Extra style guidance added to the prompt.
        context (str, optional): Extra context added to the prompt.
        model (str): Model name passed to ``generate_openai_output``.
        tokens_used (dict, optional): Token accounting passed through
            ``update_tokens_used``; a fresh dict is used when omitted.
        last_agent (Agent, optional): Agent whose ``instructions`` and
            ``history`` feed the prompt; when omitted both are treated as empty.

    Returns:
        Response: ``messages`` holds the rewritten message (or is empty when
            skipped), ``tokens_used`` the updated accounting, ``agent`` is
            ``last_agent`` and ``error_msg`` is ''.
    """
    # A literal `{}` default would be one dict shared by every call.
    if tokens_used is None:
        tokens_used = {}

    # `last_agent` is optional in the signature, so do not dereference None.
    agent_instructions = last_agent.instructions if last_agent is not None else ''
    agent_history = (last_agent.history if last_agent is not None else None) or []

    if not messages:
        logger.info("No messages to post process, skipping post processing")
        return Response(messages=[], tokens_used=tokens_used, agent=last_agent, error_msg='')

    # Work on a copy so the caller's message is left untouched.
    pending_msg = copy.deepcopy(messages[-1])
    logger.debug(f"Pending message keys: {pending_msg.keys()}")

    skip_reason = _post_process_skip_reason(pending_msg, post_process_instructions)
    if skip_reason is not None:
        logger.info(skip_reason)
        # NOTE(review): the log text says the last message is set to external,
        # but no message is returned on this path (the earlier write to the
        # discarded copy had no effect); confirm the caller handles this.
        return Response(messages=[], tokens_used=tokens_used, agent=last_agent, error_msg='')

    # The final history entry is left out of the rendered history.
    separator = "\n" + "*" * 100 + "\n"
    agent_history_str = separator.join(
        f"Role: {message['role']} | Content: {message.get('content', 'None')} | Tool Calls: {message.get('tool_calls', 'None')}"
        for message in agent_history[:-1]
    )
    logger.debug(f"Agent history: {agent_history_str}")

    prompt = _build_post_process_prompt(
        post_process_instructions=post_process_instructions,
        agent_history_str=agent_history_str,
        agent_instructions=agent_instructions,
        agent_response=pending_msg['content'],
        context=context,
        style_prompt=style_prompt,
    )

    logger.debug(f"Sanitizing response for style. Original response: {pending_msg['content']}")
    completion = generate_openai_output(
        messages=[
            {"role": "system", "content": prompt}
        ],
        model=model,
        return_completion=True
    )
    content = completion.choices[0].message.content
    if content:
        content = content.strip()
    tokens_used = update_tokens_used(provider="openai", model=model, tokens_used=tokens_used, completion=completion)
    logger.debug(f"Response after style check: {content}, tokens used: {tokens_used}")

    # Keep the agent's own text if the model returned nothing usable.
    if content:
        pending_msg['content'] = content
    pending_msg['response_type'] = "external"

    sender = pending_msg.get('sender')
    if sender is None:
        pending_msg['sender'] = post_processing_agent_name
    else:
        pending_msg['sender'] = sender + f' >> {post_processing_agent_name}'

    return Response(
        messages=[pending_msg],
        tokens_used=tokens_used,
        agent=last_agent,
        error_msg=''
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue