mirror of
https://github.com/katanemo/plano.git
synced 2026-07-02 15:51:02 +02:00
delete rag agent demo
This commit is contained in:
parent
d3595aeee1
commit
eed776fc94
15 changed files with 0 additions and 2765 deletions
|
|
@ -1,28 +0,0 @@
|
||||||
# RAG Agent Query Parser
|
|
||||||
|
|
||||||
A FastAPI service that rewrites user queries using archgw and gpt-4o-mini for better retrieval accuracy.
|
|
||||||
|
|
||||||
## How it Works
|
|
||||||
|
|
||||||
1. Receives a chat completion request with conversation history
|
|
||||||
2. Calls archgw's LLM gateway with gpt-4o-mini to rewrite the last user query
|
|
||||||
3. Returns the rewritten query as the assistant response
|
|
||||||
|
|
||||||
## Setup and Running
|
|
||||||
|
|
||||||
1. **Start archgw**:
|
|
||||||
```bash
|
|
||||||
archgw up --foreground
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Start the query parser service**:
|
|
||||||
```bash
|
|
||||||
uv run python -m rag_agent.query_parser
|
|
||||||
```
|
|
||||||
|
|
||||||
## Configuration
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# archgw LLM Gateway base URL (default: http://localhost:12000/v1)
|
|
||||||
export LLM_GATEWAY_ENDPOINT="http://localhost:12000/v1"
|
|
||||||
```
|
|
||||||
|
|
@ -1,43 +0,0 @@
|
||||||
# Gateway configuration for the RAG agent demo.
version: v0.3.0

# HTTP endpoints the gateway can call. Only query_rewriter, context_builder
# and rag_agent are referenced by the listener below; research_agent and
# weather_forecast_agent are declared but unused in this file.
agents:
  - id: query_rewriter
    url: http://host.docker.internal:10500/v1/chat/completions
  - id: context_builder
    url: http://host.docker.internal:10501/v1/chat/completions
  - id: rag_agent
    url: http://host.docker.internal:10502/v1/chat/completions
  - id: research_agent
    url: http://host.docker.internal:10503/v1/chat/completions
  - id: weather_forecast_agent
    url: http://host.docker.internal:10504/process

# Upstream LLMs. The access keys are taken from the OPENAI_API_KEY variable.
model_providers:
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
  - model: ollama/llama3.1
    base_url: http://host.docker.internal:11434

# Friendly names that resolve to concrete models.
model_aliases:
  fast-llm:
    target: gpt-4o-mini
  smart-llm:
    target: gpt-4o

listeners:
  - type: agent
    name: agent_1
    port: 8001
    router: arch_agent_router
    agents:
      - id: rag_agent
        description: virtual assistant for device contracts for simple queries
        # Filters are listed in the order query_rewriter, then context_builder.
        filter_chain:
          - query_rewriter
          - context_builder

tracing:
  random_sampling: 100
|
|
||||||
|
|
@ -1,17 +0,0 @@
|
||||||
# Supporting containers for the demo: a Jaeger build and an Open WebUI front end.
services:
  jaeger:
    build:
      context: ../../shared/jaeger
    ports:
      - "16686:16686"
      - "4317:4317"
      - "4318:4318"

  open-web-ui:
    image: dyrnq/open-webui:main
    restart: always
    ports:
      - "8080:8080"
    environment:
      - DEFAULT_MODEL=gpt-4o-mini
      - ENABLE_OPENAI_API=true
      # Points the UI at port 8001 on the Docker host.
      - OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "rag_agent"
version = "0.1.0"
description = "RAG Agent"
readme = "README.md"
requires-python = ">=3.10"
# Kept in alphabetical order; version floors are unchanged.
dependencies = [
    "click>=8.2.1",
    "fastapi>=0.104.1",
    "fastmcp>=2.12.2",
    "mcp>=1.13.1",
    "openai>=1.0.0",
    "pydantic>=2.11.7",
    "uvicorn>=0.24.0",
]

# Console entry point: `rag_agent` runs the `main` callable of the rag_agent package.
[project.scripts]
rag_agent = "rag_agent:main"
|
|
||||||
|
|
@ -1,64 +0,0 @@
|
||||||
# Sample Queries for Knowledge Base RAG Agent
|
|
||||||
|
|
||||||
## Service Level Agreement Queries
|
|
||||||
- What is the guaranteed uptime percentage for TechCorp's cloud services?
|
|
||||||
- What remedies are available if the API response time exceeds the agreed threshold?
|
|
||||||
- How quickly must TechCorp respond to critical support issues?
|
|
||||||
- What monitoring and reporting requirements are specified in the SLA?
|
|
||||||
- When was the TechCorp service agreement signed and by whom?
|
|
||||||
|
|
||||||
## Privacy Policy Queries
|
|
||||||
- What encryption methods does DataSecure use to protect data?
|
|
||||||
- How long does DataSecure retain personal data after account deletion?
|
|
||||||
- What rights do users have regarding their personal information?
|
|
||||||
- Can DataSecure sell user data to third parties for marketing?
|
|
||||||
- Who should be contacted for privacy-related concerns at DataSecure?
|
|
||||||
|
|
||||||
## Supply Chain Agreement Queries
|
|
||||||
- What types of automotive components does PrecisionParts supply?
|
|
||||||
- What are the payment terms and volume discount structure?
|
|
||||||
- What quality standards must the supplied components meet?
|
|
||||||
- What are the penalties for late delivery?
|
|
||||||
- What insurance coverage requirements apply to the supplier?
|
|
||||||
|
|
||||||
## Student Data Management Queries
|
|
||||||
- What federal laws must EduTech comply with regarding student data?
|
|
||||||
- What security measures are in place to protect student information?
|
|
||||||
- How long are student records retained after graduation?
|
|
||||||
- What consent is required for students under 13 years old?
|
|
||||||
- Who can access student educational records?
|
|
||||||
|
|
||||||
## Investment Advisory Queries
|
|
||||||
- What is FinanceFirst's management fee structure?
|
|
||||||
- What types of investments are included in the advisory services?
|
|
||||||
- What regulatory body oversees FinanceFirst Advisors?
|
|
||||||
- How often are portfolio reviews conducted?
|
|
||||||
- What are the client's responsibilities under this agreement?
|
|
||||||
|
|
||||||
## Healthcare Standards Queries
|
|
||||||
- What is the target response time for emergency code teams?
|
|
||||||
- What hand hygiene compliance rate is required?
|
|
||||||
- How quickly must medical records be completed after patient encounters?
|
|
||||||
- What continuing education requirements apply to nursing staff?
|
|
||||||
- What patient safety protocols are mandatory upon admission?
|
|
||||||
|
|
||||||
## Cross-Document Queries
|
|
||||||
- Which agreements include confidentiality or data protection provisions?
|
|
||||||
- What are the common termination notice periods across different contract types?
|
|
||||||
- Which documents specify insurance or liability coverage requirements?
|
|
||||||
- What compliance and regulatory requirements are mentioned across agreements?
|
|
||||||
- Which contracts include performance metrics or service level commitments?
|
|
||||||
|
|
||||||
## Complex Analysis Queries
|
|
||||||
- Compare the data retention policies across the privacy policy and student data management documents.
|
|
||||||
- What are the different approaches to risk management across the supply chain and investment advisory agreements?
|
|
||||||
- How do the security measures in the healthcare standards compare to those in the privacy policy?
|
|
||||||
- Which agreements provide the most detailed compliance and regulatory frameworks?
|
|
||||||
- What common themes exist in the quality assurance requirements across different industries?
|
|
||||||
|
|
||||||
## Document-Specific Detail Queries
|
|
||||||
- List all the specific percentages, timeframes, and numerical requirements mentioned in the SLA.
|
|
||||||
- What are all the contact persons and their roles mentioned across the documents?
|
|
||||||
- Identify all the compliance standards and certifications referenced in the supply chain agreement.
|
|
||||||
- What are the specific consequences or penalties mentioned for non-compliance across agreements?
|
|
||||||
- List all the third-party systems, tools, or services mentioned in the documents.
|
|
||||||
|
|
@ -1,63 +0,0 @@
|
||||||
import click
|
|
||||||
from mcp.server.fastmcp import FastMCP
|
|
||||||
|
|
||||||
# Module-level FastMCP handle. It stays None until main() runs in MCP mode and
# rebinds it.
mcp = None


# CLI entry point for the demo.
#
# With --rest-server, starts the REST server of the agent named by --agent
# ("query_parser", "context_builder" or "response_generator") on
# host:rest_port; any other value prints a hint and exits.
# Without --rest-server, creates the FastMCP server on host:port, imports the
# requested agent module (or all three when --agent is missing/unrecognized)
# and runs it with the chosen transport.
#
# NOTE: documented with comments rather than a docstring on purpose -- click
# would otherwise show the docstring in `--help`, changing the CLI output.
@click.command()
@click.option("--transport", "transport", default="stdio")
@click.option("--host", "host", default="localhost")
@click.option("--port", "port", default=10101)
@click.option("--agent", "agent", default=None)
@click.option(
    "--rest-server",
    "rest_server",
    is_flag=True,
    help="Start REST server instead of MCP server",
)
@click.option("--rest-port", "rest_port", default=8000, help="Port for REST server")
def main(host, port, agent, transport, rest_server, rest_port):
    global mcp

    if rest_server:
        print(f"Starting REST server on {host}:{rest_port} for agent: {agent}")

        # Imports are deferred so only the selected agent's module is loaded.
        if agent == "query_parser":
            from rag_agent.query_rewriter_agent import start_server
        elif agent == "context_builder":
            from rag_agent.context_builder_agent import start_server
        elif agent == "response_generator":
            from rag_agent.response_generator_agent import start_server
        else:
            print("Please specify an agent to start with --agent option.")
            return

        start_server(host=host, port=rest_port)
        return

    print(f"Starting agent(s): {agent if agent else 'all'}")
    mcp = FastMCP("RAG Agent Demo", host=host, port=port)

    # Any value other than the three known names means "load everything".
    load_all = agent not in ("query_parser", "document_store", "response_generator")

    if load_all or agent == "query_parser":
        import rag_agent.query_parser
    if load_all or agent == "document_store":
        import rag_agent.document_store
    if load_all or agent == "response_generator":
        import rag_agent.response_generator
    if load_all:
        print("All agents loaded.")

    mcp.run(transport=transport)


if __name__ == "__main__":
    main()
|
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
from . import main
|
|
||||||
|
|
||||||
# Run the package's CLI entry point when this module is executed as a script.
if __name__ == "__main__":
    main()
|
|
||||||
|
|
@ -1,36 +0,0 @@
|
||||||
from pydantic import BaseModel
|
|
||||||
from typing import List, Optional, Dict, Any
|
|
||||||
|
|
||||||
|
|
||||||
class ChatMessage(BaseModel):
    """A single conversation message: who said it and what was said."""

    # Speaker role; the agents in this package compare it against "user".
    role: str
    # Plain-text body of the message.
    content: str
|
|
||||||
|
|
||||||
|
|
||||||
class ChatCompletionRequest(BaseModel):
    """Request body accepted by the agents' ``/v1/chat/completions`` endpoints.

    Only ``model`` and ``messages`` are required; every other field carries
    a default.
    """

    model: str
    messages: List[ChatMessage]

    # Sampling controls.
    temperature: Optional[float] = 1.0
    max_tokens: Optional[int] = None
    top_p: Optional[float] = 1.0
    frequency_penalty: Optional[float] = 0.0
    presence_penalty: Optional[float] = 0.0

    # Output controls.
    stream: Optional[bool] = False
    stop: Optional[List[str]] = None
|
|
||||||
|
|
||||||
|
|
||||||
class ChatCompletionResponse(BaseModel):
    """Non-streaming chat completion payload returned by the agents."""

    id: str
    # Fixed discriminator for a complete (non-chunked) response.
    object: str = "chat.completion"
    created: int
    model: str
    # Kept as free-form dicts rather than typed models.
    choices: List[Dict[str, Any]]
    # Token counters keyed by name.
    usage: Dict[str, int]
|
|
||||||
|
|
||||||
|
|
||||||
class ChatCompletionStreamResponse(BaseModel):
    """One chunk of a streamed chat completion (no ``usage`` field)."""

    id: str
    # Fixed discriminator for a streamed chunk.
    object: str = "chat.completion.chunk"
    created: int
    model: str
    # Kept as free-form dicts rather than typed models.
    choices: List[Dict[str, Any]]
|
|
||||||
|
|
@ -1,280 +0,0 @@
|
||||||
import json
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from typing import List, Optional, Dict, Any
|
|
||||||
from fastapi import FastAPI, HTTPException, Request
|
|
||||||
from openai import AsyncOpenAI
|
|
||||||
import os
|
|
||||||
import logging
|
|
||||||
import csv
|
|
||||||
from pathlib import Path
|
|
||||||
import uvicorn
|
|
||||||
|
|
||||||
from .api import ChatMessage, ChatCompletionRequest, ChatCompletionResponse
|
|
||||||
|
|
||||||
|
|
||||||
# Logging: every record emitted through basicConfig is tagged [CONTEXT_BUILDER].
logging.basicConfig(
    format="%(asctime)s - [CONTEXT_BUILDER] - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# archgw LLM gateway settings; the endpoint can be overridden through the
# LLM_GATEWAY_ENDPOINT environment variable.
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
RAG_MODEL = "gpt-4o-mini"

# OpenAI-compatible async client pointed at archgw.
archgw_client = AsyncOpenAI(
    api_key="EMPTY",  # archgw doesn't require a real API key
    base_url=LLM_GATEWAY_ENDPOINT,
)

# In-memory knowledge base: a list of {"path", "content"} dicts filled by
# load_knowledge_base().
knowledge_base = []
|
|
||||||
|
|
||||||
|
|
||||||
def load_knowledge_base():
    """Populate the module-level ``knowledge_base`` from the bundled CSV.

    Reads ``sample_knowledge_base.csv`` from the directory containing this
    module and keeps the ``path`` and ``content`` columns of every row.
    Any failure (missing file, missing column, decode error, ...) is logged
    and leaves ``knowledge_base`` as an empty list; nothing is raised.
    """
    global knowledge_base

    csv_path = Path(__file__).parent / "sample_knowledge_base.csv"
    print(f"Loading knowledge base from {csv_path}")

    try:
        # utf-8-sig transparently drops a leading BOM if the file has one.
        with open(csv_path, "r", encoding="utf-8-sig") as handle:
            rows = [
                {"path": record["path"], "content": record["content"]}
                for record in csv.DictReader(handle)
            ]
        knowledge_base = rows
        logger.info(f"Loaded {len(knowledge_base)} documents from knowledge base")
    except Exception as exc:
        logger.error(f"Error loading knowledge base: {exc}")
        knowledge_base = []
|
|
||||||
|
|
||||||
|
|
||||||
async def find_relevant_passages(
    query: str, traceparent: Optional[str] = None, top_k: int = 3
) -> List[Dict[str, str]]:
    """Ask the LLM which knowledge-base passages are most relevant to ``query``.

    Args:
        query: The user query to match against the knowledge base.
        traceparent: Optional ``traceparent`` header value forwarded to archgw.
        top_k: Maximum number of passages to return.

    Returns:
        Up to ``top_k`` distinct ``{"path", "content"}`` dicts from
        ``knowledge_base``, in the order the model listed them. Returns an
        empty list when the knowledge base is empty, the model answers
        "NONE" (or nothing), or any error occurs (errors are logged).
    """
    if not knowledge_base:
        logger.warning("Knowledge base is empty")
        return []

    # Create a system prompt for passage selection
    system_prompt = f"""You are a retrieval assistant that selects the most relevant document passages for a given query.

Given a user query and a list of document passages, identify the {top_k} most relevant passages that would help answer the query.

Query: {query}

Available passages:
"""

    # Add all passages with indices; content is truncated to 500 characters.
    for i, doc in enumerate(knowledge_base):
        system_prompt += (
            f"\n[{i}] Path: {doc['path']}\nContent: {doc['content'][:500]}...\n"
        )

    system_prompt += f"""

Please respond with ONLY the indices of the {top_k} most relevant passages, separated by commas (e.g., "0,3,7").
If fewer than {top_k} passages are relevant, return only the relevant ones.
If no passages are relevant, return "NONE"."""

    try:
        # Call archgw to select relevant passages
        logger.info(f"Calling archgw to find relevant passages for query: '{query}'")

        # Prepare extra headers if traceparent is provided
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent:
            extra_headers["traceparent"] = traceparent

        response = await archgw_client.chat.completions.create(
            model=RAG_MODEL,
            messages=[{"role": "system", "content": system_prompt}],
            temperature=0.1,
            max_tokens=50,
            extra_headers=extra_headers,
        )

        # The message content may be None; treat that as an empty answer.
        result = (response.choices[0].message.content or "").strip()
        logger.info(f"LLM selected passages: {result}")

        # The prompt shows the expected answers inside quotes and the passages
        # as "[i]", so models sometimes echo that punctuation back. Strip it
        # instead of silently discarding the affected indices.
        wrappers = " \t\r\n[]()\"'."
        if not result or result.strip(wrappers).upper() == "NONE":
            return []

        selected_passages = []
        seen = set()
        for token in result.split(","):
            if len(selected_passages) >= top_k:
                break  # never return more than the caller asked for
            token = token.strip(wrappers)
            # isascii() guards against characters such as superscript digits,
            # for which isdigit() is True but int() raises ValueError.
            if not (token.isascii() and token.isdigit()):
                continue
            idx = int(token)
            if idx in seen or not (0 <= idx < len(knowledge_base)):
                continue
            seen.add(idx)
            selected_passages.append(knowledge_base[idx])

        logger.info(f"Selected {len(selected_passages)} relevant passages")
        return selected_passages

    except Exception as e:
        logger.error(f"Error finding relevant passages: {e}")
        return []
|
|
||||||
|
|
||||||
|
|
||||||
async def augment_query_with_context(
    messages: List[ChatMessage], traceparent: Optional[str] = None
) -> List[ChatMessage]:
    """Append retrieved context to the most recent user message.

    Looks up the last message whose role is "user", asks
    ``find_relevant_passages`` for matching passages, and returns a copy of
    ``messages`` in which that user message is replaced by
    "<query> RELEVANT CONTEXT:" followed by the passages. The input list is
    returned unchanged when there is no (non-empty) user message or no
    passage was selected.
    """
    # Scan from the end so the most recent user turn wins.
    last_user_index = -1
    last_user_message = None
    for position in reversed(range(len(messages))):
        candidate = messages[position]
        if candidate.role == "user":
            last_user_index = position
            last_user_message = candidate.content
            break

    if not last_user_message:
        logger.warning("No user message found in conversation")
        return messages

    logger.info(f"Processing user query: '{last_user_message}'")

    relevant_passages = await find_relevant_passages(last_user_message, traceparent)
    if not relevant_passages:
        logger.info("No relevant passages found, returning original messages")
        return messages

    # One "Document N (path):" section per passage, numbered from 1.
    context = "\n\n".join(
        f"Document {number} ({passage['path']}):\n{passage['content']}"
        for number, passage in enumerate(relevant_passages, start=1)
    )
    augmented_content = f"{last_user_message} RELEVANT CONTEXT:\n{context}"

    # Shallow copy: only the slot of the last user message is replaced.
    updated_messages = messages[:]
    updated_messages[last_user_index] = ChatMessage(
        role="user", content=augmented_content
    )

    logger.info(f"Augmented user query with {len(relevant_passages)} relevant passages")
    return updated_messages
|
|
||||||
|
|
||||||
|
|
||||||
class Response(BaseModel):
    """A query string paired with a free-form metadata dict."""

    query: str
    metadata: dict
|
|
||||||
|
|
||||||
|
|
||||||
# FastAPI app for REST server. The title previously read "Content Builder",
# a typo for the context builder this module implements.
app = FastAPI(title="RAG Context Builder Agent", version="1.0.0")
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/v1/chat/completions")
async def chat_completions(
    request_body: ChatCompletionRequest, request: Request
) -> ChatCompletionResponse:
    """Chat completions endpoint that augments user queries with relevant context from the knowledge base."""
    # The single returned choice carries the whole (augmented) message list,
    # JSON-encoded, as the content of a "user" message.
    import time
    import uuid

    logger.info(
        f"Received chat completion request with {len(request_body.messages)} messages"
    )

    # Read traceparent header if present
    traceparent_header = request.headers.get("traceparent")
    if traceparent_header:
        logger.info(f"Received traceparent header: {traceparent_header}")
    else:
        logger.info("No traceparent header found")

    # Augment the user query with relevant context
    updated_messages = await augment_query_with_context(
        request_body.messages, traceparent_header
    )

    # model_dump() is the pydantic v2 replacement for the deprecated .dict().
    messages_history_json = json.dumps([msg.model_dump() for msg in updated_messages])

    # Whitespace-delimited word counts stand in for token counts.
    prompt_tokens = sum(len(msg.content.split()) for msg in updated_messages)
    completion_tokens = len("Context added to user query.".split())

    return ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
        created=int(time.time()),
        model=request_body.model,
        choices=[
            {
                "index": 0,
                "message": {"role": "user", "content": messages_history_json},
                "finish_reason": "stop",
            }
        ],
        usage={
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    )
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Load the knowledge base, then serve ``app`` on 0.0.0.0:8000.

    Unlike ``start_server``, host and port are fixed and uvicorn's default
    logging configuration is used.
    """
    load_knowledge_base()
    uvicorn.run(app, port=8000, host="0.0.0.0")


# Script entry point.
if __name__ == "__main__":
    main()
|
|
||||||
|
|
||||||
|
|
||||||
def start_server(host: str = "localhost", port: int = 8000):
    """Load the knowledge base and serve ``app`` with uvicorn on ``host:port``.

    A custom logging config is handed to uvicorn: the root logger writes
    INFO-and-above records to stdout using the [CONTEXT_BUILDER] format.
    """
    load_knowledge_base()

    stdout_handler = {
        "formatter": "default",
        "class": "logging.StreamHandler",
        "stream": "ext://sys.stdout",
    }
    tagged_formatter = {
        "format": "%(asctime)s - [CONTEXT_BUILDER] - %(levelname)s - %(message)s",
    }
    log_config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {"default": tagged_formatter},
        "handlers": {"default": stdout_handler},
        "root": {"level": "INFO", "handlers": ["default"]},
    }

    uvicorn.run(app, host=host, port=port, log_config=log_config)
|
|
||||||
|
|
@ -1,188 +0,0 @@
|
||||||
import json
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from typing import List, Optional, Dict, Any
|
|
||||||
from fastapi import FastAPI, HTTPException, Request
|
|
||||||
from openai import AsyncOpenAI
|
|
||||||
import os
|
|
||||||
import logging
|
|
||||||
import uvicorn
|
|
||||||
|
|
||||||
from .api import ChatMessage, ChatCompletionRequest, ChatCompletionResponse
|
|
||||||
|
|
||||||
|
|
||||||
# Logging: every record emitted through basicConfig is tagged [QUERY_REWRITER].
logging.basicConfig(
    format="%(asctime)s - [QUERY_REWRITER] - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# archgw LLM gateway settings; the endpoint can be overridden through the
# LLM_GATEWAY_ENDPOINT environment variable.
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
QUERY_REWRITE_MODEL = "gpt-4o-mini"

# OpenAI-compatible async client pointed at archgw.
archgw_client = AsyncOpenAI(
    api_key="EMPTY",  # archgw doesn't require a real API key
    base_url=LLM_GATEWAY_ENDPOINT,
)
|
|
||||||
|
|
||||||
|
|
||||||
async def rewrite_query_with_archgw(
    messages: List[ChatMessage], traceparent_header: Optional[str]
) -> str:
    """Rewrite the last user message into a retrieval-friendly query via archgw.

    Args:
        messages: Full conversation history; sent to the model after a
            query-rewriting system prompt.
        traceparent_header: ``traceparent`` header value to forward, or
            ``None`` when the incoming request had none.

    Returns:
        The rewritten query. If the call fails or the model returns an empty
        answer, the content of the most recent user message is returned
        instead; ``""`` if the history contains no user message.
    """
    # Prepare the system prompt for query rewriting
    system_prompt = """You are a query rewriter that improves user queries for better retrieval.

Given a conversation history, rewrite the last user message to be more specific and context-aware.
The rewritten query should:
1. Include relevant context from previous messages
2. Be clear and specific for information retrieval
3. Maintain the user's intent
4. Be concise but comprehensive

Return only the rewritten query, nothing else."""

    # System prompt first, then the conversation history unchanged.
    rewrite_messages = [{"role": "system", "content": system_prompt}]
    rewrite_messages.extend(
        {"role": msg.role, "content": msg.content} for msg in messages
    )

    try:
        # Call archgw using OpenAI client
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header
        logger.info(f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to rewrite query")
        response = await archgw_client.chat.completions.create(
            model=QUERY_REWRITE_MODEL,
            messages=rewrite_messages,
            temperature=0.3,
            max_tokens=200,
            extra_headers=extra_headers,
        )

        # The message content may be None; treat that like an empty rewrite.
        rewritten_query = (response.choices[0].message.content or "").strip()
        if rewritten_query:
            logger.info(f"Query rewritten successfully: '{rewritten_query}'")
            return rewritten_query

        # An empty rewrite would blank out the user's question downstream,
        # so fall through to the original message instead.
        logger.warning("archgw returned an empty rewritten query")

    except Exception as e:
        logger.error(f"Error rewriting query: {e}")

    # If rewriting fails, return the original last user message
    logger.info("Falling back to original user message")
    for message in reversed(messages):
        if message.role == "user":
            return message.content
    return ""
|
|
||||||
|
|
||||||
|
|
||||||
class Response(BaseModel):
    """Result returned by ``parse_query``: the query plus a metadata dict."""

    query: str
    metadata: dict
|
|
||||||
|
|
||||||
|
|
||||||
# REST application object; the route handlers below register themselves on it.
app = FastAPI(
    title="RAG Agent Query Parser",
    version="1.0.0",
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/v1/chat/completions")
async def chat_completions(request_body: ChatCompletionRequest, request: Request):
    """Chat completions endpoint that rewrites the last user query using archgw."""
    # The single returned choice carries the whole (updated) message list,
    # JSON-encoded, as the content of a "user" message.
    import time
    import uuid

    logger.info(
        f"Received chat completion request with {len(request_body.messages)} messages"
    )

    # Read traceparent header if present
    traceparent_header = request.headers.get("traceparent")
    if traceparent_header:
        logger.info(f"Received traceparent header: {traceparent_header}")
    else:
        logger.info("No traceparent header found")

    # Call archgw to rewrite the last user query
    rewritten_query = await rewrite_query_with_archgw(
        request_body.messages, traceparent_header
    )

    # Create updated messages with the rewritten query
    updated_messages = request_body.messages.copy()

    # Find and update the last user message with the rewritten query
    for i in range(len(updated_messages) - 1, -1, -1):
        if updated_messages[i].role == "user":
            original_query = updated_messages[i].content
            updated_messages[i] = ChatMessage(role="user", content=rewritten_query)
            logger.info(
                f"Updated user query from '{original_query}' to '{rewritten_query}'"
            )
            break

    # model_dump() is the pydantic v2 replacement for the deprecated .dict().
    messages_history_json = json.dumps([msg.model_dump() for msg in updated_messages])

    # Whitespace-delimited word counts stand in for token counts.
    prompt_tokens = sum(len(msg.content.split()) for msg in updated_messages)
    completion_tokens = len("Updated query for better retrieval.".split())

    return ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
        created=int(time.time()),
        model=request_body.model,
        choices=[
            {
                "index": 0,
                "message": {"role": "user", "content": messages_history_json},
                "finish_reason": "stop",
            }
        ],
        usage={
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    )
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
async def health_check():
    """Health check endpoint."""
    # Always reports healthy; no dependency is probed.
    payload = {"status": "healthy"}
    return payload
|
|
||||||
|
|
||||||
|
|
||||||
def parse_query(query):
    """Wrap ``query`` in a ``Response`` whose metadata marks it as valid.

    No parsing or validation is performed: ``is_valid`` is always ``True``.
    """
    metadata = {"is_valid": True}
    return Response(query=query, metadata=metadata)
|
|
||||||
|
|
||||||
|
|
||||||
def start_server(host: str = "localhost", port: int = 8000):
    """Serve ``app`` with uvicorn on ``host:port``.

    A custom logging config is handed to uvicorn: the root logger writes
    INFO-and-above records to stdout using the [QUERY_REWRITER] format.
    """
    stdout_handler = {
        "formatter": "default",
        "class": "logging.StreamHandler",
        "stream": "ext://sys.stdout",
    }
    tagged_formatter = {
        "format": "%(asctime)s - [QUERY_REWRITER] - %(levelname)s - %(message)s",
    }
    log_config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {"default": tagged_formatter},
        "handlers": {"default": stdout_handler},
        "root": {"level": "INFO", "handlers": ["default"]},
    }

    uvicorn.run(app, host=host, port=port, log_config=log_config)
|
|
||||||
|
|
@ -1,302 +0,0 @@
|
||||||
import json
|
|
||||||
from fastapi import FastAPI, Request
|
|
||||||
from fastapi.responses import StreamingResponse
|
|
||||||
from openai import AsyncOpenAI
|
|
||||||
import os
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
import uuid
|
|
||||||
import uvicorn
|
|
||||||
import asyncio
|
|
||||||
|
|
||||||
from .api import (
|
|
||||||
ChatCompletionRequest,
|
|
||||||
ChatCompletionResponse,
|
|
||||||
ChatCompletionStreamResponse,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Logging: every record emitted through basicConfig is tagged [RESPONSE_GENERATOR].
logging.basicConfig(
    format="%(asctime)s - [RESPONSE_GENERATOR] - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# archgw LLM gateway settings; the endpoint can be overridden through the
# LLM_GATEWAY_ENDPOINT environment variable.
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
RESPONSE_MODEL = "gpt-4o"

# Instruction placed ahead of the conversation when generating the answer.
SYSTEM_PROMPT = """You are a helpful assistant that generates coherent, contextual responses.

Given a conversation history, generate a helpful and relevant response based on all the context available in the messages.
Your response should:
1. Be contextually aware of the entire conversation
2. Address the user's needs appropriately
3. Be helpful and informative
4. Maintain a natural conversational tone

Generate a complete response to assist the user."""

# OpenAI-compatible async client pointed at archgw.
archgw_client = AsyncOpenAI(
    api_key="EMPTY",  # archgw doesn't require a real API key
    base_url=LLM_GATEWAY_ENDPOINT,
)

# REST application object; the route handlers below register themselves on it.
app = FastAPI(title="RAG Agent Response Generator", version="1.0.0")
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_response_messages(request_body: ChatCompletionRequest):
    """Build the message list sent to the model for response generation.

    Returns a new list of ``{"role", "content"}`` dicts: ``SYSTEM_PROMPT`` as
    a system message, followed by every message of ``request_body`` in order.
    """
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    history = [
        {"role": turn.role, "content": turn.content}
        for turn in request_body.messages
    ]
    return [system_message, *history]
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/v1/chat/completions")
async def chat_completions(request_body: ChatCompletionRequest, request: Request):
    """Chat completions endpoint that generates a coherent response based on all context."""
    logger.info(
        f"Received chat completion request with {len(request_body.messages)} messages"
    )

    # Read traceparent header if present
    traceparent_header = request.headers.get("traceparent")
    if traceparent_header:
        logger.info(f"Received traceparent header: {traceparent_header}")
    else:
        logger.info("No traceparent header found")

    # Non-streaming requests are answered with a single response.
    if not request_body.stream:
        return await non_streaming_chat_completions(request_body, traceparent_header)

    # Streaming was requested. media_type used to say "text/plain" while the
    # explicit header said "text/event-stream"; both now agree. The explicit
    # header is kept so the Content-Type value sent on the wire is unchanged.
    return StreamingResponse(
        stream_chat_completions(request_body, traceparent_header),
        media_type="text/event-stream",
        headers={
            "content-type": "text/event-stream",
        },
    )
|
|
||||||
|
|
||||||
|
|
||||||
async def stream_chat_completions(
    request_body: ChatCompletionRequest, traceparent_header: str = None
):
    """Generate streaming chat completions as Server-Sent Events frames.

    Sends the conversation (system prompt prepended by
    ``prepare_response_messages``) to archgw with ``stream=True`` and yields:

    1. one ``data: <json>`` frame per non-empty content delta,
    2. a final frame with ``finish_reason="stop"`` whose ``message.content``
       is a JSON-encoded one-element list holding the full assistant message,
    3. the ``data: [DONE]`` terminator.

    Args:
        request_body: Incoming request; ``temperature``, ``max_tokens`` and
            ``model`` are read from it.
        traceparent_header: When truthy, forwarded to archgw as the
            ``traceparent`` request header.

    Yields:
        str: SSE-formatted frames. If anything raises, the error is logged
        and a single apology chunk followed by ``data: [DONE]`` is yielded
        instead of propagating the exception.
    """
    # Prepare messages for response generation
    response_messages = prepare_response_messages(request_body)

    try:
        # Call archgw using OpenAI client for streaming
        logger.info(
            f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to generate streaming response"
        )

        # Always ask for retries; add traceparent only when one was received.
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header

        # Use an explicit None check: `temperature or 0.7` would silently
        # replace a caller-requested temperature of 0 with 0.7.
        temperature = (
            0.7 if request_body.temperature is None else request_body.temperature
        )

        response_stream = await archgw_client.chat.completions.create(
            model=RESPONSE_MODEL,
            messages=response_messages,
            temperature=temperature,
            # A missing or zero max_tokens falls back to 1000.
            max_tokens=request_body.max_tokens or 1000,
            stream=True,
            extra_headers=extra_headers,
        )

        # One id/timestamp is shared by every chunk of this completion.
        completion_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
        created_time = int(time.time())
        collected_content = []

        async for chunk in response_stream:
            # Skip chunks that carry no text (e.g. role-only or empty deltas).
            if chunk.choices and chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                collected_content.append(content)

                # Create streaming response chunk
                stream_chunk = ChatCompletionStreamResponse(
                    id=completion_id,
                    created=created_time,
                    model=request_body.model,
                    choices=[
                        {
                            "index": 0,
                            "delta": {"content": content},
                            "finish_reason": None,
                        }
                    ],
                )

                yield f"data: {stream_chunk.model_dump_json()}\n\n"

        # Send final chunk with complete response in expected format
        full_response = "".join(collected_content)
        updated_history = [{"role": "assistant", "content": full_response}]

        final_chunk = ChatCompletionStreamResponse(
            id=completion_id,
            created=created_time,
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "delta": {},
                    "finish_reason": "stop",
                    "message": {
                        "role": "assistant",
                        "content": json.dumps(updated_history),
                    },
                }
            ],
        )

        yield f"data: {final_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"

    except Exception as e:
        # Keep the traceback in the log; the client only sees a generic apology.
        logger.error(f"Error generating streaming response: {e}", exc_info=True)

        # Send error as streaming response
        error_chunk = ChatCompletionStreamResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "delta": {
                        "content": "I apologize, but I'm having trouble generating a response right now. Please try again."
                    },
                    "finish_reason": "stop",
                }
            ],
        )

        yield f"data: {error_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
|
|
||||||
|
|
||||||
|
|
||||||
def _approximate_usage(request_body: ChatCompletionRequest, completion_text: str):
    """Estimate usage by counting whitespace-separated words, not real tokens."""
    prompt_words = sum(len(msg.content.split()) for msg in request_body.messages)
    completion_words = len(completion_text.split())
    return {
        "prompt_tokens": prompt_words,
        "completion_tokens": completion_words,
        "total_tokens": prompt_words + completion_words,
    }


async def non_streaming_chat_completions(
    request_body: ChatCompletionRequest, traceparent_header: str = None
):
    """Generate non-streaming chat completions.

    Sends the conversation (system prompt prepended by
    ``prepare_response_messages``) to archgw and wraps the stripped text of
    the first choice in a ``ChatCompletionResponse``.

    Args:
        request_body: Incoming request; ``temperature``, ``max_tokens`` and
            ``model`` are read from it.
        traceparent_header: When truthy, forwarded to archgw as the
            ``traceparent`` request header.

    Returns:
        ChatCompletionResponse: The generated reply, or a fixed apology
        message if the gateway call or response handling raises. ``usage``
        is a word-count approximation in both cases.
    """
    # Prepare messages for response generation
    response_messages = prepare_response_messages(request_body)

    try:
        # Call archgw using OpenAI client
        logger.info(f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to generate response")

        # Always ask for retries; add traceparent only when one was received.
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header

        # Use an explicit None check: `temperature or 0.7` would silently
        # replace a caller-requested temperature of 0 with 0.7.
        temperature = (
            0.7 if request_body.temperature is None else request_body.temperature
        )

        response = await archgw_client.chat.completions.create(
            model=RESPONSE_MODEL,
            messages=response_messages,
            temperature=temperature,
            # A missing or zero max_tokens falls back to 1000.
            max_tokens=request_body.max_tokens or 1000,
            extra_headers=extra_headers,
        )

        generated_response = response.choices[0].message.content.strip()
        logger.info("Response generated successfully")

        return ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": generated_response,
                    },
                    "finish_reason": "stop",
                }
            ],
            usage=_approximate_usage(request_body, generated_response),
        )

    except Exception as e:
        # Keep the traceback in the log; the client only sees a generic apology.
        logger.error(f"Error generating response: {e}", exc_info=True)

        # Fallback response
        fallback_message = "I apologize, but I'm having trouble generating a response right now. Please try again."
        return ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": fallback_message},
                    "finish_reason": "stop",
                }
            ],
            usage=_approximate_usage(request_body, fallback_message),
        )
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
async def health_check():
    """Liveness probe: unconditionally reports the service as healthy."""
    status_payload = {"status": "healthy"}
    return status_payload
|
|
||||||
|
|
||||||
|
|
||||||
def start_server(host: str = "localhost", port: int = 8000):
    """Run the FastAPI app under uvicorn with this module's log format.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.

    The logging config routes the root logger (level INFO) to stdout using
    the RESPONSE_GENERATOR-tagged record format and leaves loggers that
    already exist enabled.
    """
    record_format = "%(asctime)s - [RESPONSE_GENERATOR] - %(levelname)s - %(message)s"
    stdout_handler = {
        "formatter": "default",
        "class": "logging.StreamHandler",
        "stream": "ext://sys.stdout",
    }
    logging_config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {"default": {"format": record_format}},
        "handlers": {"default": stdout_handler},
        "root": {"level": "INFO", "handlers": ["default"]},
    }
    uvicorn.run(app, host=host, port=port, log_config=logging_config)
|
|
||||||
|
|
@ -1,257 +0,0 @@
|
||||||
path,content
|
|
||||||
TechCorp_CloudServices_SLA_Agreement_2024,"SERVICE LEVEL AGREEMENT
|
|
||||||
This Service Level Agreement (""SLA"") is entered into on March 15, 2024, between TechCorp Solutions Inc., a Delaware corporation (""Provider""), and CloudFirst Enterprises LLC (""Customer"").
|
|
||||||
|
|
||||||
DEFINITIONS
|
|
||||||
Service Availability: The percentage of time during which the cloud services are operational and accessible.
|
|
||||||
Downtime: Any period when the services are unavailable or inaccessible to Customer.
|
|
||||||
Response Time: The time between service request submission and initial response from Provider.
|
|
||||||
|
|
||||||
SERVICE COMMITMENTS
|
|
||||||
Provider guarantees 99.9% uptime for all cloud infrastructure services during any calendar month.
|
|
||||||
Average response time for API calls shall not exceed 200 milliseconds under normal operating conditions.
|
|
||||||
Customer support response times: Critical issues within 1 hour, Standard issues within 4 hours.
|
|
||||||
|
|
||||||
REMEDIES
|
|
||||||
For each full percentage point below 99.9% availability, Customer receives 10% credit on monthly fees.
|
|
||||||
If response times exceed 500ms for more than 5 minutes in any hour, Customer receives 5% monthly credit.
|
|
||||||
|
|
||||||
MONITORING AND REPORTING
|
|
||||||
Provider will maintain real-time monitoring systems and provide monthly performance reports.
|
|
||||||
All metrics will be measured from Provider's monitoring systems located in primary data centers.
|
|
||||||
|
|
||||||
This SLA remains in effect for the duration of the underlying service agreement.
|
|
||||||
|
|
||||||
Executed by:
|
|
||||||
TechCorp Solutions Inc.
|
|
||||||
Sarah Mitchell, VP Operations
|
|
||||||
Date: March 15, 2024
|
|
||||||
|
|
||||||
CloudFirst Enterprises LLC
|
|
||||||
Robert Chen, CTO
|
|
||||||
Date: March 16, 2024"
|
|
||||||
|
|
||||||
DataSecure_Privacy_Policy_v3.2,"PRIVACY POLICY
|
|
||||||
DataSecure Analytics, Inc. (""Company"") Privacy Policy
|
|
||||||
Effective Date: January 1, 2024
|
|
||||||
Last Updated: February 28, 2024
|
|
||||||
|
|
||||||
INFORMATION COLLECTION
|
|
||||||
We collect information you provide directly, such as account details, usage preferences, and communication records.
|
|
||||||
Automatically collected data includes IP addresses, browser types, device information, and service interaction logs.
|
|
||||||
Third-party integrations may provide additional user behavior and demographic information with consent.
|
|
||||||
|
|
||||||
DATA USAGE
|
|
||||||
Personal information is used to provide services, improve user experience, and communicate service updates.
|
|
||||||
Aggregated, non-identifiable data may be used for analytics, research, and service enhancement.
|
|
||||||
We do not sell personal information to third parties for marketing purposes.
|
|
||||||
|
|
||||||
DATA PROTECTION
|
|
||||||
All data is encrypted in transit using TLS 1.3 and at rest using AES-256 encryption.
|
|
||||||
Access controls limit data access to authorized personnel only on a need-to-know basis.
|
|
||||||
Regular security audits and penetration testing ensure ongoing protection measures.
|
|
||||||
|
|
||||||
DATA RETENTION
|
|
||||||
Personal data is retained for the duration of active service plus 24 months.
|
|
||||||
Logs and analytics data are retained for 12 months unless legally required otherwise.
|
|
||||||
Upon account deletion, personal data is permanently removed within 30 days.
|
|
||||||
|
|
||||||
USER RIGHTS
|
|
||||||
Users may request access to, correction of, or deletion of their personal information.
|
|
||||||
Data portability requests will be fulfilled in standard formats within 30 days.
|
|
||||||
Marketing communications can be opted out of at any time.
|
|
||||||
|
|
||||||
CONTACT
|
|
||||||
For privacy concerns, contact: privacy@datasecure.com
|
|
||||||
Data Protection Officer: Jennifer Walsh, jwalsh@datasecure.com"
|
|
||||||
|
|
||||||
GlobalManufacturing_SupplyChain_Contract_Q2_2024,"SUPPLY CHAIN AGREEMENT
|
|
||||||
This Supply Chain Agreement is entered into between GlobalManufacturing Corp (""Buyer"") and PrecisionParts Ltd (""Supplier"") effective April 1, 2024.
|
|
||||||
|
|
||||||
SCOPE OF SERVICES
|
|
||||||
Supplier will provide automotive components including brake assemblies, suspension parts, and electrical harnesses.
|
|
||||||
All products must meet ISO 9001 quality standards and automotive industry specifications.
|
|
||||||
Delivery schedule: Weekly shipments every Tuesday, with 48-hour advance shipping notifications.
|
|
||||||
|
|
||||||
PRICING AND PAYMENT
|
|
||||||
Component pricing is fixed for initial 6-month term with quarterly price review thereafter.
|
|
||||||
Payment terms: Net 45 days from invoice date via electronic transfer.
|
|
||||||
Volume discounts apply: 5% for orders exceeding 10,000 units per month, 8% for orders exceeding 25,000 units.
|
|
||||||
|
|
||||||
QUALITY REQUIREMENTS
|
|
||||||
All components must pass incoming inspection with less than 0.1% defect rate.
|
|
||||||
Supplier maintains quality certifications including IATF 16949 and environmental compliance.
|
|
||||||
Batch tracking and traceability required for all delivered components.
|
|
||||||
|
|
||||||
LOGISTICS AND DELIVERY
|
|
||||||
Supplier responsible for packaging, labeling, and delivery to Buyer's distribution centers.
|
|
||||||
Delivery windows: 8 AM - 4 PM, Monday through Friday, with advance appointment scheduling.
|
|
||||||
Late delivery penalties: 2% of shipment value for each day beyond scheduled delivery.
|
|
||||||
|
|
||||||
RISK MANAGEMENT
|
|
||||||
Supplier maintains business continuity plans and alternative sourcing strategies.
|
|
||||||
Force majeure events must be reported within 24 hours with mitigation plans.
|
|
||||||
Insurance requirements: $5M general liability, $2M product liability coverage.
|
|
||||||
|
|
||||||
INTELLECTUAL PROPERTY
|
|
||||||
All custom tooling and specifications remain property of Buyer.
|
|
||||||
Supplier grants license to use necessary patents for component manufacturing.
|
|
||||||
|
|
||||||
This agreement shall remain in effect for 24 months with automatic renewal unless terminated.
|
|
||||||
|
|
||||||
GlobalManufacturing Corp
|
|
||||||
Michael Rodriguez, Supply Chain Director
|
|
||||||
Date: April 1, 2024
|
|
||||||
|
|
||||||
PrecisionParts Ltd
|
|
||||||
Amanda Foster, VP Sales
|
|
||||||
Date: April 2, 2024"
|
|
||||||
|
|
||||||
EduTech_StudentData_Management_Policy_2024,"STUDENT DATA MANAGEMENT POLICY
|
|
||||||
EduTech Learning Platform - Data Management and Protection Policy
|
|
||||||
Document Version: 2.1
|
|
||||||
Effective Date: August 15, 2024
|
|
||||||
|
|
||||||
SCOPE AND PURPOSE
|
|
||||||
This policy governs the collection, use, storage, and protection of student educational records and personal information.
|
|
||||||
Applies to all employees, contractors, and third-party service providers accessing student data.
|
|
||||||
Compliance with FERPA, COPPA, and state student privacy laws is mandatory.
|
|
||||||
|
|
||||||
DATA CLASSIFICATION
|
|
||||||
Educational Records: Grades, attendance, assignments, and academic progress information.
|
|
||||||
Personal Information: Names, addresses, contact details, and demographic information.
|
|
||||||
Behavioral Data: Learning patterns, platform usage, and engagement metrics.
|
|
||||||
|
|
||||||
COLLECTION PRINCIPLES
|
|
||||||
Data collection is limited to educational purposes and service improvement only.
|
|
||||||
Parental consent required for students under 13 years of age.
|
|
||||||
Students and parents have right to review and request corrections to educational records.
|
|
||||||
|
|
||||||
ACCESS CONTROLS
|
|
||||||
Role-based access ensures personnel see only data necessary for their functions.
|
|
||||||
Multi-factor authentication required for all system access.
|
|
||||||
Access logs maintained and reviewed monthly for unauthorized activity.
|
|
||||||
|
|
||||||
DATA SHARING
|
|
||||||
Educational records shared only with authorized school personnel and parents/students.
|
|
||||||
No data sharing with third parties for commercial purposes without explicit consent.
|
|
||||||
Research data must be de-identified and aggregated before external sharing.
|
|
||||||
|
|
||||||
SECURITY MEASURES
|
|
||||||
Data encrypted using industry-standard protocols during transmission and storage.
|
|
||||||
Regular security assessments and vulnerability testing conducted quarterly.
|
|
||||||
Incident response plan includes notification procedures for data breaches.
|
|
||||||
|
|
||||||
RETENTION AND DISPOSAL
|
|
||||||
Student records retained according to school district policies, typically 5-7 years post-graduation.
|
|
||||||
Inactive accounts and associated data purged after 2 years of non-use.
|
|
||||||
Secure data destruction protocols ensure complete removal of sensitive information.
|
|
||||||
|
|
||||||
COMPLIANCE MONITORING
|
|
||||||
Annual privacy training required for all staff handling student data.
|
|
||||||
Regular audits ensure ongoing compliance with applicable privacy regulations.
|
|
||||||
Privacy impact assessments conducted for new features or data uses.
|
|
||||||
|
|
||||||
Contact: Dr. Lisa Thompson, Chief Privacy Officer
|
|
||||||
Email: privacy@edutech-learning.com
|
|
||||||
Phone: (555) 123-4567"
|
|
||||||
|
|
||||||
FinanceFirst_Investment_Advisory_Agreement_2024,"INVESTMENT ADVISORY AGREEMENT
|
|
||||||
This Investment Advisory Agreement is entered into between FinanceFirst Advisors LLC (""Advisor"") and Madison Investment Group (""Client"") on May 20, 2024.
|
|
||||||
|
|
||||||
ADVISORY SERVICES
|
|
||||||
Advisor will provide comprehensive investment management and financial planning services.
|
|
||||||
Services include portfolio construction, asset allocation, risk assessment, and performance monitoring.
|
|
||||||
Regular portfolio reviews conducted quarterly with detailed performance reporting.
|
|
||||||
|
|
||||||
INVESTMENT AUTHORITY
|
|
||||||
Client grants Advisor discretionary authority to make investment decisions within agreed parameters.
|
|
||||||
Investment universe includes stocks, bonds, ETFs, mutual funds, and alternative investments as appropriate.
|
|
||||||
All trades executed through qualified broker-dealers with best execution practices.
|
|
||||||
|
|
||||||
FEE STRUCTURE
|
|
||||||
Management fee: 1.25% annually on assets under management, calculated and billed quarterly.
|
|
||||||
Performance fee: 15% of returns exceeding S&P 500 benchmark, calculated annually.
|
|
||||||
Additional fees may apply for specialized services such as tax planning or estate planning.
|
|
||||||
|
|
||||||
CLIENT RESPONSIBILITIES
|
|
||||||
Client must provide accurate financial information and promptly communicate changes in circumstances.
|
|
||||||
Investment objectives and risk tolerance should be reviewed and updated annually.
|
|
||||||
Client responsible for reviewing and approving investment policy statement.
|
|
||||||
|
|
||||||
RISK DISCLOSURE
|
|
||||||
All investments carry risk of loss, and past performance does not guarantee future results.
|
|
||||||
Diversification does not ensure profit or protect against loss in declining markets.
|
|
||||||
Alternative investments may have limited liquidity and higher volatility.
|
|
||||||
|
|
||||||
REGULATORY COMPLIANCE
|
|
||||||
Advisor is registered with the Securities and Exchange Commission as an investment advisor.
|
|
||||||
All activities conducted in accordance with Investment Advisers Act of 1940 and applicable regulations.
|
|
||||||
Form ADV Part 2 brochure provided annually with material updates.
|
|
||||||
|
|
||||||
CONFIDENTIALITY
|
|
||||||
All client information treated as confidential and shared only as necessary for service provision.
|
|
||||||
Third-party service providers bound by confidentiality agreements.
|
|
||||||
Client data protected through secure systems and access controls.
|
|
||||||
|
|
||||||
TERMINATION
|
|
||||||
Either party may terminate agreement with 30 days written notice.
|
|
||||||
Upon termination, Advisor will assist with orderly transfer of assets to new custodian or advisor.
|
|
||||||
Final fee calculation prorated to date of termination.
|
|
||||||
|
|
||||||
FinanceFirst Advisors LLC
|
|
||||||
Thomas Anderson, Managing Partner
|
|
||||||
Date: May 20, 2024
|
|
||||||
|
|
||||||
Madison Investment Group
|
|
||||||
Rebecca Martinez, Chief Investment Officer
|
|
||||||
Date: May 21, 2024"
|
|
||||||
|
|
||||||
HealthSystem_PatientCare_Standards_2024,"PATIENT CARE STANDARDS AND PROTOCOLS
|
|
||||||
Metropolitan Health System - Clinical Care Standards
|
|
||||||
Document ID: MHS-PCS-2024-001
|
|
||||||
Effective Date: June 1, 2024
|
|
||||||
|
|
||||||
PATIENT SAFETY PROTOCOLS
|
|
||||||
All patients must have proper identification verification using two unique identifiers.
|
|
||||||
Medication administration requires independent double-check for high-risk medications.
|
|
||||||
Fall risk assessments completed within 4 hours of admission with appropriate interventions.
|
|
||||||
|
|
||||||
CLINICAL DOCUMENTATION
|
|
||||||
Medical records must be completed within 24 hours of patient encounter.
|
|
||||||
All entries require electronic signature with timestamp and provider identification.
|
|
||||||
Critical values and abnormal results must be communicated and documented immediately.
|
|
||||||
|
|
||||||
INFECTION CONTROL
|
|
||||||
Hand hygiene compliance monitored with target rate of 95% or higher.
|
|
||||||
Personal protective equipment used according to transmission-based precautions.
|
|
||||||
Isolation procedures implemented within 2 hours of identification of infectious conditions.
|
|
||||||
|
|
||||||
EMERGENCY RESPONSE
|
|
||||||
Code team response time target: 3 minutes from activation to arrival.
|
|
||||||
Crash cart and emergency equipment checks performed daily and documented.
|
|
||||||
All staff required to maintain current CPR and emergency response certifications.
|
|
||||||
|
|
||||||
PATIENT COMMUNICATION
|
|
||||||
Patient rights and responsibilities communicated upon admission.
|
|
||||||
Informed consent obtained and documented prior to procedures and treatments.
|
|
||||||
Family involvement encouraged with respect for patient privacy preferences.
|
|
||||||
|
|
||||||
QUALITY MEASURES
|
|
||||||
Patient satisfaction scores monitored monthly with target of 4.5/5.0 or higher.
|
|
||||||
Medication error rates tracked with goal of less than 1 per 1000 patient days.
|
|
||||||
Hospital-acquired infection rates measured and benchmarked against national standards.
|
|
||||||
|
|
||||||
STAFF COMPETENCY
|
|
||||||
Annual competency assessments required for all clinical staff.
|
|
||||||
Continuing education requirements: 24 hours annually for nurses, 40 hours for physicians.
|
|
||||||
Specialty certifications maintained according to department and role requirements.
|
|
||||||
|
|
||||||
TECHNOLOGY STANDARDS
|
|
||||||
Electronic health record system used for all patient documentation.
|
|
||||||
Telemedicine capabilities available for remote consultations and monitoring.
|
|
||||||
Clinical decision support tools integrated to assist with diagnosis and treatment decisions.
|
|
||||||
|
|
||||||
Contact: Dr. Patricia Williams, Chief Medical Officer
|
|
||||||
Email: pwilliams@metrohealthsystem.org
|
|
||||||
Phone: (555) 987-6543"
|
|
||||||
|
|
|
@ -1,38 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
WAIT_FOR_PIDS=()
|
|
||||||
|
|
||||||
# Print all arguments as a single message prefixed with a millisecond
# timestamp ("YYYY-mm-dd HH:MM:SS,mmm - message"). The timestamp comes from
# python3, so python3 must be on PATH.
log() {
    # Locals keep the caller's "timestamp"/"message" variables untouched.
    # Declaration is split from assignment so a python3 failure is not masked.
    local timestamp message
    timestamp=$(python3 -c 'from datetime import datetime; print(datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:23])')
    message="$*"
    echo "$timestamp - $message"
}
|
|
||||||
|
|
||||||
# Trap handler: send the default kill signal to every PID recorded in
# WAIT_FOR_PIDS, log the ones that were actually signalled, then exit 1.
cleanup() {
    # A local loop variable avoids clobbering the global PID used by the
    # script's top-level wait loop.
    local pid
    log "Caught signal, terminating all user processes ..."
    for pid in "${WAIT_FOR_PIDS[@]}"; do
        # kill fails (silently here) for processes that have already exited.
        if kill "$pid" 2> /dev/null; then
            log "killed process: $pid"
        fi
    done
    exit 1
}
|
|
||||||
|
|
||||||
trap cleanup EXIT
|
|
||||||
|
|
||||||
# Launch each agent in the background on its own port and remember its PID
# so the cleanup handler can terminate it. Entries are "agent_name:port".
for agent_spec in "query_parser:10500" "context_builder:10501" "response_generator:10502"; do
    agent_name="${agent_spec%%:*}"
    agent_port="${agent_spec##*:}"
    log "Starting ${agent_name} agent on port ${agent_port}..."
    uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port "${agent_port}" --agent "${agent_name}" &
    WAIT_FOR_PIDS+=($!)
done

# Block until the agents exit, waiting on them in launch order.
for PID in "${WAIT_FOR_PIDS[@]}"; do
    wait "$PID"
done
|
|
||||||
|
|
@ -1,67 +0,0 @@
|
||||||
@baseUrl = http://0.0.0.0:10502
|
|
||||||
@model = gpt-4o
|
|
||||||
|
|
||||||
# Health Check
|
|
||||||
GET {{baseUrl}}/health
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
# Test 1: Simple Non-Streaming Chat Completion
|
|
||||||
POST {{baseUrl}}/v1/chat/completions
|
|
||||||
Content-Type: application/json
|
|
||||||
|
|
||||||
{
|
|
||||||
"model": "{{model}}",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Hello! Can you help me understand what machine learning is?"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
# Test 2: Simple Streaming Chat Completion
|
|
||||||
POST {{baseUrl}}/v1/chat/completions
|
|
||||||
Content-Type: application/json
|
|
||||||
|
|
||||||
{
|
|
||||||
"model": "{{model}}",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Explain the concept of artificial intelligence in simple terms."
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"stream": true
|
|
||||||
}
|
|
||||||
|
|
||||||
### Test 3
|
|
||||||
POST http://localhost:8001/v1/chat/completions
|
|
||||||
Content-Type: application/json
|
|
||||||
|
|
||||||
{
|
|
||||||
"model": "{{model}}",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"stream": false
|
|
||||||
}
|
|
||||||
|
|
||||||
### send request to context builder agent
|
|
||||||
POST http://localhost:10501/v1/chat/completions
|
|
||||||
Content-Type: application/json
|
|
||||||
|
|
||||||
{
|
|
||||||
"model": "gpt-4o-mini",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
1356
demos/use_cases/rag_agent/uv.lock
generated
1356
demos/use_cases/rag_agent/uv.lock
generated
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue