mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
delete rag agent demo
This commit is contained in:
parent
d3595aeee1
commit
eed776fc94
15 changed files with 0 additions and 2765 deletions
|
|
@ -1,28 +0,0 @@
|
|||
# RAG Agent Query Parser
|
||||
|
||||
A FastAPI service that rewrites user queries using archgw and gpt-4o-mini for better retrieval accuracy.
|
||||
|
||||
## How it Works
|
||||
|
||||
1. Receives a chat completion request with conversation history
|
||||
2. Calls archgw's LLM gateway with gpt-4o-mini to rewrite the last user query
|
||||
3. Returns the rewritten query as the assistant response
|
||||
|
||||
## Setup and Running
|
||||
|
||||
1. **Start archgw**:
|
||||
```bash
|
||||
archgw up --foreground
|
||||
```
|
||||
|
||||
2. **Start the query parser service**:
|
||||
```bash
|
||||
uv run python -m rag_agent.query_parser
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
```bash
|
||||
# archgw LLM Gateway base URL (default: http://localhost:12000/v1)
|
||||
export LLM_GATEWAY_ENDPOINT="http://localhost:12000/v1"
|
||||
```
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
# archgw configuration for the RAG agent demo.
# NOTE(review): nesting was reconstructed from a flattened copy of this file —
# confirm against the gateway's config schema before use.
version: v0.3.0

# HTTP endpoints of the agent services the gateway can call.
agents:
  - id: query_rewriter
    url: http://host.docker.internal:10500/v1/chat/completions
  - id: context_builder
    url: http://host.docker.internal:10501/v1/chat/completions
  - id: rag_agent
    url: http://host.docker.internal:10502/v1/chat/completions
  - id: research_agent
    url: http://host.docker.internal:10503/v1/chat/completions
  - id: weather_forecast_agent
    url: http://host.docker.internal:10504/process

model_providers:
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
  - model: ollama/llama3.1
    base_url: http://host.docker.internal:11434

model_aliases:
  fast-llm:
    target: gpt-4o-mini
  smart-llm:
    target: gpt-4o

listeners:
  - type: agent
    name: agent_1
    port: 8001
    router: arch_agent_router
    agents:
      - id: rag_agent
        description: virtual assistant for device contracts for simple queries
        # Entries refer to ids declared in the top-level `agents` list.
        filter_chain:
          - query_rewriter
          - context_builder

tracing:
  random_sampling: 100
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
# Supporting containers for the demo: Jaeger for traces and Open WebUI as chat front end.
services:
  jaeger:
    build:
      context: ../../shared/jaeger
    ports:
      - "16686:16686"
      - "4317:4317"
      - "4318:4318"
  open-web-ui:
    image: dyrnq/open-webui:main
    restart: always
    ports:
      - "8080:8080"
    environment:
      - DEFAULT_MODEL=gpt-4o-mini
      - ENABLE_OPENAI_API=true
      # Port 8001 matches the agent listener port in the gateway config.
      - OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
[project]
name = "rag_agent"
version = "0.1.0"
description = "RAG Agent"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
    "click>=8.2.1",
    "fastapi>=0.104.1",
    "fastmcp>=2.12.2",
    "mcp>=1.13.1",
    "openai>=1.0.0",
    "pydantic>=2.11.7",
    "uvicorn>=0.24.0",
]

# Console entry point: `rag_agent` runs the click command `main` in the rag_agent package.
[project.scripts]
rag_agent = "rag_agent:main"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
# Sample Queries for Knowledge Base RAG Agent
|
||||
|
||||
## Service Level Agreement Queries
|
||||
- What is the guaranteed uptime percentage for TechCorp's cloud services?
|
||||
- What remedies are available if the API response time exceeds the agreed threshold?
|
||||
- How quickly must TechCorp respond to critical support issues?
|
||||
- What monitoring and reporting requirements are specified in the SLA?
|
||||
- When was the TechCorp service agreement signed and by whom?
|
||||
|
||||
## Privacy Policy Queries
|
||||
- What encryption methods does DataSecure use to protect data?
|
||||
- How long does DataSecure retain personal data after account deletion?
|
||||
- What rights do users have regarding their personal information?
|
||||
- Can DataSecure sell user data to third parties for marketing?
|
||||
- Who should be contacted for privacy-related concerns at DataSecure?
|
||||
|
||||
## Supply Chain Agreement Queries
|
||||
- What types of automotive components does PrecisionParts supply?
|
||||
- What are the payment terms and volume discount structure?
|
||||
- What quality standards must the supplied components meet?
|
||||
- What are the penalties for late delivery?
|
||||
- What insurance coverage requirements apply to the supplier?
|
||||
|
||||
## Student Data Management Queries
|
||||
- What federal laws must EduTech comply with regarding student data?
|
||||
- What security measures are in place to protect student information?
|
||||
- How long are student records retained after graduation?
|
||||
- What consent is required for students under 13 years old?
|
||||
- Who can access student educational records?
|
||||
|
||||
## Investment Advisory Queries
|
||||
- What is FinanceFirst's management fee structure?
|
||||
- What types of investments are included in the advisory services?
|
||||
- What regulatory body oversees FinanceFirst Advisors?
|
||||
- How often are portfolio reviews conducted?
|
||||
- What are the client's responsibilities under this agreement?
|
||||
|
||||
## Healthcare Standards Queries
|
||||
- What is the target response time for emergency code teams?
|
||||
- What hand hygiene compliance rate is required?
|
||||
- How quickly must medical records be completed after patient encounters?
|
||||
- What continuing education requirements apply to nursing staff?
|
||||
- What patient safety protocols are mandatory upon admission?
|
||||
|
||||
## Cross-Document Queries
|
||||
- Which agreements include confidentiality or data protection provisions?
|
||||
- What are the common termination notice periods across different contract types?
|
||||
- Which documents specify insurance or liability coverage requirements?
|
||||
- What compliance and regulatory requirements are mentioned across agreements?
|
||||
- Which contracts include performance metrics or service level commitments?
|
||||
|
||||
## Complex Analysis Queries
|
||||
- Compare the data retention policies across the privacy policy and student data management documents.
|
||||
- What are the different approaches to risk management across the supply chain and investment advisory agreements?
|
||||
- How do the security measures in the healthcare standards compare to those in the privacy policy?
|
||||
- Which agreements provide the most detailed compliance and regulatory frameworks?
|
||||
- What common themes exist in the quality assurance requirements across different industries?
|
||||
|
||||
## Document-Specific Detail Queries
|
||||
- List all the specific percentages, timeframes, and numerical requirements mentioned in the SLA.
|
||||
- What are all the contact persons and their roles mentioned across the documents?
|
||||
- Identify all the compliance standards and certifications referenced in the supply chain agreement.
|
||||
- What are the specific consequences or penalties mentioned for non-compliance across agreements?
|
||||
- List all the third-party systems, tools, or services mentioned in the documents.
|
||||
|
|
@ -1,63 +0,0 @@
|
|||
import click
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
# Module-level FastMCP server handle; assigned inside main() when running in MCP mode.
mcp = None
|
||||
|
||||
|
||||
@click.command()
@click.option("--transport", "transport", default="stdio")
@click.option("--host", "host", default="localhost")
@click.option("--port", "port", default=10101)
@click.option("--agent", "agent", default=None)
@click.option(
    "--rest-server",
    "rest_server",
    is_flag=True,
    help="Start REST server instead of MCP server",
)
@click.option("--rest-port", "rest_port", default=8000, help="Port for REST server")
def main(host, port, agent, transport, rest_server, rest_port):
    """Start one RAG demo agent as a REST server, or the agents as an MCP server.

    With --rest-server, --agent must be one of query_parser, context_builder
    or response_generator; the matching module's start_server() is run on
    host:rest_port. Without it, a FastMCP server is created on host:port,
    the selected agent module (or all of them) is imported, and the server
    runs over the chosen transport.
    """
    global mcp

    if rest_server:
        print(f"Starting REST server on {host}:{rest_port} for agent: {agent}")

        # Imports are deferred so only the requested agent's module is loaded.
        if agent == "query_parser":
            from rag_agent.query_rewriter_agent import start_server
        elif agent == "context_builder":
            from rag_agent.context_builder_agent import start_server
        elif agent == "response_generator":
            from rag_agent.response_generator_agent import start_server
        else:
            print("Please specify an agent to start with --agent option.")
            return

        start_server(host=host, port=rest_port)
        return

    print(f"Starting agent(s): {agent if agent else 'all'}")
    mcp = FastMCP("RAG Agent Demo", host=host, port=port)

    # NOTE(review): these modules are imported for their side effects only —
    # presumably they register tools on the shared `mcp` instance; confirm.
    if agent == "query_parser":
        import rag_agent.query_parser
    elif agent == "document_store":
        import rag_agent.document_store
    elif agent == "response_generator":
        import rag_agent.response_generator
    else:
        import rag_agent.query_parser
        import rag_agent.document_store
        import rag_agent.response_generator

        print("All agents loaded.")

    mcp.run(transport=transport)
|
||||
|
||||
|
||||
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
# Package entry point for `python -m <package>`: delegates to the CLI command.
from . import main

if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
from pydantic import BaseModel
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
|
||||
class ChatMessage(BaseModel):
    """A single conversation message: who said it and what was said."""

    role: str
    content: str
|
||||
|
||||
|
||||
class ChatCompletionRequest(BaseModel):
    """Request body accepted by the agents' /v1/chat/completions endpoints."""

    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 1.0
    max_tokens: Optional[int] = None
    top_p: Optional[float] = 1.0
    frequency_penalty: Optional[float] = 0.0
    presence_penalty: Optional[float] = 0.0
    stream: Optional[bool] = False
    stop: Optional[List[str]] = None
|
||||
|
||||
|
||||
class ChatCompletionResponse(BaseModel):
    """Non-streaming chat completion payload returned by the agents."""

    id: str
    object: str = "chat.completion"
    created: int
    model: str
    # Choices and usage are kept as plain dicts rather than typed models.
    choices: List[Dict[str, Any]]
    usage: Dict[str, int]
|
||||
|
||||
|
||||
class ChatCompletionStreamResponse(BaseModel):
    """One chunk of a streamed chat completion (no usage block)."""

    id: str
    object: str = "chat.completion.chunk"
    created: int
    model: str
    choices: List[Dict[str, Any]]
|
||||
|
|
@ -1,280 +0,0 @@
|
|||
import json
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional, Dict, Any
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from openai import AsyncOpenAI
|
||||
import os
|
||||
import logging
|
||||
import csv
|
||||
from pathlib import Path
|
||||
import uvicorn
|
||||
|
||||
from .api import ChatMessage, ChatCompletionRequest, ChatCompletionResponse
|
||||
|
||||
|
||||
# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - [CONTEXT_BUILDER] - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Configuration for archgw LLM gateway (overridable via environment)
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
RAG_MODEL = "gpt-4o-mini"

# Initialize OpenAI client for archgw
archgw_client = AsyncOpenAI(
    base_url=LLM_GATEWAY_ENDPOINT,
    api_key="EMPTY",  # archgw doesn't require a real API key
)

# In-memory knowledge base: list of {"path", "content"} dicts filled by load_knowledge_base()
knowledge_base = []
|
||||
|
||||
|
||||
def load_knowledge_base():
    """Load sample_knowledge_base.csv (next to this module) into ``knowledge_base``.

    Every CSV row must provide ``path`` and ``content`` columns. Any failure
    (missing file, missing column, decode error) is logged and leaves
    ``knowledge_base`` empty instead of raising.
    """
    global knowledge_base

    csv_path = Path(__file__).parent / "sample_knowledge_base.csv"
    print(f"Loading knowledge base from {csv_path}")

    try:
        # utf-8-sig drops a leading BOM so the first header reads as "path";
        # newline="" lets the csv module handle newlines inside quoted fields.
        with open(csv_path, "r", encoding="utf-8-sig", newline="") as file:
            documents = [
                {"path": row["path"], "content": row["content"]}
                for row in csv.DictReader(file)
            ]
    except Exception as e:
        logger.error(f"Error loading knowledge base: {e}")
        knowledge_base = []
        return

    knowledge_base = documents
    logger.info(f"Loaded {len(knowledge_base)} documents from knowledge base")
|
||||
|
||||
|
||||
def _parse_passage_indices(result: str, limit: int, top_k: int) -> List[int]:
    """Return up to ``top_k`` distinct indices below ``limit`` found in the LLM reply."""
    import re

    picked: List[int] = []
    # Pull out every digit run so replies like "[0], [3]" or "0, 3." still parse.
    for token in re.findall(r"[0-9]+", result):
        if len(picked) >= top_k:
            break
        idx = int(token)
        if idx < limit and idx not in picked:
            picked.append(idx)
    return picked


async def find_relevant_passages(
    query: str, traceparent: Optional[str] = None, top_k: int = 3
) -> List[Dict[str, str]]:
    """Use the LLM to find the most relevant passages from the knowledge base.

    Args:
        query: User query to match against the knowledge base.
        traceparent: Optional trace header value forwarded to archgw.
        top_k: Maximum number of passages to return.

    Returns:
        The selected knowledge-base entries (dicts with "path" and "content"),
        or an empty list when the knowledge base is empty, the model answers
        "NONE", or the gateway call fails.
    """
    if not knowledge_base:
        logger.warning("Knowledge base is empty")
        return []

    # Create a system prompt for passage selection
    system_prompt = f"""You are a retrieval assistant that selects the most relevant document passages for a given query.

Given a user query and a list of document passages, identify the {top_k} most relevant passages that would help answer the query.

Query: {query}

Available passages:
"""

    # Add all passages with indices; only the first 500 characters of each are sent.
    for i, doc in enumerate(knowledge_base):
        system_prompt += (
            f"\n[{i}] Path: {doc['path']}\nContent: {doc['content'][:500]}...\n"
        )

    system_prompt += f"""

Please respond with ONLY the indices of the {top_k} most relevant passages, separated by commas (e.g., "0,3,7").
If fewer than {top_k} passages are relevant, return only the relevant ones.
If no passages are relevant, return "NONE"."""

    try:
        logger.info(f"Calling archgw to find relevant passages for query: '{query}'")

        # Prepare extra headers; traceparent is forwarded only when provided
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent:
            extra_headers["traceparent"] = traceparent

        response = await archgw_client.chat.completions.create(
            model=RAG_MODEL,
            messages=[{"role": "system", "content": system_prompt}],
            temperature=0.1,
            max_tokens=50,
            extra_headers=extra_headers,
        )

        # content may be None; treat that the same as an empty reply
        result = (response.choices[0].message.content or "").strip()
        logger.info(f"LLM selected passages: {result}")

        if result.upper() == "NONE":
            return []

        indices = _parse_passage_indices(result, len(knowledge_base), top_k)
        selected_passages = [knowledge_base[idx] for idx in indices]

        logger.info(f"Selected {len(selected_passages)} relevant passages")
        return selected_passages

    except Exception as e:
        # Best effort: retrieval failure degrades to "no context", not an error
        logger.error(f"Error finding relevant passages: {e}")
        return []
|
||||
|
||||
|
||||
async def augment_query_with_context(
    messages: List[ChatMessage], traceparent: Optional[str] = None
) -> List[ChatMessage]:
    """Append retrieved knowledge-base context to the last user message.

    Args:
        messages: Conversation history.
        traceparent: Optional trace header value passed on to retrieval.

    Returns:
        A new list in which the last user message carries the retrieved
        passages, or ``messages`` itself when there is no non-empty user
        message or no passage was selected.
    """
    # Find the last user message, scanning from the end
    last_user_index = next(
        (i for i in range(len(messages) - 1, -1, -1) if messages[i].role == "user"),
        -1,
    )
    last_user_message = messages[last_user_index].content if last_user_index >= 0 else None

    if not last_user_message:
        logger.warning("No user message found in conversation")
        return messages

    logger.info(f"Processing user query: '{last_user_message}'")

    relevant_passages = await find_relevant_passages(last_user_message, traceparent)
    if not relevant_passages:
        logger.info("No relevant passages found, returning original messages")
        return messages

    # Build context from relevant passages, numbered from 1
    context = "\n\n".join(
        f"Document {i+1} ({passage['path']}):\n{passage['content']}"
        for i, passage in enumerate(relevant_passages)
    )

    # Original query first, then the retrieved context
    augmented_content = f"{last_user_message} RELEVANT CONTEXT:\n{context}"

    # Shallow copy so the caller's list is left untouched
    updated_messages = messages.copy()
    updated_messages[last_user_index] = ChatMessage(
        role="user", content=augmented_content
    )

    logger.info(f"Augmented user query with {len(relevant_passages)} relevant passages")
    return updated_messages
|
||||
|
||||
|
||||
class Response(BaseModel):
    """A query paired with free-form metadata about it."""

    query: str
    metadata: dict
|
||||
|
||||
|
||||
# FastAPI app for REST server
app = FastAPI(title="RAG Content Builder Agent", version="1.0.0")
|
||||
|
||||
|
||||
@app.post("/v1/chat/completions")
async def chat_completions(
    request_body: ChatCompletionRequest, request: Request
) -> ChatCompletionResponse:
    """Chat completions endpoint that augments user queries with relevant context from the knowledge base.

    The augmented message history is returned JSON-encoded as the content of a
    single "user"-role choice. Usage figures are whitespace word counts, not
    real token counts.
    """
    import time
    import uuid

    logger.info(
        f"Received chat completion request with {len(request_body.messages)} messages"
    )

    # Read traceparent header if present
    traceparent_header = request.headers.get("traceparent")
    if traceparent_header:
        logger.info(f"Received traceparent header: {traceparent_header}")
    else:
        logger.info("No traceparent header found")

    # Augment the user query with relevant context
    updated_messages = await augment_query_with_context(
        request_body.messages, traceparent_header
    )
    # model_dump() is the pydantic v2 replacement for the deprecated .dict()
    messages_history_json = json.dumps([msg.model_dump() for msg in updated_messages])

    prompt_tokens = sum(len(msg.content.split()) for msg in updated_messages)
    completion_tokens = len("Context added to user query.".split())

    return ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
        created=int(time.time()),
        model=request_body.model,
        choices=[
            {
                "index": 0,
                "message": {"role": "user", "content": messages_history_json},
                "finish_reason": "stop",
            }
        ],
        usage={
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    )
|
||||
|
||||
|
||||
def main():
    """Load the knowledge base, then serve the app on 0.0.0.0:8000."""
    load_knowledge_base()

    uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
|
||||
|
||||
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
def start_server(host: str = "localhost", port: int = 8000):
    """Load the knowledge base and start the REST server.

    Args:
        host: Interface to bind.
        port: TCP port to listen on.
    """
    load_knowledge_base()

    # Custom log config so server output carries the [CONTEXT_BUILDER] tag
    # and goes to stdout.
    log_config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "default": {
                "format": "%(asctime)s - [CONTEXT_BUILDER] - %(levelname)s - %(message)s",
            },
        },
        "handlers": {
            "default": {
                "formatter": "default",
                "class": "logging.StreamHandler",
                "stream": "ext://sys.stdout",
            },
        },
        "root": {
            "level": "INFO",
            "handlers": ["default"],
        },
    }

    uvicorn.run(app, host=host, port=port, log_config=log_config)
|
||||
|
|
@ -1,188 +0,0 @@
|
|||
import json
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional, Dict, Any
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from openai import AsyncOpenAI
|
||||
import os
|
||||
import logging
|
||||
import uvicorn
|
||||
|
||||
from .api import ChatMessage, ChatCompletionRequest, ChatCompletionResponse
|
||||
|
||||
|
||||
# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - [QUERY_REWRITER] - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Configuration for archgw LLM gateway (overridable via environment)
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
QUERY_REWRITE_MODEL = "gpt-4o-mini"

# Initialize OpenAI client for archgw
archgw_client = AsyncOpenAI(
    base_url=LLM_GATEWAY_ENDPOINT,
    api_key="EMPTY",  # archgw doesn't require a real API key
)
|
||||
|
||||
|
||||
async def rewrite_query_with_archgw(
    messages: List[ChatMessage], traceparent_header: Optional[str]
) -> str:
    """Ask the LLM (via archgw) to rewrite the last user message for retrieval.

    Args:
        messages: Conversation history; sent to the model after a system prompt.
        traceparent_header: Trace header value to forward, or None.

    Returns:
        The rewritten query. If the call fails or yields an empty rewrite, the
        content of the last user message is returned instead, or "" when the
        conversation has no user message.
    """
    # Prepare the system prompt for query rewriting
    system_prompt = """You are a query rewriter that improves user queries for better retrieval.

Given a conversation history, rewrite the last user message to be more specific and context-aware.
The rewritten query should:
1. Include relevant context from previous messages
2. Be clear and specific for information retrieval
3. Maintain the user's intent
4. Be concise but comprehensive

Return only the rewritten query, nothing else."""

    # System prompt first, then the conversation history unchanged
    rewrite_messages = [{"role": "system", "content": system_prompt}]
    rewrite_messages.extend(
        {"role": msg.role, "content": msg.content} for msg in messages
    )

    try:
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header

        logger.info(f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to rewrite query")
        response = await archgw_client.chat.completions.create(
            model=QUERY_REWRITE_MODEL,
            messages=rewrite_messages,
            temperature=0.3,
            max_tokens=200,
            extra_headers=extra_headers,
        )

        # content may be None; an empty rewrite must not replace the user's query
        rewritten_query = (response.choices[0].message.content or "").strip()
        if rewritten_query:
            logger.info(f"Query rewritten successfully: '{rewritten_query}'")
            return rewritten_query
        logger.warning("archgw returned an empty rewrite")

    except Exception as e:
        logger.error(f"Error rewriting query: {e}")

    # If rewriting fails, return the original last user message
    logger.info("Falling back to original user message")
    for message in reversed(messages):
        if message.role == "user":
            return message.content
    return ""
|
||||
|
||||
|
||||
class Response(BaseModel):
    """A query paired with free-form metadata; returned by parse_query()."""

    query: str
    metadata: dict
|
||||
|
||||
|
||||
# FastAPI app for REST server
app = FastAPI(title="RAG Agent Query Parser", version="1.0.0")
|
||||
|
||||
|
||||
@app.post("/v1/chat/completions")
async def chat_completions(request_body: ChatCompletionRequest, request: Request):
    """Chat completions endpoint that rewrites the last user query using archgw.

    The history with the rewritten last user message is returned JSON-encoded
    as the content of a single "user"-role choice. Usage figures are
    whitespace word counts, not real token counts.
    """
    import time
    import uuid

    logger.info(
        f"Received chat completion request with {len(request_body.messages)} messages"
    )

    # Read traceparent header if present
    traceparent_header = request.headers.get("traceparent")
    if traceparent_header:
        logger.info(f"Received traceparent header: {traceparent_header}")
    else:
        logger.info("No traceparent header found")

    # Call archgw to rewrite the last user query
    rewritten_query = await rewrite_query_with_archgw(
        request_body.messages, traceparent_header
    )

    # Shallow copy so the parsed request body is not mutated
    updated_messages = request_body.messages.copy()

    # Replace only the most recent user message with the rewritten query
    for i in range(len(updated_messages) - 1, -1, -1):
        if updated_messages[i].role == "user":
            original_query = updated_messages[i].content
            updated_messages[i] = ChatMessage(role="user", content=rewritten_query)
            logger.info(
                f"Updated user query from '{original_query}' to '{rewritten_query}'"
            )
            break

    # model_dump() is the pydantic v2 replacement for the deprecated .dict()
    messages_history_json = json.dumps([msg.model_dump() for msg in updated_messages])

    prompt_tokens = sum(len(msg.content.split()) for msg in updated_messages)
    completion_tokens = len("Updated query for better retrieval.".split())

    return ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
        created=int(time.time()),
        model=request_body.model,
        choices=[
            {
                "index": 0,
                "message": {"role": "user", "content": messages_history_json},
                "finish_reason": "stop",
            }
        ],
        usage={
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    )
|
||||
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Health check endpoint; always reports a healthy status."""
    return {"status": "healthy"}
|
||||
|
||||
|
||||
def parse_query(query):
    """Wrap ``query`` in a Response whose metadata marks it as valid.

    No parsing or validation is performed; ``is_valid`` is always True.
    """
    return Response(query=query, metadata={"is_valid": True})
|
||||
|
||||
|
||||
def start_server(host: str = "localhost", port: int = 8000):
    """Start the REST server.

    Args:
        host: Interface to bind.
        port: TCP port to listen on.
    """
    # Custom log config so server output carries the [QUERY_REWRITER] tag
    # and goes to stdout.
    log_config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "default": {
                "format": "%(asctime)s - [QUERY_REWRITER] - %(levelname)s - %(message)s",
            },
        },
        "handlers": {
            "default": {
                "formatter": "default",
                "class": "logging.StreamHandler",
                "stream": "ext://sys.stdout",
            },
        },
        "root": {
            "level": "INFO",
            "handlers": ["default"],
        },
    }

    uvicorn.run(app, host=host, port=port, log_config=log_config)
|
||||
|
|
@ -1,302 +0,0 @@
|
|||
import json
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
from openai import AsyncOpenAI
|
||||
import os
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
import uvicorn
|
||||
import asyncio
|
||||
|
||||
from .api import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionStreamResponse,
|
||||
)
|
||||
|
||||
# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - [RESPONSE_GENERATOR] - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Configuration for archgw LLM gateway (overridable via environment)
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
RESPONSE_MODEL = "gpt-4o"

# System prompt for response generation
SYSTEM_PROMPT = """You are a helpful assistant that generates coherent, contextual responses.

Given a conversation history, generate a helpful and relevant response based on all the context available in the messages.
Your response should:
1. Be contextually aware of the entire conversation
2. Address the user's needs appropriately
3. Be helpful and informative
4. Maintain a natural conversational tone

Generate a complete response to assist the user."""

# Initialize OpenAI client for archgw
archgw_client = AsyncOpenAI(
    base_url=LLM_GATEWAY_ENDPOINT,
    api_key="EMPTY",  # archgw doesn't require a real API key
)

# FastAPI app for REST server
app = FastAPI(title="RAG Agent Response Generator", version="1.0.0")
|
||||
|
||||
|
||||
def prepare_response_messages(request_body: ChatCompletionRequest):
    """Build the message list sent to the LLM: system prompt, then the request's history.

    Returns:
        A list of {"role", "content"} dicts starting with SYSTEM_PROMPT.
    """
    response_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    response_messages.extend(
        {"role": msg.role, "content": msg.content} for msg in request_body.messages
    )
    return response_messages
|
||||
|
||||
|
||||
@app.post("/v1/chat/completions")
async def chat_completions(request_body: ChatCompletionRequest, request: Request):
    """Chat completions endpoint that generates a coherent response based on all context.

    Returns a server-sent-event stream when ``request_body.stream`` is set,
    otherwise the result of non_streaming_chat_completions().
    """
    logger.info(
        f"Received chat completion request with {len(request_body.messages)} messages"
    )

    # Read traceparent header if present
    traceparent_header = request.headers.get("traceparent")
    if traceparent_header:
        logger.info(f"Received traceparent header: {traceparent_header}")
    else:
        logger.info("No traceparent header found")

    if request_body.stream:
        # Declare the SSE media type directly instead of passing "text/plain"
        # and overriding it with a content-type header.
        return StreamingResponse(
            stream_chat_completions(request_body, traceparent_header),
            media_type="text/event-stream",
        )

    return await non_streaming_chat_completions(request_body, traceparent_header)
|
||||
|
||||
|
||||
async def stream_chat_completions(
    request_body: ChatCompletionRequest, traceparent_header: str = None
):
    """Generate streaming chat completions.

    Yields server-sent-event lines ("data: <json>" followed by a blank line):
    one chunk per content delta from the LLM, then a final chunk with
    finish_reason "stop" that also carries the full reply as a JSON-encoded
    one-message history, then "data: [DONE]". On any error a single apology
    chunk and "[DONE]" are emitted instead of raising.

    Args:
        request_body: Incoming request; supplies messages, model name,
            temperature and max_tokens.
        traceparent_header: Trace header value to forward, or None.
    """
    # Prepare messages for response generation
    response_messages = prepare_response_messages(request_body)

    try:
        logger.info(
            f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to generate streaming response"
        )

        # Prepare extra headers; traceparent is forwarded only when provided
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header

        # Only fall back to 0.7 when no temperature was given, so an explicit
        # temperature of 0 is respected.
        temperature = (
            request_body.temperature if request_body.temperature is not None else 0.7
        )

        response_stream = await archgw_client.chat.completions.create(
            model=RESPONSE_MODEL,
            messages=response_messages,
            temperature=temperature,
            max_tokens=request_body.max_tokens or 1000,
            stream=True,
            extra_headers=extra_headers,
        )

        completion_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
        created_time = int(time.time())
        collected_content = []

        async for chunk in response_stream:
            # Skip chunks that carry no text (e.g. role-only or empty deltas)
            if not (chunk.choices and chunk.choices[0].delta.content):
                continue

            content = chunk.choices[0].delta.content
            collected_content.append(content)

            stream_chunk = ChatCompletionStreamResponse(
                id=completion_id,
                created=created_time,
                model=request_body.model,
                choices=[
                    {
                        "index": 0,
                        "delta": {"content": content},
                        "finish_reason": None,
                    }
                ],
            )
            yield f"data: {stream_chunk.model_dump_json()}\n\n"

        # Send final chunk with complete response in expected format
        full_response = "".join(collected_content)
        updated_history = [{"role": "assistant", "content": full_response}]

        final_chunk = ChatCompletionStreamResponse(
            id=completion_id,
            created=created_time,
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "delta": {},
                    "finish_reason": "stop",
                    "message": {
                        "role": "assistant",
                        "content": json.dumps(updated_history),
                    },
                }
            ],
        )

        yield f"data: {final_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"

    except Exception as e:
        logger.error(f"Error generating streaming response: {e}")

        # Send error as streaming response
        error_chunk = ChatCompletionStreamResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "delta": {
                        "content": "I apologize, but I'm having trouble generating a response right now. Please try again."
                    },
                    "finish_reason": "stop",
                }
            ],
        )

        yield f"data: {error_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
def _approx_token_count(text) -> int:
    """Approximate a token count as the number of whitespace-separated words.

    Anything that is not a string (for example ``None``) counts as zero so
    that usage accounting can never raise.
    """
    return len(text.split()) if isinstance(text, str) else 0


def _build_chat_completion_response(request_body, content: str):
    """Wrap ``content`` in a ChatCompletionResponse with word-count based usage."""
    prompt_tokens = sum(
        _approx_token_count(msg.content) for msg in request_body.messages
    )
    completion_tokens = _approx_token_count(content)

    return ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
        created=int(time.time()),
        model=request_body.model,
        choices=[
            {
                "index": 0,
                "message": {"role": "assistant", "content": content},
                "finish_reason": "stop",
            }
        ],
        usage={
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    )


async def non_streaming_chat_completions(
    request_body: ChatCompletionRequest, traceparent_header: str = None
):
    """Generate a non-streaming chat completion.

    Args:
        request_body: Incoming chat completion request. Its ``messages``,
            ``model``, ``temperature`` and ``max_tokens`` fields are read.
        traceparent_header: Optional trace context value; when set it is
            forwarded to the gateway as the ``traceparent`` header.

    Returns:
        A ChatCompletionResponse holding either the generated text or, if
        anything fails while calling the gateway, a fixed apology message.
        Usage numbers are word counts, not real tokenizer counts.
    """
    # Prepare messages for response generation
    response_messages = prepare_response_messages(request_body)

    try:
        # Call archgw using OpenAI client
        logger.info(f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to generate response")

        # Prepare extra headers if traceparent is provided
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header

        # Compare against None rather than using `or`, so an explicit
        # temperature of 0 is honoured instead of being replaced by 0.7.
        temperature = (
            request_body.temperature if request_body.temperature is not None else 0.7
        )

        response = await archgw_client.chat.completions.create(
            model=RESPONSE_MODEL,
            messages=response_messages,
            temperature=temperature,
            max_tokens=request_body.max_tokens or 1000,
            extra_headers=extra_headers,
        )

        # A missing (None) content raises here and is handled by the
        # fallback below, same as any other gateway failure.
        generated_response = response.choices[0].message.content.strip()
        logger.info("Response generated successfully")

        return _build_chat_completion_response(request_body, generated_response)

    except Exception as e:
        logger.error(f"Error generating response: {e}")

        # Fallback response
        fallback_message = "I apologize, but I'm having trouble generating a response right now. Please try again."
        return _build_chat_completion_response(request_body, fallback_message)
|
||||
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Liveness probe: always answers with a static "healthy" status."""
    status_payload = {"status": "healthy"}
    return status_payload
|
||||
|
||||
|
||||
def start_server(host: str = "localhost", port: int = 8000):
    """Serve ``app`` with uvicorn, logging to stdout at INFO level.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
    """
    # Every record is tagged with [RESPONSE_GENERATOR] by the formatter.
    formatters = {
        "default": {
            "format": "%(asctime)s - [RESPONSE_GENERATOR] - %(levelname)s - %(message)s",
        },
    }
    handlers = {
        "default": {
            "formatter": "default",
            "class": "logging.StreamHandler",
            "stream": "ext://sys.stdout",
        },
    }
    logging_config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": formatters,
        "handlers": handlers,
        "root": {
            "level": "INFO",
            "handlers": ["default"],
        },
    }

    uvicorn.run(app, host=host, port=port, log_config=logging_config)
|
||||
|
|
@ -1,257 +0,0 @@
|
|||
path,content
|
||||
TechCorp_CloudServices_SLA_Agreement_2024,"SERVICE LEVEL AGREEMENT
|
||||
This Service Level Agreement (""SLA"") is entered into on March 15, 2024, between TechCorp Solutions Inc., a Delaware corporation (""Provider""), and CloudFirst Enterprises LLC (""Customer"").
|
||||
|
||||
DEFINITIONS
|
||||
Service Availability: The percentage of time during which the cloud services are operational and accessible.
|
||||
Downtime: Any period when the services are unavailable or inaccessible to Customer.
|
||||
Response Time: The time between service request submission and initial response from Provider.
|
||||
|
||||
SERVICE COMMITMENTS
|
||||
Provider guarantees 99.9% uptime for all cloud infrastructure services during any calendar month.
|
||||
Average response time for API calls shall not exceed 200 milliseconds under normal operating conditions.
|
||||
Customer support response times: Critical issues within 1 hour, Standard issues within 4 hours.
|
||||
|
||||
REMEDIES
|
||||
For each full percentage point below 99.9% availability, Customer receives 10% credit on monthly fees.
|
||||
If response times exceed 500ms for more than 5 minutes in any hour, Customer receives 5% monthly credit.
|
||||
|
||||
MONITORING AND REPORTING
|
||||
Provider will maintain real-time monitoring systems and provide monthly performance reports.
|
||||
All metrics will be measured from Provider's monitoring systems located in primary data centers.
|
||||
|
||||
This SLA remains in effect for the duration of the underlying service agreement.
|
||||
|
||||
Executed by:
|
||||
TechCorp Solutions Inc.
|
||||
Sarah Mitchell, VP Operations
|
||||
Date: March 15, 2024
|
||||
|
||||
CloudFirst Enterprises LLC
|
||||
Robert Chen, CTO
|
||||
Date: March 16, 2024"
|
||||
|
||||
DataSecure_Privacy_Policy_v3.2,"PRIVACY POLICY
|
||||
DataSecure Analytics, Inc. (""Company"") Privacy Policy
|
||||
Effective Date: January 1, 2024
|
||||
Last Updated: February 28, 2024
|
||||
|
||||
INFORMATION COLLECTION
|
||||
We collect information you provide directly, such as account details, usage preferences, and communication records.
|
||||
Automatically collected data includes IP addresses, browser types, device information, and service interaction logs.
|
||||
Third-party integrations may provide additional user behavior and demographic information with consent.
|
||||
|
||||
DATA USAGE
|
||||
Personal information is used to provide services, improve user experience, and communicate service updates.
|
||||
Aggregated, non-identifiable data may be used for analytics, research, and service enhancement.
|
||||
We do not sell personal information to third parties for marketing purposes.
|
||||
|
||||
DATA PROTECTION
|
||||
All data is encrypted in transit using TLS 1.3 and at rest using AES-256 encryption.
|
||||
Access controls limit data access to authorized personnel only on a need-to-know basis.
|
||||
Regular security audits and penetration testing ensure ongoing protection measures.
|
||||
|
||||
DATA RETENTION
|
||||
Personal data is retained for the duration of active service plus 24 months.
|
||||
Logs and analytics data are retained for 12 months unless legally required otherwise.
|
||||
Upon account deletion, personal data is permanently removed within 30 days.
|
||||
|
||||
USER RIGHTS
|
||||
Users may request access to, correction of, or deletion of their personal information.
|
||||
Data portability requests will be fulfilled in standard formats within 30 days.
|
||||
Marketing communications can be opted out of at any time.
|
||||
|
||||
CONTACT
|
||||
For privacy concerns, contact: privacy@datasecure.com
|
||||
Data Protection Officer: Jennifer Walsh, jwalsh@datasecure.com"
|
||||
|
||||
GlobalManufacturing_SupplyChain_Contract_Q2_2024,"SUPPLY CHAIN AGREEMENT
|
||||
This Supply Chain Agreement is entered into between GlobalManufacturing Corp (""Buyer"") and PrecisionParts Ltd (""Supplier"") effective April 1, 2024.
|
||||
|
||||
SCOPE OF SERVICES
|
||||
Supplier will provide automotive components including brake assemblies, suspension parts, and electrical harnesses.
|
||||
All products must meet ISO 9001 quality standards and automotive industry specifications.
|
||||
Delivery schedule: Weekly shipments every Tuesday, with 48-hour advance shipping notifications.
|
||||
|
||||
PRICING AND PAYMENT
|
||||
Component pricing is fixed for initial 6-month term with quarterly price review thereafter.
|
||||
Payment terms: Net 45 days from invoice date via electronic transfer.
|
||||
Volume discounts apply: 5% for orders exceeding 10,000 units per month, 8% for orders exceeding 25,000 units.
|
||||
|
||||
QUALITY REQUIREMENTS
|
||||
All components must pass incoming inspection with less than 0.1% defect rate.
|
||||
Supplier maintains quality certifications including IATF 16949 and environmental compliance.
|
||||
Batch tracking and traceability required for all delivered components.
|
||||
|
||||
LOGISTICS AND DELIVERY
|
||||
Supplier responsible for packaging, labeling, and delivery to Buyer's distribution centers.
|
||||
Delivery windows: 8 AM - 4 PM, Monday through Friday, with advance appointment scheduling.
|
||||
Late delivery penalties: 2% of shipment value for each day beyond scheduled delivery.
|
||||
|
||||
RISK MANAGEMENT
|
||||
Supplier maintains business continuity plans and alternative sourcing strategies.
|
||||
Force majeure events must be reported within 24 hours with mitigation plans.
|
||||
Insurance requirements: $5M general liability, $2M product liability coverage.
|
||||
|
||||
INTELLECTUAL PROPERTY
|
||||
All custom tooling and specifications remain property of Buyer.
|
||||
Supplier grants license to use necessary patents for component manufacturing.
|
||||
|
||||
This agreement shall remain in effect for 24 months with automatic renewal unless terminated.
|
||||
|
||||
GlobalManufacturing Corp
|
||||
Michael Rodriguez, Supply Chain Director
|
||||
Date: April 1, 2024
|
||||
|
||||
PrecisionParts Ltd
|
||||
Amanda Foster, VP Sales
|
||||
Date: April 2, 2024"
|
||||
|
||||
EduTech_StudentData_Management_Policy_2024,"STUDENT DATA MANAGEMENT POLICY
|
||||
EduTech Learning Platform - Data Management and Protection Policy
|
||||
Document Version: 2.1
|
||||
Effective Date: August 15, 2024
|
||||
|
||||
SCOPE AND PURPOSE
|
||||
This policy governs the collection, use, storage, and protection of student educational records and personal information.
|
||||
Applies to all employees, contractors, and third-party service providers accessing student data.
|
||||
Compliance with FERPA, COPPA, and state student privacy laws is mandatory.
|
||||
|
||||
DATA CLASSIFICATION
|
||||
Educational Records: Grades, attendance, assignments, and academic progress information.
|
||||
Personal Information: Names, addresses, contact details, and demographic information.
|
||||
Behavioral Data: Learning patterns, platform usage, and engagement metrics.
|
||||
|
||||
COLLECTION PRINCIPLES
|
||||
Data collection is limited to educational purposes and service improvement only.
|
||||
Parental consent required for students under 13 years of age.
|
||||
Students and parents have right to review and request corrections to educational records.
|
||||
|
||||
ACCESS CONTROLS
|
||||
Role-based access ensures personnel see only data necessary for their functions.
|
||||
Multi-factor authentication required for all system access.
|
||||
Access logs maintained and reviewed monthly for unauthorized activity.
|
||||
|
||||
DATA SHARING
|
||||
Educational records shared only with authorized school personnel and parents/students.
|
||||
No data sharing with third parties for commercial purposes without explicit consent.
|
||||
Research data must be de-identified and aggregated before external sharing.
|
||||
|
||||
SECURITY MEASURES
|
||||
Data encrypted using industry-standard protocols during transmission and storage.
|
||||
Regular security assessments and vulnerability testing conducted quarterly.
|
||||
Incident response plan includes notification procedures for data breaches.
|
||||
|
||||
RETENTION AND DISPOSAL
|
||||
Student records retained according to school district policies, typically 5-7 years post-graduation.
|
||||
Inactive accounts and associated data purged after 2 years of non-use.
|
||||
Secure data destruction protocols ensure complete removal of sensitive information.
|
||||
|
||||
COMPLIANCE MONITORING
|
||||
Annual privacy training required for all staff handling student data.
|
||||
Regular audits ensure ongoing compliance with applicable privacy regulations.
|
||||
Privacy impact assessments conducted for new features or data uses.
|
||||
|
||||
Contact: Dr. Lisa Thompson, Chief Privacy Officer
|
||||
Email: privacy@edutech-learning.com
|
||||
Phone: (555) 123-4567"
|
||||
|
||||
FinanceFirst_Investment_Advisory_Agreement_2024,"INVESTMENT ADVISORY AGREEMENT
|
||||
This Investment Advisory Agreement is entered into between FinanceFirst Advisors LLC (""Advisor"") and Madison Investment Group (""Client"") on May 20, 2024.
|
||||
|
||||
ADVISORY SERVICES
|
||||
Advisor will provide comprehensive investment management and financial planning services.
|
||||
Services include portfolio construction, asset allocation, risk assessment, and performance monitoring.
|
||||
Regular portfolio reviews conducted quarterly with detailed performance reporting.
|
||||
|
||||
INVESTMENT AUTHORITY
|
||||
Client grants Advisor discretionary authority to make investment decisions within agreed parameters.
|
||||
Investment universe includes stocks, bonds, ETFs, mutual funds, and alternative investments as appropriate.
|
||||
All trades executed through qualified broker-dealers with best execution practices.
|
||||
|
||||
FEE STRUCTURE
|
||||
Management fee: 1.25% annually on assets under management, calculated and billed quarterly.
|
||||
Performance fee: 15% of returns exceeding S&P 500 benchmark, calculated annually.
|
||||
Additional fees may apply for specialized services such as tax planning or estate planning.
|
||||
|
||||
CLIENT RESPONSIBILITIES
|
||||
Client must provide accurate financial information and promptly communicate changes in circumstances.
|
||||
Investment objectives and risk tolerance should be reviewed and updated annually.
|
||||
Client responsible for reviewing and approving investment policy statement.
|
||||
|
||||
RISK DISCLOSURE
|
||||
All investments carry risk of loss, and past performance does not guarantee future results.
|
||||
Diversification does not ensure profit or protect against loss in declining markets.
|
||||
Alternative investments may have limited liquidity and higher volatility.
|
||||
|
||||
REGULATORY COMPLIANCE
|
||||
Advisor is registered with the Securities and Exchange Commission as an investment advisor.
|
||||
All activities conducted in accordance with Investment Advisers Act of 1940 and applicable regulations.
|
||||
Form ADV Part 2 brochure provided annually with material updates.
|
||||
|
||||
CONFIDENTIALITY
|
||||
All client information treated as confidential and shared only as necessary for service provision.
|
||||
Third-party service providers bound by confidentiality agreements.
|
||||
Client data protected through secure systems and access controls.
|
||||
|
||||
TERMINATION
|
||||
Either party may terminate agreement with 30 days written notice.
|
||||
Upon termination, Advisor will assist with orderly transfer of assets to new custodian or advisor.
|
||||
Final fee calculation prorated to date of termination.
|
||||
|
||||
FinanceFirst Advisors LLC
|
||||
Thomas Anderson, Managing Partner
|
||||
Date: May 20, 2024
|
||||
|
||||
Madison Investment Group
|
||||
Rebecca Martinez, Chief Investment Officer
|
||||
Date: May 21, 2024"
|
||||
|
||||
HealthSystem_PatientCare_Standards_2024,"PATIENT CARE STANDARDS AND PROTOCOLS
|
||||
Metropolitan Health System - Clinical Care Standards
|
||||
Document ID: MHS-PCS-2024-001
|
||||
Effective Date: June 1, 2024
|
||||
|
||||
PATIENT SAFETY PROTOCOLS
|
||||
All patients must have proper identification verification using two unique identifiers.
|
||||
Medication administration requires independent double-check for high-risk medications.
|
||||
Fall risk assessments completed within 4 hours of admission with appropriate interventions.
|
||||
|
||||
CLINICAL DOCUMENTATION
|
||||
Medical records must be completed within 24 hours of patient encounter.
|
||||
All entries require electronic signature with timestamp and provider identification.
|
||||
Critical values and abnormal results must be communicated and documented immediately.
|
||||
|
||||
INFECTION CONTROL
|
||||
Hand hygiene compliance monitored with target rate of 95% or higher.
|
||||
Personal protective equipment used according to transmission-based precautions.
|
||||
Isolation procedures implemented within 2 hours of identification of infectious conditions.
|
||||
|
||||
EMERGENCY RESPONSE
|
||||
Code team response time target: 3 minutes from activation to arrival.
|
||||
Crash cart and emergency equipment checks performed daily and documented.
|
||||
All staff required to maintain current CPR and emergency response certifications.
|
||||
|
||||
PATIENT COMMUNICATION
|
||||
Patient rights and responsibilities communicated upon admission.
|
||||
Informed consent obtained and documented prior to procedures and treatments.
|
||||
Family involvement encouraged with respect for patient privacy preferences.
|
||||
|
||||
QUALITY MEASURES
|
||||
Patient satisfaction scores monitored monthly with target of 4.5/5.0 or higher.
|
||||
Medication error rates tracked with goal of less than 1 per 1000 patient days.
|
||||
Hospital-acquired infection rates measured and benchmarked against national standards.
|
||||
|
||||
STAFF COMPETENCY
|
||||
Annual competency assessments required for all clinical staff.
|
||||
Continuing education requirements: 24 hours annually for nurses, 40 hours for physicians.
|
||||
Specialty certifications maintained according to department and role requirements.
|
||||
|
||||
TECHNOLOGY STANDARDS
|
||||
Electronic health record system used for all patient documentation.
|
||||
Telemedicine capabilities available for remote consultations and monitoring.
|
||||
Clinical decision support tools integrated to assist with diagnosis and treatment decisions.
|
||||
|
||||
Contact: Dr. Patricia Williams, Chief Medical Officer
|
||||
Email: pwilliams@metrohealthsystem.org
|
||||
Phone: (555) 987-6543"
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
#!/bin/bash
set -e

# PIDs of the background agent processes started below.
WAIT_FOR_PIDS=()

# Print the given message prefixed with a millisecond-precision timestamp.
log() {
  local timestamp message
  timestamp=$(python3 -c 'from datetime import datetime; print(datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:23])')
  message="$*"
  echo "$timestamp - $message"
}

# EXIT handler: stop every agent that is still running, then leave with the
# status the script was already exiting with (instead of always failing).
cleanup() {
  local status=$?
  local pid
  log "Caught signal, terminating all user processes ..."
  for pid in "${WAIT_FOR_PIDS[@]}"; do
    if kill "$pid" 2> /dev/null; then
      log "killed process: $pid"
    fi
  done
  exit "$status"
}

trap cleanup EXIT

# Launch one rag_agent REST server in the background and remember its PID.
#   $1 - agent name passed to --agent
#   $2 - port passed to --rest-port
start_agent() {
  local agent="$1" port="$2"
  log "Starting $agent agent on port $port..."
  uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port "$port" --agent "$agent" &
  WAIT_FOR_PIDS+=($!)
}

start_agent query_parser 10500
start_agent context_builder 10501
start_agent response_generator 10502

# Block until every agent exits; with `set -e`, the first non-zero exit
# status ends the script and triggers cleanup of the remaining agents.
for PID in "${WAIT_FOR_PIDS[@]}"; do
  wait "$PID"
done
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
@baseUrl = http://0.0.0.0:10502
|
||||
@model = gpt-4o
|
||||
|
||||
# Health Check
|
||||
GET {{baseUrl}}/health
|
||||
|
||||
###
|
||||
|
||||
# Test 1: Simple Non-Streaming Chat Completion
|
||||
POST {{baseUrl}}/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "{{model}}",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello! Can you help me understand what machine learning is?"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
###
|
||||
|
||||
# Test 2: Simple Streaming Chat Completion
|
||||
POST {{baseUrl}}/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "{{model}}",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Explain the concept of artificial intelligence in simple terms."
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
}
|
||||
|
||||
### Test 3: Non-Streaming Document Question (sent to localhost:8001)
|
||||
POST http://localhost:8001/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "{{model}}",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
}
|
||||
|
||||
### Test 4: Send Request to the Context Builder Agent (port 10501)
|
||||
POST http://localhost:10501/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "gpt-4o-mini",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
||||
}
|
||||
]
|
||||
}
|
||||
1356
demos/use_cases/rag_agent/uv.lock
generated
1356
demos/use_cases/rag_agent/uv.lock
generated
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue