Salmanap/fix network agent demo (#153)

* staging my changes to re-based from main

* adding debug statements to rust

* merged with main

* ready to push network agent

* removed the incomplete sql example

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
This commit is contained in:
Salman Paracha 2024-10-08 22:19:20 -07:00 committed by GitHub
parent 6acfea7787
commit b63a01fe82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
41 changed files with 252 additions and 1987 deletions

View file

@ -7,5 +7,6 @@ services:
volumes:
- ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_confg.yaml}:/config/arch_config.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ~/archgw_logs/arch_logs:/var/log/
env_file:
- stage.env

View file

@ -2,9 +2,10 @@ use crate::stats::{Gauge, IncrementingMetric};
use derivative::Derivative;
use log::debug;
use proxy_wasm::{traits::Context, types::Status};
use serde::Serialize;
use std::{cell::RefCell, collections::HashMap, fmt::Debug, time::Duration};
#[derive(Derivative)]
#[derive(Derivative, Serialize)]
#[derivative(Debug)]
pub struct CallArgs<'a> {
upstream: &'a str,

View file

@ -1,24 +0,0 @@
FROM Bolt-Function-Calling-1B-Q4_K_M.gguf
# Set the size of the context window used to generate the next token
PARAMETER num_ctx 4096
# Set parameters for response generation
PARAMETER num_predict 1024
PARAMETER temperature 0.1
PARAMETER top_p 0.5
PARAMETER top_k 32022
PARAMETER repeat_penalty 1.0
PARAMETER stop "<|EOT|>"
# Set the random number seed to use for generation
PARAMETER seed 42
# Set the prompt template to be passed into the model
TEMPLATE """{{ if .System }}<begin▁of▁sentence>
{{ .System }}
{{ end }}{{ if .Prompt }}### Instruction:
{{ .Prompt }}
{{ end }}### Response:
{{ .Response }}
<|EOT|>"""

View file

@ -1,24 +0,0 @@
# Function calling
This demo shows how you can use the intelligent prompt gateway as a copilot to explore employee data by calling the correct API functions. It calls the appropriate function and also engages with the user to extract the required parameters. This demo assumes you are running ollama natively.
# Starting the demo
1. Create `.env` file and set OpenAI key using env var `OPENAI_API_KEY`
1. Start services
```sh
docker compose up
```
1. Download Bolt-FC model. This demo assumes we have downloaded [Bolt-Function-Calling-1B:Q4_K_M](https://huggingface.co/katanemolabs/Bolt-Function-Calling-1B.gguf/blob/main/Bolt-Function-Calling-1B-Q4_K_M.gguf) to local folder.
1. If running ollama natively run
```sh
ollama serve
```
2. Create model file in ollama repository
```sh
ollama create Bolt-Function-Calling-1B:Q4_K_M -f Bolt-FC-1B-Q4_K_M.model_file
```
3. Navigate to http://localhost:18080/
4. You can type in queries like "show me the top 5 employees in each department with highest salary"
- You can also ask follow up questions like "just show the top 2"
5. To see metrics navigate to "http://localhost:3000/" (use admin/grafana for login)
- Open up the dashboard named "Intelligent Gateway Overview"
- On this dashboard you can see request latency and number of requests

View file

@ -1,16 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "function-calling api server",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--port", "8001"],
}
]
}

View file

@ -1,19 +0,0 @@
FROM python:3 AS base
FROM base AS builder
WORKDIR /src
COPY requirements.txt /src/
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
COPY . /src
FROM python:3-slim AS output
COPY --from=builder /runtime /usr/local
COPY /app /app
WORKDIR /app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]

View file

@ -1,289 +0,0 @@
import random
from typing import List
from fastapi import FastAPI, HTTPException, Response
from datetime import datetime, date, timedelta, timezone
import logging
from pydantic import BaseModel
from utils import load_sql
import pandas as pd
# Emit INFO-and-above records, each prefixed with a timestamp and its level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-scoped logger used by the request handlers in this module.
logger = logging.getLogger(__name__)
# FastAPI application object; the route decorators in this module register against it.
app = FastAPI()
@app.get("/healthz")
async def healthz():
    # Health probe: always answers with a fixed "ok" payload.
    payload = {"status": "ok"}
    return payload
# Shared database connection, created once when the module is imported and
# passed to every handler's pd.read_sql_query call.
conn = load_sql()
# Module-level name of the employee-name column.
# NOTE(review): top_employees assigns its own local `name_col`, and no other
# visible handler reads this global — confirm before relying on or removing it.
name_col = "name"
class TopEmployees(BaseModel):
    # Request body for POST /top_employees.
    # Column the employees are partitioned by before ranking.
    grouping: str
    # Column used to order employees inside each partition.
    ranking_criteria: str
    # Highest rank (per partition) that is still returned.
    top_n: int
@app.post("/top_employees")
async def top_employees(req: TopEmployees, res: Response):
    """Return the best-ranked employees inside each group.

    Employees are partitioned by ``req.grouping``, ordered by
    ``req.ranking_criteria`` (descending) and every row whose dense rank is at
    most ``req.top_n`` is returned. The shorthands ``yoe`` and ``rating`` are
    expanded to ``years_of_experience`` and ``performance_score``.

    Returns:
        A list of dicts, one per selected row, holding the grouping column,
        the employee name and the ranking column.

    Raises:
        HTTPException: 400 when ``grouping`` or ``ranking_criteria`` is not a
            plain column identifier.
    """
    name_col = "name"
    banner = "* " * 50

    logger.info(
        f"{banner}\n\nCaptured Ranking Criteria: {req.ranking_criteria}\n\n{banner}"
    )

    # Expand the shorthand names callers may send.
    shorthand = {"yoe": "years_of_experience", "rating": "performance_score"}
    req.ranking_criteria = shorthand.get(req.ranking_criteria, req.ranking_criteria)

    logger.info(
        f"{banner}\n\nFinal Ranking Criteria: {req.ranking_criteria}\n\n{banner}"
    )

    # Column names cannot be bound as SQL parameters, so they are interpolated
    # below. Only accept bare ASCII identifiers so a request cannot smuggle
    # arbitrary SQL into the statement.
    for label, value in (
        ("grouping", req.grouping),
        ("ranking_criteria", req.ranking_criteria),
    ):
        if not (value.isascii() and value.isidentifier()):
            raise HTTPException(status_code=400, detail=f"Invalid {label}")

    query = f"""
    SELECT {req.grouping}, {name_col}, {req.ranking_criteria}
    FROM (
        SELECT {req.grouping}, {name_col}, {req.ranking_criteria},
        DENSE_RANK() OVER (PARTITION BY {req.grouping} ORDER BY {req.ranking_criteria} DESC) as emp_rank
        FROM employees
    ) ranked_employees
    WHERE emp_rank <= :top_n;
    """
    result_df = pd.read_sql_query(query, conn, params={"top_n": req.top_n})
    return result_df.to_dict(orient="records")
class AggregateStats(BaseModel):
    # Request body for POST /aggregate_stats.
    # Column the employees are grouped by.
    grouping: str
    # Column the aggregate function is applied to.
    aggregate_criteria: str
    # Aggregate function name (or one of the aliases the handler understands).
    aggregate_type: str
@app.post("/aggregate_stats")
async def aggregate_stats(req: AggregateStats, res: Response):
    """Compute one aggregate per group of employees.

    ``req.aggregate_type`` may be ``sum``/``avg``/``min``/``max`` in any
    letter case (used as given) or one of the aliases ``count``, ``total``,
    ``average``, ``minimum``, ``maximum`` (mapped to the SQL function name).
    The shorthand ``yoe`` for ``aggregate_criteria`` is expanded to
    ``years_of_experience``.

    Returns:
        A list of dicts, one per group, holding the grouping column and a
        column named ``<aggregate_type>_<aggregate_criteria>``.

    Raises:
        HTTPException: 400 for an unknown aggregate type, or when
            ``grouping`` / ``aggregate_criteria`` is not a plain column
            identifier.
    """
    banner = "* " * 50

    logger.info(
        f"{banner}\n\nCaptured Aggregate Criteria: {req.aggregate_criteria}\n\n{banner}"
    )
    if req.aggregate_criteria == "yoe":
        req.aggregate_criteria = "years_of_experience"
    logger.info(
        f"{banner}\n\nFinal Aggregate Criteria: {req.aggregate_criteria}\n\n{banner}"
    )

    logger.info(
        f"{banner}\n\nCaptured Aggregate Type: {req.aggregate_type}\n\n{banner}"
    )
    # Function names that are passed through untouched vs. aliases that are
    # translated to their SQL function name.
    passthrough = {"sum", "avg", "min", "max"}
    aliases = {
        "count": "COUNT",
        "total": "SUM",
        "average": "AVG",
        "minimum": "MIN",
        "maximum": "MAX",
    }
    requested = req.aggregate_type.lower()
    if requested not in passthrough:
        if requested not in aliases:
            raise HTTPException(status_code=400, detail="Invalid aggregate type")
        req.aggregate_type = aliases[requested]
    logger.info(
        f"{banner}\n\nFinal Aggregate Type: {req.aggregate_type}\n\n{banner}"
    )

    # Column names cannot be bound as SQL parameters, so they are interpolated
    # below. Only accept bare ASCII identifiers so a request cannot smuggle
    # arbitrary SQL into the statement.
    for label, value in (
        ("grouping", req.grouping),
        ("aggregate_criteria", req.aggregate_criteria),
    ):
        if not (value.isascii() and value.isidentifier()):
            raise HTTPException(status_code=400, detail=f"Invalid {label}")

    query = f"""
    SELECT {req.grouping}, {req.aggregate_type}({req.aggregate_criteria}) as {req.aggregate_type}_{req.aggregate_criteria}
    FROM employees
    GROUP BY {req.grouping};
    """
    result_df = pd.read_sql_query(query, conn)
    return result_df.to_dict(orient="records")
# 1. Top Employees by Performance, Projects, and Timeframe
class TopEmployeesProjects(BaseModel):
    # Request body for POST /top_employees_projects.
    # Lower bound on the employee's performance score.
    min_performance_score: float
    # Lower bound on the employee's years of experience.
    min_years_experience: int
    # Department the employee must belong to.
    department: str
    # Optional: lower bound on the number of projects.
    min_project_count: int = None
    # Optional: only consider projects started within this many months.
    months_range: int = None
@app.post("/top_employees_projects")
async def employees_projects(req: TopEmployeesProjects, res: Response):
    """List employees of a department with their project counts.

    Employees must meet the minimum performance score and years of
    experience. When ``months_range`` is given, only projects started within
    that many months are considered; when ``min_project_count`` is given,
    employees with fewer matching projects are dropped. Rows are ordered by
    performance score, highest first.

    NOTE(review): the gateway config shown with this demo points the
    ``employees_projects`` target at ``/employees_projects`` while this
    handler is mounted at ``/top_employees_projects`` — confirm which path
    is intended.

    Returns:
        A list of dicts with name, department, years_of_experience,
        performance_score and project_count.
    """
    # Every request value is bound as a named parameter instead of being
    # formatted into the SQL text.
    params = {
        "min_performance_score": req.min_performance_score,
        "min_years_experience": req.min_years_experience,
        "department": req.department,
    }

    # Row-level filter on project start date (evaluated before grouping).
    recent_filter = ""
    if req.months_range:
        params["months_range"] = req.months_range
        recent_filter = "AND p.start_date >= DATE('now', '-' || :months_range || ' months')"

    # An aggregate cannot appear in WHERE; the project-count bound is a
    # post-grouping condition and therefore goes into HAVING.
    having_clause = ""
    if req.min_project_count:
        params["min_project_count"] = req.min_project_count
        having_clause = "HAVING COUNT(p.project_id) >= :min_project_count"

    query = f"""
    SELECT e.name, e.department, e.years_of_experience, e.performance_score, COUNT(p.project_id) as project_count
    FROM employees e
    LEFT JOIN projects p ON e.eid = p.eid
    WHERE e.performance_score >= :min_performance_score
    AND e.years_of_experience >= :min_years_experience
    AND e.department = :department
    {recent_filter}
    GROUP BY e.eid, e.name, e.department, e.years_of_experience, e.performance_score
    {having_clause}
    ORDER BY e.performance_score DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 2. Employees with Salary Growth Since Last Promotion
class SalaryGrowthRequest(BaseModel):
    # Request body for POST /salary_growth.
    # Lower bound on the salary increase percentage.
    min_salary_increase_percentage: float
    # Optional: restrict results to one department.
    department: str = None
@app.post("/salary_growth")
async def salary_growth(req: SalaryGrowthRequest, res: Response):
    """List salary-history rows whose increase meets a minimum percentage.

    Only rows with a non-null promotion date are considered; an optional
    department restricts the employees. Rows are ordered by increase
    percentage, highest first.

    Returns:
        A list of dicts with name, department and salary_increase_percentage.
    """
    # Bind every request value as a named parameter; nothing from the request
    # is formatted into the SQL text.
    params = {"min_salary_increase_percentage": req.min_salary_increase_percentage}
    department_filter = ""
    if req.department:
        department_filter = "AND e.department = :department"
        params["department"] = req.department

    query = f"""
    SELECT e.name, e.department, s.salary_increase_percentage
    FROM employees e
    JOIN salary_history s ON e.eid = s.eid
    WHERE s.salary_increase_percentage >= :min_salary_increase_percentage
    AND s.promotion_date IS NOT NULL
    {department_filter}
    ORDER BY s.salary_increase_percentage DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 4. Employees with Promotions and Salary Increases
class PromotionsIncreasesRequest(BaseModel):
    # Request body for POST /promotions_increases.
    # Calendar year the promotion date must fall in.
    year: int
    # Optional: lower bound on the salary increase percentage.
    min_salary_increase_percentage: float = None
    # Optional: restrict results to one department.
    department: str = None
@app.post("/promotions_increases")
async def promotions_increases(req: PromotionsIncreasesRequest, res: Response):
    """List promotions (with their salary increase) that happened in a year.

    Optional filters restrict the rows to a minimum increase percentage
    and/or a department. Rows are ordered by increase percentage, highest
    first.

    Returns:
        A list of dicts with name, department, salary_increase_percentage and
        promotion_date.
    """
    # strftime('%Y', ...) yields text, so the year is bound as a string to
    # keep the comparison text-to-text, as it was when it was quoted inline.
    params = {"year": str(req.year)}
    filters = []
    if req.min_salary_increase_percentage:
        filters.append("s.salary_increase_percentage >= :min_salary_increase_percentage")
        params["min_salary_increase_percentage"] = req.min_salary_increase_percentage
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department

    # Each optional filter is appended as its own "AND ..." term.
    extra_filters = "".join(f"AND {clause}\n    " for clause in filters)

    query = f"""
    SELECT e.name, e.department, s.salary_increase_percentage, s.promotion_date
    FROM employees e
    JOIN salary_history s ON e.eid = s.eid
    WHERE strftime('%Y', s.promotion_date) = :year
    {extra_filters}
    ORDER BY s.salary_increase_percentage DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 5. Employees with Highest Average Project Performance
class AvgProjPerformanceRequest(BaseModel):
    # Request body for POST /avg_project_performance.
    # Lower bound on the number of qualifying projects per employee.
    min_project_count: int
    # Lower bound on a project's performance score for it to qualify.
    min_performance_score: float
    # Optional: restrict results to one department.
    department: str = None
@app.post("/avg_project_performance")
async def avg_project_performance(req: AvgProjPerformanceRequest, res: Response):
    """Rank employees by the average score of their qualifying projects.

    A project qualifies when its performance score is at least
    ``min_performance_score``; an employee is returned when they have at
    least ``min_project_count`` qualifying projects. An optional department
    restricts the employees. Rows are ordered by average score, highest
    first.

    Returns:
        A list of dicts with name, department, avg_performance_score and
        project_count.
    """
    # Bind every request value as a named parameter; nothing from the request
    # is formatted into the SQL text.
    params = {
        "min_performance_score": req.min_performance_score,
        "min_project_count": req.min_project_count,
    }
    filters = []
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department
    filters.append("p.performance_score >= :min_performance_score")
    where_clause = " AND ".join(filters)

    query = f"""
    SELECT e.name, e.department, AVG(p.performance_score) as avg_performance_score, COUNT(p.project_id) as project_count
    FROM employees e
    JOIN projects p ON e.eid = p.eid
    WHERE {where_clause}
    GROUP BY e.eid, e.name, e.department
    HAVING COUNT(p.project_id) >= :min_project_count
    ORDER BY avg_performance_score DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 6. Employees by Certification and Years of Experience
class CertificationsExperienceRequest(BaseModel):
    # Request body for POST /employees_certifications_experience.
    # Certification names the employee must hold (all of them).
    certifications: List[str]
    # Lower bound on the employee's years of experience.
    min_years_experience: int
    # Optional: restrict results to one department.
    department: str = None
@app.post("/employees_certifications_experience")
async def certifications_experience(req: CertificationsExperienceRequest, res: Response):
    """List employees who hold every requested certification.

    Employees must also meet the minimum years of experience and, when
    given, belong to the requested department. Rows are ordered by years of
    experience, highest first.

    NOTE(review): the gateway config shown with this demo points the
    ``certifications_experience`` target at ``/certifications_experience``
    while this handler is mounted at ``/employees_certifications_experience``
    — confirm which path is intended.

    Returns:
        A list of dicts with name, department, years_of_experience and
        cert_count; an empty list when no certifications are requested.
    """
    # Drop repeated names (keeping first-seen order) so the "has all of them"
    # count below is compared against the number of distinct certifications.
    required_certs = list(dict.fromkeys(req.certifications))
    if not required_certs:
        # An empty IN (...) list matches nothing, so skip the query.
        return []

    # One named placeholder per certification; the values themselves never
    # become part of the SQL text.
    params = {f"cert_{i}": cert for i, cert in enumerate(required_certs)}
    cert_placeholders = ", ".join(f":{key}" for key in params)

    filters = []
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department
    filters.append("e.years_of_experience >= :min_years_experience")
    params["min_years_experience"] = req.min_years_experience
    params["required_cert_count"] = len(required_certs)
    where_clause = " AND ".join(filters)

    # COUNT(DISTINCT ...) so that an employee with the same certification
    # recorded twice is not mistaken for one holding two different ones.
    query = f"""
    SELECT e.name, e.department, e.years_of_experience, COUNT(DISTINCT c.certification_name) as cert_count
    FROM employees e
    JOIN certifications c ON e.eid = c.eid
    WHERE c.certification_name IN ({cert_placeholders})
    AND {where_clause}
    GROUP BY e.eid, e.name, e.department, e.years_of_experience
    HAVING COUNT(DISTINCT c.certification_name) = :required_cert_count
    ORDER BY e.years_of_experience DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')

View file

@ -1,157 +0,0 @@
import pandas as pd
import random
import datetime
import sqlite3
def load_sql():
    """Build the demo database and hand back its open connection.

    A new in-memory SQLite database is created and the employees, projects,
    salary_history and certifications tables are generated into it, in that
    order.
    """
    conn = sqlite3.connect(":memory:")
    # employees goes first: the remaining generators read employee ids from it.
    table_generators = (
        generate_employee_data,
        generate_project_data,
        generate_salary_history,
        generate_certifications,
    )
    for generate_table in table_generators:
        generate_table(conn)
    return conn
# Function to generate random employee data with `eid` as the primary key
def generate_employee_data(conn):
    """Create the ``employees`` table and fill it with 100 random records.

    Each record has a sequential ``eid`` (1..100) plus randomly chosen name,
    position, salary, department, location, hire date, performance score and
    years of experience. An existing ``employees`` table is replaced.

    Args:
        conn: Open database connection the table is written to.
    """
    # Pools the random attributes are drawn from.
    names = ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace", "Hank", "Ivy", "Jack"]
    positions = ["Manager", "Engineer", "Salesperson", "HR Specialist", "Marketing Analyst"]
    departments = ["Engineering", "Marketing", "HR", "Sales", "Finance"]
    locations = ["New York", "San Francisco", "Austin", "Boston", "Chicago"]

    # Hire dates fall in [2000-01-01, 2023-12-31); randrange excludes the end.
    first_hire_date = datetime.date(2000, 1, 1)
    hire_window_days = (datetime.date(2023, 12, 31) - first_hire_date).days

    def random_hire_date():
        """Pick a random date inside the hire window."""
        return first_hire_date + datetime.timedelta(days=random.randrange(hire_window_days))

    employees = [
        {
            "eid": eid,  # Employee ID, referenced by the other tables
            "name": random.choice(names),
            "position": random.choice(positions),
            "salary": round(random.uniform(50000, 150000), 2),  # 50,000 .. 150,000
            "department": random.choice(departments),
            "location": random.choice(locations),
            "hire_date": random_hire_date(),
            "performance_score": round(random.uniform(1, 5), 2),  # 1.0 .. 5.0
            "years_of_experience": random.randint(1, 30),  # 1 .. 30 inclusive
        }
        for eid in range(1, 101)
    ]

    # Persist the records, replacing any previous employees table.
    df_employees = pd.DataFrame(employees)
    df_employees.to_sql('employees', conn, index=False, if_exists='replace')
# Function to generate random project data with `eid`
def generate_project_data(conn):
    """Create the ``projects`` table and fill it with 500 random records.

    Each record gets a sequential ``project_id`` (1..500), an ``eid`` drawn
    from the existing ``employees`` table, a project name, a start date and
    a performance score. An existing ``projects`` table is replaced.

    Args:
        conn: Open database connection; must already contain ``employees``.
    """
    employees = pd.read_sql_query("SELECT eid FROM employees", conn)
    # Plain list of ids: random.choice is defined for sequences, and a list
    # avoids depending on how a pandas Series handles truth tests/indexing.
    employee_ids = employees['eid'].tolist()

    first_start_date = datetime.date(2020, 1, 1)
    projects = [
        {
            # Unique key; the API queries aggregate with COUNT(p.project_id).
            "project_id": project_id,
            "eid": random.choice(employee_ids),  # Foreign key from employees table
            "project_name": f"Project_{random.randint(1, 100)}",
            # Between 2020-01-01 and three years (365 * 3 days) after it.
            "start_date": first_start_date + datetime.timedelta(days=random.randint(0, 365 * 3)),
            "performance_score": round(random.uniform(1, 5), 2),  # 1.0 .. 5.0
        }
        for project_id in range(1, 501)
    ]

    # Persist the records, replacing any previous projects table.
    df_projects = pd.DataFrame(projects)
    df_projects.to_sql('projects', conn, index=False, if_exists='replace')
# Function to generate random salary history data with `eid`
def generate_salary_history(conn):
    """Create the ``salary_history`` table and fill it with 300 random records.

    Each record pairs an ``eid`` drawn from the existing ``employees`` table
    with a salary increase percentage and a promotion date. An existing
    ``salary_history`` table is replaced.

    Args:
        conn: Open database connection; must already contain ``employees``.
    """
    employees = pd.read_sql_query("SELECT eid FROM employees", conn)
    # Plain list of ids: random.choice is defined for sequences, and a list
    # avoids depending on how a pandas Series handles truth tests/indexing.
    employee_ids = employees['eid'].tolist()

    first_promotion_date = datetime.date(2018, 1, 1)
    salary_history = [
        {
            "eid": random.choice(employee_ids),  # Foreign key from employees table
            "salary_increase_percentage": round(random.uniform(5, 30), 2),  # 5% .. 30%
            # Between 2018-01-01 and five years (365 * 5 days) after it.
            "promotion_date": first_promotion_date + datetime.timedelta(days=random.randint(0, 365 * 5)),
        }
        for _ in range(300)
    ]

    # Persist the records, replacing any previous salary_history table.
    df_salary_history = pd.DataFrame(salary_history)
    df_salary_history.to_sql('salary_history', conn, index=False, if_exists='replace')
# Function to generate random certifications data with `eid`
def generate_certifications(conn):
    """Create the ``certifications`` table and fill it with 300 random records.

    Each record pairs an ``eid`` drawn from the existing ``employees`` table
    with one certification name. Pairs are drawn independently, so the same
    employee/certification combination can occur more than once. An existing
    ``certifications`` table is replaced.

    Args:
        conn: Open database connection; must already contain ``employees``.
    """
    employees = pd.read_sql_query("SELECT eid FROM employees", conn)
    # Plain list of ids: random.choice is defined for sequences, and a list
    # avoids depending on how a pandas Series handles truth tests/indexing.
    employee_ids = employees['eid'].tolist()
    certifications_list = ["AWS Certified", "Google Cloud Certified", "PMP", "Scrum Master", "Cisco Certified"]

    employee_certifications = [
        {
            "eid": random.choice(employee_ids),  # Foreign key from employees table
            "certification_name": random.choice(certifications_list),
        }
        for _ in range(300)
    ]

    # Persist the records, replacing any previous certifications table.
    df_certifications = pd.DataFrame(employee_certifications)
    df_certifications.to_sql('certifications', conn, index=False, if_exists='replace')

View file

@ -1,4 +0,0 @@
fastapi
uvicorn
pandas
dateparser

View file

@ -1,197 +0,0 @@
default_prompt_endpoint: "127.0.0.1"
load_balancing: "round_robin"
timeout_ms: 5000
overrides:
# confidence threshold for prompt target intent matching
prompt_target_intent_matching_threshold: 0.7
llm_providers:
- name: open-ai-gpt-4
api_key: $OPEN_AI_API_KEY
model: gpt-4
default: true
prompt_targets:
- type: function_resolver
name: top_employees
description: |
Allows you to find the top employees in different groups, such as departments, locations, or position. You can rank the employees by different criteria, like salary, yoe, or rating. Returns the best-ranked employees for each group, helping you identify top n in the list.
parameters:
- name: grouping
description: |
Select how you'd like to group the employees. For example, you can group them by department, location, or their position. The tool will provide the top-ranked employees within each group you choose.
required: true
type: string
enum: [department, location, position]
- name: ranking_criteria
required: true
type: string
description: |
Choose how you'd like to rank the employees. You can rank them by their salary, their years of experience, or their rating. The tool will sort the employees based on this ranking and return the best ones from each group.
enum: [salary, years_of_experience, performance_score]
- name: top_n
required: true
type: integer
description: |
Enter how many of the top employees you want to see in each group. For example, if you enter 3, the tool will show you the top 3 employees for each group you selected.
endpoint:
cluster: api_server
path: /top_employees
system_prompt: |
You are responsible for retrieving the top N employees per group ranked by a constraint.
- type: function_resolver
name: aggregate_stats
description: |
Calculate summary statistics for groups of employees. You can group employees by categories like department or location and then compute totals, averages, or other statistics for specific attributes such as salary or years of experience.
parameters:
- name: grouping
description: |
Choose how you'd like to organize the employees. For example, you can group them by department, location, or position. The tool will calculate the summary statistics for each group.
required: true
enum: [department, location, position]
- name: aggregate_criteria
description: |
Select the specific attribute you'd like to analyze. This could be something like salary, years of experience, or rating. The tool will calculate the statistic you request for this attribute.
required: true
enum: [salary, years_of_experience, performance_score]
- name: aggregate_type
description: |
Choose the type of statistic you'd like to calculate for the selected attribute. For example, you can calculate the sum, average, minimum, or maximum value for each group.
required: true
enum: [SUM, AVG, MIN, MAX]
endpoint:
cluster: api_server
path: /aggregate_stats
system_prompt: |
You help calculate summary statistics for groups of employees. First, organize the employees by the specified grouping (e.g., department, location, or position). Then, compute the requested statistic (e.g., total, average, minimum, or maximum) for a specific attribute like salary, experience, or rating.
# 1. Top Employees by Performance, Projects, and Timeframe
- type: function_resolver
name: employees_projects
description: |
Fetch employees with the highest performance scores, considering their project participation and years of experience. You can filter by minimum performance score, years of experience, and department. Optionally, you can also filter by recent project participation within the last Y months.
parameters:
- name: min_performance_score
description: Minimum performance score to filter employees.
required: true
type: float
- name: min_years_experience
description: Minimum years of experience to filter employees.
required: true
type: integer
- name: department
description: Department to filter employees by.
required: true
type: string
- name: min_project_count
description: Minimum number of projects employees participated in (optional).
required: false
type: integer
- name: months_range
description: Timeframe (in months) for filtering recent projects (optional).
required: false
type: integer
endpoint:
cluster: api_server
path: /employees_projects
system_prompt: |
You are responsible for retrieving the top N employees ranked by performance and project participation. Use filters for experience and optional project criteria.
# 2. Employees with Salary Growth Since Last Promotion
- type: function_resolver
name: salary_growth
description: |
Fetch employees with the highest salary growth since their last promotion, grouped by department. You can filter by a minimum salary increase percentage and department.
parameters:
- name: min_salary_increase_percentage
description: Minimum percentage increase in salary since the last promotion.
required: true
type: float
- name: department
description: Department to filter employees by (optional).
required: false
type: string
endpoint:
cluster: api_server
path: /salary_growth
system_prompt: |
You are responsible for retrieving employees with the highest salary growth since their last promotion. Filter by minimum salary increase percentage and department.
# 4. Employees with Promotions and Salary Increases by Year
- type: function_resolver
name: promotions_increases
description: |
Fetch employees who were promoted and received a salary increase in a specific year, grouped by department. You can optionally filter by minimum percentage salary increase and department.
parameters:
- name: year
description: The year in which the promotion and salary increase occurred.
required: true
type: integer
- name: min_salary_increase_percentage
description: Minimum percentage salary increase to filter employees.
required: false
type: float
- name: department
description: Department to filter by (optional).
required: false
type: string
endpoint:
cluster: api_server
path: /promotions_increases
system_prompt: |
You are responsible for fetching employees who were promoted and received a salary increase in a specific year. Apply filters for salary increase percentage and department.
# 5. Employees with Highest Average Project Performance
- type: function_resolver
name: avg_project_performance
description: |
Fetch employees with the highest average performance across all projects they have worked on over time. You can filter by minimum project count, department, and minimum performance score.
parameters:
- name: min_project_count
description: Minimum number of projects an employee must have participated in.
required: true
type: integer
- name: min_performance_score
description: Minimum performance score to filter employees.
required: true
type: float
- name: department
description: Department to filter by (optional).
required: false
type: string
endpoint:
cluster: api_server
path: /avg_project_performance
system_prompt: |
You are responsible for fetching employees with the highest average performance across all projects theyve worked on. Apply filters for minimum project count, performance score, and department.
# 6. Employees by Certification and Years of Experience
- type: function_resolver
name: certifications_experience
description: |
Fetch employees who have all the required certifications and meet the minimum years of experience. You can filter by department and provide a list of certifications to match.
parameters:
- name: certifications
description: List of required certifications.
required: true
type: list
- name: min_years_experience
description: Minimum years of experience.
required: true
type: integer
- name: department
description: Department to filter employees by (optional).
required: false
type: string
endpoint:
cluster: api_server
path: /certifications_experience
system_prompt: |
You are responsible for fetching employees who have the required certifications and meet the minimum years of experience. Optionally, filter by department.

View file

@ -1,143 +0,0 @@
services:
config_generator:
build:
context: ../../
dockerfile: config_generator/Dockerfile
volumes:
- ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml
- ./arch_config.yaml:/usr/src/app/arch_config.yaml
- ./generated:/usr/src/app/out
arch:
build:
context: ../../
dockerfile: arch/Dockerfile
hostname: arch
ports:
- "10010:10000"
- "19911:9901"
volumes:
- ./generated/envoy.yaml:/etc/envoy/envoy.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ./arch_config.yaml:/config/arch_config.yaml
depends_on:
config_generator:
condition: service_completed_successfully
model_server:
condition: service_healthy
environment:
- LOG_LEVEL=debug
model_server:
build:
context: ../../model_server
dockerfile: Dockerfile
ports:
- "18091:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
- ./arch_config.yaml:/root/arch_config.yaml
api_server:
build:
context: api_server
dockerfile: Dockerfile
ports:
- "18093:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
function_resolver:
build:
context: ../../function_resolver
dockerfile: Dockerfile
ports:
- "18092:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
environment:
# use ollama endpoint that is hosted by host machine (no virtualization)
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
# uncomment following line to use ollama endpoint that is hosted by docker
# - OLLAMA_ENDPOINT=ollama
- OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M
ollama:
image: ollama/ollama
container_name: ollama
volumes:
- ./ollama:/root/.ollama
restart: unless-stopped
ports:
- '11444:11434'
profiles:
- manual
open-webui:
image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
container_name: open-webui
volumes:
- ./open-webui:/app/backend/data
# depends_on:
# - ollama
ports:
- 18100:8080
environment:
- OLLAMA_BASE_URL=http://${OLLAMA_ENDPOINT:-host.docker.internal}:11434
- WEBUI_AUTH=false
extra_hosts:
- host.docker.internal:host-gateway
restart: unless-stopped
profiles:
- monitoring
chatbot_ui:
build:
context: ../../chatbot_ui
dockerfile: Dockerfile
ports:
- "18090:8080"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1
prometheus:
image: prom/prometheus
container_name: prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yaml'
ports:
- 9100:9090
restart: unless-stopped
volumes:
- ./prometheus:/etc/prometheus
- ./prom_data:/prometheus
profiles:
- monitoring
grafana:
image: grafana/grafana
container_name: grafana
ports:
- 3010:3000
restart: unless-stopped
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=grafana
volumes:
- ./grafana:/etc/grafana/provisioning/datasources
- ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
- ./grafana/dashboards:/var/lib/grafana/dashboards
profiles:
- monitoring

View file

@ -1,23 +0,0 @@
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 15s
alerting:
alertmanagers:
- static_configs:
- targets: []
scheme: http
timeout: 10s
api_version: v1
scrape_configs:
- job_name: envoy
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 10s
metrics_path: /stats
scheme: http
static_configs:
- targets:
- arch:9901
params:
format: ['prometheus']

View file

@ -19,7 +19,7 @@ services:
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 # this is only because we are running the sample app in the same docker container environment as archgw
opentelemetry:
build:

View file

@ -17,6 +17,4 @@ services:
ports:
- "18090:8080"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1

View file

@ -1,4 +1,4 @@
FROM python:3 AS base
FROM python:3.10 AS base
FROM base AS builder
@ -9,11 +9,11 @@ RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
COPY . /src
FROM python:3-slim AS output
FROM python:3.10-slim AS output
COPY --from=builder /runtime /usr/local
COPY /app /app
COPY . /app
WORKDIR /app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-level", "info"]

View file

View file

@ -0,0 +1,71 @@
version: v0.1
listener:
address: 127.0.0.1
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
message_format: huggingface
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: OpenAI
provider: openai
access_key: OPENAI_API_KEY
model: gpt-4o
default: true
# default system prompt used by all prompt targets
system_prompt: |
You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
prompt_targets:
- name: reboot_devices
description: Reboot specific devices or device groups
endpoint:
name: app_server
path: /agent/device_reboot
parameters:
- name: device_ids
type: list
description: A list of device identifiers (IDs) to reboot.
required: true
- name: time_range
type: int
description: Optional time range in days for reboot operations. Defaults to 7.
- name: network_qa
endpoint:
name: app_server
path: /agent/network_summary
description: Handle general Q/A related to networking.
default: true
- name: device_summary
description: Retrieve statistics for specific devices within a time range
endpoint:
name: app_server
path: /agent/device_summary
parameters:
- name: device_ids
type: list
description: A list of device identifiers (IDs) to retrieve statistics for.
required: true # device_ids are required to get device statistics
- name: time_range
type: int
description: Time range in days for which to gather device statistics. Defaults to 7.
default: "7"
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
app_server:
# value could be ip address or a hostname with port
# this could also be a list of endpoints for load balancing
# for example endpoint: [ ip1:port, ip2:port ]
endpoint: host.docker.internal:18083
# max time to wait for a connection to be established
connect_timeout: 0.005s
ratelimits:
- model: gpt-4
selector:
key: selector-key
value: selector-value
limit:
tokens: 1
unit: minute

View file

@ -0,0 +1,21 @@
services:
api_server:
build:
context: .
dockerfile: Dockerfile
ports:
- "18083:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
chatbot_ui:
build:
context: ../../chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1

104
demos/network_agent/main.py Normal file
View file

@ -0,0 +1,104 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from typing import List, Optional
app = FastAPI()
# Request body for POST /agent/device_summary.
class DeviceSummaryRequest(BaseModel):
    # IDs of the devices to report on; required (no default is declared).
    device_ids: List[int]
    # Look-back window in days; falls back to 7 when the caller omits it.
    time_range: Optional[int] = Field(default=7, description="Time range in days, defaults to 7")
# One per-device entry of the device summary response.
class DeviceStatistics(BaseModel):
    device_id: int
    # Human-readable window label; the summary handler fills it as "Last N days".
    time_range: str
    # Free-text description of the device's statistics.
    data: str
# Response body for POST /agent/device_summary: a list of per-device entries.
class DeviceSummaryResponse(BaseModel):
    statistics: List[DeviceStatistics]
# Request body for POST /agent/device_reboot.
class DeviceRebootRequest(BaseModel):
    # IDs of the devices to reboot; required (no default is declared).
    device_ids: List[int]
# Response body for POST /agent/device_reboot.
class CoverageResponse(BaseModel):
    # Outcome label; the reboot handler sets it to "success".
    status: str
    # Free-form details; the reboot handler stores the requested IDs under "device_ids".
    summary: dict
@app.post("/agent/device_reboot", response_model=CoverageResponse)
def reboot_network_device(request_data: DeviceRebootRequest):
    """
    Simulate rebooting the network devices named in the request.

    No real reboot is performed; the endpoint only echoes back the device IDs
    it was asked to reboot.

    Args:
        request_data: Parsed request body carrying the ``device_ids`` to reboot.

    Returns:
        A ``CoverageResponse`` with ``status="success"`` and a ``summary``
        holding the requested ``device_ids``.

    Raises:
        HTTPException: 400 when ``device_ids`` is empty.
    """
    device_ids = request_data.device_ids

    # The request model requires the field to be present, but an empty list
    # still gets this far, so it is rejected explicitly.
    if not device_ids:
        raise HTTPException(status_code=400, detail="'device_ids' parameter is required")

    # Placeholder for the actual device reboot logic.
    return CoverageResponse(status="success", summary={"device_ids": device_ids})
# Post method for device summary
@app.post("/agent/device_summary", response_model=DeviceSummaryResponse)
def get_device_summary(request: DeviceSummaryRequest):
    """
    Return simulated downtime statistics for each requested device.

    Args:
        request: Parsed request body with the ``device_ids`` to report on and
            the ``time_range`` (in days) to mention in the report.

    Returns:
        A ``DeviceSummaryResponse`` with one ``DeviceStatistics`` entry per
        requested device, in request order.
    """
    days = request.time_range

    # Mock data: the n-th device in the request (counting from 1) is reported
    # with n minutes of downtime.
    entries = [
        DeviceStatistics(
            device_id=dev,
            time_range=f"Last {days} days",
            data=f"Device {dev} over the last {days} days experienced {downtime} minutes of downtime.",
        )
        for downtime, dev in enumerate(request.device_ids, start=1)
    ]
    return DeviceSummaryResponse(statistics=entries)
@app.post("/agent/network_summary")
async def policy_qa():
    """
    Answer general networking questions with a fixed, canned reply.

    The handler takes no input and always returns the same payload: a single
    assistant message wrapped in ``choices``, plus ``model`` and ``usage``
    fields.
    """
    reply = {
        "role": "assistant",
        "content": "I am a helpful networking agent, and I can help you get status for network devices or reboot them",
    }
    choice = {
        "message": reply,
        "finish_reason": "completed",
        "index": 0,
    }
    return {
        "choices": [choice],
        "model": "network_agent",
        "usage": {"completion_tokens": 0},
    }
if __name__ == "__main__":
    # A FastAPI application has no ``run()`` method; it has to be served by an
    # ASGI server. uvicorn is imported here so that importing this module
    # (e.g. via ``uvicorn main:app``) does not depend on this script path.
    import uvicorn

    uvicorn.run(app, log_level="debug")

View file

@ -0,0 +1,4 @@
fastapi
uvicorn
pydantic
typing

View file

@ -1,24 +0,0 @@
FROM Bolt-Function-Calling-1B-Q4_K_M.gguf
# Set the size of the context window used to generate the next token
PARAMETER num_ctx 4096
# Set parameters for response generation
PARAMETER num_predict 1024
PARAMETER temperature 0.1
PARAMETER top_p 0.5
PARAMETER top_k 32022
PARAMETER repeat_penalty 1.0
PARAMETER stop "<|EOT|>"
# Set the random number seed to use for generation
PARAMETER seed 42
# Set the prompt template to be passed into the model
TEMPLATE """{{ if .System }}<begin▁of▁sentence>
{{ .System }}
{{ end }}{{ if .Prompt }}### Instruction:
{{ .Prompt }}
{{ end }}### Response:
{{ .Response }}
<|EOT|>"""

View file

@ -1,24 +0,0 @@
# Function calling
This demo shows how you can use the intelligent prompt gateway as a network copilot that can report on the correlation between packet loss and device reboots, downtime, or maintenance. This demo assumes you are running ollama natively. If you want to run ollama inside docker, please update the ollama endpoint in the docker-compose file.
# Starting the demo
1. Create `.env` file and set OpenAI key using env var `OPENAI_API_KEY`
1. Start services
```sh
docker compose up
```
1. Download Bolt-FC model. This demo assumes we have downloaded [Bolt-Function-Calling-1B:Q4_K_M](https://huggingface.co/katanemolabs/Bolt-Function-Calling-1B.gguf/blob/main/Bolt-Function-Calling-1B-Q4_K_M.gguf) to local folder.
1. If running ollama natively run
```sh
ollama serve
```
2. Create model file in ollama repository
```sh
ollama create Bolt-Function-Calling-1B:Q4_K_M -f Bolt-FC-1B-Q4_K_M.model_file
```
3. Navigate to http://localhost:18080/
4. You can type in queries like "show me any packet drops due to interface failure in the past 3 days"
- You can also ask follow up questions like "show me just the ones with maximum 200 in errors"
5. To see metrics navigate to "http://localhost:3000/" (use admin/grafana for login)
- Open the dashboard named "Intelligent Gateway Overview"
- On this dashboard you can see request latency and the number of requests

View file

@ -1,16 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "function-calling api server",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--port", "8001"],
}
]
}

View file

@ -1,184 +0,0 @@
from fastapi import FastAPI, Response
from datetime import datetime, timezone
import logging
from pydantic import BaseModel
from utils import load_sql, load_params
import pandas as pd
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
app = FastAPI()
@app.get("/healthz")
async def healthz():
    """Health-check endpoint: always reports an ``ok`` status."""
    payload = {"status": "ok"}
    return payload
# Created once at import time and passed as the connection to every
# pd.read_sql_query call made by the handlers below.
conn = load_sql()
# NOTE(review): not referenced anywhere in the visible part of this module — confirm it is still needed.
name_col = "name"
# Request body for the interface-down packet-drop endpoint; every field defaults to None.
# NOTE(review): the fields are annotated as plain str/int while defaulting to None —
# confirm the pydantic version in use accepts an explicit null for them.
class PacketDropCorrelationRequest(BaseModel):
    from_time: str = None  # Optional natural language timeframe
    ifname: str = None  # Optional interface name filter
    region: str = None  # Optional region filter
    # Optional lower/upper bounds on the interface error and discard counters;
    # presumably turned into SQL filters by load_params — verify there.
    min_in_errors: int = None
    max_in_errors: int = None
    min_out_errors: int = None
    max_out_errors: int = None
    min_in_discards: int = None
    max_in_discards: int = None
    min_out_discards: int = None
    max_out_discards: int = None
@app.post("/interface_down_packet_drop")
@app.post("/interface_down_pkt_drop")
async def interface_down_packet_drop(req: PacketDropCorrelationRequest, res: Response):
    """
    Return interface error/discard counters joined with flow records.

    The handler is registered under two paths: ``/interface_down_packet_drop``,
    which is the path the gateway's prompt target is configured to call, and
    the original ``/interface_down_pkt_drop``, kept so existing callers keep
    working.

    Args:
        req: Optional filters; ``load_params`` turns them into the bound query
            parameters and the extra WHERE conditions used below.
        res: FastAPI response object (not used by this handler).

    Returns:
        A list of row dicts. When the query matches nothing, a single
        placeholder row (zero counters, "0.0.0.0" addresses, current UTC time)
        is returned instead of an empty list.
    """
    params, filters = load_params(req)

    # Extra conditions are appended after the mandatory time filter, so they
    # need a leading AND when present.
    # NOTE(review): these fragments are interpolated into the SQL text; confirm
    # load_params only emits placeholder-based conditions, never raw user input.
    where_clause = " AND ".join(filters)
    if where_clause:
        where_clause = "AND " + where_clause

    # Query packet errors and flows from interfacestats and ts_flow
    query = f"""
    SELECT
        d.switchip AS device_ip_address,
        i.in_errors,
        i.in_discards,
        i.out_errors,
        i.out_discards,
        i.ifname,
        t.src_addr,
        t.dst_addr,
        t.time AS flow_time,
        i.time AS interface_time
    FROM
        device d
    INNER JOIN
        interfacestats i
        ON d.device_mac_address = i.device_mac_address
    INNER JOIN
        ts_flow t
        ON d.switchip = t.sampler_address
    WHERE
        i.time >= :from_time -- Using the converted timestamp
        {where_clause}
    ORDER BY
        i.time;
    """

    correlated_data = pd.read_sql_query(query, conn, params=params)

    if correlated_data.empty:
        # Callers always get at least one row; this one is made of placeholders.
        default_response = {
            "device_ip_address": "0.0.0.0",  # Placeholder IP
            "in_errors": 0,
            "in_discards": 0,
            "out_errors": 0,
            "out_discards": 0,
            "ifname": req.ifname or "unknown",  # Interface from the request, if any
            "src_addr": "0.0.0.0",  # Placeholder source IP
            "dst_addr": "0.0.0.0",  # Placeholder destination IP
            "flow_time": str(datetime.now(timezone.utc)),  # Current timestamp
            "interface_time": str(datetime.now(timezone.utc)),  # Current timestamp
        }
        return [default_response]

    logger.info("Correlated Packet Drop Data: %s", correlated_data)

    return correlated_data.to_dict(orient='records')
# Request body for POST /packet_errors_impact_flow; every field defaults to None.
# NOTE(review): the fields are annotated as plain str/int while defaulting to None —
# confirm the pydantic version in use accepts an explicit null for them.
class FlowPacketErrorCorrelationRequest(BaseModel):
    from_time: str = None  # Optional natural language timeframe
    ifname: str = None  # Optional interface name filter
    region: str = None  # Optional region filter
    # Optional lower/upper bounds on the interface error and discard counters;
    # presumably turned into SQL filters by load_params — verify there.
    min_in_errors: int = None
    max_in_errors: int = None
    min_out_errors: int = None
    max_out_errors: int = None
    min_in_discards: int = None
    max_in_discards: int = None
    min_out_discards: int = None
    max_out_discards: int = None
@app.post("/packet_errors_impact_flow")
async def packet_errors_impact_flow(
    req: FlowPacketErrorCorrelationRequest, res: Response
):
    """
    Return interface error counters paired with flows seen close in time.

    Rows are kept only when the flow and the interface sample are at most
    300 seconds apart, on top of the time filter and any extra conditions
    produced by ``load_params``.

    Args:
        req: Optional filters, converted by ``load_params`` into bound query
            parameters and additional WHERE conditions.
        res: FastAPI response object (not used by this handler).

    Returns:
        A list of row dicts, or a one-element list holding a placeholder row
        when the query matches nothing.
    """
    params, filters = load_params(req)

    # Extra conditions follow the fixed ones, hence the leading AND.
    joined_filters = " AND ".join(filters)
    extra_conditions = "AND " + joined_filters if joined_filters else joined_filters

    # Query the packet errors and flows, correlating by timestamps
    sql = f"""
    SELECT
        d.switchip AS device_ip_address,
        i.in_errors,
        i.in_discards,
        i.out_errors,
        i.out_discards,
        i.ifname,
        t.src_addr,
        t.dst_addr,
        t.src_port,
        t.dst_port,
        t.packets,
        t.time AS flow_time,
        i.time AS error_time
    FROM
        device d
    INNER JOIN
        interfacestats i
        ON d.device_mac_address = i.device_mac_address
    INNER JOIN
        ts_flow t
        ON d.switchip = t.sampler_address
    WHERE
        i.time >= :from_time
        AND ABS(strftime('%s', t.time) - strftime('%s', i.time)) <= 300 -- Correlate within 5 minutes
        {extra_conditions}
    ORDER BY
        i.time;
    """

    rows = pd.read_sql_query(sql, conn, params=params)

    # Return the correlated data if found
    if not rows.empty:
        return rows.to_dict(orient='records')

    # Nothing matched: hand back a single row made of placeholders.
    placeholder = {
        "device_ip_address": "0.0.0.0",
        "in_errors": 0,
        "in_discards": 0,
        "out_errors": 0,
        "out_discards": 0,
        "ifname": req.ifname or "unknown",
        "src_addr": "0.0.0.0",
        "dst_addr": "0.0.0.0",
        "src_port": 0,
        "dst_port": 0,
        "packets": 0,
        "flow_time": str(datetime.now(timezone.utc)),
        "error_time": str(datetime.now(timezone.utc)),
    }
    return [placeholder]

View file

@ -1,4 +0,0 @@
fastapi
uvicorn
pandas
dateparser

View file

@ -1,126 +0,0 @@
default_prompt_endpoint: "127.0.0.1"
load_balancing: "round_robin"
timeout_ms: 5000
overrides:
# confidence threshold for prompt target intent matching
prompt_target_intent_matching_threshold: 0.7
llm_providers:
- name: open-ai-gpt-4
api_key: $OPEN_AI_API_KEY
model: gpt-4
default: true
prompt_targets:
- type: function_resolver
name: interface_down_packet_drop
description: |
Checks for packet drops due to interface unavailability like reboots, shutdowns, or maintainence events. It allows filtering the results by timeframes, interface name, region, and packet error thresholds.
parameters:
- name: from_time
description: An optional natural language timeframe (e.g., "past 7 days", "since a month") to define the time range for packet drop analysis.
required: false
type: string
- name: ifname
description: An optional interface name filter to apply.
required: false
type: string
- name: region
description: An optional region filter to apply (from the device table).
required: false
type: string
- name: min_in_errors
description: Minimum number of in_errors to filter results.
required: false
type: integer
- name: max_in_errors
description: Maximum number of in_errors to filter results.
required: false
type: integer
- name: min_out_errors
description: Minimum number of out_errors to filter results.
required: false
type: integer
- name: max_out_errors
description: Maximum number of out_errors to filter results.
required: false
type: integer
- name: min_in_discards
description: Minimum number of in_discards to filter results.
required: false
type: integer
- name: max_in_discards
description: Maximum number of in_discards to filter results.
required: false
type: integer
- name: min_out_discards
description: Minimum number of out_discards to filter results.
required: false
type: integer
- name: max_out_discards
description: Maximum number of out_discards to filter results.
required: false
type: integer
endpoint:
cluster: api_server
path: /interface_down_packet_drop
system_prompt: |
You are responsible for correlating packet drops with interface down events by analyzing packet errors from the given data.
- type: function_resolver
name: packet_errors_impact_flow
description: |
To find whether packet flows are impacted due to packet errors by correlating the timestamps between the packet errors and the flows. It allows filtering the results by timeframes, interface name, region, and packet error thresholds.
parameters:
- name: from_time
description: An optional natural language timeframe (e.g., "past 7 days", "since a month") to define the time range for the analysis.
required: false
type: string
- name: ifname
description: An optional interface name filter to apply.
required: false
type: string
- name: region
description: An optional region filter to apply (from the device table).
required: false
type: string
- name: min_in_errors
description: Minimum number of in_errors to filter results.
required: false
type: integer
- name: max_in_errors
description: Maximum number of in_errors to filter results.
required: false
type: integer
- name: min_out_errors
description: Minimum number of out_errors to filter results.
required: false
type: integer
- name: max_out_errors
description: Maximum number of out_errors to filter results.
required: false
type: integer
- name: min_in_discards
description: Minimum number of in_discards to filter results.
required: false
type: integer
- name: max_in_discards
description: Maximum number of in_discards to filter results.
required: false
type: integer
- name: min_out_discards
description: Minimum number of out_discards to filter results.
required: false
type: integer
- name: max_out_discards
description: Maximum number of out_discards to filter results.
required: false
type: integer
endpoint:
cluster: api_server
path: /packet_errors_impact_flow
system_prompt: |
You are responsible for finding and correlating packet errors with the packet flows based on timestamps given in the data. This correlation helps identify if packet flows are impacted by packet errors.

View file

@ -1,142 +0,0 @@
services:
config_generator:
build:
context: ../../
dockerfile: config_generator/Dockerfile
volumes:
- ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml
- ./arch_config.yaml:/usr/src/app/arch_config.yaml
- ./generated:/usr/src/app/out
arch:
build:
context: ../../
dockerfile: arch/Dockerfile
hostname: arch
ports:
- "10000:10000"
- "19901:9901"
volumes:
- ./generated/envoy.yaml:/etc/envoy/envoy.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ./arch_config.yaml:/config/arch_config.yaml
depends_on:
config_generator:
condition: service_completed_successfully
model_server:
condition: service_healthy
environment:
- LOG_LEVEL=debug
model_server:
build:
context: ../../model_server
dockerfile: Dockerfile
ports:
- "18081:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
- ./arch_config.yaml:/root/arch_config.yaml
api_server:
build:
context: api_server
dockerfile: Dockerfile
ports:
- "18083:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
function_resolver:
build:
context: ../../function_resolver
dockerfile: Dockerfile
ports:
- "18082:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
environment:
# use ollama endpoint that is hosted by host machine (no virtualization)
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
# uncomment following line to use ollama endpoint that is hosted by docker
# - OLLAMA_ENDPOINT=ollama
ollama:
image: ollama/ollama
container_name: ollama
volumes:
- ./ollama:/root/.ollama
restart: unless-stopped
ports:
- '11434:11434'
profiles:
- manual
open-webui:
image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
container_name: open-webui
volumes:
- ./open-webui:/app/backend/data
# depends_on:
# - ollama
ports:
- 18090:8080
environment:
- OLLAMA_BASE_URL=http://${OLLAMA_ENDPOINT:-host.docker.internal}:11434
- WEBUI_AUTH=false
extra_hosts:
- host.docker.internal:host-gateway
restart: unless-stopped
profiles:
- monitoring
chatbot_ui:
build:
context: ../../chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1
prometheus:
image: prom/prometheus
container_name: prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yaml'
ports:
- 9090:9090
restart: unless-stopped
volumes:
- ./prometheus:/etc/prometheus
- ./prom_data:/prometheus
profiles:
- monitoring
grafana:
image: grafana/grafana
container_name: grafana
ports:
- 3000:3000
restart: unless-stopped
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=grafana
volumes:
- ./grafana:/etc/grafana/provisioning/datasources
- ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
- ./grafana/dashboards:/var/lib/grafana/dashboards
profiles:
- monitoring

View file

@ -1,12 +0,0 @@
apiVersion: 1
providers:
- name: "Dashboard provider"
orgId: 1
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: false
options:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: true

View file

@ -1,355 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_internal_upstream_rq_time_sum[1m]) / rate(envoy_cluster_internal_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request latency - internal (ms)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request latency - external (ms)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_internal_upstream_rq_completed[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_external_upstream_rq_completed[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "B",
"useBackend": false
}
],
"title": "Upstream request count",
"type": "timeseries"
}
],
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Intelligent Gateway Overview",
"uid": "adt6uhx5lk8aob",
"version": 3,
"weekStart": ""
}

View file

@ -1,9 +0,0 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus:9090
isDefault: true
access: proxy
editable: true

View file

@ -1,23 +0,0 @@
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 15s
alerting:
alertmanagers:
- static_configs:
- targets: []
scheme: http
timeout: 10s
api_version: v1
scrape_configs:
- job_name: envoy
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 10s
metrics_path: /stats
scheme: http
static_configs:
- targets:
- arch:9901
params:
format: ['prometheus']

View file

@ -1,43 +0,0 @@
default_prompt_endpoint: "127.0.0.1"
load_balancing: "round_robin"
timeout_ms: 5000
# should not be here
embedding_provider:
name: "bge-large-en-v1.5"
model: "BAAI/bge-large-en-v1.5"
llm_providers:
- name: open-ai-gpt-4
api_key: $OPENAI_API_KEY
model: gpt-4
default: true
prompt_guards:
input_guards:
jailbreak:
on_exception_message: Looks like you are curious about my jailbreak detection abilities.
toxicity:
on_exception_message: Looks like you are curious about my toxicity detection abilities.
prompt_targets:
- type: function_resolver
name: weather_forecast
description: This function resolver provides weather forecast information for a given city.
parameters:
- name: city
required: true
description: The city for which the weather forecast is requested.
- name: days
description: The number of days for which the weather forecast is requested.
- name: units
description: The units in which the weather forecast is requested.
endpoint:
cluster: weatherhost
path: /weather
system_prompt: |
You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries:
- Use farenheight for temperature
- Use miles per hour for wind speed

View file

@ -1,94 +0,0 @@
services:
config_generator:
build:
context: ../../
dockerfile: config_generator/Dockerfile
volumes:
- ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml
- ./arch_config.yaml:/usr/src/app/arch_config.yaml
- ./generated:/usr/src/app/out
arch:
build:
context: ../../
dockerfile: arch/Dockerfile
hostname: arch
ports:
- "10000:10000"
- "19901:9901"
volumes:
- ./generated/envoy.yaml:/etc/envoy/envoy.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ./arch_config.yaml:/config/arch_config.yaml
depends_on:
config_generator:
condition: service_completed_successfully
model_server:
condition: service_healthy
environment:
- LOG_LEVEL=debug
model_server:
build:
context: ../../model_server
dockerfile: Dockerfile
ports:
- "18081:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
- ./arch_config.yaml:/root/arch_config.yaml
# Uncomment following lines to enable GPU support
# deploy:
# resources:
# reservations:
# devices:
# - capabilities: [gpu]
# runtime: nvidia # Enables GPU support
# environment:
# - NVIDIA_VISIBLE_DEVICES=all # Use all available GPUs
function_resolver:
build:
context: ../../function_resolver
dockerfile: Dockerfile
ports:
- "18082:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
environment:
# use ollama endpoint that is hosted by host machine (no virtualization)
- OLLAMA_ENDPOINT=host.docker.internal
# uncomment following line to use ollama endpoint that is hosted by docker
# - OLLAMA_ENDPOINT=ollama
ollama:
image: ollama/ollama
container_name: ollama
volumes:
- ./ollama:/root/.ollama
restart: unless-stopped
ports:
- '11434:11434'
profiles:
- manual
chatbot_ui:
build:
context: ../../chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1

View file

@ -1,9 +1,7 @@
version: v0.1
listen:
address: 0.0.0.0 # or 127.0.0.1
port: 10000
# Defines how Arch should parse the content from application/json or text/plain Content-Type in the HTTP request
listener:
address: 127.0.0.1
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
message_format: huggingface
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
@ -13,25 +11,42 @@ llm_providers:
access_key: OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
# default system prompt used by all prompt targets
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
system_prompt: |
You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
prompt_targets:
- name: reboot_devices
description: Reboot specific devices or device groups
path: /agent/device_reboot
parameters:
- name: device_ids
type: list
description: A list of device identifiers (IDs) to reboot.
required: false
- name: device_group
type: str
description: The name of the device group to reboot
required: false
- name: network_qa
endpoint:
name: app_server
path: /agent/network_summary
description: Handle general Q/A related to networking.
default: true
- name: reboot_devices
description: Reboot specific devices or device groups
endpoint:
name: app_server
path: /agent/device_reboot
parameters:
- name: device_ids
type: list
description: A list of device identifiers (IDs) to reboot.
required: true
- name: device_summary
description: Retrieve statistics for specific devices within a time range
endpoint:
name: app_server
path: /agent/device_summary
parameters:
- name: device_ids
type: list
description: A list of device identifiers (IDs) to retrieve statistics for.
required: true # device_ids are required to get device statistics
- name: time_range
type: int
description: Time range in days for which to gather device statistics. Defaults to 7.
default: "7"
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
@ -39,6 +54,6 @@ endpoints:
# value could be ip address or a hostname with port
# this could also be a list of endpoints for load balancing
# for example endpoint: [ ip1:port, ip2:port ]
endpoint: 127.0.0.1:80
endpoint: host.docker.internal:18083
# max time to wait for a connection to be established
connect_timeout: 0.005s

View file

@ -35,11 +35,11 @@ def start_server():
print("Server is already running. Use 'model_server restart' to restart it.")
sys.exit(1)
print(f"Starting Archgw Model Server")
print(f"Starting Archgw Model Server - Loading some awesomeness, this may take a little time.)")
process = subprocess.Popen(
["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "51000"],
start_new_session=True,
stdout=subprocess.DEVNULL, # Suppress standard output. There is a logger that model_server prints to
stdout=subprocess.DEVNULL, # Suppress standard output. There is a logger that model_server prints to
stderr=subprocess.DEVNULL, # Suppress standard error. There is a logger that model_server prints to
)

View file

@ -7,6 +7,7 @@ from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForSequenc
def get_device():
if torch.cuda.is_available():
device = "cuda"
elif torch.backends.mps.is_available():
@ -14,10 +15,12 @@ def get_device():
else:
device = "cpu"
print(f"Devices Avialble: {device}")
return device
def load_transformers(model_name=os.getenv("MODELS", "katanemo/bge-large-en-v1.5-onnx")):
print("Loading Embedding Model")
transformers = {}
device = get_device()
transformers["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
@ -33,6 +36,7 @@ def load_guard_model(
model_name,
hardware_config="cpu",
):
print("Loading Guard Model")
guard_model = {}
guard_model["tokenizer"] = AutoTokenizer.from_pretrained(
model_name, trust_remote_code=True
@ -58,9 +62,7 @@ def load_guard_model(
return guard_model
def load_zero_shot_models(
model_name=os.getenv("ZERO_SHOT_MODELS", "katanemo/deberta-base-nli-onnx")
):
def load_zero_shot_models(model_name=os.getenv("ZERO_SHOT_MODELS", "katanemo/deberta-base-nli-onnx")):
zero_shot_model = {}
device = get_device()
zero_shot_model["model"] = ORTModelForSequenceClassification.from_pretrained(
@ -79,6 +81,5 @@ def load_zero_shot_models(
return zero_shot_model
if __name__ == "__main__":
print(get_device())

View file

@ -26,6 +26,7 @@ guard_model_config = load_yaml_config("guard_model_config.yaml")
mode = os.getenv("MODE", "cloud")
logger.info(f"Serving model mode: {mode}")
print(f"Serving model mode: {mode}")
if mode not in ["cloud", "local-gpu", "local-cpu"]:
raise ValueError(f"Invalid mode: {mode}")
if mode == "local-cpu":