diff --git a/arch/docker-compose.yaml b/arch/docker-compose.yaml index 750ef19d..582e5a2f 100644 --- a/arch/docker-compose.yaml +++ b/arch/docker-compose.yaml @@ -7,5 +7,6 @@ services: volumes: - ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_confg.yaml}:/config/arch_config.yaml - /etc/ssl/cert.pem:/etc/ssl/cert.pem + - ~/archgw_logs/arch_logs:/var/log/ env_file: - stage.env diff --git a/arch/src/http.rs b/arch/src/http.rs index 93cf5118..21380b0f 100644 --- a/arch/src/http.rs +++ b/arch/src/http.rs @@ -2,9 +2,10 @@ use crate::stats::{Gauge, IncrementingMetric}; use derivative::Derivative; use log::debug; use proxy_wasm::{traits::Context, types::Status}; +use serde::Serialize; use std::{cell::RefCell, collections::HashMap, fmt::Debug, time::Duration}; -#[derive(Derivative)] +#[derive(Derivative, Serialize)] #[derivative(Debug)] pub struct CallArgs<'a> { upstream: &'a str, diff --git a/demos/employee_details_copilot_arch/Bolt-FC-1B-Q4_K_M.model_file b/demos/employee_details_copilot_arch/Bolt-FC-1B-Q4_K_M.model_file deleted file mode 100644 index 1def85b1..00000000 --- a/demos/employee_details_copilot_arch/Bolt-FC-1B-Q4_K_M.model_file +++ /dev/null @@ -1,24 +0,0 @@ -FROM Bolt-Function-Calling-1B-Q4_K_M.gguf - -# Set the size of the context window used to generate the next token -PARAMETER num_ctx 4096 - -# Set parameters for response generation -PARAMETER num_predict 1024 -PARAMETER temperature 0.1 -PARAMETER top_p 0.5 -PARAMETER top_k 32022 -PARAMETER repeat_penalty 1.0 -PARAMETER stop "<|EOT|>" - -# Set the random number seed to use for generation -PARAMETER seed 42 - -# Set the prompt template to be passed into the model -TEMPLATE """{{ if .System }}<|begin▁of▁sentence|> -{{ .System }} -{{ end }}{{ if .Prompt }}### Instruction: -{{ .Prompt }} -{{ end }}### Response: -{{ .Response }} -<|EOT|>""" diff --git a/demos/employee_details_copilot_arch/README.md b/demos/employee_details_copilot_arch/README.md deleted file mode 100644 index 44d7d60b..00000000 --- a/demos/employee_details_copilot_arch/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Function calling -This demo shows how you can use intelligent prompt gateway as copilot to explore employee data by calling the correct api functions. It calls appropriate function and also engages with user to extract required parameters. This demo assumes you are using ollama natively. - -# Starting the demo -1. Create `.env` file and set OpenAI key using env var `OPENAI_API_KEY` -1. Start services - ```sh - docker compose up - ``` -1. Download Bolt-FC model. This demo assumes we have downloaded [Bolt-Function-Calling-1B:Q4_K_M](https://huggingface.co/katanemolabs/Bolt-Function-Calling-1B.gguf/blob/main/Bolt-Function-Calling-1B-Q4_K_M.gguf) to local folder. -1. If running ollama natively run - ```sh - ollama serve - ``` -2. Create model file in ollama repository - ```sh - ollama create Bolt-Function-Calling-1B:Q4_K_M -f Bolt-FC-1B-Q4_K_M.model_file - ``` -3. Navigate to http://localhost:18080/ -4. You can type in queries like "show me the top 5 employees in each department with highest salary" - - You can also ask follow up questions like "just show the top 2" -5. To see metrics navigate to "http://localhost:3000/" (use admin/grafana for login) - - Open up dahsboard named "Intelligent Gateway Overview" - - On this dashboard you can see reuqest latency and number of requests diff --git a/demos/employee_details_copilot_arch/api_server/.vscode/launch.json b/demos/employee_details_copilot_arch/api_server/.vscode/launch.json deleted file mode 100644 index 4d9c76d4..00000000 --- a/demos/employee_details_copilot_arch/api_server/.vscode/launch.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "function-calling api server", - "cwd": "${workspaceFolder}/app", - "type": "debugpy", - "request": "launch", - "module": "uvicorn", - "args": ["main:app","--reload", "--port", "8001"], - } - ] -} diff --git a/demos/employee_details_copilot_arch/api_server/Dockerfile b/demos/employee_details_copilot_arch/api_server/Dockerfile deleted file mode 100644 index abd21357..00000000 --- a/demos/employee_details_copilot_arch/api_server/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM python:3 AS base - -FROM base AS builder - -WORKDIR /src - -COPY requirements.txt /src/ -RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt - -COPY . /src - -FROM python:3-slim AS output - -COPY --from=builder /runtime /usr/local - -COPY /app /app -WORKDIR /app - -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] diff --git a/demos/employee_details_copilot_arch/api_server/app/main.py b/demos/employee_details_copilot_arch/api_server/app/main.py deleted file mode 100644 index 11d36758..00000000 --- a/demos/employee_details_copilot_arch/api_server/app/main.py +++ /dev/null @@ -1,289 +0,0 @@ -import random -from typing import List -from fastapi import FastAPI, HTTPException, Response -from datetime import datetime, date, timedelta, timezone -import logging -from pydantic import BaseModel -from utils import load_sql -import pandas as pd - - -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -app = FastAPI() - -@app.get("/healthz") -async def healthz(): - return { - "status": "ok" - } - -conn = load_sql() -name_col = "name" - - -class TopEmployees(BaseModel): - grouping: str - ranking_criteria: str - top_n: int - - -@app.post("/top_employees") -async def top_employees(req: TopEmployees, res: Response): - name_col = "name" - # Check if `req.ranking_criteria` is a Text object and extract its value accordingly - logger.info( - f"{'* ' * 50}\n\nCaptured Ranking Criteria: {req.ranking_criteria}\n\n{'* ' * 50}" - ) - - if req.ranking_criteria == "yoe": - req.ranking_criteria = "years_of_experience" - elif req.ranking_criteria == "rating": - req.ranking_criteria = "performance_score" - - logger.info( - f"{'* ' * 50}\n\nFinal Ranking Criteria: {req.ranking_criteria}\n\n{'* ' * 50}" - ) - - query = f""" - SELECT {req.grouping}, {name_col}, {req.ranking_criteria} - FROM ( - SELECT {req.grouping}, {name_col}, {req.ranking_criteria}, - DENSE_RANK() OVER (PARTITION BY {req.grouping} ORDER BY {req.ranking_criteria} DESC) as emp_rank - FROM employees - ) ranked_employees - WHERE emp_rank <= {req.top_n}; - """ - result_df = pd.read_sql_query(query, conn) - result = result_df.to_dict(orient="records") - return result - - -class AggregateStats(BaseModel): - grouping: str - aggregate_criteria: str - aggregate_type: str - - -@app.post("/aggregate_stats") -async def aggregate_stats(req: AggregateStats, res: Response): - logger.info( - f"{'* ' * 50}\n\nCaptured Aggregate Criteria: {req.aggregate_criteria}\n\n{'* ' * 50}" - ) - - if req.aggregate_criteria == "yoe": - req.aggregate_criteria = "years_of_experience" - - logger.info( - f"{'* ' * 50}\n\nFinal Aggregate Criteria: {req.aggregate_criteria}\n\n{'* ' * 50}" - ) - - logger.info( - f"{'* ' * 50}\n\nCaptured Aggregate Type: {req.aggregate_type}\n\n{'* ' * 50}" - ) - if req.aggregate_type.lower() not in ["sum", "avg", "min", "max"]: - if req.aggregate_type.lower() == "count": - req.aggregate_type = "COUNT" - elif req.aggregate_type.lower() == "total": - req.aggregate_type = "SUM" - elif req.aggregate_type.lower() == "average": - req.aggregate_type = "AVG" - elif req.aggregate_type.lower() == "minimum": - req.aggregate_type = "MIN" - elif req.aggregate_type.lower() == "maximum": - req.aggregate_type = "MAX" - else: - raise HTTPException(status_code=400, detail="Invalid aggregate type") - - logger.info( - f"{'* ' * 50}\n\nFinal Aggregate Type: {req.aggregate_type}\n\n{'* ' * 50}" - ) - - query = f""" - SELECT {req.grouping}, {req.aggregate_type}({req.aggregate_criteria}) as {req.aggregate_type}_{req.aggregate_criteria} - FROM employees - GROUP BY {req.grouping}; - """ - result_df = pd.read_sql_query(query, conn) - result = result_df.to_dict(orient="records") - return result - -# 1. Top Employees by Performance, Projects, and Timeframe -class TopEmployeesProjects(BaseModel): - min_performance_score: float - min_years_experience: int - department: str - min_project_count: int = None # Optional - months_range: int = None # Optional (for filtering recent projects) - - -@app.post("/top_employees_projects") -async def employees_projects(req: TopEmployeesProjects, res: Response): - params, filters = {}, [] - - # Add optional months_range filter - if req.months_range: - params['months_range'] = req.months_range - filters.append(f"p.start_date >= DATE('now', '-{req.months_range} months')") - - # Add project count filter if provided - if req.min_project_count: - filters.append(f"COUNT(p.project_id) >= {req.min_project_count}") - - where_clause = " AND ".join(filters) - if where_clause: - where_clause = "AND " + where_clause - - query = f""" - SELECT e.name, e.department, e.years_of_experience, e.performance_score, COUNT(p.project_id) as project_count - FROM employees e - LEFT JOIN projects p ON e.eid = p.eid - WHERE e.performance_score >= {req.min_performance_score} - AND e.years_of_experience >= {req.min_years_experience} - AND e.department = '{req.department}' - {where_clause} - GROUP BY e.eid, e.name, e.department, e.years_of_experience, e.performance_score - ORDER BY e.performance_score DESC; - """ - - result_df = pd.read_sql_query(query, conn, params=params) - return result_df.to_dict(orient='records') - - -# 2. Employees with Salary Growth Since Last Promotion -class SalaryGrowthRequest(BaseModel): - min_salary_increase_percentage: float - department: str = None # Optional - - -@app.post("/salary_growth") -async def salary_growth(req: SalaryGrowthRequest, res: Response): - params, filters = {}, [] - - if req.department: - filters.append("e.department = :department") - params['department'] = req.department - - where_clause = " AND ".join(filters) - if where_clause: - where_clause = "AND " + where_clause - - query = f""" - SELECT e.name, e.department, s.salary_increase_percentage - FROM employees e - JOIN salary_history s ON e.eid = s.eid - WHERE s.salary_increase_percentage >= {req.min_salary_increase_percentage} - AND s.promotion_date IS NOT NULL - {where_clause} - ORDER BY s.salary_increase_percentage DESC; - """ - - result_df = pd.read_sql_query(query, conn, params=params) - return result_df.to_dict(orient='records') - - -# 4. Employees with Promotions and Salary Increases -class PromotionsIncreasesRequest(BaseModel): - year: int - min_salary_increase_percentage: float = None # Optional - department: str = None # Optional - - -@app.post("/promotions_increases") -async def promotions_increases(req: PromotionsIncreasesRequest, res: Response): - params, filters = {}, [] - - if req.min_salary_increase_percentage: - filters.append(f"s.salary_increase_percentage >= {req.min_salary_increase_percentage}") - - if req.department: - filters.append("e.department = :department") - params['department'] = req.department - - where_clause = " AND ".join(filters) - if where_clause: - where_clause = "AND " + where_clause - - query = f""" - SELECT e.name, e.department, s.salary_increase_percentage, s.promotion_date - FROM employees e - JOIN salary_history s ON e.eid = s.eid - WHERE strftime('%Y', s.promotion_date) = '{req.year}' - {where_clause} - ORDER BY s.salary_increase_percentage DESC; - """ - - result_df = pd.read_sql_query(query, conn, params=params) - return result_df.to_dict(orient='records') - - -# 5. Employees with Highest Average Project Performance -class AvgProjPerformanceRequest(BaseModel): - min_project_count: int - min_performance_score: float - department: str = None # Optional - - -@app.post("/avg_project_performance") -async def avg_project_performance(req: AvgProjPerformanceRequest, res: Response): - params, filters = {}, [] - - if req.department: - filters.append("e.department = :department") - params['department'] = req.department - - filters.append(f"p.performance_score >= {req.min_performance_score}") - - where_clause = " AND ".join(filters) - - query = f""" - SELECT e.name, e.department, AVG(p.performance_score) as avg_performance_score, COUNT(p.project_id) as project_count - FROM employees e - JOIN projects p ON e.eid = p.eid - WHERE {where_clause} - GROUP BY e.eid, e.name, e.department - HAVING COUNT(p.project_id) >= {req.min_project_count} - ORDER BY avg_performance_score DESC; - """ - - result_df = pd.read_sql_query(query, conn, params=params) - return result_df.to_dict(orient='records') - - -# 6. Employees by Certification and Years of Experience -class CertificationsExperienceRequest(BaseModel): - certifications: List[str] - min_years_experience: int - department: str = None # Optional - -@app.post("/employees_certifications_experience") -async def certifications_experience(req: CertificationsExperienceRequest, res: Response): - # Convert the list of certifications into a format for SQL query - certs_filter = ', '.join([f"'{cert}'" for cert in req.certifications]) - - params, filters = {}, [] - - # Add department filter if provided - if req.department: - filters.append("e.department = :department") - params['department'] = req.department - - filters.append("e.years_of_experience >= :min_years_experience") - params['min_years_experience'] = req.min_years_experience - - where_clause = " AND ".join(filters) - - query = f""" - SELECT e.name, e.department, e.years_of_experience, COUNT(c.certification_name) as cert_count - FROM employees e - JOIN certifications c ON e.eid = c.eid - WHERE c.certification_name IN ({certs_filter}) - AND {where_clause} - GROUP BY e.eid, e.name, e.department, e.years_of_experience - HAVING COUNT(c.certification_name) = {len(req.certifications)} - ORDER BY e.years_of_experience DESC; - """ - - result_df = pd.read_sql_query(query, conn, params=params) - return result_df.to_dict(orient='records') diff --git a/demos/employee_details_copilot_arch/api_server/app/utils.py b/demos/employee_details_copilot_arch/api_server/app/utils.py deleted file mode 100644 index 3db7b8f9..00000000 --- a/demos/employee_details_copilot_arch/api_server/app/utils.py +++ /dev/null @@ -1,157 +0,0 @@ -import pandas as pd -import random -import datetime -import sqlite3 - -def load_sql(): - # Example Usage - conn = sqlite3.connect(":memory:") - - # create and load the employees table - generate_employee_data(conn) - - # create and load the projects table - generate_project_data(conn) - - # create and load the salary_history table - generate_salary_history(conn) - - # create and load the certifications table - generate_certifications(conn) - - return conn - -# Function to generate random employee data with `eid` as the primary key -def generate_employee_data(conn): - # List of possible names, positions, departments, and locations - names = [ - "Alice", - "Bob", - "Charlie", - "David", - "Eve", - "Frank", - "Grace", - "Hank", - "Ivy", - "Jack", - ] - positions = [ - "Manager", - "Engineer", - "Salesperson", - "HR Specialist", - "Marketing Analyst", - ] - # List of possible names, positions, departments, locations, and certifications - names = ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace", "Hank", "Ivy", "Jack"] - positions = ["Manager", "Engineer", "Salesperson", "HR Specialist", "Marketing Analyst"] - departments = ["Engineering", "Marketing", "HR", "Sales", "Finance"] - locations = ["New York", "San Francisco", "Austin", "Boston", "Chicago"] - certifications = ["AWS Certified", "Google Cloud Certified", "PMP", "Scrum Master", "Cisco Certified"] - - # Generate random hire dates - def random_hire_date(): - start_date = datetime.date(2000, 1, 1) - end_date = datetime.date(2023, 12, 31) - time_between_dates = end_date - start_date - days_between_dates = time_between_dates.days - random_number_of_days = random.randrange(days_between_dates) - return start_date + datetime.timedelta(days=random_number_of_days) - - # Generate random employee records with an employee ID (eid) - employees = [] - for eid in range(1, 101): # 100 employees with `eid` starting from 1 - name = random.choice(names) - position = random.choice(positions) - salary = round(random.uniform(50000, 150000), 2) # Salary between 50,000 and 150,000 - department = random.choice(departments) - location = random.choice(locations) - hire_date = random_hire_date() - performance_score = round(random.uniform(1, 5), 2) # Performance score between 1.0 and 5.0 - years_of_experience = random.randint(1, 30) # Years of experience between 1 and 30 - - employee = { - "eid": eid, # Employee ID - "name": name, - "position": position, - "salary": salary, - "department": department, - "location": location, - "hire_date": hire_date, - "performance_score": performance_score, - "years_of_experience": years_of_experience - } - - employees.append(employee) - - # Convert the list of dictionaries to a DataFrame and save to DB - df_employees = pd.DataFrame(employees) - df_employees.to_sql('employees', conn, index=False, if_exists='replace') - -# Function to generate random project data with `eid` -def generate_project_data(conn): - employees = pd.read_sql_query("SELECT eid FROM employees", conn) - projects = [] - - for _ in range(500): # 500 projects - eid = random.choice(employees['eid']) - project_name = f"Project_{random.randint(1, 100)}" - start_date = datetime.date(2020, 1, 1) + datetime.timedelta(days=random.randint(0, 365 * 3)) # Within the last 3 years - performance_score = round(random.uniform(1, 5), 2) # Performance score for the project between 1.0 and 5.0 - - project = { - "eid": eid, # Foreign key from employees table - "project_name": project_name, - "start_date": start_date, - "performance_score": performance_score - } - - projects.append(project) - - # Convert the list of dictionaries to a DataFrame and save to DB - df_projects = pd.DataFrame(projects) - df_projects.to_sql('projects', conn, index=False, if_exists='replace') - -# Function to generate random salary history data with `eid` -def generate_salary_history(conn): - employees = pd.read_sql_query("SELECT eid FROM employees", conn) - salary_history = [] - - for _ in range(300): # 300 salary records - eid = random.choice(employees['eid']) - salary_increase_percentage = round(random.uniform(5, 30), 2) # Salary increase between 5% and 30% - promotion_date = datetime.date(2018, 1, 1) + datetime.timedelta(days=random.randint(0, 365 * 5)) # Promotions in the last 5 years - - salary_record = { - "eid": eid, # Foreign key from employees table - "salary_increase_percentage": salary_increase_percentage, - "promotion_date": promotion_date - } - - salary_history.append(salary_record) - - # Convert the list of dictionaries to a DataFrame and save to DB - df_salary_history = pd.DataFrame(salary_history) - df_salary_history.to_sql('salary_history', conn, index=False, if_exists='replace') - -# Function to generate random certifications data with `eid` -def generate_certifications(conn): - employees = pd.read_sql_query("SELECT eid FROM employees", conn) - certifications_list = ["AWS Certified", "Google Cloud Certified", "PMP", "Scrum Master", "Cisco Certified"] - employee_certifications = [] - - for _ in range(300): # 300 certification records - eid = random.choice(employees['eid']) - certification = random.choice(certifications_list) - - cert_record = { - "eid": eid, # Foreign key from employees table - "certification_name": certification - } - - employee_certifications.append(cert_record) - - # Convert the list of dictionaries to a DataFrame and save to DB - df_certifications = pd.DataFrame(employee_certifications) - df_certifications.to_sql('certifications', conn, index=False, if_exists='replace') diff --git a/demos/employee_details_copilot_arch/api_server/requirements.txt b/demos/employee_details_copilot_arch/api_server/requirements.txt deleted file mode 100644 index bfc7be35..00000000 --- a/demos/employee_details_copilot_arch/api_server/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -fastapi -uvicorn -pandas -dateparser diff --git a/demos/employee_details_copilot_arch/bolt_config.yaml b/demos/employee_details_copilot_arch/bolt_config.yaml deleted file mode 100644 index 3b4b2a01..00000000 --- a/demos/employee_details_copilot_arch/bolt_config.yaml +++ /dev/null @@ -1,197 +0,0 @@ -default_prompt_endpoint: "127.0.0.1" -load_balancing: "round_robin" -timeout_ms: 5000 - -overrides: - # confidence threshold for prompt target intent matching - prompt_target_intent_matching_threshold: 0.7 - -llm_providers: - - - name: open-ai-gpt-4 - api_key: $OPEN_AI_API_KEY - model: gpt-4 - default: true - -prompt_targets: - - - type: function_resolver - name: top_employees - description: | - Allows you to find the top employees in different groups, such as departments, locations, or position. You can rank the employees by different criteria, like salary, yoe, or rating. Returns the best-ranked employees for each group, helping you identify top n in the list. - parameters: - - name: grouping - description: | - Select how you'd like to group the employees. For example, you can group them by department, location, or their position. The tool will provide the top-ranked employees within each group you choose. - required: true - type: string - enum: [department, location, position] - - name: ranking_criteria - required: true - type: string - description: | - Choose how you'd like to rank the employees. You can rank them by their salary, their years of experience, or their rating. The tool will sort the employees based on this ranking and return the best ones from each group. - enum: [salary, years_of_experience, performance_score] - - name: top_n - required: true - type: integer - description: | - Enter how many of the top employees you want to see in each group. For example, if you enter 3, the tool will show you the top 3 employees for each group you selected. - endpoint: - cluster: api_server - path: /top_employees - system_prompt: | - You are responsible for retrieving the top N employees per group ranked by a constraint. - - - type: function_resolver - name: aggregate_stats - description: | - Calculate summary statistics for groups of employees. You can group employees by categories like department or location and then compute totals, averages, or other statistics for specific attributes such as salary or years of experience. - parameters: - - name: grouping - description: | - Choose how you'd like to organize the employees. For example, you can group them by department, location, or position. The tool will calculate the summary statistics for each group. - required: true - enum: [department, location, position] - - name: aggregate_criteria - description: | - Select the specific attribute you'd like to analyze. This could be something like salary, years of experience, or rating. The tool will calculate the statistic you request for this attribute. - required: true - enum: [salary, years_of_experience, performance_score] - - name: aggregate_type - description: | - Choose the type of statistic you'd like to calculate for the selected attribute. For example, you can calculate the sum, average, minimum, or maximum value for each group. - required: true - enum: [SUM, AVG, MIN, MAX] - endpoint: - cluster: api_server - path: /aggregate_stats - system_prompt: | - You help calculate summary statistics for groups of employees. First, organize the employees by the specified grouping (e.g., department, location, or position). Then, compute the requested statistic (e.g., total, average, minimum, or maximum) for a specific attribute like salary, experience, or rating. - - # 1. Top Employees by Performance, Projects, and Timeframe - - type: function_resolver - name: employees_projects - description: | - Fetch employees with the highest performance scores, considering their project participation and years of experience. You can filter by minimum performance score, years of experience, and department. Optionally, you can also filter by recent project participation within the last Y months. - parameters: - - name: min_performance_score - description: Minimum performance score to filter employees. - required: true - type: float - - name: min_years_experience - description: Minimum years of experience to filter employees. - required: true - type: integer - - name: department - description: Department to filter employees by. - required: true - type: string - - name: min_project_count - description: Minimum number of projects employees participated in (optional). - required: false - type: integer - - name: months_range - description: Timeframe (in months) for filtering recent projects (optional). - required: false - type: integer - endpoint: - cluster: api_server - path: /employees_projects - system_prompt: | - You are responsible for retrieving the top N employees ranked by performance and project participation. Use filters for experience and optional project criteria. - - - # 2. Employees with Salary Growth Since Last Promotion - - type: function_resolver - name: salary_growth - description: | - Fetch employees with the highest salary growth since their last promotion, grouped by department. You can filter by a minimum salary increase percentage and department. - parameters: - - name: min_salary_increase_percentage - description: Minimum percentage increase in salary since the last promotion. - required: true - type: float - - name: department - description: Department to filter employees by (optional). - required: false - type: string - endpoint: - cluster: api_server - path: /salary_growth - system_prompt: | - You are responsible for retrieving employees with the highest salary growth since their last promotion. Filter by minimum salary increase percentage and department. - - # 4. Employees with Promotions and Salary Increases by Year - - type: function_resolver - name: promotions_increases - description: | - Fetch employees who were promoted and received a salary increase in a specific year, grouped by department. You can optionally filter by minimum percentage salary increase and department. - parameters: - - name: year - description: The year in which the promotion and salary increase occurred. - required: true - type: integer - - name: min_salary_increase_percentage - description: Minimum percentage salary increase to filter employees. - required: false - type: float - - name: department - description: Department to filter by (optional). - required: false - type: string - endpoint: - cluster: api_server - path: /promotions_increases - system_prompt: | - You are responsible for fetching employees who were promoted and received a salary increase in a specific year. Apply filters for salary increase percentage and department. - - - # 5. Employees with Highest Average Project Performance - - type: function_resolver - name: avg_project_performance - description: | - Fetch employees with the highest average performance across all projects they have worked on over time. You can filter by minimum project count, department, and minimum performance score. - parameters: - - name: min_project_count - description: Minimum number of projects an employee must have participated in. - required: true - type: integer - - name: min_performance_score - description: Minimum performance score to filter employees. - required: true - type: float - - name: department - description: Department to filter by (optional). - required: false - type: string - endpoint: - cluster: api_server - path: /avg_project_performance - system_prompt: | - You are responsible for fetching employees with the highest average performance across all projects they’ve worked on. Apply filters for minimum project count, performance score, and department. - - - # 6. Employees by Certification and Years of Experience - - type: function_resolver - name: certifications_experience - description: | - Fetch employees who have all the required certifications and meet the minimum years of experience. You can filter by department and provide a list of certifications to match. - parameters: - - name: certifications - description: List of required certifications. - required: true - type: list - - name: min_years_experience - description: Minimum years of experience. - required: true - type: integer - - name: department - description: Department to filter employees by (optional). - required: false - type: string - endpoint: - cluster: api_server - path: /certifications_experience - system_prompt: | - You are responsible for fetching employees who have the required certifications and meet the minimum years of experience. Optionally, filter by department. diff --git a/demos/employee_details_copilot_arch/docker-compose.yaml b/demos/employee_details_copilot_arch/docker-compose.yaml deleted file mode 100644 index ffa1d0d5..00000000 --- a/demos/employee_details_copilot_arch/docker-compose.yaml +++ /dev/null @@ -1,143 +0,0 @@ -services: - - config_generator: - build: - context: ../../ - dockerfile: config_generator/Dockerfile - volumes: - - ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml - - ./arch_config.yaml:/usr/src/app/arch_config.yaml - - ./generated:/usr/src/app/out - - arch: - build: - context: ../../ - dockerfile: arch/Dockerfile - hostname: arch - ports: - - "10010:10000" - - "19911:9901" - volumes: - - ./generated/envoy.yaml:/etc/envoy/envoy.yaml - - /etc/ssl/cert.pem:/etc/ssl/cert.pem - - ./arch_config.yaml:/config/arch_config.yaml - depends_on: - config_generator: - condition: service_completed_successfully - model_server: - condition: service_healthy - environment: - - LOG_LEVEL=debug - - model_server: - build: - context: ../../model_server - dockerfile: Dockerfile - ports: - - "18091:80" - healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] - interval: 5s - retries: 20 - volumes: - - ~/.cache/huggingface:/root/.cache/huggingface - - ./arch_config.yaml:/root/arch_config.yaml - - api_server: - build: - context: api_server - dockerfile: Dockerfile - ports: - - "18093:80" - healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] - interval: 5s - retries: 20 - - function_resolver: - build: - context: ../../function_resolver - dockerfile: Dockerfile - ports: - - "18092:80" - healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] - interval: 5s - retries: 20 - volumes: - - ~/.cache/huggingface:/root/.cache/huggingface - environment: - # use ollama endpoint that is hosted by host machine (no virtualization) - - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal} - # uncomment following line to use ollama endpoint that is hosted by docker - # - OLLAMA_ENDPOINT=ollama - - OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M - - ollama: - image: ollama/ollama - container_name: ollama - volumes: - - ./ollama:/root/.ollama - restart: unless-stopped - ports: - - '11444:11434' - profiles: - - manual - - open-webui: - image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main} - container_name: open-webui - volumes: - - ./open-webui:/app/backend/data - # depends_on: - # - ollama - ports: - - 18100:8080 - environment: - - OLLAMA_BASE_URL=http://${OLLAMA_ENDPOINT:-host.docker.internal}:11434 - - WEBUI_AUTH=false - extra_hosts: - - host.docker.internal:host-gateway - restart: unless-stopped - profiles: - - monitoring - - chatbot_ui: - build: - context: ../../chatbot_ui - dockerfile: Dockerfile - ports: - - "18090:8080" - environment: - - OPENAI_API_KEY=${OPENAI_API_KEY:?error} - - CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1 - - prometheus: - image: prom/prometheus - container_name: prometheus - command: - - '--config.file=/etc/prometheus/prometheus.yaml' - ports: - - 9100:9090 - restart: unless-stopped - volumes: - - ./prometheus:/etc/prometheus - - ./prom_data:/prometheus - profiles: - - monitoring - - grafana: - image: grafana/grafana - container_name: grafana - ports: - - 3010:3000 - restart: unless-stopped - environment: - - GF_SECURITY_ADMIN_USER=admin - - GF_SECURITY_ADMIN_PASSWORD=grafana - volumes: - - ./grafana:/etc/grafana/provisioning/datasources - - ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml - - ./grafana/dashboards:/var/lib/grafana/dashboards - profiles: - - monitoring diff --git a/demos/employee_details_copilot_arch/prometheus/prometheus.yaml b/demos/employee_details_copilot_arch/prometheus/prometheus.yaml deleted file mode 100644 index 95fcfd41..00000000 --- a/demos/employee_details_copilot_arch/prometheus/prometheus.yaml +++ /dev/null @@ -1,23 +0,0 @@ -global: - scrape_interval: 15s - scrape_timeout: 10s - evaluation_interval: 15s -alerting: - alertmanagers: - - static_configs: - - targets: [] - scheme: http - timeout: 10s - api_version: v1 -scrape_configs: -- job_name: envoy - honor_timestamps: true - scrape_interval: 15s - scrape_timeout: 10s - metrics_path: /stats - scheme: http - static_configs: - - targets: - - arch:9901 - params: - format: ['prometheus'] diff --git a/demos/function_calling/docker-compose.yaml b/demos/function_calling/docker-compose.yaml index a4e316ae..34251b13 100644 --- a/demos/function_calling/docker-compose.yaml +++ b/demos/function_calling/docker-compose.yaml @@ -19,7 +19,7 @@ services: environment: - OPENAI_API_KEY=${OPENAI_API_KEY:?error} - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error} - - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 + - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 #this is only because we are running the sample app in the same docker container environemtn as archgw opentelemetry: build: diff --git a/demos/insurance_agent/docker-compose.yaml b/demos/insurance_agent/docker-compose.yaml index c5ca405e..901f694a 100644 --- a/demos/insurance_agent/docker-compose.yaml +++ b/demos/insurance_agent/docker-compose.yaml @@ -17,6 +17,4 @@ services: ports: - "18090:8080" environment: - - OPENAI_API_KEY=${OPENAI_API_KEY:?error} - - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error} - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 diff --git a/demos/network_copilot/api_server/Dockerfile b/demos/network_agent/Dockerfile similarity index 56% rename from demos/network_copilot/api_server/Dockerfile rename to demos/network_agent/Dockerfile index abd21357..503ffeff 100644 --- a/demos/network_copilot/api_server/Dockerfile +++ b/demos/network_agent/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3 AS base +FROM python:3.10 AS base FROM base AS builder @@ -9,11 +9,11 @@ RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt COPY . /src -FROM python:3-slim AS output +FROM python:3.10-slim AS output COPY --from=builder /runtime /usr/local -COPY /app /app +COPY . /app WORKDIR /app -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-level", "info"] diff --git a/demos/network_agent/README.md b/demos/network_agent/README.md new file mode 100644 index 00000000..e69de29b diff --git a/demos/network_agent/arch_config.yaml b/demos/network_agent/arch_config.yaml new file mode 100644 index 00000000..81195488 --- /dev/null +++ b/demos/network_agent/arch_config.yaml @@ -0,0 +1,71 @@ +version: v0.1 +listener: + address: 127.0.0.1 + port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates + message_format: huggingface + +# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way +llm_providers: + - name: OpenAI + provider: openai + access_key: OPENAI_API_KEY + model: gpt-4o + default: true + +# default system prompt used by all prompt targets +system_prompt: | + You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions. + +prompt_targets: + - name: reboot_devices + description: Reboot specific devices or device groups + endpoint: + name: app_server + path: /agent/device_reboot + parameters: + - name: device_ids + type: list + description: A list of device identifiers (IDs) to reboot. + required: true + - name: time_range + type: int + description: Optional time range in days for reboot operations. Defaults to 7. + - name: network_qa + endpoint: + name: app_server + path: /agent/network_summary + description: Handle general Q/A related to networking. + default: true + - name: device_summary + description: Retrieve statistics for specific devices within a time range + endpoint: + name: app_server + path: /agent/device_summary + parameters: + - name: device_ids + type: list + description: A list of device identifiers (IDs) to retrieve statistics for. + required: true # device_ids are required to get device statistics + - name: time_range + type: int + description: Time range in days for which to gather device statistics. Defaults to 7. + default: "7" + +# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. +endpoints: + app_server: + # value could be ip address or a hostname with port + # this could also be a list of endpoints for load balancing + # for example endpoint: [ ip1:port, ip2:port ] + endpoint: host.docker.internal:18083 + # max time to wait for a connection to be established + connect_timeout: 0.005s + +ratelimits: + - model: gpt-4 + selector: + key: selector-key + value: selector-value + limit: + tokens: 1 + unit: minute diff --git a/demos/network_agent/docker-compose.yaml b/demos/network_agent/docker-compose.yaml new file mode 100644 index 00000000..25e19420 --- /dev/null +++ b/demos/network_agent/docker-compose.yaml @@ -0,0 +1,21 @@ +services: + api_server: + build: + context: . + dockerfile: Dockerfile + ports: + - "18083:80" + healthcheck: + test: ["CMD", "curl" ,"http://localhost:80/healthz"] + interval: 5s + retries: 20 + + chatbot_ui: + build: + context: ../../chatbot_ui + dockerfile: Dockerfile + ports: + - "18080:8080" + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY:?error} + - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 diff --git a/demos/employee_details_copilot_arch/grafana/dashboard.yaml b/demos/network_agent/grafana/dashboard.yaml similarity index 100% rename from demos/employee_details_copilot_arch/grafana/dashboard.yaml rename to demos/network_agent/grafana/dashboard.yaml diff --git a/demos/employee_details_copilot_arch/grafana/dashboards/envoy_overview.json b/demos/network_agent/grafana/dashboards/envoy_overview.json similarity index 100% rename from demos/employee_details_copilot_arch/grafana/dashboards/envoy_overview.json rename to demos/network_agent/grafana/dashboards/envoy_overview.json diff --git a/demos/employee_details_copilot_arch/grafana/datasource.yaml b/demos/network_agent/grafana/datasource.yaml similarity index 100% rename from demos/employee_details_copilot_arch/grafana/datasource.yaml rename to demos/network_agent/grafana/datasource.yaml diff --git a/demos/network_agent/main.py b/demos/network_agent/main.py new file mode 100644 index 00000000..682f89ae --- /dev/null +++ b/demos/network_agent/main.py @@ -0,0 +1,104 @@ +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel, Field +from typing import List, Optional + +app = FastAPI() + +# Define the request model +class DeviceSummaryRequest(BaseModel): + device_ids: List[int] + time_range: Optional[int] = Field(default=7, description="Time range in days, defaults to 7") + +# Define the response model +class DeviceStatistics(BaseModel): + device_id: int + time_range: str + data: str + +class DeviceSummaryResponse(BaseModel): + statistics: List[DeviceStatistics] + + # Request model for device reboot +class DeviceRebootRequest(BaseModel): + device_ids: List[int] + +# Response model for the device reboot +class CoverageResponse(BaseModel): + status: str + summary: dict + +@app.post("/agent/device_reboot", response_model=CoverageResponse) +def reboot_network_device(request_data: DeviceRebootRequest): + """ + Endpoint to reboot network devices based on device IDs and an optional time range. + """ + + # Access data from the Pydantic model + device_ids = request_data.device_ids + + # Validate 'device_ids' (This is already validated by Pydantic, but additional logic can be added if needed) + if not device_ids: + raise HTTPException(status_code=400, detail="'device_ids' parameter is required") + + # Simulate reboot operation and return the response + statistics = [] + for device_id in device_ids: + # Placeholder for actual data retrieval or device reboot logic + stats = { + "data": f"Device {device_id} has been successfully rebooted." + } + statistics.append(stats) + + # Return the response with a summary + return CoverageResponse(status="success", summary={"device_ids": device_ids}) + +# Post method for device summary +@app.post("/agent/device_summary", response_model=DeviceSummaryResponse) +def get_device_summary(request: DeviceSummaryRequest): + """ + Endpoint to retrieve device statistics based on device IDs and an optional time range. + """ + + # Extract 'device_ids' and 'time_range' from the request + device_ids = request.device_ids + time_range = request.time_range + + # Simulate retrieving statistics for the given device IDs and time range + statistics = [] + minutes = 1 + for device_id in device_ids: + stats = { + "device_id": device_id, + "time_range": f"Last {time_range} days", + "data": f"Device {device_id} over the last {time_range} days experienced {minutes} minutes of downtime.", + } + minutes += 1 + statistics.append(DeviceStatistics(**stats)) + + return DeviceSummaryResponse(statistics=statistics) + +@app.post("/agent/network_summary") +async def policy_qa(): + """ + This method handles Q/A related to general issues in networks. + It forwards the conversation to the OpenAI client via a local proxy and returns the response. + """ + return { + "choices": [ + { + "message": { + "role": "assistant", + "content": "I am a helpful networking agent, and I can help you get status for network devices or reboot them" + }, + "finish_reason": "completed", + "index": 0 + } + ], + "model": "network_agent", + "usage": { + "completion_tokens": 0 + } + } + +if __name__ == "__main__": + app.run(debug=True) diff --git a/demos/network_agent/requirements.txt b/demos/network_agent/requirements.txt new file mode 100644 index 00000000..77e7584c --- /dev/null +++ b/demos/network_agent/requirements.txt @@ -0,0 +1,4 @@ +fastapi +uvicorn +pydantic +typing diff --git a/demos/network_copilot/api_server/app/utils.py b/demos/network_agent/utils.py similarity index 100% rename from demos/network_copilot/api_server/app/utils.py rename to demos/network_agent/utils.py diff --git a/demos/network_copilot/Bolt-FC-1B-Q4_K_M.model_file b/demos/network_copilot/Bolt-FC-1B-Q4_K_M.model_file deleted file mode 100644 index 1def85b1..00000000 --- a/demos/network_copilot/Bolt-FC-1B-Q4_K_M.model_file +++ /dev/null @@ -1,24 +0,0 @@ -FROM Bolt-Function-Calling-1B-Q4_K_M.gguf - -# Set the size of the context window used to generate the next token -PARAMETER num_ctx 4096 - -# Set parameters for response generation -PARAMETER num_predict 1024 -PARAMETER temperature 0.1 -PARAMETER top_p 0.5 -PARAMETER top_k 32022 -PARAMETER repeat_penalty 1.0 -PARAMETER stop "<|EOT|>" - -# Set the random number seed to use for generation -PARAMETER seed 42 - -# Set the prompt template to be passed into the model -TEMPLATE """{{ if .System }}<|begin▁of▁sentence|> -{{ .System }} -{{ end }}{{ if .Prompt }}### Instruction: -{{ .Prompt }} -{{ end }}### Response: -{{ .Response }} -<|EOT|>""" diff --git a/demos/network_copilot/README.md b/demos/network_copilot/README.md deleted file mode 100644 index c19898fd..00000000 --- a/demos/network_copilot/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Function calling -This demo shows how you can use intelligent prompt gateway as a network copilot that could give information about correlation between packet loss with device reboots, downs, or maintainence. This demo assumes you are using ollama running natively. If you want to run ollama running inside docker then please update ollama endpoint in docker-compose file. - -# Starting the demo -1. Create `.env` file and set OpenAI key using env var `OPENAI_API_KEY` -1. Start services - ```sh - docker compose up - ``` -1. Download Bolt-FC model. This demo assumes we have downloaded [Bolt-Function-Calling-1B:Q4_K_M](https://huggingface.co/katanemolabs/Bolt-Function-Calling-1B.gguf/blob/main/Bolt-Function-Calling-1B-Q4_K_M.gguf) to local folder. -1. If running ollama natively run - ```sh - ollama serve - ``` -2. Create model file in ollama repository - ```sh - ollama create Bolt-Function-Calling-1B:Q4_K_M -f Bolt-FC-1B-Q4_K_M.model_file - ``` -3. Navigate to http://localhost:18080/ -4. You can type in queries like "show me any packet drops due to interface failure in the past 3 days" - - You can also ask follow up questions like "show me just the ones with maximum 200 in errors" -5. To see metrics navigate to "http://localhost:3000/" (use admin/grafana for login) - - Open up dahsboard named "Intelligent Gateway Overview" - - On this dashboard you can see reuqest latency and number of requests diff --git a/demos/network_copilot/api_server/.vscode/launch.json b/demos/network_copilot/api_server/.vscode/launch.json deleted file mode 100644 index 4d9c76d4..00000000 --- a/demos/network_copilot/api_server/.vscode/launch.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "function-calling api server", - "cwd": "${workspaceFolder}/app", - "type": "debugpy", - "request": "launch", - "module": "uvicorn", - "args": ["main:app","--reload", "--port", "8001"], - } - ] -} diff --git a/demos/network_copilot/api_server/app/main.py b/demos/network_copilot/api_server/app/main.py deleted file mode 100644 index 22ec58e7..00000000 --- a/demos/network_copilot/api_server/app/main.py +++ /dev/null @@ -1,184 +0,0 @@ -from fastapi import FastAPI, Response -from datetime import datetime, timezone -import logging -from pydantic import BaseModel -from utils import load_sql, load_params -import pandas as pd - -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -app = FastAPI() - -@app.get("/healthz") -async def healthz(): - return { - "status": "ok" - } - -conn = load_sql() -name_col = "name" - - -class PacketDropCorrelationRequest(BaseModel): - from_time: str = None # Optional natural language timeframe - ifname: str = None # Optional interface name filter - region: str = None # Optional region filter - min_in_errors: int = None - max_in_errors: int = None - min_out_errors: int = None - max_out_errors: int = None - min_in_discards: int = None - max_in_discards: int = None - min_out_discards: int = None - max_out_discards: int = None - - -@app.post("/interface_down_pkt_drop") -async def interface_down_packet_drop(req: PacketDropCorrelationRequest, res: Response): - params, filters = load_params(req) - - # Join the filters using AND - where_clause = " AND ".join(filters) - if where_clause: - where_clause = "AND " + where_clause - - # Step 3: Query packet errors and flows from interfacestats and ts_flow - query = f""" - SELECT - d.switchip AS device_ip_address, - i.in_errors, - i.in_discards, - i.out_errors, - i.out_discards, - i.ifname, - t.src_addr, - t.dst_addr, - t.time AS flow_time, - i.time AS interface_time - FROM - device d - INNER JOIN - interfacestats i - ON d.device_mac_address = i.device_mac_address - INNER JOIN - ts_flow t - ON d.switchip = t.sampler_address - WHERE - i.time >= :from_time -- Using the converted timestamp - {where_clause} - ORDER BY - i.time; - """ - - correlated_data = pd.read_sql_query(query, conn, params=params) - - if correlated_data.empty: - default_response = { - "device_ip_address": "0.0.0.0", # Placeholder IP - "in_errors": 0, - "in_discards": 0, - "out_errors": 0, - "out_discards": 0, - "ifname": req.ifname - or "unknown", # Placeholder or interface provided in the request - "src_addr": "0.0.0.0", # Placeholder source IP - "dst_addr": "0.0.0.0", # Placeholder destination IP - "flow_time": str( - datetime.now(timezone.utc) - ), # Current timestamp or placeholder - "interface_time": str( - datetime.now(timezone.utc) - ), # Current timestamp or placeholder - } - return [default_response] - - logger.info(f"Correlated Packet Drop Data: {correlated_data}") - - return correlated_data.to_dict(orient='records') - - -class FlowPacketErrorCorrelationRequest(BaseModel): - from_time: str = None # Optional natural language timeframe - ifname: str = None # Optional interface name filter - region: str = None # Optional region filter - min_in_errors: int = None - max_in_errors: int = None - min_out_errors: int = None - max_out_errors: int = None - min_in_discards: int = None - max_in_discards: int = None - min_out_discards: int = None - max_out_discards: int = None - - -@app.post("/packet_errors_impact_flow") -async def packet_errors_impact_flow( - req: FlowPacketErrorCorrelationRequest, res: Response -): - params, filters = load_params(req) - - # Join the filters using AND - where_clause = " AND ".join(filters) - if where_clause: - where_clause = "AND " + where_clause - - # Step 3: Query the packet errors and flows, correlating by timestamps - query = f""" - SELECT - d.switchip AS device_ip_address, - i.in_errors, - i.in_discards, - i.out_errors, - i.out_discards, - i.ifname, - t.src_addr, - t.dst_addr, - t.src_port, - t.dst_port, - t.packets, - t.time AS flow_time, - i.time AS error_time - FROM - device d - INNER JOIN - interfacestats i - ON d.device_mac_address = i.device_mac_address - INNER JOIN - ts_flow t - ON d.switchip = t.sampler_address - WHERE - i.time >= :from_time - AND ABS(strftime('%s', t.time) - strftime('%s', i.time)) <= 300 -- Correlate within 5 minutes - {where_clause} - ORDER BY - i.time; - """ - - correlated_data = pd.read_sql_query(query, conn, params=params) - - if correlated_data.empty: - default_response = { - "device_ip_address": "0.0.0.0", # Placeholder IP - "in_errors": 0, - "in_discards": 0, - "out_errors": 0, - "out_discards": 0, - "ifname": req.ifname - or "unknown", # Placeholder or interface provided in the request - "src_addr": "0.0.0.0", # Placeholder source IP - "dst_addr": "0.0.0.0", # Placeholder destination IP - "src_port": 0, - "dst_port": 0, - "packets": 0, - "flow_time": str( - datetime.now(timezone.utc) - ), # Current timestamp or placeholder - "error_time": str( - datetime.now(timezone.utc) - ), # Current timestamp or placeholder - } - return [default_response] - - # Return the correlated data if found - return correlated_data.to_dict(orient='records') diff --git a/demos/network_copilot/api_server/requirements.txt b/demos/network_copilot/api_server/requirements.txt deleted file mode 100644 index bfc7be35..00000000 --- a/demos/network_copilot/api_server/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -fastapi -uvicorn -pandas -dateparser diff --git a/demos/network_copilot/bolt_config.yaml b/demos/network_copilot/bolt_config.yaml deleted file mode 100644 index e8df29d5..00000000 --- a/demos/network_copilot/bolt_config.yaml +++ /dev/null @@ -1,126 +0,0 @@ -default_prompt_endpoint: "127.0.0.1" -load_balancing: "round_robin" -timeout_ms: 5000 - -overrides: - # confidence threshold for prompt target intent matching - prompt_target_intent_matching_threshold: 0.7 - -llm_providers: - - - name: open-ai-gpt-4 - api_key: $OPEN_AI_API_KEY - model: gpt-4 - default: true - -prompt_targets: - - - type: function_resolver - name: interface_down_packet_drop - description: | - Checks for packet drops due to interface unavailability like reboots, shutdowns, or maintainence events. It allows filtering the results by timeframes, interface name, region, and packet error thresholds. - parameters: - - name: from_time - description: An optional natural language timeframe (e.g., "past 7 days", "since a month") to define the time range for packet drop analysis. - required: false - type: string - - name: ifname - description: An optional interface name filter to apply. - required: false - type: string - - name: region - description: An optional region filter to apply (from the device table). - required: false - type: string - - name: min_in_errors - description: Minimum number of in_errors to filter results. - required: false - type: integer - - name: max_in_errors - description: Maximum number of in_errors to filter results. - required: false - type: integer - - name: min_out_errors - description: Minimum number of out_errors to filter results. - required: false - type: integer - - name: max_out_errors - description: Maximum number of out_errors to filter results. - required: false - type: integer - - name: min_in_discards - description: Minimum number of in_discards to filter results. - required: false - type: integer - - name: max_in_discards - description: Maximum number of in_discards to filter results. - required: false - type: integer - - name: min_out_discards - description: Minimum number of out_discards to filter results. - required: false - type: integer - - name: max_out_discards - description: Maximum number of out_discards to filter results. - required: false - type: integer - endpoint: - cluster: api_server - path: /interface_down_packet_drop - system_prompt: | - You are responsible for correlating packet drops with interface down events by analyzing packet errors from the given data. - - - type: function_resolver - name: packet_errors_impact_flow - description: | - To find whether packet flows are impacted due to packet errors by correlating the timestamps between the packet errors and the flows. It allows filtering the results by timeframes, interface name, region, and packet error thresholds. - parameters: - - name: from_time - description: An optional natural language timeframe (e.g., "past 7 days", "since a month") to define the time range for the analysis. - required: false - type: string - - name: ifname - description: An optional interface name filter to apply. - required: false - type: string - - name: region - description: An optional region filter to apply (from the device table). - required: false - type: string - - name: min_in_errors - description: Minimum number of in_errors to filter results. - required: false - type: integer - - name: max_in_errors - description: Maximum number of in_errors to filter results. - required: false - type: integer - - name: min_out_errors - description: Minimum number of out_errors to filter results. - required: false - type: integer - - name: max_out_errors - description: Maximum number of out_errors to filter results. - required: false - type: integer - - name: min_in_discards - description: Minimum number of in_discards to filter results. - required: false - type: integer - - name: max_in_discards - description: Maximum number of in_discards to filter results. - required: false - type: integer - - name: min_out_discards - description: Minimum number of out_discards to filter results. - required: false - type: integer - - name: max_out_discards - description: Maximum number of out_discards to filter results. - required: false - type: integer - endpoint: - cluster: api_server - path: /packet_errors_impact_flow - system_prompt: | - You are responsible for finding and correlating packet errors with the packet flows based on timestamps given in the data. This correlation helps identify if packet flows are impacted by packet errors. diff --git a/demos/network_copilot/docker-compose.yaml b/demos/network_copilot/docker-compose.yaml deleted file mode 100644 index 942e8392..00000000 --- a/demos/network_copilot/docker-compose.yaml +++ /dev/null @@ -1,142 +0,0 @@ -services: - - config_generator: - build: - context: ../../ - dockerfile: config_generator/Dockerfile - volumes: - - ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml - - ./arch_config.yaml:/usr/src/app/arch_config.yaml - - ./generated:/usr/src/app/out - - arch: - build: - context: ../../ - dockerfile: arch/Dockerfile - hostname: arch - ports: - - "10000:10000" - - "19901:9901" - volumes: - - ./generated/envoy.yaml:/etc/envoy/envoy.yaml - - /etc/ssl/cert.pem:/etc/ssl/cert.pem - - ./arch_config.yaml:/config/arch_config.yaml - depends_on: - config_generator: - condition: service_completed_successfully - model_server: - condition: service_healthy - environment: - - LOG_LEVEL=debug - - model_server: - build: - context: ../../model_server - dockerfile: Dockerfile - ports: - - "18081:80" - healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] - interval: 5s - retries: 20 - volumes: - - ~/.cache/huggingface:/root/.cache/huggingface - - ./arch_config.yaml:/root/arch_config.yaml - - api_server: - build: - context: api_server - dockerfile: Dockerfile - ports: - - "18083:80" - healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] - interval: 5s - retries: 20 - - function_resolver: - build: - context: ../../function_resolver - dockerfile: Dockerfile - ports: - - "18082:80" - healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] - interval: 5s - retries: 20 - volumes: - - ~/.cache/huggingface:/root/.cache/huggingface - environment: - # use ollama endpoint that is hosted by host machine (no virtualization) - - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal} - # uncomment following line to use ollama endpoint that is hosted by docker - # - OLLAMA_ENDPOINT=ollama - - ollama: - image: ollama/ollama - container_name: ollama - volumes: - - ./ollama:/root/.ollama - restart: unless-stopped - ports: - - '11434:11434' - profiles: - - manual - - open-webui: - image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main} - container_name: open-webui - volumes: - - ./open-webui:/app/backend/data - # depends_on: - # - ollama - ports: - - 18090:8080 - environment: - - OLLAMA_BASE_URL=http://${OLLAMA_ENDPOINT:-host.docker.internal}:11434 - - WEBUI_AUTH=false - extra_hosts: - - host.docker.internal:host-gateway - restart: unless-stopped - profiles: - - monitoring - - chatbot_ui: - build: - context: ../../chatbot_ui - dockerfile: Dockerfile - ports: - - "18080:8080" - environment: - - OPENAI_API_KEY=${OPENAI_API_KEY:?error} - - CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1 - - prometheus: - image: prom/prometheus - container_name: prometheus - command: - - '--config.file=/etc/prometheus/prometheus.yaml' - ports: - - 9090:9090 - restart: unless-stopped - volumes: - - ./prometheus:/etc/prometheus - - ./prom_data:/prometheus - profiles: - - monitoring - - grafana: - image: grafana/grafana - container_name: grafana - ports: - - 3000:3000 - restart: unless-stopped - environment: - - GF_SECURITY_ADMIN_USER=admin - - GF_SECURITY_ADMIN_PASSWORD=grafana - volumes: - - ./grafana:/etc/grafana/provisioning/datasources - - ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml - - ./grafana/dashboards:/var/lib/grafana/dashboards - profiles: - - monitoring diff --git a/demos/network_copilot/grafana/dashboard.yaml b/demos/network_copilot/grafana/dashboard.yaml deleted file mode 100644 index fd66a479..00000000 --- a/demos/network_copilot/grafana/dashboard.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: 1 - -providers: - - name: "Dashboard provider" - orgId: 1 - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: false - options: - path: /var/lib/grafana/dashboards - foldersFromFilesStructure: true diff --git a/demos/network_copilot/grafana/dashboards/envoy_overview.json b/demos/network_copilot/grafana/dashboards/envoy_overview.json deleted file mode 100644 index 51bff777..00000000 --- a/demos/network_copilot/grafana/dashboards/envoy_overview.json +++ /dev/null @@ -1,355 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "links": [], - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "avg(rate(envoy_cluster_internal_upstream_rq_time_sum[1m]) / rate(envoy_cluster_internal_upstream_rq_time_count[1m])) by (envoy_cluster_name)", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "request latency - internal (ms)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "request latency - external (ms)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 3, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "avg(rate(envoy_cluster_internal_upstream_rq_completed[1m])) by (envoy_cluster_name)", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "avg(rate(envoy_cluster_external_upstream_rq_completed[1m])) by (envoy_cluster_name)", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Upstream request count", - "type": "timeseries" - } - ], - "schemaVersion": 39, - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "Intelligent Gateway Overview", - "uid": "adt6uhx5lk8aob", - "version": 3, - "weekStart": "" -} diff --git a/demos/network_copilot/grafana/datasource.yaml b/demos/network_copilot/grafana/datasource.yaml deleted file mode 100644 index 4870174e..00000000 --- a/demos/network_copilot/grafana/datasource.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: 1 - -datasources: -- name: Prometheus - type: prometheus - url: http://prometheus:9090 - isDefault: true - access: proxy - editable: true diff --git a/demos/network_copilot/prometheus/prometheus.yaml b/demos/network_copilot/prometheus/prometheus.yaml deleted file mode 100644 index 95fcfd41..00000000 --- a/demos/network_copilot/prometheus/prometheus.yaml +++ /dev/null @@ -1,23 +0,0 @@ -global: - scrape_interval: 15s - scrape_timeout: 10s - evaluation_interval: 15s -alerting: - alertmanagers: - - static_configs: - - targets: [] - scheme: http - timeout: 10s - api_version: v1 -scrape_configs: -- job_name: envoy - honor_timestamps: true - scrape_interval: 15s - scrape_timeout: 10s - metrics_path: /stats - scheme: http - static_configs: - - targets: - - arch:9901 - params: - format: ['prometheus'] diff --git a/demos/prompt_guards/bolt_config.yaml b/demos/prompt_guards/bolt_config.yaml deleted file mode 100644 index fb7bc1ff..00000000 --- a/demos/prompt_guards/bolt_config.yaml +++ /dev/null @@ -1,43 +0,0 @@ -default_prompt_endpoint: "127.0.0.1" -load_balancing: "round_robin" -timeout_ms: 5000 - -# should not be here -embedding_provider: - name: "bge-large-en-v1.5" - model: "BAAI/bge-large-en-v1.5" - -llm_providers: - - - name: open-ai-gpt-4 - api_key: $OPENAI_API_KEY - model: gpt-4 - default: true - -prompt_guards: - input_guards: - jailbreak: - on_exception_message: Looks like you are curious about my jailbreak detection abilities. - toxicity: - on_exception_message: Looks like you are curious about my toxicity detection abilities. - -prompt_targets: - - - type: function_resolver - name: weather_forecast - description: This function resolver provides weather forecast information for a given city. - parameters: - - name: city - required: true - description: The city for which the weather forecast is requested. - - name: days - description: The number of days for which the weather forecast is requested. - - name: units - description: The units in which the weather forecast is requested. - endpoint: - cluster: weatherhost - path: /weather - system_prompt: | - You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries: - - Use farenheight for temperature - - Use miles per hour for wind speed diff --git a/demos/prompt_guards/docker-compose.yaml b/demos/prompt_guards/docker-compose.yaml deleted file mode 100644 index 5380b023..00000000 --- a/demos/prompt_guards/docker-compose.yaml +++ /dev/null @@ -1,94 +0,0 @@ - -services: - - config_generator: - build: - context: ../../ - dockerfile: config_generator/Dockerfile - volumes: - - ../../arch/envoy.template.yaml:/usr/src/app/envoy.template.yaml - - ./arch_config.yaml:/usr/src/app/arch_config.yaml - - ./generated:/usr/src/app/out - - arch: - build: - context: ../../ - dockerfile: arch/Dockerfile - hostname: arch - ports: - - "10000:10000" - - "19901:9901" - volumes: - - ./generated/envoy.yaml:/etc/envoy/envoy.yaml - - /etc/ssl/cert.pem:/etc/ssl/cert.pem - - ./arch_config.yaml:/config/arch_config.yaml - depends_on: - config_generator: - condition: service_completed_successfully - model_server: - condition: service_healthy - environment: - - LOG_LEVEL=debug - - model_server: - build: - context: ../../model_server - dockerfile: Dockerfile - ports: - - "18081:80" - healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] - interval: 5s - retries: 20 - volumes: - - ~/.cache/huggingface:/root/.cache/huggingface - - ./arch_config.yaml:/root/arch_config.yaml - # Uncomment following lines to enable GPU support - # deploy: - # resources: - # reservations: - # devices: - # - capabilities: [gpu] - # runtime: nvidia # Enables GPU support - # environment: - # - NVIDIA_VISIBLE_DEVICES=all # Use all available GPUs - - - function_resolver: - build: - context: ../../function_resolver - dockerfile: Dockerfile - ports: - - "18082:80" - healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] - interval: 5s - retries: 20 - volumes: - - ~/.cache/huggingface:/root/.cache/huggingface - environment: - # use ollama endpoint that is hosted by host machine (no virtualization) - - OLLAMA_ENDPOINT=host.docker.internal - # uncomment following line to use ollama endpoint that is hosted by docker - # - OLLAMA_ENDPOINT=ollama - - ollama: - image: ollama/ollama - container_name: ollama - volumes: - - ./ollama:/root/.ollama - restart: unless-stopped - ports: - - '11434:11434' - profiles: - - manual - - chatbot_ui: - build: - context: ../../chatbot_ui - dockerfile: Dockerfile - ports: - - "18080:8080" - environment: - - OPENAI_API_KEY=${OPENAI_API_KEY} - - CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1 diff --git a/docs/source/build_with_arch/includes/agent/function-calling-agent.yaml b/docs/source/build_with_arch/includes/agent/function-calling-agent.yaml index c448ae82..6d4c0409 100644 --- a/docs/source/build_with_arch/includes/agent/function-calling-agent.yaml +++ b/docs/source/build_with_arch/includes/agent/function-calling-agent.yaml @@ -1,9 +1,7 @@ version: v0.1 - -listen: - address: 0.0.0.0 # or 127.0.0.1 - port: 10000 - # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request +listener: + address: 127.0.0.1 + port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates message_format: huggingface # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way @@ -13,25 +11,42 @@ llm_providers: access_key: OPENAI_API_KEY model: gpt-4o default: true - stream: true # default system prompt used by all prompt targets -system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions. +system_prompt: | + You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions. prompt_targets: - - name: reboot_devices - description: Reboot specific devices or device groups - - path: /agent/device_reboot - parameters: - - name: device_ids - type: list - description: A list of device identifiers (IDs) to reboot. - required: false - - name: device_group - type: str - description: The name of the device group to reboot - required: false + - name: network_qa + endpoint: + name: app_server + path: /agent/network_summary + description: Handle general Q/A related to networking. + default: true + - name: reboot_devices + description: Reboot specific devices or device groups + endpoint: + name: app_server + path: /agent/device_reboot + parameters: + - name: device_ids + type: list + description: A list of device identifiers (IDs) to reboot. + required: true + - name: device_summary + description: Retrieve statistics for specific devices within a time range + endpoint: + name: app_server + path: /agent/device_summary + parameters: + - name: device_ids + type: list + description: A list of device identifiers (IDs) to retrieve statistics for. + required: true # device_ids are required to get device statistics + - name: time_range + type: int + description: Time range in days for which to gather device statistics. Defaults to 7. + default: "7" # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. endpoints: @@ -39,6 +54,6 @@ endpoints: # value could be ip address or a hostname with port # this could also be a list of endpoints for load balancing # for example endpoint: [ ip1:port, ip2:port ] - endpoint: 127.0.0.1:80 + endpoint: host.docker.internal:18083 # max time to wait for a connection to be established connect_timeout: 0.005s diff --git a/model_server/app/__init__.py b/model_server/app/__init__.py index 7c40bad4..0e103e11 100644 --- a/model_server/app/__init__.py +++ b/model_server/app/__init__.py @@ -35,11 +35,11 @@ def start_server(): print("Server is already running. Use 'model_server restart' to restart it.") sys.exit(1) - print(f"Starting Archgw Model Server") + print(f"Starting Archgw Model Server - Loading some awesomeness, this may take a little time.)") process = subprocess.Popen( ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "51000"], start_new_session=True, - stdout=subprocess.DEVNULL, # Suppress standard output. There is a logger that model_server prints to + stdout=subprocess.DEVNULL, # Suppress standard output. There is a logger that model_server prints to stderr=subprocess.DEVNULL, # Suppress standard error. There is a logger that model_server prints to ) diff --git a/model_server/app/load_models.py b/model_server/app/load_models.py index f1feea17..60b62daf 100644 --- a/model_server/app/load_models.py +++ b/model_server/app/load_models.py @@ -7,6 +7,7 @@ from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForSequenc def get_device(): + if torch.cuda.is_available(): device = "cuda" elif torch.backends.mps.is_available(): @@ -14,10 +15,12 @@ def get_device(): else: device = "cpu" + print(f"Devices Avialble: {device}") return device def load_transformers(model_name=os.getenv("MODELS", "katanemo/bge-large-en-v1.5-onnx")): + print("Loading Embedding Model") transformers = {} device = get_device() transformers["tokenizer"] = AutoTokenizer.from_pretrained(model_name) @@ -33,6 +36,7 @@ def load_guard_model( model_name, hardware_config="cpu", ): + print("Loading Guard Model") guard_model = {} guard_model["tokenizer"] = AutoTokenizer.from_pretrained( model_name, trust_remote_code=True @@ -58,9 +62,7 @@ def load_guard_model( return guard_model -def load_zero_shot_models( - model_name=os.getenv("ZERO_SHOT_MODELS", "katanemo/deberta-base-nli-onnx") -): +def load_zero_shot_models(model_name=os.getenv("ZERO_SHOT_MODELS", "katanemo/deberta-base-nli-onnx")): zero_shot_model = {} device = get_device() zero_shot_model["model"] = ORTModelForSequenceClassification.from_pretrained( @@ -79,6 +81,5 @@ def load_zero_shot_models( return zero_shot_model - if __name__ == "__main__": print(get_device()) diff --git a/model_server/app/main.py b/model_server/app/main.py index 95be9578..88db3701 100644 --- a/model_server/app/main.py +++ b/model_server/app/main.py @@ -26,6 +26,7 @@ guard_model_config = load_yaml_config("guard_model_config.yaml") mode = os.getenv("MODE", "cloud") logger.info(f"Serving model mode: {mode}") +print(f"Serving model mode: {mode}") if mode not in ["cloud", "local-gpu", "local-cpu"]: raise ValueError(f"Invalid mode: {mode}") if mode == "local-cpu":