fix demos code (#76)

This commit is contained in:
Adil Hafeez 2024-09-24 14:34:22 -07:00 committed by GitHub
parent 13dff3089d
commit 685144bbd7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 2020 additions and 21 deletions

View file

@ -0,0 +1,289 @@
import logging
import random
from datetime import date, datetime, timedelta, timezone
from typing import List, Optional

import pandas as pd
from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel

from utils import load_sql
# Configure root logging once at import time: timestamped INFO-level messages.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by all endpoints below.
logger = logging.getLogger(__name__)
# FastAPI application object; routes are registered via decorators below.
app = FastAPI()
@app.get("/healthz")
async def healthz():
    """Liveness probe: always reports the service as up."""
    return {"status": "ok"}
# Shared in-memory SQLite connection, seeded with demo data at import time.
conn = load_sql()
# Employee display-name column selected by the query endpoints.
name_col = "name"
class TopEmployees(BaseModel):
    """Request body for /top_employees."""

    grouping: str  # column to partition employees by
    ranking_criteria: str  # column to rank by; "yoe"/"rating" aliases accepted
    top_n: int  # number of employees to keep per group
@app.post("/top_employees")
async def top_employees(req: TopEmployees, res: Response):
    """Return the top-N employees per group, ranked by the requested column.

    SQL identifiers cannot be bound as query parameters, so the grouping and
    ranking columns are validated against a whitelist before being
    interpolated into the statement; ``top_n`` is bound as a real parameter.
    This closes the SQL-injection hole the original f-string query had.

    Raises:
        HTTPException: 400 when the grouping or ranking column is unknown.
    """
    name_col = "name"
    logger.info(
        f"{'* ' * 50}\n\nCaptured Ranking Criteria: {req.ranking_criteria}\n\n{'* ' * 50}"
    )
    # Map the natural-language aliases used by the demo prompts onto columns.
    if req.ranking_criteria == "yoe":
        req.ranking_criteria = "years_of_experience"
    elif req.ranking_criteria == "rating":
        req.ranking_criteria = "performance_score"
    logger.info(
        f"{'* ' * 50}\n\nFinal Ranking Criteria: {req.ranking_criteria}\n\n{'* ' * 50}"
    )
    # Whitelist of columns that may appear as identifiers in the query.
    allowed_groupings = {"department", "location", "position"}
    allowed_rankings = {"years_of_experience", "performance_score", "salary"}
    if req.grouping not in allowed_groupings:
        raise HTTPException(status_code=400, detail=f"Invalid grouping: {req.grouping}")
    if req.ranking_criteria not in allowed_rankings:
        raise HTTPException(
            status_code=400, detail=f"Invalid ranking criteria: {req.ranking_criteria}"
        )
    query = f"""
    SELECT {req.grouping}, {name_col}, {req.ranking_criteria}
    FROM (
        SELECT {req.grouping}, {name_col}, {req.ranking_criteria},
               DENSE_RANK() OVER (PARTITION BY {req.grouping} ORDER BY {req.ranking_criteria} DESC) as emp_rank
        FROM employees
    ) ranked_employees
    WHERE emp_rank <= :top_n;
    """
    # top_n is a value, not an identifier, so it is bound properly.
    result_df = pd.read_sql_query(query, conn, params={"top_n": req.top_n})
    return result_df.to_dict(orient="records")
class AggregateStats(BaseModel):
    """Request body for /aggregate_stats."""

    grouping: str  # column to GROUP BY
    aggregate_criteria: str  # column to aggregate; "yoe" alias accepted
    aggregate_type: str  # sum/avg/min/max/count (or natural-language synonym)
@app.post("/aggregate_stats")
async def aggregate_stats(req: AggregateStats, res: Response):
    """Compute one aggregate (SUM/AVG/MIN/MAX/COUNT) per group.

    The aggregate function, the aggregated column, and the grouping column are
    all normalized and validated against whitelists before interpolation,
    since identifiers cannot be bound as SQL parameters. The result-column
    alias is always ``<agg>_<criteria>`` in lower case (the original produced
    mixed-case keys depending on which synonym the caller used).

    Raises:
        HTTPException: 400 for an unknown aggregate type, grouping, or column.
    """
    logger.info(
        f"{'* ' * 50}\n\nCaptured Aggregate Criteria: {req.aggregate_criteria}\n\n{'* ' * 50}"
    )
    if req.aggregate_criteria == "yoe":
        req.aggregate_criteria = "years_of_experience"
    logger.info(
        f"{'* ' * 50}\n\nFinal Aggregate Criteria: {req.aggregate_criteria}\n\n{'* ' * 50}"
    )
    logger.info(
        f"{'* ' * 50}\n\nCaptured Aggregate Type: {req.aggregate_type}\n\n{'* ' * 50}"
    )
    # Map SQL keywords and their natural-language synonyms onto canonical
    # SQL aggregate functions; anything else is rejected.
    synonyms = {
        "sum": "SUM", "total": "SUM",
        "avg": "AVG", "average": "AVG",
        "min": "MIN", "minimum": "MIN",
        "max": "MAX", "maximum": "MAX",
        "count": "COUNT",
    }
    agg = synonyms.get(req.aggregate_type.lower())
    if agg is None:
        raise HTTPException(status_code=400, detail="Invalid aggregate type")
    req.aggregate_type = agg
    logger.info(
        f"{'* ' * 50}\n\nFinal Aggregate Type: {req.aggregate_type}\n\n{'* ' * 50}"
    )
    # Identifier whitelists guard the f-string interpolation below.
    allowed_groupings = {"department", "location", "position"}
    allowed_criteria = {"years_of_experience", "performance_score", "salary"}
    if req.grouping not in allowed_groupings:
        raise HTTPException(status_code=400, detail=f"Invalid grouping: {req.grouping}")
    if req.aggregate_criteria not in allowed_criteria:
        raise HTTPException(
            status_code=400, detail=f"Invalid aggregate criteria: {req.aggregate_criteria}"
        )
    query = f"""
    SELECT {req.grouping}, {agg}({req.aggregate_criteria}) as {agg.lower()}_{req.aggregate_criteria}
    FROM employees
    GROUP BY {req.grouping};
    """
    result_df = pd.read_sql_query(query, conn)
    return result_df.to_dict(orient="records")
# 1. Top Employees by Performance, Projects, and Timeframe
class TopEmployeesProjects(BaseModel):
    """Request body for /top_employees_projects."""

    min_performance_score: float  # lower bound on performance_score
    min_years_experience: int  # lower bound on years_of_experience
    department: str  # exact department match
    # Optional fields use Optional[...] instead of a bare `int = None`
    # default, which strict pydantic validation rejects.
    min_project_count: Optional[int] = None  # minimum project count per employee
    months_range: Optional[int] = None  # only projects started in the last N months
@app.post("/top_employees_projects")
async def employees_projects(req: TopEmployeesProjects, res: Response):
    """List employees meeting performance/experience/department thresholds,
    with their project counts.

    Fixes over the original:
    - The project-count threshold uses an aggregate (COUNT), so it must live
      in HAVING; SQLite rejects an aggregate in WHERE ("misuse of aggregate").
    - All request values are bound as named parameters instead of being
      f-string-interpolated (SQL-injection hardening); the original built a
      `params` dict but never actually used it in the query.
    """
    params = {
        "min_performance_score": req.min_performance_score,
        "min_years_experience": req.min_years_experience,
        "department": req.department,
    }
    filters = []
    # Optional recency filter: SQLite date modifiers are strings, so the month
    # count is concatenated with || while still being bound as a parameter.
    if req.months_range is not None:
        filters.append("p.start_date >= DATE('now', '-' || :months_range || ' months')")
        params["months_range"] = req.months_range
    where_clause = ""
    if filters:
        where_clause = "AND " + " AND ".join(filters)
    # Aggregate threshold goes into HAVING, after GROUP BY.
    having_clause = ""
    if req.min_project_count is not None:
        having_clause = "HAVING COUNT(p.project_id) >= :min_project_count"
        params["min_project_count"] = req.min_project_count
    query = f"""
    SELECT e.name, e.department, e.years_of_experience, e.performance_score, COUNT(p.project_id) as project_count
    FROM employees e
    LEFT JOIN projects p ON e.eid = p.eid
    WHERE e.performance_score >= :min_performance_score
    AND e.years_of_experience >= :min_years_experience
    AND e.department = :department
    {where_clause}
    GROUP BY e.eid, e.name, e.department, e.years_of_experience, e.performance_score
    {having_clause}
    ORDER BY e.performance_score DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 2. Employees with Salary Growth Since Last Promotion
class SalaryGrowthRequest(BaseModel):
    """Request body for /salary_growth."""

    min_salary_increase_percentage: float  # lower bound on the raise percentage
    # Optional[...] instead of a bare `str = None` default, which strict
    # pydantic validation rejects.
    department: Optional[str] = None  # restrict results to one department
@app.post("/salary_growth")
async def salary_growth(req: SalaryGrowthRequest, res: Response):
    """Employees whose recorded promotions carried at least the given raise.

    The percentage threshold is bound as a named parameter (the original
    f-string-interpolated it), matching the parameter style the rest of the
    query already uses for `department`.
    """
    params = {"min_pct": req.min_salary_increase_percentage}
    filters = []
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department
    where_clause = ""
    if filters:
        where_clause = "AND " + " AND ".join(filters)
    query = f"""
    SELECT e.name, e.department, s.salary_increase_percentage
    FROM employees e
    JOIN salary_history s ON e.eid = s.eid
    WHERE s.salary_increase_percentage >= :min_pct
    AND s.promotion_date IS NOT NULL
    {where_clause}
    ORDER BY s.salary_increase_percentage DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 4. Employees with Promotions and Salary Increases
class PromotionsIncreasesRequest(BaseModel):
    """Request body for /promotions_increases."""

    year: int  # promotion calendar year
    # Optional[...] instead of bare `= None` defaults, which strict pydantic
    # validation rejects.
    min_salary_increase_percentage: Optional[float] = None  # lower bound on raise %
    department: Optional[str] = None  # restrict results to one department
@app.post("/promotions_increases")
async def promotions_increases(req: PromotionsIncreasesRequest, res: Response):
    """Promotions (with raise percentages) recorded in a given calendar year.

    Fixes over the original:
    - Values are bound as named parameters instead of f-string interpolation.
    - The raise-percentage filter uses `is not None` so a legitimate 0.0
      threshold is not silently dropped by truthiness.
    """
    # strftime('%Y', ...) yields a 4-digit string, so compare against str(year).
    params = {"year": str(req.year)}
    filters = []
    if req.min_salary_increase_percentage is not None:
        filters.append("s.salary_increase_percentage >= :min_pct")
        params["min_pct"] = req.min_salary_increase_percentage
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department
    where_clause = "AND " + " AND ".join(filters) if filters else ""
    query = f"""
    SELECT e.name, e.department, s.salary_increase_percentage, s.promotion_date
    FROM employees e
    JOIN salary_history s ON e.eid = s.eid
    WHERE strftime('%Y', s.promotion_date) = :year
    {where_clause}
    ORDER BY s.salary_increase_percentage DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 5. Employees with Highest Average Project Performance
class AvgProjPerformanceRequest(BaseModel):
    """Request body for /avg_project_performance."""

    min_project_count: int  # minimum number of qualifying projects
    min_performance_score: float  # lower bound on project performance_score
    # Optional[...] instead of a bare `str = None` default, which strict
    # pydantic validation rejects.
    department: Optional[str] = None  # restrict results to one department
@app.post("/avg_project_performance")
async def avg_project_performance(req: AvgProjPerformanceRequest, res: Response):
    """Average project performance per employee, for employees with enough
    qualifying projects.

    All request values are bound as named parameters (the original
    f-string-interpolated the numeric thresholds).
    """
    params = {
        "min_perf": req.min_performance_score,
        "min_projects": req.min_project_count,
    }
    # The performance filter is always present, so WHERE is never empty.
    filters = ["p.performance_score >= :min_perf"]
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department
    where_clause = " AND ".join(filters)
    query = f"""
    SELECT e.name, e.department, AVG(p.performance_score) as avg_performance_score, COUNT(p.project_id) as project_count
    FROM employees e
    JOIN projects p ON e.eid = p.eid
    WHERE {where_clause}
    GROUP BY e.eid, e.name, e.department
    HAVING COUNT(p.project_id) >= :min_projects
    ORDER BY avg_performance_score DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 6. Employees by Certification and Years of Experience
class CertificationsExperienceRequest(BaseModel):
    """Request body for /employees_certifications_experience."""

    certifications: List[str]  # employee must hold ALL of these
    min_years_experience: int  # lower bound on years_of_experience
    # Optional[...] instead of a bare `str = None` default, which strict
    # pydantic validation rejects.
    department: Optional[str] = None  # restrict results to one department
@app.post("/employees_certifications_experience")
async def certifications_experience(req: CertificationsExperienceRequest, res: Response):
    """Employees holding ALL of the requested certifications, filtered by
    experience and (optionally) department.

    Fixes over the original:
    - Certification names are bound as named parameters; the original quoted
      them into the SQL string, so a name containing a quote broke out of it.
    - COUNT(DISTINCT ...) in HAVING, so duplicate certification rows for the
      same employee cannot fake a full match.
    - An empty certification list is rejected up front (it would otherwise
      produce an invalid `IN ()` clause).
    """
    if not req.certifications:
        raise HTTPException(status_code=400, detail="certifications must not be empty")
    # One named parameter per certification: IN (:cert0, :cert1, ...).
    params = {f"cert{i}": cert for i, cert in enumerate(req.certifications)}
    placeholders = ", ".join(f":cert{i}" for i in range(len(req.certifications)))
    filters = ["e.years_of_experience >= :min_years_experience"]
    params["min_years_experience"] = req.min_years_experience
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department
    where_clause = " AND ".join(filters)
    params["cert_total"] = len(req.certifications)
    query = f"""
    SELECT e.name, e.department, e.years_of_experience, COUNT(c.certification_name) as cert_count
    FROM employees e
    JOIN certifications c ON e.eid = c.eid
    WHERE c.certification_name IN ({placeholders})
    AND {where_clause}
    GROUP BY e.eid, e.name, e.department, e.years_of_experience
    HAVING COUNT(DISTINCT c.certification_name) = :cert_total
    ORDER BY e.years_of_experience DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')

View file

@ -0,0 +1,157 @@
import pandas as pd
import random
import datetime
import sqlite3
def load_sql():
    """Build an in-memory SQLite database populated with demo tables.

    Returns the open connection. The employees table is created first
    because the other generators sample its ``eid`` values.
    """
    conn = sqlite3.connect(":memory:")
    generate_employee_data(conn)  # must run first: the rest read eid from it
    generate_project_data(conn)
    generate_salary_history(conn)
    generate_certifications(conn)
    return conn
# Function to generate random employee data with `eid` as the primary key
def generate_employee_data(conn):
    """Create and fill the `employees` table with 100 random rows.

    Args:
        conn: open sqlite3 connection; the table is (re)created via
            pandas ``to_sql(..., if_exists='replace')``.

    The original defined `names` and `positions` twice and built an unused
    `certifications` list; the dead duplicates are removed here.
    """
    # Candidate values for the categorical columns.
    names = ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace", "Hank", "Ivy", "Jack"]
    positions = ["Manager", "Engineer", "Salesperson", "HR Specialist", "Marketing Analyst"]
    departments = ["Engineering", "Marketing", "HR", "Sales", "Finance"]
    locations = ["New York", "San Francisco", "Austin", "Boston", "Chicago"]

    def random_hire_date():
        """Uniformly random date between 2000-01-01 and 2023-12-31."""
        start_date = datetime.date(2000, 1, 1)
        end_date = datetime.date(2023, 12, 31)
        span_days = (end_date - start_date).days
        return start_date + datetime.timedelta(days=random.randrange(span_days))

    # Generate 100 employee records with `eid` as the primary key (1..100).
    employees = []
    for eid in range(1, 101):
        employees.append({
            "eid": eid,
            "name": random.choice(names),
            "position": random.choice(positions),
            "salary": round(random.uniform(50000, 150000), 2),  # 50k..150k
            "department": random.choice(departments),
            "location": random.choice(locations),
            "hire_date": random_hire_date(),
            "performance_score": round(random.uniform(1, 5), 2),  # 1.0..5.0
            "years_of_experience": random.randint(1, 30),
        })
    # Save to DB, replacing any previous table.
    pd.DataFrame(employees).to_sql('employees', conn, index=False, if_exists='replace')
# Function to generate random project data with `eid`
def generate_project_data(conn):
    """Create and fill the `projects` table: 500 rows keyed to existing eids."""
    eids = pd.read_sql_query("SELECT eid FROM employees", conn)['eid']
    records = []
    for _ in range(500):  # 500 projects
        records.append({
            "eid": random.choice(eids),  # foreign key from employees table
            "project_name": f"Project_{random.randint(1, 100)}",
            # start date within 3 years after 2020-01-01
            "start_date": datetime.date(2020, 1, 1) + datetime.timedelta(days=random.randint(0, 365 * 3)),
            "performance_score": round(random.uniform(1, 5), 2),  # 1.0..5.0
        })
    # Save to DB, replacing any previous table.
    pd.DataFrame(records).to_sql('projects', conn, index=False, if_exists='replace')
# Function to generate random salary history data with `eid`
def generate_salary_history(conn):
    """Create and fill `salary_history`: 300 promotion records for random eids."""
    eids = pd.read_sql_query("SELECT eid FROM employees", conn)['eid']
    rows = [
        {
            "eid": random.choice(eids),  # foreign key from employees table
            "salary_increase_percentage": round(random.uniform(5, 30), 2),  # 5%..30%
            # promotions within 5 years after 2018-01-01
            "promotion_date": datetime.date(2018, 1, 1) + datetime.timedelta(days=random.randint(0, 365 * 5)),
        }
        for _ in range(300)  # 300 salary records
    ]
    # Save to DB, replacing any previous table.
    pd.DataFrame(rows).to_sql('salary_history', conn, index=False, if_exists='replace')
# Function to generate random certifications data with `eid`
def generate_certifications(conn):
    """Create and fill `certifications`: 300 (eid, certification_name) rows."""
    eids = pd.read_sql_query("SELECT eid FROM employees", conn)['eid']
    cert_names = ["AWS Certified", "Google Cloud Certified", "PMP", "Scrum Master", "Cisco Certified"]
    rows = [
        {
            "eid": random.choice(eids),  # foreign key from employees table
            "certification_name": random.choice(cert_names),
        }
        for _ in range(300)  # 300 certification records
    ]
    # Save to DB, replacing any previous table.
    pd.DataFrame(rows).to_sql('certifications', conn, index=False, if_exists='replace')