fix demos code (#76)

This commit is contained in:
Adil Hafeez 2024-09-24 14:34:22 -07:00 committed by GitHub
parent 13dff3089d
commit 685144bbd7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 2020 additions and 21 deletions

View file

@ -0,0 +1,289 @@
import logging
import random
from datetime import date, datetime, timedelta, timezone
from typing import List, Optional

import pandas as pd
from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel

from utils import load_sql
# Configure root logging once at import time: timestamped INFO-level messages.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by all endpoints below.
logger = logging.getLogger(__name__)
# FastAPI application object; routes are registered via decorators below.
app = FastAPI()
@app.get("/healthz")
async def healthz():
    """Liveness probe: always reports the service as up."""
    return {"status": "ok"}
# Shared in-memory SQLite connection, seeded with demo data at import time.
conn = load_sql()
# Employee display-name column selected by the query endpoints.
name_col = "name"
class TopEmployees(BaseModel):
    """Request body for /top_employees."""

    grouping: str  # column to partition employees by
    ranking_criteria: str  # column to rank by; "yoe"/"rating" aliases accepted
    top_n: int  # number of employees to keep per group
@app.post("/top_employees")
async def top_employees(req: TopEmployees, res: Response):
    """Return the top-N employees per group, ranked by the requested column.

    SQL identifiers cannot be bound as query parameters, so the grouping and
    ranking columns are validated against a whitelist before being
    interpolated into the statement; ``top_n`` is bound as a real parameter.
    This closes the SQL-injection hole the original f-string query had.

    Raises:
        HTTPException: 400 when the grouping or ranking column is unknown.
    """
    name_col = "name"
    logger.info(
        f"{'* ' * 50}\n\nCaptured Ranking Criteria: {req.ranking_criteria}\n\n{'* ' * 50}"
    )
    # Map the natural-language aliases used by the demo prompts onto columns.
    if req.ranking_criteria == "yoe":
        req.ranking_criteria = "years_of_experience"
    elif req.ranking_criteria == "rating":
        req.ranking_criteria = "performance_score"
    logger.info(
        f"{'* ' * 50}\n\nFinal Ranking Criteria: {req.ranking_criteria}\n\n{'* ' * 50}"
    )
    # Whitelist of columns that may appear as identifiers in the query.
    allowed_groupings = {"department", "location", "position"}
    allowed_rankings = {"years_of_experience", "performance_score", "salary"}
    if req.grouping not in allowed_groupings:
        raise HTTPException(status_code=400, detail=f"Invalid grouping: {req.grouping}")
    if req.ranking_criteria not in allowed_rankings:
        raise HTTPException(
            status_code=400, detail=f"Invalid ranking criteria: {req.ranking_criteria}"
        )
    query = f"""
    SELECT {req.grouping}, {name_col}, {req.ranking_criteria}
    FROM (
        SELECT {req.grouping}, {name_col}, {req.ranking_criteria},
               DENSE_RANK() OVER (PARTITION BY {req.grouping} ORDER BY {req.ranking_criteria} DESC) as emp_rank
        FROM employees
    ) ranked_employees
    WHERE emp_rank <= :top_n;
    """
    # top_n is a value, not an identifier, so it is bound properly.
    result_df = pd.read_sql_query(query, conn, params={"top_n": req.top_n})
    return result_df.to_dict(orient="records")
class AggregateStats(BaseModel):
    """Request body for /aggregate_stats."""

    grouping: str  # column to GROUP BY
    aggregate_criteria: str  # column to aggregate; "yoe" alias accepted
    aggregate_type: str  # sum/avg/min/max/count (or natural-language synonym)
@app.post("/aggregate_stats")
async def aggregate_stats(req: AggregateStats, res: Response):
    """Compute one aggregate (SUM/AVG/MIN/MAX/COUNT) per group.

    The aggregate function, the aggregated column, and the grouping column are
    all normalized and validated against whitelists before interpolation,
    since identifiers cannot be bound as SQL parameters. The result-column
    alias is always ``<agg>_<criteria>`` in lower case (the original produced
    mixed-case keys depending on which synonym the caller used).

    Raises:
        HTTPException: 400 for an unknown aggregate type, grouping, or column.
    """
    logger.info(
        f"{'* ' * 50}\n\nCaptured Aggregate Criteria: {req.aggregate_criteria}\n\n{'* ' * 50}"
    )
    if req.aggregate_criteria == "yoe":
        req.aggregate_criteria = "years_of_experience"
    logger.info(
        f"{'* ' * 50}\n\nFinal Aggregate Criteria: {req.aggregate_criteria}\n\n{'* ' * 50}"
    )
    logger.info(
        f"{'* ' * 50}\n\nCaptured Aggregate Type: {req.aggregate_type}\n\n{'* ' * 50}"
    )
    # Map SQL keywords and their natural-language synonyms onto canonical
    # SQL aggregate functions; anything else is rejected.
    synonyms = {
        "sum": "SUM", "total": "SUM",
        "avg": "AVG", "average": "AVG",
        "min": "MIN", "minimum": "MIN",
        "max": "MAX", "maximum": "MAX",
        "count": "COUNT",
    }
    agg = synonyms.get(req.aggregate_type.lower())
    if agg is None:
        raise HTTPException(status_code=400, detail="Invalid aggregate type")
    req.aggregate_type = agg
    logger.info(
        f"{'* ' * 50}\n\nFinal Aggregate Type: {req.aggregate_type}\n\n{'* ' * 50}"
    )
    # Identifier whitelists guard the f-string interpolation below.
    allowed_groupings = {"department", "location", "position"}
    allowed_criteria = {"years_of_experience", "performance_score", "salary"}
    if req.grouping not in allowed_groupings:
        raise HTTPException(status_code=400, detail=f"Invalid grouping: {req.grouping}")
    if req.aggregate_criteria not in allowed_criteria:
        raise HTTPException(
            status_code=400, detail=f"Invalid aggregate criteria: {req.aggregate_criteria}"
        )
    query = f"""
    SELECT {req.grouping}, {agg}({req.aggregate_criteria}) as {agg.lower()}_{req.aggregate_criteria}
    FROM employees
    GROUP BY {req.grouping};
    """
    result_df = pd.read_sql_query(query, conn)
    return result_df.to_dict(orient="records")
# 1. Top Employees by Performance, Projects, and Timeframe
class TopEmployeesProjects(BaseModel):
    """Request body for /top_employees_projects."""

    min_performance_score: float  # lower bound on performance_score
    min_years_experience: int  # lower bound on years_of_experience
    department: str  # exact department match
    # Optional fields use Optional[...] instead of a bare `int = None`
    # default, which strict pydantic validation rejects.
    min_project_count: Optional[int] = None  # minimum project count per employee
    months_range: Optional[int] = None  # only projects started in the last N months
@app.post("/top_employees_projects")
async def employees_projects(req: TopEmployeesProjects, res: Response):
    """List employees meeting performance/experience/department thresholds,
    with their project counts.

    Fixes over the original:
    - The project-count threshold uses an aggregate (COUNT), so it must live
      in HAVING; SQLite rejects an aggregate in WHERE ("misuse of aggregate").
    - All request values are bound as named parameters instead of being
      f-string-interpolated (SQL-injection hardening); the original built a
      `params` dict but never actually used it in the query.
    """
    params = {
        "min_performance_score": req.min_performance_score,
        "min_years_experience": req.min_years_experience,
        "department": req.department,
    }
    filters = []
    # Optional recency filter: SQLite date modifiers are strings, so the month
    # count is concatenated with || while still being bound as a parameter.
    if req.months_range is not None:
        filters.append("p.start_date >= DATE('now', '-' || :months_range || ' months')")
        params["months_range"] = req.months_range
    where_clause = ""
    if filters:
        where_clause = "AND " + " AND ".join(filters)
    # Aggregate threshold goes into HAVING, after GROUP BY.
    having_clause = ""
    if req.min_project_count is not None:
        having_clause = "HAVING COUNT(p.project_id) >= :min_project_count"
        params["min_project_count"] = req.min_project_count
    query = f"""
    SELECT e.name, e.department, e.years_of_experience, e.performance_score, COUNT(p.project_id) as project_count
    FROM employees e
    LEFT JOIN projects p ON e.eid = p.eid
    WHERE e.performance_score >= :min_performance_score
    AND e.years_of_experience >= :min_years_experience
    AND e.department = :department
    {where_clause}
    GROUP BY e.eid, e.name, e.department, e.years_of_experience, e.performance_score
    {having_clause}
    ORDER BY e.performance_score DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 2. Employees with Salary Growth Since Last Promotion
class SalaryGrowthRequest(BaseModel):
    """Request body for /salary_growth."""

    min_salary_increase_percentage: float  # lower bound on the raise percentage
    # Optional[...] instead of a bare `str = None` default, which strict
    # pydantic validation rejects.
    department: Optional[str] = None  # restrict results to one department
@app.post("/salary_growth")
async def salary_growth(req: SalaryGrowthRequest, res: Response):
    """Employees whose recorded promotions carried at least the given raise.

    The percentage threshold is bound as a named parameter (the original
    f-string-interpolated it), matching the parameter style the rest of the
    query already uses for `department`.
    """
    params = {"min_pct": req.min_salary_increase_percentage}
    filters = []
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department
    where_clause = ""
    if filters:
        where_clause = "AND " + " AND ".join(filters)
    query = f"""
    SELECT e.name, e.department, s.salary_increase_percentage
    FROM employees e
    JOIN salary_history s ON e.eid = s.eid
    WHERE s.salary_increase_percentage >= :min_pct
    AND s.promotion_date IS NOT NULL
    {where_clause}
    ORDER BY s.salary_increase_percentage DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 4. Employees with Promotions and Salary Increases
class PromotionsIncreasesRequest(BaseModel):
    """Request body for /promotions_increases."""

    year: int  # promotion calendar year
    # Optional[...] instead of bare `= None` defaults, which strict pydantic
    # validation rejects.
    min_salary_increase_percentage: Optional[float] = None  # lower bound on raise %
    department: Optional[str] = None  # restrict results to one department
@app.post("/promotions_increases")
async def promotions_increases(req: PromotionsIncreasesRequest, res: Response):
    """Promotions (with raise percentages) recorded in a given calendar year.

    Fixes over the original:
    - Values are bound as named parameters instead of f-string interpolation.
    - The raise-percentage filter uses `is not None` so a legitimate 0.0
      threshold is not silently dropped by truthiness.
    """
    # strftime('%Y', ...) yields a 4-digit string, so compare against str(year).
    params = {"year": str(req.year)}
    filters = []
    if req.min_salary_increase_percentage is not None:
        filters.append("s.salary_increase_percentage >= :min_pct")
        params["min_pct"] = req.min_salary_increase_percentage
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department
    where_clause = "AND " + " AND ".join(filters) if filters else ""
    query = f"""
    SELECT e.name, e.department, s.salary_increase_percentage, s.promotion_date
    FROM employees e
    JOIN salary_history s ON e.eid = s.eid
    WHERE strftime('%Y', s.promotion_date) = :year
    {where_clause}
    ORDER BY s.salary_increase_percentage DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 5. Employees with Highest Average Project Performance
class AvgProjPerformanceRequest(BaseModel):
    """Request body for /avg_project_performance."""

    min_project_count: int  # minimum number of qualifying projects
    min_performance_score: float  # lower bound on project performance_score
    # Optional[...] instead of a bare `str = None` default, which strict
    # pydantic validation rejects.
    department: Optional[str] = None  # restrict results to one department
@app.post("/avg_project_performance")
async def avg_project_performance(req: AvgProjPerformanceRequest, res: Response):
    """Average project performance per employee, for employees with enough
    qualifying projects.

    All request values are bound as named parameters (the original
    f-string-interpolated the numeric thresholds).
    """
    params = {
        "min_perf": req.min_performance_score,
        "min_projects": req.min_project_count,
    }
    # The performance filter is always present, so WHERE is never empty.
    filters = ["p.performance_score >= :min_perf"]
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department
    where_clause = " AND ".join(filters)
    query = f"""
    SELECT e.name, e.department, AVG(p.performance_score) as avg_performance_score, COUNT(p.project_id) as project_count
    FROM employees e
    JOIN projects p ON e.eid = p.eid
    WHERE {where_clause}
    GROUP BY e.eid, e.name, e.department
    HAVING COUNT(p.project_id) >= :min_projects
    ORDER BY avg_performance_score DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')
# 6. Employees by Certification and Years of Experience
class CertificationsExperienceRequest(BaseModel):
    """Request body for /employees_certifications_experience."""

    certifications: List[str]  # employee must hold ALL of these
    min_years_experience: int  # lower bound on years_of_experience
    # Optional[...] instead of a bare `str = None` default, which strict
    # pydantic validation rejects.
    department: Optional[str] = None  # restrict results to one department
@app.post("/employees_certifications_experience")
async def certifications_experience(req: CertificationsExperienceRequest, res: Response):
    """Employees holding ALL of the requested certifications, filtered by
    experience and (optionally) department.

    Fixes over the original:
    - Certification names are bound as named parameters; the original quoted
      them into the SQL string, so a name containing a quote broke out of it.
    - COUNT(DISTINCT ...) in HAVING, so duplicate certification rows for the
      same employee cannot fake a full match.
    - An empty certification list is rejected up front (it would otherwise
      produce an invalid `IN ()` clause).
    """
    if not req.certifications:
        raise HTTPException(status_code=400, detail="certifications must not be empty")
    # One named parameter per certification: IN (:cert0, :cert1, ...).
    params = {f"cert{i}": cert for i, cert in enumerate(req.certifications)}
    placeholders = ", ".join(f":cert{i}" for i in range(len(req.certifications)))
    filters = ["e.years_of_experience >= :min_years_experience"]
    params["min_years_experience"] = req.min_years_experience
    if req.department:
        filters.append("e.department = :department")
        params["department"] = req.department
    where_clause = " AND ".join(filters)
    params["cert_total"] = len(req.certifications)
    query = f"""
    SELECT e.name, e.department, e.years_of_experience, COUNT(c.certification_name) as cert_count
    FROM employees e
    JOIN certifications c ON e.eid = c.eid
    WHERE c.certification_name IN ({placeholders})
    AND {where_clause}
    GROUP BY e.eid, e.name, e.department, e.years_of_experience
    HAVING COUNT(DISTINCT c.certification_name) = :cert_total
    ORDER BY e.years_of_experience DESC;
    """
    result_df = pd.read_sql_query(query, conn, params=params)
    return result_df.to_dict(orient='records')

View file

@ -0,0 +1,157 @@
import pandas as pd
import random
import datetime
import sqlite3
def load_sql():
    """Build an in-memory SQLite database populated with demo tables.

    Returns the open connection. The employees table is created first
    because the other generators sample its ``eid`` values.
    """
    conn = sqlite3.connect(":memory:")
    generate_employee_data(conn)  # must run first: the rest read eid from it
    generate_project_data(conn)
    generate_salary_history(conn)
    generate_certifications(conn)
    return conn
# Function to generate random employee data with `eid` as the primary key
def generate_employee_data(conn):
    """Create and fill the `employees` table with 100 random rows.

    Args:
        conn: open sqlite3 connection; the table is (re)created via
            pandas ``to_sql(..., if_exists='replace')``.

    The original defined `names` and `positions` twice and built an unused
    `certifications` list; the dead duplicates are removed here.
    """
    # Candidate values for the categorical columns.
    names = ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace", "Hank", "Ivy", "Jack"]
    positions = ["Manager", "Engineer", "Salesperson", "HR Specialist", "Marketing Analyst"]
    departments = ["Engineering", "Marketing", "HR", "Sales", "Finance"]
    locations = ["New York", "San Francisco", "Austin", "Boston", "Chicago"]

    def random_hire_date():
        """Uniformly random date between 2000-01-01 and 2023-12-31."""
        start_date = datetime.date(2000, 1, 1)
        end_date = datetime.date(2023, 12, 31)
        span_days = (end_date - start_date).days
        return start_date + datetime.timedelta(days=random.randrange(span_days))

    # Generate 100 employee records with `eid` as the primary key (1..100).
    employees = []
    for eid in range(1, 101):
        employees.append({
            "eid": eid,
            "name": random.choice(names),
            "position": random.choice(positions),
            "salary": round(random.uniform(50000, 150000), 2),  # 50k..150k
            "department": random.choice(departments),
            "location": random.choice(locations),
            "hire_date": random_hire_date(),
            "performance_score": round(random.uniform(1, 5), 2),  # 1.0..5.0
            "years_of_experience": random.randint(1, 30),
        })
    # Save to DB, replacing any previous table.
    pd.DataFrame(employees).to_sql('employees', conn, index=False, if_exists='replace')
# Function to generate random project data with `eid`
def generate_project_data(conn):
    """Create and fill the `projects` table: 500 rows keyed to existing eids."""
    eids = pd.read_sql_query("SELECT eid FROM employees", conn)['eid']
    records = []
    for _ in range(500):  # 500 projects
        records.append({
            "eid": random.choice(eids),  # foreign key from employees table
            "project_name": f"Project_{random.randint(1, 100)}",
            # start date within 3 years after 2020-01-01
            "start_date": datetime.date(2020, 1, 1) + datetime.timedelta(days=random.randint(0, 365 * 3)),
            "performance_score": round(random.uniform(1, 5), 2),  # 1.0..5.0
        })
    # Save to DB, replacing any previous table.
    pd.DataFrame(records).to_sql('projects', conn, index=False, if_exists='replace')
# Function to generate random salary history data with `eid`
def generate_salary_history(conn):
    """Create and fill `salary_history`: 300 promotion records for random eids."""
    eids = pd.read_sql_query("SELECT eid FROM employees", conn)['eid']
    rows = [
        {
            "eid": random.choice(eids),  # foreign key from employees table
            "salary_increase_percentage": round(random.uniform(5, 30), 2),  # 5%..30%
            # promotions within 5 years after 2018-01-01
            "promotion_date": datetime.date(2018, 1, 1) + datetime.timedelta(days=random.randint(0, 365 * 5)),
        }
        for _ in range(300)  # 300 salary records
    ]
    # Save to DB, replacing any previous table.
    pd.DataFrame(rows).to_sql('salary_history', conn, index=False, if_exists='replace')
# Function to generate random certifications data with `eid`
def generate_certifications(conn):
    """Create and fill `certifications`: 300 (eid, certification_name) rows."""
    eids = pd.read_sql_query("SELECT eid FROM employees", conn)['eid']
    cert_names = ["AWS Certified", "Google Cloud Certified", "PMP", "Scrum Master", "Cisco Certified"]
    rows = [
        {
            "eid": random.choice(eids),  # foreign key from employees table
            "certification_name": random.choice(cert_names),
        }
        for _ in range(300)  # 300 certification records
    ]
    # Save to DB, replacing any previous table.
    pd.DataFrame(rows).to_sql('certifications', conn, index=False, if_exists='replace')