# plano/demos/employee_details_copilot/api_server/app/utils.py

import pandas as pd
import random
import datetime
import sqlite3

def load_sql():
    """Build an in-memory SQLite database filled with random demo data.

    The employees table is populated first because the other generators
    read the ``eid`` values from it.

    Returns:
        The open ``sqlite3`` connection holding the ``employees``,
        ``projects``, ``salary_history`` and ``certifications`` tables.
    """
    connection = sqlite3.connect(":memory:")

    # Order matters: every generator after the first queries `employees`.
    table_builders = (
        generate_employee_data,
        generate_project_data,
        generate_salary_history,
        generate_certifications,
    )
    for build_table in table_builders:
        build_table(connection)

    return connection

def generate_employee_data(conn: sqlite3.Connection) -> None:
    """Create the ``employees`` table in *conn* with 100 random records.

    Each record has a sequential ``eid`` (1 through 100, acting as the
    primary key for the other tables) plus randomly drawn ``name``,
    ``position``, ``salary``, ``department``, ``location``, ``hire_date``,
    ``performance_score`` and ``years_of_experience`` values. An existing
    ``employees`` table is replaced.

    Args:
        conn: Open SQLite connection that receives the table.
    """
    # Value pools the random records are drawn from.
    names = [
        "Alice",
        "Bob",
        "Charlie",
        "David",
        "Eve",
        "Frank",
        "Grace",
        "Hank",
        "Ivy",
        "Jack",
    ]
    positions = [
        "Manager",
        "Engineer",
        "Salesperson",
        "HR Specialist",
        "Marketing Analyst",
    ]
    departments = ["Engineering", "Marketing", "HR", "Sales", "Finance"]
    locations = ["New York", "San Francisco", "Austin", "Boston", "Chicago"]

    # Hire-date window; computed once instead of on every call.
    first_hire_date = datetime.date(2000, 1, 1)
    last_hire_date = datetime.date(2023, 12, 31)
    hire_window_days = (last_hire_date - first_hire_date).days

    def random_hire_date():
        # randrange() excludes its upper bound, so the latest date produced
        # is the day before `last_hire_date`.
        offset_days = random.randrange(hire_window_days)
        return first_hire_date + datetime.timedelta(days=offset_days)

    employees = []
    for eid in range(1, 101):  # 100 employees, `eid` starting from 1
        employees.append(
            {
                "eid": eid,
                "name": random.choice(names),
                "position": random.choice(positions),
                # Salary between 50,000 and 150,000
                "salary": round(random.uniform(50000, 150000), 2),
                "department": random.choice(departments),
                "location": random.choice(locations),
                "hire_date": random_hire_date(),
                # Performance score between 1.0 and 5.0
                "performance_score": round(random.uniform(1, 5), 2),
                # Years of experience between 1 and 30 (inclusive)
                "years_of_experience": random.randint(1, 30),
            }
        )

    # Convert the list of dictionaries to a DataFrame and save to DB
    df_employees = pd.DataFrame(employees)
    df_employees.to_sql('employees', conn, index=False, if_exists='replace')

def generate_project_data(conn: sqlite3.Connection) -> None:
    """Create the ``projects`` table in *conn* with 500 random records.

    Every record references a randomly chosen ``eid`` from the existing
    ``employees`` table and carries a ``project_name``, a ``start_date``
    and a ``performance_score``. An existing ``projects`` table is replaced.

    Args:
        conn: Open SQLite connection that already contains ``employees``.

    Raises:
        IndexError: If the ``employees`` table has no rows to choose from.
    """
    # Materialise the ids as a plain list once. random.choice() indexes its
    # argument with an integer, which on a pandas Series is a *label* lookup
    # that only works by virtue of the default 0..n-1 index.
    employee_ids = pd.read_sql_query("SELECT eid FROM employees", conn)["eid"].tolist()

    earliest_start = datetime.date(2020, 1, 1)

    projects = [
        {
            "eid": random.choice(employee_ids),  # Foreign key from employees table
            "project_name": f"Project_{random.randint(1, 100)}",
            # Start date within 3 years (365 * 3 days) of 2020-01-01
            "start_date": earliest_start + datetime.timedelta(days=random.randint(0, 365 * 3)),
            # Performance score for the project between 1.0 and 5.0
            "performance_score": round(random.uniform(1, 5), 2),
        }
        for _ in range(500)  # 500 projects
    ]

    # Convert the list of dictionaries to a DataFrame and save to DB
    df_projects = pd.DataFrame(projects)
    df_projects.to_sql('projects', conn, index=False, if_exists='replace')

def generate_salary_history(conn: sqlite3.Connection) -> None:
    """Create the ``salary_history`` table in *conn* with 300 random records.

    Every record references a randomly chosen ``eid`` from the existing
    ``employees`` table and carries a ``salary_increase_percentage`` and a
    ``promotion_date``. An existing ``salary_history`` table is replaced.

    Args:
        conn: Open SQLite connection that already contains ``employees``.

    Raises:
        IndexError: If the ``employees`` table has no rows to choose from.
    """
    # Materialise the ids as a plain list once. random.choice() indexes its
    # argument with an integer, which on a pandas Series is a *label* lookup
    # that only works by virtue of the default 0..n-1 index.
    employee_ids = pd.read_sql_query("SELECT eid FROM employees", conn)["eid"].tolist()

    earliest_promotion = datetime.date(2018, 1, 1)

    salary_history = [
        {
            "eid": random.choice(employee_ids),  # Foreign key from employees table
            # Salary increase between 5% and 30%
            "salary_increase_percentage": round(random.uniform(5, 30), 2),
            # Promotion within 5 years (365 * 5 days) of 2018-01-01
            "promotion_date": earliest_promotion + datetime.timedelta(days=random.randint(0, 365 * 5)),
        }
        for _ in range(300)  # 300 salary records
    ]

    # Convert the list of dictionaries to a DataFrame and save to DB
    df_salary_history = pd.DataFrame(salary_history)
    df_salary_history.to_sql('salary_history', conn, index=False, if_exists='replace')

def generate_certifications(conn: sqlite3.Connection) -> None:
    """Create the ``certifications`` table in *conn* with 300 random records.

    Every record pairs a randomly chosen ``eid`` from the existing
    ``employees`` table with a randomly chosen ``certification_name``. The
    same pair may occur more than once. An existing ``certifications`` table
    is replaced.

    Args:
        conn: Open SQLite connection that already contains ``employees``.

    Raises:
        IndexError: If the ``employees`` table has no rows to choose from.
    """
    # Materialise the ids as a plain list once. random.choice() indexes its
    # argument with an integer, which on a pandas Series is a *label* lookup
    # that only works by virtue of the default 0..n-1 index.
    employee_ids = pd.read_sql_query("SELECT eid FROM employees", conn)["eid"].tolist()

    certifications_list = [
        "AWS Certified",
        "Google Cloud Certified",
        "PMP",
        "Scrum Master",
        "Cisco Certified",
    ]

    employee_certifications = [
        {
            "eid": random.choice(employee_ids),  # Foreign key from employees table
            "certification_name": random.choice(certifications_list),
        }
        for _ in range(300)  # 300 certification records
    ]

    # Convert the list of dictionaries to a DataFrame and save to DB
    df_certifications = pd.DataFrame(employee_certifications)
    df_certifications.to_sql('certifications', conn, index=False, if_exists='replace')