fix demos code (#76)

Adil Hafeez 2024-09-24 14:34:22 -07:00 committed by GitHub
parent 13dff3089d
commit 685144bbd7
29 changed files with 2020 additions and 21 deletions

@@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "function-calling api server",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--port", "8001"],
}
]
}
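
With this configuration running under the VS Code debugger, a quick smoke test is to call the health endpoint the server exposes. A minimal sketch using only the standard library, assuming uvicorn is listening on localhost:8001 as configured above:

import json
from urllib.request import urlopen

# Assumes the launch config above started uvicorn on port 8001.
with urlopen("http://127.0.0.1:8001/healthz") as resp:
    print(json.load(resp))  # expected: {"status": "ok"}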

@@ -0,0 +1,19 @@
# Build stage: install dependencies into an isolated /runtime prefix.
FROM python:3 AS base

FROM base AS builder
WORKDIR /src
COPY requirements.txt /src/
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
COPY . /src

# Runtime stage: copy the installed packages and the app code into a slim image.
FROM python:3-slim AS output
COPY --from=builder /runtime /usr/local
COPY app /app
WORKDIR /app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
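
The two-stage build installs dependencies into the /runtime prefix inside the full python:3 image, then copies only that prefix plus the app code into python:3-slim, keeping the final image small. A hedged sketch of building and smoke-testing the image from Python (the demo-api tag and host port are made up for this example; a local Docker daemon is assumed):

import subprocess

# Build the image; the tag "demo-api" is illustrative, not from the repo.
subprocess.run(["docker", "build", "-t", "demo-api", "."], check=True)
# Run detached, publishing container port 80 on host port 8080.
subprocess.run(
    ["docker", "run", "--rm", "-d", "-p", "8080:80", "demo-api"],
    check=True,
)
# The API should then answer at http://127.0.0.1:8080/healthz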

@@ -0,0 +1,184 @@
from datetime import datetime, timezone
import logging
from typing import Optional

import pandas as pd
from fastapi import FastAPI, Response
from pydantic import BaseModel

from utils import load_sql, load_params
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
app = FastAPI()
@app.get("/healthz")
async def healthz():
return {
"status": "ok"
}
# Build the in-memory SQLite demo database once at startup.
conn = load_sql()
class PacketDropCorrelationRequest(BaseModel):
    from_time: Optional[str] = None  # Optional natural language timeframe
    ifname: Optional[str] = None  # Optional interface name filter
    region: Optional[str] = None  # Optional region filter
    min_in_errors: Optional[int] = None
    max_in_errors: Optional[int] = None
    min_out_errors: Optional[int] = None
    max_out_errors: Optional[int] = None
    min_in_discards: Optional[int] = None
    max_in_discards: Optional[int] = None
    min_out_discards: Optional[int] = None
    max_out_discards: Optional[int] = None
@app.post("/interface_down_pkt_drop")
async def interface_down_packet_drop(req: PacketDropCorrelationRequest, res: Response):
params, filters = load_params(req)
# Join the filters using AND
where_clause = " AND ".join(filters)
if where_clause:
where_clause = "AND " + where_clause
    # Query packet errors and flows from interfacestats and ts_flow
query = f"""
SELECT
d.switchip AS device_ip_address,
i.in_errors,
i.in_discards,
i.out_errors,
i.out_discards,
i.ifname,
t.src_addr,
t.dst_addr,
t.time AS flow_time,
i.time AS interface_time
FROM
device d
INNER JOIN
interfacestats i
ON d.device_mac_address = i.device_mac_address
INNER JOIN
ts_flow t
ON d.switchip = t.sampler_address
WHERE
i.time >= :from_time -- Using the converted timestamp
{where_clause}
ORDER BY
i.time;
"""
correlated_data = pd.read_sql_query(query, conn, params=params)
if correlated_data.empty:
default_response = {
"device_ip_address": "0.0.0.0", # Placeholder IP
"in_errors": 0,
"in_discards": 0,
"out_errors": 0,
"out_discards": 0,
"ifname": req.ifname
or "unknown", # Placeholder or interface provided in the request
"src_addr": "0.0.0.0", # Placeholder source IP
"dst_addr": "0.0.0.0", # Placeholder destination IP
"flow_time": str(
datetime.now(timezone.utc)
), # Current timestamp or placeholder
"interface_time": str(
datetime.now(timezone.utc)
), # Current timestamp or placeholder
}
return [default_response]
logger.info(f"Correlated Packet Drop Data: {correlated_data}")
return correlated_data.to_dict(orient='records')
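# Illustrative request body for POST /interface_down_pkt_drop (every filter
# field is optional; the values below are invented for the example):
#   {"from_time": "past 2 days", "ifname": "eth0", "region": "US", "min_in_errors": 10}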
class FlowPacketErrorCorrelationRequest(BaseModel):
    from_time: Optional[str] = None  # Optional natural language timeframe
    ifname: Optional[str] = None  # Optional interface name filter
    region: Optional[str] = None  # Optional region filter
    min_in_errors: Optional[int] = None
    max_in_errors: Optional[int] = None
    min_out_errors: Optional[int] = None
    max_out_errors: Optional[int] = None
    min_in_discards: Optional[int] = None
    max_in_discards: Optional[int] = None
    min_out_discards: Optional[int] = None
    max_out_discards: Optional[int] = None
@app.post("/packet_errors_impact_flow")
async def packet_errors_impact_flow(
req: FlowPacketErrorCorrelationRequest, res: Response
):
params, filters = load_params(req)
# Join the filters using AND
where_clause = " AND ".join(filters)
if where_clause:
where_clause = "AND " + where_clause
    # Query the packet errors and flows, correlating records by timestamp
query = f"""
SELECT
d.switchip AS device_ip_address,
i.in_errors,
i.in_discards,
i.out_errors,
i.out_discards,
i.ifname,
t.src_addr,
t.dst_addr,
t.src_port,
t.dst_port,
t.packets,
t.time AS flow_time,
i.time AS error_time
FROM
device d
INNER JOIN
interfacestats i
ON d.device_mac_address = i.device_mac_address
INNER JOIN
ts_flow t
ON d.switchip = t.sampler_address
WHERE
i.time >= :from_time
AND ABS(strftime('%s', t.time) - strftime('%s', i.time)) <= 300 -- Correlate within 5 minutes
{where_clause}
ORDER BY
i.time;
"""
correlated_data = pd.read_sql_query(query, conn, params=params)
if correlated_data.empty:
default_response = {
"device_ip_address": "0.0.0.0", # Placeholder IP
"in_errors": 0,
"in_discards": 0,
"out_errors": 0,
"out_discards": 0,
"ifname": req.ifname
or "unknown", # Placeholder or interface provided in the request
"src_addr": "0.0.0.0", # Placeholder source IP
"dst_addr": "0.0.0.0", # Placeholder destination IP
"src_port": 0,
"dst_port": 0,
"packets": 0,
"flow_time": str(
datetime.now(timezone.utc)
), # Current timestamp or placeholder
"error_time": str(
datetime.now(timezone.utc)
), # Current timestamp or placeholder
}
return [default_response]
# Return the correlated data if found
return correlated_data.to_dict(orient='records')
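
To exercise both endpoints end to end, a small client sketch using only the standard library (assumes the server is running locally on port 8001 as in the debug configuration; the filter values are illustrative):

import json
from urllib.request import Request, urlopen

BASE = "http://127.0.0.1:8001"  # assumption: local debug server from launch.json

def post(path, payload):
    # POST a JSON body and decode the JSON response.
    req = Request(
        BASE + path,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urlopen(req) as resp:
        return json.load(resp)

body = {"from_time": "past 2 days", "ifname": "eth0", "region": "US"}
print(post("/interface_down_pkt_drop", body)[:2])
print(post("/packet_errors_impact_flow", body)[:2])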

@@ -0,0 +1,247 @@
import pandas as pd
import random
from datetime import datetime, timedelta, timezone
import re
import logging
from dateparser import parse
import sqlite3
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
def load_sql():
    # Create an in-memory SQLite database and populate it with synthetic data
conn = sqlite3.connect(":memory:")
# create and load the devices table
device_data = generate_device_data(conn)
# create and load the interface_stats table
generate_interface_stats_data(conn, device_data)
# create and load the flow table
generate_flow_data(conn, device_data)
return conn
# Function to convert natural language time expressions to "X {unit} ago" format
def convert_to_ago_format(expression):
    # Map singular, plural, and abbreviated unit spellings to the canonical
    # unit names that dateparser understands.
    time_units = [
        (r"seconds?|secs?", "seconds"),
        (r"minutes?|mins?", "minutes"),
        (r"hours?|hrs?", "hours"),
        (r"days?", "days"),
        (r"weeks?", "weeks"),
        (r"months?", "months"),
        (r"years?|yrs?", "years"),
    ]
    # Look for a "<number> <unit>" phrase, e.g. "for the past 3 hrs" -> "3 hours ago"
    for pattern, unit in time_units:
        match = re.search(rf"(\d+)\s*(?:{pattern})", expression)
        if match:
            quantity = match.group(1)
            return f"{quantity} {unit} ago"
    # If no time phrase is recognized, signal failure to the caller
    return None
# Function to generate random MAC addresses
def random_mac():
    # Fixed four-octet prefix plus two random octets -> six octets total
    return "AA:BB:CC:DD:" + ":".join(
        f"{random.randint(0, 255):02X}" for _ in range(2)
    )
# Function to generate random IP addresses
def random_ip():
return f"{random.randint(1, 255)}.{random.randint(1, 255)}.{random.randint(1, 255)}.{random.randint(1, 255)}"
# Generate synthetic data for the device table
def generate_device_data(
conn,
n=1000,
):
device_data = {
"switchip": [random_ip() for _ in range(n)],
"hwsku": [f"HW{i+1}" for i in range(n)],
"hostname": [f"switch{i+1}" for i in range(n)],
"osversion": [f"v{i+1}" for i in range(n)],
"layer": ["L2" if i % 2 == 0 else "L3" for i in range(n)],
"region": [random.choice(["US", "EU", "ASIA"]) for _ in range(n)],
"uptime": [
f"{random.randint(0, 10)} days {random.randint(0, 23)}:{random.randint(0, 59)}:{random.randint(0, 59)}"
for _ in range(n)
],
"device_mac_address": [random_mac() for _ in range(n)],
}
df = pd.DataFrame(device_data)
df.to_sql("device", conn, index=False)
return df
# Generate synthetic data for the interfacestats table
def generate_interface_stats_data(conn, device_df, n=1000):
interface_stats_data = []
for _ in range(n):
device_mac = random.choice(device_df["device_mac_address"])
ifname = random.choice(["eth0", "eth1", "eth2", "eth3"])
        time = datetime.now(timezone.utc) - timedelta(
            minutes=random.randint(0, 1440 * 5)
        )  # random timestamps in the past 5 days
in_discards = random.randint(0, 1000)
in_errors = random.randint(0, 500)
out_discards = random.randint(0, 800)
out_errors = random.randint(0, 400)
in_octets = random.randint(1000, 100000)
out_octets = random.randint(1000, 100000)
interface_stats_data.append(
{
"device_mac_address": device_mac,
"ifname": ifname,
"time": time,
"in_discards": in_discards,
"in_errors": in_errors,
"out_discards": out_discards,
"out_errors": out_errors,
"in_octets": in_octets,
"out_octets": out_octets,
}
)
df = pd.DataFrame(interface_stats_data)
df.to_sql("interfacestats", conn, index=False)
return
# Generate synthetic data for the ts_flow table
def generate_flow_data(conn, device_df, n=1000):
flow_data = []
for _ in range(n):
sampler_address = random.choice(device_df["switchip"])
proto = random.choice(["TCP", "UDP"])
src_addr = random_ip()
dst_addr = random_ip()
src_port = random.randint(1024, 65535)
dst_port = random.randint(1024, 65535)
in_if = random.randint(1, 10)
out_if = random.randint(1, 10)
        # Keep flow_end at or after flow_start so every flow has a
        # non-negative duration.
        flow_start = int(
            (datetime.now() - timedelta(days=random.randint(1, 30))).timestamp()
        )
        flow_end = flow_start + random.randint(60, 3600)
bytes_transferred = random.randint(1000, 100000)
packets = random.randint(1, 1000)
flow_time = datetime.now(timezone.utc) - timedelta(
minutes=random.randint(0, 1440 * 5)
) # random flow time
flow_data.append(
{
"sampler_address": sampler_address,
"proto": proto,
"src_addr": src_addr,
"dst_addr": dst_addr,
"src_port": src_port,
"dst_port": dst_port,
"in_if": in_if,
"out_if": out_if,
"flow_start": flow_start,
"flow_end": flow_end,
"bytes": bytes_transferred,
"packets": packets,
"time": flow_time,
}
)
df = pd.DataFrame(flow_data)
df.to_sql("ts_flow", conn, index=False)
return
def load_params(req):
# Step 1: Convert the from_time natural language string to a timestamp if provided
if req.from_time:
# Use `dateparser` to parse natural language timeframes
logger.info(f"{'* ' * 50}\n\nCaptured from time: {req.from_time}\n\n")
parsed_time = parse(req.from_time, settings={"RELATIVE_BASE": datetime.now()})
if not parsed_time:
conv_time = convert_to_ago_format(req.from_time)
if conv_time:
parsed_time = parse(
conv_time, settings={"RELATIVE_BASE": datetime.now()}
)
            else:
                # Returning a bare error dict here would break callers that
                # unpack `params, filters`; fall back to the default 7-day
                # window instead and log the problem.
                logger.warning(
                    f"Unrecognized from_time {req.from_time!r}; "
                    "defaulting to the past 7 days."
                )
                parsed_time = datetime.now() - timedelta(days=7)
logger.info(f"\n\nConverted from time: {parsed_time}\n\n{'* ' * 50}\n\n")
from_time = parsed_time
logger.info(f"Using parsed from_time: {from_time}")
else:
# If no from_time is provided, use a default value (e.g., the past 7 days)
from_time = datetime.now() - timedelta(days=7)
logger.info(f"Using default from_time: {from_time}")
# Step 2: Build the dynamic SQL query based on the optional filters
filters = []
params = {"from_time": from_time}
if req.ifname:
filters.append("i.ifname = :ifname")
params["ifname"] = req.ifname
if req.region:
filters.append("d.region = :region")
params["region"] = req.region
if req.min_in_errors is not None:
filters.append("i.in_errors >= :min_in_errors")
params["min_in_errors"] = req.min_in_errors
if req.max_in_errors is not None:
filters.append("i.in_errors <= :max_in_errors")
params["max_in_errors"] = req.max_in_errors
if req.min_out_errors is not None:
filters.append("i.out_errors >= :min_out_errors")
params["min_out_errors"] = req.min_out_errors
if req.max_out_errors is not None:
filters.append("i.out_errors <= :max_out_errors")
params["max_out_errors"] = req.max_out_errors
if req.min_in_discards is not None:
filters.append("i.in_discards >= :min_in_discards")
params["min_in_discards"] = req.min_in_discards
if req.max_in_discards is not None:
filters.append("i.in_discards <= :max_in_discards")
params["max_in_discards"] = req.max_in_discards
if req.min_out_discards is not None:
filters.append("i.out_discards >= :min_out_discards")
params["min_out_discards"] = req.min_out_discards
if req.max_out_discards is not None:
filters.append("i.out_discards <= :max_out_discards")
params["max_out_discards"] = req.max_out_discards
return params, filters
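
A hedged sketch of what load_params yields, using a stand-in object in place of the pydantic request models (the field names mirror those models; the values are invented):

from types import SimpleNamespace
from utils import load_params

# Stand-in for the pydantic request model; unset filters stay None.
req = SimpleNamespace(
    from_time="past 2 days", ifname="eth0", region=None,
    min_in_errors=10, max_in_errors=None,
    min_out_errors=None, max_out_errors=None,
    min_in_discards=None, max_in_discards=None,
    min_out_discards=None, max_out_discards=None,
)
params, filters = load_params(req)
print(filters)  # ['i.ifname = :ifname', 'i.in_errors >= :min_in_errors']
print(params)   # {'from_time': datetime(...), 'ifname': 'eth0', 'min_in_errors': 10}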

@@ -0,0 +1,4 @@
fastapi
uvicorn
pandas
dateparser