fix demos code (#76)

Adil Hafeez 2024-09-24 14:34:22 -07:00 committed by GitHub
parent 13dff3089d
commit 685144bbd7
29 changed files with 2020 additions and 21 deletions

@@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "function-calling api server",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--port", "8001"],
}
]
}
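
With this configuration running under the VS Code debugger, a quick smoke test is to call the health endpoint the server exposes. A minimal sketch using only the standard library, assuming uvicorn is listening on localhost:8001 as configured above:

import json
from urllib.request import urlopen

# Assumes the launch config above started uvicorn on port 8001.
with urlopen("http://127.0.0.1:8001/healthz") as resp:
    print(json.load(resp))  # expected: {"status": "ok"}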

@@ -0,0 +1,19 @@
# Build stage: install dependencies into an isolated /runtime prefix.
FROM python:3 AS base

FROM base AS builder
WORKDIR /src
COPY requirements.txt /src/
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
COPY . /src

# Runtime stage: copy the installed packages and the app code into a slim image.
FROM python:3-slim AS output
COPY --from=builder /runtime /usr/local
COPY app /app
WORKDIR /app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
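
The two-stage build installs dependencies into the /runtime prefix inside the full python:3 image, then copies only that prefix plus the app code into python:3-slim, keeping the final image small. A hedged sketch of building and smoke-testing the image from Python (the demo-api tag and host port are made up for this example; a local Docker daemon is assumed):

import subprocess

# Build the image; the tag "demo-api" is illustrative, not from the repo.
subprocess.run(["docker", "build", "-t", "demo-api", "."], check=True)
# Run detached, publishing container port 80 on host port 8080.
subprocess.run(
    ["docker", "run", "--rm", "-d", "-p", "8080:80", "demo-api"],
    check=True,
)
# The API should then answer at http://127.0.0.1:8080/healthz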

@@ -0,0 +1,184 @@
from datetime import datetime, timezone
import logging
from typing import Optional

import pandas as pd
from fastapi import FastAPI, Response
from pydantic import BaseModel

from utils import load_sql, load_params
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
app = FastAPI()
@app.get("/healthz")
async def healthz():
return {
"status": "ok"
}
# Build the in-memory SQLite demo database once at startup.
conn = load_sql()
class PacketDropCorrelationRequest(BaseModel):
    from_time: Optional[str] = None  # Optional natural language timeframe
    ifname: Optional[str] = None  # Optional interface name filter
    region: Optional[str] = None  # Optional region filter
    min_in_errors: Optional[int] = None
    max_in_errors: Optional[int] = None
    min_out_errors: Optional[int] = None
    max_out_errors: Optional[int] = None
    min_in_discards: Optional[int] = None
    max_in_discards: Optional[int] = None
    min_out_discards: Optional[int] = None
    max_out_discards: Optional[int] = None
@app.post("/interface_down_pkt_drop")
async def interface_down_packet_drop(req: PacketDropCorrelationRequest, res: Response):
params, filters = load_params(req)
# Join the filters using AND
where_clause = " AND ".join(filters)
if where_clause:
where_clause = "AND " + where_clause
    # Query packet errors and flows from interfacestats and ts_flow
query = f"""
SELECT
d.switchip AS device_ip_address,
i.in_errors,
i.in_discards,
i.out_errors,
i.out_discards,
i.ifname,
t.src_addr,
t.dst_addr,
t.time AS flow_time,
i.time AS interface_time
FROM
device d
INNER JOIN
interfacestats i
ON d.device_mac_address = i.device_mac_address
INNER JOIN
ts_flow t
ON d.switchip = t.sampler_address
WHERE
i.time >= :from_time -- Using the converted timestamp
{where_clause}
ORDER BY
i.time;
"""
correlated_data = pd.read_sql_query(query, conn, params=params)
if correlated_data.empty:
default_response = {
"device_ip_address": "0.0.0.0", # Placeholder IP
"in_errors": 0,
"in_discards": 0,
"out_errors": 0,
"out_discards": 0,
"ifname": req.ifname
or "unknown", # Placeholder or interface provided in the request
"src_addr": "0.0.0.0", # Placeholder source IP
"dst_addr": "0.0.0.0", # Placeholder destination IP
"flow_time": str(
datetime.now(timezone.utc)
), # Current timestamp or placeholder
"interface_time": str(
datetime.now(timezone.utc)
), # Current timestamp or placeholder
}
return [default_response]
logger.info(f"Correlated Packet Drop Data: {correlated_data}")
return correlated_data.to_dict(orient='records')
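# Illustrative request body for POST /interface_down_pkt_drop (every filter
# field is optional; the values below are invented for the example):
#   {"from_time": "past 2 days", "ifname": "eth0", "region": "US", "min_in_errors": 10}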
class FlowPacketErrorCorrelationRequest(BaseModel):
    from_time: Optional[str] = None  # Optional natural language timeframe
    ifname: Optional[str] = None  # Optional interface name filter
    region: Optional[str] = None  # Optional region filter
    min_in_errors: Optional[int] = None
    max_in_errors: Optional[int] = None
    min_out_errors: Optional[int] = None
    max_out_errors: Optional[int] = None
    min_in_discards: Optional[int] = None
    max_in_discards: Optional[int] = None
    min_out_discards: Optional[int] = None
    max_out_discards: Optional[int] = None
@app.post("/packet_errors_impact_flow")
async def packet_errors_impact_flow(
req: FlowPacketErrorCorrelationRequest, res: Response
):
params, filters = load_params(req)
# Join the filters using AND
where_clause = " AND ".join(filters)
if where_clause:
where_clause = "AND " + where_clause
    # Query the packet errors and flows, correlating records by timestamp
query = f"""
SELECT
d.switchip AS device_ip_address,
i.in_errors,
i.in_discards,
i.out_errors,
i.out_discards,
i.ifname,
t.src_addr,
t.dst_addr,
t.src_port,
t.dst_port,
t.packets,
t.time AS flow_time,
i.time AS error_time
FROM
device d
INNER JOIN
interfacestats i
ON d.device_mac_address = i.device_mac_address
INNER JOIN
ts_flow t
ON d.switchip = t.sampler_address
WHERE
i.time >= :from_time
AND ABS(strftime('%s', t.time) - strftime('%s', i.time)) <= 300 -- Correlate within 5 minutes
{where_clause}
ORDER BY
i.time;
"""
correlated_data = pd.read_sql_query(query, conn, params=params)
if correlated_data.empty:
default_response = {
"device_ip_address": "0.0.0.0", # Placeholder IP
"in_errors": 0,
"in_discards": 0,
"out_errors": 0,
"out_discards": 0,
"ifname": req.ifname
or "unknown", # Placeholder or interface provided in the request
"src_addr": "0.0.0.0", # Placeholder source IP
"dst_addr": "0.0.0.0", # Placeholder destination IP
"src_port": 0,
"dst_port": 0,
"packets": 0,
"flow_time": str(
datetime.now(timezone.utc)
), # Current timestamp or placeholder
"error_time": str(
datetime.now(timezone.utc)
), # Current timestamp or placeholder
}
return [default_response]
# Return the correlated data if found
return correlated_data.to_dict(orient='records')
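
To exercise both endpoints end to end, a small client sketch using only the standard library (assumes the server is running locally on port 8001 as in the debug configuration; the filter values are illustrative):

import json
from urllib.request import Request, urlopen

BASE = "http://127.0.0.1:8001"  # assumption: local debug server from launch.json

def post(path, payload):
    # POST a JSON body and decode the JSON response.
    req = Request(
        BASE + path,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urlopen(req) as resp:
        return json.load(resp)

body = {"from_time": "past 2 days", "ifname": "eth0", "region": "US"}
print(post("/interface_down_pkt_drop", body)[:2])
print(post("/packet_errors_impact_flow", body)[:2])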

@@ -0,0 +1,247 @@
import pandas as pd
import random
from datetime import datetime, timedelta, timezone
import re
import logging
from dateparser import parse
import sqlite3
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
def load_sql():
    # Create an in-memory SQLite database and populate it with synthetic data
conn = sqlite3.connect(":memory:")
# create and load the devices table
device_data = generate_device_data(conn)
# create and load the interface_stats table
generate_interface_stats_data(conn, device_data)
# create and load the flow table
generate_flow_data(conn, device_data)
return conn
# Function to convert natural language time expressions to "X {unit} ago" format
def convert_to_ago_format(expression):
    # Map singular, plural, and abbreviated unit spellings to the canonical
    # unit names that dateparser understands.
    time_units = [
        (r"seconds?|secs?", "seconds"),
        (r"minutes?|mins?", "minutes"),
        (r"hours?|hrs?", "hours"),
        (r"days?", "days"),
        (r"weeks?", "weeks"),
        (r"months?", "months"),
        (r"years?|yrs?", "years"),
    ]
    # Look for a "<number> <unit>" phrase, e.g. "for the past 3 hrs" -> "3 hours ago"
    for pattern, unit in time_units:
        match = re.search(rf"(\d+)\s*(?:{pattern})", expression)
        if match:
            quantity = match.group(1)
            return f"{quantity} {unit} ago"
    # If no time phrase is recognized, signal failure to the caller
    return None
# Function to generate random MAC addresses
def random_mac():
    # Fixed four-octet prefix plus two random octets -> six octets total
    return "AA:BB:CC:DD:" + ":".join(
        f"{random.randint(0, 255):02X}" for _ in range(2)
    )
# Function to generate random IP addresses
def random_ip():
return f"{random.randint(1, 255)}.{random.randint(1, 255)}.{random.randint(1, 255)}.{random.randint(1, 255)}"
# Generate synthetic data for the device table
def generate_device_data(
conn,
n=1000,
):
device_data = {
"switchip": [random_ip() for _ in range(n)],
"hwsku": [f"HW{i+1}" for i in range(n)],
"hostname": [f"switch{i+1}" for i in range(n)],
"osversion": [f"v{i+1}" for i in range(n)],
"layer": ["L2" if i % 2 == 0 else "L3" for i in range(n)],
"region": [random.choice(["US", "EU", "ASIA"]) for _ in range(n)],
"uptime": [
f"{random.randint(0, 10)} days {random.randint(0, 23)}:{random.randint(0, 59)}:{random.randint(0, 59)}"
for _ in range(n)
],
"device_mac_address": [random_mac() for _ in range(n)],
}
df = pd.DataFrame(device_data)
df.to_sql("device", conn, index=False)
return df
# Generate synthetic data for the interfacestats table
def generate_interface_stats_data(conn, device_df, n=1000):
interface_stats_data = []
for _ in range(n):
device_mac = random.choice(device_df["device_mac_address"])
ifname = random.choice(["eth0", "eth1", "eth2", "eth3"])
        time = datetime.now(timezone.utc) - timedelta(
            minutes=random.randint(0, 1440 * 5)
        )  # random timestamps in the past 5 days
in_discards = random.randint(0, 1000)
in_errors = random.randint(0, 500)
out_discards = random.randint(0, 800)
out_errors = random.randint(0, 400)
in_octets = random.randint(1000, 100000)
out_octets = random.randint(1000, 100000)
interface_stats_data.append(
{
"device_mac_address": device_mac,
"ifname": ifname,
"time": time,
"in_discards": in_discards,
"in_errors": in_errors,
"out_discards": out_discards,
"out_errors": out_errors,
"in_octets": in_octets,
"out_octets": out_octets,
}
)
df = pd.DataFrame(interface_stats_data)
df.to_sql("interfacestats", conn, index=False)
return
# Generate synthetic data for the ts_flow table
def generate_flow_data(conn, device_df, n=1000):
flow_data = []
for _ in range(n):
sampler_address = random.choice(device_df["switchip"])
proto = random.choice(["TCP", "UDP"])
src_addr = random_ip()
dst_addr = random_ip()
src_port = random.randint(1024, 65535)
dst_port = random.randint(1024, 65535)
in_if = random.randint(1, 10)
out_if = random.randint(1, 10)
        # Keep flow_end at or after flow_start so every flow has a
        # non-negative duration.
        flow_start = int(
            (datetime.now() - timedelta(days=random.randint(1, 30))).timestamp()
        )
        flow_end = flow_start + random.randint(60, 3600)
bytes_transferred = random.randint(1000, 100000)
packets = random.randint(1, 1000)
flow_time = datetime.now(timezone.utc) - timedelta(
minutes=random.randint(0, 1440 * 5)
) # random flow time
flow_data.append(
{
"sampler_address": sampler_address,
"proto": proto,
"src_addr": src_addr,
"dst_addr": dst_addr,
"src_port": src_port,
"dst_port": dst_port,
"in_if": in_if,
"out_if": out_if,
"flow_start": flow_start,
"flow_end": flow_end,
"bytes": bytes_transferred,
"packets": packets,
"time": flow_time,
}
)
df = pd.DataFrame(flow_data)
df.to_sql("ts_flow", conn, index=False)
return
def load_params(req):
# Step 1: Convert the from_time natural language string to a timestamp if provided
if req.from_time:
# Use `dateparser` to parse natural language timeframes
logger.info(f"{'* ' * 50}\n\nCaptured from time: {req.from_time}\n\n")
parsed_time = parse(req.from_time, settings={"RELATIVE_BASE": datetime.now()})
if not parsed_time:
conv_time = convert_to_ago_format(req.from_time)
if conv_time:
parsed_time = parse(
conv_time, settings={"RELATIVE_BASE": datetime.now()}
)
            else:
                # Returning a bare error dict here would break callers that
                # unpack `params, filters`; fall back to the default 7-day
                # window instead and log the problem.
                logger.warning(
                    f"Unrecognized from_time {req.from_time!r}; "
                    "defaulting to the past 7 days."
                )
                parsed_time = datetime.now() - timedelta(days=7)
logger.info(f"\n\nConverted from time: {parsed_time}\n\n{'* ' * 50}\n\n")
from_time = parsed_time
logger.info(f"Using parsed from_time: {from_time}")
else:
# If no from_time is provided, use a default value (e.g., the past 7 days)
from_time = datetime.now() - timedelta(days=7)
logger.info(f"Using default from_time: {from_time}")
# Step 2: Build the dynamic SQL query based on the optional filters
filters = []
params = {"from_time": from_time}
if req.ifname:
filters.append("i.ifname = :ifname")
params["ifname"] = req.ifname
if req.region:
filters.append("d.region = :region")
params["region"] = req.region
if req.min_in_errors is not None:
filters.append("i.in_errors >= :min_in_errors")
params["min_in_errors"] = req.min_in_errors
if req.max_in_errors is not None:
filters.append("i.in_errors <= :max_in_errors")
params["max_in_errors"] = req.max_in_errors
if req.min_out_errors is not None:
filters.append("i.out_errors >= :min_out_errors")
params["min_out_errors"] = req.min_out_errors
if req.max_out_errors is not None:
filters.append("i.out_errors <= :max_out_errors")
params["max_out_errors"] = req.max_out_errors
if req.min_in_discards is not None:
filters.append("i.in_discards >= :min_in_discards")
params["min_in_discards"] = req.min_in_discards
if req.max_in_discards is not None:
filters.append("i.in_discards <= :max_in_discards")
params["max_in_discards"] = req.max_in_discards
if req.min_out_discards is not None:
filters.append("i.out_discards >= :min_out_discards")
params["min_out_discards"] = req.min_out_discards
if req.max_out_discards is not None:
filters.append("i.out_discards <= :max_out_discards")
params["max_out_discards"] = req.max_out_discards
return params, filters
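
A hedged sketch of what load_params yields, using a stand-in object in place of the pydantic request models (the field names mirror those models; the values are invented):

from types import SimpleNamespace
from utils import load_params

# Stand-in for the pydantic request model; unset filters stay None.
req = SimpleNamespace(
    from_time="past 2 days", ifname="eth0", region=None,
    min_in_errors=10, max_in_errors=None,
    min_out_errors=None, max_out_errors=None,
    min_in_discards=None, max_in_discards=None,
    min_out_discards=None, max_out_discards=None,
)
params, filters = load_params(req)
print(filters)  # ['i.ifname = :ifname', 'i.in_errors >= :min_in_errors']
print(params)   # {'from_time': datetime(...), 'ifname': 'eth0', 'min_in_errors': 10}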

@@ -0,0 +1,4 @@
fastapi
uvicorn
pandas
dateparser