mirror of
https://github.com/katanemo/plano.git
synced 2026-04-27 17:56:28 +02:00
demos for network copilot and sql analyzer (#57)
* Pulled from the main branch after adding enums and made changes
* Added the sql_analyzer folder and built a demo for employee-stats function calling: "top_employees" and "aggregate_stats"
* Addressed PR comments
* Added Network Analyzer function-calling code
* Added Network Analyzer code for different timeframes
* Updated the Network Copilot and Employee Details demos with their descriptions and resolved the PR comments
* Added a 2nd function in Network Copilot
This commit is contained in:
parent
a91fbdbf1c
commit
ed6a9139e6
11 changed files with 1052 additions and 1 deletion
200
model_server/app/network_data_generator.py
Normal file
@@ -0,0 +1,200 @@
import pandas as pd
import random
from datetime import datetime, timedelta, timezone
import re
import logging
from dateparser import parse

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Function to convert natural language time expressions to "X {time} ago" format
def convert_to_ago_format(expression):
    # Map time-unit spellings (and abbreviations) to the form passed on to dateparser
    time_units = {
        r'seconds': 'seconds',
        r'minutes': 'minutes',
        r'mins': 'mins',
        r'hrs': 'hrs',
        r'hours': 'hours',
        r'hour': 'hour',
        r'hr': 'hour',
        r'days': 'days',
        r'day': 'day',
        r'weeks': 'weeks',
        r'week': 'week',
        r'months': 'months',
        r'month': 'month',
        r'years': 'years',
        r'yrs': 'years',
        r'year': 'year',
        r'yr': 'year',
    }

    # Try each unit in turn; plural spellings come before the bare unit so
    # "7 days" is not consumed by the "day" pattern.
    for pattern, unit in time_units.items():
        # Match "X {unit}" anywhere in the phrase (covers "for the past X {unit}",
        # "over the last X {unit}", and similar wordings)
        match = re.search(fr'(\d+) {pattern}', expression)
        if match:
            quantity = match.group(1)
            return f"{quantity} {unit} ago"

    # If no number/unit pair is recognized, return None so the caller can report an error
    return None
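
# A quick sanity check of the helper (illustrative inputs, not from the demo):
#   convert_to_ago_format("for the past 7 days")   -> "7 days ago"
#   convert_to_ago_format("over the last 3 hours") -> "3 hours ago"
#   convert_to_ago_format("recently")              -> None (no number/unit pair)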

# Function to generate random MAC addresses: a fixed OUI-style prefix plus two
# random octets, giving the six octets a MAC address requires
def random_mac():
    return "AA:BB:CC:DD:" + ':'.join([f"{random.randint(0, 255):02X}" for _ in range(2)])

# Function to generate random IP addresses
def random_ip():
    return f"{random.randint(1, 255)}.{random.randint(1, 255)}.{random.randint(1, 255)}.{random.randint(1, 255)}"

# Generate synthetic data for the device table
def generate_device_data(conn, n=1000):
    device_data = {
        'switchip': [random_ip() for _ in range(n)],
        'hwsku': [f'HW{i+1}' for i in range(n)],
        'hostname': [f'switch{i+1}' for i in range(n)],
        'osversion': [f'v{i+1}' for i in range(n)],
        'layer': ['L2' if i % 2 == 0 else 'L3' for i in range(n)],
        'region': [random.choice(['US', 'EU', 'ASIA']) for _ in range(n)],
        'uptime': [f'{random.randint(0, 10)} days {random.randint(0, 23):02}:{random.randint(0, 59):02}:{random.randint(0, 59):02}' for _ in range(n)],
        'device_mac_address': [random_mac() for _ in range(n)]
    }
    df = pd.DataFrame(device_data)
    df.to_sql('device', conn, index=False)
    return df

# Generate synthetic data for the interfacestats table
def generate_interface_stats_data(conn, device_df, n=1000):
    interface_stats_data = []
    for _ in range(n):
        device_mac = random.choice(device_df['device_mac_address'])
        ifname = random.choice(['eth0', 'eth1', 'eth2', 'eth3'])
        time = datetime.now(timezone.utc) - timedelta(minutes=random.randint(0, 1440 * 5))  # random timestamps in the past 5 days
        in_discards = random.randint(0, 1000)
        in_errors = random.randint(0, 500)
        out_discards = random.randint(0, 800)
        out_errors = random.randint(0, 400)
        in_octets = random.randint(1000, 100000)
        out_octets = random.randint(1000, 100000)

        interface_stats_data.append({
            'device_mac_address': device_mac,
            'ifname': ifname,
            'time': time,
            'in_discards': in_discards,
            'in_errors': in_errors,
            'out_discards': out_discards,
            'out_errors': out_errors,
            'in_octets': in_octets,
            'out_octets': out_octets
        })
    df = pd.DataFrame(interface_stats_data)
    df.to_sql('interfacestats', conn, index=False)
    return df

# Generate synthetic data for the ts_flow table
def generate_flow_data(conn, device_df, n=1000):
    flow_data = []
    for _ in range(n):
        sampler_address = random.choice(device_df['switchip'])
        proto = random.choice(['TCP', 'UDP'])
        src_addr = random_ip()
        dst_addr = random_ip()
        src_port = random.randint(1024, 65535)
        dst_port = random.randint(1024, 65535)
        in_if = random.randint(1, 10)
        out_if = random.randint(1, 10)
        flow_start = int((datetime.now() - timedelta(days=random.randint(1, 30))).timestamp())
        flow_end = flow_start + random.randint(1, 3600)  # end after the start so every flow has a valid duration
        bytes_transferred = random.randint(1000, 100000)
        packets = random.randint(1, 1000)
        flow_time = datetime.now(timezone.utc) - timedelta(minutes=random.randint(0, 1440 * 5))  # random flow time in the past 5 days

        flow_data.append({
            'sampler_address': sampler_address,
            'proto': proto,
            'src_addr': src_addr,
            'dst_addr': dst_addr,
            'src_port': src_port,
            'dst_port': dst_port,
            'in_if': in_if,
            'out_if': out_if,
            'flow_start': flow_start,
            'flow_end': flow_end,
            'bytes': bytes_transferred,
            'packets': packets,
            'time': flow_time
        })
    df = pd.DataFrame(flow_data)
    df.to_sql('ts_flow', conn, index=False)
    return df
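
# The three generators can be exercised together against an in-memory database.
# A minimal sketch, assuming a plain sqlite3 connection (which DataFrame.to_sql
# accepts); the demo's actual wiring may differ:
#
#   import sqlite3
#   conn = sqlite3.connect(':memory:')
#   device_df = generate_device_data(conn, n=100)          # devices first, so MACs/IPs exist
#   generate_interface_stats_data(conn, device_df, n=500)  # stats reference device MACs
#   generate_flow_data(conn, device_df, n=500)             # flows sample device switch IPs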

def load_params(req):
    # Step 1: Convert the from_time natural language string to a timestamp if provided
    if req.from_time:
        # Use `dateparser` to parse natural language timeframes
        logger.info(f"Captured from_time: {req.from_time}")
        parsed_time = parse(req.from_time, settings={'RELATIVE_BASE': datetime.now()})
        if not parsed_time:
            # Fall back to normalizing phrases like "for the past 7 days" into "7 days ago"
            conv_time = convert_to_ago_format(req.from_time)
            if conv_time:
                parsed_time = parse(conv_time, settings={'RELATIVE_BASE': datetime.now()})
            else:
                # Note: this error-dict return shape differs from the (params, filters)
                # tuple below; callers must check for it before unpacking.
                return {"error": "Invalid from_time format. Please provide a valid time description such as 'past 7 days' or 'since last month'."}
        from_time = parsed_time
        logger.info(f"Using parsed from_time: {from_time}")
    else:
        # If no from_time is provided, default to the past 7 days
        from_time = datetime.now() - timedelta(days=7)
        logger.info(f"Using default from_time: {from_time}")

    # Step 2: Build the dynamic SQL query filters based on the optional request fields
    filters = []
    params = {"from_time": from_time}

    if req.ifname:
        filters.append("i.ifname = :ifname")
        params["ifname"] = req.ifname

    if req.region:
        filters.append("d.region = :region")
        params["region"] = req.region

    # The min/max range filters all share the same shape, so build them from a
    # table of (request attribute, SQL clause) pairs instead of repeating the block.
    range_filters = [
        ("min_in_errors", "i.in_errors >= :min_in_errors"),
        ("max_in_errors", "i.in_errors <= :max_in_errors"),
        ("min_out_errors", "i.out_errors >= :min_out_errors"),
        ("max_out_errors", "i.out_errors <= :max_out_errors"),
        ("min_in_discards", "i.in_discards >= :min_in_discards"),
        ("max_in_discards", "i.in_discards <= :max_in_discards"),
        ("min_out_discards", "i.out_discards >= :min_out_discards"),
        ("max_out_discards", "i.out_discards <= :max_out_discards"),
    ]
    for attr, clause in range_filters:
        value = getattr(req, attr)
        if value is not None:
            filters.append(clause)
            params[attr] = value

    return params, filters
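
# A hypothetical caller, to show how the returned pieces fit together. The
# request object and WHERE clause below are illustrative assumptions, not the
# demo's actual endpoint:
#
#   from types import SimpleNamespace
#   req = SimpleNamespace(from_time="past 2 days", ifname=None, region="US",
#                         min_in_errors=100, max_in_errors=None,
#                         min_out_errors=None, max_out_errors=None,
#                         min_in_discards=None, max_in_discards=None,
#                         min_out_discards=None, max_out_discards=None)
#   result = load_params(req)
#   if isinstance(result, dict):                       # the error-dict shape above
#       print(result["error"])
#   else:
#       params, filters = result
#       where = " AND ".join(["i.time >= :from_time"] + filters)
#       print(where, params)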