mirror of
https://github.com/L-yang-yang/cugenopt.git
synced 2026-06-27 21:59:38 +02:00
Initial commit: cuGenOpt GPU optimization solver
This commit is contained in:
commit
fc5a0ff4af
117 changed files with 25545 additions and 0 deletions
189
benchmark/experiments/e0_diagnosis/bench_diagnosis.cu
Normal file
189
benchmark/experiments/e0_diagnosis/bench_diagnosis.cu
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
// GenSolver 性能诊断专用 benchmark
|
||||
// 目的:精确分解单个问题实例的时间构成
|
||||
//
|
||||
// 实验设计:
|
||||
// 1. 固定单个问题(CVRP10),固定 seed=42,max_gen=2000
|
||||
// 2. 变量:migrate_interval = 50, 100, 200, 500, 2000
|
||||
// 3. 对照组:关闭 AOS (use_aos=false),batch=2000(纯 GPU 计算基线)
|
||||
// 4. 每组跑 3 次取中位数,消除噪声
|
||||
//
|
||||
// 输出 CSV:config,run,time_ms,obj,gap_pct,generations
|
||||
// 配合 nvprof 使用时只跑单次(避免 profiling 开销叠加)
|
||||
|
||||
#include "solver.cuh"
|
||||
#include "tsp.cuh"
|
||||
#include "vrp.cuh"
|
||||
#include "knapsack.cuh"
|
||||
#include "schedule.cuh"
|
||||
#include "qap.cuh"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
|
||||
static void warmup() {
|
||||
float dist[25] = {0,3,6,5,7, 3,0,3,4,5, 6,3,0,5,4, 5,4,5,0,3, 7,5,4,3,0};
|
||||
auto p = TSPProblem::create(dist, 5);
|
||||
SolverConfig c;
|
||||
c.pop_size = 64; c.max_gen = 10; c.seed = 1; c.verbose = false;
|
||||
solve(p, c);
|
||||
p.destroy();
|
||||
}
|
||||
|
||||
static SolverConfig make_config(int batch, bool aos, int aos_interval = 1) {
|
||||
SolverConfig c;
|
||||
c.pop_size = 0;
|
||||
c.max_gen = 2000;
|
||||
c.verbose = false;
|
||||
c.sa_temp_init = 50.0f;
|
||||
c.sa_alpha = 0.999f;
|
||||
c.num_islands = 0;
|
||||
c.migrate_interval = batch;
|
||||
c.migrate_strategy = MigrateStrategy::Hybrid;
|
||||
c.crossover_rate = 0.1f;
|
||||
c.use_aos = aos;
|
||||
c.aos_update_interval = aos_interval;
|
||||
c.seed = 42;
|
||||
return c;
|
||||
}
|
||||
|
||||
struct TestProblem {
|
||||
const char* name;
|
||||
float known_optimal;
|
||||
};
|
||||
|
||||
template<typename Problem>
|
||||
static void run_single(const char* config_name, Problem& prob,
|
||||
SolverConfig cfg, float known_opt, int repeats) {
|
||||
for (int r = 0; r < repeats; r++) {
|
||||
cfg.seed = 42 + r * 111;
|
||||
auto result = solve(prob, cfg);
|
||||
float obj = result.best_solution.objectives[0];
|
||||
float gap = (known_opt != 0.0f)
|
||||
? (obj - known_opt) / fabsf(known_opt) * 100.0f
|
||||
: obj;
|
||||
printf("%s,%d,%.1f,%.2f,%.2f,%d\n",
|
||||
config_name, r, result.elapsed_ms, obj, gap, result.generations);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
// argv[1]: "all" | "baseline" (batch2000_noaos only) | "default" (batch50_aos only)
|
||||
const char* mode = (argc > 1) ? argv[1] : "all";
|
||||
bool only_baseline = (strcmp(mode, "baseline") == 0);
|
||||
bool only_default = (strcmp(mode, "default") == 0);
|
||||
int repeats = (only_baseline || only_default) ? 1 : 3;
|
||||
|
||||
{
|
||||
int device;
|
||||
cudaDeviceProp prop;
|
||||
cudaGetDevice(&device);
|
||||
cudaGetDeviceProperties(&prop, device);
|
||||
fprintf(stderr, "GPU: %s (SM=%d, Compute=%d.%d)\n",
|
||||
prop.name, prop.multiProcessorCount, prop.major, prop.minor);
|
||||
}
|
||||
warmup();
|
||||
|
||||
printf("config,run,time_ms,obj,gap_pct,generations\n");
|
||||
fflush(stdout);
|
||||
|
||||
// === 测试问题:CVRP10(中等复杂度,kernel 时间 ~600ms)===
|
||||
const int N = 10, NN = N + 1;
|
||||
float coords[NN][2] = {
|
||||
{50,50},{60,50},{70,50},{80,50},{50,60},
|
||||
{50,70},{50,80},{40,50},{30,50},{50,40},{50,30}
|
||||
};
|
||||
float demands[N] = {5,4,6,5,4,6,5,4,5,6};
|
||||
float dist[NN * NN];
|
||||
for (int i = 0; i < NN; i++)
|
||||
for (int j = 0; j < NN; j++) {
|
||||
float dx = coords[i][0] - coords[j][0];
|
||||
float dy = coords[i][1] - coords[j][1];
|
||||
dist[i * NN + j] = roundf(sqrtf(dx * dx + dy * dy));
|
||||
}
|
||||
|
||||
if (only_default) {
|
||||
// nvprof 专用:只跑默认配置(batch=50, AOS=on)
|
||||
fprintf(stderr, "\n=== CVRP10: default config (batch=50, AOS=on) ===\n");
|
||||
auto prob = VRPProblem::create(dist, demands, N, 15.0f, 4, 4);
|
||||
run_single("batch50_aos", prob, make_config(50, true), 200.0f, 1);
|
||||
prob.destroy();
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (only_baseline) {
|
||||
// nvprof 专用:只跑纯 GPU 基线(batch=2000, AOS=off)
|
||||
fprintf(stderr, "\n=== CVRP10: baseline (batch=2000, AOS=off) ===\n");
|
||||
auto prob = VRPProblem::create(dist, demands, N, 15.0f, 4, 4);
|
||||
run_single("batch2000_noaos", prob, make_config(2000, false), 200.0f, 1);
|
||||
prob.destroy();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// === 完整实验 ===
|
||||
fprintf(stderr, "\n=== CVRP10: batch size comparison ===\n");
|
||||
|
||||
// 实验组 1: 不同 batch size(AOS=on)
|
||||
{
|
||||
int batches[] = {50, 100, 200, 500, 2000};
|
||||
for (int b : batches) {
|
||||
char name[64];
|
||||
snprintf(name, sizeof(name), "batch%d_aos", b);
|
||||
fprintf(stderr, " %s ...\n", name);
|
||||
auto prob = VRPProblem::create(dist, demands, N, 15.0f, 4, 4);
|
||||
run_single(name, prob, make_config(b, true), 200.0f, repeats);
|
||||
prob.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
// 实验组 2: 不同 batch size(AOS=off)
|
||||
{
|
||||
int batches[] = {50, 200, 2000};
|
||||
for (int b : batches) {
|
||||
char name[64];
|
||||
snprintf(name, sizeof(name), "batch%d_noaos", b);
|
||||
fprintf(stderr, " %s ...\n", name);
|
||||
auto prob = VRPProblem::create(dist, demands, N, 15.0f, 4, 4);
|
||||
run_single(name, prob, make_config(b, false), 200.0f, repeats);
|
||||
prob.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
// 实验组 3: AOS 降频
|
||||
{
|
||||
int intervals[] = {1, 5, 10};
|
||||
for (int iv : intervals) {
|
||||
char name[64];
|
||||
snprintf(name, sizeof(name), "batch50_aosint%d", iv);
|
||||
fprintf(stderr, " %s ...\n", name);
|
||||
auto prob = VRPProblem::create(dist, demands, N, 15.0f, 4, 4);
|
||||
run_single(name, prob, make_config(50, true, iv), 200.0f, repeats);
|
||||
prob.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
// === Schedule3x4 ===
|
||||
fprintf(stderr, "\n=== Schedule3x4: batch size comparison ===\n");
|
||||
{
|
||||
float cost[12] = {5,3,8,4, 6,2,7,5, 4,6,3,7};
|
||||
int batches[] = {50, 200, 2000};
|
||||
for (int b : batches) {
|
||||
char name[64];
|
||||
snprintf(name, sizeof(name), "sched_batch%d_aos", b);
|
||||
fprintf(stderr, " %s ...\n", name);
|
||||
auto prob = ScheduleProblem::create(cost, 3, 4, 2);
|
||||
run_single(name, prob, make_config(b, true), 0.0f, repeats);
|
||||
prob.destroy();
|
||||
}
|
||||
{
|
||||
auto prob = ScheduleProblem::create(cost, 3, 4, 2);
|
||||
fprintf(stderr, " sched_batch2000_noaos ...\n");
|
||||
run_single("sched_batch2000_noaos", prob, make_config(2000, false), 0.0f, repeats);
|
||||
prob.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "\nAll done.\n");
|
||||
return 0;
|
||||
}
|
||||
93
benchmark/experiments/e0_diagnosis/run_diagnosis.sh
Executable file
93
benchmark/experiments/e0_diagnosis/run_diagnosis.sh
Executable file
|
|
@ -0,0 +1,93 @@
|
|||
#!/bin/bash
|
||||
# GenSolver 性能诊断 - 一键启动脚本
|
||||
#
|
||||
# 用法:
|
||||
# ./run_diagnosis.sh [host] # 运行完整诊断(all 模式)
|
||||
# ./run_diagnosis.sh [host] profile # 仅 nvprof profiling
|
||||
#
|
||||
# host: tc_new (T4) | tch (V100), 默认 tc_new
|
||||
|
||||
set -e
|
||||
|
||||
DIAG_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
BENCH_DIR="$(dirname "$DIAG_DIR")"
|
||||
ROOT_DIR="$(dirname "$BENCH_DIR")"
|
||||
RESULTS_DIR="$DIAG_DIR/results"
|
||||
|
||||
REMOTE_HOST="${1:-tc_new}"
|
||||
MODE="${2:-all}"
|
||||
REMOTE_DIR="~/gensolver"
|
||||
|
||||
echo ">>> 使用服务器: $REMOTE_HOST"
|
||||
|
||||
ARCH="sm_75"
|
||||
if [ "$REMOTE_HOST" = "tch" ]; then
|
||||
ARCH="sm_70"
|
||||
fi
|
||||
|
||||
NVCC_CMD="nvcc -arch=$ARCH -O2 -std=c++17 --extended-lambda -I ../../prototype/core -I ../../prototype/problems"
|
||||
|
||||
mkdir -p "$RESULTS_DIR"
|
||||
|
||||
echo "=========================================="
|
||||
echo " GenSolver 性能诊断"
|
||||
echo " 时间: $(date)"
|
||||
echo " 服务器: $REMOTE_HOST (arch=$ARCH)"
|
||||
echo "=========================================="
|
||||
|
||||
sync_code() {
|
||||
echo ">>> 同步代码到 $REMOTE_HOST ..."
|
||||
ssh $REMOTE_HOST "mkdir -p $REMOTE_DIR/prototype/core $REMOTE_DIR/prototype/problems $REMOTE_DIR/benchmark/experiments/e0_diagnosis"
|
||||
scp "$ROOT_DIR"/prototype/core/*.cuh $REMOTE_HOST:$REMOTE_DIR/prototype/core/
|
||||
scp "$ROOT_DIR"/prototype/problems/*.cuh $REMOTE_HOST:$REMOTE_DIR/prototype/problems/
|
||||
scp "$DIAG_DIR"/bench_diagnosis.cu $REMOTE_HOST:$REMOTE_DIR/benchmark/experiments/e0_diagnosis/
|
||||
echo " done."
|
||||
}
|
||||
|
||||
compile() {
|
||||
echo ">>> 编译 bench_diagnosis (arch=$ARCH) ..."
|
||||
ssh $REMOTE_HOST "export PATH=/usr/local/cuda/bin:\$PATH && cd $REMOTE_DIR/benchmark/experiments/e0_diagnosis && $NVCC_CMD -o bench_diagnosis bench_diagnosis.cu 2>&1"
|
||||
echo " done."
|
||||
}
|
||||
|
||||
run_all() {
|
||||
echo ">>> 运行完整诊断 ..."
|
||||
local gpu_name=$(ssh $REMOTE_HOST "nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1" | tr ' ' '_')
|
||||
local outfile="bench_${gpu_name}_$(date +%Y%m%d_%H%M%S).csv"
|
||||
|
||||
ssh $REMOTE_HOST "export PATH=/usr/local/cuda/bin:\$PATH && cd $REMOTE_DIR/benchmark/experiments/e0_diagnosis && ./bench_diagnosis all 2>&1 >/tmp/diag_out.csv && cat /tmp/diag_out.csv" > "$RESULTS_DIR/$outfile"
|
||||
|
||||
echo " 结果: $RESULTS_DIR/$outfile"
|
||||
local lines=$(wc -l < "$RESULTS_DIR/$outfile" 2>/dev/null || echo 0)
|
||||
echo " 数据行: $((lines - 1))"
|
||||
}
|
||||
|
||||
run_profile() {
|
||||
echo ">>> 运行 nvprof profiling ..."
|
||||
echo "--- baseline (batch=2000, AOS=off) ---"
|
||||
ssh $REMOTE_HOST "export PATH=/usr/local/cuda/bin:\$PATH && cd $REMOTE_DIR/benchmark/experiments/e0_diagnosis && nvprof --print-gpu-summary ./bench_diagnosis baseline 2>&1" | tee "$RESULTS_DIR/nvprof_baseline_$REMOTE_HOST.txt"
|
||||
echo ""
|
||||
echo "--- default (batch=50, AOS=on) ---"
|
||||
ssh $REMOTE_HOST "export PATH=/usr/local/cuda/bin:\$PATH && cd $REMOTE_DIR/benchmark/experiments/e0_diagnosis && nvprof --print-gpu-summary ./bench_diagnosis default 2>&1" | tee "$RESULTS_DIR/nvprof_default_$REMOTE_HOST.txt"
|
||||
}
|
||||
|
||||
sync_code
|
||||
compile
|
||||
|
||||
case "$MODE" in
|
||||
all) run_all ;;
|
||||
profile) run_profile ;;
|
||||
*)
|
||||
echo "未知模式: $MODE"
|
||||
echo "用法: ./run_diagnosis.sh [host] [all|profile]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo " 诊断完成"
|
||||
echo " 服务器: $REMOTE_HOST"
|
||||
echo " 结果目录: $RESULTS_DIR"
|
||||
echo "=========================================="
|
||||
ls -lh "$RESULTS_DIR"/ 2>/dev/null || true
|
||||
Loading…
Add table
Add a link
Reference in a new issue