cuGenOpt/benchmark/experiments/e0_diagnosis/bench_diagnosis.cu
2026-03-20 00:33:45 +08:00

189 lines
6.4 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

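// GenSolver benchmark dedicated to performance diagnosis.
// Goal: precisely break down where the time goes for a single problem instance.
//
// Experiment design:
// 1. Fix a single problem (CVRP10), base seed = 42, max_gen = 2000.
//    (run_single derives the seed of repetition r as 42 + r * 111.)
// 2. Variable: migrate_interval = 50, 100, 200, 500, 2000.
// 3. Control group: AOS disabled (use_aos=false), batch=2000 (pure GPU compute baseline).
// 4. Run each group 3 times and take the median to reduce noise.
//
// Output CSV: config,run,time_ms,obj,gap_pct,generations
// When used together with nvprof, run only once (avoids stacking profiling overhead).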
#include "solver.cuh"
#include "tsp.cuh"
#include "vrp.cuh"
#include "knapsack.cuh"
#include "schedule.cuh"
#include "qap.cuh"
#include <cstdio>
#include <cstring>
#include <cmath>
// Solves one tiny 5-city TSP instance and throws the result away.
// main calls this once before printing the CSV header.
// NOTE(review): presumably this absorbs one-time CUDA start-up cost so it does
// not pollute the first measured run — confirm.
static void warmup() {
    const int kCities = 5;

    // 25 distances; read as a 5x5 table it is symmetric with a zero diagonal.
    float tiny_dist[kCities * kCities] = {
        0, 3, 6, 5, 7,
        3, 0, 3, 4, 5,
        6, 3, 0, 5, 4,
        5, 4, 5, 0, 3,
        7, 5, 4, 3, 0
    };

    auto tiny_tsp = TSPProblem::create(tiny_dist, kCities);

    // Deliberately small and quiet: 64 individuals, 10 generations, no logging.
    SolverConfig warm_cfg;
    warm_cfg.pop_size = 64;
    warm_cfg.max_gen = 10;
    warm_cfg.seed = 1;
    warm_cfg.verbose = false;

    solve(tiny_tsp, warm_cfg);
    tiny_tsp.destroy();
}
// Builds the SolverConfig used by every measured run.
//
// Parameters (the three knobs the experiments vary):
//   batch        - stored in migrate_interval
//   aos          - stored in use_aos
//   aos_interval - stored in aos_update_interval (defaults to 1)
//
// Every other field is set to the same fixed value for all runs.
// Returns the populated config by value.
static SolverConfig make_config(int batch, bool aos, int aos_interval = 1) {
    SolverConfig cfg;

    // --- knobs varied between experiment groups ---
    cfg.migrate_interval = batch;
    cfg.use_aos = aos;
    cfg.aos_update_interval = aos_interval;

    // --- settings held constant across all runs ---
    // NOTE(review): pop_size = 0 and num_islands = 0 presumably ask the solver
    // to pick these itself — confirm against SolverConfig.
    cfg.pop_size = 0;
    cfg.num_islands = 0;
    cfg.max_gen = 2000;
    cfg.verbose = false;
    cfg.sa_temp_init = 50.0f;
    cfg.sa_alpha = 0.999f;
    cfg.migrate_strategy = MigrateStrategy::Hybrid;
    cfg.crossover_rate = 0.1f;
    // Base seed only; run_single reassigns seed for each repetition.
    cfg.seed = 42;

    return cfg;
}
// Pairs a problem label with a reference objective value.
// NOTE(review): nothing in the visible part of this file uses this struct —
// confirm whether it is still needed.
struct TestProblem {
// Label of the problem instance.
const char* name;
// NOTE(review): presumably the best known objective for the instance — confirm.
float known_optimal;
};
// Solves `prob` `repeats` times and prints one CSV row per repetition to
// stdout in the form: config,run,time_ms,obj,gap_pct,generations
//
// Parameters:
//   config_name - label written to the first CSV column
//   prob        - problem instance handed to solve()
//   cfg         - solver configuration (taken by value; its seed is
//                 overwritten for every repetition)
//   known_opt   - reference objective used for the gap column
//   repeats     - number of repetitions
//
// The gap column is the percentage deviation of the first objective from
// known_opt; when known_opt is exactly 0 the raw objective is printed instead.
template<typename Problem>
static void run_single(const char* config_name, Problem& prob,
                       SolverConfig cfg, float known_opt, int repeats) {
    int run_idx = 0;
    while (run_idx < repeats) {
        // Distinct seed for each repetition: 42, 153, 264, ...
        cfg.seed = 42 + run_idx * 111;

        auto outcome = solve(prob, cfg);
        float best = outcome.best_solution.objectives[0];

        float gap_pct;
        if (known_opt == 0.0f) {
            // No usable reference value: report the objective itself.
            gap_pct = best;
        } else {
            gap_pct = (best - known_opt) / fabsf(known_opt) * 100.0f;
        }

        printf("%s,%d,%.1f,%.2f,%.2f,%d\n",
               config_name, run_idx, outcome.elapsed_ms, best, gap_pct,
               outcome.generations);
        // Flush per row so partial results survive an aborted run.
        fflush(stdout);

        ++run_idx;
    }
}
// Entry point of the diagnosis benchmark.
//
// argv[1] selects the mode:
//   "all"      (default) full sweep, 3 repetitions per configuration
//   "baseline" CVRP10 with batch=2000 and AOS off only, 1 repetition
//   "default"  CVRP10 with batch=50 and AOS on only, 1 repetition
// Any other value runs the full sweep and prints a warning to stderr.
//
// CSV rows are written to stdout; GPU info and progress go to stderr.
// Returns 0 on success, 1 if the CUDA device cannot be queried.
int main(int argc, char** argv) {
    const char* mode = (argc > 1) ? argv[1] : "all";
    const bool only_baseline = (strcmp(mode, "baseline") == 0);
    const bool only_default = (strcmp(mode, "default") == 0);
    if (!only_baseline && !only_default && strcmp(mode, "all") != 0) {
        fprintf(stderr, "warning: unknown mode '%s', running the full sweep\n", mode);
    }
    // The single-configuration modes are meant for profiler runs: one repetition.
    const int repeats = (only_baseline || only_default) ? 1 : 3;

    // Report the active GPU. The return codes are checked so that an
    // uninitialised cudaDeviceProp is never printed and a missing or broken
    // device is reported before any timing is attempted.
    {
        int device = 0;
        cudaDeviceProp prop;
        cudaError_t err = cudaGetDevice(&device);
        if (err == cudaSuccess) {
            err = cudaGetDeviceProperties(&prop, device);
        }
        if (err != cudaSuccess) {
            fprintf(stderr, "CUDA error while querying the device: %s\n",
                    cudaGetErrorString(err));
            return 1;
        }
        fprintf(stderr, "GPU: %s (SM=%d, Compute=%d.%d)\n",
                prop.name, prop.multiProcessorCount, prop.major, prop.minor);
    }

    warmup();

    printf("config,run,time_ms,obj,gap_pct,generations\n");
    fflush(stdout);

    // === Test problem: CVRP10 (author's note: medium complexity, kernel time ~600 ms) ===
    // 11 coordinate pairs and 10 demands.
    // NOTE(review): presumably the first coordinate is the depot — confirm.
    const int N = 10, NN = N + 1;
    float coords[NN][2] = {
        {50,50},{60,50},{70,50},{80,50},{50,60},
        {50,70},{50,80},{40,50},{30,50},{50,40},{50,30}
    };
    float demands[N] = {5,4,6,5,4,6,5,4,5,6};

    // Euclidean distances between all coordinate pairs, rounded to the nearest integer.
    float dist[NN * NN];
    for (int i = 0; i < NN; i++) {
        for (int j = 0; j < NN; j++) {
            float dx = coords[i][0] - coords[j][0];
            float dy = coords[i][1] - coords[j][1];
            dist[i * NN + j] = roundf(sqrtf(dx * dx + dy * dy));
        }
    }

    // Reference objective passed to run_single for every CVRP10 run.
    const float kCvrpKnownOpt = 200.0f;

    // Creates a fresh CVRP10 instance, runs it `repeats` times, and destroys it.
    // NOTE(review): the trailing create() arguments (15.0f, 4, 4) are presumably
    // vehicle capacity and vehicle counts — confirm against VRPProblem::create.
    auto run_cvrp = [&](const char* name, const SolverConfig& cfg) {
        auto prob = VRPProblem::create(dist, demands, N, 15.0f, 4, 4);
        run_single(name, prob, cfg, kCvrpKnownOpt, repeats);
        prob.destroy();
    };

    if (only_default) {
        // Profiler mode: default configuration only (batch=50, AOS on).
        fprintf(stderr, "\n=== CVRP10: default config (batch=50, AOS=on) ===\n");
        run_cvrp("batch50_aos", make_config(50, true));
        return 0;
    }

    if (only_baseline) {
        // Profiler mode: pure GPU baseline only (batch=2000, AOS off).
        fprintf(stderr, "\n=== CVRP10: baseline (batch=2000, AOS=off) ===\n");
        run_cvrp("batch2000_noaos", make_config(2000, false));
        return 0;
    }

    // === Full experiment ===
    fprintf(stderr, "\n=== CVRP10: batch size comparison ===\n");

    // Group 1: batch-size sweep with AOS on.
    {
        int batches[] = {50, 100, 200, 500, 2000};
        for (int b : batches) {
            char name[64];
            snprintf(name, sizeof(name), "batch%d_aos", b);
            fprintf(stderr, "  %s ...\n", name);
            run_cvrp(name, make_config(b, true));
        }
    }

    // Group 2: batch-size sweep with AOS off.
    {
        int batches[] = {50, 200, 2000};
        for (int b : batches) {
            char name[64];
            snprintf(name, sizeof(name), "batch%d_noaos", b);
            fprintf(stderr, "  %s ...\n", name);
            run_cvrp(name, make_config(b, false));
        }
    }

    // Group 3: lower AOS update frequency at batch=50.
    {
        int intervals[] = {1, 5, 10};
        for (int iv : intervals) {
            char name[64];
            snprintf(name, sizeof(name), "batch50_aosint%d", iv);
            fprintf(stderr, "  %s ...\n", name);
            run_cvrp(name, make_config(50, true, iv));
        }
    }

    // === Schedule3x4 ===
    fprintf(stderr, "\n=== Schedule3x4: batch size comparison ===\n");
    {
        float cost[12] = {5,3,8,4, 6,2,7,5, 4,6,3,7};

        // Creates a fresh schedule instance, runs it, and destroys it.
        // known_opt is 0, so run_single prints the raw objective in the gap column.
        auto run_sched = [&](const char* name, const SolverConfig& cfg) {
            auto prob = ScheduleProblem::create(cost, 3, 4, 2);
            run_single(name, prob, cfg, 0.0f, repeats);
            prob.destroy();
        };

        int batches[] = {50, 200, 2000};
        for (int b : batches) {
            char name[64];
            snprintf(name, sizeof(name), "sched_batch%d_aos", b);
            fprintf(stderr, "  %s ...\n", name);
            run_sched(name, make_config(b, true));
        }

        fprintf(stderr, "  sched_batch2000_noaos ...\n");
        run_sched("sched_batch2000_noaos", make_config(2000, false));
    }

    fprintf(stderr, "\nAll done.\n");
    return 0;
}