mirror of
https://github.com/L-yang-yang/cugenopt.git
synced 2026-04-28 12:46:21 +02:00
Initial commit: cuGenOpt GPU optimization solver
This commit is contained in:
commit
fc5a0ff4af
117 changed files with 25545 additions and 0 deletions
13
benchmark/experiments/test_lazy_norm/Makefile
Normal file
13
benchmark/experiments/test_lazy_norm/Makefile
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Build rules for the lazy-normalization test binary (test_lazy_norm).
#
# NVCC and CUDA_ARCH are assigned with ?= so that a value already present in
# the environment is used instead of the defaults below, e.g.
#   NVCC=nvcc CUDA_ARCH=-arch=sm_80 make
NVCC ?= /usr/local/cuda-12.8/bin/nvcc
CUDA_ARCH ?= -arch=sm_70
INCLUDES = -I../../../prototype/core
CXXFLAGS = -O3 -std=c++14
NVCCFLAGS = $(CUDA_ARCH) $(CXXFLAGS) $(INCLUDES) --expt-relaxed-constexpr

# $@ is the target (test_lazy_norm) and $< the first prerequisite (test_lazy_norm.cu).
test_lazy_norm: test_lazy_norm.cu
	$(NVCC) $(NVCCFLAGS) -o $@ $<

clean:
	rm -f test_lazy_norm

.PHONY: clean
|
||||
80
benchmark/experiments/test_lazy_norm/README.md
Normal file
80
benchmark/experiments/test_lazy_norm/README.md
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
# 延迟归一化测试
|
||||
|
||||
## 目的
|
||||
|
||||
验证延迟归一化(Lazy Normalization)机制的正确性和性能。
|
||||
|
||||
## 核心修改
|
||||
|
||||
### 1. SeqRegistry 结构
|
||||
|
||||
```cpp
|
||||
struct SeqRegistry {
|
||||
int ids[MAX_SEQ];
|
||||
int count;
|
||||
float weights[MAX_SEQ]; // 未归一化
|
||||
float weights_sum; // 缓存权重和 ⭐ 新增
|
||||
float max_w[MAX_SEQ];
|
||||
SeqCategory categories[MAX_SEQ];
|
||||
};
|
||||
```
|
||||
|
||||
### 2. 轮盘赌选择
|
||||
|
||||
```cpp
|
||||
// 原来:r ∈ (0, 1](curand_uniform 不含 0、含 1),要求权重归一化
|
||||
float r = curand_uniform(rng);
|
||||
|
||||
// 现在:r ∈ (0, weights_sum](curand_uniform 不含 0、含 1),不要求权重归一化
|
||||
float r = curand_uniform(rng) * reg.weights_sum;
|
||||
```
|
||||
|
||||
### 3. AOS 更新
|
||||
|
||||
```cpp
|
||||
// 原来:EMA 更新 → 归一化 → FLOOR/CAP → 再次归一化
|
||||
// 现在:EMA 更新 → FLOOR/CAP → 更新 weights_sum(不归一化)
|
||||
```
|
||||
|
||||
## 编译和运行
|
||||
|
||||
```bash
|
||||
# 在 gpu1v100 上编译
|
||||
make
|
||||
|
||||
# 运行测试
|
||||
./test_lazy_norm
|
||||
```
|
||||
|
||||
## 预期输出
|
||||
|
||||
```
|
||||
=== 延迟归一化测试 ===
|
||||
|
||||
配置:
|
||||
pop_size = 32
|
||||
max_gen = 500
|
||||
aos_weight_floor = 0.050
|
||||
aos_weight_cap = 0.350
|
||||
延迟归一化: 启用
|
||||
|
||||
开始求解...
|
||||
|
||||
[AOS batch g=10] usage: ... | w: 0.xxx 0.xxx ... | sum=0.xxx | K: ...
|
||||
[AOS batch g=20] usage: ... | w: 0.xxx 0.xxx ... | sum=0.xxx | K: ...
|
||||
...
|
||||
|
||||
=== 求解完成 ===
|
||||
最优解: xxx.xx
|
||||
代数: 100
|
||||
时间: xxx.xx ms
|
||||
|
||||
✅ 延迟归一化测试通过!
|
||||
```
|
||||
|
||||
## 验证要点
|
||||
|
||||
1. **权重和可能 ≠ 1.0**:`sum=0.xxx`(正常)
|
||||
2. **权重在边界内**:所有 `w[i] ∈ [0.05, 0.35]`
|
||||
3. **求解正常完成**:无崩溃、无异常
|
||||
4. **结果合理**:找到可行解
|
||||
109
benchmark/experiments/test_lazy_norm/test_lazy_norm.cu
Normal file
109
benchmark/experiments/test_lazy_norm/test_lazy_norm.cu
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
#include "solver.cuh"

#include <cmath>
#include <cstdio>
#include <cstdlib>
|
||||
|
||||
// Small single-objective TSP problem used only to drive the solver in this test.
struct SimpleTSP : public ProblemBase<SimpleTSP, 1, 64> {
    using Sol = Solution<1, 64>;

    const float* d_dist;  // distance table, read as d_dist[from * (n + 1) + to]
    int n;                // number of tour positions summed by compute_obj

    // A single objective, declared as a minimization.
    // NOTE(review): the two float fields are presumably weight and tolerance —
    // confirm against the ObjDef declaration.
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}
    };

    // Returns the closed-tour length of row 0 of `s`: every position is linked
    // to its successor and the last position wraps back to the first.
    // `obj_idx` is not consulted.
    __device__ float compute_obj(int obj_idx, const Sol& s) const {
        const int row_stride = n + 1;
        float tour_len = 0.0f;
        for (int pos = 0; pos < n; ++pos) {
            // Successor index with wrap-around (same value as (pos + 1) % n here).
            const int succ = (pos + 1 == n) ? 0 : pos + 1;
            const int src = s.data[0][pos];
            const int dst = s.data[0][succ];
            tour_len += d_dist[src * row_stride + dst];
        }
        return tour_len;
    }

    // This problem reports no constraint violation: the penalty is always zero.
    __device__ float compute_penalty(const Sol& s) const {
        return 0.0f;
    }

    // Builds the problem description: one permutation row of n elements,
    // fixed row mode and no cross-row moves.
    ProblemConfig config() const {
        ProblemConfig problem_cfg;
        problem_cfg.dim1 = 1;
        problem_cfg.dim2_default = n;
        problem_cfg.encoding = EncodingType::Permutation;
        fill_obj_config(problem_cfg);
        problem_cfg.total_elements = n;
        problem_cfg.row_mode = RowMode::Fixed;
        problem_cfg.cross_row_prob = 0.0f;
        return problem_cfg;
    }

    // No device clone is produced in this test; always returns nullptr.
    SimpleTSP* clone_to_device(int target_device) const override {
        return nullptr;
    }
};

// Out-of-class definition of the static constexpr member declared above.
constexpr ObjDef SimpleTSP::OBJ_DEFS[];
|
||||
|
||||
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Runs the solver with AOS enabled on a small random TSP instance.
// Returns EXIT_SUCCESS only when every checked CUDA call succeeded and the
// reported best objective is finite; returns EXIT_FAILURE otherwise.
int main() {
    printf("=== 延迟归一化测试 ===\n\n");

    // Small TSP instance (10 cities); the table is (n + 1) x (n + 1) to match
    // the n + 1 row stride used by SimpleTSP::compute_obj.
    constexpr int n = 10;
    constexpr int stride = n + 1;
    constexpr int cells = stride * stride;
    const size_t dist_bytes = cells * sizeof(float);
    float h_dist[cells];

    // Random distance table with a fixed seed: zero on the diagonal,
    // values in [10, 99] elsewhere.
    srand(42);
    for (int i = 0; i <= n; i++) {
        for (int j = 0; j <= n; j++) {
            if (i == j) {
                h_dist[i * stride + j] = 0.0f;
            } else {
                h_dist[i * stride + j] = 10.0f + rand() % 90;
            }
        }
    }

    // Copy the table to the GPU, checking both runtime calls.
    float* d_dist = nullptr;
    if (!cuda_ok(cudaMalloc(&d_dist, dist_bytes), "cudaMalloc(d_dist)")) {
        return EXIT_FAILURE;
    }
    if (!cuda_ok(cudaMemcpy(d_dist, h_dist, dist_bytes, cudaMemcpyHostToDevice),
                 "cudaMemcpy(d_dist)")) {
        cudaFree(d_dist);  // best-effort cleanup; the copy failure is already reported
        return EXIT_FAILURE;
    }

    SimpleTSP prob;
    prob.d_dist = d_dist;
    prob.n = n;

    // Solver configuration (AOS and verbose output enabled).
    SolverConfig cfg;
    cfg.pop_size = 32;
    cfg.max_gen = 500;
    cfg.use_aos = true;
    cfg.verbose = true;
    cfg.aos_update_interval = 5;
    cfg.aos_weight_floor = 0.05f;
    cfg.aos_weight_cap = 0.35f;

    printf("配置:\n");
    printf("  pop_size = %d\n", cfg.pop_size);
    printf("  max_gen = %d\n", cfg.max_gen);
    printf("  aos_weight_floor = %.3f\n", cfg.aos_weight_floor);
    printf("  aos_weight_cap = %.3f\n", cfg.aos_weight_cap);
    printf("  延迟归一化: 启用\n\n");

    // Solve.
    printf("开始求解...\n\n");
    auto result = solve(prob, cfg);

    // Device-side faults are reported asynchronously; synchronize so that a
    // failed kernel cannot be mistaken for a passing run.
    const bool device_ok = cuda_ok(cudaDeviceSynchronize(), "solve");

    printf("\n=== 求解完成 ===\n");
    printf("最优解: %.2f\n", result.best_solution.objectives[0]);
    printf("代数: %d\n", result.generations);
    printf("时间: %.2f ms\n", result.elapsed_ms);

    // Cleanup.
    const bool free_ok = cuda_ok(cudaFree(d_dist), "cudaFree(d_dist)");

    // A NaN or infinite objective means the run did not produce a usable tour length.
    const bool objective_ok = std::isfinite(result.best_solution.objectives[0]);
    if (!objective_ok) {
        fprintf(stderr, "best objective is not finite\n");
    }

    if (!device_ok || !free_ok || !objective_ok) {
        fprintf(stderr, "\n❌ 延迟归一化测试失败!\n");
        return EXIT_FAILURE;
    }

    printf("\n✅ 延迟归一化测试通过!\n");

    return EXIT_SUCCESS;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue