Initial commit: cuGenOpt GPU optimization solver

This commit is contained in:
L-yang-yang 2026-03-20 00:33:45 +08:00
commit fc5a0ff4af
117 changed files with 25545 additions and 0 deletions

View file

@ -0,0 +1,114 @@
/**
* assignment.cuh - 指派问题
*
* 继承 ProblemBase，使用 ObjDef 目标注册机制
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"
/**
 * Assignment problem over a single permutation row.
 *
 * Position i of row 0 is charged d_cost[i * n + assign[i]]; the objective is
 * the sum of those charges (minimized, see OBJ_DEFS).
 */
struct AssignmentProblem : ProblemBase<AssignmentProblem, 1, 16> {
    const float* d_cost;  // cost matrix copy made by create() via cudaMalloc; repointed by load_shared()
    const float* h_cost;  // host-side cost matrix (used by init_relation_matrix); never freed here
    int n;                // matrix side length; entries are addressed as [i * n + j]

    // ---- objective evaluation ----

    /** Sums d_cost[i * n + assign[i]] over the first dim2_sizes[0] entries of row 0. */
    __device__ float calc_total_cost(const Sol& sol) const {
        const int* perm = sol.data[0];
        const int len = sol.dim2_sizes[0];
        float sum = 0.0f;
        int row = 0;
        while (row < len) {
            sum += d_cost[row * n + perm[row]];
            ++row;
        }
        return sum;
    }

    // ---- objective table (OBJ_DEFS and compute_obj must correspond one-to-one) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: calc_total_cost
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) {
            return calc_total_cost(sol);   // OBJ_DEFS[0]
        }
        return 0.0f;
    }

    /** This problem reports no penalty. */
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    /** Describes the encoding: one permutation row whose default length is n. */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim2_default = n;
        cfg.dim1 = 1;
        fill_obj_config(cfg);
        return cfg;
    }

    // ---- shared memory interface ----
    static constexpr size_t SMEM_LIMIT = 48 * 1024;

    /** Bytes requested for the staged cost matrix, or 0 when it exceeds SMEM_LIMIT. */
    size_t shared_mem_bytes() const {
        const size_t bytes = (size_t)n * n * sizeof(float);
        if (bytes > SMEM_LIMIT) {
            return 0;
        }
        return bytes;
    }

    /** Size of the full cost matrix in bytes, regardless of SMEM_LIMIT. */
    size_t working_set_bytes() const {
        return (size_t)n * n * sizeof(float);
    }

    /**
     * Copies the n*n cost entries into `smem` (thread `tid` handles indices
     * tid, tid + bsz, ...) and then repoints d_cost at that buffer.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copy is read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* staged = reinterpret_cast<float*>(smem);
        const int cells = n * n;
        int idx = tid;
        while (idx < cells) {
            staged[idx] = d_cost[idx];
            idx += bsz;
        }
        d_cost = staged;
    }

    /**
     * Seeds the relation matrices from the host cost matrix.
     *
     * For every ordered pair of distinct columns (j, k) the cosine similarity
     * of the two cost columns (each entry divided by the largest cost) is
     * written as G[j*N+k] = 0.2 * sim and O[j*N+k] = 0.05 * sim. Diagonal
     * entries are left untouched. Does nothing when h_cost is null, N != n,
     * or no cost is positive.
     */
    void init_relation_matrix(float* G, float* O, int N) const {
        if (h_cost == nullptr || N != n) return;

        // Largest cost, used to normalize the column vectors.
        float peak = 0.0f;
        const int cells = N * N;
        for (int idx = 0; idx < cells; ++idx) {
            if (h_cost[idx] > peak) peak = h_cost[idx];
        }
        if (peak <= 0.0f) return;

        for (int a = 0; a < N; ++a) {
            for (int b = 0; b < N; ++b) {
                if (a != b) {
                    // Cosine similarity between cost columns a and b.
                    float dot = 0.0f, sq_a = 0.0f, sq_b = 0.0f;
                    for (int row = 0; row < N; ++row) {
                        const float va = h_cost[row * N + a] / peak;
                        const float vb = h_cost[row * N + b] / peak;
                        dot += va * vb;
                        sq_a += va * va;
                        sq_b += vb * vb;
                    }
                    const float norm = sqrtf(sq_a) * sqrtf(sq_b);
                    float sim = 0.0f;
                    if (norm > 1e-6f) sim = dot / norm;
                    G[a * N + b] = sim * 0.2f;
                    O[a * N + b] = sim * 0.05f;
                }
            }
        }
    }

    /** Builds a problem: keeps the host pointer `hc` and uploads an n*n copy to the device. */
    static AssignmentProblem create(const float* hc, int n) {
        AssignmentProblem prob;
        prob.h_cost = hc;
        prob.n = n;
        float* dev = nullptr;
        CUDA_CHECK(cudaMalloc(&dev, sizeof(float)*n*n));
        CUDA_CHECK(cudaMemcpy(dev, hc, sizeof(float)*n*n, cudaMemcpyHostToDevice));
        prob.d_cost = dev;
        return prob;
    }

    /** Frees d_cost when set and clears both pointers. */
    void destroy() {
        if (d_cost != nullptr) {
            cudaFree(const_cast<float*>(d_cost));
            d_cost = nullptr;
        }
        h_cost = nullptr;
    }
};

View file

@ -0,0 +1,97 @@
/**
* bin_packing.cuh - 一维装箱问题（Integer 编码 + 约束）
*
* N 个物品，每个重量 w[i]，装入最多 B 个箱子，每个箱子容量 C。
* 决策变量：data[0][i] ∈ [0, B-1]，表示物品 i 放入的箱子编号。
* 目标：最小化使用的箱子数。
* 约束：每个箱子总重不超过 C，超出部分作为 penalty。
*
* 验证实例：8 物品 weights=[7,5,3,4,6,2,8,1], C=10, 最优=4 箱
* 箱0={7,3}=10, 箱1={5,4,1}=10, 箱2={6,2}=8, 箱3={8}=8
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
/**
 * One-dimensional bin packing with an integer encoding.
 *
 * data[0][i] is the bin index of item i. The objective counts distinct bins
 * in use; overload beyond `capacity` is reported as a penalty.
 *
 * The per-thread scratch arrays hold MAX_BINS entries. Requests for more bins
 * are capped at MAX_BINS (see bin_limit()) instead of overrunning those arrays.
 */
struct BinPackingProblem : ProblemBase<BinPackingProblem, 1, 64> {
    // Capacity of the per-thread `used` / `load` scratch arrays.
    static constexpr int MAX_BINS = 32;

    const float* d_weights;  // item weights; uploaded by create(), repointed by load_shared()
    int n;                   // number of items
    int max_bins;            // requested maximum number of bins B
    float capacity;          // bin capacity C

    /**
     * Number of bins actually usable: max_bins capped at MAX_BINS.
     * Written with an `if` rather than a ternary so MAX_BINS is only read by
     * value and needs no out-of-class definition.
     */
    __host__ __device__ int bin_limit() const {
        if (max_bins > MAX_BINS) return MAX_BINS;
        return max_bins;
    }

    /** Counts the distinct bin indices in [0, bin_limit()) that appear in row 0. */
    __device__ float calc_bins_used(const Sol& sol) const {
        const int limit = bin_limit();
        bool used[MAX_BINS] = {};
        int size = sol.dim2_sizes[0];
        for (int i = 0; i < size; i++) {
            int b = sol.data[0][i];
            // Bounded by `limit`, not max_bins, so the write stays inside `used`.
            if (b >= 0 && b < limit) used[b] = true;
        }
        int count = 0;
        for (int b = 0; b < limit; b++)
            if (used[b]) count++;
        return (float)count;
    }

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: calc_bins_used
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_bins_used(sol);
            default: return 0.0f;
        }
    }

    /** Sums, over all bins, 10 x the weight by which the bin exceeds `capacity`. */
    __device__ float compute_penalty(const Sol& sol) const {
        const int limit = bin_limit();
        float penalty = 0.0f;
        float load[MAX_BINS] = {};
        int size = sol.dim2_sizes[0];
        for (int i = 0; i < size; i++) {
            int b = sol.data[0][i];
            // Bounded by `limit`, not max_bins, so the write stays inside `load`.
            if (b >= 0 && b < limit)
                load[b] += d_weights[i];
        }
        for (int b = 0; b < limit; b++) {
            float over = load[b] - capacity;
            if (over > 0.0f) penalty += over * 10.0f;
        }
        return penalty;
    }

    /**
     * Describes the encoding: one integer row of default length n with values
     * in [0, bin_limit() - 1], so the advertised range never exceeds what the
     * evaluators can index.
     */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Integer;
        cfg.dim1 = 1; cfg.dim2_default = n;
        cfg.value_lower_bound = 0;
        cfg.value_upper_bound = bin_limit() - 1;
        fill_obj_config(cfg);
        return cfg;
    }

    /** Bytes needed to stage the n item weights. */
    size_t shared_mem_bytes() const {
        return (size_t)n * sizeof(float);
    }

    /**
     * Copies the n weights into `smem` (thread `tid` handles indices
     * tid, tid + bsz, ...) and repoints d_weights at that buffer.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copy is read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* sw = reinterpret_cast<float*>(smem);
        for (int i = tid; i < n; i += bsz) sw[i] = d_weights[i];
        d_weights = sw;
    }

    /**
     * Builds a problem and uploads the n host weights to the device.
     * `max_bins` is stored as given; values above MAX_BINS are capped by
     * bin_limit() wherever bins are indexed.
     */
    static BinPackingProblem create(const float* h_weights, int n,
                                    int max_bins, float capacity) {
        BinPackingProblem prob;
        prob.n = n; prob.max_bins = max_bins; prob.capacity = capacity;
        float* dw;
        CUDA_CHECK(cudaMalloc(&dw, sizeof(float) * n));
        CUDA_CHECK(cudaMemcpy(dw, h_weights, sizeof(float) * n, cudaMemcpyHostToDevice));
        prob.d_weights = dw;
        return prob;
    }

    /** Frees d_weights when set and clears the pointer. */
    void destroy() {
        if (d_weights) cudaFree(const_cast<float*>(d_weights));
        d_weights = nullptr;
    }
};

View file

@ -0,0 +1,79 @@
/**
* graph_color.cuh - 图着色问题（Integer 编码）
*
* N 个节点的图，用 k 种颜色着色。
* 决策变量：data[0][i] ∈ [0, k-1]，表示节点 i 的颜色。
* 目标：最小化冲突边数（相邻节点同色的边数）。
*
* 验证实例：Petersen 图（10 节点 15 边，色数=3，最优冲突=0）
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
/**
 * Graph colouring with an integer encoding: data[0][i] is the colour of
 * node i, and the objective is the number of adjacent node pairs that share
 * a colour.
 */
struct GraphColorProblem : ProblemBase<GraphColorProblem, 1, 64> {
    const int* d_adj;  // adjacency matrix [N*N] (1 = adjacent, 0 = not adjacent)
    int n;             // number of nodes
    int k;             // number of colours

    /** Counts pairs i < j with d_adj[i * n + j] non-zero and equal colours. */
    __device__ float calc_conflicts(const Sol& sol) const {
        const int nodes = sol.dim2_sizes[0];
        int clashes = 0;
        for (int u = 0; u < nodes; ++u) {
            const int row_base = u * n;
            for (int v = u + 1; v < nodes; ++v) {
                if (!d_adj[row_base + v]) continue;
                if (sol.data[0][u] == sol.data[0][v]) {
                    ++clashes;
                }
            }
        }
        return (float)clashes;
    }

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: calc_conflicts
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) {
            return calc_conflicts(sol);
        }
        return 0.0f;
    }

    /** This problem reports no penalty. */
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    /** Describes the encoding: one integer row of default length n, values in [0, k - 1]. */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Integer;
        cfg.dim2_default = n;
        cfg.dim1 = 1;
        cfg.value_upper_bound = k - 1;
        cfg.value_lower_bound = 0;
        fill_obj_config(cfg);
        return cfg;
    }

    /** Bytes needed to stage the n*n adjacency matrix. */
    size_t shared_mem_bytes() const {
        return (size_t)n * n * sizeof(int);
    }

    /**
     * Copies the n*n adjacency entries into `smem` (thread `tid` handles
     * indices tid, tid + bsz, ...) and repoints d_adj at that buffer.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copy is read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        int* staged = reinterpret_cast<int*>(smem);
        const int cells = n * n;
        int idx = tid;
        while (idx < cells) {
            staged[idx] = d_adj[idx];
            idx += bsz;
        }
        d_adj = staged;
    }

    /** Builds a problem and uploads the n*n host adjacency matrix to the device. */
    static GraphColorProblem create(const int* h_adj, int n, int k) {
        GraphColorProblem prob;
        prob.k = k;
        prob.n = n;
        int* dev = nullptr;
        CUDA_CHECK(cudaMalloc(&dev, sizeof(int) * n * n));
        CUDA_CHECK(cudaMemcpy(dev, h_adj, sizeof(int) * n * n, cudaMemcpyHostToDevice));
        prob.d_adj = dev;
        return prob;
    }

    /** Frees d_adj when set and clears the pointer. */
    void destroy() {
        if (d_adj != nullptr) {
            cudaFree(const_cast<int*>(d_adj));
        }
        d_adj = nullptr;
    }
};

View file

@ -0,0 +1,271 @@
/**
* jsp.cuh - 车间调度问题 (Job Shop Scheduling Problem)
*
* J 个工件，每个工件有 O 道工序，每道工序指定机器和耗时。
*
* === 编码方案 A：Integer 多行（时间表编码）===
* JSPProblem: data[j][i] = 工件 j 的第 i 道工序的开始时间
* dim1 = num_jobs, dim2_default = num_ops
* row_mode = Fixed（禁止 ROW_SPLIT/ROW_MERGE）
* 每行代表一个工件的固定工序序列，行长度不可变
*
* === 编码方案 B：Permutation 多重集（工序排列编码）===
* JSPPermProblem: data[0][k] = 工件编号（0..J-1），长度 J*O
* 值 j 出现 O 次。从左到右扫描，第 t 次遇到值 j 表示工件 j 的第 t 道工序。
* dim1 = 1, dim2_default = J*O, perm_repeat_count = O
* 标准 Permutation 算子（swap/reverse/insert）天然保持多重集结构
*
* 目标：Minimize makespan（所有工件完成时间的最大值）。
* 约束：
* (a) 工序顺序：同一工件的工序必须按序执行
* (b) 机器冲突：同一机器同一时刻只能处理一个工序
*
* 验证实例：自定义 3 工件 3 机器 (3x3)，最优 makespan = 12
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
// ============================================================
// 编码方案 AInteger 多行(时间表编码)
// ============================================================
/**
 * Job-shop scheduling, timetable encoding: data[j][i] is the start time of
 * operation i of job j. The objective is the makespan; precedence and
 * machine-overlap violations are reported as penalties.
 */
struct JSPProblem : ProblemBase<JSPProblem, 8, 16> {
    const int* d_machine;     // machine required by each operation [J*O]
    const float* d_duration;  // duration of each operation [J*O]
    int num_jobs;             // number of jobs J
    int num_ops;              // operations per job O
    int num_machines;         // number of machines M
    int time_horizon;         // upper bound on start times

    /** Largest finish time (start + duration) of the last operation of any job. */
    __device__ float calc_makespan(const Sol& sol) const {
        const int tail = num_ops - 1;
        float latest = 0.0f;
        for (int job = 0; job < num_jobs; ++job) {
            const float finish = (float)sol.data[job][tail] + d_duration[job * num_ops + tail];
            if (finish > latest) {
                latest = finish;
            }
        }
        return latest;
    }

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: calc_makespan
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) {
            return calc_makespan(sol);
        }
        return 0.0f;
    }

    /**
     * Penalty = 10 x (total precedence violation within each job
     *               + total pairwise time overlap between operations that
     *                 use the same machine).
     */
    __device__ float compute_penalty(const Sol& sol) const {
        float total_penalty = 0.0f;

        // (a) precedence: an operation may not start before its predecessor ends
        for (int job = 0; job < num_jobs; ++job) {
            for (int op = 1; op < num_ops; ++op) {
                const float pred_finish =
                    (float)sol.data[job][op - 1] + d_duration[job * num_ops + (op - 1)];
                const float begin = (float)sol.data[job][op];
                if (begin < pred_finish) {
                    total_penalty += (pred_finish - begin) * 10.0f;
                }
            }
        }

        // (b) machine conflicts: operations on the same machine may not overlap
        const int op_count = num_jobs * num_ops;
        for (int first = 0; first < op_count; ++first) {
            const int mach = d_machine[first];
            const float begin_first = (float)sol.data[first / num_ops][first % num_ops];
            const float end_first = begin_first + d_duration[first];
            for (int second = first + 1; second < op_count; ++second) {
                if (d_machine[second] == mach) {
                    const float begin_second = (float)sol.data[second / num_ops][second % num_ops];
                    const float end_second = begin_second + d_duration[second];
                    const float shared_time =
                        fminf(end_first, end_second) - fmaxf(begin_first, begin_second);
                    if (shared_time > 0.0f) {
                        total_penalty += shared_time * 10.0f;
                    }
                }
            }
        }
        return total_penalty;
    }

    /**
     * Describes the encoding: num_jobs fixed-length integer rows of num_ops
     * start times, each in [0, time_horizon - 1].
     */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Integer;
        cfg.dim2_default = num_ops;
        cfg.dim1 = num_jobs;
        cfg.value_upper_bound = time_horizon - 1;
        cfg.value_lower_bound = 0;
        cfg.row_mode = RowMode::Fixed;
        fill_obj_config(cfg);
        return cfg;
    }

    /** Bytes needed to stage the machine table followed by the duration table. */
    size_t shared_mem_bytes() const {
        const int op_count = num_jobs * num_ops;
        return (size_t)op_count * (sizeof(int) + sizeof(float));
    }

    /**
     * Copies the machine table, then the duration table, into `smem` (thread
     * `tid` handles indices tid, tid + bsz, ...) and repoints both members.
     * The durations are placed immediately after the op_count machine ints.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copies are read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        const int op_count = num_jobs * num_ops;

        int* machines = reinterpret_cast<int*>(smem);
        int idx = tid;
        while (idx < op_count) {
            machines[idx] = d_machine[idx];
            idx += bsz;
        }
        d_machine = machines;

        float* durations = reinterpret_cast<float*>(machines + op_count);
        idx = tid;
        while (idx < op_count) {
            durations[idx] = d_duration[idx];
            idx += bsz;
        }
        d_duration = durations;
    }

    /** Builds a problem and uploads both J*O host tables to the device. */
    static JSPProblem create(const int* h_machine, const float* h_duration,
                             int num_jobs, int num_ops, int num_machines,
                             int time_horizon) {
        JSPProblem prob;
        prob.time_horizon = time_horizon;
        prob.num_machines = num_machines;
        prob.num_ops = num_ops;
        prob.num_jobs = num_jobs;
        const int total = num_jobs * num_ops;

        int* dev_machine = nullptr;
        CUDA_CHECK(cudaMalloc(&dev_machine, sizeof(int) * total));
        CUDA_CHECK(cudaMemcpy(dev_machine, h_machine, sizeof(int) * total, cudaMemcpyHostToDevice));
        prob.d_machine = dev_machine;

        float* dev_duration = nullptr;
        CUDA_CHECK(cudaMalloc(&dev_duration, sizeof(float) * total));
        CUDA_CHECK(cudaMemcpy(dev_duration, h_duration, sizeof(float) * total, cudaMemcpyHostToDevice));
        prob.d_duration = dev_duration;
        return prob;
    }

    /** Frees whichever of the two tables is set and clears the pointers. */
    void destroy() {
        if (d_machine != nullptr) {
            cudaFree(const_cast<int*>(d_machine));
            d_machine = nullptr;
        }
        if (d_duration != nullptr) {
            cudaFree(const_cast<float*>(d_duration));
            d_duration = nullptr;
        }
    }
};
// ============================================================
// 编码方案 BPermutation 多重集(工序排列编码)
// ============================================================
// data[0] 是长度 J*O 的排列,值域 [0, J),每个值出现 O 次
// 从左到右扫描:第 t 次遇到值 j → 安排工件 j 的第 t 道工序
// 贪心解码:每道工序安排在"最早可行时间"(满足工序顺序 + 机器空闲)
/**
 * Job-shop scheduling, operation-permutation encoding.
 *
 * data[0] has length J*O with values in [0, J); the t-th occurrence of value
 * j (scanning left to right) schedules operation t of job j at its earliest
 * feasible time.
 *
 * The decoder keeps per-thread scratch state for at most MAX_JOBS jobs and
 * MAX_MACHINES machines. Instances outside those limits, and machine ids
 * outside [0, num_machines), are scored with the same 1e9 sentinel already
 * used for malformed solutions instead of indexing past the scratch arrays.
 */
struct JSPPermProblem : ProblemBase<JSPPermProblem, 1, 64> {
    // Capacities of the decoder's per-thread scratch arrays.
    static constexpr int MAX_JOBS = 8;
    static constexpr int MAX_MACHINES = 8;

    const int* d_machine;     // machine required by each operation [J*O]
    const float* d_duration;  // duration of each operation [J*O]
    int num_jobs;
    int num_ops;
    int num_machines;

    /**
     * Greedy decode: builds a schedule from the permutation and returns its
     * makespan.
     *
     * Returns 1e9 when the instance exceeds MAX_JOBS / MAX_MACHINES, when row
     * 0 is shorter than J*O, when an entry is not a valid job index, or when
     * an operation names a machine outside [0, num_machines).
     */
    __device__ float decode_and_makespan(const Sol& sol) const {
        // Refuse sizes the fixed scratch arrays below cannot hold.
        if (num_jobs > MAX_JOBS || num_machines > MAX_MACHINES) return 1e9f;

        int total = num_jobs * num_ops;
        int size = sol.dim2_sizes[0];
        if (size < total) return 1e9f;

        float job_avail[MAX_JOBS];        // earliest start of each job's next operation
        float mach_avail[MAX_MACHINES];   // earliest idle time of each machine
        int job_next_op[MAX_JOBS];        // index of each job's next unscheduled operation
        for (int j = 0; j < num_jobs; j++) { job_avail[j] = 0.0f; job_next_op[j] = 0; }
        for (int m = 0; m < num_machines; m++) mach_avail[m] = 0.0f;

        float makespan = 0.0f;
        for (int k = 0; k < total; k++) {
            int j = sol.data[0][k];
            if (j < 0 || j >= num_jobs) return 1e9f;
            int op = job_next_op[j];
            if (op >= num_ops) continue;   // this job is already fully scheduled
            int flat = j * num_ops + op;
            int m = d_machine[flat];
            // A machine id outside the initialized range would read or write
            // past mach_avail; treat it as an infeasible instance.
            if (m < 0 || m >= num_machines) return 1e9f;
            float dur = d_duration[flat];
            // earliest start = max(job's previous operation done, machine idle)
            float start = fmaxf(job_avail[j], mach_avail[m]);
            float end = start + dur;
            job_avail[j] = end;
            mach_avail[m] = end;
            job_next_op[j] = op + 1;
            if (end > makespan) makespan = end;
        }
        return makespan;
    }

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: decode_and_makespan
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return decode_and_makespan(sol);
            default: return 0.0f;
        }
    }

    /** The greedy decode satisfies the constraints by construction, so the penalty is always 0. */
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    /**
     * Describes the encoding: one permutation row of default length J*O in
     * which each value repeats num_ops times.
     */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = 1;
        cfg.dim2_default = num_jobs * num_ops;
        cfg.perm_repeat_count = num_ops;
        fill_obj_config(cfg);
        return cfg;
    }

    /** Bytes needed to stage the machine table followed by the duration table. */
    size_t shared_mem_bytes() const {
        int total = num_jobs * num_ops;
        return (size_t)total * (sizeof(int) + sizeof(float));
    }

    /**
     * Copies the machine table, then the duration table, into `smem` (thread
     * `tid` handles indices tid, tid + bsz, ...) and repoints both members.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copies are read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        int total = num_jobs * num_ops;
        int* sm = reinterpret_cast<int*>(smem);
        for (int i = tid; i < total; i += bsz) sm[i] = d_machine[i];
        d_machine = sm;
        float* sd = reinterpret_cast<float*>(sm + total);
        for (int i = tid; i < total; i += bsz) sd[i] = d_duration[i];
        d_duration = sd;
    }

    /**
     * Builds a problem and uploads both J*O host tables to the device.
     * Instances with num_jobs > MAX_JOBS or num_machines > MAX_MACHINES are
     * accepted here but every solution then evaluates to the 1e9 sentinel.
     */
    static JSPPermProblem create(const int* h_machine, const float* h_duration,
                                 int num_jobs, int num_ops, int num_machines) {
        JSPPermProblem prob;
        prob.num_jobs = num_jobs;
        prob.num_ops = num_ops;
        prob.num_machines = num_machines;
        int total = num_jobs * num_ops;
        int* dm;
        CUDA_CHECK(cudaMalloc(&dm, sizeof(int) * total));
        CUDA_CHECK(cudaMemcpy(dm, h_machine, sizeof(int) * total, cudaMemcpyHostToDevice));
        prob.d_machine = dm;
        float* dd;
        CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * total));
        CUDA_CHECK(cudaMemcpy(dd, h_duration, sizeof(float) * total, cudaMemcpyHostToDevice));
        prob.d_duration = dd;
        return prob;
    }

    /** Frees whichever of the two tables is set and clears the pointers. */
    void destroy() {
        if (d_machine) { cudaFree(const_cast<int*>(d_machine)); d_machine = nullptr; }
        if (d_duration) { cudaFree(const_cast<float*>(d_duration)); d_duration = nullptr; }
    }
};

View file

@ -0,0 +1,88 @@
/**
* knapsack.cuh - 0-1 背包问题
*
* 继承 ProblemBase，使用 ObjDef 目标注册机制
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"
/**
 * 0-1 knapsack with a binary encoding: data[0][i] != 0 selects item i.
 * The objective is the total selected value (maximized); selected weight
 * beyond `capacity` is reported as a penalty.
 */
struct KnapsackProblem : ProblemBase<KnapsackProblem, 1, 32> {
    // Problem data (d_weights holds item weights, not objective weights).
    const float* d_weights;
    const float* d_values;
    float capacity;
    int n;

    // ---- objective evaluation ----

    /** Sum of d_values[i] over the selected entries of row 0. */
    __device__ float calc_total_value(const Sol& sol) const {
        const int* picks = sol.data[0];
        const int len = sol.dim2_sizes[0];
        float value_sum = 0.0f;
        for (int item = 0; item < len; ++item) {
            if (picks[item] != 0) {
                value_sum += d_values[item];
            }
        }
        return value_sum;
    }

    // ---- objective table (OBJ_DEFS and compute_obj must correspond one-to-one) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Maximize, 1.0f, 0.0f},   // index 0: calc_total_value
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) {
            return calc_total_value(sol);   // OBJ_DEFS[0]
        }
        return 0.0f;
    }

    /** Selected weight in excess of `capacity`, or 0 when within capacity. */
    __device__ float compute_penalty(const Sol& sol) const {
        const int* picks = sol.data[0];
        const int len = sol.dim2_sizes[0];
        float weight_sum = 0.0f;
        for (int item = 0; item < len; ++item) {
            if (picks[item] != 0) {
                weight_sum += d_weights[item];
            }
        }
        const float excess = weight_sum - capacity;
        if (excess > 0.0f) {
            return excess;
        }
        return 0.0f;
    }

    /** Describes the encoding: one binary row of default length n. */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Binary;
        cfg.dim2_default = n;
        cfg.dim1 = 1;
        fill_obj_config(cfg);
        return cfg;
    }

    // ---- shared memory interface ----

    /** Bytes needed to stage the n weights followed by the n values. */
    size_t shared_mem_bytes() const {
        return 2 * (size_t)n * sizeof(float);
    }

    /**
     * Copies weights into smem[0..n) and values into smem[n..2n) (thread
     * `tid` handles indices tid, tid + bsz, ...) and repoints both members.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copies are read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* weights_out = reinterpret_cast<float*>(smem);
        float* values_out = weights_out + n;
        int idx = tid;
        while (idx < n) {
            weights_out[idx] = d_weights[idx];
            values_out[idx] = d_values[idx];
            idx += bsz;
        }
        d_weights = weights_out;
        d_values = values_out;
    }

    /** Builds a problem and uploads the n host weights and n host values to the device. */
    static KnapsackProblem create(const float* hw, const float* hv, int n, float cap) {
        KnapsackProblem prob;
        prob.capacity = cap;
        prob.n = n;
        float* dev_w = nullptr;
        float* dev_v = nullptr;
        CUDA_CHECK(cudaMalloc(&dev_w, sizeof(float)*n));
        CUDA_CHECK(cudaMalloc(&dev_v, sizeof(float)*n));
        CUDA_CHECK(cudaMemcpy(dev_w, hw, sizeof(float)*n, cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(dev_v, hv, sizeof(float)*n, cudaMemcpyHostToDevice));
        prob.d_weights = dev_w;
        prob.d_values = dev_v;
        return prob;
    }

    /** Frees whichever of the two arrays is set and clears the pointers. */
    void destroy() {
        if (d_weights != nullptr) {
            cudaFree(const_cast<float*>(d_weights));
        }
        if (d_values != nullptr) {
            cudaFree(const_cast<float*>(d_values));
        }
        d_weights = nullptr;
        d_values = nullptr;
    }
};

View file

@ -0,0 +1,83 @@
/**
* load_balance.cuh - 离散负载均衡问题（Integer 编码验证）
*
* N 个任务分配到 M 台机器，每个任务有一个处理时间 p[i]。
* 决策变量：data[0][i] ∈ [0, M-1]，表示任务 i 分配到哪台机器。
* 目标：最小化 makespan（最大机器负载）。
*
* 已知 NP-hard（等价于 multiprocessor scheduling / load balancing）。
* LPT（最长处理时间优先）贪心可得 4/3 近似。
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
/**
 * Discrete load balancing with an integer encoding: data[0][i] is the
 * machine that task i runs on, and the objective is the largest per-machine
 * load (makespan).
 *
 * The per-thread load accumulator holds MAX_MACHINES entries. Requests for
 * more machines are capped at MAX_MACHINES (see machine_limit()) instead of
 * overrunning that array.
 */
struct LoadBalanceProblem : ProblemBase<LoadBalanceProblem, 1, 64> {
    // Capacity of the per-thread `load` scratch array.
    static constexpr int MAX_MACHINES = 32;

    const float* d_proc_time;  // task processing times [N]
    int n;                     // number of tasks
    int m;                     // requested number of machines

    /**
     * Number of machines actually usable: m capped at MAX_MACHINES.
     * Written with an `if` rather than a ternary so MAX_MACHINES is only read
     * by value and needs no out-of-class definition.
     */
    __host__ __device__ int machine_limit() const {
        if (m > MAX_MACHINES) return MAX_MACHINES;
        return m;
    }

    /** Accumulates each task's time onto its machine and returns the largest load. */
    __device__ float calc_makespan(const Sol& sol) const {
        const int limit = machine_limit();
        float load[MAX_MACHINES] = {};
        int size = sol.dim2_sizes[0];
        for (int i = 0; i < size; i++) {
            int machine = sol.data[0][i];
            // Bounded by `limit`, not m, so the write stays inside `load`.
            if (machine >= 0 && machine < limit)
                load[machine] += d_proc_time[i];
        }
        float max_load = 0.0f;
        for (int j = 0; j < limit; j++)
            if (load[j] > max_load) max_load = load[j];
        return max_load;
    }

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: makespan
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_makespan(sol);
            default: return 0.0f;
        }
    }

    /** Unconstrained: every assignment is feasible, so the penalty is 0. */
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    /**
     * Describes the encoding: one integer row of default length n with values
     * in [0, machine_limit() - 1], so the advertised range never exceeds what
     * calc_makespan can index.
     */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Integer;
        cfg.dim1 = 1; cfg.dim2_default = n;
        cfg.value_lower_bound = 0;
        cfg.value_upper_bound = machine_limit() - 1;
        fill_obj_config(cfg);
        return cfg;
    }

    /** Bytes needed to stage the n processing times. */
    size_t shared_mem_bytes() const {
        return (size_t)n * sizeof(float);
    }

    /**
     * Copies the n processing times into `smem` (thread `tid` handles indices
     * tid, tid + bsz, ...) and repoints d_proc_time at that buffer.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copy is read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* sp = reinterpret_cast<float*>(smem);
        for (int i = tid; i < n; i += bsz) sp[i] = d_proc_time[i];
        d_proc_time = sp;
    }

    /**
     * Builds a problem and uploads the n host processing times to the device.
     * `m` is stored as given; values above MAX_MACHINES are capped by
     * machine_limit() wherever machines are indexed.
     */
    static LoadBalanceProblem create(const float* h_proc_time, int n, int m) {
        LoadBalanceProblem prob;
        prob.n = n; prob.m = m;
        float* dp;
        CUDA_CHECK(cudaMalloc(&dp, sizeof(float) * n));
        CUDA_CHECK(cudaMemcpy(dp, h_proc_time, sizeof(float) * n, cudaMemcpyHostToDevice));
        prob.d_proc_time = dp;
        return prob;
    }

    /** Frees d_proc_time when set and clears the pointer. */
    void destroy() {
        if (d_proc_time) cudaFree(const_cast<float*>(d_proc_time));
        d_proc_time = nullptr;
    }
};

View file

@ -0,0 +1,84 @@
/**
* qap.cuh - 二次分配问题 (Quadratic Assignment Problem)
*
* N 个设施分配到 N 个位置（排列编码）。
* 决策变量：data[0][i] = 设施 i 分配到的位置。
* 目标：Minimize sum(flow[i][j] * dist[perm[i]][perm[j]])
*
* 验证实例：自定义 5x5
* flow: 设施间的物流量
* dist: 位置间的距离
* 已知最优 = 58
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
/**
 * Quadratic assignment with a permutation encoding: data[0][i] is the
 * location of facility i, and the objective is
 * sum over (i, j) of flow[i][j] * dist[perm[i]][perm[j]].
 */
struct QAPProblem : ProblemBase<QAPProblem, 1, 32> {
    const float* d_flow;  // flow matrix [N*N]
    const float* d_dist;  // distance matrix [N*N]
    int n;

    /** Accumulates flow[i][j] * dist[perm[i]][perm[j]] over all ordered pairs, row by row. */
    __device__ float calc_cost(const Sol& sol) const {
        const int len = sol.dim2_sizes[0];
        float acc = 0.0f;
        for (int fa = 0; fa < len; ++fa) {
            const int flow_row = fa * n;
            const int dist_row = sol.data[0][fa] * n;
            for (int fb = 0; fb < len; ++fb) {
                acc += d_flow[flow_row + fb] * d_dist[dist_row + sol.data[0][fb]];
            }
        }
        return acc;
    }

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: calc_cost
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) {
            return calc_cost(sol);
        }
        return 0.0f;
    }

    /** This problem reports no penalty. */
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    /** Describes the encoding: one permutation row of default length n. */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim2_default = n;
        cfg.dim1 = 1;
        fill_obj_config(cfg);
        return cfg;
    }

    /** Bytes needed to stage the flow matrix followed by the distance matrix. */
    size_t shared_mem_bytes() const {
        return 2 * (size_t)n * n * sizeof(float);
    }

    /**
     * Copies flow into smem[0..n*n) and dist into smem[n*n..2*n*n) (thread
     * `tid` handles indices tid, tid + bsz, ...) and repoints both members.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copies are read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* flow_out = reinterpret_cast<float*>(smem);
        float* dist_out = flow_out + n * n;
        const int cells = n * n;
        int idx = tid;
        while (idx < cells) {
            flow_out[idx] = d_flow[idx];
            dist_out[idx] = d_dist[idx];
            idx += bsz;
        }
        d_flow = flow_out;
        d_dist = dist_out;
    }

    /** Builds a problem and uploads both n*n host matrices to the device. */
    static QAPProblem create(const float* h_flow, const float* h_dist, int n) {
        QAPProblem prob;
        prob.n = n;
        float* dev_flow = nullptr;
        float* dev_dist = nullptr;
        CUDA_CHECK(cudaMalloc(&dev_flow, sizeof(float) * n * n));
        CUDA_CHECK(cudaMalloc(&dev_dist, sizeof(float) * n * n));
        CUDA_CHECK(cudaMemcpy(dev_flow, h_flow, sizeof(float) * n * n, cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(dev_dist, h_dist, sizeof(float) * n * n, cudaMemcpyHostToDevice));
        prob.d_flow = dev_flow;
        prob.d_dist = dev_dist;
        return prob;
    }

    /** Frees whichever of the two matrices is set and clears the pointers. */
    void destroy() {
        if (d_flow != nullptr) {
            cudaFree(const_cast<float*>(d_flow));
        }
        if (d_dist != nullptr) {
            cudaFree(const_cast<float*>(d_dist));
        }
        d_flow = nullptr;
        d_dist = nullptr;
    }
};

View file

@ -0,0 +1,101 @@
/**
* schedule.cuh - 排班问题
*
* 继承 ProblemBase，使用 ObjDef 目标注册机制
* 2 个目标：总成本（min）+ 不公平度（min，权重更高）
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"
/**
 * Staff scheduling with a binary encoding: data[d][e] != 0 means employee e
 * works on day d. Two minimized objectives (total cost and unfairness); the
 * penalty measures the deviation from `required` staff per day.
 */
struct ScheduleProblem : ProblemBase<ScheduleProblem, 8, 16> {
    const float* d_cost;   // per-shift cost, indexed [d * emps + e]
    int days, emps, required;

    // ---- objective evaluation ----

    /** Sum of d_cost[d * emps + e] over all worked (day, employee) cells. */
    __device__ float calc_total_cost(const Sol& sol) const {
        float cost_sum = 0.0f;
        for (int day = 0; day < days; ++day) {
            const int base = day * emps;
            for (int emp = 0; emp < emps; ++emp) {
                if (sol.data[day][emp] != 0) {
                    cost_sum += d_cost[base + emp];
                }
            }
        }
        return cost_sum;
    }

    /** Difference between the most and the fewest days worked by any employee. */
    __device__ float calc_unfairness(const Sol& sol) const {
        int worked[D2];
        for (int emp = 0; emp < emps; ++emp) {
            worked[emp] = 0;
        }
        for (int day = 0; day < days; ++day) {
            for (int emp = 0; emp < emps; ++emp) {
                if (sol.data[day][emp] != 0) {
                    ++worked[emp];
                }
            }
        }
        int most = 0;
        int least = days;
        for (int emp = 0; emp < emps; ++emp) {
            const int w = worked[emp];
            if (w > most) most = w;
            if (w < least) least = w;
        }
        return (float)(most - least);
    }

    // ---- objective table (OBJ_DEFS and compute_obj must correspond one-to-one) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: calc_total_cost
        {ObjDir::Minimize, 5.0f, 0.0f},   // index 1: calc_unfairness
    };

    /** Returns objective `idx`; any index other than 0 or 1 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) {
            return calc_total_cost(sol);   // OBJ_DEFS[0]
        }
        if (idx == 1) {
            return calc_unfairness(sol);   // OBJ_DEFS[1]
        }
        return 0.0f;
    }

    /** Sum over days of |staff on duty - required|. */
    __device__ float compute_penalty(const Sol& sol) const {
        float deviation = 0.0f;
        for (int day = 0; day < days; ++day) {
            int on_duty = 0;
            for (int emp = 0; emp < emps; ++emp) {
                if (sol.data[day][emp] != 0) {
                    ++on_duty;
                }
            }
            int gap = on_duty - required;
            if (gap < 0) {
                gap = -gap;
            }
            deviation += (float)gap;
        }
        return deviation;
    }

    /** Describes the encoding: `days` fixed-length binary rows of `emps` entries. */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Binary;
        cfg.dim2_default = emps;
        cfg.dim1 = days;
        cfg.row_mode = RowMode::Fixed;
        fill_obj_config(cfg);
        return cfg;
    }

    // Falls back to full evaluation (base-class behaviour) - no evaluate_move override needed.

    // ---- shared memory interface ----

    /** Bytes needed to stage the days*emps cost table. */
    size_t shared_mem_bytes() const {
        return (size_t)days * emps * sizeof(float);
    }

    /**
     * Copies the days*emps cost entries into `smem` (thread `tid` handles
     * indices tid, tid + bsz, ...) and repoints d_cost at that buffer.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copy is read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* staged = reinterpret_cast<float*>(smem);
        const int cells = days * emps;
        int idx = tid;
        while (idx < cells) {
            staged[idx] = d_cost[idx];
            idx += bsz;
        }
        d_cost = staged;
    }

    /** Builds a problem and uploads the days*emps host cost table to the device. */
    static ScheduleProblem create(const float* hc, int days, int emps, int req) {
        ScheduleProblem prob;
        prob.required = req;
        prob.emps = emps;
        prob.days = days;
        float* dev = nullptr;
        CUDA_CHECK(cudaMalloc(&dev, sizeof(float)*days*emps));
        CUDA_CHECK(cudaMemcpy(dev, hc, sizeof(float)*days*emps, cudaMemcpyHostToDevice));
        prob.d_cost = dev;
        return prob;
    }

    /** Frees d_cost when set and clears the pointer. */
    void destroy() {
        if (d_cost != nullptr) {
            cudaFree(const_cast<float*>(d_cost));
            d_cost = nullptr;
        }
    }
};

View file

@ -0,0 +1,110 @@
/**
* tsp.cuh - TSP 问题定义
*
* 继承 ProblemBase，使用 ObjDef 目标注册机制
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"
/**
 * Travelling salesman over a single permutation row: the objective is the
 * length of the closed tour route[0] -> route[1] -> ... -> route[0].
 */
struct TSPProblem : ProblemBase<TSPProblem, 1, 64> {
    // Problem data
    const float* d_dist;  // distance matrix copy made by create(); repointed by load_shared()
    const float* h_dist;  // host-side distance matrix (used by init_relation_matrix); never freed here
    int n;

    // ---- objective evaluation ----

    /** Sums d_dist[from * n + to] along the tour, including the edge back to the start. */
    __device__ float calc_total_distance(const Sol& sol) const {
        const int* tour = sol.data[0];
        const int len = sol.dim2_sizes[0];
        float length = 0.0f;
        for (int pos = 0; pos < len; ++pos) {
            // Successor position, wrapping from the last stop back to the first.
            const int succ = (pos + 1 == len) ? 0 : pos + 1;
            length += d_dist[tour[pos] * n + tour[succ]];
        }
        return length;
    }

    // ---- objective table (OBJ_DEFS and compute_obj must correspond one-to-one) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: calc_total_distance
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) {
            return calc_total_distance(sol);   // OBJ_DEFS[0]
        }
        return 0.0f;
    }

    /** TSP is unconstrained: the penalty is always 0. */
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    // ---- config (encoding/dimensions here; objectives filled in by the base class) ----
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim2_default = n;
        cfg.dim1 = 1;
        fill_obj_config(cfg);
        return cfg;
    }

    // ---- shared memory interface ----
    static constexpr size_t SMEM_LIMIT = 48 * 1024;

    /** Bytes requested for the staged distance matrix, or 0 when it exceeds SMEM_LIMIT. */
    size_t shared_mem_bytes() const {
        const size_t bytes = (size_t)n * n * sizeof(float);
        if (bytes > SMEM_LIMIT) {
            return 0;
        }
        return bytes;
    }

    /** Size of the full distance matrix in bytes, regardless of SMEM_LIMIT. */
    size_t working_set_bytes() const {
        return (size_t)n * n * sizeof(float);
    }

    /**
     * Copies the n*n distances into `smem` (thread `tid` handles indices
     * tid, tid + bsz, ...) and repoints d_dist at that buffer.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copy is read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* staged = reinterpret_cast<float*>(smem);
        const int cells = n * n;
        int idx = tid;
        while (idx < cells) {
            staged[idx] = d_dist[idx];
            idx += bsz;
        }
        d_dist = staged;
    }

    /**
     * Distance prior: the closer two cities, the higher their G/O score.
     * With proximity = 1 - dist / max_dist, writes G = 0.3 * proximity and
     * O = 0.1 * proximity for every off-diagonal pair. Does nothing when
     * h_dist is null, N != n, or no distance is positive.
     */
    void init_relation_matrix(float* G, float* O, int N) const {
        if (h_dist == nullptr || N != n) return;

        float farthest = 0.0f;
        for (int a = 0; a < N; ++a) {
            for (int b = 0; b < N; ++b) {
                const float d = h_dist[a * N + b];
                if (d > farthest) farthest = d;
            }
        }
        if (farthest <= 0.0f) return;

        for (int a = 0; a < N; ++a) {
            for (int b = 0; b < N; ++b) {
                if (a != b) {
                    const int cell = a * N + b;
                    const float closeness = 1.0f - h_dist[cell] / farthest;
                    G[cell] = closeness * 0.3f;
                    O[cell] = closeness * 0.1f;
                }
            }
        }
    }

    /** Writes {h_dist, n} to out[0] and returns 1; returns 0 if there is no room or no host matrix. */
    int heuristic_matrices(HeuristicMatrix* out, int max_count) const {
        if (max_count >= 1 && h_dist != nullptr) {
            out[0] = {h_dist, n};
            return 1;
        }
        return 0;
    }

    /** Builds a problem: keeps the host pointer and uploads an n*n copy to the device. */
    static TSPProblem create(const float* h_dist_ptr, int n) {
        TSPProblem prob;
        prob.h_dist = h_dist_ptr;
        prob.n = n;
        float* dev = nullptr;
        CUDA_CHECK(cudaMalloc(&dev, sizeof(float) * n * n));
        CUDA_CHECK(cudaMemcpy(dev, h_dist_ptr, sizeof(float) * n * n, cudaMemcpyHostToDevice));
        prob.d_dist = dev;
        return prob;
    }

    /** Frees d_dist when set and clears both pointers. */
    void destroy() {
        if (d_dist != nullptr) {
            cudaFree(const_cast<float*>(d_dist));
            d_dist = nullptr;
        }
        h_dist = nullptr;
    }
};

View file

@ -0,0 +1,107 @@
/**
* tsp_large.cuh - 大规模 TSP 问题定义 (最多 256 城市)
*
* 继承 ProblemBase，逻辑与 tsp.cuh 一致，仅 D2 上限不同
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"
/**
 * Travelling salesman with a row capacity of 256: the objective is the
 * length of the closed tour described by row 0.
 */
struct TSPLargeProblem : ProblemBase<TSPLargeProblem, 1, 256> {
    const float* d_dist;  // distance matrix copy made by create(); repointed by load_shared()
    const float* h_dist;  // host-side distance matrix; never freed here
    int n;

    // ---- objective evaluation ----

    /** Sums d_dist[from * n + to] along the tour, including the edge back to the start. */
    __device__ float calc_total_distance(const Sol& sol) const {
        const int* tour = sol.data[0];
        const int len = sol.dim2_sizes[0];
        float length = 0.0f;
        for (int pos = 0; pos < len; ++pos) {
            // Successor position, wrapping from the last stop back to the first.
            const int succ = (pos + 1 == len) ? 0 : pos + 1;
            length += d_dist[tour[pos] * n + tour[succ]];
        }
        return length;
    }

    // ---- objective table (OBJ_DEFS and compute_obj must correspond one-to-one) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: calc_total_distance
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) {
            return calc_total_distance(sol);   // OBJ_DEFS[0]
        }
        return 0.0f;
    }

    /** This problem reports no penalty. */
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    /** Describes the encoding: one permutation row of default length n. */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim2_default = n;
        cfg.dim1 = 1;
        fill_obj_config(cfg);
        return cfg;
    }

    static constexpr size_t SMEM_LIMIT = 48 * 1024;

    /** Bytes requested for the staged distance matrix, or 0 when it exceeds SMEM_LIMIT. */
    size_t shared_mem_bytes() const {
        const size_t bytes = (size_t)n * n * sizeof(float);
        if (bytes > SMEM_LIMIT) {
            return 0;
        }
        return bytes;
    }

    /** Actual size of the distance matrix (whether or not it is staged via load_shared). */
    size_t working_set_bytes() const {
        return (size_t)n * n * sizeof(float);
    }

    /**
     * Copies the n*n distances into `smem` (thread `tid` handles indices
     * tid, tid + bsz, ...) and repoints d_dist at that buffer.
     * NOTE(review): no barrier is issued here; the caller presumably
     * synchronizes before the copy is read - confirm.
     */
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* staged = reinterpret_cast<float*>(smem);
        const int cells = n * n;
        int idx = tid;
        while (idx < cells) {
            staged[idx] = d_dist[idx];
            idx += bsz;
        }
        d_dist = staged;
    }

    /**
     * Seeds the relation matrices from the host distance matrix. With
     * proximity = 1 - dist / max_dist, writes G = 0.3 * proximity and
     * O = 0.1 * proximity for every off-diagonal pair. Does nothing when
     * h_dist is null, N != n, or no distance is positive.
     */
    void init_relation_matrix(float* G, float* O, int N) const {
        if (h_dist == nullptr || N != n) return;

        float farthest = 0.0f;
        for (int a = 0; a < N; ++a) {
            for (int b = 0; b < N; ++b) {
                const float d = h_dist[a * N + b];
                if (d > farthest) farthest = d;
            }
        }
        if (farthest <= 0.0f) return;

        for (int a = 0; a < N; ++a) {
            for (int b = 0; b < N; ++b) {
                if (a != b) {
                    const int cell = a * N + b;
                    const float closeness = 1.0f - h_dist[cell] / farthest;
                    G[cell] = closeness * 0.3f;
                    O[cell] = closeness * 0.1f;
                }
            }
        }
    }

    /** Writes {h_dist, n} to out[0] and returns 1; returns 0 if there is no room or no host matrix. */
    int heuristic_matrices(HeuristicMatrix* out, int max_count) const {
        if (max_count >= 1 && h_dist != nullptr) {
            out[0] = {h_dist, n};
            return 1;
        }
        return 0;
    }

    /** Builds a problem: keeps the host pointer and uploads an n*n copy to the device. */
    static TSPLargeProblem create(const float* h_dist_ptr, int n) {
        TSPLargeProblem prob;
        prob.h_dist = h_dist_ptr;
        prob.n = n;
        float* dev = nullptr;
        CUDA_CHECK(cudaMalloc(&dev, sizeof(float) * n * n));
        CUDA_CHECK(cudaMemcpy(dev, h_dist_ptr, sizeof(float) * n * n, cudaMemcpyHostToDevice));
        prob.d_dist = dev;
        return prob;
    }

    /** Frees d_dist when set and clears both pointers. */
    void destroy() {
        if (d_dist != nullptr) {
            cudaFree(const_cast<float*>(d_dist));
            d_dist = nullptr;
        }
        h_dist = nullptr;
    }
};

View file

@ -0,0 +1,99 @@
/**
* tsp_xlarge.cuh - 超大规模 TSP 问题定义 (最多 512 城市)
*
* 继承 ProblemBase，逻辑与 tsp_large.cuh 一致，D2=512
* 注意：距离矩阵 512×512×4B = 1MB，远超 48KB shared memory
* 因此 shared_mem_bytes() 返回 0，距离矩阵留在 global memory
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"
/**
 * Travelling salesman with a row capacity of 512. The distance matrix is
 * never staged: shared_mem_bytes() is 0 and load_shared() is a no-op, so
 * d_dist keeps pointing at the buffer allocated by create().
 */
struct TSPXLargeProblem : ProblemBase<TSPXLargeProblem, 1, 512> {
    const float* d_dist;  // distance matrix copy made by create()
    const float* h_dist;  // host-side distance matrix (used by init_relation_matrix); never freed here
    int n;

    /** Sums d_dist[from * n + to] along the tour, including the edge back to the start. */
    __device__ float calc_total_distance(const Sol& sol) const {
        const int* tour = sol.data[0];
        const int len = sol.dim2_sizes[0];
        float length = 0.0f;
        for (int pos = 0; pos < len; ++pos) {
            // Successor position, wrapping from the last stop back to the first.
            const int succ = (pos + 1 == len) ? 0 : pos + 1;
            length += d_dist[tour[pos] * n + tour[succ]];
        }
        return length;
    }

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},   // index 0: calc_total_distance
    };

    /** Returns objective `idx`; any index other than 0 yields 0. */
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) {
            return calc_total_distance(sol);
        }
        return 0.0f;
    }

    /** This problem reports no penalty. */
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    /** Describes the encoding: one permutation row of default length n. */
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim2_default = n;
        cfg.dim1 = 1;
        fill_obj_config(cfg);
        return cfg;
    }

    // The distance matrix is too large to stage, so no bytes are requested.
    size_t shared_mem_bytes() const {
        return 0;
    }

    /** Intentionally empty: nothing is staged for this problem. */
    __device__ void load_shared(char*, int, int) {}

    /** Size of the full distance matrix in bytes. */
    size_t working_set_bytes() const {
        return (size_t)n * n * sizeof(float);
    }

    /**
     * Initializes the G/O priors from the distance matrix: closer means a
     * higher score. With proximity = 1 - dist / max_dist, writes
     * G = 0.3 * proximity and O = 0.1 * proximity for every off-diagonal
     * pair. Does nothing when h_dist is null, N != n, or no distance is
     * positive.
     */
    void init_relation_matrix(float* G, float* O, int N) const {
        if (h_dist == nullptr || N != n) return;

        // Largest distance, used for normalization.
        float farthest = 0.0f;
        for (int a = 0; a < N; ++a) {
            for (int b = 0; b < N; ++b) {
                const float d = h_dist[a * N + b];
                if (d > farthest) farthest = d;
            }
        }
        if (farthest <= 0.0f) return;

        for (int a = 0; a < N; ++a) {
            for (int b = 0; b < N; ++b) {
                if (a != b) {
                    const int cell = a * N + b;
                    const float closeness = 1.0f - h_dist[cell] / farthest;
                    // Kept deliberately weak so later EMA updates have room to move it.
                    G[cell] = closeness * 0.3f;
                    // A small signal for O as well, computed the same way for (a, b).
                    O[cell] = closeness * 0.1f;
                }
            }
        }
    }

    /** Writes {h_dist, n} to out[0] and returns 1; returns 0 if there is no room or no host matrix. */
    int heuristic_matrices(HeuristicMatrix* out, int max_count) const {
        if (max_count >= 1 && h_dist != nullptr) {
            out[0] = {h_dist, n};
            return 1;
        }
        return 0;
    }

    /** Builds a problem: keeps the host pointer and uploads an n*n copy to the device. */
    static TSPXLargeProblem create(const float* h_dist_ptr, int n) {
        TSPXLargeProblem prob;
        prob.h_dist = h_dist_ptr;   // keep the host pointer
        prob.n = n;
        float* dev = nullptr;
        CUDA_CHECK(cudaMalloc(&dev, sizeof(float) * n * n));
        CUDA_CHECK(cudaMemcpy(dev, h_dist_ptr, sizeof(float) * n * n, cudaMemcpyHostToDevice));
        prob.d_dist = dev;
        return prob;
    }

    /** Frees d_dist when set and clears both pointers. */
    void destroy() {
        if (d_dist != nullptr) {
            cudaFree(const_cast<float*>(d_dist));
            d_dist = nullptr;
        }
        h_dist = nullptr;
    }
};

View file

@ -0,0 +1,184 @@
/**
* vrp.cuh - 容量约束车辆路径问题 (CVRP)
*
* 继承 ProblemBase，使用 ObjDef 目标注册机制
* 多行编码（D1=K 条路线，分区初始化 + 跨行算子）
*/
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"
#include "gpu_cache.cuh"
// Capacitated VRP: every solution row is one route of customer indices 0..n-1.
// Distances are looked up in a matrix that also contains the depot as node 0,
// so customer c corresponds to matrix node c + 1.
struct VRPProblem : ProblemBase<VRPProblem, 8, 64> {
    // GPU data
    const float* d_dist;    // distance matrix incl. depot, stride * stride floats
    const float* d_demand;  // demand of each customer, n floats
    const float* h_dist;    // host-side distance matrix (incl. depot), read by init_relation_matrix
    int n;                  // number of customers (depot not counted)
    int stride;             // row length of the distance matrix; create() sets it to n + 1
    float capacity;         // maximum load of a single route
    int num_vehicles;       // number of solution rows (routes) that are evaluated
    int max_vehicles;       // non-empty routes beyond this count are penalized
    GpuCache cache;         // route-distance cache; treated as disabled while cache.keys is null

    // ---- objective computation ----

    // Length of one route: depot -> route[0] -> ... -> route[size-1] -> depot.
    // An empty route costs nothing.
    __device__ float compute_route_dist(const int* route, int size) const {
        if (size == 0) return 0.0f;
        float dist = 0.0f;
        int prev = 0;  // start at the depot (node 0)
        for (int j = 0; j < size; j++) {
            int node = route[j] + 1;  // customer index -> matrix node
            dist += d_dist[prev * stride + node];
            prev = node;
        }
        dist += d_dist[prev * stride + 0];  // return leg to the depot
        return dist;
    }

    // Route length with optional memoization. When the cache is disabled the
    // distance is computed directly; otherwise the route hash is looked up,
    // and on a miss the distance is computed and inserted. Hit and miss
    // counters are updated atomically.
    __device__ float eval_route(const int* route, int size) const {
        if (size == 0) return 0.0f;
        if (!cache.keys) return compute_route_dist(route, size);
        uint64_t key = route_hash(route, size);
        float dist;
        if (cache_lookup(cache, key, dist)) {
            atomicAdd(cache.d_hits, 1);
            return dist;
        }
        dist = compute_route_dist(route, size);
        cache_insert(cache, key, dist);
        atomicAdd(cache.d_misses, 1);
        return dist;
    }

    // Sum of the route lengths of the first num_vehicles rows of the solution.
    __device__ float calc_total_distance(const Sol& sol) const {
        float total = 0.0f;
        for (int r = 0; r < num_vehicles; r++)
            total += eval_route(sol.data[r], sol.dim2_sizes[r]);
        return total;
    }

    // ---- objective definitions (OBJ_DEFS and compute_obj must match one-to-one) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},  // case 0: calc_total_distance
    };

    // Returns the value of objective `idx`; unknown indices yield 0.
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_total_distance(sol);  // OBJ_DEFS[0]
            default: return 0.0f;
        }
    }

    // Constraint penalty of a solution:
    //   - 100 per unit of load above `capacity`, for every non-empty route;
    //   - 1000 per non-empty route beyond `max_vehicles`.
    __device__ float compute_penalty(const Sol& sol) const {
        float penalty = 0.0f;
        int active = 0;  // number of non-empty routes
        for (int r = 0; r < num_vehicles; r++) {
            int size = sol.dim2_sizes[r];
            if (size == 0) continue;
            active++;
            float load = 0.0f;
            for (int j = 0; j < size; j++)
                load += d_demand[sol.data[r][j]];
            if (load > capacity)
                penalty += (load - capacity) * 100.0f;
        }
        if (active > max_vehicles)
            penalty += (float)(active - max_vehicles) * 1000.0f;
        return penalty;
    }

    // Solver configuration: permutation encoding with num_vehicles rows in
    // partition mode, n elements in total, cross-row probability 0.3.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = num_vehicles;
        cfg.dim2_default = 0;
        fill_obj_config(cfg);
        cfg.cross_row_prob = 0.3f;
        cfg.row_mode = RowMode::Partition;
        cfg.total_elements = n;
        return cfg;
    }

    // ---- shared memory interface ----
    static constexpr size_t SMEM_LIMIT = 48 * 1024;

    // Bytes load_shared() writes (distance matrix + demands), or 0 when that
    // exceeds SMEM_LIMIT.
    // NOTE(review): 0 presumably tells the caller not to use shared memory - confirm.
    size_t shared_mem_bytes() const {
        size_t dist_bytes = (size_t)stride * stride * sizeof(float);
        size_t demand_bytes = (size_t)n * sizeof(float);
        size_t total = dist_bytes + demand_bytes;
        return total <= SMEM_LIMIT ? total : 0;
    }

    // Total size of the distance matrix plus the demand vector, without any limit.
    size_t working_set_bytes() const {
        return (size_t)stride * stride * sizeof(float) + (size_t)n * sizeof(float);
    }

    // Copies the distance matrix followed by the demand vector into `smem`
    // (each thread handles indices tid, tid + bsz, ...) and repoints
    // d_dist / d_demand at the copies.
    // NOTE(review): no barrier is issued here; the caller presumably
    // synchronizes the block before the copies are read - confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* sd = reinterpret_cast<float*>(smem);
        int dist_size = stride * stride;
        for (int i = tid; i < dist_size; i += bsz) sd[i] = d_dist[i];
        d_dist = sd;
        float* sdem = sd + dist_size;  // demands are stored right after the matrix
        for (int i = tid; i < n; i += bsz) sdem[i] = d_demand[i];
        d_demand = sdem;
    }

    // Enables the route-distance cache; `cap` is forwarded to GpuCache::allocate.
    // A cache that is already enabled is released first so that calling this
    // more than once does not leak the previous allocation.
    // Expects `cache` to be initialized, as create() does.
    void enable_cache(int cap = 65536) {
        if (cache.keys) cache.destroy();
        cache = GpuCache::allocate(cap);
    }

    void print_cache_stats() const { cache.print_stats(); }

    // Distance prior: customers that are close to each other get higher G/O scores.
    // Note: h_dist includes the depot (stride x stride); element indices 0..n-1
    // correspond to nodes 1..n. Diagonal entries are left untouched, and nothing
    // is written when there is no host matrix, N != n, or all distances are <= 0.
    void init_relation_matrix(float* G, float* O, int N) const {
        if (!h_dist || N != n) return;
        float max_d = 0.0f;
        for (int i = 0; i < N; i++)
            for (int j = 0; j < N; j++) {
                float d = h_dist[(i + 1) * stride + (j + 1)];  // skip the depot
                if (d > max_d) max_d = d;
            }
        if (max_d <= 0.0f) return;
        for (int i = 0; i < N; i++)
            for (int j = 0; j < N; j++) {
                if (i == j) continue;
                float d = h_dist[(i + 1) * stride + (j + 1)];
                float proximity = 1.0f - d / max_d;  // 1 = closest, 0 = farthest pair
                G[i * N + j] = proximity * 0.3f;
                O[i * N + j] = proximity * 0.1f;
            }
    }

    // Builds a problem instance.
    //   h_dist_ptr   host distance matrix incl. depot, (n+1) * (n+1) floats; the
    //                pointer is kept (not copied) for init_relation_matrix
    //   h_demand     host demand vector, n floats
    //   n            number of customers
    //   capacity     load limit per route
    //   num_vehicles number of routes in a solution
    //   max_vehicles number of non-empty routes allowed before a penalty applies
    // Device copies of both arrays are allocated and uploaded; the cache starts
    // disabled. Release the instance with destroy().
    static VRPProblem create(const float* h_dist_ptr, const float* h_demand,
                             int n, float capacity,
                             int num_vehicles, int max_vehicles) {
        VRPProblem prob;
        prob.n = n;
        prob.stride = n + 1;
        prob.capacity = capacity;
        prob.num_vehicles = num_vehicles;
        prob.max_vehicles = max_vehicles;
        prob.cache = GpuCache::disabled();
        prob.h_dist = h_dist_ptr;
        int n_nodes = n + 1;
        float* dd;
        CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * n_nodes * n_nodes));
        CUDA_CHECK(cudaMemcpy(dd, h_dist_ptr, sizeof(float) * n_nodes * n_nodes, cudaMemcpyHostToDevice));
        prob.d_dist = dd;
        float* ddem;
        CUDA_CHECK(cudaMalloc(&ddem, sizeof(float) * n));
        CUDA_CHECK(cudaMemcpy(ddem, h_demand, sizeof(float) * n, cudaMemcpyHostToDevice));
        prob.d_demand = ddem;
        return prob;
    }

    // Frees the device arrays and the cache, and clears the pointers.
    // The cache member is reset to the disabled state afterwards, mirroring
    // the null-out of the raw pointers, so no stale cache handle is left behind.
    void destroy() {
        if (d_dist) { cudaFree(const_cast<float*>(d_dist)); d_dist = nullptr; }
        if (d_demand) { cudaFree(const_cast<float*>(d_demand)); d_demand = nullptr; }
        h_dist = nullptr;
        cache.destroy();
        cache = GpuCache::disabled();
    }
};

View file

@ -0,0 +1,192 @@
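/**
* vrptw.cuh - Vehicle Routing Problem with Time Windows (VRPTW)
*
* Adds time-window constraints on top of CVRP.
* Encoding: multi-row partitioned permutation (same as CVRP); data[r][j] = the j-th customer of route r.
* Objective: minimize total distance.
* Constraints: (a) capacity, (b) time windows (arrival time must be <= latest; early arrivals wait).
*
* Validation instance: 8 customers, 3 vehicles, hand-crafted coordinates and time windows, guaranteed to have a known feasible solution.
*/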
#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
// VRP with capacity and time-window constraints. Every solution row is one
// route of customer indices 0..n-1; in the per-node arrays the depot is
// node 0 and customer c is node c + 1.
struct VRPTWProblem : ProblemBase<VRPTWProblem, 8, 64> {
    const float* d_dist;      // distance matrix [(n+1)*(n+1)] (incl. depot)
    const float* d_demand;    // demand [n]
    const float* d_earliest;  // earliest service time [n+1] (incl. depot)
    const float* d_latest;    // latest service time [n+1] (incl. depot)
    const float* d_service;   // service duration [n+1] (incl. depot)
    int n;                    // number of customers (depot excluded)
    int stride;               // n+1
    float capacity;           // maximum load of a single route
    int num_vehicles;         // number of solution rows (routes) that are evaluated
    int max_vehicles;         // non-empty routes beyond this count are penalized

    // Length of one route: depot -> route[0] -> ... -> route[size-1] -> depot.
    // An empty route costs nothing.
    __device__ float compute_route_dist(const int* route, int size) const {
        if (size == 0) return 0.0f;
        float dist = 0.0f;
        int prev = 0;  // start at the depot (node 0)
        for (int j = 0; j < size; j++) {
            int node = route[j] + 1;  // customer index -> matrix node
            dist += d_dist[prev * stride + node];
            prev = node;
        }
        dist += d_dist[prev * stride + 0];  // return leg to the depot
        return dist;
    }

    // Sum of the route lengths of the first num_vehicles rows of the solution.
    __device__ float calc_total_distance(const Sol& sol) const {
        float total = 0.0f;
        for (int r = 0; r < num_vehicles; r++)
            total += compute_route_dist(sol.data[r], sol.dim2_sizes[r]);
        return total;
    }

    // Single objective: minimize total distance (case 0 of compute_obj).
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},
    };

    // Returns the value of objective `idx`; unknown indices yield 0.
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_total_distance(sol);
            default: return 0.0f;
        }
    }

    // Constraint penalty of a solution, accumulated over all non-empty routes:
    //   - 100 per unit of load above `capacity`;
    //   - 50 per time unit of lateness at each customer and on return to the depot;
    //   - 1000 per non-empty route beyond `max_vehicles`.
    __device__ float compute_penalty(const Sol& sol) const {
        float penalty = 0.0f;
        int active = 0;  // number of non-empty routes
        for (int r = 0; r < num_vehicles; r++) {
            int size = sol.dim2_sizes[r];
            if (size == 0) continue;
            active++;
            // Capacity constraint
            float load = 0.0f;
            for (int j = 0; j < size; j++)
                load += d_demand[sol.data[r][j]];
            if (load > capacity)
                penalty += (load - capacity) * 100.0f;
            // Time-window constraint: simulate driving along the route.
            // The vehicle leaves when the depot opens; starting from a
            // hard-coded 0 would underestimate arrival times (and hide late
            // arrivals) whenever the depot's earliest time is greater than 0.
            float time = d_earliest[0];
            int prev = 0;
            for (int j = 0; j < size; j++) {
                int node = sol.data[r][j] + 1;
                float travel = d_dist[prev * stride + node];
                time += travel;
                // Early arrival: wait until the window opens
                if (time < d_earliest[node])
                    time = d_earliest[node];
                // Late arrival: penalize the lateness
                if (time > d_latest[node])
                    penalty += (time - d_latest[node]) * 50.0f;
                time += d_service[node];
                prev = node;
            }
            // Time window for returning to the depot
            float return_time = time + d_dist[prev * stride + 0];
            if (return_time > d_latest[0])
                penalty += (return_time - d_latest[0]) * 50.0f;
        }
        if (active > max_vehicles)
            penalty += (float)(active - max_vehicles) * 1000.0f;
        return penalty;
    }

    // Solver configuration: permutation encoding with num_vehicles rows in
    // partition mode, n elements in total, cross-row probability 0.3.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = num_vehicles;
        cfg.dim2_default = 0;
        fill_obj_config(cfg);
        cfg.cross_row_prob = 0.3f;
        cfg.row_mode = RowMode::Partition;
        cfg.total_elements = n;
        return cfg;
    }

    static constexpr size_t SMEM_LIMIT = 48 * 1024;

    // Bytes reserved for load_shared(), or 0 when that exceeds SMEM_LIMIT.
    // The auxiliary part is rounded up to 4 * (n + 1) floats, one float more
    // than load_shared() actually writes (n + 3 * (n + 1)).
    // NOTE(review): 0 presumably tells the caller not to use shared memory - confirm.
    size_t shared_mem_bytes() const {
        size_t dist_bytes = (size_t)stride * stride * sizeof(float);
        size_t aux_bytes = (size_t)(n + 1) * 4 * sizeof(float);  // demand(n) + earliest/latest/service(n+1 each)
        size_t total = dist_bytes + aux_bytes;
        return total <= SMEM_LIMIT ? total : 0;
    }

    // Same size as computed by shared_mem_bytes(), without the limit.
    size_t working_set_bytes() const {
        return (size_t)stride * stride * sizeof(float) + (size_t)(n + 1) * 4 * sizeof(float);
    }

    // Copies all problem arrays into `smem`, laid out back to back as
    // dist | demand | earliest | latest | service, and repoints the member
    // pointers at the copies. Each thread handles indices tid, tid + bsz, ...
    // NOTE(review): no barrier is issued here; the caller presumably
    // synchronizes the block before the copies are read - confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* sd = reinterpret_cast<float*>(smem);
        int dist_size = stride * stride;
        for (int i = tid; i < dist_size; i += bsz) sd[i] = d_dist[i];
        d_dist = sd;
        float* sdem = sd + dist_size;
        for (int i = tid; i < n; i += bsz) sdem[i] = d_demand[i];
        d_demand = sdem;
        float* se = sdem + n;
        int nn = n + 1;  // per-node arrays include the depot
        for (int i = tid; i < nn; i += bsz) se[i] = d_earliest[i];
        d_earliest = se;
        float* sl = se + nn;
        for (int i = tid; i < nn; i += bsz) sl[i] = d_latest[i];
        d_latest = sl;
        float* ss = sl + nn;
        for (int i = tid; i < nn; i += bsz) ss[i] = d_service[i];
        d_service = ss;
    }

    // Builds a problem instance.
    //   h_dist       host distance matrix incl. depot, (n+1) * (n+1) floats
    //   h_demand     host demand vector, n floats
    //   h_earliest   host earliest service times, n+1 floats (index 0 = depot)
    //   h_latest     host latest service times, n+1 floats (index 0 = depot)
    //   h_service    host service durations, n+1 floats (index 0 = depot)
    //   n            number of customers
    //   capacity     load limit per route
    //   num_vehicles number of routes in a solution
    //   max_vehicles number of non-empty routes allowed before a penalty applies
    // Device copies of all five arrays are allocated and uploaded; release
    // them with destroy().
    static VRPTWProblem create(const float* h_dist, const float* h_demand,
                               const float* h_earliest, const float* h_latest,
                               const float* h_service,
                               int n, float capacity,
                               int num_vehicles, int max_vehicles) {
        VRPTWProblem prob;
        prob.n = n;
        prob.stride = n + 1;
        prob.capacity = capacity;
        prob.num_vehicles = num_vehicles;
        prob.max_vehicles = max_vehicles;
        int nn = n + 1;
        float *dd, *ddem, *de, *dl, *ds;
        CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * nn * nn));
        CUDA_CHECK(cudaMemcpy(dd, h_dist, sizeof(float) * nn * nn, cudaMemcpyHostToDevice));
        prob.d_dist = dd;
        CUDA_CHECK(cudaMalloc(&ddem, sizeof(float) * n));
        CUDA_CHECK(cudaMemcpy(ddem, h_demand, sizeof(float) * n, cudaMemcpyHostToDevice));
        prob.d_demand = ddem;
        CUDA_CHECK(cudaMalloc(&de, sizeof(float) * nn));
        CUDA_CHECK(cudaMemcpy(de, h_earliest, sizeof(float) * nn, cudaMemcpyHostToDevice));
        prob.d_earliest = de;
        CUDA_CHECK(cudaMalloc(&dl, sizeof(float) * nn));
        CUDA_CHECK(cudaMemcpy(dl, h_latest, sizeof(float) * nn, cudaMemcpyHostToDevice));
        prob.d_latest = dl;
        CUDA_CHECK(cudaMalloc(&ds, sizeof(float) * nn));
        CUDA_CHECK(cudaMemcpy(ds, h_service, sizeof(float) * nn, cudaMemcpyHostToDevice));
        prob.d_service = ds;
        return prob;
    }

    // Frees every device array that is still set and clears the pointers,
    // so calling destroy() again is a no-op.
    void destroy() {
        if (d_dist) { cudaFree(const_cast<float*>(d_dist)); d_dist = nullptr; }
        if (d_demand) { cudaFree(const_cast<float*>(d_demand)); d_demand = nullptr; }
        if (d_earliest) { cudaFree(const_cast<float*>(d_earliest)); d_earliest = nullptr; }
        if (d_latest) { cudaFree(const_cast<float*>(d_latest)); d_latest = nullptr; }
        if (d_service) { cudaFree(const_cast<float*>(d_service)); d_service = nullptr; }
    }
};