mirror of
https://github.com/L-yang-yang/cugenopt.git
synced 2026-04-25 12:16:21 +02:00
Initial commit: cuGenOpt GPU optimization solver
This commit is contained in:
commit
fc5a0ff4af
117 changed files with 25545 additions and 0 deletions
114
python/cugenopt/include/problems/assignment.cuh
Normal file
114
python/cugenopt/include/problems/assignment.cuh
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
/**
|
||||
* assignment.cuh - 指派问题
|
||||
*
|
||||
* 继承 ProblemBase,使用 ObjDef 目标注册机制
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
#include "operators.cuh"
|
||||
|
||||
// Linear assignment problem: the permutation in row 0 maps agent i to task
// assign[i]; objective is the total assignment cost. Inherits ProblemBase
// and uses the ObjDef objective-registration mechanism.
struct AssignmentProblem : ProblemBase<AssignmentProblem, 1, 16> {
    const float* d_cost;   // device cost matrix [n*n]; load_shared may repoint this at shared memory
    const float* h_cost;   // host-side cost matrix (used only by init_relation_matrix)
    int n;                 // problem size (number of agents == number of tasks)

    // ---- Objective computation ----
    // Sum of d_cost[i][assign[i]] over the permutation stored in sol row 0.
    __device__ float calc_total_cost(const Sol& sol) const {
        float total = 0.0f;
        const int* assign = sol.data[0];
        int size = sol.dim2_sizes[0];
        for (int i = 0; i < size; i++)
            total += d_cost[i * n + assign[i]];
        return total;
    }

    // ---- Objective definitions (OBJ_DEFS and compute_obj must match 1:1) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_total_cost
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_total_cost(sol); // OBJ_DEFS[0]
            default: return 0.0f;
        }
    }

    // Unconstrained: every permutation is a feasible assignment.
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    // Encoding/dimension configuration; objective entries are filled in by
    // the base-class helper from OBJ_DEFS.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = 1; cfg.dim2_default = n;
        fill_obj_config(cfg);
        return cfg;
    }

    // ---- shared memory interface ----
    // Classic 48 KB per-block static shared-memory budget.
    static constexpr size_t SMEM_LIMIT = 48 * 1024;

    // Bytes requested for staging the cost matrix in shared memory;
    // returns 0 (disable staging) when the matrix does not fit.
    size_t shared_mem_bytes() const {
        size_t need = (size_t)n * n * sizeof(float);
        return need <= SMEM_LIMIT ? need : 0;
    }

    size_t working_set_bytes() const {
        return (size_t)n * n * sizeof(float);
    }

    // Cooperatively (tid strided by bsz) copies the cost matrix into shared
    // memory and repoints d_cost at it.
    // NOTE(review): no __syncthreads() here — assumes the framework caller
    // places a block barrier between load_shared and the first read; confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* sc = reinterpret_cast<float*>(smem);
        int total = n * n;
        for (int i = tid; i < total; i += bsz) sc[i] = d_cost[i];
        d_cost = sc;
    }

    // Cost prior for the relation matrices:
    //   G: tasks j and k preferred by similar agents (similar cost columns)
    //      get a high value — candidates for swapping.
    //   O: same similarity scaled lower (ordering-preference prior).
    void init_relation_matrix(float* G, float* O, int N) const {
        if (!h_cost || N != n) return;
        // For each task, build its cost column vector; cosine similarity
        // between columns drives G. (Simplification: column correlation.)
        float max_c = 0.0f;
        for (int i = 0; i < N * N; i++)
            if (h_cost[i] > max_c) max_c = h_cost[i];
        if (max_c <= 0.0f) return;  // degenerate all-zero/negative-free matrix: leave G/O untouched

        for (int j = 0; j < N; j++)
            for (int k = 0; k < N; k++) {
                if (j == k) continue;
                // G: the more similar two tasks' cost vectors, the more
                // likely a beneficial exchange between them.
                float dot = 0.0f, nj = 0.0f, nk = 0.0f;
                for (int i = 0; i < N; i++) {
                    // Normalization by max_c keeps intermediates small;
                    // cosine similarity itself is scale-invariant.
                    float cj = h_cost[i * N + j] / max_c;
                    float ck = h_cost[i * N + k] / max_c;
                    dot += cj * ck;
                    nj += cj * cj;
                    nk += ck * ck;
                }
                float denom = sqrtf(nj) * sqrtf(nk);
                float sim = (denom > 1e-6f) ? dot / denom : 0.0f;
                G[j * N + k] = sim * 0.2f;
                O[j * N + k] = sim * 0.05f;
            }
    }

    // Factory: uploads the host cost matrix to the device. hc must point to
    // at least n*n floats and outlive the problem (kept as h_cost).
    static AssignmentProblem create(const float* hc, int n) {
        AssignmentProblem prob;
        prob.n = n;
        prob.h_cost = hc;
        float* dc;
        CUDA_CHECK(cudaMalloc(&dc, sizeof(float)*n*n));
        CUDA_CHECK(cudaMemcpy(dc, hc, sizeof(float)*n*n, cudaMemcpyHostToDevice));
        prob.d_cost = dc;
        return prob;
    }

    // Releases the device cost matrix; h_cost is caller-owned, only cleared.
    void destroy() {
        if (d_cost) { cudaFree(const_cast<float*>(d_cost)); d_cost = nullptr; }
        h_cost = nullptr;
    }
};
|
||||
97
python/cugenopt/include/problems/bin_packing.cuh
Normal file
97
python/cugenopt/include/problems/bin_packing.cuh
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
/**
|
||||
* bin_packing.cuh - 一维装箱问题(Integer 编码 + 约束)
|
||||
*
|
||||
* N 个物品,每个重量 w[i],装入最多 B 个箱子,每个箱子容量 C。
|
||||
* 决策变量:data[0][i] ∈ [0, B-1],表示物品 i 放入的箱子编号。
|
||||
* 目标:最小化使用的箱子数。
|
||||
* 约束:每个箱子总重不超过 C,超出部分作为 penalty。
|
||||
*
|
||||
* 验证实例:8 物品 weights=[7,5,3,4,6,2,8,1], C=10, 最优=4 箱
|
||||
* 箱0={7,3}=10, 箱1={5,4,1}=10, 箱2={6,2}=8, 箱3={8}=8
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
|
||||
// One-dimensional bin packing (Integer encoding + capacity constraint).
// N items with weights w[i] go into at most B bins of capacity C.
// Decision variable: data[0][i] in [0, B-1] = bin index of item i.
// Objective: minimize the number of bins used.
// Constraint: per-bin load must not exceed C; the excess is penalized.
struct BinPackingProblem : ProblemBase<BinPackingProblem, 1, 64> {
    // Capacity of the fixed-size per-thread scratch arrays below.
    // BUG FIX: the original indexed bool used[32] / float load[32] with the
    // guard `b < max_bins`, writing out of bounds whenever max_bins > 32.
    // Bins at index >= MAX_BIN_SLOTS are now ignored instead.
    static constexpr int MAX_BIN_SLOTS = 32;

    const float* d_weights; // item weights [n]; load_shared may repoint this at shared memory
    int n;                  // item count
    int max_bins;           // maximum bin count B
    float capacity;         // bin capacity C

    // Number of distinct bins referenced by the solution.
    __device__ float calc_bins_used(const Sol& sol) const {
        bool used[MAX_BIN_SLOTS] = {};
        // Clamp to scratch capacity so an oversized max_bins cannot corrupt
        // the stack.
        int nb = max_bins < MAX_BIN_SLOTS ? max_bins : MAX_BIN_SLOTS;
        int size = sol.dim2_sizes[0];
        for (int i = 0; i < size; i++) {
            int b = sol.data[0][i];
            if (b >= 0 && b < nb) used[b] = true;
        }
        int count = 0;
        for (int b = 0; b < nb; b++)
            if (used[b]) count++;
        return (float)count;
    }

    // OBJ_DEFS and compute_obj must match 1:1.
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_bins_used
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_bins_used(sol);
            default: return 0.0f;
        }
    }

    // Penalty: 10x the total weight overflowing each bin's capacity.
    __device__ float compute_penalty(const Sol& sol) const {
        float penalty = 0.0f;
        float load[MAX_BIN_SLOTS] = {};
        // Same clamp as calc_bins_used (see MAX_BIN_SLOTS note above).
        int nb = max_bins < MAX_BIN_SLOTS ? max_bins : MAX_BIN_SLOTS;
        int size = sol.dim2_sizes[0];
        for (int i = 0; i < size; i++) {
            int b = sol.data[0][i];
            if (b >= 0 && b < nb)
                load[b] += d_weights[i];
        }
        for (int b = 0; b < nb; b++) {
            float over = load[b] - capacity;
            if (over > 0.0f) penalty += over * 10.0f;
        }
        return penalty;
    }

    // Integer encoding over [0, max_bins - 1]; one row of n items.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Integer;
        cfg.dim1 = 1; cfg.dim2_default = n;
        cfg.value_lower_bound = 0;
        cfg.value_upper_bound = max_bins - 1;
        fill_obj_config(cfg);
        return cfg;
    }

    // Shared-memory staging of the weight vector.
    size_t shared_mem_bytes() const {
        return (size_t)n * sizeof(float);
    }

    // NOTE(review): no __syncthreads() here — assumes the framework caller
    // places a block barrier between load_shared and the first read; confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* sw = reinterpret_cast<float*>(smem);
        for (int i = tid; i < n; i += bsz) sw[i] = d_weights[i];
        d_weights = sw;
    }

    // Factory: uploads item weights to the device.
    static BinPackingProblem create(const float* h_weights, int n,
                                    int max_bins, float capacity) {
        BinPackingProblem prob;
        prob.n = n; prob.max_bins = max_bins; prob.capacity = capacity;
        float* dw;
        CUDA_CHECK(cudaMalloc(&dw, sizeof(float) * n));
        CUDA_CHECK(cudaMemcpy(dw, h_weights, sizeof(float) * n, cudaMemcpyHostToDevice));
        prob.d_weights = dw;
        return prob;
    }

    // Releases the device weight vector.
    void destroy() {
        if (d_weights) cudaFree(const_cast<float*>(d_weights));
        d_weights = nullptr;
    }
};
|
||||
79
python/cugenopt/include/problems/graph_color.cuh
Normal file
79
python/cugenopt/include/problems/graph_color.cuh
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
/**
|
||||
* graph_color.cuh - 图着色问题(Integer 编码)
|
||||
*
|
||||
* N 个节点的图,用 k 种颜色着色。
|
||||
* 决策变量:data[0][i] ∈ [0, k-1],表示节点 i 的颜色。
|
||||
* 目标:最小化冲突边数(相邻节点同色的边数)。
|
||||
*
|
||||
* 验证实例:Petersen 图(10 节点 15 边,色数=3,最优冲突=0)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
|
||||
// Graph coloring (Integer encoding): color N nodes with k colors.
// Decision variable: data[0][i] in [0, k-1] = color of node i.
// Objective: minimize the number of conflicting edges (adjacent nodes
// sharing a color).
struct GraphColorProblem : ProblemBase<GraphColorProblem, 1, 64> {
    const int* d_adj;  // adjacency matrix [N*N] (1 = adjacent, 0 = not); may be repointed at shared memory
    int n;             // node count
    int k;             // color count

    // Count of monochromatic edges: each unordered adjacent pair with equal
    // colors contributes 1.
    __device__ float calc_conflicts(const Sol& sol) const {
        const int* color = sol.data[0];
        int len = sol.dim2_sizes[0];
        int bad = 0;
        for (int u = 0; u < len; u++) {
            const int* row = d_adj + u * n;
            for (int v = u + 1; v < len; v++) {
                if (row[v] && color[u] == color[v]) bad++;
            }
        }
        return (float)bad;
    }

    // OBJ_DEFS and compute_obj must match 1:1.
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_conflicts
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) return calc_conflicts(sol); // OBJ_DEFS[0]
        return 0.0f;
    }

    // Unconstrained: conflicts live in the objective, not the penalty.
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    // Integer encoding over [0, k - 1]; one row of n nodes.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Integer;
        cfg.dim1 = 1;
        cfg.dim2_default = n;
        cfg.value_lower_bound = 0;
        cfg.value_upper_bound = k - 1;
        fill_obj_config(cfg);
        return cfg;
    }

    // Shared-memory staging of the adjacency matrix.
    size_t shared_mem_bytes() const {
        return (size_t)n * n * sizeof(int);
    }

    // Cooperatively copies the adjacency matrix into shared memory and
    // repoints d_adj at it. NOTE(review): assumes the framework caller
    // synchronizes the block before the first read; confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        int* cached = reinterpret_cast<int*>(smem);
        int cells = n * n;
        for (int i = tid; i < cells; i += bsz) {
            cached[i] = d_adj[i];
        }
        d_adj = cached;
    }

    // Factory: uploads the adjacency matrix to the device.
    static GraphColorProblem create(const int* h_adj, int n, int k) {
        GraphColorProblem prob;
        prob.n = n;
        prob.k = k;
        int* dev_adj;
        CUDA_CHECK(cudaMalloc(&dev_adj, sizeof(int) * n * n));
        CUDA_CHECK(cudaMemcpy(dev_adj, h_adj, sizeof(int) * n * n, cudaMemcpyHostToDevice));
        prob.d_adj = dev_adj;
        return prob;
    }

    // Releases the device adjacency matrix.
    void destroy() {
        if (d_adj) cudaFree(const_cast<int*>(d_adj));
        d_adj = nullptr;
    }
};
|
||||
271
python/cugenopt/include/problems/jsp.cuh
Normal file
271
python/cugenopt/include/problems/jsp.cuh
Normal file
|
|
@ -0,0 +1,271 @@
|
|||
/**
|
||||
* jsp.cuh - 车间调度问题 (Job Shop Scheduling Problem)
|
||||
*
|
||||
* J 个工件,每个工件有 O 道工序,每道工序指定机器和耗时。
|
||||
*
|
||||
* === 编码方案 A:Integer 多行(时间表编码)===
|
||||
* JSPProblem: data[j][i] = 工件 j 的第 i 道工序的开始时间
|
||||
* dim1 = num_jobs, dim2_default = num_ops
|
||||
* row_mode = Fixed(禁止 ROW_SPLIT/ROW_MERGE)
|
||||
* 每行代表一个工件的固定工序序列,行长度不可变
|
||||
*
|
||||
* === 编码方案 B:Permutation 多重集(工序排列编码)===
|
||||
* JSPPermProblem: data[0][k] = 工件编号(0..J-1),长度 J*O
|
||||
* 值 j 出现 O 次。从左到右扫描,第 t 次遇到值 j 表示工件 j 的第 t 道工序。
|
||||
* dim1 = 1, dim2_default = J*O, perm_repeat_count = O
|
||||
* 标准 Permutation 算子(swap/reverse/insert)天然保持多重集结构
|
||||
*
|
||||
* 目标:Minimize makespan(所有工件完成时间的最大值)。
|
||||
* 约束:
|
||||
* (a) 工序顺序:同一工件的工序必须按序执行
|
||||
* (b) 机器冲突:同一机器同一时刻只能处理一个工序
|
||||
*
|
||||
* 验证实例:自定义 3 工件 3 机器 (3x3),最优 makespan = 12
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
|
||||
// ============================================================
|
||||
// 编码方案 A:Integer 多行(时间表编码)
|
||||
// ============================================================
|
||||
|
||||
// Job Shop Scheduling, encoding scheme A (Integer multi-row timetable):
// data[j][i] = start time of operation i of job j; rows are fixed-length.
// Objective: minimize makespan. Constraint violations (operation order,
// machine conflicts) are soft, charged through compute_penalty.
struct JSPProblem : ProblemBase<JSPProblem, 8, 16> {
    const int* d_machine;    // machine required by each operation [J*O]; may be repointed at shared memory
    const float* d_duration; // operation duration [J*O]; may be repointed at shared memory
    int num_jobs;            // job count J
    int num_ops;             // operations per job O
    int num_machines;        // machine count M
    int time_horizon;        // upper bound on start times

    // Makespan = max over jobs of (start of last op + its duration).
    // Only the last operation per job matters because the order constraint
    // (penalized below) forces earlier ops to finish no later.
    __device__ float calc_makespan(const Sol& sol) const {
        float makespan = 0.0f;
        for (int j = 0; j < num_jobs; j++) {
            int last = num_ops - 1;
            float end = (float)sol.data[j][last] + d_duration[j * num_ops + last];
            if (end > makespan) makespan = end;
        }
        return makespan;
    }

    // OBJ_DEFS and compute_obj must match 1:1.
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_makespan
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_makespan(sol);
            default: return 0.0f;
        }
    }

    // Soft-constraint penalty, weight 10 per unit of violation.
    __device__ float compute_penalty(const Sol& sol) const {
        float penalty = 0.0f;

        // (a) Operation-order constraint: within a job, each op must start
        // no earlier than the previous op's finish time.
        for (int j = 0; j < num_jobs; j++) {
            for (int i = 1; i < num_ops; i++) {
                float prev_end = (float)sol.data[j][i-1] + d_duration[j * num_ops + (i-1)];
                float curr_start = (float)sol.data[j][i];
                if (curr_start < prev_end)
                    penalty += (prev_end - curr_start) * 10.0f;
            }
        }

        // (b) Machine-conflict constraint: for every unordered pair of ops
        // on the same machine, penalize their time-interval overlap.
        // O((J*O)^2) pairwise scan over flattened op indices.
        int total = num_jobs * num_ops;
        for (int a = 0; a < total; a++) {
            int ja = a / num_ops, ia = a % num_ops;  // flat index -> (job, op)
            int m_a = d_machine[a];
            float s_a = (float)sol.data[ja][ia];
            float e_a = s_a + d_duration[a];
            for (int b = a + 1; b < total; b++) {
                if (d_machine[b] != m_a) continue;   // different machine: no conflict
                int jb = b / num_ops, ib = b % num_ops;
                float s_b = (float)sol.data[jb][ib];
                float e_b = s_b + d_duration[b];
                // Overlap length of [s_a,e_a] and [s_b,e_b]; <= 0 means disjoint.
                float overlap = fminf(e_a, e_b) - fmaxf(s_a, s_b);
                if (overlap > 0.0f)
                    penalty += overlap * 10.0f;
            }
        }

        return penalty;
    }

    // Integer encoding, one fixed-length row per job; start times bounded by
    // the time horizon. RowMode::Fixed forbids ROW_SPLIT/ROW_MERGE operators.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Integer;
        cfg.dim1 = num_jobs;
        cfg.dim2_default = num_ops;
        cfg.value_lower_bound = 0;
        cfg.value_upper_bound = time_horizon - 1;
        cfg.row_mode = RowMode::Fixed;
        fill_obj_config(cfg);
        return cfg;
    }

    // Shared-memory staging: machine table (int) followed by durations (float).
    size_t shared_mem_bytes() const {
        int total = num_jobs * num_ops;
        return (size_t)total * (sizeof(int) + sizeof(float));
    }

    // Copies both lookup tables into shared memory and repoints the device
    // pointers. The float region starts right after the int region (same
    // 4-byte alignment). NOTE(review): assumes the framework caller places a
    // block barrier before the first read; confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        int total = num_jobs * num_ops;
        int* sm = reinterpret_cast<int*>(smem);
        for (int i = tid; i < total; i += bsz) sm[i] = d_machine[i];
        d_machine = sm;

        float* sd = reinterpret_cast<float*>(sm + total);
        for (int i = tid; i < total; i += bsz) sd[i] = d_duration[i];
        d_duration = sd;
    }

    // Factory: uploads machine and duration tables (each num_jobs*num_ops
    // entries, row-major by job) to the device.
    static JSPProblem create(const int* h_machine, const float* h_duration,
                             int num_jobs, int num_ops, int num_machines,
                             int time_horizon) {
        JSPProblem prob;
        prob.num_jobs = num_jobs;
        prob.num_ops = num_ops;
        prob.num_machines = num_machines;
        prob.time_horizon = time_horizon;

        int total = num_jobs * num_ops;
        int* dm;
        CUDA_CHECK(cudaMalloc(&dm, sizeof(int) * total));
        CUDA_CHECK(cudaMemcpy(dm, h_machine, sizeof(int) * total, cudaMemcpyHostToDevice));
        prob.d_machine = dm;

        float* dd;
        CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * total));
        CUDA_CHECK(cudaMemcpy(dd, h_duration, sizeof(float) * total, cudaMemcpyHostToDevice));
        prob.d_duration = dd;

        return prob;
    }

    // Releases both device tables.
    void destroy() {
        if (d_machine) { cudaFree(const_cast<int*>(d_machine)); d_machine = nullptr; }
        if (d_duration) { cudaFree(const_cast<float*>(d_duration)); d_duration = nullptr; }
    }
};
|
||||
|
||||
// ============================================================
|
||||
// 编码方案 B:Permutation 多重集(工序排列编码)
|
||||
// ============================================================
|
||||
// data[0] 是长度 J*O 的排列,值域 [0, J),每个值出现 O 次
|
||||
// 从左到右扫描:第 t 次遇到值 j → 安排工件 j 的第 t 道工序
|
||||
// 贪心解码:每道工序安排在"最早可行时间"(满足工序顺序 + 机器空闲)
|
||||
|
||||
// Job Shop Scheduling, encoding scheme B (Permutation multiset):
// data[0] is a length J*O sequence of job ids in [0, J), each appearing O
// times. Scanning left to right, the t-th occurrence of value j schedules
// operation t of job j. Greedy decoding places each operation at its
// earliest feasible time (job-order + machine availability), so the decoded
// schedule always satisfies both constraint families.
struct JSPPermProblem : ProblemBase<JSPPermProblem, 1, 64> {
    // Capacity of the fixed-size decoder state arrays below.
    // BUG FIX: the original indexed job_avail[8]/mach_avail[8]/job_next_op[8]
    // by num_jobs/num_machines with no bound check — out-of-bounds stack
    // access for instances with more than 8 jobs or machines. Oversized
    // instances are now reported as infeasible (1e9f) instead.
    static constexpr int MAX_JOBS = 8;
    static constexpr int MAX_MACHINES = 8;

    const int* d_machine;    // machine required by each operation [J*O]
    const float* d_duration; // operation duration [J*O]
    int num_jobs;
    int num_ops;
    int num_machines;

    // Greedy decode: build a schedule from the permutation, return its makespan.
    // Returns 1e9f for malformed or over-capacity instances.
    __device__ float decode_and_makespan(const Sol& sol) const {
        // Guard the fixed-capacity state arrays (see MAX_JOBS/MAX_MACHINES).
        if (num_jobs > MAX_JOBS || num_machines > MAX_MACHINES) return 1e9f;

        int total = num_jobs * num_ops;
        int size = sol.dim2_sizes[0];
        if (size < total) return 1e9f;  // too short to encode every operation

        float job_avail[MAX_JOBS];      // earliest start of each job's next op
        float mach_avail[MAX_MACHINES]; // earliest idle time of each machine
        int job_next_op[MAX_JOBS];      // next unscheduled op index per job

        for (int j = 0; j < num_jobs; j++) { job_avail[j] = 0.0f; job_next_op[j] = 0; }
        for (int m = 0; m < num_machines; m++) mach_avail[m] = 0.0f;

        float makespan = 0.0f;
        for (int k = 0; k < total; k++) {
            int j = sol.data[0][k];
            if (j < 0 || j >= num_jobs) return 1e9f;  // corrupt entry
            int op = job_next_op[j];
            if (op >= num_ops) continue;  // job already fully scheduled

            int flat = j * num_ops + op;
            int m = d_machine[flat];
            float dur = d_duration[flat];

            // Earliest start = max(job predecessor finish, machine idle).
            float start = fmaxf(job_avail[j], mach_avail[m]);
            float end = start + dur;

            job_avail[j] = end;
            mach_avail[m] = end;
            job_next_op[j] = op + 1;

            if (end > makespan) makespan = end;
        }

        return makespan;
    }

    // OBJ_DEFS and compute_obj must match 1:1.
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}, // case 0: decode_and_makespan
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return decode_and_makespan(sol);
            default: return 0.0f;
        }
    }

    // Greedy decoding satisfies all constraints by construction; penalty is 0.
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    // Permutation multiset encoding: one row of J*O entries, each job id
    // repeated num_ops times (perm_repeat_count). Standard permutation
    // operators (swap/reverse/insert) preserve the multiset structure.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = 1;
        cfg.dim2_default = num_jobs * num_ops;
        cfg.perm_repeat_count = num_ops;
        fill_obj_config(cfg);
        return cfg;
    }

    // Shared-memory staging: machine table (int) followed by durations (float).
    size_t shared_mem_bytes() const {
        int total = num_jobs * num_ops;
        return (size_t)total * (sizeof(int) + sizeof(float));
    }

    // NOTE(review): no __syncthreads() here — assumes the framework caller
    // places a block barrier between load_shared and the first read; confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        int total = num_jobs * num_ops;
        int* sm = reinterpret_cast<int*>(smem);
        for (int i = tid; i < total; i += bsz) sm[i] = d_machine[i];
        d_machine = sm;

        float* sd = reinterpret_cast<float*>(sm + total);
        for (int i = tid; i < total; i += bsz) sd[i] = d_duration[i];
        d_duration = sd;
    }

    // Factory: uploads machine and duration tables (num_jobs*num_ops entries
    // each, row-major by job) to the device.
    static JSPPermProblem create(const int* h_machine, const float* h_duration,
                                 int num_jobs, int num_ops, int num_machines) {
        JSPPermProblem prob;
        prob.num_jobs = num_jobs;
        prob.num_ops = num_ops;
        prob.num_machines = num_machines;

        int total = num_jobs * num_ops;
        int* dm;
        CUDA_CHECK(cudaMalloc(&dm, sizeof(int) * total));
        CUDA_CHECK(cudaMemcpy(dm, h_machine, sizeof(int) * total, cudaMemcpyHostToDevice));
        prob.d_machine = dm;

        float* dd;
        CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * total));
        CUDA_CHECK(cudaMemcpy(dd, h_duration, sizeof(float) * total, cudaMemcpyHostToDevice));
        prob.d_duration = dd;

        return prob;
    }

    // Releases both device tables.
    void destroy() {
        if (d_machine) { cudaFree(const_cast<int*>(d_machine)); d_machine = nullptr; }
        if (d_duration) { cudaFree(const_cast<float*>(d_duration)); d_duration = nullptr; }
    }
};
|
||||
88
python/cugenopt/include/problems/knapsack.cuh
Normal file
88
python/cugenopt/include/problems/knapsack.cuh
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
/**
|
||||
* knapsack.cuh - 0-1 背包问题
|
||||
*
|
||||
* 继承 ProblemBase,使用 ObjDef 目标注册机制
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
#include "operators.cuh"
|
||||
|
||||
// 0/1 knapsack problem. Binary row 0 selects items; maximize total value
// subject to total weight <= capacity (overweight charged as penalty).
// Inherits ProblemBase and registers objectives through ObjDef.
struct KnapsackProblem : ProblemBase<KnapsackProblem, 1, 32> {
    // Problem data (d_weights holds item weights, not objective weights).
    const float* d_weights;
    const float* d_values;
    float capacity;
    int n;

    // ---- Objective: summed value of all selected items ----
    __device__ float calc_total_value(const Sol& sol) const {
        const int* picks = sol.data[0];
        int count = sol.dim2_sizes[0];
        float value_sum = 0.0f;
        for (int item = 0; item < count; item++) {
            if (picks[item]) value_sum += d_values[item];
        }
        return value_sum;
    }

    // ---- Objective definitions (OBJ_DEFS and compute_obj must match 1:1) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Maximize, 1.0f, 0.0f}, // case 0: calc_total_value
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) return calc_total_value(sol); // OBJ_DEFS[0]
        return 0.0f;
    }

    // Penalty: weight in excess of capacity; 0 when the selection fits.
    __device__ float compute_penalty(const Sol& sol) const {
        const int* picks = sol.data[0];
        int count = sol.dim2_sizes[0];
        float weight_sum = 0.0f;
        for (int item = 0; item < count; item++) {
            if (picks[item]) weight_sum += d_weights[item];
        }
        float excess = weight_sum - capacity;
        return excess > 0.0f ? excess : 0.0f;
    }

    // Binary encoding, one row of n items.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Binary;
        cfg.dim1 = 1;
        cfg.dim2_default = n;
        fill_obj_config(cfg);
        return cfg;
    }

    // ---- shared memory interface ----
    // Weights and values staged back to back: n floats each.
    size_t shared_mem_bytes() const {
        return 2 * (size_t)n * sizeof(float);
    }

    // Copies both vectors into shared memory and repoints the pointers.
    // NOTE(review): assumes the framework caller synchronizes the block
    // before the first read; confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* cached_w = reinterpret_cast<float*>(smem);
        float* cached_v = cached_w + n;
        for (int i = tid; i < n; i += bsz) {
            cached_w[i] = d_weights[i];
            cached_v[i] = d_values[i];
        }
        d_weights = cached_w;
        d_values = cached_v;
    }

    // Factory: uploads weights (hw) and values (hv) for n items.
    static KnapsackProblem create(const float* hw, const float* hv, int n, float cap) {
        KnapsackProblem prob;
        prob.n = n;
        prob.capacity = cap;
        float* dev_w;
        float* dev_v;
        CUDA_CHECK(cudaMalloc(&dev_w, sizeof(float)*n));
        CUDA_CHECK(cudaMalloc(&dev_v, sizeof(float)*n));
        CUDA_CHECK(cudaMemcpy(dev_w, hw, sizeof(float)*n, cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(dev_v, hv, sizeof(float)*n, cudaMemcpyHostToDevice));
        prob.d_weights = dev_w;
        prob.d_values = dev_v;
        return prob;
    }

    // Releases both device vectors.
    void destroy() {
        if (d_weights) cudaFree(const_cast<float*>(d_weights));
        if (d_values) cudaFree(const_cast<float*>(d_values));
        d_weights = nullptr;
        d_values = nullptr;
    }
};
|
||||
83
python/cugenopt/include/problems/load_balance.cuh
Normal file
83
python/cugenopt/include/problems/load_balance.cuh
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
/**
|
||||
* load_balance.cuh - 离散负载均衡问题(Integer 编码验证)
|
||||
*
|
||||
* N 个任务分配到 M 台机器,每个任务有一个处理时间 p[i]。
|
||||
* 决策变量:data[0][i] ∈ [0, M-1],表示任务 i 分配到哪台机器。
|
||||
* 目标:最小化 makespan(最大机器负载)。
|
||||
*
|
||||
* 已知 NP-hard(等价于 multiprocessor scheduling / load balancing)。
|
||||
* LPT(最长处理时间优先)贪心可得 4/3 近似。
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
|
||||
// Discrete load balancing (Integer encoding): assign N tasks with processing
// times p[i] to M machines; data[0][i] in [0, M-1] is task i's machine.
// Objective: minimize makespan (maximum machine load). NP-hard
// (multiprocessor scheduling); LPT greedy gives a 4/3 approximation.
struct LoadBalanceProblem : ProblemBase<LoadBalanceProblem, 1, 64> {
    // Capacity of the fixed-size per-thread load array below.
    // BUG FIX: the original indexed float load[32] with the guard
    // `machine < m`, writing out of bounds whenever m > 32. Machines at
    // index >= MAX_MACHINE_SLOTS are now ignored instead.
    static constexpr int MAX_MACHINE_SLOTS = 32;

    const float* d_proc_time; // task processing times [N]; may be repointed at shared memory
    int n;                    // task count
    int m;                    // machine count

    // Makespan = maximum total processing time over all machines.
    __device__ float calc_makespan(const Sol& sol) const {
        float load[MAX_MACHINE_SLOTS] = {};
        // Clamp to scratch capacity so an oversized m cannot corrupt the stack.
        int nm = m < MAX_MACHINE_SLOTS ? m : MAX_MACHINE_SLOTS;
        int size = sol.dim2_sizes[0];
        for (int i = 0; i < size; i++) {
            int machine = sol.data[0][i];
            if (machine >= 0 && machine < nm)
                load[machine] += d_proc_time[i];
        }
        float max_load = 0.0f;
        for (int j = 0; j < nm; j++)
            if (load[j] > max_load) max_load = load[j];
        return max_load;
    }

    // OBJ_DEFS and compute_obj must match 1:1.
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}, // case 0: makespan
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_makespan(sol);
            default: return 0.0f;
        }
    }

    // Unconstrained: every assignment is feasible.
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    // Integer encoding over [0, m - 1]; one row of n tasks.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Integer;
        cfg.dim1 = 1; cfg.dim2_default = n;
        cfg.value_lower_bound = 0;
        cfg.value_upper_bound = m - 1;
        fill_obj_config(cfg);
        return cfg;
    }

    // Shared-memory staging of the processing-time vector.
    size_t shared_mem_bytes() const {
        return (size_t)n * sizeof(float);
    }

    // NOTE(review): no __syncthreads() here — assumes the framework caller
    // places a block barrier between load_shared and the first read; confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* sp = reinterpret_cast<float*>(smem);
        for (int i = tid; i < n; i += bsz) sp[i] = d_proc_time[i];
        d_proc_time = sp;
    }

    // Factory: uploads processing times to the device.
    static LoadBalanceProblem create(const float* h_proc_time, int n, int m) {
        LoadBalanceProblem prob;
        prob.n = n; prob.m = m;
        float* dp;
        CUDA_CHECK(cudaMalloc(&dp, sizeof(float) * n));
        CUDA_CHECK(cudaMemcpy(dp, h_proc_time, sizeof(float) * n, cudaMemcpyHostToDevice));
        prob.d_proc_time = dp;
        return prob;
    }

    // Releases the device processing-time vector.
    void destroy() {
        if (d_proc_time) cudaFree(const_cast<float*>(d_proc_time));
        d_proc_time = nullptr;
    }
};
|
||||
84
python/cugenopt/include/problems/qap.cuh
Normal file
84
python/cugenopt/include/problems/qap.cuh
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
/**
|
||||
* qap.cuh - 二次分配问题 (Quadratic Assignment Problem)
|
||||
*
|
||||
* N 个设施分配到 N 个位置(排列编码)。
|
||||
* 决策变量:data[0][i] = 设施 i 分配到的位置。
|
||||
* 目标:Minimize sum(flow[i][j] * dist[perm[i]][perm[j]])
|
||||
*
|
||||
* 验证实例:自定义 5x5
|
||||
* flow: 设施间的物流量
|
||||
* dist: 位置间的距离
|
||||
* 已知最优 = 58
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
|
||||
// Quadratic Assignment Problem: N facilities placed at N locations
// (permutation encoding, data[0][i] = location of facility i).
// Objective: minimize sum(flow[i][j] * dist[perm[i]][perm[j]]).
struct QAPProblem : ProblemBase<QAPProblem, 1, 32> {
    const float* d_flow;  // flow matrix [N*N]; may be repointed at shared memory
    const float* d_dist;  // distance matrix [N*N]; may be repointed at shared memory
    int n;

    // Full quadratic cost over all ordered facility pairs (including i == j,
    // whose dist term is the diagonal).
    __device__ float calc_cost(const Sol& sol) const {
        const int* perm = sol.data[0];
        int len = sol.dim2_sizes[0];
        float acc = 0.0f;
        for (int a = 0; a < len; a++) {
            const float* flow_row = d_flow + a * n;
            const float* dist_row = d_dist + perm[a] * n;
            for (int b = 0; b < len; b++) {
                acc += flow_row[b] * dist_row[perm[b]];
            }
        }
        return acc;
    }

    // OBJ_DEFS and compute_obj must match 1:1.
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_cost
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        if (idx == 0) return calc_cost(sol); // OBJ_DEFS[0]
        return 0.0f;
    }

    // Unconstrained: every permutation is feasible.
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    // Permutation encoding; one row of n facilities.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = 1;
        cfg.dim2_default = n;
        fill_obj_config(cfg);
        return cfg;
    }

    // Flow and distance matrices staged back to back: n*n floats each.
    size_t shared_mem_bytes() const {
        return 2 * (size_t)n * n * sizeof(float);
    }

    // Copies both matrices into shared memory and repoints the pointers.
    // NOTE(review): assumes the framework caller synchronizes the block
    // before the first read; confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* cached_flow = reinterpret_cast<float*>(smem);
        float* cached_dist = cached_flow + n * n;
        int cells = n * n;
        for (int i = tid; i < cells; i += bsz) {
            cached_flow[i] = d_flow[i];
            cached_dist[i] = d_dist[i];
        }
        d_flow = cached_flow;
        d_dist = cached_dist;
    }

    // Factory: uploads the n*n flow and distance matrices to the device.
    static QAPProblem create(const float* h_flow, const float* h_dist, int n) {
        QAPProblem prob;
        prob.n = n;
        float* dev_flow;
        float* dev_dist;
        CUDA_CHECK(cudaMalloc(&dev_flow, sizeof(float) * n * n));
        CUDA_CHECK(cudaMalloc(&dev_dist, sizeof(float) * n * n));
        CUDA_CHECK(cudaMemcpy(dev_flow, h_flow, sizeof(float) * n * n, cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(dev_dist, h_dist, sizeof(float) * n * n, cudaMemcpyHostToDevice));
        prob.d_flow = dev_flow;
        prob.d_dist = dev_dist;
        return prob;
    }

    // Releases both device matrices.
    void destroy() {
        if (d_flow) cudaFree(const_cast<float*>(d_flow));
        if (d_dist) cudaFree(const_cast<float*>(d_dist));
        d_flow = nullptr;
        d_dist = nullptr;
    }
};
|
||||
101
python/cugenopt/include/problems/schedule.cuh
Normal file
101
python/cugenopt/include/problems/schedule.cuh
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
/**
|
||||
* schedule.cuh - 排班问题
|
||||
*
|
||||
* 继承 ProblemBase,使用 ObjDef 目标注册机制
|
||||
* 2 个目标:总成本(min)+ 不公平度(min,权重更高)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
#include "operators.cuh"
|
||||
|
||||
// Staff rostering problem. Binary matrix: data[d][e] = 1 if employee e works
// day d. Two objectives via ObjDef registration: total cost (minimize) and
// unfairness (minimize, higher weight). Soft constraint: exactly `required`
// employees per day.
struct ScheduleProblem : ProblemBase<ScheduleProblem, 8, 16> {
    const float* d_cost;       // per-(day, employee) cost [days*emps]; may be repointed at shared memory
    int days, emps, required;  // roster dimensions and required staff per day

    // ---- Objective computation ----
    // Objective 0: sum of costs of all worked (day, employee) cells.
    __device__ float calc_total_cost(const Sol& sol) const {
        float total = 0.0f;
        for (int d = 0; d < days; d++)
            for (int e = 0; e < emps; e++)
                if (sol.data[d][e]) total += d_cost[d * emps + e];
        return total;
    }

    // Objective 1: spread between the most- and least-worked employee
    // (max workdays - min workdays).
    __device__ float calc_unfairness(const Sol& sol) const {
        // D2 — assumed to be the ProblemBase dim2 capacity template parameter
        // (16 here), so workdays must fit emps <= D2 — TODO confirm.
        int workdays[D2];
        for (int e = 0; e < emps; e++) workdays[e] = 0;
        for (int d = 0; d < days; d++)
            for (int e = 0; e < emps; e++)
                if (sol.data[d][e]) workdays[e]++;
        int max_w = 0, min_w = days;
        for (int e = 0; e < emps; e++) {
            if (workdays[e] > max_w) max_w = workdays[e];
            if (workdays[e] < min_w) min_w = workdays[e];
        }
        return (float)(max_w - min_w);
    }

    // ---- Objective definitions (OBJ_DEFS and compute_obj must match 1:1) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_total_cost
        {ObjDir::Minimize, 5.0f, 0.0f}, // case 1: calc_unfairness (weighted 5x)
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_total_cost(sol); // OBJ_DEFS[0]
            case 1: return calc_unfairness(sol); // OBJ_DEFS[1]
            default: return 0.0f;
        }
    }

    // Penalty: per day, |staff count - required| (both under- and
    // over-staffing are charged, weight 1 per head).
    __device__ float compute_penalty(const Sol& sol) const {
        float penalty = 0.0f;
        for (int d = 0; d < days; d++) {
            int count = 0;
            for (int e = 0; e < emps; e++)
                if (sol.data[d][e]) count++;
            int diff = count - required;
            penalty += (diff > 0) ? (float)diff : (float)(-diff);
        }
        return penalty;
    }

    // Binary encoding, one fixed-length row per day; RowMode::Fixed forbids
    // row-structure operators.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Binary;
        cfg.dim1 = days; cfg.dim2_default = emps;
        cfg.row_mode = RowMode::Fixed;
        fill_obj_config(cfg);
        return cfg;
    }

    // Default full re-evaluation (base-class behavior) — no evaluate_move
    // override needed.

    // ---- shared memory interface ----
    // Stages the cost matrix in shared memory.
    size_t shared_mem_bytes() const {
        return (size_t)days * emps * sizeof(float);
    }

    // NOTE(review): no __syncthreads() here — assumes the framework caller
    // places a block barrier between load_shared and the first read; confirm.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* sc = reinterpret_cast<float*>(smem);
        int total = days * emps;
        for (int i = tid; i < total; i += bsz) sc[i] = d_cost[i];
        d_cost = sc;
    }

    // Factory: uploads the days*emps cost matrix (row-major by day).
    static ScheduleProblem create(const float* hc, int days, int emps, int req) {
        ScheduleProblem prob;
        prob.days = days; prob.emps = emps; prob.required = req;
        float* dc;
        CUDA_CHECK(cudaMalloc(&dc, sizeof(float)*days*emps));
        CUDA_CHECK(cudaMemcpy(dc, hc, sizeof(float)*days*emps, cudaMemcpyHostToDevice));
        prob.d_cost = dc;
        return prob;
    }

    // Releases the device cost matrix.
    void destroy() {
        if (d_cost) { cudaFree(const_cast<float*>(d_cost)); d_cost = nullptr; }
    }
};
|
||||
110
python/cugenopt/include/problems/tsp.cuh
Normal file
110
python/cugenopt/include/problems/tsp.cuh
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
/**
 * tsp.cuh - TSP problem definition
 *
 * Derives from ProblemBase and registers its objective through the
 * ObjDef mechanism.
 */

#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"

struct TSPProblem : ProblemBase<TSPProblem, 1, 64> {
    // Problem data
    const float* d_dist;  // device distance matrix [n*n]
    const float* h_dist;  // host-side matrix, kept for init_relation_matrix
    int n;                // number of cities

    // ---- objective evaluation ----
    // Length of the cyclic tour stored in row 0 (last city wraps to first).
    __device__ float calc_total_distance(const Sol& sol) const {
        const int* tour = sol.data[0];
        const int len = sol.dim2_sizes[0];
        float acc = 0.0f;
        for (int k = 0; k < len; k++) {
            const int from = tour[k];
            const int to = tour[(k + 1) % len];  // wrap-around edge
            acc += d_dist[from * n + to];
        }
        return acc;
    }

    // ---- objective registry (OBJ_DEFS and compute_obj must stay in sync) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},  // case 0: calc_total_distance
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_total_distance(sol);  // OBJ_DEFS[0]
            default: return 0.0f;
        }
    }

    // TSP is unconstrained: no penalty term.
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    // ---- config (encoding/dimensions; objectives filled by the base) ----
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = 1;
        cfg.dim2_default = n;
        fill_obj_config(cfg);
        return cfg;
    }

    // ---- shared memory interface ----
    static constexpr size_t SMEM_LIMIT = 48 * 1024;

    // Dynamic shared-memory request: the full distance matrix, or 0 when
    // it does not fit (0 means "keep the matrix in global memory").
    size_t shared_mem_bytes() const {
        const size_t need = (size_t)n * n * sizeof(float);
        return need <= SMEM_LIMIT ? need : 0;
    }

    // Size of the read-mostly working set regardless of its placement.
    size_t working_set_bytes() const {
        return (size_t)n * n * sizeof(float);
    }

    // Cooperatively stage the distance matrix into shared memory and
    // repoint d_dist at the staged copy.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* staged = reinterpret_cast<float*>(smem);
        const int count = n * n;
        for (int k = tid; k < count; k += bsz)
            staged[k] = d_dist[k];
        d_dist = staged;
    }

    // Distance prior: shorter edges receive higher G/O scores.
    void init_relation_matrix(float* G, float* O, int N) const {
        if (!h_dist || N != n) return;
        // Normalise by the largest pairwise distance.
        float max_d = 0.0f;
        for (int r = 0; r < N; r++)
            for (int c = 0; c < N; c++)
                if (h_dist[r * N + c] > max_d) max_d = h_dist[r * N + c];
        if (max_d <= 0.0f) return;
        for (int r = 0; r < N; r++)
            for (int c = 0; c < N; c++) {
                if (r == c) continue;
                const float closeness = 1.0f - h_dist[r * N + c] / max_d;
                G[r * N + c] = closeness * 0.3f;  // keep the prior mild; EMA refines it
                O[r * N + c] = closeness * 0.1f;
            }
    }

    // Expose the host distance matrix as a heuristic input (at most one).
    int heuristic_matrices(HeuristicMatrix* out, int max_count) const {
        if (max_count < 1 || !h_dist) return 0;
        out[0] = {h_dist, n};
        return 1;
    }

    // Factory: retain the host matrix and upload a device copy.
    static TSPProblem create(const float* h_dist_ptr, int n) {
        TSPProblem prob;
        prob.n = n;
        prob.h_dist = h_dist_ptr;
        const size_t bytes = sizeof(float) * n * n;
        float* dev_dist = nullptr;
        CUDA_CHECK(cudaMalloc(&dev_dist, bytes));
        CUDA_CHECK(cudaMemcpy(dev_dist, h_dist_ptr, bytes, cudaMemcpyHostToDevice));
        prob.d_dist = dev_dist;
        return prob;
    }

    // Free the device matrix and drop the host pointer; safe to repeat.
    void destroy() {
        if (d_dist) { cudaFree(const_cast<float*>(d_dist)); d_dist = nullptr; }
        h_dist = nullptr;
    }
};
|
||||
107
python/cugenopt/include/problems/tsp_large.cuh
Normal file
107
python/cugenopt/include/problems/tsp_large.cuh
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
/**
 * tsp_large.cuh - large-scale TSP problem definition (up to 256 cities)
 *
 * Derives from ProblemBase; identical logic to tsp.cuh, only the D2
 * upper bound differs.
 */

#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"

struct TSPLargeProblem : ProblemBase<TSPLargeProblem, 1, 256> {
    const float* d_dist;  // device distance matrix [n*n]
    const float* h_dist;  // host-side matrix, kept for init_relation_matrix
    int n;                // number of cities

    // ---- objective evaluation ----
    // Length of the cyclic tour stored in row 0 (last city wraps to first).
    __device__ float calc_total_distance(const Sol& sol) const {
        const int* tour = sol.data[0];
        const int len = sol.dim2_sizes[0];
        float acc = 0.0f;
        for (int k = 0; k < len; k++) {
            const int from = tour[k];
            const int to = tour[(k + 1) % len];  // wrap-around edge
            acc += d_dist[from * n + to];
        }
        return acc;
    }

    // ---- objective registry (OBJ_DEFS and compute_obj must stay in sync) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},  // case 0: calc_total_distance
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_total_distance(sol);  // OBJ_DEFS[0]
            default: return 0.0f;
        }
    }

    // TSP is unconstrained: no penalty term.
    __device__ float compute_penalty(const Sol& sol) const {
        return 0.0f;
    }

    // Encoding/dimension configuration; objectives filled by the base class.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = 1;
        cfg.dim2_default = n;
        fill_obj_config(cfg);
        return cfg;
    }

    static constexpr size_t SMEM_LIMIT = 48 * 1024;

    // Dynamic shared-memory request: the distance matrix when it fits,
    // otherwise 0 ("keep it in global memory"). At n = 256 the matrix is
    // 256 KB, so large instances stay in global memory.
    size_t shared_mem_bytes() const {
        const size_t need = (size_t)n * n * sizeof(float);
        return need <= SMEM_LIMIT ? need : 0;
    }

    // Actual size of the distance matrix (whether or not it fits in smem).
    size_t working_set_bytes() const {
        return (size_t)n * n * sizeof(float);
    }

    // Cooperatively stage the distance matrix into shared memory and
    // repoint d_dist at the staged copy.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* staged = reinterpret_cast<float*>(smem);
        const int count = n * n;
        for (int k = tid; k < count; k += bsz)
            staged[k] = d_dist[k];
        d_dist = staged;
    }

    // Distance prior: shorter edges receive higher G/O scores.
    void init_relation_matrix(float* G, float* O, int N) const {
        if (!h_dist || N != n) return;
        float max_d = 0.0f;
        for (int r = 0; r < N; r++)
            for (int c = 0; c < N; c++)
                if (h_dist[r * N + c] > max_d) max_d = h_dist[r * N + c];
        if (max_d <= 0.0f) return;
        for (int r = 0; r < N; r++)
            for (int c = 0; c < N; c++) {
                if (r == c) continue;
                const float closeness = 1.0f - h_dist[r * N + c] / max_d;
                G[r * N + c] = closeness * 0.3f;  // mild prior; EMA refines it
                O[r * N + c] = closeness * 0.1f;
            }
    }

    // Expose the host distance matrix as a heuristic input (at most one).
    int heuristic_matrices(HeuristicMatrix* out, int max_count) const {
        if (max_count < 1 || !h_dist) return 0;
        out[0] = {h_dist, n};
        return 1;
    }

    // Factory: retain the host matrix and upload a device copy.
    static TSPLargeProblem create(const float* h_dist_ptr, int n) {
        TSPLargeProblem prob;
        prob.n = n;
        prob.h_dist = h_dist_ptr;
        const size_t bytes = sizeof(float) * n * n;
        float* dev_dist = nullptr;
        CUDA_CHECK(cudaMalloc(&dev_dist, bytes));
        CUDA_CHECK(cudaMemcpy(dev_dist, h_dist_ptr, bytes, cudaMemcpyHostToDevice));
        prob.d_dist = dev_dist;
        return prob;
    }

    // Free the device matrix and drop the host pointer; safe to repeat.
    void destroy() {
        if (d_dist) { cudaFree(const_cast<float*>(d_dist)); d_dist = nullptr; }
        h_dist = nullptr;
    }
};
|
||||
99
python/cugenopt/include/problems/tsp_xlarge.cuh
Normal file
99
python/cugenopt/include/problems/tsp_xlarge.cuh
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
/**
 * tsp_xlarge.cuh - extra-large TSP problem definition (up to 512 cities)
 *
 * Derives from ProblemBase; same logic as tsp_large.cuh with D2 = 512.
 * Note: a 512x512 float matrix is 1 MB, far beyond 48 KB of shared
 * memory, so shared_mem_bytes() returns 0 and the distance matrix stays
 * in global memory.
 */

#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"

struct TSPXLargeProblem : ProblemBase<TSPXLargeProblem, 1, 512> {
    const float* d_dist;  // device distance matrix [n*n]
    const float* h_dist;  // host-side matrix, kept for init_relation_matrix
    int n;                // number of cities

    // Length of the cyclic tour stored in row 0 (last city wraps to first).
    __device__ float calc_total_distance(const Sol& sol) const {
        const int* tour = sol.data[0];
        const int len = sol.dim2_sizes[0];
        float acc = 0.0f;
        for (int k = 0; k < len; k++) {
            const int from = tour[k];
            const int to = tour[(k + 1) % len];  // wrap-around edge
            acc += d_dist[from * n + to];
        }
        return acc;
    }

    // Objective registry; OBJ_DEFS and compute_obj must stay in sync.
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},  // case 0: calc_total_distance
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_total_distance(sol);  // OBJ_DEFS[0]
            default: return 0.0f;
        }
    }

    // TSP is unconstrained: no penalty term.
    __device__ float compute_penalty(const Sol& sol) const { return 0.0f; }

    // Encoding/dimension configuration; objectives filled by the base class.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = 1;
        cfg.dim2_default = n;
        fill_obj_config(cfg);
        return cfg;
    }

    // Distance matrix is too large for shared memory: opt out entirely.
    size_t shared_mem_bytes() const { return 0; }
    __device__ void load_shared(char*, int, int) {}

    // Actual size of the distance matrix in global memory.
    size_t working_set_bytes() const {
        return (size_t)n * n * sizeof(float);
    }

    // Seed the G/O priors from the distance matrix: closer cities score higher.
    void init_relation_matrix(float* G, float* O, int N) const {
        if (!h_dist || N != n) return;
        // Largest pairwise distance, used for normalisation.
        float max_d = 0.0f;
        for (int r = 0; r < N; r++)
            for (int c = 0; c < N; c++)
                if (h_dist[r * N + c] > max_d) max_d = h_dist[r * N + c];
        if (max_d <= 0.0f) return;

        for (int r = 0; r < N; r++) {
            for (int c = 0; c < N; c++) {
                if (r == c) continue;
                const float closeness = 1.0f - h_dist[r * N + c] / max_d;
                // Close pair -> strong grouping signal (kept mild so the EMA
                // has room to adapt); O gets a weaker, direction-neutral signal.
                G[r * N + c] = closeness * 0.3f;
                O[r * N + c] = closeness * 0.1f;
            }
        }
    }

    // Expose the host distance matrix as a heuristic input (at most one).
    int heuristic_matrices(HeuristicMatrix* out, int max_count) const {
        if (max_count < 1 || !h_dist) return 0;
        out[0] = {h_dist, n};
        return 1;
    }

    // Factory: retain the host matrix and upload a device copy.
    static TSPXLargeProblem create(const float* h_dist_ptr, int n) {
        TSPXLargeProblem prob;
        prob.n = n;
        prob.h_dist = h_dist_ptr;  // keep the host pointer
        const size_t bytes = sizeof(float) * n * n;
        float* dev_dist = nullptr;
        CUDA_CHECK(cudaMalloc(&dev_dist, bytes));
        CUDA_CHECK(cudaMemcpy(dev_dist, h_dist_ptr, bytes, cudaMemcpyHostToDevice));
        prob.d_dist = dev_dist;
        return prob;
    }

    // Free the device matrix and drop the host pointer; safe to repeat.
    void destroy() {
        if (d_dist) { cudaFree(const_cast<float*>(d_dist)); d_dist = nullptr; }
        h_dist = nullptr;
    }
};
|
||||
184
python/cugenopt/include/problems/vrp.cuh
Normal file
184
python/cugenopt/include/problems/vrp.cuh
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
/**
 * vrp.cuh - Capacitated Vehicle Routing Problem (CVRP)
 *
 * Derives from ProblemBase with the ObjDef objective registry.
 * Multi-row encoding: D1 = K routes, partition initialisation plus
 * cross-row operators.
 */

#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"
#include "operators.cuh"
#include "gpu_cache.cuh"

struct VRPProblem : ProblemBase<VRPProblem, 8, 64> {
    // GPU-resident data
    const float* d_dist;    // (n+1)x(n+1) distance matrix, depot included
    const float* d_demand;  // per-customer demand [n]
    const float* h_dist;    // host copy of the matrix (depot included; for init_relation_matrix)
    int n;                  // number of customers (depot excluded)
    int stride;             // row stride of d_dist == n + 1
    float capacity;         // vehicle capacity
    int num_vehicles;       // number of encoded routes (rows)
    int max_vehicles;       // limit on simultaneously active routes
    GpuCache cache;         // optional route-distance memo cache

    // ---- objective evaluation ----
    // Length of one route: depot -> customers -> depot. Customer ids are
    // 0-based; node ids shift by +1 because node 0 is the depot.
    __device__ float compute_route_dist(const int* route, int size) const {
        if (size == 0) return 0.0f;
        float len = 0.0f;
        int at = 0;  // start at the depot
        for (int k = 0; k < size; k++) {
            const int nxt = route[k] + 1;  // customer index -> node id
            len += d_dist[at * stride + nxt];
            at = nxt;
        }
        len += d_dist[at * stride + 0];  // return leg to the depot
        return len;
    }

    // Cached route evaluation: consult the GpuCache when enabled, falling
    // back to a direct computation on a miss.
    __device__ float eval_route(const int* route, int size) const {
        if (size == 0) return 0.0f;
        if (!cache.keys) return compute_route_dist(route, size);

        const uint64_t key = route_hash(route, size);
        float len;
        if (cache_lookup(cache, key, len)) {
            atomicAdd(cache.d_hits, 1);
            return len;
        }
        len = compute_route_dist(route, size);
        cache_insert(cache, key, len);
        atomicAdd(cache.d_misses, 1);
        return len;
    }

    // Total travelled distance over all routes.
    __device__ float calc_total_distance(const Sol& sol) const {
        float sum = 0.0f;
        for (int r = 0; r < num_vehicles; r++)
            sum += eval_route(sol.data[r], sol.dim2_sizes[r]);
        return sum;
    }

    // ---- objective registry (OBJ_DEFS and compute_obj must stay in sync) ----
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},  // case 0: calc_total_distance
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_total_distance(sol);  // OBJ_DEFS[0]
            default: return 0.0f;
        }
    }

    // Constraint penalties: capacity overflow per route, plus a penalty
    // when more routes are active than max_vehicles allows.
    __device__ float compute_penalty(const Sol& sol) const {
        float pen = 0.0f;
        int used = 0;
        for (int r = 0; r < num_vehicles; r++) {
            const int size = sol.dim2_sizes[r];
            if (size == 0) continue;
            used++;
            float load = 0.0f;
            for (int k = 0; k < size; k++)
                load += d_demand[sol.data[r][k]];
            if (load > capacity)
                pen += (load - capacity) * 100.0f;
        }
        if (used > max_vehicles)
            pen += (float)(used - max_vehicles) * 1000.0f;
        return pen;
    }

    // Encoding/dimension configuration; objectives filled by the base class.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = num_vehicles;
        cfg.dim2_default = 0;
        fill_obj_config(cfg);
        cfg.cross_row_prob = 0.3f;
        cfg.row_mode = RowMode::Partition;
        cfg.total_elements = n;
        return cfg;
    }

    // ---- shared memory interface ----
    static constexpr size_t SMEM_LIMIT = 48 * 1024;

    // Dynamic shared-memory request: distance matrix plus demand vector,
    // or 0 when the combined footprint exceeds SMEM_LIMIT.
    size_t shared_mem_bytes() const {
        const size_t dist_bytes = (size_t)stride * stride * sizeof(float);
        const size_t demand_bytes = (size_t)n * sizeof(float);
        const size_t total = dist_bytes + demand_bytes;
        return total <= SMEM_LIMIT ? total : 0;
    }

    size_t working_set_bytes() const {
        return (size_t)stride * stride * sizeof(float) + (size_t)n * sizeof(float);
    }

    // Cooperatively stage distances then demands into shared memory and
    // repoint the device pointers at the staged copies.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* staged = reinterpret_cast<float*>(smem);
        const int dist_count = stride * stride;
        for (int k = tid; k < dist_count; k += bsz) staged[k] = d_dist[k];
        d_dist = staged;

        float* staged_dem = staged + dist_count;
        for (int k = tid; k < n; k += bsz) staged_dem[k] = d_demand[k];
        d_demand = staged_dem;
    }

    void enable_cache(int cap = 65536) { cache = GpuCache::allocate(cap); }
    void print_cache_stats() const { cache.print_stats(); }

    // Distance prior: customers that are close receive higher G/O scores.
    // h_dist includes the depot (stride x stride); element i maps to node i+1.
    void init_relation_matrix(float* G, float* O, int N) const {
        if (!h_dist || N != n) return;
        float max_d = 0.0f;
        for (int r = 0; r < N; r++)
            for (int c = 0; c < N; c++) {
                const float d = h_dist[(r + 1) * stride + (c + 1)];  // skip depot row/col
                if (d > max_d) max_d = d;
            }
        if (max_d <= 0.0f) return;
        for (int r = 0; r < N; r++)
            for (int c = 0; c < N; c++) {
                if (r == c) continue;
                const float d = h_dist[(r + 1) * stride + (c + 1)];
                const float closeness = 1.0f - d / max_d;
                G[r * N + c] = closeness * 0.3f;
                O[r * N + c] = closeness * 0.1f;
            }
    }

    // Factory: upload host data and assemble a ready-to-use instance
    // (cache starts disabled; call enable_cache to turn it on).
    static VRPProblem create(const float* h_dist_ptr, const float* h_demand,
                             int n, float capacity,
                             int num_vehicles, int max_vehicles) {
        VRPProblem prob;
        prob.n = n;
        prob.stride = n + 1;
        prob.capacity = capacity;
        prob.num_vehicles = num_vehicles;
        prob.max_vehicles = max_vehicles;
        prob.cache = GpuCache::disabled();
        prob.h_dist = h_dist_ptr;

        const int n_nodes = n + 1;
        float* dd = nullptr;
        CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * n_nodes * n_nodes));
        CUDA_CHECK(cudaMemcpy(dd, h_dist_ptr, sizeof(float) * n_nodes * n_nodes, cudaMemcpyHostToDevice));
        prob.d_dist = dd;

        float* ddem = nullptr;
        CUDA_CHECK(cudaMalloc(&ddem, sizeof(float) * n));
        CUDA_CHECK(cudaMemcpy(ddem, h_demand, sizeof(float) * n, cudaMemcpyHostToDevice));
        prob.d_demand = ddem;

        return prob;
    }

    // Free all device allocations and the cache; safe to call repeatedly.
    void destroy() {
        if (d_dist) { cudaFree(const_cast<float*>(d_dist)); d_dist = nullptr; }
        if (d_demand) { cudaFree(const_cast<float*>(d_demand)); d_demand = nullptr; }
        h_dist = nullptr;
        cache.destroy();
    }
};
|
||||
192
python/cugenopt/include/problems/vrptw.cuh
Normal file
192
python/cugenopt/include/problems/vrptw.cuh
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
/**
 * vrptw.cuh - Vehicle Routing Problem with Time Windows (VRPTW)
 *
 * CVRP extended with time-window constraints.
 * Encoding: multi-row permutation partition (as CVRP); data[r][j] is the
 * j-th customer of route r.
 * Objective: minimise total distance.
 * Constraints: (a) vehicle capacity, (b) time windows — arrival must be
 * <= latest; arriving before earliest means waiting.
 *
 * Validation instance: 8 customers / 3 vehicles with hand-crafted
 * coordinates and windows, guaranteeing a known feasible solution.
 */

#pragma once
#include "types.cuh"
#include "cuda_utils.cuh"

struct VRPTWProblem : ProblemBase<VRPTWProblem, 8, 64> {
    const float* d_dist;      // distance matrix [(n+1)*(n+1)], depot included
    const float* d_demand;    // demand [n]
    const float* d_earliest;  // earliest service time [n+1], depot included
    const float* d_latest;    // latest service time [n+1], depot included
    const float* d_service;   // service duration [n+1], depot included
    int n;                    // customer count (depot excluded)
    int stride;               // n + 1
    float capacity;
    int num_vehicles;
    int max_vehicles;

    // Length of one route: depot -> customers -> depot. Customer ids are
    // 0-based; node ids shift by +1 because node 0 is the depot.
    __device__ float compute_route_dist(const int* route, int size) const {
        if (size == 0) return 0.0f;
        float len = 0.0f;
        int at = 0;  // start at the depot
        for (int k = 0; k < size; k++) {
            const int nxt = route[k] + 1;  // customer index -> node id
            len += d_dist[at * stride + nxt];
            at = nxt;
        }
        len += d_dist[at * stride + 0];  // return leg to the depot
        return len;
    }

    // Total travelled distance over all routes.
    __device__ float calc_total_distance(const Sol& sol) const {
        float sum = 0.0f;
        for (int r = 0; r < num_vehicles; r++)
            sum += compute_route_dist(sol.data[r], sol.dim2_sizes[r]);
        return sum;
    }

    // Objective registry; OBJ_DEFS and compute_obj must stay in sync.
    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f},  // case 0: calc_total_distance
    };
    __device__ float compute_obj(int idx, const Sol& sol) const {
        switch (idx) {
            case 0: return calc_total_distance(sol);  // OBJ_DEFS[0]
            default: return 0.0f;
        }
    }

    // Constraint penalties: capacity overflow, time-window violations
    // (simulated by driving the route), and an active-vehicle cap.
    __device__ float compute_penalty(const Sol& sol) const {
        float pen = 0.0f;
        int used = 0;
        for (int r = 0; r < num_vehicles; r++) {
            const int size = sol.dim2_sizes[r];
            if (size == 0) continue;
            used++;

            // Capacity constraint.
            float load = 0.0f;
            for (int k = 0; k < size; k++)
                load += d_demand[sol.data[r][k]];
            if (load > capacity)
                pen += (load - capacity) * 100.0f;

            // Time-window constraint: simulate driving the route.
            float clock = 0.0f;
            int at = 0;
            for (int k = 0; k < size; k++) {
                const int nxt = sol.data[r][k] + 1;
                clock += d_dist[at * stride + nxt];  // travel time
                // Arriving early means waiting until the window opens.
                if (clock < d_earliest[nxt])
                    clock = d_earliest[nxt];
                // Arriving late is penalised.
                if (clock > d_latest[nxt])
                    pen += (clock - d_latest[nxt]) * 50.0f;
                clock += d_service[nxt];
                at = nxt;
            }
            // Depot also has a closing time for the return leg.
            const float back = clock + d_dist[at * stride + 0];
            if (back > d_latest[0])
                pen += (back - d_latest[0]) * 50.0f;
        }
        if (used > max_vehicles)
            pen += (float)(used - max_vehicles) * 1000.0f;
        return pen;
    }

    // Encoding/dimension configuration; objectives filled by the base class.
    ProblemConfig config() const {
        ProblemConfig cfg;
        cfg.encoding = EncodingType::Permutation;
        cfg.dim1 = num_vehicles;
        cfg.dim2_default = 0;
        fill_obj_config(cfg);
        cfg.cross_row_prob = 0.3f;
        cfg.row_mode = RowMode::Partition;
        cfg.total_elements = n;
        return cfg;
    }

    static constexpr size_t SMEM_LIMIT = 48 * 1024;

    // Dynamic shared-memory request: distance matrix plus auxiliary
    // vectors, or 0 when it exceeds SMEM_LIMIT. The aux term (n+1)*4
    // slightly over-counts (demand is n, the three window arrays are n+1
    // each), which is a safe, conservative bound.
    size_t shared_mem_bytes() const {
        const size_t dist_bytes = (size_t)stride * stride * sizeof(float);
        const size_t aux_bytes = (size_t)(n + 1) * 4 * sizeof(float);
        const size_t total = dist_bytes + aux_bytes;
        return total <= SMEM_LIMIT ? total : 0;
    }

    size_t working_set_bytes() const {
        return (size_t)stride * stride * sizeof(float) + (size_t)(n + 1) * 4 * sizeof(float);
    }

    // Cooperatively stage all read-only arrays into shared memory, laid
    // out back to back: dist | demand(n) | earliest | latest | service.
    __device__ void load_shared(char* smem, int tid, int bsz) {
        float* staged = reinterpret_cast<float*>(smem);
        const int dist_count = stride * stride;
        for (int k = tid; k < dist_count; k += bsz) staged[k] = d_dist[k];
        d_dist = staged;

        float* staged_dem = staged + dist_count;
        for (int k = tid; k < n; k += bsz) staged_dem[k] = d_demand[k];
        d_demand = staged_dem;

        const int nn = n + 1;
        float* staged_e = staged_dem + n;
        for (int k = tid; k < nn; k += bsz) staged_e[k] = d_earliest[k];
        d_earliest = staged_e;

        float* staged_l = staged_e + nn;
        for (int k = tid; k < nn; k += bsz) staged_l[k] = d_latest[k];
        d_latest = staged_l;

        float* staged_s = staged_l + nn;
        for (int k = tid; k < nn; k += bsz) staged_s[k] = d_service[k];
        d_service = staged_s;
    }

    // Factory: upload all host arrays to the device.
    static VRPTWProblem create(const float* h_dist, const float* h_demand,
                               const float* h_earliest, const float* h_latest,
                               const float* h_service,
                               int n, float capacity,
                               int num_vehicles, int max_vehicles) {
        VRPTWProblem prob;
        prob.n = n;
        prob.stride = n + 1;
        prob.capacity = capacity;
        prob.num_vehicles = num_vehicles;
        prob.max_vehicles = max_vehicles;

        const int nn = n + 1;
        float *dd, *ddem, *de, *dl, *ds;
        CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * nn * nn));
        CUDA_CHECK(cudaMemcpy(dd, h_dist, sizeof(float) * nn * nn, cudaMemcpyHostToDevice));
        prob.d_dist = dd;

        CUDA_CHECK(cudaMalloc(&ddem, sizeof(float) * n));
        CUDA_CHECK(cudaMemcpy(ddem, h_demand, sizeof(float) * n, cudaMemcpyHostToDevice));
        prob.d_demand = ddem;

        CUDA_CHECK(cudaMalloc(&de, sizeof(float) * nn));
        CUDA_CHECK(cudaMemcpy(de, h_earliest, sizeof(float) * nn, cudaMemcpyHostToDevice));
        prob.d_earliest = de;

        CUDA_CHECK(cudaMalloc(&dl, sizeof(float) * nn));
        CUDA_CHECK(cudaMemcpy(dl, h_latest, sizeof(float) * nn, cudaMemcpyHostToDevice));
        prob.d_latest = dl;

        CUDA_CHECK(cudaMalloc(&ds, sizeof(float) * nn));
        CUDA_CHECK(cudaMemcpy(ds, h_service, sizeof(float) * nn, cudaMemcpyHostToDevice));
        prob.d_service = ds;

        return prob;
    }

    // Free all device allocations; safe to call repeatedly.
    void destroy() {
        if (d_dist) { cudaFree(const_cast<float*>(d_dist)); d_dist = nullptr; }
        if (d_demand) { cudaFree(const_cast<float*>(d_demand)); d_demand = nullptr; }
        if (d_earliest) { cudaFree(const_cast<float*>(d_earliest)); d_earliest = nullptr; }
        if (d_latest) { cudaFree(const_cast<float*>(d_latest)); d_latest = nullptr; }
        if (d_service) { cudaFree(const_cast<float*>(d_service)); d_service = nullptr; }
    }
};
|
||||
Loading…
Add table
Add a link
Reference in a new issue