mirror of
https://github.com/L-yang-yang/cugenopt.git
synced 2026-06-28 22:09:39 +02:00
Initial commit: cuGenOpt GPU optimization solver
This commit is contained in:
commit
fc5a0ff4af
117 changed files with 25545 additions and 0 deletions
130
benchmark/experiments/e11_ultra_large/medium_vrp.cuh
Normal file
130
benchmark/experiments/e11_ultra_large/medium_vrp.cuh
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
#include "operators.cuh"
|
||||
|
||||
// Medium-scale VRP test problem. The original note gives the intended limits
// as 512 customers and 24 vehicles, matching the <24, 512> arguments passed
// to ProblemBase below.
struct MediumVRPProblem : ProblemBase<MediumVRPProblem, 24, 512> {
    // Buffers allocated with cudaMalloc in create() / clone_to_device().
    const float* d_dist;
    const float* d_demand;
    // Host arrays supplied by the caller; only the pointers are stored here
    // (they are re-read by clone_to_device() and never freed by destroy()).
    const float* h_dist;
    const float* h_demand;
    int n;             // customer count; the distance matrix is (n+1) x (n+1)
    float capacity;    // per-route load limit used by compute_penalty()
    int num_vehicles;  // number of solution rows that are evaluated
    int max_vehicles;  // stored and cloned, but not read inside this struct

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}
    };

    // Sums the distance-matrix entries along every non-empty row of the
    // solution: index 0 -> first customer, customer -> customer, and last
    // customer -> index 0. Stored customer ids are shifted by +1 to index the
    // matrix (index 0 is presumably the depot). obj_idx is not used.
    __device__ float compute_obj(int obj_idx, const Sol& s) const {
        const int stride = n + 1;
        float cost = 0;
        for (int r = 0; r < num_vehicles; ++r) {
            const int len = s.dim2_sizes[r];
            if (len != 0) {
                // Leg out of matrix index 0.
                int cur = s.data[r][0] + 1;
                cost += d_dist[cur];

                for (int k = 1; k < len; ++k) {
                    const int nxt = s.data[r][k] + 1;
                    cost += d_dist[cur * stride + nxt];
                    cur = nxt;
                }

                // Leg back to matrix index 0.
                cost += d_dist[cur * stride];
            }
        }
        return cost;
    }

    // Returns 100 x the summed capacity excess over all rows whose accumulated
    // demand is greater than `capacity`; 0 when no row exceeds it.
    __device__ float compute_penalty(const Sol& s) const {
        float result = 0;
        for (int r = 0; r < num_vehicles; ++r) {
            float carried = 0;
            for (int k = 0; k < s.dim2_sizes[r]; ++k) {
                carried += d_demand[s.data[r][k]];
            }
            if (carried > capacity) {
                result += (carried - capacity) * 100.0f;
            }
        }
        return result;
    }

    // Fills a ProblemConfig: permutation encoding, dim1 = num_vehicles,
    // dim2_default = 0, partition row mode over n elements, and a cross-row
    // probability of 0.3. Objective fields come from fill_obj_config().
    ProblemConfig config() const {
        ProblemConfig out;
        out.dim2_default = 0;
        out.dim1 = num_vehicles;
        out.encoding = EncodingType::Permutation;
        fill_obj_config(out);
        out.total_elements = n;
        out.row_mode = RowMode::Partition;
        out.cross_row_prob = 0.3f;
        return out;
    }

    // Byte size of the distance matrix plus the demand array.
    size_t working_set_bytes() const {
        const size_t matrix_bytes = (size_t)(n + 1) * (n + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)n * sizeof(float);
        return matrix_bytes + demand_bytes;
    }

    // Builds a problem instance and uploads both host arrays to newly
    // allocated device buffers on the current device.
    //   h_dist_matrix  : (num_customers+1)^2 floats
    //   h_demand_array : num_customers floats
    // The host pointers are retained, so they must outlive any later
    // clone_to_device() call. Release the device buffers with destroy().
    static MediumVRPProblem create(const float* h_dist_matrix, const float* h_demand_array,
                                   int num_customers, float vehicle_capacity,
                                   int num_veh, int max_veh) {
        MediumVRPProblem prob;
        prob.h_dist = h_dist_matrix;
        prob.h_demand = h_demand_array;
        prob.n = num_customers;
        prob.capacity = vehicle_capacity;
        prob.num_vehicles = num_veh;
        prob.max_vehicles = max_veh;

        const size_t matrix_bytes =
            (size_t)(num_customers + 1) * (num_customers + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)num_customers * sizeof(float);

        CUDA_CHECK(cudaMalloc(&prob.d_dist, matrix_bytes));
        CUDA_CHECK(cudaMalloc(&prob.d_demand, demand_bytes));
        CUDA_CHECK(cudaMemcpy((void*)prob.d_dist, h_dist_matrix, matrix_bytes,
                              cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy((void*)prob.d_demand, h_demand_array, demand_bytes,
                              cudaMemcpyHostToDevice));

        return prob;
    }

    // Frees the device buffers (if set) and nulls the pointers.
    void destroy() {
        if (d_dist != nullptr) {
            cudaFree((void*)d_dist);
        }
        if (d_demand != nullptr) {
            cudaFree((void*)d_demand);
        }
        d_demand = nullptr;
        d_dist = nullptr;
    }

    // Allocates a heap copy of this problem whose device buffers live on
    // `target_gpu`, filled from the retained host arrays. The previously
    // current device is restored before returning. The returned object comes
    // from `new`; nothing in this struct deletes it.
    MediumVRPProblem* clone_to_device(int target_gpu) const {
        int previous_device;
        CUDA_CHECK(cudaGetDevice(&previous_device));
        CUDA_CHECK(cudaSetDevice(target_gpu));

        float* dev_dist;
        float* dev_demand;
        const size_t matrix_bytes = (size_t)(n + 1) * (n + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)n * sizeof(float);

        CUDA_CHECK(cudaMalloc(&dev_dist, matrix_bytes));
        CUDA_CHECK(cudaMalloc(&dev_demand, demand_bytes));
        CUDA_CHECK(cudaMemcpy(dev_dist, h_dist, matrix_bytes, cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(dev_demand, h_demand, demand_bytes, cudaMemcpyHostToDevice));

        CUDA_CHECK(cudaSetDevice(previous_device));

        MediumVRPProblem* copy = new MediumVRPProblem();
        copy->d_dist = dev_dist;
        copy->d_demand = dev_demand;
        copy->h_dist = h_dist;
        copy->h_demand = h_demand;
        copy->n = n;
        copy->capacity = capacity;
        copy->num_vehicles = num_vehicles;
        copy->max_vehicles = max_vehicles;

        return copy;
    }
};
|
||||
132
benchmark/experiments/e11_ultra_large/optimized_vrp.cuh
Normal file
132
benchmark/experiments/e11_ultra_large/optimized_vrp.cuh
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
#include "operators.cuh"
|
||||
|
||||
// Optimized large-scale VRP problem.
// Template arguments: D1 = 32 (at most 32 vehicles), D2 = 256 (at most 256
// customers per vehicle). The original note quotes a solution size of 32 KB.
// NOTE(review): the original header also said "up to 500 customers, 80
// vehicles", which does not match D1 = 32 — confirm the intended limit.
struct OptimizedVRPProblem : ProblemBase<OptimizedVRPProblem, 32, 256> {
    // Buffers allocated with cudaMalloc in create() / clone_to_device().
    const float* d_dist;
    const float* d_demand;
    // Host arrays supplied by the caller; only the pointers are stored here
    // (they are re-read by clone_to_device() and never freed by destroy()).
    const float* h_dist;
    const float* h_demand;
    int n;             // customer count; the distance matrix is (n+1) x (n+1)
    float capacity;    // per-route load limit used by compute_penalty()
    int num_vehicles;  // number of solution rows that are evaluated
    int max_vehicles;  // stored and cloned, but not read inside this struct

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}
    };

    // Sums the distance-matrix entries along every non-empty row of the
    // solution: index 0 -> first customer, customer -> customer, and last
    // customer -> index 0. Stored customer ids are shifted by +1 to index the
    // matrix (index 0 is presumably the depot). obj_idx is not used.
    __device__ float compute_obj(int obj_idx, const Sol& s) const {
        const int stride = n + 1;
        float cost = 0;
        for (int r = 0; r < num_vehicles; ++r) {
            const int len = s.dim2_sizes[r];
            if (len != 0) {
                // Leg out of matrix index 0.
                int cur = s.data[r][0] + 1;
                cost += d_dist[cur];

                for (int k = 1; k < len; ++k) {
                    const int nxt = s.data[r][k] + 1;
                    cost += d_dist[cur * stride + nxt];
                    cur = nxt;
                }

                // Leg back to matrix index 0.
                cost += d_dist[cur * stride];
            }
        }
        return cost;
    }

    // Returns 100 x the summed capacity excess over all rows whose accumulated
    // demand is greater than `capacity`; 0 when no row exceeds it.
    __device__ float compute_penalty(const Sol& s) const {
        float result = 0;
        for (int r = 0; r < num_vehicles; ++r) {
            float carried = 0;
            for (int k = 0; k < s.dim2_sizes[r]; ++k) {
                carried += d_demand[s.data[r][k]];
            }
            if (carried > capacity) {
                result += (carried - capacity) * 100.0f;
            }
        }
        return result;
    }

    // Fills a ProblemConfig: permutation encoding, dim1 = num_vehicles,
    // dim2_default = 0, partition row mode over n elements, and a cross-row
    // probability of 0.3. Objective fields come from fill_obj_config().
    ProblemConfig config() const {
        ProblemConfig out;
        out.dim2_default = 0;
        out.dim1 = num_vehicles;
        out.encoding = EncodingType::Permutation;
        fill_obj_config(out);
        out.total_elements = n;
        out.row_mode = RowMode::Partition;
        out.cross_row_prob = 0.3f;
        return out;
    }

    // Byte size of the distance matrix plus the demand array.
    size_t working_set_bytes() const {
        const size_t matrix_bytes = (size_t)(n + 1) * (n + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)n * sizeof(float);
        return matrix_bytes + demand_bytes;
    }

    // Builds a problem instance and uploads both host arrays to newly
    // allocated device buffers on the current device.
    //   h_dist_matrix  : (num_customers+1)^2 floats
    //   h_demand_array : num_customers floats
    // The host pointers are retained, so they must outlive any later
    // clone_to_device() call. Release the device buffers with destroy().
    static OptimizedVRPProblem create(const float* h_dist_matrix, const float* h_demand_array,
                                      int num_customers, float vehicle_capacity,
                                      int num_veh, int max_veh) {
        OptimizedVRPProblem prob;
        prob.h_dist = h_dist_matrix;
        prob.h_demand = h_demand_array;
        prob.n = num_customers;
        prob.capacity = vehicle_capacity;
        prob.num_vehicles = num_veh;
        prob.max_vehicles = max_veh;

        const size_t matrix_bytes =
            (size_t)(num_customers + 1) * (num_customers + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)num_customers * sizeof(float);

        CUDA_CHECK(cudaMalloc(&prob.d_dist, matrix_bytes));
        CUDA_CHECK(cudaMalloc(&prob.d_demand, demand_bytes));
        CUDA_CHECK(cudaMemcpy((void*)prob.d_dist, h_dist_matrix, matrix_bytes,
                              cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy((void*)prob.d_demand, h_demand_array, demand_bytes,
                              cudaMemcpyHostToDevice));

        return prob;
    }

    // Frees the device buffers (if set) and nulls the pointers.
    void destroy() {
        if (d_dist != nullptr) {
            cudaFree((void*)d_dist);
        }
        if (d_demand != nullptr) {
            cudaFree((void*)d_demand);
        }
        d_demand = nullptr;
        d_dist = nullptr;
    }

    // Allocates a heap copy of this problem whose device buffers live on
    // `target_gpu`, filled from the retained host arrays. The previously
    // current device is restored before returning. The returned object comes
    // from `new`; nothing in this struct deletes it.
    OptimizedVRPProblem* clone_to_device(int target_gpu) const {
        int previous_device;
        CUDA_CHECK(cudaGetDevice(&previous_device));
        CUDA_CHECK(cudaSetDevice(target_gpu));

        float* dev_dist;
        float* dev_demand;
        const size_t matrix_bytes = (size_t)(n + 1) * (n + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)n * sizeof(float);

        CUDA_CHECK(cudaMalloc(&dev_dist, matrix_bytes));
        CUDA_CHECK(cudaMalloc(&dev_demand, demand_bytes));
        CUDA_CHECK(cudaMemcpy(dev_dist, h_dist, matrix_bytes, cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(dev_demand, h_demand, demand_bytes, cudaMemcpyHostToDevice));

        CUDA_CHECK(cudaSetDevice(previous_device));

        OptimizedVRPProblem* copy = new OptimizedVRPProblem();
        copy->d_dist = dev_dist;
        copy->d_demand = dev_demand;
        copy->h_dist = h_dist;
        copy->h_demand = h_demand;
        copy->n = n;
        copy->capacity = capacity;
        copy->num_vehicles = num_vehicles;
        copy->max_vehicles = max_vehicles;

        return copy;
    }
};
|
||||
132
benchmark/experiments/e11_ultra_large/optimized_vrp_v2.cuh
Normal file
132
benchmark/experiments/e11_ultra_large/optimized_vrp_v2.cuh
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
#include "operators.cuh"
|
||||
|
||||
// Optimized large-scale VRP problem, variant 2 (per the original note: up to
// 500 customers and 80 vehicles).
// Template arguments: D1 = 80 (80 vehicles), D2 = 128 (at most 128 customers
// per vehicle). Solution size quoted by the original note: 80 x 128 x 4 = 40 KB.
struct OptimizedVRPv2Problem : ProblemBase<OptimizedVRPv2Problem, 80, 128> {
    // Buffers allocated with cudaMalloc in create() / clone_to_device().
    const float* d_dist;
    const float* d_demand;
    // Host arrays supplied by the caller; only the pointers are stored here
    // (they are re-read by clone_to_device() and never freed by destroy()).
    const float* h_dist;
    const float* h_demand;
    int n;             // customer count; the distance matrix is (n+1) x (n+1)
    float capacity;    // per-route load limit used by compute_penalty()
    int num_vehicles;  // number of solution rows that are evaluated
    int max_vehicles;  // stored and cloned, but not read inside this struct

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}
    };

    // Sums the distance-matrix entries along every non-empty row of the
    // solution: index 0 -> first customer, customer -> customer, and last
    // customer -> index 0. Stored customer ids are shifted by +1 to index the
    // matrix (index 0 is presumably the depot). obj_idx is not used.
    __device__ float compute_obj(int obj_idx, const Sol& s) const {
        const int stride = n + 1;
        float cost = 0;
        for (int r = 0; r < num_vehicles; ++r) {
            const int len = s.dim2_sizes[r];
            if (len != 0) {
                // Leg out of matrix index 0.
                int cur = s.data[r][0] + 1;
                cost += d_dist[cur];

                for (int k = 1; k < len; ++k) {
                    const int nxt = s.data[r][k] + 1;
                    cost += d_dist[cur * stride + nxt];
                    cur = nxt;
                }

                // Leg back to matrix index 0.
                cost += d_dist[cur * stride];
            }
        }
        return cost;
    }

    // Returns 100 x the summed capacity excess over all rows whose accumulated
    // demand is greater than `capacity`; 0 when no row exceeds it.
    __device__ float compute_penalty(const Sol& s) const {
        float result = 0;
        for (int r = 0; r < num_vehicles; ++r) {
            float carried = 0;
            for (int k = 0; k < s.dim2_sizes[r]; ++k) {
                carried += d_demand[s.data[r][k]];
            }
            if (carried > capacity) {
                result += (carried - capacity) * 100.0f;
            }
        }
        return result;
    }

    // Fills a ProblemConfig: permutation encoding, dim1 = num_vehicles,
    // dim2_default = 0, partition row mode over n elements, and a cross-row
    // probability of 0.3. Objective fields come from fill_obj_config().
    ProblemConfig config() const {
        ProblemConfig out;
        out.dim2_default = 0;
        out.dim1 = num_vehicles;
        out.encoding = EncodingType::Permutation;
        fill_obj_config(out);
        out.total_elements = n;
        out.row_mode = RowMode::Partition;
        out.cross_row_prob = 0.3f;
        return out;
    }

    // Byte size of the distance matrix plus the demand array.
    size_t working_set_bytes() const {
        const size_t matrix_bytes = (size_t)(n + 1) * (n + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)n * sizeof(float);
        return matrix_bytes + demand_bytes;
    }

    // Builds a problem instance and uploads both host arrays to newly
    // allocated device buffers on the current device.
    //   h_dist_matrix  : (num_customers+1)^2 floats
    //   h_demand_array : num_customers floats
    // The host pointers are retained, so they must outlive any later
    // clone_to_device() call. Release the device buffers with destroy().
    static OptimizedVRPv2Problem create(const float* h_dist_matrix, const float* h_demand_array,
                                        int num_customers, float vehicle_capacity,
                                        int num_veh, int max_veh) {
        OptimizedVRPv2Problem prob;
        prob.h_dist = h_dist_matrix;
        prob.h_demand = h_demand_array;
        prob.n = num_customers;
        prob.capacity = vehicle_capacity;
        prob.num_vehicles = num_veh;
        prob.max_vehicles = max_veh;

        const size_t matrix_bytes =
            (size_t)(num_customers + 1) * (num_customers + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)num_customers * sizeof(float);

        CUDA_CHECK(cudaMalloc(&prob.d_dist, matrix_bytes));
        CUDA_CHECK(cudaMalloc(&prob.d_demand, demand_bytes));
        CUDA_CHECK(cudaMemcpy((void*)prob.d_dist, h_dist_matrix, matrix_bytes,
                              cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy((void*)prob.d_demand, h_demand_array, demand_bytes,
                              cudaMemcpyHostToDevice));

        return prob;
    }

    // Frees the device buffers (if set) and nulls the pointers.
    void destroy() {
        if (d_dist != nullptr) {
            cudaFree((void*)d_dist);
        }
        if (d_demand != nullptr) {
            cudaFree((void*)d_demand);
        }
        d_demand = nullptr;
        d_dist = nullptr;
    }

    // Allocates a heap copy of this problem whose device buffers live on
    // `target_gpu`, filled from the retained host arrays. The previously
    // current device is restored before returning. The returned object comes
    // from `new`; nothing in this struct deletes it.
    OptimizedVRPv2Problem* clone_to_device(int target_gpu) const {
        int previous_device;
        CUDA_CHECK(cudaGetDevice(&previous_device));
        CUDA_CHECK(cudaSetDevice(target_gpu));

        float* dev_dist;
        float* dev_demand;
        const size_t matrix_bytes = (size_t)(n + 1) * (n + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)n * sizeof(float);

        CUDA_CHECK(cudaMalloc(&dev_dist, matrix_bytes));
        CUDA_CHECK(cudaMalloc(&dev_demand, demand_bytes));
        CUDA_CHECK(cudaMemcpy(dev_dist, h_dist, matrix_bytes, cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(dev_demand, h_demand, demand_bytes, cudaMemcpyHostToDevice));

        CUDA_CHECK(cudaSetDevice(previous_device));

        OptimizedVRPv2Problem* copy = new OptimizedVRPv2Problem();
        copy->d_dist = dev_dist;
        copy->d_demand = dev_demand;
        copy->h_dist = h_dist;
        copy->h_demand = h_demand;
        copy->n = n;
        copy->capacity = capacity;
        copy->num_vehicles = num_vehicles;
        copy->max_vehicles = max_vehicles;

        return copy;
    }
};
|
||||
120
benchmark/experiments/e11_ultra_large/test_e11.cu
Normal file
120
benchmark/experiments/e11_ultra_large/test_e11.cu
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
#include "solver.cuh"
#include "multi_gpu_solver.cuh"
#include "ultra_large_tsp.cuh"
#include "ultra_large_vrp.cuh"

#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <vector>
|
||||
|
||||
// Fills `dist` (row-major, n x n floats) with a symmetric random distance
// matrix: zeros on the diagonal and off-diagonal values of
// 10 + (rand() % 10000) / 10, i.e. in [10.0, 1009.9].
//
//   dist : caller-allocated buffer of at least n * n floats
//   n    : number of cities; n <= 0 writes nothing
//   seed : passed to srand(), so the same seed reproduces the same matrix
//          (note this reseeds the process-wide rand() state)
//
// Offsets are computed in size_t so that i * n + j cannot overflow `int`
// when n * n exceeds INT_MAX.
void generate_random_tsp(float* dist, int n, unsigned seed) {
    srand(seed);
    const size_t dim = n > 0 ? static_cast<size_t>(n) : 0;
    for (size_t i = 0; i < dim; ++i) {
        dist[i * dim + i] = 0.0f;
        // Draw only the upper triangle and mirror it to keep the matrix symmetric.
        for (size_t j = i + 1; j < dim; ++j) {
            const float d = 10.0f + (rand() % 10000) / 10.0f;
            dist[i * dim + j] = d;
            dist[j * dim + i] = d;
        }
    }
}
|
||||
|
||||
// Fills a random VRP instance with n customers.
//
//   dist   : caller-allocated buffer of at least (n+1) * (n+1) floats; receives
//            a symmetric row-major matrix with a zero diagonal and off-diagonal
//            values of 10 + (rand() % 10000) / 10
//   demand : caller-allocated buffer of at least n floats; each entry becomes
//            5 + (rand() % 20), i.e. an integer value in [5, 24]
//   n      : number of customers; n < 0 writes nothing
//   seed   : passed to srand(), so the same seed reproduces the same instance
//            (note this reseeds the process-wide rand() state)
//
// Distances are drawn first, then demands, so the random sequence is consumed
// in a fixed order. Offsets use size_t so that i * (n+1) + j cannot overflow
// `int` for large n.
void generate_random_vrp(float* dist, float* demand, int n, unsigned seed) {
    srand(seed);
    const size_t stride = n >= 0 ? static_cast<size_t>(n) + 1 : 0;
    for (size_t i = 0; i < stride; ++i) {
        dist[i * stride + i] = 0.0f;
        // Draw only the upper triangle and mirror it to keep the matrix symmetric.
        for (size_t j = i + 1; j < stride; ++j) {
            const float d = 10.0f + (rand() % 10000) / 10.0f;
            dist[i * stride + j] = d;
            dist[j * stride + i] = d;
        }
    }
    const size_t customers = n > 0 ? static_cast<size_t>(n) : 0;
    for (size_t c = 0; c < customers; ++c) {
        demand[c] = 5.0f + (rand() % 20);
    }
}
|
||||
|
||||
// Quick E11 validation driver.
//
// Runs two single-GPU solves of 1000 generations each and prints the best
// objective, the wall-clock time in whole seconds, and a x5 extrapolation to
// 5000 generations:
//   1. a random TSP with 1000 cities (UltraLargeTSPProblem)
//   2. a random VRP with 500 customers and 25 vehicles (UltraLargeVRPProblem)
//
// Returns 0 on completion, 1 if no CUDA device can be queried.
int main() {
    printf("==============================================\n");
    printf("E11: 超大规模实验 (n=1000)\n");
    printf("==============================================\n\n");

    // Check the query result: on failure num_gpus would otherwise be printed
    // uninitialized and the first cudaMalloc would fail much later.
    int num_gpus = 0;
    const cudaError_t probe = cudaGetDeviceCount(&num_gpus);
    if (probe != cudaSuccess || num_gpus < 1) {
        fprintf(stderr, "cudaGetDeviceCount failed or found no device: %s\n",
                cudaGetErrorString(probe));
        return 1;
    }
    printf("检测到 %d 个 GPU\n\n", num_gpus);

    // ========== TSP n=1000 ==========
    printf("[TSP n=1000]\n");
    printf("分配内存...\n");

    int n_tsp = 1000;
    // The problem object keeps a pointer into this buffer, so the vector must
    // stay alive until after prob_tsp.destroy().
    std::vector<float> h_dist_tsp(static_cast<size_t>(n_tsp) * n_tsp);
    printf("生成数据...\n");
    generate_random_tsp(h_dist_tsp.data(), n_tsp, 12345);

    printf("创建 Problem...\n");
    auto prob_tsp = UltraLargeTSPProblem::create(h_dist_tsp.data(), n_tsp);

    SolverConfig cfg;
    cfg.pop_size = 0;
    cfg.max_gen = 1000;  // start with 1000 generations
    cfg.verbose = true;
    cfg.num_islands = 16;
    cfg.use_aos = true;
    cfg.sa_temp_init = 50.0f;
    cfg.use_cuda_graph = true;
    cfg.seed = 42;

    printf("\n开始求解(单GPU,1000代)...\n");
    time_t start = time(nullptr);
    auto result_tsp = solve(prob_tsp, cfg);
    time_t end = time(nullptr);
    // time_t is not guaranteed to be `long`; convert explicitly for %ld.
    long elapsed = static_cast<long>(end - start);

    printf("\n结果: %.2f\n", result_tsp.best_solution.objectives[0]);
    printf("耗时: %ld 秒\n", elapsed);
    printf("预估 5000 代耗时: ~%ld 秒 (%.1f 分钟)\n",
           elapsed * 5, elapsed * 5.0 / 60.0);

    prob_tsp.destroy();
    // Release the host matrix now, before the VRP buffers are allocated.
    std::vector<float>().swap(h_dist_tsp);

    printf("\n");

    // ========== VRP n=500 (start with a smaller instance) ==========
    printf("[VRP n=500, vehicles=25]\n");
    printf("分配内存...\n");

    int n_vrp = 500;
    int num_veh = 25;
    std::vector<float> h_dist_vrp(static_cast<size_t>(n_vrp + 1) * (n_vrp + 1));
    std::vector<float> h_demand_vrp(static_cast<size_t>(n_vrp));

    printf("生成数据...\n");
    generate_random_vrp(h_dist_vrp.data(), h_demand_vrp.data(), n_vrp, 12345);

    printf("创建 Problem...\n");
    auto prob_vrp = UltraLargeVRPProblem::create(h_dist_vrp.data(), h_demand_vrp.data(),
                                                 n_vrp, 100.0f, num_veh, num_veh);

    cfg.seed = 42;
    cfg.max_gen = 1000;

    printf("\n开始求解(单GPU,1000代)...\n");
    start = time(nullptr);
    auto result_vrp = solve(prob_vrp, cfg);
    end = time(nullptr);
    elapsed = static_cast<long>(end - start);

    printf("\n结果: %.2f\n", result_vrp.best_solution.objectives[0]);
    printf("耗时: %ld 秒\n", elapsed);
    printf("预估 5000 代耗时: ~%ld 秒 (%.1f 分钟)\n",
           elapsed * 5, elapsed * 5.0 / 60.0);

    prob_vrp.destroy();

    printf("\n==============================================\n");
    printf("E11 快速验证完成\n");
    printf("==============================================\n");

    return 0;
}
|
||||
82
benchmark/experiments/e11_ultra_large/ultra_large_tsp.cuh
Normal file
82
benchmark/experiments/e11_ultra_large/ultra_large_tsp.cuh
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
#include "operators.cuh"
|
||||
|
||||
// Ultra-large TSP problem (per the original note: up to 1024 cities),
// stored as a single solution row: ProblemBase<..., 1, 1024>.
struct UltraLargeTSPProblem : ProblemBase<UltraLargeTSPProblem, 1, 1024> {
    const float* d_dist;  // n x n matrix allocated with cudaMalloc
    const float* h_dist;  // caller-supplied host matrix; pointer only, never freed here
    int n;                // number of cities

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}
    };

    // Length of the closed tour held in row 0: the sum of d_dist over each
    // consecutive pair of the first n entries, plus the entry from the last
    // city back to the first. obj_idx is not used.
    __device__ float compute_obj(int obj_idx, const Sol& s) const {
        float length = 0;
        int cur = s.data[0][0];
        for (int k = 1; k < n; ++k) {
            const int nxt = s.data[0][k];
            length += d_dist[cur * n + nxt];
            cur = nxt;
        }
        // Closing edge: last city -> first city.
        length += d_dist[s.data[0][n - 1] * n + s.data[0][0]];
        return length;
    }

    // Always 0: this problem defines no constraint penalty.
    __device__ float compute_penalty(const Sol& s) const {
        return 0.0f;
    }

    // Fills a ProblemConfig: permutation encoding with one row of n elements.
    // Objective fields come from fill_obj_config().
    ProblemConfig config() const {
        ProblemConfig out;
        out.dim2_default = n;
        out.dim1 = 1;
        out.encoding = EncodingType::Permutation;
        fill_obj_config(out);
        return out;
    }

    // Byte size of the n x n distance matrix.
    size_t working_set_bytes() const {
        const size_t matrix_bytes = (size_t)n * n * sizeof(float);
        return matrix_bytes;
    }

    // Builds a problem instance and uploads the num_cities x num_cities host
    // matrix to a newly allocated device buffer on the current device. The
    // host pointer is retained, so it must outlive any later clone_to_device()
    // call. Release the device buffer with destroy().
    static UltraLargeTSPProblem create(const float* h_dist_matrix, int num_cities) {
        UltraLargeTSPProblem prob;
        prob.h_dist = h_dist_matrix;
        prob.n = num_cities;

        const size_t matrix_bytes = (size_t)num_cities * num_cities * sizeof(float);
        CUDA_CHECK(cudaMalloc(&prob.d_dist, matrix_bytes));
        CUDA_CHECK(cudaMemcpy((void*)prob.d_dist, h_dist_matrix, matrix_bytes,
                              cudaMemcpyHostToDevice));

        return prob;
    }

    // Frees the device matrix (if set) and nulls the pointer.
    void destroy() {
        if (!d_dist) {
            return;
        }
        cudaFree((void*)d_dist);
        d_dist = nullptr;
    }

    // Allocates a heap copy of this problem whose device matrix lives on
    // `target_gpu`, filled from the retained host matrix. The previously
    // current device is restored before returning. The returned object comes
    // from `new`; nothing in this struct deletes it.
    UltraLargeTSPProblem* clone_to_device(int target_gpu) const {
        int previous_device;
        CUDA_CHECK(cudaGetDevice(&previous_device));
        CUDA_CHECK(cudaSetDevice(target_gpu));

        float* dev_dist;
        const size_t matrix_bytes = (size_t)n * n * sizeof(float);
        CUDA_CHECK(cudaMalloc(&dev_dist, matrix_bytes));
        CUDA_CHECK(cudaMemcpy(dev_dist, h_dist, matrix_bytes, cudaMemcpyHostToDevice));

        CUDA_CHECK(cudaSetDevice(previous_device));

        UltraLargeTSPProblem* copy = new UltraLargeTSPProblem();
        copy->d_dist = dev_dist;
        copy->h_dist = h_dist;
        copy->n = n;

        return copy;
    }
};
|
||||
130
benchmark/experiments/e11_ultra_large/ultra_large_vrp.cuh
Normal file
130
benchmark/experiments/e11_ultra_large/ultra_large_vrp.cuh
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
#pragma once
|
||||
#include "types.cuh"
|
||||
#include "cuda_utils.cuh"
|
||||
#include "operators.cuh"
|
||||
|
||||
// Ultra-large VRP problem. The original note gives the intended limits as
// 1024 customers and 32 vehicles, matching the <32, 1024> arguments passed
// to ProblemBase below.
struct UltraLargeVRPProblem : ProblemBase<UltraLargeVRPProblem, 32, 1024> {
    // Buffers allocated with cudaMalloc in create() / clone_to_device().
    const float* d_dist;
    const float* d_demand;
    // Host arrays supplied by the caller; only the pointers are stored here
    // (they are re-read by clone_to_device() and never freed by destroy()).
    const float* h_dist;
    const float* h_demand;
    int n;             // customer count; the distance matrix is (n+1) x (n+1)
    float capacity;    // per-route load limit used by compute_penalty()
    int num_vehicles;  // number of solution rows that are evaluated
    int max_vehicles;  // stored and cloned, but not read inside this struct

    static constexpr ObjDef OBJ_DEFS[] = {
        {ObjDir::Minimize, 1.0f, 0.0f}
    };

    // Sums the distance-matrix entries along every non-empty row of the
    // solution: index 0 -> first customer, customer -> customer, and last
    // customer -> index 0. Stored customer ids are shifted by +1 to index the
    // matrix (index 0 is presumably the depot). obj_idx is not used.
    __device__ float compute_obj(int obj_idx, const Sol& s) const {
        const int stride = n + 1;
        float cost = 0;
        for (int r = 0; r < num_vehicles; ++r) {
            const int len = s.dim2_sizes[r];
            if (len != 0) {
                // Leg out of matrix index 0.
                int cur = s.data[r][0] + 1;
                cost += d_dist[cur];

                for (int k = 1; k < len; ++k) {
                    const int nxt = s.data[r][k] + 1;
                    cost += d_dist[cur * stride + nxt];
                    cur = nxt;
                }

                // Leg back to matrix index 0.
                cost += d_dist[cur * stride];
            }
        }
        return cost;
    }

    // Returns 100 x the summed capacity excess over all rows whose accumulated
    // demand is greater than `capacity`; 0 when no row exceeds it.
    __device__ float compute_penalty(const Sol& s) const {
        float result = 0;
        for (int r = 0; r < num_vehicles; ++r) {
            float carried = 0;
            for (int k = 0; k < s.dim2_sizes[r]; ++k) {
                carried += d_demand[s.data[r][k]];
            }
            if (carried > capacity) {
                result += (carried - capacity) * 100.0f;
            }
        }
        return result;
    }

    // Fills a ProblemConfig: permutation encoding, dim1 = num_vehicles,
    // dim2_default = 0, partition row mode over n elements, and a cross-row
    // probability of 0.3. Objective fields come from fill_obj_config().
    ProblemConfig config() const {
        ProblemConfig out;
        out.dim2_default = 0;
        out.dim1 = num_vehicles;
        out.encoding = EncodingType::Permutation;
        fill_obj_config(out);
        out.total_elements = n;
        out.row_mode = RowMode::Partition;
        out.cross_row_prob = 0.3f;
        return out;
    }

    // Byte size of the distance matrix plus the demand array.
    size_t working_set_bytes() const {
        const size_t matrix_bytes = (size_t)(n + 1) * (n + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)n * sizeof(float);
        return matrix_bytes + demand_bytes;
    }

    // Builds a problem instance and uploads both host arrays to newly
    // allocated device buffers on the current device.
    //   h_dist_matrix  : (num_customers+1)^2 floats
    //   h_demand_array : num_customers floats
    // The host pointers are retained, so they must outlive any later
    // clone_to_device() call. Release the device buffers with destroy().
    static UltraLargeVRPProblem create(const float* h_dist_matrix, const float* h_demand_array,
                                       int num_customers, float vehicle_capacity,
                                       int num_veh, int max_veh) {
        UltraLargeVRPProblem prob;
        prob.h_dist = h_dist_matrix;
        prob.h_demand = h_demand_array;
        prob.n = num_customers;
        prob.capacity = vehicle_capacity;
        prob.num_vehicles = num_veh;
        prob.max_vehicles = max_veh;

        const size_t matrix_bytes =
            (size_t)(num_customers + 1) * (num_customers + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)num_customers * sizeof(float);

        CUDA_CHECK(cudaMalloc(&prob.d_dist, matrix_bytes));
        CUDA_CHECK(cudaMalloc(&prob.d_demand, demand_bytes));
        CUDA_CHECK(cudaMemcpy((void*)prob.d_dist, h_dist_matrix, matrix_bytes,
                              cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy((void*)prob.d_demand, h_demand_array, demand_bytes,
                              cudaMemcpyHostToDevice));

        return prob;
    }

    // Frees the device buffers (if set) and nulls the pointers.
    void destroy() {
        if (d_dist != nullptr) {
            cudaFree((void*)d_dist);
        }
        if (d_demand != nullptr) {
            cudaFree((void*)d_demand);
        }
        d_demand = nullptr;
        d_dist = nullptr;
    }

    // Allocates a heap copy of this problem whose device buffers live on
    // `target_gpu`, filled from the retained host arrays. The previously
    // current device is restored before returning. The returned object comes
    // from `new`; nothing in this struct deletes it.
    UltraLargeVRPProblem* clone_to_device(int target_gpu) const {
        int previous_device;
        CUDA_CHECK(cudaGetDevice(&previous_device));
        CUDA_CHECK(cudaSetDevice(target_gpu));

        float* dev_dist;
        float* dev_demand;
        const size_t matrix_bytes = (size_t)(n + 1) * (n + 1) * sizeof(float);
        const size_t demand_bytes = (size_t)n * sizeof(float);

        CUDA_CHECK(cudaMalloc(&dev_dist, matrix_bytes));
        CUDA_CHECK(cudaMalloc(&dev_demand, demand_bytes));
        CUDA_CHECK(cudaMemcpy(dev_dist, h_dist, matrix_bytes, cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(dev_demand, h_demand, demand_bytes, cudaMemcpyHostToDevice));

        CUDA_CHECK(cudaSetDevice(previous_device));

        UltraLargeVRPProblem* copy = new UltraLargeVRPProblem();
        copy->d_dist = dev_dist;
        copy->d_demand = dev_demand;
        copy->h_dist = h_dist;
        copy->h_demand = h_demand;
        copy->n = n;
        copy->capacity = capacity;
        copy->num_vehicles = num_vehicles;
        copy->max_vehicles = max_vehicles;

        return copy;
    }
};
|
||||
Loading…
Add table
Add a link
Reference in a new issue