mirror of
https://github.com/L-yang-yang/cugenopt.git
synced 2026-04-24 12:06:22 +02:00
fix: harden CUDA safety checks and translate comments to English
Safety fixes (4 critical, 4 warning) from code review: - qap.cuh: fix clone_to_device cross-device D2H by retaining host matrices - types.cuh: add CUDA_CHECK to InjectBuffer, track owner_gpu for safe destroy - types.cuh: add bounds check on lexicographic priority index - solver.cuh: cap migrate_kernel islands to MAX_ISLANDS=64 to prevent stack overflow - multi_gpu_solver.cuh: guard against 0 GPUs, propagate stop_reason from best GPU - types.cuh: warn on SeqRegistry overflow - solver.cuh: warn when constraint_directed/phased_search disabled without AOS Translate all Chinese comments to English across 25+ source files (core/*.cuh, problems/*.cuh, Makefile, multi-GPU tests). Verified on V100S×2 (sm_70, CUDA 12.8): e5 (12 problem types, all optimal), e13 (multi-objective + multi-GPU, 9 configs, all passed).
This commit is contained in:
parent
ab278d0e82
commit
a848730459
25 changed files with 1147 additions and 1167 deletions
37
README.md
37
README.md
|
|
@ -6,7 +6,7 @@
|
|||
[](https://developer.nvidia.com/cuda-toolkit)
|
||||
[](https://www.python.org/)
|
||||
|
||||
**Paper**: [cuGenOpt: A GPU-Accelerated General-Purpose Metaheuristic Framework for Combinatorial Optimization](http://arxiv.org/abs/2603.19163)
|
||||
**Paper**: [cuGenOpt: A GPU-Accelerated General-Purpose Metaheuristic Framework for Combinatorial Optimization](https://arxiv.org/abs/2603.19163)
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -114,28 +114,7 @@ Define your own problem by inheriting `ProblemBase` and implementing `compute_ob
|
|||
└─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
generic_solver/
|
||||
├── prototype/ # Core framework (header-only .cuh files)
|
||||
│ ├── core/ # Solver, operators, population, types
|
||||
│ └── problems/ # 12+ problem implementations
|
||||
├── python/ # Python wrapper (pip install cugenopt)
|
||||
│ ├── cugenopt/ # Python package (built-ins + JIT compiler)
|
||||
│ └── tests/ # Test suite
|
||||
├── benchmark/ # Experiments and benchmarks
|
||||
│ ├── experiments/ # E0-E13: 14 experiment groups
|
||||
│ ├── data/ # Standard instances (TSPLIB, Solomon, QAPLIB)
|
||||
│ └── results/ # Experimental reports
|
||||
├── paper_v3_en/ # Paper source (LaTeX)
|
||||
├── STATUS.md # Project status and roadmap
|
||||
└── README.md # This file
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Highlights
|
||||
|
||||
|
|
@ -186,8 +165,7 @@ generic_solver/
|
|||
## Installation
|
||||
|
||||
### Python Package
|
||||
|
||||
coming soon~
|
||||
come soon
|
||||
```bash
|
||||
pip install cugenopt
|
||||
```
|
||||
|
|
@ -207,18 +185,7 @@ cd prototype
|
|||
make all
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
| Document | Description |
|
||||
|----------|-------------|
|
||||
| [STATUS.md](STATUS.md) | Project status, roadmap, and design decisions |
|
||||
| [Python API Guide](python/README.md) | Detailed Python API documentation |
|
||||
| [Benchmark Design](benchmark/DESIGN.md) | Experimental methodology |
|
||||
| [Paper](paper_v3_en/) | Full technical details and evaluation |
|
||||
|
||||
---
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
# GenSolver Makefile
|
||||
#
|
||||
# 用法:
|
||||
# make e1 e2 e3 e4 e5 e6 → 编译单个实验
|
||||
# make diag → 编译诊断程序
|
||||
# make all → 编译全部
|
||||
# make clean → 清理
|
||||
# Usage:
|
||||
# make e1 e2 e3 e4 e5 e6 → Build individual experiments
|
||||
# make diag → Build diagnostic program
|
||||
# make all → Build all
|
||||
# make clean → Clean
|
||||
|
||||
NVCC = nvcc
|
||||
ARCH ?= -arch=sm_75
|
||||
|
|
@ -40,10 +40,10 @@ $(EXP_DIR)/%/gpu: $(EXP_DIR)/%/gpu.cu $(ALL_HEADERS) problems/tsplib_data.h
|
|||
$(EXP_DIR)/e0_diagnosis/bench_diagnosis: $(EXP_DIR)/e0_diagnosis/bench_diagnosis.cu $(ALL_HEADERS)
|
||||
$(NVCC) $(ARCH) $(CFLAGS) $(INCLUDES) -o $@ $<
|
||||
|
||||
test_multi_gpu: test_multi_gpu.cu $(ALL_HEADERS)
|
||||
test_multi_gpu: $(EXP_DIR)/e9_multi_gpu_b3/test_multi_gpu.cu $(ALL_HEADERS)
|
||||
$(NVCC) $(ARCH) $(CFLAGS) $(INCLUDES) -o $@ $<
|
||||
|
||||
test_multi_gpu_b3: test_multi_gpu_b3.cu $(ALL_HEADERS)
|
||||
test_multi_gpu_b3: $(EXP_DIR)/e9_multi_gpu_b3/test_multi_gpu_b3.cu $(ALL_HEADERS)
|
||||
$(NVCC) $(ARCH) $(CFLAGS) $(INCLUDES) -o $@ $<
|
||||
|
||||
clean:
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/**
|
||||
* cuda_utils.cuh - CUDA 工具集
|
||||
* cuda_utils.cuh - CUDA utilities
|
||||
*
|
||||
* 职责:错误检查、设备信息、随机数工具
|
||||
* 规则:所有 CUDA API 调用都必须用 CUDA_CHECK 包裹
|
||||
* Responsibilities: error checking, device info, random number utilities
|
||||
* Rule: every CUDA API call must be wrapped with CUDA_CHECK
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -11,7 +11,7 @@
|
|||
#include <curand_kernel.h>
|
||||
|
||||
// ============================================================
|
||||
// 错误检查
|
||||
// Error checking
|
||||
// ============================================================
|
||||
|
||||
#define CUDA_CHECK(call) do { \
|
||||
|
|
@ -23,7 +23,7 @@
|
|||
} \
|
||||
} while(0)
|
||||
|
||||
// kernel launch 后检查(捕获异步错误)
|
||||
// Check after kernel launch (catches async errors)
|
||||
#define CUDA_CHECK_LAST() do { \
|
||||
cudaError_t err = cudaGetLastError(); \
|
||||
if (err != cudaSuccess) { \
|
||||
|
|
@ -34,7 +34,7 @@
|
|||
} while(0)
|
||||
|
||||
// ============================================================
|
||||
// 设备信息
|
||||
// Device info
|
||||
// ============================================================
|
||||
|
||||
inline void print_device_info() {
|
||||
|
|
@ -52,10 +52,10 @@ inline void print_device_info() {
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 随机数工具 (Device 端)
|
||||
// Random number utilities (device-side)
|
||||
// ============================================================
|
||||
|
||||
// 初始化 curand 状态,每个线程一个
|
||||
// Initialize curand state: one per thread
|
||||
__global__ void init_curand_kernel(curandState* states, unsigned long long seed, int n) {
|
||||
int tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (tid < n) {
|
||||
|
|
@ -63,12 +63,12 @@ __global__ void init_curand_kernel(curandState* states, unsigned long long seed,
|
|||
}
|
||||
}
|
||||
|
||||
// Device 端:生成 [0, bound) 的随机整数
|
||||
// Device-side: random integer in [0, bound)
|
||||
__device__ inline int rand_int(curandState* state, int bound) {
|
||||
return curand(state) % bound;
|
||||
}
|
||||
|
||||
// Device 端:Fisher-Yates shuffle,对 arr[0..n-1] 做随机排列
|
||||
// Device-side: Fisher-Yates shuffle of arr[0..n-1]
|
||||
__device__ inline void shuffle(int* arr, int n, curandState* state) {
|
||||
for (int i = n - 1; i > 0; i--) {
|
||||
int j = rand_int(state, i + 1);
|
||||
|
|
@ -79,12 +79,12 @@ __device__ inline void shuffle(int* arr, int n, curandState* state) {
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// Kernel 启动参数计算
|
||||
// Kernel launch grid sizing
|
||||
// ============================================================
|
||||
|
||||
inline int div_ceil(int a, int b) { return (a + b - 1) / b; }
|
||||
|
||||
// 计算合适的 block 数量
|
||||
// Compute suitable number of blocks
|
||||
inline int calc_grid_size(int n, int block_size = 256) {
|
||||
return div_ceil(n, block_size);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,20 +1,20 @@
|
|||
/**
|
||||
* gpu_cache.cuh - GPU 全局内存哈希表(通用缓存组件)
|
||||
* gpu_cache.cuh - GPU global-memory hash table (generic cache component)
|
||||
*
|
||||
* 设计:
|
||||
* - 开放寻址,固定容量(power of 2),线性探测
|
||||
* - key = uint64_t(由 Problem 自行计算 hash)
|
||||
* - value = float(单个指标值)
|
||||
* - 无锁:允许 race condition(缓存语义,偶尔脏读可接受)
|
||||
* - 自带命中/未命中原子计数器
|
||||
* Design:
|
||||
* - Open addressing, fixed capacity (power of 2), linear probing
|
||||
* - key = uint64_t (hash computed by Problem)
|
||||
* - value = float (single metric value)
|
||||
* - Lock-free: race conditions allowed (cache semantics; occasional dirty reads OK)
|
||||
* - Built-in atomic hit/miss counters
|
||||
*
|
||||
* 用法:
|
||||
* Usage:
|
||||
* GpuCache cache = GpuCache::allocate(65536); // host
|
||||
* // ... pass cache as Problem member to kernels ...
|
||||
* cache.print_stats(); // host
|
||||
* cache.destroy(); // host
|
||||
*
|
||||
* 参考:scute 项目 LRUCache(key = metric_type + content_hash)
|
||||
* Reference: scute project LRUCache (key = metric_type + content_hash)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -22,25 +22,25 @@
|
|||
#include <cstdint>
|
||||
|
||||
// ============================================================
|
||||
// 常量
|
||||
// Constants
|
||||
// ============================================================
|
||||
|
||||
static constexpr uint64_t CACHE_EMPTY_KEY = 0xFFFFFFFFFFFFFFFFULL;
|
||||
static constexpr int CACHE_MAX_PROBE = 8; // 最大线性探测步数
|
||||
static constexpr int CACHE_MAX_PROBE = 8; // Max linear probing steps
|
||||
|
||||
// ============================================================
|
||||
// GpuCache 结构体(POD,可安全拷贝到 kernel)
|
||||
// GpuCache struct (POD, safe to copy to kernel)
|
||||
// ============================================================
|
||||
|
||||
struct GpuCache {
|
||||
uint64_t* keys; // GPU 全局内存
|
||||
float* values; // GPU 全局内存
|
||||
unsigned int* d_hits; // 原子计数器(GPU)
|
||||
unsigned int* d_misses; // 原子计数器(GPU)
|
||||
int capacity; // 必须是 2 的幂
|
||||
uint64_t* keys; // GPU global memory
|
||||
float* values; // GPU global memory
|
||||
unsigned int* d_hits; // Atomic counters (GPU)
|
||||
unsigned int* d_misses; // Atomic counters (GPU)
|
||||
int capacity; // Must be a power of 2
|
||||
int mask; // = capacity - 1
|
||||
|
||||
// ---- Host 操作 ----
|
||||
// ---- Host operations ----
|
||||
|
||||
static GpuCache allocate(int cap = 65536) {
|
||||
GpuCache c;
|
||||
|
|
@ -94,20 +94,20 @@ struct GpuCache {
|
|||
};
|
||||
|
||||
// ============================================================
|
||||
// Device 函数:哈希 / 查找 / 插入
|
||||
// Device functions: hash / lookup / insert
|
||||
// ============================================================
|
||||
|
||||
/// FNV-1a 哈希:对一段有序 int 序列(如路线中的客户 ID)
|
||||
/// FNV-1a hash over an ordered int sequence (e.g. customer IDs on a route)
|
||||
__device__ inline uint64_t route_hash(const int* data, int len) {
|
||||
uint64_t h = 14695981039346656037ULL; // FNV offset basis
|
||||
for (int i = 0; i < len; i++) {
|
||||
h ^= (uint64_t)(unsigned int)data[i];
|
||||
h *= 1099511628211ULL; // FNV prime
|
||||
}
|
||||
return (h == CACHE_EMPTY_KEY) ? h - 1 : h; // 避免与哨兵值碰撞
|
||||
return (h == CACHE_EMPTY_KEY) ? h - 1 : h; // Avoid collision with sentinel value
|
||||
}
|
||||
|
||||
/// 查找:命中返回 true + 写入 out
|
||||
/// Lookup: on hit returns true and writes out
|
||||
__device__ inline bool cache_lookup(const GpuCache& c, uint64_t key, float& out) {
|
||||
int slot = (int)(key & (uint64_t)c.mask);
|
||||
for (int p = 0; p < CACHE_MAX_PROBE; p++) {
|
||||
|
|
@ -117,12 +117,12 @@ __device__ inline bool cache_lookup(const GpuCache& c, uint64_t key, float& out)
|
|||
out = c.values[idx];
|
||||
return true;
|
||||
}
|
||||
if (k == CACHE_EMPTY_KEY) return false; // 空槽 → 一定不存在
|
||||
if (k == CACHE_EMPTY_KEY) return false; // Empty slot -> key not present
|
||||
}
|
||||
return false; // 探测用尽
|
||||
return false; // Probing exhausted
|
||||
}
|
||||
|
||||
/// 插入:写入 key-value,同 key 覆盖,探测满则驱逐首槽
|
||||
/// Insert: write key-value; same key overwrites; if probe full, evict first slot
|
||||
__device__ inline void cache_insert(const GpuCache& c, uint64_t key, float value) {
|
||||
int slot = (int)(key & (uint64_t)c.mask);
|
||||
for (int p = 0; p < CACHE_MAX_PROBE; p++) {
|
||||
|
|
@ -134,7 +134,7 @@ __device__ inline void cache_insert(const GpuCache& c, uint64_t key, float value
|
|||
return;
|
||||
}
|
||||
}
|
||||
// 探测满:驱逐首槽
|
||||
// Probe full: evict first slot
|
||||
int idx = slot & c.mask;
|
||||
c.keys[idx] = key;
|
||||
c.values[idx] = value;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
namespace heuristic_init {
|
||||
|
||||
// 单行排列:所有行填相同排列
|
||||
// Single-row layout: same permutation in every row
|
||||
template<typename Sol>
|
||||
static void build_sorted_permutation(Sol& sol, const std::vector<int>& order,
|
||||
int dim1, int dim2) {
|
||||
|
|
@ -19,7 +19,7 @@ static void build_sorted_permutation(Sol& sol, const std::vector<int>& order,
|
|||
for (int i = 0; i < MAX_OBJ; i++) sol.objectives[i] = 0.0f;
|
||||
}
|
||||
|
||||
// Partition 模式:排列均匀切分到 dim1 行,元素不重复
|
||||
// Partition mode: split permutation evenly across dim1 rows, no duplicate elements
|
||||
template<typename Sol>
|
||||
static void build_partition_from_order(Sol& sol, const std::vector<int>& order,
|
||||
int dim1, int total_elements) {
|
||||
|
|
@ -66,8 +66,8 @@ std::vector<Sol> build_from_matrices(const HeuristicMatrix* matrices, int num_ma
|
|||
col_sum[j] += mat[i * N + j];
|
||||
}
|
||||
|
||||
// 对于 Partition (VRPTW),距离矩阵含 depot (index 0),
|
||||
// 排序只针对客户 (index 1..N-1),输出值为 0-based 客户编号
|
||||
// For Partition (VRPTW), the distance matrix includes depot (index 0);
|
||||
// sorting is only over customers (indices 1..N-1); output values are 0-based customer ids
|
||||
std::vector<int> idx;
|
||||
if (partition_mode && N > elem_count) {
|
||||
for (int i = 1; i <= elem_count; i++) idx.push_back(i);
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
/**
|
||||
* init_selection.cuh - 初始解采样择优 + NSGA-II 选择
|
||||
* init_selection.cuh - Initial-solution sampling and NSGA-II selection
|
||||
*
|
||||
* Host 端逻辑,在 solver 初始化阶段调用一次。
|
||||
* 从 K × pop_size 个候选解中选出 pop_size 个作为初始种群。
|
||||
* Host-side logic; called once during solver initialization.
|
||||
* Selects pop_size individuals from K × pop_size candidates as the initial population.
|
||||
*
|
||||
* 选择策略:
|
||||
* 1. 核心目标预留名额(按 importance 分配)
|
||||
* 2. NSGA-II 选择(非支配排序 + 加权拥挤度)
|
||||
* 3. 纯随机保底(多样性)
|
||||
* Selection strategy:
|
||||
* 1. Reserve slots for core objectives (by importance)
|
||||
* 2. NSGA-II selection (non-dominated sort + weighted crowding)
|
||||
* 3. Pure random fallback (diversity)
|
||||
*
|
||||
* 单目标时自动退化为 top-N 排序,无需分支。
|
||||
* Single-objective case automatically reduces to top-N sorting; no extra branching.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -22,36 +22,36 @@
|
|||
namespace init_sel {
|
||||
|
||||
// ============================================================
|
||||
// 候选解的目标信息(从 GPU 下载后在 host 端使用)
|
||||
// Per-candidate objective info (used on host after download from GPU)
|
||||
// ============================================================
|
||||
struct CandidateInfo {
|
||||
int idx; // 在候选数组中的原始索引
|
||||
float objs[MAX_OBJ]; // 归一化后的目标值(越小越好)
|
||||
int idx; // Original index in the candidate array
|
||||
float objs[MAX_OBJ]; // Normalized objectives (lower is better)
|
||||
float penalty;
|
||||
int rank; // 非支配排序层级(0 = Pareto 前沿)
|
||||
float crowding; // 拥挤度距离
|
||||
bool selected; // 是否已被选中
|
||||
int rank; // Non-dominated sort front (0 = Pareto front)
|
||||
float crowding; // Crowding distance
|
||||
bool selected; // Whether already selected
|
||||
};
|
||||
|
||||
// ============================================================
|
||||
// 非支配排序(Fast Non-dominated Sort)
|
||||
// Non-dominated sort (Fast Non-dominated Sort)
|
||||
// ============================================================
|
||||
// 复杂度:O(M × N²),M = 目标数,N = 候选数
|
||||
// 对初始化场景(N ≤ 几千,M ≤ 4)完全可接受
|
||||
// Complexity: O(M × N²), M = number of objectives, N = number of candidates
|
||||
// Acceptable for initialization (N up to a few thousand, M ≤ 4)
|
||||
|
||||
inline void fast_nondominated_sort(std::vector<CandidateInfo>& cands,
|
||||
int num_obj,
|
||||
std::vector<std::vector<int>>& fronts) {
|
||||
int n = (int)cands.size();
|
||||
std::vector<int> dom_count(n, 0); // 被多少个解支配
|
||||
std::vector<std::vector<int>> dom_set(n); // 支配了哪些解
|
||||
std::vector<int> dom_count(n, 0); // How many solutions dominate this one
|
||||
std::vector<std::vector<int>> dom_set(n); // Which solutions this one dominates
|
||||
|
||||
// 判断 a 是否支配 b:a 在所有目标上 ≤ b,且至少一个 <
|
||||
// 先处理 penalty:可行解支配不可行解
|
||||
// Whether a dominates b: a ≤ b on all objectives, and strictly < on at least one
|
||||
// Handle penalty first: feasible dominates infeasible
|
||||
auto dominates = [&](int a, int b) -> bool {
|
||||
const auto& ca = cands[a];
|
||||
const auto& cb = cands[b];
|
||||
// penalty 处理
|
||||
// Penalty handling
|
||||
if (ca.penalty <= 0.0f && cb.penalty > 0.0f) return true;
|
||||
if (ca.penalty > 0.0f && cb.penalty <= 0.0f) return false;
|
||||
if (ca.penalty > 0.0f && cb.penalty > 0.0f) return ca.penalty < cb.penalty;
|
||||
|
|
@ -65,7 +65,7 @@ inline void fast_nondominated_sort(std::vector<CandidateInfo>& cands,
|
|||
return all_leq && any_lt;
|
||||
};
|
||||
|
||||
// 计算支配关系
|
||||
// Compute dominance relations
|
||||
for (int i = 0; i < n; i++) {
|
||||
for (int j = i + 1; j < n; j++) {
|
||||
if (dominates(i, j)) {
|
||||
|
|
@ -78,7 +78,7 @@ inline void fast_nondominated_sort(std::vector<CandidateInfo>& cands,
|
|||
}
|
||||
}
|
||||
|
||||
// 提取各层前沿
|
||||
// Extract each front layer
|
||||
fronts.clear();
|
||||
std::vector<int> current_front;
|
||||
for (int i = 0; i < n; i++) {
|
||||
|
|
@ -107,9 +107,9 @@ inline void fast_nondominated_sort(std::vector<CandidateInfo>& cands,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 加权拥挤度距离
|
||||
// Weighted crowding distance
|
||||
// ============================================================
|
||||
// 标准拥挤度 + importance 加权:核心目标维度上的间距贡献更大
|
||||
// Standard crowding + importance weighting: larger gap contribution on core objectives
|
||||
|
||||
inline void weighted_crowding_distance(std::vector<CandidateInfo>& cands,
|
||||
const std::vector<int>& front,
|
||||
|
|
@ -117,7 +117,7 @@ inline void weighted_crowding_distance(std::vector<CandidateInfo>& cands,
|
|||
const float* importance) {
|
||||
int n = (int)front.size();
|
||||
if (n <= 2) {
|
||||
for (int i : front) cands[i].crowding = 1e18f; // 边界解无穷大
|
||||
for (int i : front) cands[i].crowding = 1e18f; // Boundary solutions: infinite
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -126,18 +126,18 @@ inline void weighted_crowding_distance(std::vector<CandidateInfo>& cands,
|
|||
std::vector<int> sorted_idx(front.begin(), front.end());
|
||||
|
||||
for (int m = 0; m < num_obj; m++) {
|
||||
// 按目标 m 排序
|
||||
// Sort by objective m
|
||||
std::sort(sorted_idx.begin(), sorted_idx.end(),
|
||||
[&](int a, int b) { return cands[a].objs[m] < cands[b].objs[m]; });
|
||||
|
||||
float range = cands[sorted_idx[n-1]].objs[m] - cands[sorted_idx[0]].objs[m];
|
||||
if (range < 1e-12f) continue; // 该目标无区分度
|
||||
if (range < 1e-12f) continue; // No spread on this objective
|
||||
|
||||
// 边界解设为无穷大
|
||||
// Boundary solutions: infinite crowding
|
||||
cands[sorted_idx[0]].crowding += 1e18f;
|
||||
cands[sorted_idx[n-1]].crowding += 1e18f;
|
||||
|
||||
// 中间解:相邻间距 × importance 权重
|
||||
// Interior: neighbor gap × importance weight
|
||||
float w = importance[m];
|
||||
for (int i = 1; i < n - 1; i++) {
|
||||
float gap = cands[sorted_idx[i+1]].objs[m] - cands[sorted_idx[i-1]].objs[m];
|
||||
|
|
@ -147,29 +147,29 @@ inline void weighted_crowding_distance(std::vector<CandidateInfo>& cands,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 主选择函数:从 N 个候选中选出 target 个
|
||||
// Main selection: pick target candidates from N
|
||||
// ============================================================
|
||||
// 返回被选中的候选索引
|
||||
// Returns indices of selected candidates
|
||||
|
||||
inline std::vector<int> nsga2_select(std::vector<CandidateInfo>& cands,
|
||||
int num_obj,
|
||||
const float* importance,
|
||||
int target,
|
||||
int num_reserved_random) {
|
||||
// --- 1. 核心目标预留名额 ---
|
||||
// --- 1. Reserve slots for core objectives ---
|
||||
int num_reserve_total = target - num_reserved_random;
|
||||
// 预留比例:importance[i] × 30% 的名额(剩余 70% 给 NSGA-II)
|
||||
// Reserve ratio: importance[i] × 30% of slots (remaining 70% for NSGA-II)
|
||||
float reserve_ratio = 0.3f;
|
||||
|
||||
std::vector<int> selected;
|
||||
selected.reserve(target);
|
||||
|
||||
// 对每个目标,按该目标排序取 top
|
||||
// For each objective, sort by that objective and take top
|
||||
for (int m = 0; m < num_obj; m++) {
|
||||
int quota = (int)(num_reserve_total * importance[m] * reserve_ratio);
|
||||
if (quota < 1 && num_obj > 1) quota = 1; // 每个目标至少 1 个
|
||||
if (quota < 1 && num_obj > 1) quota = 1; // At least one per objective
|
||||
|
||||
// 按目标 m 排序(越小越好)
|
||||
// Sort by objective m (lower is better)
|
||||
std::vector<int> by_obj(cands.size());
|
||||
for (int i = 0; i < (int)cands.size(); i++) by_obj[i] = i;
|
||||
std::sort(by_obj.begin(), by_obj.end(),
|
||||
|
|
@ -186,32 +186,32 @@ inline std::vector<int> nsga2_select(std::vector<CandidateInfo>& cands,
|
|||
}
|
||||
}
|
||||
|
||||
// --- 2. NSGA-II 选择填充剩余名额 ---
|
||||
// --- 2. NSGA-II fills remaining slots ---
|
||||
int remaining = target - num_reserved_random - (int)selected.size();
|
||||
|
||||
if (remaining > 0) {
|
||||
// 非支配排序
|
||||
// Non-dominated sort
|
||||
std::vector<std::vector<int>> fronts;
|
||||
fast_nondominated_sort(cands, num_obj, fronts);
|
||||
|
||||
for (auto& front : fronts) {
|
||||
if (remaining <= 0) break;
|
||||
|
||||
// 过滤已选中的
|
||||
// Filter out already selected
|
||||
std::vector<int> available;
|
||||
for (int i : front) {
|
||||
if (!cands[i].selected) available.push_back(i);
|
||||
}
|
||||
|
||||
if ((int)available.size() <= remaining) {
|
||||
// 整层都选
|
||||
// Take the whole front
|
||||
for (int i : available) {
|
||||
cands[i].selected = true;
|
||||
selected.push_back(i);
|
||||
remaining--;
|
||||
}
|
||||
} else {
|
||||
// 该层需要截断:按加权拥挤度选
|
||||
// Truncate this front: pick by weighted crowding
|
||||
weighted_crowding_distance(cands, available, num_obj, importance);
|
||||
std::sort(available.begin(), available.end(),
|
||||
[&](int a, int b) { return cands[a].crowding > cands[b].crowding; });
|
||||
|
|
@ -228,14 +228,14 @@ inline std::vector<int> nsga2_select(std::vector<CandidateInfo>& cands,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 单目标快速路径:直接按标量排序取 top
|
||||
// Single-objective fast path: scalar sort and take top
|
||||
// ============================================================
|
||||
inline std::vector<int> top_n_select(std::vector<CandidateInfo>& cands,
|
||||
int target,
|
||||
int num_reserved_random) {
|
||||
int to_select = target - num_reserved_random;
|
||||
|
||||
// 按 penalty 优先,然后按 objs[0](已归一化为越小越好)
|
||||
// Prefer lower penalty, then objs[0] (normalized, lower is better)
|
||||
std::vector<int> indices(cands.size());
|
||||
for (int i = 0; i < (int)cands.size(); i++) indices[i] = i;
|
||||
std::sort(indices.begin(), indices.end(), [&](int a, int b) {
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
/**
|
||||
* multi_gpu_solver.cuh - 多 GPU 协同求解
|
||||
* multi_gpu_solver.cuh - Multi-GPU cooperative solving
|
||||
*
|
||||
* v5.0 方案 B3: 被动注入 + GPU 无感知
|
||||
* - 每块 GPU 独立运行 solve(),各自用不同 seed
|
||||
* - 每个 GPU 有一个 InjectBuffer(设备端)
|
||||
* - CPU 协调线程定期(每 N 秒)收集各 GPU 的 best,异步写入其他 GPU 的 InjectBuffer
|
||||
* - GPU 在 migrate_kernel 后检查 InjectBuffer,如果有新解则注入
|
||||
* - 完全解耦:GPU 无需暂停,CPU 异步写入,通过 CUDA Stream 同步保证安全
|
||||
* v5.0 plan B3: passive injection + GPU-agnostic design
|
||||
* - Each GPU runs solve() independently with its own seed
|
||||
* - Each GPU has an InjectBuffer (device memory)
|
||||
* - A CPU coordinator thread periodically (every N seconds) collects each GPU's best and asynchronously writes to other GPUs' InjectBuffers
|
||||
* - After migrate_kernel, each GPU checks InjectBuffer and injects if a new solution is present
|
||||
* - Fully decoupled: GPUs need not pause; CPU writes asynchronously; CUDA stream sync ensures safety
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -18,25 +18,26 @@
|
|||
#include <chrono>
|
||||
|
||||
// ============================================================
|
||||
// MultiGpuContext — 每个 GPU 的上下文
|
||||
// MultiGpuContext — per-GPU context
|
||||
// ============================================================
|
||||
|
||||
template<typename Problem>
|
||||
struct MultiGpuContext {
|
||||
using Sol = typename Problem::Sol;
|
||||
|
||||
int gpu_id; // GPU 设备 ID
|
||||
Problem* problem; // Problem 实例(设备指针指向该 GPU)
|
||||
SolverConfig config; // 求解器配置(独立 seed)
|
||||
int gpu_id; // GPU device ID
|
||||
Problem* problem; // Problem instance (device pointer for this GPU)
|
||||
SolverConfig config; // Solver config (independent seed)
|
||||
|
||||
Sol best_solution; // 当前最优解(host 端)
|
||||
std::mutex best_mutex; // 保护 best_solution 的互斥锁
|
||||
Sol best_solution; // Current best solution (host)
|
||||
SolveResult<Sol> solve_result; // Full result from solve()
|
||||
std::mutex best_mutex; // Mutex protecting best_solution
|
||||
|
||||
InjectBuffer<Sol>* d_inject_buf; // Device 端注入缓冲区(在该 GPU 上分配)
|
||||
Sol* d_global_best; // Device 端全局最优解指针(由 solve() 导出)
|
||||
InjectBuffer<Sol>* d_inject_buf; // Device-side inject buffer (allocated on this GPU)
|
||||
Sol* d_global_best; // Device pointer to global best (exported by solve())
|
||||
|
||||
std::atomic<bool> stop_flag; // 停止标志
|
||||
std::atomic<bool> running; // 运行状态标志(用于协调线程判断)
|
||||
std::atomic<bool> stop_flag; // Stop flag
|
||||
std::atomic<bool> running; // Running flag (for coordinator thread)
|
||||
|
||||
MultiGpuContext(int id) : gpu_id(id), problem(nullptr), d_inject_buf(nullptr),
|
||||
d_global_best(nullptr), stop_flag(false), running(false) {
|
||||
|
|
@ -47,45 +48,46 @@ struct MultiGpuContext {
|
|||
};
|
||||
|
||||
// ============================================================
|
||||
// GPU Worker 线程函数(方案 B3)
|
||||
// GPU worker thread (plan B3)
|
||||
// ============================================================
|
||||
|
||||
template<typename Problem>
|
||||
void gpu_worker(MultiGpuContext<Problem>* ctx) {
|
||||
using Sol = typename Problem::Sol;
|
||||
|
||||
// 设置当前线程使用的 GPU
|
||||
// Set GPU for this thread
|
||||
CUDA_CHECK(cudaSetDevice(ctx->gpu_id));
|
||||
|
||||
// 标记开始运行
|
||||
// Mark as running
|
||||
ctx->running.store(true);
|
||||
|
||||
// 运行 solve(传入 inject_buf 和 d_global_best_out)
|
||||
// Run solve (pass inject_buf and d_global_best_out)
|
||||
SolveResult<Sol> result = solve(*ctx->problem, ctx->config,
|
||||
nullptr, 0, nullptr, ctx->d_inject_buf, &ctx->d_global_best);
|
||||
|
||||
// 标记运行结束
|
||||
// Mark as finished running
|
||||
ctx->running.store(false);
|
||||
|
||||
// 更新最优解
|
||||
// Update best solution and full result
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(ctx->best_mutex);
|
||||
ctx->best_solution = result.best_solution;
|
||||
ctx->solve_result = result;
|
||||
}
|
||||
|
||||
// 标记完成
|
||||
// Mark complete
|
||||
ctx->stop_flag.store(true);
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// 协调线程函数(方案 B3)
|
||||
// Coordinator thread (plan B3)
|
||||
// ============================================================
|
||||
// 定期从各 GPU 的 d_global_best 读取当前 best,计算 global_best,注入到其他 GPU
|
||||
// Periodically read each GPU's current best from d_global_best, compute global_best, inject to other GPUs
|
||||
//
|
||||
// 关键设计:
|
||||
// 1. 直接从各 GPU 的 d_global_best 读取(由 solve() 导出)
|
||||
// 2. 要求启用 SA(否则无 d_global_best)
|
||||
// 3. 轻量侵入:solve() 只需导出一个指针,对单 GPU 无影响
|
||||
// Key design:
|
||||
// 1. Read directly from each GPU's d_global_best (exported by solve())
|
||||
// 2. Requires SA enabled (otherwise no d_global_best)
|
||||
// 3. Light touch: solve() only exports a pointer; single-GPU path unchanged
|
||||
|
||||
template<typename Problem>
|
||||
void coordinator_thread(std::vector<MultiGpuContext<Problem>*>& contexts,
|
||||
|
|
@ -96,7 +98,7 @@ void coordinator_thread(std::vector<MultiGpuContext<Problem>*>& contexts,
|
|||
auto interval_ms = std::chrono::milliseconds(static_cast<int>(interval_sec * 1000));
|
||||
int round = 0;
|
||||
|
||||
// 等待所有 GPU 的 d_global_best 就绪
|
||||
// Wait until all GPUs' d_global_best are ready
|
||||
bool all_ready = false;
|
||||
while (!all_ready) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
|
|
@ -110,10 +112,10 @@ void coordinator_thread(std::vector<MultiGpuContext<Problem>*>& contexts,
|
|||
}
|
||||
|
||||
while (true) {
|
||||
// 等待指定时间间隔
|
||||
// Wait for the configured interval
|
||||
std::this_thread::sleep_for(interval_ms);
|
||||
|
||||
// 检查是否所有 GPU 都已停止
|
||||
// Check whether all GPUs have stopped
|
||||
bool all_stopped = true;
|
||||
for (auto* ctx : contexts) {
|
||||
if (ctx->running.load()) {
|
||||
|
|
@ -125,17 +127,17 @@ void coordinator_thread(std::vector<MultiGpuContext<Problem>*>& contexts,
|
|||
|
||||
round++;
|
||||
|
||||
// 收集各 GPU 的当前最优解(从 d_global_best 读取)
|
||||
// Collect each GPU's current best (from d_global_best)
|
||||
Sol global_best;
|
||||
global_best.penalty = 1e30f;
|
||||
global_best.objectives[0] = 1e30f;
|
||||
int best_gpu = -1;
|
||||
|
||||
for (int i = 0; i < (int)contexts.size(); i++) {
|
||||
if (!contexts[i]->running.load()) continue; // 已停止的 GPU 跳过
|
||||
if (contexts[i]->d_global_best == nullptr) continue; // 未就绪跳过
|
||||
if (!contexts[i]->running.load()) continue; // skip stopped GPUs
|
||||
if (contexts[i]->d_global_best == nullptr) continue; // skip not ready
|
||||
|
||||
// 从该 GPU 的 d_global_best 读取
|
||||
// Read from this GPU's d_global_best
|
||||
Sol gpu_best;
|
||||
cudaSetDevice(contexts[i]->gpu_id);
|
||||
cudaMemcpy(&gpu_best, contexts[i]->d_global_best, sizeof(Sol), cudaMemcpyDeviceToHost);
|
||||
|
|
@ -146,23 +148,23 @@ void coordinator_thread(std::vector<MultiGpuContext<Problem>*>& contexts,
|
|||
}
|
||||
}
|
||||
|
||||
if (best_gpu == -1) continue; // 所有 GPU 都已停止或未就绪
|
||||
if (best_gpu == -1) continue; // all GPUs stopped or not ready
|
||||
|
||||
if (verbose) {
|
||||
printf(" [Coordinator Round %d] Global best from GPU %d: obj=%.2f, penalty=%.2f\n",
|
||||
round, best_gpu, global_best.objectives[0], global_best.penalty);
|
||||
}
|
||||
|
||||
// 将 global_best 注入到其他 GPU(除了 best_gpu 自己)
|
||||
// Inject global_best into other GPUs (except best_gpu)
|
||||
for (int i = 0; i < (int)contexts.size(); i++) {
|
||||
if (i == best_gpu) continue; // 不注入到自己
|
||||
if (!contexts[i]->running.load()) continue; // 已停止的 GPU 不注入
|
||||
if (i == best_gpu) continue; // do not inject to self
|
||||
if (!contexts[i]->running.load()) continue; // do not inject to stopped GPUs
|
||||
|
||||
// 读取 InjectBuffer 结构(从 device 到 host)
|
||||
// Read InjectBuffer struct (device to host)
|
||||
InjectBuffer<Sol> buf;
|
||||
cudaMemcpy(&buf, contexts[i]->d_inject_buf, sizeof(InjectBuffer<Sol>), cudaMemcpyDeviceToHost);
|
||||
|
||||
// 同步写入(会自动切换设备)
|
||||
// Synchronous write (switches device as needed)
|
||||
buf.write_sync(global_best, contexts[i]->gpu_id);
|
||||
}
|
||||
}
|
||||
|
|
@ -173,7 +175,7 @@ void coordinator_thread(std::vector<MultiGpuContext<Problem>*>& contexts,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 多 GPU 协同求解主函数(方案 B3)
|
||||
// Multi-GPU cooperative solve entry (plan B3)
|
||||
// ============================================================
|
||||
|
||||
template<typename Problem>
|
||||
|
|
@ -181,13 +183,17 @@ SolveResult<typename Problem::Sol> solve_multi_gpu(Problem& prob, const SolverCo
|
|||
using Sol = typename Problem::Sol;
|
||||
|
||||
if (cfg.num_gpus <= 1) {
|
||||
// 单 GPU 模式,直接调用普通 solve
|
||||
// Single-GPU mode: call plain solve
|
||||
return solve(prob, cfg);
|
||||
}
|
||||
|
||||
// 检查可用 GPU 数量
|
||||
int device_count;
|
||||
// Check available GPU count
|
||||
int device_count = 0;
|
||||
CUDA_CHECK(cudaGetDeviceCount(&device_count));
|
||||
if (device_count <= 0) {
|
||||
fprintf(stderr, "Error: No CUDA devices available\n");
|
||||
return SolveResult<Sol>{};
|
||||
}
|
||||
int actual_gpus = std::min(cfg.num_gpus, device_count);
|
||||
|
||||
if (cfg.verbose) {
|
||||
|
|
@ -199,15 +205,15 @@ SolveResult<typename Problem::Sol> solve_multi_gpu(Problem& prob, const SolverCo
|
|||
cfg.multi_gpu_inject_mode == MultiGpuInjectMode::HalfIslands ? "HalfIslands" : "AllIslands");
|
||||
}
|
||||
|
||||
// 创建各 GPU 的上下文
|
||||
// Create per-GPU contexts
|
||||
std::vector<MultiGpuContext<Problem>*> contexts;
|
||||
for (int i = 0; i < actual_gpus; i++) {
|
||||
auto* ctx = new MultiGpuContext<Problem>(i);
|
||||
ctx->config = cfg;
|
||||
ctx->config.seed = cfg.seed + i * 1000; // 每个 GPU 用不同 seed
|
||||
ctx->config.num_gpus = 1; // 单 GPU 模式运行
|
||||
ctx->config.seed = cfg.seed + i * 1000; // distinct seed per GPU
|
||||
ctx->config.num_gpus = 1; // run as single-GPU per device
|
||||
|
||||
// 克隆 Problem 到该 GPU
|
||||
// Clone Problem onto this GPU
|
||||
ctx->problem = prob.clone_to_device(i);
|
||||
if (ctx->problem == nullptr) {
|
||||
fprintf(stderr, "Error: Failed to clone problem to GPU %d\n", i);
|
||||
|
|
@ -218,10 +224,10 @@ SolveResult<typename Problem::Sol> solve_multi_gpu(Problem& prob, const SolverCo
|
|||
return SolveResult<Sol>{};
|
||||
}
|
||||
|
||||
// 分配 InjectBuffer(在该 GPU 上)
|
||||
// Allocate InjectBuffer on this GPU
|
||||
InjectBuffer<Sol> buf = InjectBuffer<Sol>::allocate(i);
|
||||
|
||||
// 将 InjectBuffer 拷贝到 device 端(传给 kernel)
|
||||
// Copy InjectBuffer to device (for kernels)
|
||||
InjectBuffer<Sol>* d_buf;
|
||||
CUDA_CHECK(cudaSetDevice(i));
|
||||
CUDA_CHECK(cudaMalloc(&d_buf, sizeof(InjectBuffer<Sol>)));
|
||||
|
|
@ -231,34 +237,36 @@ SolveResult<typename Problem::Sol> solve_multi_gpu(Problem& prob, const SolverCo
|
|||
contexts.push_back(ctx);
|
||||
}
|
||||
|
||||
// 启动 worker 线程
|
||||
// Start worker threads
|
||||
std::vector<std::thread> workers;
|
||||
for (auto* ctx : contexts) {
|
||||
workers.emplace_back(gpu_worker<Problem>, ctx);
|
||||
}
|
||||
|
||||
// 启动协调线程(定期注入 global_best)
|
||||
// Start coordinator thread (periodic global_best injection)
|
||||
std::thread coordinator(coordinator_thread<Problem>, std::ref(contexts),
|
||||
cfg.multi_gpu_interval_sec, cfg.verbose);
|
||||
|
||||
// 等待所有 worker 完成
|
||||
// Wait for all workers to finish
|
||||
for (auto& w : workers) w.join();
|
||||
|
||||
// 等待协调线程完成
|
||||
// Wait for coordinator to finish
|
||||
coordinator.join();
|
||||
|
||||
// 收集最终结果
|
||||
// Collect final result from best GPU
|
||||
Sol final_best = contexts[0]->best_solution;
|
||||
int best_ctx = 0;
|
||||
ObjConfig oc = prob.obj_config();
|
||||
for (int i = 1; i < (int)contexts.size(); i++) {
|
||||
if (is_better(contexts[i]->best_solution, final_best, oc)) {
|
||||
final_best = contexts[i]->best_solution;
|
||||
best_ctx = i;
|
||||
}
|
||||
}
|
||||
|
||||
// 清理
|
||||
// Cleanup
|
||||
for (auto* ctx : contexts) {
|
||||
// 读取 InjectBuffer 的内容(用于释放)
|
||||
// Read InjectBuffer content (for teardown)
|
||||
InjectBuffer<Sol> buf;
|
||||
CUDA_CHECK(cudaSetDevice(ctx->gpu_id));
|
||||
CUDA_CHECK(cudaMemcpy(&buf, ctx->d_inject_buf, sizeof(InjectBuffer<Sol>), cudaMemcpyDeviceToHost));
|
||||
|
|
@ -269,10 +277,9 @@ SolveResult<typename Problem::Sol> solve_multi_gpu(Problem& prob, const SolverCo
|
|||
delete ctx;
|
||||
}
|
||||
|
||||
// 构造返回结果
|
||||
SolveResult<Sol> result;
|
||||
// Build return value from best GPU's result
|
||||
SolveResult<Sol> result = contexts[best_ctx]->solve_result;
|
||||
result.best_solution = final_best;
|
||||
result.stop_reason = StopReason::MaxGen;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,40 +1,40 @@
|
|||
/**
|
||||
* operators.cuh - 四层搜索算子体系(Device 端)
|
||||
* operators.cuh - Four-layer search operator hierarchy (device side)
|
||||
*
|
||||
* v1.0: 二维通用编码的完整算子层次
|
||||
* v1.0: Full operator hierarchy for 2D universal encoding
|
||||
*
|
||||
* 层次结构(所有算子只看 data[D1][D2] + dim2_sizes,不感知问题语义):
|
||||
* Hierarchy (all operators only see data[D1][D2] + dim2_sizes, no problem semantics):
|
||||
*
|
||||
* 第 1 层 - 元素级(Element): 操作单个元素
|
||||
* 行内: swap, reverse(2-opt), insert, flip
|
||||
* 跨行: cross_relocate(单元素移行), cross_swap(单元素换行)
|
||||
* Layer 1 - Element: operate on single elements
|
||||
* Within row: swap, reverse(2-opt), insert, flip
|
||||
* Cross-row: cross_relocate (move one element across rows), cross_swap (swap one element per row)
|
||||
*
|
||||
* 第 2 层 - 片段级(Segment): 操作连续片段
|
||||
* 行内: or_opt(移动连续 k 个元素到行内新位置)
|
||||
* 跨行: seg_relocate(片段从一行移到另一行)
|
||||
* seg_swap(两行各取一段互换,即 2-opt*)
|
||||
* Layer 2 - Segment: operate on contiguous segments
|
||||
* Within row: or_opt (move contiguous k elements to a new position in the row)
|
||||
* Cross-row: seg_relocate (move a segment from one row to another)
|
||||
* seg_swap (swap two segments from two rows each, i.e. 2-opt*)
|
||||
*
|
||||
* 第 3 层 - 行级(Row): 操作整行
|
||||
* row_swap(交换两行全部内容和长度)
|
||||
* row_reverse(反转行的排列顺序)
|
||||
* row_split(一行拆成两行)
|
||||
* row_merge(两行合并为一行)
|
||||
* Layer 3 - Row: operate on whole rows
|
||||
* row_swap (swap full contents and lengths of two rows)
|
||||
* row_reverse (reverse row order)
|
||||
* row_split (split one row into two)
|
||||
* row_merge (merge two rows into one)
|
||||
*
|
||||
* 第 4 层 - 交叉(Crossover): 组合两个解
|
||||
* row_crossover(从父代 A/B 各取若干行组成子代)
|
||||
* uniform_crossover(逐元素从两个父代中选)
|
||||
* Layer 4 - Crossover: combine two solutions
|
||||
* row_crossover (child takes some rows from parent A and B)
|
||||
* uniform_crossover (pick per element from two parents)
|
||||
*
|
||||
* Move 描述符:
|
||||
* row, row2: 行索引(row2=-1 表示行内)
|
||||
* op: 操作码
|
||||
* pos1, pos2: 位置参数
|
||||
* seg_len: 片段长度(第 2 层使用)
|
||||
* Move descriptor:
|
||||
* row, row2: row indices (row2=-1 means within-row)
|
||||
* op: operation code
|
||||
* pos1, pos2: position parameters
|
||||
* seg_len: segment length (used by layer 2)
|
||||
*
|
||||
* 设计原则:
|
||||
* - 所有算子对问题类型无感知,只操作二维数组
|
||||
* - 每个算子都有对应的 undo 操作
|
||||
* - 空行安全:自动降级为 no-op
|
||||
* - 编码类型决定可用算子集
|
||||
* Design principles:
|
||||
* - All operators are problem-agnostic; they only manipulate a 2D array
|
||||
* - Each operator has a corresponding undo
|
||||
* - Empty-row safe: automatically degrades to no-op
|
||||
* - Encoding type determines the available operator set
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -44,61 +44,61 @@
|
|||
namespace ops {
|
||||
|
||||
// ============================================================
|
||||
// Op 码常量 — 按层次编号,避免冲突
|
||||
// Op code constants — numbered by layer to avoid collisions
|
||||
// ============================================================
|
||||
|
||||
// 通用
|
||||
// General
|
||||
constexpr int OP_NOOP = -1;
|
||||
|
||||
// --- 第 1 层:元素级 ---
|
||||
// Permutation 行内
|
||||
constexpr int PERM_SWAP = 0; // 交换两个位置
|
||||
constexpr int PERM_REVERSE = 1; // 反转区间(2-opt)
|
||||
constexpr int PERM_INSERT = 2; // 移动单个元素到新位置
|
||||
// Permutation 跨行
|
||||
constexpr int PERM_CROSS_RELOCATE = 3; // 单元素从一行移到另一行
|
||||
constexpr int PERM_CROSS_SWAP = 4; // 两行各一个元素互换
|
||||
// Binary 行内
|
||||
constexpr int BIN_FLIP = 0; // 翻转一个位
|
||||
constexpr int BIN_SWAP = 1; // 交换两个位
|
||||
// Binary 跨行
|
||||
constexpr int BIN_CROSS_SWAP = 2; // 两行各一个位互换
|
||||
// --- Layer 1: element ---
|
||||
// Permutation within row
|
||||
constexpr int PERM_SWAP = 0; // swap two positions
|
||||
constexpr int PERM_REVERSE = 1; // reverse interval (2-opt)
|
||||
constexpr int PERM_INSERT = 2; // move one element to a new position
|
||||
// Permutation cross-row
|
||||
constexpr int PERM_CROSS_RELOCATE = 3; // move one element from one row to another
|
||||
constexpr int PERM_CROSS_SWAP = 4; // swap one element per row between two rows
|
||||
// Binary within row
|
||||
constexpr int BIN_FLIP = 0; // flip one bit
|
||||
constexpr int BIN_SWAP = 1; // swap two bits
|
||||
// Binary cross-row
|
||||
constexpr int BIN_CROSS_SWAP = 2; // swap one bit per row between two rows
|
||||
|
||||
// --- 第 1 层(续):排列行内 ---
|
||||
constexpr int PERM_3OPT = 5; // 3-opt:断 3 条边重连
|
||||
// --- Layer 1 (cont.): permutation within row ---
|
||||
constexpr int PERM_3OPT = 5; // 3-opt: break 3 edges and reconnect
|
||||
|
||||
// --- 第 2 层:片段级 ---
|
||||
constexpr int PERM_OR_OPT = 10; // 行内:移动连续 k 个元素
|
||||
constexpr int PERM_SEG_RELOCATE = 11; // 跨行:片段从一行移到另一行
|
||||
constexpr int PERM_SEG_SWAP = 12; // 跨行:两行各取一段互换(2-opt*)
|
||||
constexpr int PERM_CROSS_EXCHANGE = 15; // 跨行:两行各取一段互换(保持各自内部顺序)
|
||||
constexpr int BIN_SEG_FLIP = 13; // 行内:翻转连续 k 个位
|
||||
constexpr int BIN_SEG_CROSS_SWAP = 14; // 跨行:两行各取一段互换
|
||||
constexpr int BIN_K_FLIP = 16; // 行内:同时翻转 k 个随机位
|
||||
// --- Layer 2: segment ---
|
||||
constexpr int PERM_OR_OPT = 10; // within row: move contiguous k elements
|
||||
constexpr int PERM_SEG_RELOCATE = 11; // cross-row: move segment from one row to another
|
||||
constexpr int PERM_SEG_SWAP = 12; // cross-row: swap two segments from two rows each (2-opt*)
|
||||
constexpr int PERM_CROSS_EXCHANGE = 15; // cross-row: swap two segments (preserve internal order each)
|
||||
constexpr int BIN_SEG_FLIP = 13; // within row: flip contiguous k bits
|
||||
constexpr int BIN_SEG_CROSS_SWAP = 14; // cross-row: swap two segments from two rows each
|
||||
constexpr int BIN_K_FLIP = 16; // within row: flip k random bits at once
|
||||
|
||||
// --- 第 3 层:行级 ---
|
||||
constexpr int ROW_SWAP = 20; // 交换两行全部内容
|
||||
constexpr int ROW_REVERSE = 21; // 反转行的排列顺序(行号重排)
|
||||
constexpr int ROW_SPLIT = 22; // 一行拆成两行
|
||||
constexpr int ROW_MERGE = 23; // 两行合并为一行
|
||||
// --- Layer 3: row ---
|
||||
constexpr int ROW_SWAP = 20; // swap full contents of two rows
|
||||
constexpr int ROW_REVERSE = 21; // reverse row order (row index permutation)
|
||||
constexpr int ROW_SPLIT = 22; // split one row into two
|
||||
constexpr int ROW_MERGE = 23; // merge two rows into one
|
||||
|
||||
// --- 特殊:扰动(连续多步 move,不可 undo,用于跳出局部最优)---
|
||||
// --- Special: perturbation (multi-step moves, no undo, escape local optima) ---
|
||||
constexpr int PERTURBATION = 40;
|
||||
|
||||
// --- 第 4 层:交叉 ---
|
||||
constexpr int CROSS_ROW = 30; // 行级交叉:从两个父代各取若干行
|
||||
constexpr int CROSS_UNIFORM = 31; // 均匀交叉:逐元素从两个父代选
|
||||
// --- Layer 4: crossover ---
|
||||
constexpr int CROSS_ROW = 30; // row crossover: take some rows from each parent
|
||||
constexpr int CROSS_UNIFORM = 31; // uniform crossover: pick per element from two parents
|
||||
|
||||
// ============================================================
|
||||
// Move 描述符 — 编码级别的变动描述
|
||||
// Move descriptor — encoding-level change description
|
||||
// ============================================================
|
||||
|
||||
struct Move {
|
||||
int row; // 源行(或第一行)
|
||||
int row2; // 目标行(-1 = 行内)
|
||||
int op; // 操作码
|
||||
int pos1, pos2; // 位置参数
|
||||
int seg_len; // 片段长度(第 2 层使用,其他层 = 0)
|
||||
int row; // source row (or first row)
|
||||
int row2; // target row (-1 = within-row)
|
||||
int op; // operation code
|
||||
int pos1, pos2; // position parameters
|
||||
int seg_len; // segment length (layer 2; 0 for other layers)
|
||||
};
|
||||
|
||||
} // namespace ops
|
||||
|
|
@ -106,10 +106,10 @@ struct Move {
|
|||
namespace ops {
|
||||
|
||||
// ============================================================
|
||||
// 第 1 层:元素级底层操作
|
||||
// Layer 1: element-level primitives
|
||||
// ============================================================
|
||||
|
||||
// --- Permutation 行内 ---
|
||||
// --- Permutation within row ---
|
||||
|
||||
__device__ inline void perm_swap(int* row, int i, int j) {
|
||||
int tmp = row[i]; row[i] = row[j]; row[j] = tmp;
|
||||
|
|
@ -126,9 +126,9 @@ __device__ inline void perm_insert(int* row, int from, int to, int size) {
|
|||
row[to] = val;
|
||||
}
|
||||
|
||||
// --- Permutation 跨行 ---
|
||||
// --- Permutation cross-row ---
|
||||
|
||||
/// cross_relocate: 从 src_row[src_pos] 取出元素,插入 dst_row[dst_pos]
|
||||
/// cross_relocate: take element from src_row[src_pos], insert at dst_row[dst_pos]
|
||||
__device__ inline void perm_cross_relocate(int* src_row, int& src_size,
|
||||
int* dst_row, int& dst_size,
|
||||
int src_pos, int dst_pos) {
|
||||
|
|
@ -142,24 +142,24 @@ __device__ inline void perm_cross_relocate(int* src_row, int& src_size,
|
|||
dst_size++;
|
||||
}
|
||||
|
||||
/// cross_swap: swap the single elements rowA[posA] and rowB[posB].
__device__ inline void cross_swap_elem(int* rowA, int posA, int* rowB, int posB) {
    const int a = rowA[posA];
    const int b = rowB[posB];
    rowA[posA] = b;
    rowB[posB] = a;
}
|
||||
|
||||
// --- Permutation 行内:3-opt ---
|
||||
// 断开 3 条边,选择最佳重连方式(共 8 种组合,取随机一种非恒等变换)
|
||||
// 参数:3 个断点 i < j < k,将路线分为 seg0=[0,i] seg1=[i+1,j] seg2=[j+1,k] seg3=[k+1,end]
|
||||
// 实现:随机选一种重连(reverse seg1, reverse seg2, 或两者都反转)
|
||||
// pos1=i, pos2=j, seg_len 编码 k
|
||||
// --- Permutation within row: 3-opt ---
|
||||
// Break 3 edges and pick a reconnection (8 combinations; pick one random non-identity)
|
||||
// Args: three breakpoints i < j < k, route splits seg0=[0,i] seg1=[i+1,j] seg2=[j+1,k] seg3=[k+1,end]
|
||||
// Impl: random reconnection (reverse seg1, reverse seg2, or both)
|
||||
// pos1=i, pos2=j, seg_len encodes k
|
||||
__device__ inline void perm_3opt(int* row, int size, int i, int j, int k) {
|
||||
// 3-opt 有多种重连方式,这里实现最常用的 3 种非恒等变换:
|
||||
// type 1: reverse [i+1, j] — 等价于 2-opt(i+1, j)
|
||||
// type 2: reverse [j+1, k] — 等价于 2-opt(j+1, k)
|
||||
// type 3: reverse [i+1, j] + reverse [j+1, k] — 真正的 3-opt move
|
||||
// type 4: 将 seg1 和 seg2 互换位置(不反转) — or-opt 的泛化
|
||||
// 我们随机选 type 3 或 type 4(type 1/2 已被 2-opt 覆盖)
|
||||
// 这里固定做 type 3(双反转),因为它是 2-opt 无法达到的唯一新邻域
|
||||
// 3-opt has several reconnections; here we use the most common non-identity variants:
|
||||
// type 1: reverse [i+1, j] — same as 2-opt(i+1, j)
|
||||
// type 2: reverse [j+1, k] — same as 2-opt(j+1, k)
|
||||
// type 3: reverse [i+1, j] + reverse [j+1, k] — true 3-opt move
|
||||
// type 4: swap seg1 and seg2 (no reverse) — generalization of or-opt
|
||||
// We would randomize type 3 or 4 (types 1/2 are covered by 2-opt)
|
||||
// Here we fix type 3 (double reverse) as the only new neighborhood 2-opt cannot reach
|
||||
// reverse [i+1, j]
|
||||
int lo = i + 1, hi = j;
|
||||
while (lo < hi) { int t = row[lo]; row[lo] = row[hi]; row[hi] = t; lo++; hi--; }
|
||||
|
|
@ -168,12 +168,12 @@ __device__ inline void perm_3opt(int* row, int size, int i, int j, int k) {
|
|||
while (lo < hi) { int t = row[lo]; row[lo] = row[hi]; row[hi] = t; lo++; hi--; }
|
||||
}
|
||||
|
||||
// 3-opt undo: the double-reverse move is an involution, so applying the
// exact same move a second time restores the original row.
__device__ inline void perm_3opt_undo(int* row, int size, int i, int j, int k) {
    perm_3opt(row, size, i, j, k);  // self-inverse
}
|
||||
|
||||
// --- Binary within row ---

/// bin_flip: toggle a single 0/1 entry in place (maps v to 1 - v).
__device__ inline void bin_flip(int* row, int i) {
    const int old_bit = row[i];
    row[i] = 1 - old_bit;
}
|
||||
|
||||
|
|
@ -182,51 +182,51 @@ __device__ inline void bin_swap(int* row, int i, int j) {
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 第 2 层:片段级底层操作
|
||||
// Layer 2: segment-level primitives
|
||||
// ============================================================
|
||||
|
||||
/// or_opt: within a row, relocate the contiguous block [from, from+seg_len)
/// to position `to` (the block is removed first, then re-inserted before
/// `to`, expressed in the original coordinate system).
/// Constraints assumed by callers: from + seg_len <= size, and `to` does not
/// fall inside the moved block.
__device__ inline void perm_or_opt(int* row, int size, int from, int to, int seg_len) {
    // Stash the block in registers; length is capped at 8 (callers typically use <= 4).
    int saved[8];
    const int len = (seg_len > 8) ? 8 : seg_len;
    for (int i = 0; i < len; i++) {
        saved[i] = row[from + i];
    }

    // Close the gap left by removing the block (shift the tail left).
    const int remaining = size - len;
    for (int k = from; k < remaining; k++) {
        row[k] = row[k + len];
    }

    // Re-map the target position into post-removal coordinates, then clamp.
    int ins = (to > from) ? (to - len) : to;
    if (ins < 0) ins = 0;
    if (ins > remaining) ins = remaining;

    // Open a hole at the insertion point (shift right) and drop the block in.
    for (int k = remaining - 1; k >= ins; k--) {
        row[k + len] = row[k];
    }
    for (int i = 0; i < len; i++) {
        row[ins + i] = saved[i];
    }
}
|
||||
|
||||
/// seg_relocate: 从 src_row 取出连续 seg_len 个元素,插入 dst_row 的 dst_pos
|
||||
/// src_size 减 seg_len,dst_size 加 seg_len
|
||||
/// seg_relocate: take contiguous seg_len elements from src_row, insert at dst_pos in dst_row
|
||||
/// src_size -= seg_len, dst_size += seg_len
|
||||
__device__ inline void perm_seg_relocate(int* src_row, int& src_size,
|
||||
int* dst_row, int& dst_size,
|
||||
int src_pos, int dst_pos, int seg_len) {
|
||||
int buf[8];
|
||||
int actual_len = (seg_len > 8) ? 8 : seg_len;
|
||||
|
||||
// 保存片段
|
||||
// Save segment
|
||||
for (int i = 0; i < actual_len; i++) buf[i] = src_row[src_pos + i];
|
||||
|
||||
// 源行:移除(左移)
|
||||
// Source row: remove (shift left)
|
||||
for (int k = src_pos; k < src_size - actual_len; k++)
|
||||
src_row[k] = src_row[k + actual_len];
|
||||
src_size -= actual_len;
|
||||
|
||||
// 目标行:插入(右移)
|
||||
// Destination row: insert (shift right)
|
||||
for (int k = dst_size - 1; k >= dst_pos; k--)
|
||||
dst_row[k + actual_len] = dst_row[k];
|
||||
for (int i = 0; i < actual_len; i++)
|
||||
|
|
@ -234,29 +234,29 @@ __device__ inline void perm_seg_relocate(int* src_row, int& src_size,
|
|||
dst_size += actual_len;
|
||||
}
|
||||
|
||||
/// seg_swap: 两行各取一段互换(2-opt* 的通用形式)
|
||||
/// seg_swap: swap one segment from each row (general 2-opt*)
|
||||
/// rowA[posA..posA+lenA) <-> rowB[posB..posB+lenB)
|
||||
/// 行长变化:sizeA += (lenB - lenA), sizeB += (lenA - lenB)
|
||||
/// Row lengths: sizeA += (lenB - lenA), sizeB += (lenA - lenB)
|
||||
__device__ inline void perm_seg_swap(int* rowA, int& sizeA, int posA, int lenA,
|
||||
int* rowB, int& sizeB, int posB, int lenB) {
|
||||
int bufA[8], bufB[8];
|
||||
int aLen = (lenA > 8) ? 8 : lenA;
|
||||
int bLen = (lenB > 8) ? 8 : lenB;
|
||||
|
||||
// 保存两段
|
||||
// Save both segments
|
||||
for (int i = 0; i < aLen; i++) bufA[i] = rowA[posA + i];
|
||||
for (int i = 0; i < bLen; i++) bufB[i] = rowB[posB + i];
|
||||
|
||||
// 从 rowA 移除 segA,腾出空间插入 segB
|
||||
// 先移除
|
||||
// Remove segA from rowA to make room for segB
|
||||
// Remove first
|
||||
int newSizeA = sizeA - aLen;
|
||||
for (int k = posA; k < newSizeA; k++) rowA[k] = rowA[k + aLen];
|
||||
// 再插入 segB
|
||||
// Then insert segB
|
||||
for (int k = newSizeA - 1; k >= posA; k--) rowA[k + bLen] = rowA[k];
|
||||
for (int i = 0; i < bLen; i++) rowA[posA + i] = bufB[i];
|
||||
sizeA = newSizeA + bLen;
|
||||
|
||||
// 从 rowB 移除 segB,腾出空间插入 segA
|
||||
// Remove segB from rowB to make room for segA
|
||||
int newSizeB = sizeB - bLen;
|
||||
for (int k = posB; k < newSizeB; k++) rowB[k] = rowB[k + bLen];
|
||||
for (int k = newSizeB - 1; k >= posB; k--) rowB[k + aLen] = rowB[k];
|
||||
|
|
@ -264,10 +264,10 @@ __device__ inline void perm_seg_swap(int* rowA, int& sizeA, int posA, int lenA,
|
|||
sizeB = newSizeB + aLen;
|
||||
}
|
||||
|
||||
/// cross_exchange: 两行各取一段互换,保持各自内部顺序
|
||||
/// 与 seg_swap 的区别:seg_swap 是等长互换,cross_exchange 允许不等长
|
||||
/// cross_exchange: swap one segment from each row, preserving internal order each
|
||||
/// Unlike seg_swap: seg_swap is equal-length swap; cross_exchange allows unequal lengths
|
||||
/// rowA[posA..posA+lenA) <-> rowB[posB..posB+lenB)
|
||||
/// 行长变化:sizeA += (lenB - lenA), sizeB += (lenA - lenB)
|
||||
/// Row lengths: sizeA += (lenB - lenA), sizeB += (lenA - lenB)
|
||||
__device__ inline void perm_cross_exchange(int* rowA, int& sizeA, int posA, int lenA,
|
||||
int* rowB, int& sizeB, int posB, int lenB) {
|
||||
int bufA[8], bufB[8];
|
||||
|
|
@ -277,14 +277,14 @@ __device__ inline void perm_cross_exchange(int* rowA, int& sizeA, int posA, int
|
|||
for (int i = 0; i < aLen; i++) bufA[i] = rowA[posA + i];
|
||||
for (int i = 0; i < bLen; i++) bufB[i] = rowB[posB + i];
|
||||
|
||||
// rowA: 移除 segA,插入 segB
|
||||
// rowA: remove segA, insert segB
|
||||
int newSizeA = sizeA - aLen;
|
||||
for (int k = posA; k < newSizeA; k++) rowA[k] = rowA[k + aLen];
|
||||
for (int k = newSizeA - 1; k >= posA; k--) rowA[k + bLen] = rowA[k];
|
||||
for (int i = 0; i < bLen; i++) rowA[posA + i] = bufB[i];
|
||||
sizeA = newSizeA + bLen;
|
||||
|
||||
// rowB: 移除 segB,插入 segA
|
||||
// rowB: remove segB, insert segA
|
||||
int newSizeB = sizeB - bLen;
|
||||
for (int k = posB; k < newSizeB; k++) rowB[k] = rowB[k + bLen];
|
||||
for (int k = newSizeB - 1; k >= posB; k--) rowB[k + aLen] = rowB[k];
|
||||
|
|
@ -292,8 +292,8 @@ __device__ inline void perm_cross_exchange(int* rowA, int& sizeA, int posA, int
|
|||
sizeB = newSizeB + aLen;
|
||||
}
|
||||
|
||||
/// k-bit flip: 同时翻转 k 个随机位(Binary 编码)
|
||||
/// positions 数组存储要翻转的位置,k = 实际翻转数
|
||||
/// k-bit flip: flip k random bits at once (Binary encoding)
|
||||
/// positions array holds indices to flip; k = number of flips
|
||||
__device__ inline void bin_k_flip(int* row, int size, int k, curandState* rng) {
|
||||
for (int i = 0; i < k; i++) {
|
||||
int pos = rand_int(rng, size);
|
||||
|
|
@ -301,12 +301,12 @@ __device__ inline void bin_k_flip(int* row, int size, int k, curandState* rng) {
|
|||
}
|
||||
}
|
||||
|
||||
/// seg_flip: toggle a contiguous run of seg_len 0/1 entries starting at pos
/// (Binary encoding).
__device__ inline void bin_seg_flip(int* row, int pos, int seg_len) {
    int* seg = row + pos;
    for (int i = 0; i < seg_len; i++) {
        seg[i] = 1 - seg[i];
    }
}
|
||||
|
||||
/// seg_cross_swap: 两行各取一段互换(Binary 编码,等长)
|
||||
/// seg_cross_swap: swap one segment from each row (Binary encoding, equal length)
|
||||
__device__ inline void bin_seg_cross_swap(int* rowA, int posA,
|
||||
int* rowB, int posB, int seg_len) {
|
||||
for (int i = 0; i < seg_len; i++) {
|
||||
|
|
@ -317,23 +317,23 @@ __device__ inline void bin_seg_cross_swap(int* rowA, int posA,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// Integer 编码底层操作
|
||||
// Integer encoding primitives
|
||||
// ============================================================
|
||||
|
||||
/// int_clamp: clamp value v into the closed range [lb, ub].
__device__ inline int int_clamp(int v, int lb, int ub) {
    return (v < lb) ? lb : ((v > ub) ? ub : v);
}
|
||||
|
||||
/// int_random_reset: overwrite row[pos] with a uniform random value in [lb, ub].
/// NOTE(review): modulo reduction of curand() has slight bias for ranges that
/// do not divide 2^32 — acceptable for a mutation operator.
__device__ inline void int_random_reset(int* row, int pos, int lb, int ub,
                                        curandState* rng) {
    const int span = ub - lb + 1;  // number of admissible values
    row[pos] = lb + (int)(curand(rng) % (unsigned int)span);
}
|
||||
|
||||
/// int_delta: 随机一个位置 ±k(clamp 到 [lb, ub])
|
||||
/// int_delta: random position, add ±k (clamped to [lb, ub])
|
||||
__device__ inline void int_delta(int* row, int pos, int lb, int ub,
|
||||
curandState* rng) {
|
||||
int range = ub - lb + 1;
|
||||
|
|
@ -343,7 +343,7 @@ __device__ inline void int_delta(int* row, int pos, int lb, int ub,
|
|||
row[pos] = int_clamp(row[pos] + step, lb, ub);
|
||||
}
|
||||
|
||||
/// int_seg_reset: 连续 k 个位置全部重置为 [lb, ub] 内随机值
|
||||
/// int_seg_reset: reset k contiguous positions to uniform random in [lb, ub]
|
||||
__device__ inline void int_seg_reset(int* row, int pos, int seg_len,
|
||||
int lb, int ub, curandState* rng) {
|
||||
int range = ub - lb + 1;
|
||||
|
|
@ -351,7 +351,7 @@ __device__ inline void int_seg_reset(int* row, int pos, int seg_len,
|
|||
row[pos + i] = lb + (curand(rng) % range);
|
||||
}
|
||||
|
||||
/// int_k_delta: 随机 k 个位置各自 ±1
|
||||
/// int_k_delta: k random positions, each ±1
|
||||
__device__ inline void int_k_delta(int* row, int size, int k,
|
||||
int lb, int ub, curandState* rng) {
|
||||
for (int i = 0; i < k; i++) {
|
||||
|
|
@ -362,21 +362,21 @@ __device__ inline void int_k_delta(int* row, int size, int k,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 第 3 层:行级底层操作
|
||||
// Layer 3: row-level primitives
|
||||
// ============================================================
|
||||
|
||||
/// row_swap: 交换两行的全部内容和长度
|
||||
/// row_swap: swap full contents and lengths of two rows
|
||||
template<typename Sol>
|
||||
__device__ inline void row_swap(Sol& sol, int r1, int r2) {
|
||||
// 交换长度
|
||||
// Swap lengths
|
||||
int tmp_size = sol.dim2_sizes[r1];
|
||||
sol.dim2_sizes[r1] = sol.dim2_sizes[r2];
|
||||
sol.dim2_sizes[r2] = tmp_size;
|
||||
// 交换数据(取两行中较长的长度)
|
||||
// Swap data (use the longer of the two row lengths)
|
||||
int max_len = (sol.dim2_sizes[r1] > sol.dim2_sizes[r2])
|
||||
? sol.dim2_sizes[r1] : sol.dim2_sizes[r2];
|
||||
// 交换后 r1 的长度是原 r2 的,r2 的长度是原 r1 的
|
||||
// 所以需要交换 max(原r1长度, 原r2长度) 个元素
|
||||
// After swap, r1 has old r2 length and r2 has old r1 length
|
||||
// So swap max(old r1 len, old r2 len) elements
|
||||
max_len = (tmp_size > max_len) ? tmp_size : max_len;
|
||||
for (int c = 0; c < max_len; c++) {
|
||||
int tmp = sol.data[r1][c];
|
||||
|
|
@ -385,8 +385,8 @@ __device__ inline void row_swap(Sol& sol, int r1, int r2) {
|
|||
}
|
||||
}
|
||||
|
||||
/// row_reverse: 反转 [r1, r2] 范围内的行排列顺序
|
||||
/// 例如 row_reverse(sol, 1, 4) 把行 1,2,3,4 变成 4,3,2,1
|
||||
/// row_reverse: reverse row order in [r1, r2]
|
||||
/// e.g. row_reverse(sol, 1, 4) turns rows 1,2,3,4 into 4,3,2,1
|
||||
template<typename Sol>
|
||||
__device__ inline void row_reverse_range(Sol& sol, int r1, int r2) {
|
||||
while (r1 < r2) {
|
||||
|
|
@ -395,23 +395,23 @@ __device__ inline void row_reverse_range(Sol& sol, int r1, int r2) {
|
|||
}
|
||||
}
|
||||
|
||||
/// row_split: split `row` at split_pos into two rows.
/// `row` keeps [0, split_pos); `empty_row` receives [split_pos, size).
/// Caller guarantees empty_row is currently empty (or has enough capacity).
template<typename Sol>
__device__ inline void row_split(Sol& sol, int row, int empty_row, int split_pos) {
    const int tail_len = sol.dim2_sizes[row] - split_pos;
    // Move the tail [split_pos, size) into the receiving row.
    for (int i = 0; i < tail_len; i++) {
        sol.data[empty_row][i] = sol.data[row][split_pos + i];
    }
    sol.dim2_sizes[empty_row] = tail_len;
    sol.dim2_sizes[row] = split_pos;
}
|
||||
|
||||
/// row_merge: 将 src_row 的全部内容追加到 dst_row 末尾
|
||||
/// src_row 清空,dst_row 长度增加
|
||||
/// 要求 dst_size + src_size <= DIM2
|
||||
/// row_merge: append full contents of src_row to end of dst_row
|
||||
/// src_row cleared, dst_row length increased
|
||||
/// requires dst_size + src_size <= DIM2
|
||||
template<typename Sol>
|
||||
__device__ inline void row_merge(Sol& sol, int dst_row, int src_row) {
|
||||
int dst_size = sol.dim2_sizes[dst_row];
|
||||
|
|
@ -423,33 +423,33 @@ __device__ inline void row_merge(Sol& sol, int dst_row, int src_row) {
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 第 4 层:交叉底层操作
|
||||
// Layer 4: crossover primitives
|
||||
// ============================================================
|
||||
//
|
||||
// 排列编码:OX 家族(统一框架)
|
||||
// 核心逻辑:A 中标记一组"保留位置"不动,空位按 B 的全局顺序填充
|
||||
// 三个变体只是"怎么选保留集"不同,填充逻辑完全共享
|
||||
// 天然保证唯一性:从 B 中按序取不在保留集中的元素,不会重复
|
||||
// 行长度不变(= A 的行长度),行边界不变
|
||||
// Permutation encoding: OX family (unified framework)
|
||||
// Core: mark "kept" positions from A; fill gaps in B's global order
|
||||
// Three variants differ only in how the keep set is chosen; fill logic is shared
|
||||
// Uniqueness: take from B in order elements not in keep set, no duplicates
|
||||
// Row lengths unchanged (= A's row lengths), row boundaries unchanged
|
||||
//
|
||||
// Binary 编码:uniform_crossover(逐元素随机选)
|
||||
// Binary encoding: uniform_crossover (random pick per element)
|
||||
//
|
||||
// ============================================================
|
||||
|
||||
// ---- OX 核心填充逻辑 ----
|
||||
// keep[r][c] = true 表示 child[r][c] 保留 A 的值,false 表示空位
|
||||
// 空位按 B 中元素的出现顺序(逐行扫描)填充
|
||||
// 要求:child 已拷贝自 A,dim2_sizes 已设为 A 的行长度
|
||||
// ---- OX core fill logic ----
|
||||
// keep[r][c] = true means child[r][c] keeps A's value; false = gap to fill
|
||||
// Gaps filled in order of appearance of elements in B (row-major scan)
|
||||
// Requires: child copied from A, dim2_sizes set to A's row lengths
|
||||
//
|
||||
// 参数 total_elements: 分区模式下的总元素数,非分区模式下 = 单行长度
|
||||
// 用于确定 B 中扫描的元素范围
|
||||
// total_elements: total elements in partitioned mode; in non-partitioned = single row length
|
||||
// Used to bound the scan range in B
|
||||
|
||||
template<typename Sol>
|
||||
__device__ inline void ox_fill_from_b(Sol& child, const Sol& parentB,
|
||||
const bool* keep_flat,
|
||||
int dim1, int total_elements) {
|
||||
// 统计 A 中保留位置的每个值的出现次数(支持多重集排列)
|
||||
// keep_flat 是按行展平的:keep_flat[r * DIM2 + c]
|
||||
// Count occurrences of each value at kept positions in A (multiset permutations)
|
||||
// keep_flat is row-major flat: keep_flat[r * DIM2 + c]
|
||||
int keep_count[512];
|
||||
for (int i = 0; i < total_elements; i++) keep_count[i] = 0;
|
||||
|
||||
|
|
@ -460,21 +460,21 @@ __device__ inline void ox_fill_from_b(Sol& child, const Sol& parentB,
|
|||
if (v >= 0 && v < total_elements) keep_count[v]++;
|
||||
}
|
||||
|
||||
// 从 B 中按行扫描顺序收集:每个值只取"需要填充"的份数
|
||||
// 标准排列:每个值最多 1 份,多重集:每个值最多 repeat_count 份
|
||||
// Collect from B in row scan order: take only as many of each value as needed to fill
|
||||
// Standard permutation: at most 1 of each value; multiset: up to repeat_count each
|
||||
int fill_buf[512];
|
||||
int fill_count = 0;
|
||||
for (int r = 0; r < dim1; r++)
|
||||
for (int c = 0; c < parentB.dim2_sizes[r]; c++) {
|
||||
int val = parentB.data[r][c];
|
||||
if (val >= 0 && val < total_elements && keep_count[val] > 0) {
|
||||
keep_count[val]--; // 消耗一个保留名额
|
||||
keep_count[val]--; // consume one kept slot
|
||||
} else if (val >= 0 && val < total_elements) {
|
||||
fill_buf[fill_count++] = val;
|
||||
}
|
||||
}
|
||||
|
||||
// 按空位顺序(逐行从左到右)填入
|
||||
// Fill gaps in order (row by row, left to right)
|
||||
int fi = 0;
|
||||
for (int r = 0; r < dim1; r++)
|
||||
for (int c = 0; c < child.dim2_sizes[r]; c++)
|
||||
|
|
@ -482,26 +482,26 @@ __device__ inline void ox_fill_from_b(Sol& child, const Sol& parentB,
|
|||
child.data[r][c] = fill_buf[fi++];
|
||||
}
|
||||
|
||||
// ---- 变体 1: OX-区间 ----
|
||||
// 每行随机选一个连续区间保留,保留邻接关系
|
||||
// ---- Variant 1: OX-interval ----
|
||||
// Per row, random contiguous interval kept; preserves adjacency
|
||||
template<typename Sol>
|
||||
__device__ inline void ox_interval(Sol& child, const Sol& parentA, const Sol& parentB,
|
||||
int dim1, int total_elements, curandState* rng) {
|
||||
bool keep[Sol::DIM1 * Sol::DIM2];
|
||||
for (int i = 0; i < Sol::DIM1 * Sol::DIM2; i++) keep[i] = false;
|
||||
|
||||
// child = A,同时标记每行的保留区间
|
||||
// child = A, mark each row's kept interval
|
||||
for (int r = 0; r < dim1; r++) {
|
||||
int sz = parentA.dim2_sizes[r];
|
||||
child.dim2_sizes[r] = sz;
|
||||
for (int c = 0; c < sz; c++) child.data[r][c] = parentA.data[r][c];
|
||||
|
||||
if (sz < 2) {
|
||||
// 长度 0 或 1:全部保留
|
||||
// length 0 or 1: keep all
|
||||
for (int c = 0; c < sz; c++) keep[r * Sol::DIM2 + c] = true;
|
||||
continue;
|
||||
}
|
||||
// 随机选区间 [lo, hi]
|
||||
// Random interval [lo, hi]
|
||||
int lo = rand_int(rng, sz);
|
||||
int hi = rand_int(rng, sz);
|
||||
if (lo > hi) { int tmp = lo; lo = hi; hi = tmp; }
|
||||
|
|
@ -511,8 +511,8 @@ __device__ inline void ox_interval(Sol& child, const Sol& parentA, const Sol& pa
|
|||
ox_fill_from_b(child, parentB, keep, dim1, total_elements);
|
||||
}
|
||||
|
||||
// ---- 变体 2: OX-子集 ----
|
||||
// 随机选约 50% 的元素值保留其在 A 中的位置,通用性最强
|
||||
// ---- Variant 2: OX-subset ----
|
||||
// Randomly keep ~50% of positions at their A values; most general
|
||||
template<typename Sol>
|
||||
__device__ inline void ox_subset(Sol& child, const Sol& parentA, const Sol& parentB,
|
||||
int dim1, int total_elements, curandState* rng) {
|
||||
|
|
@ -526,7 +526,7 @@ __device__ inline void ox_subset(Sol& child, const Sol& parentA, const Sol& pare
|
|||
child.data[r][c] = parentA.data[r][c];
|
||||
}
|
||||
|
||||
// 每个位置 50% 概率保留
|
||||
// 50% keep per position
|
||||
for (int r = 0; r < dim1; r++)
|
||||
for (int c = 0; c < child.dim2_sizes[r]; c++)
|
||||
keep[r * Sol::DIM2 + c] = (curand_uniform(rng) < 0.5f);
|
||||
|
|
@ -534,9 +534,9 @@ __device__ inline void ox_subset(Sol& child, const Sol& parentA, const Sol& pare
|
|||
ox_fill_from_b(child, parentB, keep, dim1, total_elements);
|
||||
}
|
||||
|
||||
// ---- 变体 3: OX-行 ----
|
||||
// 随机选若干整行保留,其余行的元素全部按 B 的顺序重填
|
||||
// 保留整条路线结构,VRP 受益
|
||||
// ---- Variant 3: OX-row ----
|
||||
// Randomly keep whole rows; refill non-kept rows from B's order
|
||||
// Preserves full route structure; good for VRP
|
||||
template<typename Sol>
|
||||
__device__ inline void ox_row(Sol& child, const Sol& parentA, const Sol& parentB,
|
||||
int dim1, int total_elements, curandState* rng) {
|
||||
|
|
@ -550,7 +550,7 @@ __device__ inline void ox_row(Sol& child, const Sol& parentA, const Sol& parentB
|
|||
child.data[r][c] = parentA.data[r][c];
|
||||
}
|
||||
|
||||
// 每行 50% 概率整行保留
|
||||
// 50% chance to keep whole row
|
||||
int kept = 0;
|
||||
for (int r = 0; r < dim1; r++) {
|
||||
if (curand_uniform(rng) < 0.5f) {
|
||||
|
|
@ -559,14 +559,14 @@ __device__ inline void ox_row(Sol& child, const Sol& parentA, const Sol& parentB
|
|||
kept++;
|
||||
}
|
||||
}
|
||||
// 确保不是全保留或全不保留
|
||||
// Ensure not all-kept or all-unkept
|
||||
if (kept == 0) {
|
||||
int r = rand_int(rng, dim1);
|
||||
// 不标记任何 keep → 全部重填(至少有一行不保留)
|
||||
// 实际上 kept==0 意味着全部重填,这是合法的(child = B 的顺序填入 A 的结构)
|
||||
// No keep marks → full refill (at least one row not kept)
|
||||
// kept==0 means full refill; valid (child gets B's order into A's structure)
|
||||
}
|
||||
if (kept == dim1 && dim1 > 1) {
|
||||
// 全保留 → 随机取消一行
|
||||
// All kept → randomly un-keep one row
|
||||
int r = rand_int(rng, dim1);
|
||||
for (int c = 0; c < child.dim2_sizes[r]; c++)
|
||||
keep[r * Sol::DIM2 + c] = false;
|
||||
|
|
@ -575,14 +575,14 @@ __device__ inline void ox_row(Sol& child, const Sol& parentA, const Sol& parentB
|
|||
ox_fill_from_b(child, parentB, keep, dim1, total_elements);
|
||||
}
|
||||
|
||||
// ---- OX 统一入口 ----
|
||||
// 随机选一个变体执行
|
||||
// dim1==1 时只用区间和子集(行变体无意义)
|
||||
// ---- OX unified entry ----
|
||||
// Pick one variant at random
|
||||
// When dim1==1 use only interval and subset (row variant useless)
|
||||
template<typename Sol>
|
||||
__device__ inline void perm_ox_crossover(Sol& child, const Sol& parentA, const Sol& parentB,
|
||||
int dim1, int total_elements, curandState* rng) {
|
||||
int n_variants = (dim1 > 1) ? 3 : 2;
|
||||
int variant = rand_int(rng, n_variants); // 0: 区间, 1: 子集, [2: 行]
|
||||
int variant = rand_int(rng, n_variants); // 0: interval, 1: subset, [2: row]
|
||||
switch (variant) {
|
||||
case 0: ox_interval(child, parentA, parentB, dim1, total_elements, rng); break;
|
||||
case 1: ox_subset(child, parentA, parentB, dim1, total_elements, rng); break;
|
||||
|
|
@ -590,8 +590,8 @@ __device__ inline void perm_ox_crossover(Sol& child, const Sol& parentA, const S
|
|||
}
|
||||
}
|
||||
|
||||
/// uniform_crossover: 逐元素从两个父代中随机选择
|
||||
/// 适用于 Binary 编码(不破坏排列约束)
|
||||
/// uniform_crossover: random parent choice per element
|
||||
/// Suitable for Binary encoding (does not break permutation constraints)
|
||||
template<typename Sol>
|
||||
__device__ inline void uniform_crossover(Sol& child, const Sol& parentA, const Sol& parentB,
|
||||
int dim1, curandState* rng) {
|
||||
|
|
@ -607,15 +607,15 @@ __device__ inline void uniform_crossover(Sol& child, const Sol& parentA, const S
|
|||
}
|
||||
}
|
||||
|
||||
// [已删除] generate_move_for_seq / sample_and_generate / apply_move / undo_move
|
||||
// P0 重构后主路径统一使用 execute_sequence,旧的 Move 生成+应用+撤销路径不再需要
|
||||
// [removed] generate_move_for_seq / sample_and_generate / apply_move / undo_move
|
||||
// After P0 refactor the main path uses execute_sequence; old Move gen/apply/undo path removed
|
||||
|
||||
// ============================================================
|
||||
// execute_sequence — 统一接口:生成参数并直接执行(不返回 Move)
|
||||
// execute_sequence — unified API: generate params and execute directly (no Move returned)
|
||||
// ============================================================
|
||||
// 返回 true 若 sol 被修改,false 若 NOOP
|
||||
// d_G, d_O, rel_N: 可选的关系矩阵指针(SEQ_LNS_GUIDED_REBUILD 使用)
|
||||
// val_lb, val_ub: Integer 编码的值域范围(其他编码忽略)
|
||||
// Returns true if sol modified, false if NOOP
|
||||
// d_G, d_O, rel_N: optional relation matrices (for SEQ_LNS_GUIDED_REBUILD)
|
||||
// val_lb, val_ub: Integer encoding value range (ignored for other encodings)
|
||||
|
||||
template<typename Sol>
|
||||
__device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
||||
|
|
@ -627,7 +627,7 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
int val_ub = 1,
|
||||
const void* prob_data = nullptr) {
|
||||
// ============================================================
|
||||
// Permutation 序列
|
||||
// Permutation sequences
|
||||
// ============================================================
|
||||
if (encoding == EncodingType::Permutation) {
|
||||
switch (seq_id) {
|
||||
|
|
@ -841,15 +841,15 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
return true;
|
||||
}
|
||||
case seq::SEQ_LNS_GUIDED_REBUILD: {
|
||||
// 关系矩阵引导重建:
|
||||
// 1. 随机选种子元素 seed
|
||||
// 2. 查 G[seed] 找分组倾向最强的 K 个元素
|
||||
// 3. 在解中找到这些元素的位置
|
||||
// 4. 按 O 矩阵引导的顺序重排这些位置的元素
|
||||
// Relation-matrix guided rebuild:
|
||||
// 1. Pick random seed element seed
|
||||
// 2. Look up G[seed] for K elements with strongest grouping affinity
|
||||
// 3. Find positions of these elements in the solution
|
||||
// 4. Reorder these positions by order guided by O matrix
|
||||
//
|
||||
// 如果没有关系矩阵(冷启动),退化为 scatter_shuffle
|
||||
// Without relation matrices (cold start), fall back to scatter_shuffle
|
||||
if (!d_G || !d_O || rel_N <= 0) {
|
||||
// 退化:随机 scatter shuffle
|
||||
// Fallback: random scatter shuffle
|
||||
int row = (dim1 > 1) ? rand_int(rng, dim1) : 0;
|
||||
int sz = sol.dim2_sizes[row];
|
||||
if (sz < 4) return false;
|
||||
|
|
@ -872,21 +872,21 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
return true;
|
||||
}
|
||||
|
||||
// --- 有关系矩阵:引导重建 ---
|
||||
// 通用策略(不感知问题类型):
|
||||
// G 矩阵 → 选哪些元素(分组倾向弱的 = 可能放错位置的)
|
||||
// O 矩阵 → 怎么排(排序倾向引导重排顺序)
|
||||
// 两者协同:G 选人,O 排序
|
||||
// --- With relation matrices: guided rebuild ---
|
||||
// Generic strategy (problem-agnostic):
|
||||
// G matrix → which elements (weak grouping with seed = likely misplaced)
|
||||
// O matrix → how to order (ordering affinity guides reorder)
|
||||
// Together: G picks, O orders
|
||||
int row = (dim1 > 1) ? rand_int(rng, dim1) : 0;
|
||||
int sz = sol.dim2_sizes[row];
|
||||
if (sz < 4) return false;
|
||||
|
||||
// 选种子元素
|
||||
// Pick seed element
|
||||
int seed_pos = rand_int(rng, sz);
|
||||
int seed_val = sol.data[row][seed_pos];
|
||||
if (seed_val < 0 || seed_val >= rel_N) return false;
|
||||
|
||||
// 检查矩阵是否有足够信息(G 和 O 任一有信号即可)
|
||||
// Check matrices have enough signal (either G or O)
|
||||
float max_signal = 0.0f;
|
||||
for (int c = 0; c < sz; c++) {
|
||||
int v = sol.data[row][c];
|
||||
|
|
@ -897,11 +897,11 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
if (o > max_signal) max_signal = o;
|
||||
}
|
||||
}
|
||||
if (max_signal < 0.05f) return false; // 信息不足,跳过
|
||||
if (max_signal < 0.05f) return false; // insufficient signal, skip
|
||||
|
||||
// 破坏:锦标赛选择 G 值低的元素(t=2)
|
||||
// G 值低 = 与 seed 分组倾向弱 = 可能放错位置
|
||||
// 锦标赛:随机抽 2 个,取 G 值更低的那个,重复 count 次
|
||||
// Destroy: tournament pick low-G elements (t=2)
|
||||
// Low G = weak grouping with seed = likely misplaced
|
||||
// Tournament: draw 2 at random, take lower G, repeat count times
|
||||
constexpr int MAX_REBUILD = 10;
|
||||
constexpr int TOUR_SIZE = 2;
|
||||
int count = sz / 5; // ~20%
|
||||
|
|
@ -911,12 +911,12 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
|
||||
int sel_pos[MAX_REBUILD];
|
||||
int sel_val[MAX_REBUILD];
|
||||
bool used[128] = {}; // 标记已选位置,防止重复
|
||||
bool used[128] = {}; // mark chosen positions to avoid duplicates
|
||||
int picked = 0;
|
||||
int max_attempts = count * 4; // 防止死循环
|
||||
int max_attempts = count * 4; // avoid infinite loop
|
||||
|
||||
for (int attempt = 0; attempt < max_attempts && picked < count; attempt++) {
|
||||
// 锦标赛:随机抽 TOUR_SIZE 个候选,取 G 值最低的
|
||||
// Tournament: draw TOUR_SIZE candidates at random, take lowest G
|
||||
int best_c = -1;
|
||||
float best_g = 1e30f;
|
||||
for (int t = 0; t < TOUR_SIZE; t++) {
|
||||
|
|
@ -936,15 +936,15 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
if (picked < 2) return false;
|
||||
count = picked;
|
||||
|
||||
// 修复:锦标赛排序(O 矩阵引导 + 随机扰动)
|
||||
// 插入排序,比较时加噪声实现概率性:O 值高的大概率排前面,但不绝对
|
||||
// Repair: tournament sort (O-guided + random noise)
|
||||
// Insertion sort with noisy comparison: high O tends to go first, not guaranteed
|
||||
for (int i = 1; i < count; i++) {
|
||||
int key = sel_val[i];
|
||||
int j = i - 1;
|
||||
while (j >= 0) {
|
||||
float o_key_before = d_O[key * rel_N + sel_val[j]];
|
||||
float o_j_before = d_O[sel_val[j] * rel_N + key];
|
||||
// 噪声幅度 0.05:O 值差距 >0.05 时基本确定,<0.05 时随机
|
||||
// Noise scale 0.05: if O gap >0.05 mostly deterministic, else random
|
||||
float noise = (curand_uniform(rng) - 0.5f) * 0.1f;
|
||||
if (o_key_before + noise > o_j_before) {
|
||||
sel_val[j + 1] = sel_val[j];
|
||||
|
|
@ -956,7 +956,7 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
sel_val[j + 1] = key;
|
||||
}
|
||||
|
||||
// 对 sel_pos 排序(升序),使写回位置有序
|
||||
// Sort sel_pos ascending so write-back order is stable
|
||||
for (int i = 1; i < count; i++) {
|
||||
int key = sel_pos[i];
|
||||
int j = i - 1;
|
||||
|
|
@ -967,7 +967,7 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
sel_pos[j + 1] = key;
|
||||
}
|
||||
|
||||
// 检查是否真的改变了排列
|
||||
// Check whether permutation actually changed
|
||||
bool any_change = false;
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (sol.data[row][sel_pos[i]] != sel_val[i]) {
|
||||
|
|
@ -977,7 +977,7 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
}
|
||||
if (!any_change) return false;
|
||||
|
||||
// 写回
|
||||
// Write back
|
||||
for (int i = 0; i < count; i++) {
|
||||
sol.data[row][sel_pos[i]] = sel_val[i];
|
||||
}
|
||||
|
|
@ -989,7 +989,7 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// Binary 序列
|
||||
// Binary sequences
|
||||
// ============================================================
|
||||
if (encoding == EncodingType::Binary) {
|
||||
switch (seq_id) {
|
||||
|
|
@ -1063,7 +1063,7 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// Integer 序列
|
||||
// Integer sequences
|
||||
// ============================================================
|
||||
if (encoding == EncodingType::Integer) {
|
||||
switch (seq_id) {
|
||||
|
|
@ -1131,7 +1131,7 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 共享:行级序列(编码无关)
|
||||
// Shared: row-level sequences (encoding-agnostic)
|
||||
// ============================================================
|
||||
switch (seq_id) {
|
||||
case seq::SEQ_ROW_SWAP: {
|
||||
|
|
@ -1194,11 +1194,11 @@ __device__ inline bool execute_sequence(int seq_id, Sol& sol, int dim1,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// sample_and_execute — 从 SeqRegistry 按权重采样 + 直接执行
|
||||
// sample_and_execute — sample from SeqRegistry by weight and execute directly
|
||||
// ============================================================
|
||||
// 返回 true 若 sol 被修改,false 若 NOOP
|
||||
// 输出参数 out_seq_idx:采样到的序列在 registry 中的索引
|
||||
// d_G, d_O, rel_N: 可选的关系矩阵(传递给 execute_sequence)
|
||||
// Returns true if sol modified, false if NOOP
|
||||
// out_seq_idx: index of sampled sequence in registry
|
||||
// d_G, d_O, rel_N: optional relation matrices (passed to execute_sequence)
|
||||
|
||||
template<typename Sol>
|
||||
__device__ inline bool sample_and_execute(const SeqRegistry& reg,
|
||||
|
|
@ -1212,7 +1212,7 @@ __device__ inline bool sample_and_execute(const SeqRegistry& reg,
|
|||
int val_lb = 0,
|
||||
int val_ub = 1,
|
||||
const void* prob_data = nullptr) {
|
||||
// 延迟归一化:使用缓存的 weights_sum
|
||||
// Lazy normalization: use cached weights_sum
|
||||
float r = curand_uniform(rng) * reg.weights_sum; // r ∈ [0, weights_sum)
|
||||
float cumsum = 0.0f;
|
||||
out_seq_idx = reg.count - 1;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
/**
|
||||
* population.cuh - 种群管理
|
||||
* population.cuh - Population management
|
||||
*
|
||||
* v2.0: Block 级架构
|
||||
* - RNG 数组大小 = pop_size * block_size(每个 block 内每个线程独立 RNG)
|
||||
* - 初始化 kernel 保持 1-thread-per-solution(初始化只做一次,不需要并行)
|
||||
* - find_best_kernel 保持单线程(种群规模不大)
|
||||
* v2.0: Block-level architecture
|
||||
* - RNG array size = pop_size * block_size (one independent RNG per thread within each block)
|
||||
* - Init kernel stays 1-thread-per-solution (initialization runs once; parallelism not needed)
|
||||
* - find_best_kernel remains single-threaded (population size is modest)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -12,7 +12,7 @@
|
|||
#include "cuda_utils.cuh"
|
||||
|
||||
// ============================================================
|
||||
// Device 端 Kernel(模板化)
|
||||
// Device-side kernels (templated)
|
||||
// ============================================================
|
||||
|
||||
template<typename Sol>
|
||||
|
|
@ -65,9 +65,9 @@ __global__ void init_integer_kernel(Sol* pop, int pop_size,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 多重集排列初始化 — 每个值 [0, N) 重复 R 次,总长度 N*R
|
||||
// Multiset permutation init — each value in [0, N) repeated R times, total length N*R
|
||||
// ============================================================
|
||||
// 用于 JSP 工序排列编码:N=num_jobs, R=num_ops,值 j 出现 R 次表示工件 j
|
||||
// For JSP operation-sequence encoding: N=num_jobs, R=num_ops; value j appearing R times means job j
|
||||
|
||||
template<typename Sol>
|
||||
__global__ void init_multiset_perm_kernel(Sol* pop, int pop_size,
|
||||
|
|
@ -90,7 +90,7 @@ __global__ void init_multiset_perm_kernel(Sol* pop, int pop_size,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 分区初始化 — 元素 {0..total_elements-1} 不重复分配到 dim1 行
|
||||
// Partition init — elements {0..total_elements-1} assigned without duplication across dim1 rows
|
||||
// ============================================================
|
||||
|
||||
template<typename Sol>
|
||||
|
|
@ -131,21 +131,21 @@ __global__ void find_best_kernel(const Sol* pop, int pop_size,
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// Host 端 RAII 类(模板化)
|
||||
// Host-side RAII class (templated)
|
||||
// ============================================================
|
||||
|
||||
template<typename Sol>
|
||||
class Population {
|
||||
public:
|
||||
Sol* d_solutions = nullptr;
|
||||
curandState* d_rng_states = nullptr; // 大小 = pop_size * block_size
|
||||
curandState* d_rng_states = nullptr; // size = pop_size * block_size
|
||||
int size = 0;
|
||||
int rng_count = 0; // RNG 状态总数
|
||||
int rng_count = 0; // total RNG states
|
||||
|
||||
Population() = default;
|
||||
|
||||
// block_size: Block 级架构下每个 block 的线程数
|
||||
// RNG 数组大小 = pop_size * block_size(每个 block 内每个线程独立 RNG)
|
||||
// block_size: threads per block under block-level architecture
|
||||
// RNG array size = pop_size * block_size (one independent RNG per thread within each block)
|
||||
void allocate(int pop_size, int block_size = 128) {
|
||||
size = pop_size;
|
||||
rng_count = pop_size * block_size;
|
||||
|
|
|
|||
|
|
@ -1,20 +1,20 @@
|
|||
/**
|
||||
* relation_matrix.cuh - G/O 关系矩阵管理
|
||||
* relation_matrix.cuh - G/O relation matrix management
|
||||
*
|
||||
* G[i][j]: 分组倾向(元素 i 和 j 应在同一行的倾向,对称)
|
||||
* O[i][j]: 排序倾向(元素 i 应排在 j 前面的倾向,不对称)
|
||||
* G[i][j]: grouping affinity (tendency for elements i and j to be on the same row; symmetric)
|
||||
* O[i][j]: ordering affinity (tendency for element i to appear before j; asymmetric)
|
||||
*
|
||||
* 更新来源:历史最优解统计
|
||||
* 每当 host 端获取到当前 best 解,扫描所有元素对关系:
|
||||
* - 同行 → G[i][j] 增强
|
||||
* - i 在 j 前 → O[i][j] 增强
|
||||
* 使用 EMA 衰减:M[i][j] = α * M[i][j] + (1-α) * signal
|
||||
* Update source: statistics from historical best solutions
|
||||
* Whenever the host obtains the current best solution, scan all element-pair relations:
|
||||
* - Same row → strengthen G[i][j]
|
||||
* - i before j → strengthen O[i][j]
|
||||
* EMA decay: M[i][j] = α * M[i][j] + (1-α) * signal
|
||||
*
|
||||
* 生命周期:
|
||||
* 1. relation_matrix_create(N) — 分配 host/device 内存,初始化为 0
|
||||
* 2. relation_matrix_update(rm, sol, dim1) — 从一个解更新 G/O(host 端)
|
||||
* 3. relation_matrix_upload(rm) — 上传 h_G/h_O 到 d_G/d_O
|
||||
* 4. relation_matrix_destroy(rm) — 释放内存
|
||||
* Lifecycle:
|
||||
* 1. relation_matrix_create(N) — allocate host/device memory, initialize to 0
|
||||
* 2. relation_matrix_update(rm, sol, dim1) — update G/O from one solution (host)
|
||||
* 3. relation_matrix_upload(rm) — upload h_G/h_O to d_G/d_O
|
||||
* 4. relation_matrix_destroy(rm) — free memory
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -23,7 +23,7 @@
|
|||
#include <cstring>
|
||||
|
||||
// ============================================================
|
||||
// 创建 / 销毁
|
||||
// Create / destroy
|
||||
// ============================================================
|
||||
|
||||
inline RelationMatrix relation_matrix_create(int N, float decay = 0.95f) {
|
||||
|
|
@ -58,19 +58,19 @@ inline void relation_matrix_destroy(RelationMatrix& rm) {
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 从一个解更新 G/O(host 端)
|
||||
// Update G/O from one solution (host)
|
||||
// ============================================================
|
||||
// sol: 当前最优解(已下载到 host)
|
||||
// dim1: 实际使用的行数
|
||||
// sol: current best solution (already copied to host)
|
||||
// dim1: number of rows in use
|
||||
//
|
||||
// 逻辑:
|
||||
// 对 sol 中每对元素 (val_a, val_b):
|
||||
// 如果在同一行 → G[val_a][val_b] 增强
|
||||
// 如果 val_a 在 val_b 前面 → O[val_a][val_b] 增强
|
||||
// Logic:
|
||||
// For each pair (val_a, val_b) in sol:
|
||||
// If on the same row → strengthen G[val_a][val_b]
|
||||
// If val_a appears before val_b → strengthen O[val_a][val_b]
|
||||
//
|
||||
// 注意:元素值 val 必须在 [0, N) 范围内才有意义
|
||||
// 对于 partition 编码(VRP),元素值就是客户编号
|
||||
// 对于单行排列(TSP),元素值就是城市编号
|
||||
// Note: element values val are meaningful only in [0, N)
|
||||
// For partition encoding (VRP), values are customer IDs
|
||||
// For single-row permutation (TSP), values are city IDs
|
||||
|
||||
template<typename Sol>
|
||||
void relation_matrix_update(RelationMatrix& rm, const Sol& sol, int dim1) {
|
||||
|
|
@ -78,13 +78,13 @@ void relation_matrix_update(RelationMatrix& rm, const Sol& sol, int dim1) {
|
|||
float alpha = rm.decay;
|
||||
float signal_strength = 1.0f;
|
||||
|
||||
// 衰减所有现有值
|
||||
// Decay all existing values
|
||||
for (int i = 0; i < N * N; i++) {
|
||||
rm.h_G[i] *= alpha;
|
||||
rm.h_O[i] *= alpha;
|
||||
}
|
||||
|
||||
// 扫描解中的元素对关系
|
||||
// Scan element-pair relations in the solution
|
||||
for (int r = 0; r < dim1; r++) {
|
||||
int sz = sol.dim2_sizes[r];
|
||||
for (int c1 = 0; c1 < sz; c1++) {
|
||||
|
|
@ -95,17 +95,17 @@ void relation_matrix_update(RelationMatrix& rm, const Sol& sol, int dim1) {
|
|||
int val_b = sol.data[r][c2];
|
||||
if (val_b < 0 || val_b >= N) continue;
|
||||
|
||||
// 同行 → G 增强(对称)
|
||||
// Same row → strengthen G (symmetric)
|
||||
rm.h_G[val_a * N + val_b] += (1.0f - alpha) * signal_strength;
|
||||
rm.h_G[val_b * N + val_a] += (1.0f - alpha) * signal_strength;
|
||||
|
||||
// val_a 在 val_b 前 → O[val_a][val_b] 增强
|
||||
// val_a before val_b → strengthen O[val_a][val_b]
|
||||
rm.h_O[val_a * N + val_b] += (1.0f - alpha) * signal_strength;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 裁剪到 [0, 1]
|
||||
// Clamp to [0, 1]
|
||||
for (int i = 0; i < N * N; i++) {
|
||||
if (rm.h_G[i] > 1.0f) rm.h_G[i] = 1.0f;
|
||||
if (rm.h_O[i] > 1.0f) rm.h_O[i] = 1.0f;
|
||||
|
|
@ -115,7 +115,7 @@ void relation_matrix_update(RelationMatrix& rm, const Sol& sol, int dim1) {
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// 上传到 GPU
|
||||
// Upload to GPU
|
||||
// ============================================================
|
||||
|
||||
inline void relation_matrix_upload(const RelationMatrix& rm) {
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,38 +1,39 @@
|
|||
/**
|
||||
* types.cuh - 核心类型定义
|
||||
* types.cuh - Core type definitions
|
||||
*
|
||||
* 包含:编码类型、Solution 模板、ProblemConfig/SolverConfig、
|
||||
* SeqRegistry(AOS 序列级权重)、KStepConfig(多步执行)、
|
||||
* RelationMatrix(G/O 关系矩阵)、ProblemBase(CRTP 基类)
|
||||
* Contains: encoding types, Solution template, ProblemConfig/SolverConfig,
|
||||
* SeqRegistry (AOS sequence-level weights), KStepConfig (multi-step execution),
|
||||
* RelationMatrix (G/O relation matrix), ProblemBase (CRTP base class)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdio>
|
||||
#include "cuda_utils.cuh"
|
||||
|
||||
// ============================================================
|
||||
// 编译时常量
|
||||
// Compile-time constants
|
||||
// ============================================================
|
||||
constexpr int MAX_OBJ = 4; // 最多 4 个目标(16字节,不值得模板化)
|
||||
constexpr int MAX_SEQ = 32; // 最大序列数(内置 ~16 + 自定义算子 ≤8,留余量)
|
||||
constexpr int MAX_K = 3; // 多步执行的最大步数(K=1,2,3)
|
||||
// AOS 权重上下限(归一化后)
|
||||
constexpr float AOS_WEIGHT_FLOOR = 0.05f; // 最低权重保底(确保充分探索)
|
||||
constexpr float AOS_WEIGHT_CAP = 0.35f; // 最高权重上限(防止赢者通吃)
|
||||
constexpr int MAX_OBJ = 4; // Max 4 objectives (16 bytes, not worth templatizing)
|
||||
constexpr int MAX_SEQ = 32; // Max sequences (built-in ~16 + custom ops ≤8, with margin)
|
||||
constexpr int MAX_K = 3; // Max steps for multi-step execution (K=1,2,3)
|
||||
// AOS weight bounds
|
||||
constexpr float AOS_WEIGHT_FLOOR = 0.05f; // Minimum weight floor (ensures sufficient exploration)
|
||||
constexpr float AOS_WEIGHT_CAP = 0.35f; // Maximum weight cap (prevents winner-take-all)
|
||||
|
||||
// ============================================================
|
||||
// 枚举类型
|
||||
// Enum types
|
||||
// ============================================================
|
||||
|
||||
enum class EncodingType {
|
||||
Permutation, // 排列:元素不重复
|
||||
Binary, // 0-1:flip 是主要算子
|
||||
Integer // 有界整数
|
||||
Permutation, // Permutation: elements are unique
|
||||
Binary, // 0-1: flip is the main operator
|
||||
Integer // Bounded integers
|
||||
};
|
||||
|
||||
enum class RowMode {
|
||||
Single, // dim1=1,单行(TSP/QAP/Knapsack 等大部分问题)
|
||||
Fixed, // dim1>1,行等长不可变(JSP-Int/Schedule,禁止 SPLIT/MERGE)
|
||||
Partition // dim1>1,元素分区到各行,行长可变(CVRP/VRPTW)
|
||||
Single, // dim1=1, single row (most problems: TSP/QAP/Knapsack, etc.)
|
||||
Fixed, // dim1>1, equal row lengths fixed (JSP-Int/Schedule; SPLIT/MERGE disallowed)
|
||||
Partition // dim1>1, elements partitioned across rows, variable row lengths (CVRP/VRPTW)
|
||||
};
|
||||
|
||||
enum class ObjDir {
|
||||
|
|
@ -40,241 +41,235 @@ enum class ObjDir {
|
|||
Maximize
|
||||
};
|
||||
|
||||
// 多目标比较模式
|
||||
// Multi-objective comparison mode
|
||||
enum class CompareMode {
|
||||
Weighted, // 加权求和:sum(weight[i] * obj[i]),越小越好
|
||||
Lexicographic // 字典法:按优先级逐目标比较,前面的目标优先
|
||||
Weighted, // Weighted sum: sum(weight[i] * obj[i]), lower is better
|
||||
Lexicographic // Lexicographic: compare objectives by priority order
|
||||
};
|
||||
|
||||
enum class MigrateStrategy {
|
||||
Ring, // 环形:各岛最优→邻岛最差(慢传播,高多样性)
|
||||
TopN, // 全局 Top-N 轮转分发(快传播,强收敛)
|
||||
Hybrid // 两者兼顾:Top-N 替换最差 + Ring 替换次差
|
||||
Ring, // Ring: each island's best → neighbor's worst (slow spread, high diversity)
|
||||
TopN, // Global Top-N round-robin (fast spread, strong convergence)
|
||||
Hybrid // Hybrid: Top-N replaces worst + Ring replaces second-worst
|
||||
};
|
||||
|
||||
// v5.0: 多 GPU 协同 — 解注入模式
|
||||
// v5.0: multi-GPU coordination — solution injection mode
|
||||
enum class MultiGpuInjectMode {
|
||||
OneIsland, // 注入到 1 个岛的 worst(保守,保持多样性)
|
||||
HalfIslands, // 注入到 num_islands/2 个岛的 worst(平衡)
|
||||
AllIslands // 注入到所有岛的 worst(激进,快速传播)
|
||||
OneIsland, // Inject into worst of 1 island (conservative, preserves diversity)
|
||||
HalfIslands, // Inject into worst on num_islands/2 islands (balanced)
|
||||
AllIslands // Inject into worst on all islands (aggressive, fast spread)
|
||||
};
|
||||
|
||||
// v5.0 方案 B3: InjectBuffer — 被动注入缓冲区
|
||||
// GPU 无感知,CPU 同步写入,GPU 在 migrate_kernel 中检查并应用
|
||||
// 设计要点:
|
||||
// 1. 使用同步 cudaMemcpy 避免与 solve() 的 stream/Graph 冲突
|
||||
// 2. 写入顺序:先 solution 后 flag,GPU 端原子读 flag 确保一致性
|
||||
// 3. 完全解耦:不依赖 solve() 的任何内部状态
|
||||
// v5.0 option B3: InjectBuffer — passive injection buffer
|
||||
// GPU has no awareness; CPU writes synchronously; GPU checks and applies in migrate_kernel
|
||||
// Design notes:
|
||||
// 1. Use synchronous cudaMemcpy to avoid conflicts with solve() stream/Graph
|
||||
// 2. Write order: solution first, then flag; GPU atomic flag read ensures consistency
|
||||
// 3. Fully decoupled: does not depend on any internal state of solve()
|
||||
template<typename Sol>
|
||||
struct InjectBuffer {
|
||||
Sol* d_solution; // Device 端解缓冲区(单个解)
|
||||
int* d_flag; // Device 端标志位:0=空,1=有新解
|
||||
Sol* d_solution = nullptr; // Device solution buffer (single solution)
|
||||
int* d_flag = nullptr; // Device flag: 0=empty, 1=new solution
|
||||
int owner_gpu = 0; // GPU that owns the allocation
|
||||
|
||||
// 分配 InjectBuffer(在指定 GPU 上)
|
||||
// Allocate InjectBuffer (on given GPU)
|
||||
static InjectBuffer<Sol> allocate(int gpu_id) {
|
||||
InjectBuffer<Sol> buf;
|
||||
buf.owner_gpu = gpu_id;
|
||||
|
||||
// 保存原设备,切换到目标 GPU
|
||||
int orig_device;
|
||||
cudaGetDevice(&orig_device);
|
||||
cudaSetDevice(gpu_id);
|
||||
CUDA_CHECK(cudaGetDevice(&orig_device));
|
||||
CUDA_CHECK(cudaSetDevice(gpu_id));
|
||||
|
||||
// 分配设备内存
|
||||
cudaMalloc(&buf.d_solution, sizeof(Sol));
|
||||
cudaMalloc(&buf.d_flag, sizeof(int));
|
||||
CUDA_CHECK(cudaMalloc(&buf.d_solution, sizeof(Sol)));
|
||||
CUDA_CHECK(cudaMalloc(&buf.d_flag, sizeof(int)));
|
||||
|
||||
// 初始化 flag 为 0
|
||||
int zero = 0;
|
||||
cudaMemcpy(buf.d_flag, &zero, sizeof(int), cudaMemcpyHostToDevice);
|
||||
CUDA_CHECK(cudaMemcpy(buf.d_flag, &zero, sizeof(int), cudaMemcpyHostToDevice));
|
||||
|
||||
// 恢复原设备
|
||||
cudaSetDevice(orig_device);
|
||||
CUDA_CHECK(cudaSetDevice(orig_device));
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
// 释放 InjectBuffer
|
||||
// Free InjectBuffer (switches to owner GPU before freeing)
|
||||
void destroy() {
|
||||
if (d_solution) {
|
||||
cudaFree(d_solution);
|
||||
d_solution = nullptr;
|
||||
}
|
||||
if (d_flag) {
|
||||
cudaFree(d_flag);
|
||||
d_flag = nullptr;
|
||||
if (d_solution || d_flag) {
|
||||
int orig_device;
|
||||
cudaGetDevice(&orig_device);
|
||||
cudaSetDevice(owner_gpu);
|
||||
if (d_solution) { cudaFree(d_solution); d_solution = nullptr; }
|
||||
if (d_flag) { cudaFree(d_flag); d_flag = nullptr; }
|
||||
cudaSetDevice(orig_device);
|
||||
}
|
||||
}
|
||||
|
||||
// CPU 端写入新解
|
||||
// 注意:使用同步 cudaMemcpy 避免与 solve() 的 stream 冲突
|
||||
// 顺序:先写 solution,再写 flag(GPU 端原子读 flag 确保不会读到半写状态)
|
||||
// CPU-side write of new solution
|
||||
// Note: synchronous cudaMemcpy avoids stream conflicts with solve()
|
||||
// Order: write solution first, then flag (GPU atomic flag read avoids half-written reads)
|
||||
void write_sync(const Sol& sol, int target_gpu) {
|
||||
// 保存原设备,切换到目标 GPU
|
||||
int orig_device;
|
||||
cudaGetDevice(&orig_device);
|
||||
cudaSetDevice(target_gpu);
|
||||
CUDA_CHECK(cudaGetDevice(&orig_device));
|
||||
CUDA_CHECK(cudaSetDevice(target_gpu));
|
||||
|
||||
// 先写解数据
|
||||
cudaMemcpy(d_solution, &sol, sizeof(Sol), cudaMemcpyHostToDevice);
|
||||
// 再写标志位(确保解数据已写完)
|
||||
CUDA_CHECK(cudaMemcpy(d_solution, &sol, sizeof(Sol), cudaMemcpyHostToDevice));
|
||||
int flag = 1;
|
||||
cudaMemcpy(d_flag, &flag, sizeof(int), cudaMemcpyHostToDevice);
|
||||
CUDA_CHECK(cudaMemcpy(d_flag, &flag, sizeof(int), cudaMemcpyHostToDevice));
|
||||
|
||||
// 恢复原设备
|
||||
cudaSetDevice(orig_device);
|
||||
CUDA_CHECK(cudaSetDevice(orig_device));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// ============================================================
// SeqID — unified OperationSequence IDs
// ============================================================
// Each SeqID maps to one concrete search operation (atomic or multi-step).
// AOS weight granularity = SeqID (independent weight per sequence).
//
// Naming: SEQ_{encoding}_{operation}
// Row-level ops shared across encodings use unified numbering.
// NOTE: IDs are only unique within one encoding family; e.g. SEQ_BIN_FLIP
// and SEQ_PERM_SWAP both map to 0 and are disambiguated by EncodingType.

namespace seq {

// --- Permutation in-row (element-level) ---
constexpr int SEQ_PERM_SWAP    = 0;  // swap two positions
constexpr int SEQ_PERM_REVERSE = 1;  // 2-opt (reverse segment)
constexpr int SEQ_PERM_INSERT  = 2;  // insert (move to new position)
constexpr int SEQ_PERM_3OPT    = 3;  // 3-opt (reconnect after 3 edges)

// --- Permutation in-row (segment-level) ---
constexpr int SEQ_PERM_OR_OPT = 4;   // or-opt (move k consecutive elements)

// --- Permutation in-row (combo-level) ---
constexpr int SEQ_PERM_DOUBLE_SWAP = 30; // two consecutive swaps (same row)
constexpr int SEQ_PERM_TRIPLE_SWAP = 31; // three consecutive swaps (same row)

// --- Permutation cross-row (element-level) ---
constexpr int SEQ_PERM_CROSS_RELOCATE = 5; // single element moves row
constexpr int SEQ_PERM_CROSS_SWAP     = 6; // single element swaps rows

// --- Permutation cross-row (segment-level) ---
constexpr int SEQ_PERM_SEG_RELOCATE   = 7; // segment moves row
constexpr int SEQ_PERM_SEG_SWAP       = 8; // segment swaps rows (2-opt*)
constexpr int SEQ_PERM_CROSS_EXCHANGE = 9; // segment exchange (order preserved)

// --- Binary in-row (element-level) ---
constexpr int SEQ_BIN_FLIP = 0;  // flip one bit
constexpr int SEQ_BIN_SWAP = 1;  // swap two bits

// --- Binary in-row (segment-level) ---
constexpr int SEQ_BIN_SEG_FLIP = 2; // flip k consecutive bits
constexpr int SEQ_BIN_K_FLIP   = 3; // flip k random bits at once

// --- Binary cross-row ---
constexpr int SEQ_BIN_CROSS_SWAP     = 4; // swap one bit per row across two rows
constexpr int SEQ_BIN_SEG_CROSS_SWAP = 5; // swap a segment from each row

// --- Shared: row-level (encoding-agnostic) ---
constexpr int SEQ_ROW_SWAP    = 10; // swap two rows
constexpr int SEQ_ROW_REVERSE = 11; // reverse row order
constexpr int SEQ_ROW_SPLIT   = 12; // split one row into two
constexpr int SEQ_ROW_MERGE   = 13; // merge two rows

// --- Special ---
constexpr int SEQ_PERTURBATION = 14; // perturbation (multi-step, irreversible)

// --- Integer in-row (element-level) ---
constexpr int SEQ_INT_RANDOM_RESET = 0; // reset one position to random in [lb, ub]
constexpr int SEQ_INT_DELTA        = 1; // one position ±k (clamped to [lb, ub])
constexpr int SEQ_INT_SWAP         = 2; // swap values at two positions

// --- Integer in-row (segment-level) ---
constexpr int SEQ_INT_SEG_RESET = 3; // reset k consecutive positions
constexpr int SEQ_INT_K_DELTA   = 4; // k positions each ±1 at random

// --- Integer cross-row ---
constexpr int SEQ_INT_CROSS_SWAP = 5; // swap one position per row across two rows

// --- LNS (large neighborhood search) ---
constexpr int SEQ_LNS_SEGMENT_SHUFFLE = 20; // shuffle a contiguous segment
constexpr int SEQ_LNS_SCATTER_SHUFFLE = 21; // shuffle a scattered set of positions
constexpr int SEQ_LNS_GUIDED_REBUILD  = 22; // guided rebuild from relation matrix

} // namespace seq
|
||||
|
||||
// ============================================================
// RelationMatrix — G/O relation matrix (GPU global memory)
// ============================================================
// G[i][j]: grouping tendency of elements i and j (symmetric; higher → more same-group)
// O[i][j]: tendency for element i to precede j (asymmetric)
// Stored as a 1D row-major array [N * N].
// For small N<200 use dense directly; P2 may add sparsification.
//
// Updated on: host, between batches
// Read in: kernel for SEQ_LNS_GUIDED_REBUILD

struct RelationMatrix {
    float* d_G;        // G matrix on GPU [N * N]
    float* d_O;        // O matrix on GPU [N * N]
    float* h_G;        // G matrix on host [N * N] (for upload after update)
    float* h_O;        // O matrix on host [N * N]
    int N;             // total number of elements
    float decay;       // decay factor α (default 0.95)
    int update_count;  // number of updates so far (for cold-start logic)
};
|
||||
|
||||
// ============================================================
// SeqRegistry — runtime-available sequence registry
// ============================================================
// Which sequences are available is determined from EncodingType and dim1.
// Passed to the GPU for sample_sequence().

// Coarse classification of a sequence, used by constraint-directed mode
// to scale whole categories of operators at once.
enum class SeqCategory : int {
    InRow = 0,     // within-row operators (swap, reverse, insert, ...)
    CrossRow = 1,  // cross-row operators (cross_relocate, cross_swap, seg_relocate, ...)
    RowLevel = 2,  // row-level operators (row_swap, row_reverse, split, merge)
    LNS = 3,       // large neighborhood search
};
|
||||
|
||||
struct SeqRegistry {
|
||||
int ids[MAX_SEQ]; // 可用序列的 SeqID 列表
|
||||
int count; // 可用序列数量
|
||||
float weights[MAX_SEQ]; // 每个序列的当前权重(未归一化,延迟归一化)
|
||||
float weights_sum; // 权重和(缓存,用于延迟归一化)
|
||||
float max_w[MAX_SEQ]; // 每个序列的权重上限(0 = 不限,用全局 cap)
|
||||
SeqCategory categories[MAX_SEQ]; // 每个序列的分类(约束导向用)
|
||||
int ids[MAX_SEQ]; // SeqID list of available sequences
|
||||
int count; // number of available sequences
|
||||
float weights[MAX_SEQ]; // current weight per sequence (unnormalized; lazy normalization)
|
||||
float weights_sum; // sum of weights (cached for lazy normalization)
|
||||
float max_w[MAX_SEQ]; // per-sequence weight cap (0 = unlimited, use global cap)
|
||||
SeqCategory categories[MAX_SEQ]; // category per sequence (for constraint-directed mode)
|
||||
};
|
||||
|
||||
// ============================================================
|
||||
// KStepConfig — 多步执行的步数选择配置
|
||||
// KStepConfig — step-count selection for multi-step execution
|
||||
// ============================================================
|
||||
// K=1: 单步(当前行为),K=2/3: 连续执行多个序列后再评估
|
||||
// 两层权重体系的第一层
|
||||
// K=1: single step (current behavior); K=2/3: run several sequences then evaluate
|
||||
// First layer of the two-level weight system
|
||||
//
|
||||
// 自适应策略:
|
||||
// - 初始 K=1 权重很大(保守),K>1 权重小
|
||||
// - K>1 带来改进 → 增大该 K 的权重
|
||||
// - 长时间无改进 → 重置/增大 K>1 权重(跳出局部最优)
|
||||
// Adaptive policy:
|
||||
// - Initially K=1 has large weight (conservative), K>1 small
|
||||
// - If K>1 yields improvement → increase that K's weight
|
||||
// - Long stagnation → reset / boost K>1 weights (escape local optima)
|
||||
|
||||
struct KStepConfig {
|
||||
float weights[MAX_K]; // K=1,2,3 的采样权重(归一化)
|
||||
int stagnation_count; // 连续无改进的 batch 数(用于触发重置)
|
||||
int stagnation_limit; // 触发重置的阈值(默认 5 个 batch)
|
||||
float weights[MAX_K]; // sampling weights for K=1,2,3 (normalized)
|
||||
int stagnation_count; // consecutive batches without improvement (triggers reset)
|
||||
int stagnation_limit; // threshold to trigger reset (default 5 batches)
|
||||
};
|
||||
|
||||
// 构建默认 K 步配置
|
||||
// Build default K-step configuration
|
||||
inline KStepConfig build_kstep_config() {
|
||||
KStepConfig kc;
|
||||
kc.weights[0] = 0.80f; // K=1: 初始主导
|
||||
kc.weights[1] = 0.15f; // K=2: 少量探索
|
||||
kc.weights[2] = 0.05f; // K=3: 极少探索
|
||||
kc.weights[0] = 0.80f; // K=1: dominates initially
|
||||
kc.weights[1] = 0.15f; // K=2: little exploration
|
||||
kc.weights[2] = 0.05f; // K=3: minimal exploration
|
||||
kc.stagnation_count = 0;
|
||||
kc.stagnation_limit = 5;
|
||||
return kc;
|
||||
};
|
||||
|
||||
// ============================================================
// ProblemProfile — problem profile inferred from structural features
// ============================================================
// Layer 1: structure-only inference (no semantics); drives operator
// registration and initial weights.
// Future layer 2: finer-grained profiles (e.g. multi-attribute, high constraint).

enum class ScaleClass { Small, Medium, Large };
enum class StructClass { SingleSeq, MultiFixed, MultiPartition };
|
||||
|
|
@ -286,10 +281,10 @@ struct ProblemProfile {
|
|||
float cross_row_prob;
|
||||
};
|
||||
|
||||
// classify_problem() is defined after ProblemConfig
|
||||
|
||||
// ============================================================
// Weight presets — driven by ScaleClass
// ============================================================
|
||||
|
||||
struct WeightPreset {
|
||||
|
|
@ -308,100 +303,100 @@ inline WeightPreset get_weight_preset(ScaleClass scale) {
|
|||
return { 0.50f, 0.80f, 0.006f, 0.01f };
|
||||
}
|
||||
|
||||
// classify_problem() and build_seq_registry() are defined after ProblemConfig
|
||||
|
||||
// ============================================================
|
||||
// Solution<D1, D2> — 解的模板化表示
|
||||
// Solution<D1, D2> — templated solution representation
|
||||
// ============================================================
|
||||
// D1: 行数上限 (TSP=1, VRP≤16, Schedule≤8)
|
||||
// D2: 每行列数上限 (TSP≤64, 背包≤32)
|
||||
// 每个 Problem 选择最小够用的 D1/D2,编译器生成紧凑的结构
|
||||
// D1: max number of rows (TSP=1, VRP≤16, Schedule≤8)
|
||||
// D2: max columns per row (TSP≤64, knapsack≤32)
|
||||
// Each Problem picks the smallest sufficient D1/D2; compiler emits a compact layout
|
||||
|
||||
template<int D1, int D2>
|
||||
struct Solution {
|
||||
static constexpr int DIM1 = D1; // 编译时行数上限
|
||||
static constexpr int DIM2 = D2; // 编译时列数上限
|
||||
int data[D1][D2]; // D1×D2×4 字节
|
||||
int dim2_sizes[D1]; // D1×4 字节
|
||||
float objectives[MAX_OBJ]; // 16 字节(固定)
|
||||
float penalty; // 4 字节
|
||||
static constexpr int DIM1 = D1; // compile-time max rows
|
||||
static constexpr int DIM2 = D2; // compile-time max columns per row
|
||||
int data[D1][D2]; // D1×D2×4 bytes
|
||||
int dim2_sizes[D1]; // D1×4 bytes
|
||||
float objectives[MAX_OBJ]; // 16 bytes (fixed)
|
||||
float penalty; // 4 bytes
|
||||
};
|
||||
|
||||
// ============================================================
|
||||
// ProblemConfig — 问题的运行时元信息
|
||||
// ProblemConfig — runtime metadata for a problem
|
||||
// ============================================================
|
||||
|
||||
struct ProblemConfig {
|
||||
EncodingType encoding;
|
||||
int dim1; // 实际使用的行数 (≤ D1)
|
||||
int dim2_default; // 实际使用的列数 (≤ D2)
|
||||
int dim1; // actual number of rows used (≤ D1)
|
||||
int dim2_default; // actual number of columns used (≤ D2)
|
||||
int num_objectives;
|
||||
ObjDir obj_dirs[MAX_OBJ];
|
||||
float obj_weights[MAX_OBJ]; // Weighted 模式下的权重
|
||||
// 多目标比较
|
||||
float obj_weights[MAX_OBJ]; // weights in Weighted mode
|
||||
// Multi-objective comparison
|
||||
CompareMode compare_mode = CompareMode::Weighted;
|
||||
int obj_priority[MAX_OBJ] = {0, 1, 2, 3}; // Lexicographic 模式下的比较顺序(索引)
|
||||
float obj_tolerance[MAX_OBJ] = {0.0f, 0.0f, 0.0f, 0.0f}; // 字典法容差:差值 <= tol 视为相等
|
||||
int obj_priority[MAX_OBJ] = {0, 1, 2, 3}; // comparison order in Lexicographic mode (indices)
|
||||
float obj_tolerance[MAX_OBJ] = {0.0f, 0.0f, 0.0f, 0.0f}; // lexicographic tolerance: |diff| ≤ tol ⇒ tie
|
||||
int value_lower_bound;
|
||||
int value_upper_bound;
|
||||
// v3.4: 统一行模式
|
||||
RowMode row_mode = RowMode::Single; // 行模式(Single/Fixed/Partition)
|
||||
float cross_row_prob = 0.0f; // 跨行 move 概率(0=纯行内操作)
|
||||
int total_elements = 0; // Partition 模式下的总元素数
|
||||
int perm_repeat_count = 1; // 排列中每个值的重复次数(1=标准排列,>1=多重集排列)
|
||||
// v3.4: unified row mode
|
||||
RowMode row_mode = RowMode::Single; // row mode (Single/Fixed/Partition)
|
||||
float cross_row_prob = 0.0f; // probability of cross-row moves (0 = within-row only)
|
||||
int total_elements = 0; // total elements in Partition mode
|
||||
int perm_repeat_count = 1; // repeats per value in permutation (1 = standard; >1 = multiset)
|
||||
};
|
||||
|
||||
// ============================================================
|
||||
// SolverConfig — 求解器参数
|
||||
// SolverConfig — solver parameters
|
||||
// ============================================================
|
||||
|
||||
struct SolverConfig {
|
||||
int pop_size = 0; // 种群大小(0 = 自动匹配 GPU 最大并行度)
|
||||
int pop_size = 0; // population size (0 = auto to max GPU parallelism)
|
||||
int max_gen = 1000;
|
||||
float mutation_rate = 0.1f;
|
||||
unsigned seed = 42;
|
||||
bool verbose = true;
|
||||
int print_every = 100;
|
||||
// 岛屿模型参数
|
||||
int num_islands = 1; // 0 = 自适应,1 = 纯爬山(无岛屿),>1 = 岛屿模型
|
||||
int migrate_interval = 100; // 每隔多少代执行一次迁移
|
||||
// Island model
|
||||
int num_islands = 1; // 0 = adaptive, 1 = pure hill climbing (no islands), >1 = island model
|
||||
int migrate_interval = 100; // migrate every this many generations
|
||||
MigrateStrategy migrate_strategy = MigrateStrategy::Hybrid;
|
||||
// 模拟退火参数
|
||||
float sa_temp_init = 0.0f; // 初始温度(0 = 禁用 SA,纯爬山)
|
||||
float sa_alpha = 0.998f; // 冷却率(每代乘以 alpha)
|
||||
// v1.0: 交叉参数
|
||||
float crossover_rate = 0.1f; // 每代中执行交叉的概率(vs 变异)
|
||||
// v2.0: 自适应算子选择
|
||||
bool use_aos = false; // 启用 AOS(batch 间更新算子权重)
|
||||
float aos_weight_floor = AOS_WEIGHT_FLOOR; // 运行时可覆盖的 floor
|
||||
float aos_weight_cap = AOS_WEIGHT_CAP; // 运行时可覆盖的 cap
|
||||
// v2.1: 初始解策略
|
||||
int init_oversample = 4; // 采样倍数(1 = 不做采样择优,即纯随机)
|
||||
float init_random_ratio = 0.3f; // 纯随机解占比(多样性保底)
|
||||
// v3.0: 工程可用性
|
||||
float time_limit_sec = 0.0f; // 时间限制(秒,0 = 不限制,按 max_gen 跑完)
|
||||
int stagnation_limit = 0; // 收敛检测:连续多少个 batch 无改进后 reheat(0 = 禁用)
|
||||
float reheat_ratio = 0.5f; // reheat 时温度恢复到初始温度的比例
|
||||
// Simulated annealing
|
||||
float sa_temp_init = 0.0f; // initial temperature (0 = disable SA, hill climb only)
|
||||
float sa_alpha = 0.998f; // cooling rate (multiply by alpha each generation)
|
||||
// v1.0: crossover
|
||||
float crossover_rate = 0.1f; // probability of crossover per generation (vs mutation)
|
||||
// v2.0: adaptive operator selection
|
||||
bool use_aos = false; // enable AOS (update operator weights between batches)
|
||||
float aos_weight_floor = AOS_WEIGHT_FLOOR; // runtime-overridable floor
|
||||
float aos_weight_cap = AOS_WEIGHT_CAP; // runtime-overridable cap
|
||||
// v2.1: initial solution strategy
|
||||
int init_oversample = 4; // oversampling factor (1 = no sampling selection, pure random)
|
||||
float init_random_ratio = 0.3f; // fraction of purely random solutions (diversity floor)
|
||||
// v3.0: engineering usability
|
||||
float time_limit_sec = 0.0f; // time limit in seconds (0 = none, run to max_gen)
|
||||
int stagnation_limit = 0; // convergence: reheat after this many batches without improvement (0 = off)
|
||||
float reheat_ratio = 0.5f; // on reheat, fraction of initial temperature to restore
|
||||
// v3.5: CUDA Graph
|
||||
bool use_cuda_graph = false; // 启用 CUDA Graph(减少 kernel launch 开销)
|
||||
// v3.6: AOS 更新频率控制
|
||||
int aos_update_interval = 10; // 每隔多少个 batch 更新一次 AOS 权重(降低 cudaMemcpy 同步频率)
|
||||
// v4.0: 约束导向 + 分层搜索
|
||||
bool use_constraint_directed = false; // 启用约束导向(根据 penalty 比例动态调整跨行算子权重)
|
||||
bool use_phased_search = false; // 启用分层搜索(按进度调整全局 floor/cap)
|
||||
// 分层搜索参数:三期阈值
|
||||
float phase_explore_end = 0.30f; // 探索期结束(进度比例)
|
||||
float phase_refine_start = 0.70f; // 精细期开始(进度比例)
|
||||
// 约束导向参数
|
||||
float constraint_boost_max = 2.5f; // 高约束时跨行算子 cap 提升倍率上限
|
||||
// v5.0: 多 GPU 协同
|
||||
int num_gpus = 1; // 使用的 GPU 数量(1 = 单 GPU,>1 = 多 GPU 协同)
|
||||
float multi_gpu_interval_sec = 10.0f; // GPU 间交换最优解的时间间隔(秒)
|
||||
MultiGpuInjectMode multi_gpu_inject_mode = MultiGpuInjectMode::HalfIslands; // 注入模式
|
||||
bool use_cuda_graph = false; // enable CUDA Graph (fewer kernel launch overheads)
|
||||
// v3.6: AOS update frequency
|
||||
int aos_update_interval = 10; // update AOS weights every this many batches (lower cudaMemcpy sync rate)
|
||||
// v4.0: constraint-directed + phased search
|
||||
bool use_constraint_directed = false; // constraint-directed mode (scale cross-row weights by penalty ratio)
|
||||
bool use_phased_search = false; // phased search (adjust global floor/cap by progress)
|
||||
// Phased search: three-phase thresholds
|
||||
float phase_explore_end = 0.30f; // end of exploration phase (progress fraction)
|
||||
float phase_refine_start = 0.70f; // start of refinement phase (progress fraction)
|
||||
// Constraint-directed parameters
|
||||
float constraint_boost_max = 2.5f; // max multiplier boost for cross-row cap under high constraint
|
||||
// v5.0: multi-GPU cooperation
|
||||
int num_gpus = 1; // number of GPUs (1 = single GPU, >1 = multi-GPU)
|
||||
float multi_gpu_interval_sec = 10.0f; // interval in seconds to exchange best solutions across GPUs
|
||||
MultiGpuInjectMode multi_gpu_inject_mode = MultiGpuInjectMode::HalfIslands; // injection mode
|
||||
};
|
||||
|
||||
// ============================================================
// classify_problem — infer problem profile from ProblemConfig
// ============================================================
|
||||
|
||||
inline ProblemProfile classify_problem(const ProblemConfig& pcfg) {
|
||||
|
|
@ -424,7 +419,7 @@ inline ProblemProfile classify_problem(const ProblemConfig& pcfg) {
|
|||
}
|
||||
|
||||
// ============================================================
// build_seq_registry — operator registration driven by ProblemProfile
// ============================================================
|
||||
|
||||
inline SeqRegistry build_seq_registry(const ProblemProfile& prof) {
|
||||
|
|
@ -436,7 +431,10 @@ inline SeqRegistry build_seq_registry(const ProblemProfile& prof) {
|
|||
}
|
||||
|
||||
auto add = [&](int id, float w, SeqCategory cat, float cap = 0.0f) {
|
||||
if (reg.count >= MAX_SEQ) return;
|
||||
if (reg.count >= MAX_SEQ) {
|
||||
printf("[WARN] SeqRegistry full (MAX_SEQ=%d), ignoring SeqID %d\n", MAX_SEQ, id);
|
||||
return;
|
||||
}
|
||||
reg.ids[reg.count] = id;
|
||||
reg.weights[reg.count] = w;
|
||||
reg.max_w[reg.count] = cap;
|
||||
|
|
@ -514,7 +512,7 @@ inline SeqRegistry build_seq_registry(const ProblemProfile& prof) {
|
|||
}
|
||||
}
|
||||
|
||||
// 延迟归一化:只计算权重和,不归一化
|
||||
// Lazy normalization: only sum weights; do not normalize here
|
||||
reg.weights_sum = 0.0f;
|
||||
for (int i = 0; i < reg.count; i++) {
|
||||
reg.weights_sum += reg.weights[i];
|
||||
|
|
@ -523,19 +521,19 @@ inline SeqRegistry build_seq_registry(const ProblemProfile& prof) {
|
|||
}
|
||||
|
||||
// ============================================================
|
||||
// ObjConfig — 传到 GPU 的目标比较配置(紧凑结构)
|
||||
// ObjConfig — compact objective comparison config for GPU
|
||||
// ============================================================
|
||||
|
||||
struct ObjConfig {
|
||||
int num_obj;
|
||||
CompareMode mode;
|
||||
ObjDir dirs[MAX_OBJ]; // 每个目标的方向
|
||||
float weights[MAX_OBJ]; // Weighted 模式下的权重
|
||||
int priority[MAX_OBJ]; // Lexicographic 模式下的比较顺序
|
||||
float tolerance[MAX_OBJ]; // Lexicographic 模式下的容差
|
||||
ObjDir dirs[MAX_OBJ]; // direction per objective
|
||||
float weights[MAX_OBJ]; // weights in Weighted mode
|
||||
int priority[MAX_OBJ]; // comparison order in Lexicographic mode
|
||||
float tolerance[MAX_OBJ]; // tolerance in Lexicographic mode
|
||||
};
|
||||
|
||||
// Build ObjConfig from ProblemConfig (CPU side)
|
||||
inline ObjConfig make_obj_config(const ProblemConfig& pcfg) {
|
||||
ObjConfig oc;
|
||||
oc.num_obj = pcfg.num_objectives;
|
||||
|
|
@ -550,7 +548,7 @@ inline ObjConfig make_obj_config(const ProblemConfig& pcfg) {
|
|||
}
|
||||
|
||||
// ============================================================
// SolveResult — return value of solve()
// ============================================================
|
||||
|
||||
// Why solve() terminated: generation budget exhausted, wall-clock limit hit,
// or stagnation-based early stop.
enum class StopReason { MaxGen, TimeLimit, Stagnation };
|
||||
|
|
@ -564,12 +562,12 @@ struct SolveResult {
|
|||
};
|
||||
|
||||
// ============================================================
// Objective importance mapping — unified importance for Weighted / Lexicographic
// ============================================================
// Used for initial selection (NSGA-II weighted crowding + reserved slots for core objectives)
// Weighted: importance[i] = weight[i] / Σweight
// Lexicographic: importance[i] = 0.5^rank[i] / Σ(0.5^rank)
// → first priority ~57%, second ~29%, third ~14%
|
||||
|
||||
inline void compute_importance(const ObjConfig& oc, float* importance) {
|
||||
float sum = 0.0f;
|
||||
|
|
@ -590,26 +588,26 @@ inline void compute_importance(const ObjConfig& oc, float* importance) {
|
|||
}
|
||||
|
||||
// ============================================================
// Comparison utilities — Weighted / Lexicographic
// ============================================================

// Normalize an objective value to "smaller is better": Maximize
// objectives are negated, Minimize objectives pass through unchanged.
__device__ __host__ inline float normalize_obj(float val, ObjDir dir) {
    return (dir == ObjDir::Maximize) ? -val : val;
}
|
||||
|
||||
// Core comparison: whether a is better than b
// v5.0: also __host__ so multi-GPU code can compare solutions on the CPU
|
||||
template<typename Sol>
|
||||
__device__ __host__ inline bool is_better(const Sol& a, const Sol& b,
|
||||
const ObjConfig& oc) {
|
||||
// penalty 优先:可行解一定优于不可行解
|
||||
// Penalty first: feasible beats infeasible
|
||||
if (a.penalty <= 0.0f && b.penalty > 0.0f) return true;
|
||||
if (a.penalty > 0.0f && b.penalty <= 0.0f) return false;
|
||||
if (a.penalty > 0.0f && b.penalty > 0.0f) return a.penalty < b.penalty;
|
||||
|
||||
if (oc.mode == CompareMode::Weighted) {
|
||||
// 加权求和(权重已包含方向信息:Maximize 目标用负权重,或由 normalize_obj 处理)
|
||||
// Weighted sum (weights may encode direction: negative for Maximize, or use normalize_obj)
|
||||
float sum_a = 0.0f, sum_b = 0.0f;
|
||||
for (int i = 0; i < oc.num_obj; i++) {
|
||||
float na = normalize_obj(a.objectives[i], oc.dirs[i]);
|
||||
|
|
@ -619,21 +617,22 @@ __device__ __host__ inline bool is_better(const Sol& a, const Sol& b,
|
|||
}
|
||||
return sum_a < sum_b;
|
||||
} else {
|
||||
// 字典法:按 priority 顺序逐目标比较
|
||||
// Lexicographic: compare objectives in priority order
|
||||
for (int p = 0; p < oc.num_obj; p++) {
|
||||
int idx = oc.priority[p];
|
||||
if (idx < 0 || idx >= oc.num_obj) continue;
|
||||
float va = normalize_obj(a.objectives[idx], oc.dirs[idx]);
|
||||
float vb = normalize_obj(b.objectives[idx], oc.dirs[idx]);
|
||||
float diff = va - vb;
|
||||
if (diff < -oc.tolerance[idx]) return true; // a 明显更好
|
||||
if (diff > oc.tolerance[idx]) return false; // b 明显更好
|
||||
// 在容差内视为相等 → 继续比较下一个目标
|
||||
if (diff < -oc.tolerance[idx]) return true; // a clearly better
|
||||
if (diff > oc.tolerance[idx]) return false; // b clearly better
|
||||
// Within tolerance → tie, continue to next objective
|
||||
}
|
||||
return false; // 所有目标都在容差内相等
|
||||
return false; // all objectives tied within tolerance
|
||||
}
|
||||
}
|
||||
|
||||
// Scalarization (for SA acceptance probability): returns a smaller-is-better scalar
|
||||
template<typename Sol>
|
||||
__device__ __host__ inline float scalar_objective(const Sol& sol,
|
||||
const ObjConfig& oc) {
|
||||
|
|
@ -643,13 +642,14 @@ __device__ __host__ inline float scalar_objective(const Sol& sol,
|
|||
sum += oc.weights[i] * normalize_obj(sol.objectives[i], oc.dirs[i]);
|
||||
return sum;
|
||||
} else {
|
||||
// 字典法下 SA 用第一优先级目标作为标量
|
||||
// Under lexicographic SA, use first-priority objective as scalar
|
||||
int idx = oc.priority[0];
|
||||
if (idx < 0 || idx >= oc.num_obj) idx = 0;
|
||||
return normalize_obj(sol.objectives[idx], oc.dirs[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
// 轻量比较:直接操作 float[] 目标数组(避免复制整个 Sol)
|
||||
// Lightweight comparison: operate on float[] objectives (avoid copying full Sol)
|
||||
__device__ inline bool obj_is_better(const float* new_objs, const float* old_objs,
|
||||
const ObjConfig& oc) {
|
||||
if (oc.mode == CompareMode::Weighted) {
|
||||
|
|
@ -662,6 +662,7 @@ __device__ inline bool obj_is_better(const float* new_objs, const float* old_obj
|
|||
} else {
|
||||
for (int p = 0; p < oc.num_obj; p++) {
|
||||
int idx = oc.priority[p];
|
||||
if (idx < 0 || idx >= oc.num_obj) continue;
|
||||
float va = normalize_obj(new_objs[idx], oc.dirs[idx]);
|
||||
float vb = normalize_obj(old_objs[idx], oc.dirs[idx]);
|
||||
float diff = va - vb;
|
||||
|
|
@ -672,7 +673,7 @@ __device__ inline bool obj_is_better(const float* new_objs, const float* old_obj
|
|||
}
|
||||
}
|
||||
|
||||
// 轻量标量化:直接操作 float[] 目标数组
|
||||
// Lightweight scalarization: operate on float[] objectives
|
||||
__device__ __host__ inline float obj_scalar(const float* objs, const ObjConfig& oc) {
|
||||
if (oc.mode == CompareMode::Weighted) {
|
||||
float sum = 0.0f;
|
||||
|
|
@ -681,60 +682,61 @@ __device__ __host__ inline float obj_scalar(const float* objs, const ObjConfig&
|
|||
return sum;
|
||||
} else {
|
||||
int idx = oc.priority[0];
|
||||
if (idx < 0 || idx >= oc.num_obj) idx = 0;
|
||||
return normalize_obj(objs[idx], oc.dirs[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// AOSStats — 自适应算子选择统计(每个 block 一份)
|
||||
// AOSStats — adaptive operator selection stats (one per block)
|
||||
// ============================================================
|
||||
// v3.0: 粒度从 3 层 → MAX_SEQ 个序列
|
||||
// 记录每个序列的使用次数和改进次数
|
||||
// batch 结束后由 host 聚合,更新 SeqRegistry 权重
|
||||
// v3.0: granularity from 3 layers → MAX_SEQ sequences
|
||||
// Records per-sequence usage and improvement counts
|
||||
// Host aggregates after each batch and updates SeqRegistry weights
|
||||
|
||||
struct AOSStats {
|
||||
// 算子层统计(第二层)
|
||||
int usage[MAX_SEQ]; // 各序列使用次数
|
||||
int improvement[MAX_SEQ]; // 各序列改进次数(delta < 0 且被接受)
|
||||
// K 步数层统计(第一层)
|
||||
int k_usage[MAX_K]; // K=1,2,3 各自使用次数
|
||||
int k_improvement[MAX_K]; // K=1,2,3 各自改进次数
|
||||
// Operator-level stats (second layer)
|
||||
int usage[MAX_SEQ]; // per-sequence usage counts
|
||||
int improvement[MAX_SEQ]; // per-sequence improvements (delta < 0 and accepted)
|
||||
// K-step layer stats (first layer)
|
||||
int k_usage[MAX_K]; // usage counts for K=1,2,3
|
||||
int k_improvement[MAX_K]; // improvement counts for K=1,2,3
|
||||
};
|
||||
|
||||
// ============================================================
|
||||
// ObjDef — 单个目标的定义(编译期常量)
|
||||
// ObjDef — single-objective definition (compile-time constant)
|
||||
// ============================================================
|
||||
|
||||
struct ObjDef {
|
||||
ObjDir dir; // 优化方向
|
||||
float weight; // Weighted 模式下的权重
|
||||
float tolerance; // Lexicographic 模式下的容差
|
||||
ObjDir dir; // optimization direction
|
||||
float weight; // weight in Weighted mode
|
||||
float tolerance; // tolerance in Lexicographic mode
|
||||
};
|
||||
|
||||
// ============================================================
// HeuristicMatrix — data-matrix descriptor for heuristic initial solutions
// ============================================================

struct HeuristicMatrix {
    const float* data;  // N×N matrix on the host
    int N;              // dimension
};
|
||||
|
||||
// ============================================================
// ProblemBase<Derived, D1, D2> — CRTP base class
//
// Users inherit this base and provide:
//   static constexpr ObjDef OBJ_DEFS[] = {...};       — objective metadata
//   __device__ float compute_obj(int idx, ...) const; — objective dispatch
//   __device__ float compute_penalty(...) const;
//
// Convention: OBJ_DEFS and compute_obj are written side by side; case N maps to OBJ_DEFS[N].
// NUM_OBJ is derived from sizeof(OBJ_DEFS); no manual count to maintain.
//
// The base class automatically provides:
//   evaluate(sol)        — loops the objective list calling compute_obj
//   fill_obj_config(cfg) — fills ProblemConfig from OBJ_DEFS
//   obj_config()         — builds an ObjConfig directly
// ============================================================
|
||||
|
||||
template<typename Derived, int D1_, int D2_>
|
||||
|
|
@ -743,10 +745,10 @@ struct ProblemBase {
|
|||
static constexpr int D2 = D2_;
|
||||
using Sol = Solution<D1, D2>;
|
||||
|
||||
// NUM_OBJ 从 OBJ_DEFS 数组自动推导
|
||||
// NUM_OBJ derived from OBJ_DEFS array size
|
||||
static constexpr int NUM_OBJ = sizeof(Derived::OBJ_DEFS) / sizeof(ObjDef);
|
||||
|
||||
// 自动评估:遍历目标列表
|
||||
// Automatic evaluation: iterate objectives
|
||||
__device__ void evaluate(Sol& sol) const {
|
||||
const auto& self = static_cast<const Derived&>(*this);
|
||||
constexpr int n = sizeof(Derived::OBJ_DEFS) / sizeof(ObjDef);
|
||||
|
|
@ -755,7 +757,7 @@ struct ProblemBase {
|
|||
sol.penalty = self.compute_penalty(sol);
|
||||
}
|
||||
|
||||
// 从 OBJ_DEFS 自动填充 ProblemConfig 的目标部分
|
||||
// Fill objective fields of ProblemConfig from OBJ_DEFS
|
||||
void fill_obj_config(ProblemConfig& cfg) const {
|
||||
constexpr int n = sizeof(Derived::OBJ_DEFS) / sizeof(ObjDef);
|
||||
cfg.num_objectives = n;
|
||||
|
|
@ -763,59 +765,59 @@ struct ProblemBase {
|
|||
cfg.obj_dirs[i] = Derived::OBJ_DEFS[i].dir;
|
||||
cfg.obj_weights[i] = Derived::OBJ_DEFS[i].weight;
|
||||
cfg.obj_tolerance[i] = Derived::OBJ_DEFS[i].tolerance;
|
||||
cfg.obj_priority[i] = i; // 列表顺序即优先级
|
||||
cfg.obj_priority[i] = i; // list order is priority order
|
||||
}
|
||||
}
|
||||
|
||||
// 直接生成 ObjConfig(供 solver 使用)
|
||||
// Build ObjConfig directly (for solver)
|
||||
ObjConfig obj_config() const {
|
||||
ProblemConfig pcfg;
|
||||
fill_obj_config(pcfg);
|
||||
return make_obj_config(pcfg);
|
||||
}
|
||||
|
||||
// 可选:返回 shared memory 需求(字节)
|
||||
// 默认返回 0(不使用 shared memory)
|
||||
// 子类覆盖:如果问题数据可以放入 shared memory,返回实际大小
|
||||
// Optional: shared memory requirement (bytes)
|
||||
// Default 0 (no shared memory)
|
||||
// Override if problem data fits in shared memory; return actual size
|
||||
size_t shared_mem_bytes() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 可选:加载问题数据到 shared memory
|
||||
// 默认空实现(不使用 shared memory)
|
||||
// 子类覆盖:如果 shared_mem_bytes() > 0,实现数据加载逻辑
|
||||
// Optional: load problem data into shared memory
|
||||
// Default no-op (no shared memory)
|
||||
// Override if shared_mem_bytes() > 0 to implement loading
|
||||
__device__ void load_shared(char* smem, int tid, int bsz) {
|
||||
(void)smem; (void)tid; (void)bsz; // 默认:不做任何事
|
||||
(void)smem; (void)tid; (void)bsz; // default: no-op
|
||||
}
|
||||
|
||||
// 每个 block 在 global memory 中的热数据工作集大小(字节)
|
||||
// 用于 auto pop_size 估算 L2 cache 压力
|
||||
// 默认 = shared_mem_bytes()(数据在 smem 时,gmem 工作集为 0 不影响)
|
||||
// 子类覆盖:当 shared_mem_bytes() 返回 0(数据放不进 smem)时,
|
||||
// 返回实际数据大小(如距离矩阵 n*n*sizeof(float))
|
||||
// Hot working-set size in global memory per block (bytes)
|
||||
// Used for auto pop_size L2 cache pressure estimate
|
||||
// Default = shared_mem_bytes() (when data is in smem, gmem working set is 0)
|
||||
// Override when shared_mem_bytes() is 0 (data does not fit in smem):
|
||||
// return actual data size (e.g. distance matrix n*n*sizeof(float))
|
||||
size_t working_set_bytes() const {
|
||||
return static_cast<const Derived&>(*this).shared_mem_bytes();
|
||||
}
|
||||
|
||||
// 可选:初始化 G/O 关系矩阵(为 GUIDED_REBUILD 提供先验知识)
|
||||
// G[i*N+j]: 元素 i 和 j 的分组倾向(对称,[0,1],越大越倾向同组)
|
||||
// O[i*N+j]: 元素 i 排在 j 前面的倾向(不对称,[0,1])
|
||||
// 默认不提供(全零),搜索过程中通过 EMA 从历史好解积累
|
||||
// 用户覆盖示例:距离近 → G 和 O 都高
|
||||
// Optional: initialize G/O relation matrix (prior for GUIDED_REBUILD)
|
||||
// G[i*N+j]: grouping tendency of i and j (symmetric, [0,1]; higher → same group)
|
||||
// O[i*N+j]: tendency for i before j (asymmetric, [0,1])
|
||||
// Default none (zeros); EMA accumulates from good solutions during search
|
||||
// Example override: close distance → high G and O
|
||||
void init_relation_matrix(float* h_G, float* h_O, int N) const {
|
||||
(void)h_G; (void)h_O; (void)N; // 默认:不做任何事(保持全零)
|
||||
(void)h_G; (void)h_O; (void)N; // default: no-op (keep zeros)
|
||||
}
|
||||
|
||||
// 可选:返回 host 端数据矩阵供启发式初始解构造
|
||||
// 默认返回 0(不提供),子类 override 后填充 out 数组并返回实际数量
|
||||
// Optional: host-side data matrices for heuristic initial solutions
|
||||
// Default 0 (none); override to fill out[] and return count
|
||||
int heuristic_matrices(HeuristicMatrix* out, int max_count) const {
|
||||
(void)out; (void)max_count;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// v5.0: 多 GPU 协同 — 克隆 Problem 到指定 GPU
|
||||
// 子类需实现:cudaSetDevice(gpu_id) + 分配设备内存 + 拷贝数据
|
||||
// 返回新的 Problem 实例指针(在 host 端,但其内部设备指针指向 gpu_id)
|
||||
// v5.0: multi-GPU — clone Problem to a given GPU
|
||||
// Subclasses implement: cudaSetDevice(gpu_id) + device alloc + copy
|
||||
// Returns new Problem* on host; internal device pointers target gpu_id
|
||||
virtual Derived* clone_to_device(int gpu_id) const {
|
||||
(void)gpu_id;
|
||||
fprintf(stderr, "Error: clone_to_device() not implemented for this Problem type\n");
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/**
|
||||
* assignment.cuh - 指派问题
|
||||
*
|
||||
* 继承 ProblemBase,使用 ObjDef 目标注册机制
|
||||
* assignment.cuh - assignment problem
|
||||
*
|
||||
* Extends ProblemBase with ObjDef objective registration.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -11,10 +11,10 @@
|
|||
|
||||
struct AssignmentProblem : ProblemBase<AssignmentProblem, 1, 16> {
|
||||
const float* d_cost;
|
||||
const float* h_cost; // host 端成本矩阵(用于 init_relation_matrix)
|
||||
const float* h_cost; // host cost matrix (for init_relation_matrix)
|
||||
int n;
|
||||
|
||||
// ---- 目标计算 ----
|
||||
// ---- objective evaluation ----
|
||||
__device__ float calc_total_cost(const Sol& sol) const {
|
||||
float total = 0.0f;
|
||||
const int* assign = sol.data[0];
|
||||
|
|
@ -24,7 +24,7 @@ struct AssignmentProblem : ProblemBase<AssignmentProblem, 1, 16> {
|
|||
return total;
|
||||
}
|
||||
|
||||
// ---- 目标定义(OBJ_DEFS 与 compute_obj 必须一一对应)----
|
||||
// ---- objective defs (OBJ_DEFS must match compute_obj one-to-one) ----
|
||||
static constexpr ObjDef OBJ_DEFS[] = {
|
||||
{ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_total_cost
|
||||
};
|
||||
|
|
@ -47,7 +47,7 @@ struct AssignmentProblem : ProblemBase<AssignmentProblem, 1, 16> {
|
|||
return cfg;
|
||||
}
|
||||
|
||||
// ---- shared memory 接口 ----
|
||||
// ---- shared memory interface ----
|
||||
static constexpr size_t SMEM_LIMIT = 48 * 1024;
|
||||
|
||||
size_t shared_mem_bytes() const {
|
||||
|
|
@ -66,12 +66,12 @@ struct AssignmentProblem : ProblemBase<AssignmentProblem, 1, 16> {
|
|||
d_cost = sc;
|
||||
}
|
||||
|
||||
// 成本先验:task j 和 task k 如果被相似 agent 偏好,G 值高
|
||||
// O 矩阵:task j 在位置 i 成本低 → O[j][k] 略高(j 倾向排在 k 前面的位置)
|
||||
// Cost prior: if tasks j and k are similarly preferred by agents, G is high
|
||||
// O matrix: low cost for task j at slot i → slightly higher O[j][k] (j tends before k)
|
||||
void init_relation_matrix(float* G, float* O, int N) const {
|
||||
if (!h_cost || N != n) return;
|
||||
// 对每个 task,构建成本向量,task 间余弦相似度 → G
|
||||
// 简化:成本列向量的相关性
|
||||
// Per task, build cost vectors; cosine similarity between tasks → G
|
||||
// Simplified: correlation of cost columns
|
||||
float max_c = 0.0f;
|
||||
for (int i = 0; i < N * N; i++)
|
||||
if (h_cost[i] > max_c) max_c = h_cost[i];
|
||||
|
|
@ -80,7 +80,7 @@ struct AssignmentProblem : ProblemBase<AssignmentProblem, 1, 16> {
|
|||
for (int j = 0; j < N; j++)
|
||||
for (int k = 0; k < N; k++) {
|
||||
if (j == k) continue;
|
||||
// G: 两个 task 的成本向量越相似 → 越可能互换
|
||||
// G: more similar cost columns → more likely to swap tasks
|
||||
float dot = 0.0f, nj = 0.0f, nk = 0.0f;
|
||||
for (int i = 0; i < N; i++) {
|
||||
float cj = h_cost[i * N + j] / max_c;
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
/**
|
||||
* bin_packing.cuh - 一维装箱问题(Integer 编码 + 约束)
|
||||
*
|
||||
* N 个物品,每个重量 w[i],装入最多 B 个箱子,每个箱子容量 C。
|
||||
* 决策变量:data[0][i] ∈ [0, B-1],表示物品 i 放入的箱子编号。
|
||||
* 目标:最小化使用的箱子数。
|
||||
* 约束:每个箱子总重不超过 C,超出部分作为 penalty。
|
||||
*
|
||||
* 验证实例:8 物品 weights=[7,5,3,4,6,2,8,1], C=10, 最优=4 箱
|
||||
* 箱0={7,3}=10, 箱1={5,4,1}=10, 箱2={6,2}=8, 箱3={8}=8
|
||||
* bin_packing.cuh - one-dimensional bin packing (Integer encoding + constraints)
|
||||
*
|
||||
* N items with weights w[i], at most B bins, capacity C per bin.
|
||||
* Decision: data[0][i] in [0, B-1] = bin index for item i.
|
||||
* Objective: minimize number of bins used.
|
||||
* Constraint: bin load ≤ C; overflow contributes to penalty.
|
||||
*
|
||||
* Validation instance: 8 items weights=[7,5,3,4,6,2,8,1], C=10, optimum=4 bins
|
||||
* bin0={7,3}=10, bin1={5,4,1}=10, bin2={6,2}=8, bin3={8}=8
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -16,9 +16,9 @@
|
|||
|
||||
struct BinPackingProblem : ProblemBase<BinPackingProblem, 1, 64> {
|
||||
const float* d_weights;
|
||||
int n; // 物品数
|
||||
int max_bins; // 最大箱子数 B
|
||||
float capacity; // 箱子容量 C
|
||||
int n; // number of items
|
||||
int max_bins; // max bins B
|
||||
float capacity; // bin capacity C
|
||||
|
||||
__device__ float calc_bins_used(const Sol& sol) const {
|
||||
bool used[32] = {};
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
/**
|
||||
* graph_color.cuh - 图着色问题(Integer 编码)
|
||||
*
|
||||
* N 个节点的图,用 k 种颜色着色。
|
||||
* 决策变量:data[0][i] ∈ [0, k-1],表示节点 i 的颜色。
|
||||
* 目标:最小化冲突边数(相邻节点同色的边数)。
|
||||
*
|
||||
* 验证实例:Petersen 图(10 节点 15 边,色数=3,最优冲突=0)
|
||||
* graph_color.cuh - graph coloring (Integer encoding)
|
||||
*
|
||||
* Graph on N nodes, k colors.
|
||||
* Decision: data[0][i] in [0, k-1] = color of node i.
|
||||
* Objective: minimize number of conflicting edges (adjacent same color).
|
||||
*
|
||||
* Validation instance: Petersen graph (10 nodes, 15 edges, chromatic number 3, optimal conflicts=0)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -13,9 +13,9 @@
|
|||
#include "cuda_utils.cuh"
|
||||
|
||||
struct GraphColorProblem : ProblemBase<GraphColorProblem, 1, 64> {
|
||||
const int* d_adj; // 邻接矩阵 [N*N](1=相邻, 0=不相邻)
|
||||
int n; // 节点数
|
||||
int k; // 颜色数
|
||||
const int* d_adj; // adjacency [N*N] (1=edge, 0=no edge)
|
||||
int n; // number of nodes
|
||||
int k; // number of colors
|
||||
|
||||
__device__ float calc_conflicts(const Sol& sol) const {
|
||||
int conflicts = 0;
|
||||
|
|
|
|||
|
|
@ -1,26 +1,26 @@
|
|||
/**
|
||||
* jsp.cuh - 车间调度问题 (Job Shop Scheduling Problem)
|
||||
*
|
||||
* J 个工件,每个工件有 O 道工序,每道工序指定机器和耗时。
|
||||
*
|
||||
* === 编码方案 A:Integer 多行(时间表编码)===
|
||||
* JSPProblem: data[j][i] = 工件 j 的第 i 道工序的开始时间
|
||||
* jsp.cuh - Job Shop Scheduling Problem (JSSP)
|
||||
*
|
||||
* J jobs, each with O operations; each op specifies machine and duration.
|
||||
*
|
||||
* === Encoding A: multi-row Integer (time-table encoding) ===
|
||||
* JSPProblem: data[j][i] = start time of job j's i-th operation
|
||||
* dim1 = num_jobs, dim2_default = num_ops
|
||||
* row_mode = Fixed(禁止 ROW_SPLIT/ROW_MERGE)
|
||||
* 每行代表一个工件的固定工序序列,行长度不可变
|
||||
*
|
||||
* === 编码方案 B:Permutation 多重集(工序排列编码)===
|
||||
* JSPPermProblem: data[0][k] = 工件编号(0..J-1),长度 J*O
|
||||
* 值 j 出现 O 次。从左到右扫描,第 t 次遇到值 j 表示工件 j 的第 t 道工序。
|
||||
* row_mode = Fixed (no ROW_SPLIT/ROW_MERGE)
|
||||
* Each row is a fixed op sequence for one job; row length is fixed.
|
||||
*
|
||||
* === Encoding B: Permutation multiset (operation sequence encoding) ===
|
||||
* JSPPermProblem: data[0][k] = job id (0..J-1), length J*O
|
||||
* Value j appears O times. Left-to-right scan: t-th occurrence of j is job j's t-th op.
|
||||
* dim1 = 1, dim2_default = J*O, perm_repeat_count = O
|
||||
* 标准 Permutation 算子(swap/reverse/insert)天然保持多重集结构
|
||||
*
|
||||
* 目标:Minimize makespan(所有工件完成时间的最大值)。
|
||||
* 约束:
|
||||
* (a) 工序顺序:同一工件的工序必须按序执行
|
||||
* (b) 机器冲突:同一机器同一时刻只能处理一个工序
|
||||
*
|
||||
* 验证实例:自定义 3 工件 3 机器 (3x3),最优 makespan = 12
|
||||
* Standard permutation ops (swap/reverse/insert) preserve multiset structure.
|
||||
*
|
||||
* Objective: minimize makespan (max completion time over jobs).
|
||||
* Constraints:
|
||||
* (a) Precedence: ops of the same job must run in order.
|
||||
* (b) Machine conflict: one op per machine at a time.
|
||||
*
|
||||
* Validation instance: custom 3 jobs × 3 machines (3x3), optimal makespan = 12
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -28,16 +28,16 @@
|
|||
#include "cuda_utils.cuh"
|
||||
|
||||
// ============================================================
|
||||
// 编码方案 A:Integer 多行(时间表编码)
|
||||
// Encoding A: multi-row Integer (time-table encoding)
|
||||
// ============================================================
|
||||
|
||||
struct JSPProblem : ProblemBase<JSPProblem, 8, 16> {
|
||||
const int* d_machine; // 工序所需机器 [J*O]
|
||||
const float* d_duration; // 工序耗时 [J*O]
|
||||
int num_jobs; // 工件数 J
|
||||
int num_ops; // 每工件工序数 O
|
||||
int num_machines; // 机器数 M
|
||||
int time_horizon; // 时间上界
|
||||
const int* d_machine; // machine per op [J*O]
|
||||
const float* d_duration; // op duration [J*O]
|
||||
int num_jobs; // number of jobs J
|
||||
int num_ops; // ops per job O
|
||||
int num_machines; // number of machines M
|
||||
int time_horizon; // time horizon upper bound
|
||||
|
||||
__device__ float calc_makespan(const Sol& sol) const {
|
||||
float makespan = 0.0f;
|
||||
|
|
@ -62,7 +62,7 @@ struct JSPProblem : ProblemBase<JSPProblem, 8, 16> {
|
|||
__device__ float compute_penalty(const Sol& sol) const {
|
||||
float penalty = 0.0f;
|
||||
|
||||
// (a) 工序顺序约束
|
||||
// (a) Precedence constraints
|
||||
for (int j = 0; j < num_jobs; j++) {
|
||||
for (int i = 1; i < num_ops; i++) {
|
||||
float prev_end = (float)sol.data[j][i-1] + d_duration[j * num_ops + (i-1)];
|
||||
|
|
@ -72,7 +72,7 @@ struct JSPProblem : ProblemBase<JSPProblem, 8, 16> {
|
|||
}
|
||||
}
|
||||
|
||||
// (b) 机器冲突约束
|
||||
// (b) Machine conflict constraints
|
||||
int total = num_jobs * num_ops;
|
||||
for (int a = 0; a < total; a++) {
|
||||
int ja = a / num_ops, ia = a % num_ops;
|
||||
|
|
@ -151,28 +151,28 @@ struct JSPProblem : ProblemBase<JSPProblem, 8, 16> {
|
|||
};
|
||||
|
||||
// ============================================================
|
||||
// 编码方案 B:Permutation 多重集(工序排列编码)
|
||||
// Encoding B: Permutation multiset (operation sequence encoding)
|
||||
// ============================================================
|
||||
// data[0] 是长度 J*O 的排列,值域 [0, J),每个值出现 O 次
|
||||
// 从左到右扫描:第 t 次遇到值 j → 安排工件 j 的第 t 道工序
|
||||
// 贪心解码:每道工序安排在"最早可行时间"(满足工序顺序 + 机器空闲)
|
||||
// data[0] is a length-J*O sequence with values in [0, J), each appearing O times.
|
||||
// Left-to-right: t-th occurrence of j schedules job j's t-th operation.
|
||||
// Greedy decode: each op at earliest feasible time (precedence + machine free).
|
||||
|
||||
struct JSPPermProblem : ProblemBase<JSPPermProblem, 1, 64> {
|
||||
const int* d_machine; // 工序所需机器 [J*O]
|
||||
const float* d_duration; // 工序耗时 [J*O]
|
||||
const int* d_machine; // machine per op [J*O]
|
||||
const float* d_duration; // op duration [J*O]
|
||||
int num_jobs;
|
||||
int num_ops;
|
||||
int num_machines;
|
||||
|
||||
// 贪心解码:从排列生成调度方案,返回 makespan
|
||||
// Greedy decode: build schedule from permutation, return makespan
|
||||
__device__ float decode_and_makespan(const Sol& sol) const {
|
||||
int total = num_jobs * num_ops;
|
||||
int size = sol.dim2_sizes[0];
|
||||
if (size < total) return 1e9f;
|
||||
|
||||
float job_avail[8]; // 每个工件的下一道工序最早开始时间
|
||||
float mach_avail[8]; // 每台机器的最早空闲时间
|
||||
int job_next_op[8]; // 每个工件的下一道待安排工序编号
|
||||
float job_avail[8]; // earliest start for next op of each job
|
||||
float mach_avail[8]; // earliest machine free time
|
||||
int job_next_op[8]; // next op index to schedule per job
|
||||
|
||||
for (int j = 0; j < num_jobs; j++) { job_avail[j] = 0.0f; job_next_op[j] = 0; }
|
||||
for (int m = 0; m < num_machines; m++) mach_avail[m] = 0.0f;
|
||||
|
|
@ -182,13 +182,13 @@ struct JSPPermProblem : ProblemBase<JSPPermProblem, 1, 64> {
|
|||
int j = sol.data[0][k];
|
||||
if (j < 0 || j >= num_jobs) return 1e9f;
|
||||
int op = job_next_op[j];
|
||||
if (op >= num_ops) continue; // 该工件已安排完
|
||||
if (op >= num_ops) continue; // job already fully scheduled
|
||||
|
||||
int flat = j * num_ops + op;
|
||||
int m = d_machine[flat];
|
||||
float dur = d_duration[flat];
|
||||
|
||||
// 最早开始时间 = max(工件前序完成, 机器空闲)
|
||||
// Earliest start = max(job predecessor done, machine free)
|
||||
float start = fmaxf(job_avail[j], mach_avail[m]);
|
||||
float end = start + dur;
|
||||
|
||||
|
|
@ -212,7 +212,7 @@ struct JSPPermProblem : ProblemBase<JSPPermProblem, 1, 64> {
|
|||
}
|
||||
}
|
||||
|
||||
// 贪心解码天然满足约束,penalty 始终为 0
|
||||
// Greedy decode satisfies constraints; penalty is always 0
|
||||
__device__ float compute_penalty(const Sol& sol) const {
|
||||
return 0.0f;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/**
|
||||
* knapsack.cuh - 0-1 背包问题
|
||||
*
|
||||
* 继承 ProblemBase,使用 ObjDef 目标注册机制
|
||||
* knapsack.cuh - 0-1 knapsack
|
||||
*
|
||||
* Extends ProblemBase with ObjDef objective registration.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -10,13 +10,13 @@
|
|||
#include "operators.cuh"
|
||||
|
||||
struct KnapsackProblem : ProblemBase<KnapsackProblem, 1, 32> {
|
||||
// 问题数据(d_weights 是物品重量,非目标权重)
|
||||
// problem data (d_weights are item weights, not objective weights)
|
||||
const float* d_weights;
|
||||
const float* d_values;
|
||||
float capacity;
|
||||
int n;
|
||||
|
||||
// ---- 目标计算 ----
|
||||
// ---- objective evaluation ----
|
||||
__device__ float calc_total_value(const Sol& sol) const {
|
||||
float tv = 0.0f;
|
||||
const int* sel = sol.data[0];
|
||||
|
|
@ -26,7 +26,7 @@ struct KnapsackProblem : ProblemBase<KnapsackProblem, 1, 32> {
|
|||
return tv;
|
||||
}
|
||||
|
||||
// ---- 目标定义(OBJ_DEFS 与 compute_obj 必须一一对应)----
|
||||
// ---- objective defs (OBJ_DEFS must match compute_obj one-to-one) ----
|
||||
static constexpr ObjDef OBJ_DEFS[] = {
|
||||
{ObjDir::Maximize, 1.0f, 0.0f}, // case 0: calc_total_value
|
||||
};
|
||||
|
|
@ -55,7 +55,7 @@ struct KnapsackProblem : ProblemBase<KnapsackProblem, 1, 32> {
|
|||
return cfg;
|
||||
}
|
||||
|
||||
// ---- shared memory 接口 ----
|
||||
// ---- shared memory interface ----
|
||||
size_t shared_mem_bytes() const {
|
||||
return 2 * (size_t)n * sizeof(float);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
/**
|
||||
* load_balance.cuh - 离散负载均衡问题(Integer 编码验证)
|
||||
*
|
||||
* N 个任务分配到 M 台机器,每个任务有一个处理时间 p[i]。
|
||||
* 决策变量:data[0][i] ∈ [0, M-1],表示任务 i 分配到哪台机器。
|
||||
* 目标:最小化 makespan(最大机器负载)。
|
||||
*
|
||||
* 已知 NP-hard(等价于 multiprocessor scheduling / load balancing)。
|
||||
* LPT(最长处理时间优先)贪心可得 4/3 近似。
|
||||
* load_balance.cuh - discrete load balancing (Integer encoding sanity check)
|
||||
*
|
||||
* N tasks on M machines, processing time p[i] per task.
|
||||
* Decision: data[0][i] in [0, M-1] = machine for task i.
|
||||
* Objective: minimize makespan (max machine load).
|
||||
*
|
||||
* NP-hard (same as multiprocessor scheduling / load balancing).
|
||||
* LPT (longest processing time first) greedy achieves 4/3 approximation.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -14,12 +14,12 @@
|
|||
#include "cuda_utils.cuh"
|
||||
|
||||
struct LoadBalanceProblem : ProblemBase<LoadBalanceProblem, 1, 64> {
|
||||
const float* d_proc_time; // 任务处理时间 [N]
|
||||
int n; // 任务数
|
||||
int m; // 机器数
|
||||
const float* d_proc_time; // task processing times [N]
|
||||
int n; // number of tasks
|
||||
int m; // number of machines
|
||||
|
||||
__device__ float calc_makespan(const Sol& sol) const {
|
||||
float load[32] = {}; // 最多 32 台机器
|
||||
float load[32] = {}; // at most 32 machines
|
||||
int size = sol.dim2_sizes[0];
|
||||
for (int i = 0; i < size; i++) {
|
||||
int machine = sol.data[0][i];
|
||||
|
|
@ -43,7 +43,7 @@ struct LoadBalanceProblem : ProblemBase<LoadBalanceProblem, 1, 64> {
|
|||
}
|
||||
|
||||
__device__ float compute_penalty(const Sol& sol) const {
|
||||
return 0.0f; // 无约束(任何分配都合法)
|
||||
return 0.0f; // no side constraints (any assignment is feasible)
|
||||
}
|
||||
|
||||
ProblemConfig config() const {
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
/**
|
||||
* qap.cuh - 二次分配问题 (Quadratic Assignment Problem)
|
||||
*
|
||||
* N 个设施分配到 N 个位置(排列编码)。
|
||||
* 决策变量:data[0][i] = 设施 i 分配到的位置。
|
||||
* 目标:Minimize sum(flow[i][j] * dist[perm[i]][perm[j]])
|
||||
*
|
||||
* 验证实例:自定义 5x5
|
||||
* flow: 设施间的物流量
|
||||
* dist: 位置间的距离
|
||||
* 已知最优 = 58
|
||||
* qap.cuh - Quadratic Assignment Problem (QAP)
|
||||
*
|
||||
* Assign N facilities to N locations (permutation encoding).
|
||||
* Decision: data[0][i] = location assigned to facility i.
|
||||
* Objective: Minimize sum(flow[i][j] * dist[perm[i]][perm[j]])
|
||||
*
|
||||
* Validation instance: custom 5x5
|
||||
* flow: inter-facility flow
|
||||
* dist: inter-location distances
|
||||
* known optimum = 58
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -16,8 +16,10 @@
|
|||
#include "cuda_utils.cuh"
|
||||
|
||||
struct QAPProblem : ProblemBase<QAPProblem, 1, 32> {
|
||||
const float* d_flow; // 物流量矩阵 [N*N]
|
||||
const float* d_dist; // 距离矩阵 [N*N]
|
||||
const float* d_flow; // flow matrix [N*N] (device)
|
||||
const float* d_dist; // distance matrix [N*N] (device)
|
||||
const float* h_flow; // flow matrix [N*N] (host, for clone_to_device)
|
||||
const float* h_dist; // distance matrix [N*N] (host, for clone_to_device)
|
||||
int n;
|
||||
|
||||
__device__ float calc_cost(const Sol& sol) const {
|
||||
|
|
@ -64,14 +66,16 @@ struct QAPProblem : ProblemBase<QAPProblem, 1, 32> {
|
|||
d_dist = sd;
|
||||
}
|
||||
|
||||
static QAPProblem create(const float* h_flow, const float* h_dist, int n) {
|
||||
static QAPProblem create(const float* h_flow_in, const float* h_dist_in, int n) {
|
||||
QAPProblem prob;
|
||||
prob.n = n;
|
||||
prob.h_flow = h_flow_in;
|
||||
prob.h_dist = h_dist_in;
|
||||
float *df, *dd;
|
||||
CUDA_CHECK(cudaMalloc(&df, sizeof(float) * n * n));
|
||||
CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * n * n));
|
||||
CUDA_CHECK(cudaMemcpy(df, h_flow, sizeof(float) * n * n, cudaMemcpyHostToDevice));
|
||||
CUDA_CHECK(cudaMemcpy(dd, h_dist, sizeof(float) * n * n, cudaMemcpyHostToDevice));
|
||||
CUDA_CHECK(cudaMemcpy(df, h_flow_in, sizeof(float) * n * n, cudaMemcpyHostToDevice));
|
||||
CUDA_CHECK(cudaMemcpy(dd, h_dist_in, sizeof(float) * n * n, cudaMemcpyHostToDevice));
|
||||
prob.d_flow = df; prob.d_dist = dd;
|
||||
return prob;
|
||||
}
|
||||
|
|
@ -82,18 +86,12 @@ struct QAPProblem : ProblemBase<QAPProblem, 1, 32> {
|
|||
d_flow = nullptr; d_dist = nullptr;
|
||||
}
|
||||
|
||||
// v5.0: 多 GPU 协同 — 克隆到指定 GPU
|
||||
// v5.0: multi-GPU — clone onto a given device
|
||||
QAPProblem* clone_to_device(int gpu_id) const override {
|
||||
int orig_device;
|
||||
CUDA_CHECK(cudaGetDevice(&orig_device));
|
||||
|
||||
// 先下载数据到 host(从当前设备)
|
||||
float* h_flow = new float[n * n];
|
||||
float* h_dist = new float[n * n];
|
||||
CUDA_CHECK(cudaMemcpy(h_flow, d_flow, sizeof(float) * n * n, cudaMemcpyDeviceToHost));
|
||||
CUDA_CHECK(cudaMemcpy(h_dist, d_dist, sizeof(float) * n * n, cudaMemcpyDeviceToHost));
|
||||
|
||||
// 切换到目标 GPU 并上传
|
||||
// Use host-side matrices directly (no D2H needed)
|
||||
CUDA_CHECK(cudaSetDevice(gpu_id));
|
||||
float *df, *dd;
|
||||
CUDA_CHECK(cudaMalloc(&df, sizeof(float) * n * n));
|
||||
|
|
@ -101,15 +99,12 @@ struct QAPProblem : ProblemBase<QAPProblem, 1, 32> {
|
|||
CUDA_CHECK(cudaMemcpy(df, h_flow, sizeof(float) * n * n, cudaMemcpyHostToDevice));
|
||||
CUDA_CHECK(cudaMemcpy(dd, h_dist, sizeof(float) * n * n, cudaMemcpyHostToDevice));
|
||||
|
||||
delete[] h_flow;
|
||||
delete[] h_dist;
|
||||
|
||||
// 恢复原设备
|
||||
CUDA_CHECK(cudaSetDevice(orig_device));
|
||||
|
||||
// 创建新实例
|
||||
QAPProblem* new_prob = new QAPProblem();
|
||||
new_prob->n = n;
|
||||
new_prob->h_flow = h_flow;
|
||||
new_prob->h_dist = h_dist;
|
||||
new_prob->d_flow = df;
|
||||
new_prob->d_dist = dd;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/**
|
||||
* schedule.cuh - 排班问题
|
||||
*
|
||||
* 继承 ProblemBase,使用 ObjDef 目标注册机制
|
||||
* 2 个目标:总成本(min)+ 不公平度(min,权重更高)
|
||||
* schedule.cuh - staff scheduling
|
||||
*
|
||||
* Extends ProblemBase with ObjDef objective registration.
|
||||
* Two objectives: total cost (min) + unfairness (min, higher weight).
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -14,7 +14,7 @@ struct ScheduleProblem : ProblemBase<ScheduleProblem, 8, 16> {
|
|||
const float* d_cost;
|
||||
int days, emps, required;
|
||||
|
||||
// ---- 目标计算 ----
|
||||
// ---- objective evaluation ----
|
||||
__device__ float calc_total_cost(const Sol& sol) const {
|
||||
float total = 0.0f;
|
||||
for (int d = 0; d < days; d++)
|
||||
|
|
@ -37,7 +37,7 @@ struct ScheduleProblem : ProblemBase<ScheduleProblem, 8, 16> {
|
|||
return (float)(max_w - min_w);
|
||||
}
|
||||
|
||||
// ---- 目标定义(OBJ_DEFS 与 compute_obj 必须一一对应)----
|
||||
// ---- objective defs (OBJ_DEFS must match compute_obj one-to-one) ----
|
||||
static constexpr ObjDef OBJ_DEFS[] = {
|
||||
{ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_total_cost
|
||||
{ObjDir::Minimize, 5.0f, 0.0f}, // case 1: calc_unfairness
|
||||
|
|
@ -71,9 +71,9 @@ struct ScheduleProblem : ProblemBase<ScheduleProblem, 8, 16> {
|
|||
return cfg;
|
||||
}
|
||||
|
||||
// 默认回退全量(基类行为)— 不需要覆盖 evaluate_move
|
||||
// Default full re-eval (base behavior) — no need to override evaluate_move
|
||||
|
||||
// ---- shared memory 接口 ----
|
||||
// ---- shared memory interface ----
|
||||
size_t shared_mem_bytes() const {
|
||||
return (size_t)days * emps * sizeof(float);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/**
|
||||
* tsp.cuh - TSP 问题定义
|
||||
*
|
||||
* 继承 ProblemBase,使用 ObjDef 目标注册机制
|
||||
* tsp.cuh - Traveling Salesman Problem (TSP) definition
|
||||
*
|
||||
* Extends ProblemBase with ObjDef objective registration.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -10,12 +10,12 @@
|
|||
#include "operators.cuh"
|
||||
|
||||
struct TSPProblem : ProblemBase<TSPProblem, 1, 64> {
|
||||
// 问题数据
|
||||
// problem data
|
||||
const float* d_dist;
|
||||
const float* h_dist; // host 端距离矩阵(用于 init_relation_matrix)
|
||||
const float* h_dist; // host distance matrix (for init_relation_matrix)
|
||||
int n;
|
||||
|
||||
// ---- 目标计算 ----
|
||||
// ---- objective evaluation ----
|
||||
__device__ float calc_total_distance(const Sol& sol) const {
|
||||
float total = 0.0f;
|
||||
const int* route = sol.data[0];
|
||||
|
|
@ -25,7 +25,7 @@ struct TSPProblem : ProblemBase<TSPProblem, 1, 64> {
|
|||
return total;
|
||||
}
|
||||
|
||||
// ---- 目标定义(OBJ_DEFS 与 compute_obj 必须一一对应)----
|
||||
// ---- objective defs (OBJ_DEFS must match compute_obj one-to-one) ----
|
||||
static constexpr ObjDef OBJ_DEFS[] = {
|
||||
{ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_total_distance
|
||||
};
|
||||
|
|
@ -37,10 +37,10 @@ struct TSPProblem : ProblemBase<TSPProblem, 1, 64> {
|
|||
}
|
||||
|
||||
__device__ float compute_penalty(const Sol& sol) const {
|
||||
return 0.0f; // TSP 无约束
|
||||
return 0.0f; // TSP has no side constraints
|
||||
}
|
||||
|
||||
// ---- config(编码/维度部分,目标由基类自动填充)----
|
||||
// ---- config (encoding/dims; objectives filled by base class) ----
|
||||
ProblemConfig config() const {
|
||||
ProblemConfig cfg;
|
||||
cfg.encoding = EncodingType::Permutation;
|
||||
|
|
@ -49,7 +49,7 @@ struct TSPProblem : ProblemBase<TSPProblem, 1, 64> {
|
|||
return cfg;
|
||||
}
|
||||
|
||||
// ---- shared memory 接口 ----
|
||||
// ---- shared memory interface ----
|
||||
static constexpr size_t SMEM_LIMIT = 48 * 1024;
|
||||
|
||||
size_t shared_mem_bytes() const {
|
||||
|
|
@ -69,7 +69,7 @@ struct TSPProblem : ProblemBase<TSPProblem, 1, 64> {
|
|||
d_dist = sd;
|
||||
}
|
||||
|
||||
// 距离先验:距离近 → G/O 分数高
|
||||
// Distance prior: closer cities → higher G/O scores
|
||||
void init_relation_matrix(float* G, float* O, int N) const {
|
||||
if (!h_dist || N != n) return;
|
||||
float max_d = 0.0f;
|
||||
|
|
@ -108,21 +108,21 @@ struct TSPProblem : ProblemBase<TSPProblem, 1, 64> {
|
|||
h_dist = nullptr;
|
||||
}
|
||||
|
||||
// v5.0: 多 GPU 协同 — 克隆到指定 GPU
|
||||
// v5.0: multi-GPU — clone onto a given device
|
||||
TSPProblem* clone_to_device(int gpu_id) const override {
|
||||
int orig_device;
|
||||
CUDA_CHECK(cudaGetDevice(&orig_device));
|
||||
CUDA_CHECK(cudaSetDevice(gpu_id));
|
||||
|
||||
// 分配设备内存并拷贝距离矩阵
|
||||
// Allocate device memory and copy distance matrix
|
||||
float* dd;
|
||||
CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * n * n));
|
||||
CUDA_CHECK(cudaMemcpy(dd, h_dist, sizeof(float) * n * n, cudaMemcpyHostToDevice));
|
||||
|
||||
// 恢复原设备
|
||||
// Restore original device
|
||||
CUDA_CHECK(cudaSetDevice(orig_device));
|
||||
|
||||
// 创建新的 Problem 实例(在 host 端)
|
||||
// Create new Problem instance (on host)
|
||||
TSPProblem* new_prob = new TSPProblem();
|
||||
new_prob->n = n;
|
||||
new_prob->h_dist = h_dist;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/**
|
||||
* tsp_large.cuh - 大规模 TSP 问题定义 (最多 256 城市)
|
||||
*
|
||||
* 继承 ProblemBase,逻辑与 tsp.cuh 一致,仅 D2 上限不同
|
||||
* tsp_large.cuh - large-scale TSP definition (up to 256 cities)
|
||||
*
|
||||
* Same logic as tsp.cuh under ProblemBase; only D2 cap differs.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -14,7 +14,7 @@ struct TSPLargeProblem : ProblemBase<TSPLargeProblem, 1, 256> {
|
|||
const float* h_dist;
|
||||
int n;
|
||||
|
||||
// ---- 目标计算 ----
|
||||
// ---- objective evaluation ----
|
||||
__device__ float calc_total_distance(const Sol& sol) const {
|
||||
float total = 0.0f;
|
||||
const int* route = sol.data[0];
|
||||
|
|
@ -24,7 +24,7 @@ struct TSPLargeProblem : ProblemBase<TSPLargeProblem, 1, 256> {
|
|||
return total;
|
||||
}
|
||||
|
||||
// ---- 目标定义(OBJ_DEFS 与 compute_obj 必须一一对应)----
|
||||
// ---- objective defs (OBJ_DEFS must match compute_obj one-to-one) ----
|
||||
static constexpr ObjDef OBJ_DEFS[] = {
|
||||
{ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_total_distance
|
||||
};
|
||||
|
|
@ -54,7 +54,7 @@ struct TSPLargeProblem : ProblemBase<TSPLargeProblem, 1, 256> {
|
|||
return need <= SMEM_LIMIT ? need : 0;
|
||||
}
|
||||
|
||||
// 距离矩阵的实际大小(不管是否放进 smem)
|
||||
// Actual distance matrix size (whether or not placed in smem)
|
||||
size_t working_set_bytes() const {
|
||||
return (size_t)n * n * sizeof(float);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
/**
|
||||
* tsp_xlarge.cuh - 超大规模 TSP 问题定义 (最多 512 城市)
|
||||
*
|
||||
* 继承 ProblemBase,逻辑与 tsp_large.cuh 一致,D2=512
|
||||
* 注意:距离矩阵 512×512×4B = 1MB,远超 48KB shared memory
|
||||
* 因此 shared_mem_bytes() 返回 0,距离矩阵留在 global memory
|
||||
* tsp_xlarge.cuh - very large TSP definition (up to 512 cities)
|
||||
*
|
||||
* Same as tsp_large.cuh under ProblemBase, with D2=512.
|
||||
* Note: distance matrix 512×512×4B = 1MB, far above 48KB shared memory,
|
||||
* so shared_mem_bytes() returns 0 and the matrix stays in global memory.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
struct TSPXLargeProblem : ProblemBase<TSPXLargeProblem, 1, 512> {
|
||||
const float* d_dist;
|
||||
const float* h_dist; // host 端距离矩阵(用于 init_relation_matrix)
|
||||
const float* h_dist; // host distance matrix (for init_relation_matrix)
|
||||
int n;
|
||||
|
||||
__device__ float calc_total_distance(const Sol& sol) const {
|
||||
|
|
@ -45,7 +45,7 @@ struct TSPXLargeProblem : ProblemBase<TSPXLargeProblem, 1, 512> {
|
|||
return cfg;
|
||||
}
|
||||
|
||||
// 距离矩阵太大,不放 shared memory
|
||||
// Distance matrix too large for shared memory
|
||||
size_t shared_mem_bytes() const { return 0; }
|
||||
__device__ void load_shared(char*, int, int) {}
|
||||
|
||||
|
|
@ -53,10 +53,10 @@ struct TSPXLargeProblem : ProblemBase<TSPXLargeProblem, 1, 512> {
|
|||
return (size_t)n * n * sizeof(float);
|
||||
}
|
||||
|
||||
// 用距离矩阵初始化 G/O 先验:距离近 → 分数高
|
||||
// Initialize G/O priors from distances: closer → higher score
|
||||
void init_relation_matrix(float* G, float* O, int N) const {
|
||||
if (!h_dist || N != n) return;
|
||||
// 找最大距离用于归一化
|
||||
// Max distance for normalization
|
||||
float max_d = 0.0f;
|
||||
for (int i = 0; i < N; i++)
|
||||
for (int j = 0; j < N; j++)
|
||||
|
|
@ -66,10 +66,10 @@ struct TSPXLargeProblem : ProblemBase<TSPXLargeProblem, 1, 512> {
|
|||
for (int i = 0; i < N; i++) {
|
||||
for (int j = 0; j < N; j++) {
|
||||
if (i == j) continue;
|
||||
// 距离近 → G 高(分组倾向强)
|
||||
// Closer → higher G (stronger grouping signal)
|
||||
float proximity = 1.0f - h_dist[i * N + j] / max_d;
|
||||
G[i * N + j] = proximity * 0.3f; // 初始信号不要太强,留空间给 EMA
|
||||
// 距离近 → O 也给一点信号(对称的,不偏向任何方向)
|
||||
G[i * N + j] = proximity * 0.3f; // keep initial signal moderate for EMA headroom
|
||||
// Closer → small O signal too (symmetric, no directional bias)
|
||||
O[i * N + j] = proximity * 0.1f;
|
||||
}
|
||||
}
|
||||
|
|
@ -84,7 +84,7 @@ struct TSPXLargeProblem : ProblemBase<TSPXLargeProblem, 1, 512> {
|
|||
static TSPXLargeProblem create(const float* h_dist_ptr, int n) {
|
||||
TSPXLargeProblem prob;
|
||||
prob.n = n;
|
||||
prob.h_dist = h_dist_ptr; // 保留 host 指针
|
||||
prob.h_dist = h_dist_ptr; // keep host pointer
|
||||
float* dd;
|
||||
CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * n * n));
|
||||
CUDA_CHECK(cudaMemcpy(dd, h_dist_ptr, sizeof(float) * n * n, cudaMemcpyHostToDevice));
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/**
|
||||
* vrp.cuh - 容量约束车辆路径问题 (CVRP)
|
||||
*
|
||||
* 继承 ProblemBase,使用 ObjDef 目标注册机制
|
||||
* 多行编码(D1=K 条路线,分区初始化 + 跨行算子)
|
||||
* vrp.cuh - Capacitated Vehicle Routing Problem (CVRP)
|
||||
*
|
||||
* Extends ProblemBase with ObjDef objective registration.
|
||||
* Multi-row encoding (D1 = K routes, partition init + cross-row operators).
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -12,11 +12,11 @@
|
|||
#include "gpu_cache.cuh"
|
||||
|
||||
struct VRPProblem : ProblemBase<VRPProblem, 8, 64> {
|
||||
// GPU 数据
|
||||
// GPU data
|
||||
const float* d_dist;
|
||||
const float* d_demand;
|
||||
const float* h_dist; // host 端距离矩阵(含 depot,用于 init_relation_matrix)
|
||||
const float* h_demand; // host 端需求数组(用于 clone_to_device)
|
||||
const float* h_dist; // host distance matrix (includes depot; for init_relation_matrix)
|
||||
const float* h_demand; // host demand array (for clone_to_device)
|
||||
int n;
|
||||
int stride;
|
||||
float capacity;
|
||||
|
|
@ -24,7 +24,7 @@ struct VRPProblem : ProblemBase<VRPProblem, 8, 64> {
|
|||
int max_vehicles;
|
||||
GpuCache cache;
|
||||
|
||||
// ---- 目标计算 ----
|
||||
// ---- objective evaluation ----
|
||||
__device__ float compute_route_dist(const int* route, int size) const {
|
||||
if (size == 0) return 0.0f;
|
||||
float dist = 0.0f;
|
||||
|
|
@ -61,7 +61,7 @@ struct VRPProblem : ProblemBase<VRPProblem, 8, 64> {
|
|||
return total;
|
||||
}
|
||||
|
||||
// ---- 目标定义(OBJ_DEFS 与 compute_obj 必须一一对应)----
|
||||
// ---- objective defs (OBJ_DEFS must match compute_obj one-to-one) ----
|
||||
static constexpr ObjDef OBJ_DEFS[] = {
|
||||
{ObjDir::Minimize, 1.0f, 0.0f}, // case 0: calc_total_distance
|
||||
};
|
||||
|
|
@ -102,7 +102,7 @@ struct VRPProblem : ProblemBase<VRPProblem, 8, 64> {
|
|||
return cfg;
|
||||
}
|
||||
|
||||
// ---- shared memory 接口 ----
|
||||
// ---- shared memory interface ----
|
||||
static constexpr size_t SMEM_LIMIT = 48 * 1024;
|
||||
|
||||
size_t shared_mem_bytes() const {
|
||||
|
|
@ -129,14 +129,14 @@ struct VRPProblem : ProblemBase<VRPProblem, 8, 64> {
|
|||
void enable_cache(int cap = 65536) { cache = GpuCache::allocate(cap); }
|
||||
void print_cache_stats() const { cache.print_stats(); }
|
||||
|
||||
// 距离先验:客户间距离近 → G/O 分数高
|
||||
// 注意:h_dist 含 depot(stride×stride),元素编号 0..n-1 对应 node 1..n
|
||||
// Distance prior: closer customers → higher G/O scores
|
||||
// Note: h_dist includes depot (stride×stride); indices 0..n-1 map to nodes 1..n
|
||||
void init_relation_matrix(float* G, float* O, int N) const {
|
||||
if (!h_dist || N != n) return;
|
||||
float max_d = 0.0f;
|
||||
for (int i = 0; i < N; i++)
|
||||
for (int j = 0; j < N; j++) {
|
||||
float d = h_dist[(i + 1) * stride + (j + 1)]; // 跳过 depot
|
||||
float d = h_dist[(i + 1) * stride + (j + 1)]; // skip depot
|
||||
if (d > max_d) max_d = d;
|
||||
}
|
||||
if (max_d <= 0.0f) return;
|
||||
|
|
@ -161,7 +161,7 @@ struct VRPProblem : ProblemBase<VRPProblem, 8, 64> {
|
|||
prob.max_vehicles = max_vehicles;
|
||||
prob.cache = GpuCache::disabled();
|
||||
prob.h_dist = h_dist_ptr;
|
||||
prob.h_demand = h_demand_ptr; // 保存 host 端指针
|
||||
prob.h_demand = h_demand_ptr; // keep host pointer
|
||||
|
||||
int n_nodes = n + 1;
|
||||
float* dd;
|
||||
|
|
@ -185,13 +185,13 @@ struct VRPProblem : ProblemBase<VRPProblem, 8, 64> {
|
|||
cache.destroy();
|
||||
}
|
||||
|
||||
// v5.0: 多 GPU 协同 — 克隆到指定 GPU
|
||||
// v5.0: multi-GPU — clone onto a given device
|
||||
VRPProblem* clone_to_device(int gpu_id) const override {
|
||||
int orig_device;
|
||||
CUDA_CHECK(cudaGetDevice(&orig_device));
|
||||
CUDA_CHECK(cudaSetDevice(gpu_id));
|
||||
|
||||
// 从 host 端数据直接拷贝到目标 GPU(避免跨设备 D2H 拷贝)
|
||||
// Copy from host straight to target GPU (avoid cross-device D2H staging)
|
||||
int n_nodes = n + 1;
|
||||
float* dd;
|
||||
CUDA_CHECK(cudaMalloc(&dd, sizeof(float) * n_nodes * n_nodes));
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
/**
|
||||
* vrptw.cuh - 带时间窗的车辆路径问题 (VRPTW)
|
||||
*
|
||||
* 在 CVRP 基础上增加时间窗约束。
|
||||
* 编码:Perm 多行分区(同 CVRP),data[r][j] = 路线 r 的第 j 个客户。
|
||||
* 目标:Minimize 总距离。
|
||||
* 约束:(a) 容量约束, (b) 时间窗约束(到达时间必须 ≤ latest,早到需等待)。
|
||||
*
|
||||
* 验证实例:8 客户 3 车, 手工设计坐标+时间窗, 确保有已知可行解。
|
||||
* vrptw.cuh - Vehicle Routing Problem with Time Windows (VRPTW)
|
||||
*
|
||||
* CVRP plus time window constraints.
|
||||
* Encoding: multi-row perm partition (same as CVRP); data[r][j] = j-th customer on route r.
|
||||
* Objective: minimize total distance.
|
||||
* Constraints: (a) capacity, (b) time windows (arrival ≤ latest; early arrival waits).
|
||||
*
|
||||
* Validation instance: 8 customers, 3 vehicles; hand-crafted coords + windows with known feasible solution.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
|
@ -14,12 +14,12 @@
|
|||
#include "cuda_utils.cuh"
|
||||
|
||||
struct VRPTWProblem : ProblemBase<VRPTWProblem, 8, 64> {
|
||||
const float* d_dist; // 距离矩阵 [(n+1)*(n+1)](含 depot)
|
||||
const float* d_demand; // 需求 [n]
|
||||
const float* d_earliest; // 最早服务时间 [n+1](含 depot)
|
||||
const float* d_latest; // 最晚服务时间 [n+1](含 depot)
|
||||
const float* d_service; // 服务耗时 [n+1](含 depot)
|
||||
int n; // 客户数(不含 depot)
|
||||
const float* d_dist; // distance matrix [(n+1)*(n+1)] (includes depot)
|
||||
const float* d_demand; // demand [n]
|
||||
const float* d_earliest; // earliest service time [n+1] (includes depot)
|
||||
const float* d_latest; // latest service time [n+1] (includes depot)
|
||||
const float* d_service; // service time [n+1] (includes depot)
|
||||
int n; // number of customers (excludes depot)
|
||||
int stride; // n+1
|
||||
float capacity;
|
||||
int num_vehicles;
|
||||
|
|
@ -63,30 +63,30 @@ struct VRPTWProblem : ProblemBase<VRPTWProblem, 8, 64> {
|
|||
if (size == 0) continue;
|
||||
active++;
|
||||
|
||||
// 容量约束
|
||||
// Capacity constraint
|
||||
float load = 0.0f;
|
||||
for (int j = 0; j < size; j++)
|
||||
load += d_demand[sol.data[r][j]];
|
||||
if (load > capacity)
|
||||
penalty += (load - capacity) * 100.0f;
|
||||
|
||||
// 时间窗约束:模拟路线行驶
|
||||
// Time windows: simulate route travel
|
||||
float time = 0.0f;
|
||||
int prev = 0;
|
||||
for (int j = 0; j < size; j++) {
|
||||
int node = sol.data[r][j] + 1;
|
||||
float travel = d_dist[prev * stride + node];
|
||||
time += travel;
|
||||
// 早到需等待
|
||||
// Wait if early
|
||||
if (time < d_earliest[node])
|
||||
time = d_earliest[node];
|
||||
// 迟到产生惩罚
|
||||
// Penalize lateness
|
||||
if (time > d_latest[node])
|
||||
penalty += (time - d_latest[node]) * 50.0f;
|
||||
time += d_service[node];
|
||||
prev = node;
|
||||
}
|
||||
// 返回 depot 的时间窗
|
||||
// Time window returning to depot
|
||||
float return_time = time + d_dist[prev * stride + 0];
|
||||
if (return_time > d_latest[0])
|
||||
penalty += (return_time - d_latest[0]) * 50.0f;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue