mirror of
https://github.com/L-yang-yang/cugenopt.git
synced 2026-05-08 14:12:36 +02:00
Initial commit: cuGenOpt GPU optimization solver
This commit is contained in:
commit
fc5a0ff4af
117 changed files with 25545 additions and 0 deletions
93
benchmark/experiments/e0_diagnosis/run_diagnosis.sh
Executable file
93
benchmark/experiments/e0_diagnosis/run_diagnosis.sh
Executable file
|
|
@ -0,0 +1,93 @@
|
|||
#!/bin/bash
# GenSolver performance diagnosis - one-shot launcher script.
#
# Usage:
#   ./run_diagnosis.sh [host]            # run the full diagnosis ("all" mode)
#   ./run_diagnosis.sh [host] profile    # nvprof profiling only
#
# host: tc_new (T4) | tch (V100); defaults to tc_new

set -e

# Local layout, resolved from this script's own location:
#   DIAG_DIR  - directory containing this script
#   BENCH_DIR - parent of DIAG_DIR
#   ROOT_DIR  - parent of BENCH_DIR
DIAG_DIR="$(cd "$(dirname "$0")" && pwd)"
BENCH_DIR="$(dirname "$DIAG_DIR")"
ROOT_DIR="$(dirname "$BENCH_DIR")"
RESULTS_DIR="$DIAG_DIR/results"

# Positional arguments: $1 = remote host (default tc_new), $2 = mode (default all).
REMOTE_HOST="${1:-tc_new}"
MODE="${2:-all}"
# The tilde is intentionally quoted so it stays literal locally; the value is
# only ever interpolated into remote ssh commands and scp destinations.
# shellcheck disable=SC2088
REMOTE_DIR="~/gensolver"

echo ">>> 使用服务器: $REMOTE_HOST"

# Target GPU architecture: sm_75 unless the host is "tch", which uses sm_70.
ARCH="sm_75"
if [[ "$REMOTE_HOST" == "tch" ]]; then
    ARCH="sm_70"
fi

# Kept as a single string because it is spliced into a remote command line.
# The -I paths are relative to the remote e0_diagnosis directory.
NVCC_CMD="nvcc -arch=$ARCH -O2 -std=c++17 --extended-lambda -I ../../prototype/core -I ../../prototype/problems"

mkdir -p "$RESULTS_DIR"

echo "=========================================="
echo " GenSolver 性能诊断"
echo " 时间: $(date)"
echo " 服务器: $REMOTE_HOST (arch=$ARCH)"
echo "=========================================="
|
||||
|
||||
#######################################
# Upload the sources needed for the diagnosis build to the remote host.
# Globals (read): REMOTE_HOST, REMOTE_DIR, ROOT_DIR, DIAG_DIR
# Arguments: none
# Outputs: progress messages on stdout
# Creates the remote directory tree, then copies prototype/core/*.cuh,
# prototype/problems/*.cuh and bench_diagnosis.cu into it.
#######################################
sync_code() {
    local remote_diag="$REMOTE_DIR/benchmark/experiments/e0_diagnosis"

    echo ">>> 同步代码到 $REMOTE_HOST ..."
    # REMOTE_DIR is expanded locally into the command string; the remote shell
    # then interprets the resulting paths.
    ssh "$REMOTE_HOST" "mkdir -p $REMOTE_DIR/prototype/core $REMOTE_DIR/prototype/problems $remote_diag"
    scp "$ROOT_DIR"/prototype/core/*.cuh "$REMOTE_HOST:$REMOTE_DIR/prototype/core/"
    scp "$ROOT_DIR"/prototype/problems/*.cuh "$REMOTE_HOST:$REMOTE_DIR/prototype/problems/"
    scp "$DIAG_DIR"/bench_diagnosis.cu "$REMOTE_HOST:$remote_diag/"
    echo " done."
}
|
||||
|
||||
#######################################
# Build bench_diagnosis on the remote host.
# Globals (read): REMOTE_HOST, REMOTE_DIR, ARCH, NVCC_CMD
# Arguments: none
# Outputs: progress messages and the remote compiler output on stdout
#######################################
compile() {
    echo ">>> 编译 bench_diagnosis (arch=$ARCH) ..."
    # \$PATH is escaped so it is expanded by the remote shell, not locally;
    # the remote command merges the compiler's stderr into stdout (2>&1).
    ssh "$REMOTE_HOST" "export PATH=/usr/local/cuda/bin:\$PATH && cd $REMOTE_DIR/benchmark/experiments/e0_diagnosis && $NVCC_CMD -o bench_diagnosis bench_diagnosis.cu 2>&1"
    echo " done."
}
|
||||
|
||||
#######################################
# Run the full diagnosis remotely and store its output as a local CSV file.
# Globals (read): REMOTE_HOST, REMOTE_DIR, RESULTS_DIR
# Arguments: none
# Outputs: progress messages, the result path and a data-row count on stdout;
#          writes $RESULTS_DIR/bench_<gpu>_<timestamp>.csv
#######################################
run_all() {
    echo ">>> 运行完整诊断 ..."

    # Declaration and assignment are split so a failing command is not hidden
    # behind the exit status of `local`. A failed GPU-name query is tolerated
    # (as before) and simply yields the fallback name below.
    local gpu_name
    gpu_name=$(ssh "$REMOTE_HOST" "nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1") || gpu_name=""
    gpu_name=${gpu_name// /_}   # spaces -> underscores, for use in a file name
    if [[ -z "$gpu_name" ]]; then
        gpu_name="unknown"      # avoid a "bench__<timestamp>.csv" file name
    fi

    local stamp outfile
    stamp=$(date +%Y%m%d_%H%M%S)
    outfile="bench_${gpu_name}_${stamp}.csv"

    # NOTE(review): in the remote command, "2>&1 >/tmp/diag_out.csv" points
    # stderr at the ssh stdout stream before stdout is redirected to the temp
    # file, so any stderr text from bench_diagnosis lands in the local result
    # file ahead of the CSV. Kept as-is; confirm whether that is intended.
    ssh "$REMOTE_HOST" "export PATH=/usr/local/cuda/bin:\$PATH && cd $REMOTE_DIR/benchmark/experiments/e0_diagnosis && ./bench_diagnosis all 2>&1 >/tmp/diag_out.csv && cat /tmp/diag_out.csv" > "$RESULTS_DIR/$outfile"

    echo " 结果: $RESULTS_DIR/$outfile"

    local lines rows
    lines=$(wc -l < "$RESULTS_DIR/$outfile" 2>/dev/null) || lines=0
    # One line is subtracted (presumably the CSV header row); clamp at zero so
    # an empty result file does not report -1 rows.
    rows=$(( lines > 0 ? lines - 1 : 0 ))
    echo " 数据行: $rows"
}
|
||||
|
||||
#######################################
# Helper: run bench_diagnosis under nvprof on the remote host for one preset,
# showing the output and saving a copy locally.
# Globals (read): REMOTE_HOST, REMOTE_DIR, RESULTS_DIR
# Arguments: $1 - preset name passed to bench_diagnosis (e.g. baseline)
# Outputs: remote output on stdout; also written to
#          $RESULTS_DIR/nvprof_<preset>_<host>.txt
#######################################
_nvprof_preset() {
    local preset=$1
    ssh "$REMOTE_HOST" "export PATH=/usr/local/cuda/bin:\$PATH && cd $REMOTE_DIR/benchmark/experiments/e0_diagnosis && nvprof --print-gpu-summary ./bench_diagnosis $preset 2>&1" \
        | tee "$RESULTS_DIR/nvprof_${preset}_${REMOTE_HOST}.txt"
}

#######################################
# Profile the "baseline" and "default" presets with nvprof.
# Globals (read): REMOTE_HOST, REMOTE_DIR, RESULTS_DIR (via _nvprof_preset)
# Arguments: none
# Outputs: section banners and profiler output on stdout
#######################################
run_profile() {
    echo ">>> 运行 nvprof profiling ..."

    echo "--- baseline (batch=2000, AOS=off) ---"
    _nvprof_preset baseline

    echo ""
    echo "--- default (batch=50, AOS=on) ---"
    _nvprof_preset default
}
|
||||
|
||||
# Reject an unknown mode up front, before any code is synced to or compiled on
# the remote host. Usage errors go to stderr.
case "$MODE" in
    all|profile) ;;
    *)
        echo "未知模式: $MODE" >&2
        echo "用法: ./run_diagnosis.sh [host] [all|profile]" >&2
        exit 1
        ;;
esac

# Upload the sources and build the benchmark binary remotely.
sync_code
compile

# Run the selected diagnosis mode (already validated above).
case "$MODE" in
    all) run_all ;;
    profile) run_profile ;;
esac

# Final summary, followed by a listing of everything in the results directory.
echo ""
echo "=========================================="
echo " 诊断完成"
echo " 服务器: $REMOTE_HOST"
echo " 结果目录: $RESULTS_DIR"
echo "=========================================="
# Best-effort listing: a missing or unreadable directory must not fail the run.
ls -lh "$RESULTS_DIR"/ 2>/dev/null || true
|
||||
Loading…
Add table
Add a link
Reference in a new issue