#!/bin/bash

# Host and device configuration -- edit these to match the real environment.
SERVER_A="A_IP"  # machine A: hosts the ib_write_bw server side of each test
SERVER_B="B_IP"  # machine B: runs the ib_write_bw client side of each test
# NOTE(review): USER is also a standard login environment variable; this
# assignment replaces its value for the rest of the script.
USER="actual_username"
# InfiniBand device names; every (A device, B device) combination is tested.
IB_DEVICES=("mlx5_2" "mlx5_3" "mlx5_4" "mlx5_5" "mlx5_8" "mlx5_9" "mlx5_10" "mlx5_11")
FAIL_LOG="failures_$(date +%Y%m%d).log"  # failure log; the name carries today's date

# ANSI colour escapes, interpreted later by `echo -e`.
RED='\033[1;31m'
GREEN='\033[1;32m'
NC='\033[0m'  # reset colour

# Failed device pairs, stored as "<dev_a>-<dev_b>".
declare -a FAILED_PAIRS=()
17+
#######################################
# Best-effort teardown: asks both machines to kill every process whose
# command line matches 'ib_write_bw -d'. Registered below as the EXIT trap,
# so it runs on every exit path of the script.
# Globals:   USER, SERVER_A, SERVER_B (read)
# Arguments: none
# Outputs:   none (ssh output is discarded)
#######################################
cleanup() {
  local host
  for host in "${SERVER_A}" "${SERVER_B}"; do
    # The target must be a single "user@host" word; output is discarded and
    # the exit status ignored because this is only a best-effort teardown.
    ssh "${USER}@${host}" "pkill -f 'ib_write_bw -d'" > /dev/null 2>&1 || true
  done
}
trap cleanup EXIT
23+
#######################################
# Classifies one client run as pass or fail from its saved log file
# client_<dev_a>_<dev_b>.log in the current directory.
# Globals:
#   GREEN, RED, NC (read)  - colour escapes for the status line
#   FAILED_PAIRS (written) - gains "<dev_a>-<dev_b>" on failure
#   FAIL_LOG (read)        - file that receives the failing run's log
# Arguments:
#   $1 - device used on machine A
#   $2 - device used on machine B
# Outputs:
#   A [PASS]/[FAIL] status line on stdout; on a pass, also the lines of the
#   log starting at "BW average".
#######################################
process_result() {
  local dev_a=$1
  local dev_b=$2
  local log_file="client_${dev_a}_${dev_b}.log"

  # A run counts as passed when its log contains "BW average". The
  # readability check keeps a missing log from producing grep/cat errors.
  if [[ -r "$log_file" ]] && grep -q "BW average" "$log_file"; then
    echo -e "${GREEN}[PASS]${NC} $dev_b -> $dev_a"
    grep -A 5 "BW average" "$log_file" | tail -6
  else
    echo -e "${RED}[FAIL]${NC} $dev_b -> $dev_a"
    FAILED_PAIRS+=("${dev_a}-${dev_b}")
    # Append the full client log of the failed run to the failure log.
    {
      echo "===== 失败组合: ${dev_a}-${dev_b} ====="
      if [[ -r "$log_file" ]]; then
        cat "$log_file"
      else
        echo "(client log ${log_file} is missing or unreadable)"
      fi
      echo -e "\n"
    } >> "$FAIL_LOG"
  fi
}
42+
# Main test loop: for each device on machine A, start a server loop there,
# then run one client from every device on machine B against it.
for ((i = 0; i < ${#IB_DEVICES[@]}; i++)); do
  DEV_A="${IB_DEVICES[$i]}"
  echo "[INFO] 机器A启动持续接收服务: ${DEV_A}"

  # The remote loop relaunches ib_write_bw every time it exits, so one ssh
  # session serves all client runs below (presumably because a server
  # instance exits after a single client -- confirm against perftest docs).
  # -n keeps the backgrounded ssh from reading this script's stdin.
  ssh -n "${USER}@${SERVER_A}" "while true; do ib_write_bw -d ${DEV_A}; done" \
    > "server_${DEV_A}.log" &
  SERVER_PID=$!
  sleep 8  # fixed pause before the first client connects

  for ((j = 0; j < ${#IB_DEVICES[@]}; j++)); do
    DEV_B="${IB_DEVICES[$j]}"
    echo "[TEST] 机器B使用设备: ${DEV_B} -> 机器A设备: ${DEV_A}"

    # Run the client and capture its complete output. stderr is included so
    # that error messages end up in the per-pair log, which process_result
    # copies into the failure log.
    ssh "${USER}@${SERVER_B}" "ib_write_bw -d ${DEV_B} ${SERVER_A} -D 5" \
      > "client_${DEV_A}_${DEV_B}.log" 2>&1
    process_result "$DEV_A" "$DEV_B"
    sleep 2
  done

  # Stop the local ssh session first, then kill whatever is still running
  # for this device on machine A.
  kill -15 "$SERVER_PID" 2> /dev/null
  wait "$SERVER_PID" 2> /dev/null
  ssh "${USER}@${SERVER_A}" "pkill -f 'ib_write_bw -d ${DEV_A}'" > /dev/null 2>&1
done
66+
#######################################
# Prints a summary of the failed device pairs and appends the same list to
# the failure log. Prints a success message and returns when nothing failed.
# Globals:
#   FAILED_PAIRS (read)    - "<dev_a>-<dev_b>" entries collected so far
#   FAIL_LOG (read)        - file the summary is appended to
#   GREEN, RED, NC (read)  - colour escapes
# Arguments: none
# Outputs:   summary text on stdout; summary block appended to $FAIL_LOG
#######################################
summarize_failures() {
  local failed_count=${#FAILED_PAIRS[@]}
  local pair  # keep the loop variable out of the global scope

  if ((failed_count == 0)); then
    echo -e "${GREEN}所有组合测试成功!${NC}"
    return
  fi

  echo -e "\n${RED}===== 失败组合总结 =====${NC}"
  echo "共 ${failed_count} 组失败:"
  for pair in "${FAILED_PAIRS[@]}"; do
    echo -e "${RED}${pair}${NC}"
  done

  # Record the summary in the failure log, one pair per line.
  echo -e "\n===== $(date) 失败组合汇总 =====" >> "$FAIL_LOG"
  printf "%s\n" "${FAILED_PAIRS[@]}" >> "$FAIL_LOG"
  echo -e "\n详细日志见: ${RED}${FAIL_LOG}${NC}"
}
85+
# Report the collected failures once every device pair has been tested.
summarize_failures