#!/bin/bash
#
# InfiniBand cross-device bandwidth test between two machines.
# For every pair of devices in IB_DEVICES an ib_write_bw server is started on
# SERVER_A and an ib_write_bw client on SERVER_B, both driven over ssh.
# Passwordless (key-based) ssh to both machines is recommended.

# Machine settings -- adjust these to your actual environment.
SERVER_A="A_IP"
SERVER_B="B_IP"
# NOTE: this replaces the value of the shell's own USER variable for the rest
# of the script.
USER="actual_username"
IB_DEVICES=("mlx5_2" "mlx5_3" "mlx5_4" "mlx5_5" "mlx5_8" "mlx5_9" "mlx5_10" "mlx5_11")
FAIL_LOG="failures_$(date +%Y%m%d).log" # log file name carries today's date

# Terminal colors; the escapes are stored literally and interpreted when
# printed (echo -e / printf %b).
RED='\033[1;31m'
GREEN='\033[1;32m'
NC='\033[0m' # reset color

# Device pairs whose test failed.
declare -a FAILED_PAIRS=()
18+
#######################################
# Best-effort removal of leftover ib_write_bw processes on both machines.
# Runs `pkill -f 'ib_write_bw -d'` over ssh on SERVER_A and on SERVER_B.
# All output is discarded and failures are ignored.
# Globals:   USER, SERVER_A, SERVER_B (read)
# Arguments: none
#######################################
cleanup() {
  local host
  for host in "${SERVER_A}" "${SERVER_B}"; do
    # Errors (host unreachable, nothing to kill) are deliberately ignored.
    ssh "${USER}@${host}" "pkill -f 'ib_write_bw -d'" >/dev/null 2>&1
  done
}
# Run the cleanup on every exit path of the script.
trap cleanup EXIT
24+
#######################################
# Evaluate the client log of one device pair and report PASS or FAIL.
# The pair passes when client_<dev_a>_<dev_b>.log contains "BW average";
# the matching line and the lines after it are then echoed. Otherwise the
# pair is appended to FAILED_PAIRS and the client log is copied into
# FAIL_LOG.
# Globals:   GREEN, RED, NC, FAIL_LOG (read); FAILED_PAIRS (appended)
# Arguments: $1 - device used on machine A (server side)
#            $2 - device used on machine B (client side)
# Outputs:   PASS/FAIL line (plus the bandwidth lines on PASS) on stdout
# Returns:   0
#######################################
process_result() {
  local dev_a=$1
  local dev_b=$2
  local log_file="client_${dev_a}_${dev_b}.log"

  if [[ -f "$log_file" ]] && grep -q "BW average" "$log_file"; then
    # %b expands the color escapes; device names are printed verbatim.
    printf '%b[PASS]%b %s -> %s\n' "$GREEN" "$NC" "$dev_b" "$dev_a"
    grep -A 5 "BW average" "$log_file" | tail -6
    return 0
  fi

  printf '%b[FAIL]%b %s -> %s\n' "$RED" "$NC" "$dev_b" "$dev_a"
  FAILED_PAIRS+=("${dev_a}-${dev_b}")

  # Record the detailed failure log.
  {
    printf '===== 失败组合: %s-%s =====\n' "$dev_a" "$dev_b"
    if [[ -f "$log_file" ]]; then
      cat -- "$log_file"
    else
      # A missing log is itself a failure; note it instead of erroring out.
      printf '(client log %s not found)\n' "$log_file"
    fi
    printf '\n\n'
  } >>"$FAIL_LOG"
  return 0
}
43+
# Main test loop: for each device on machine A, keep an ib_write_bw listener
# running there and test it against every device on machine B.
for DEV_A in "${IB_DEVICES[@]}"; do
  echo "[INFO] 机器A启动持续接收服务: ${DEV_A}"

  # The remote loop restarts ib_write_bw each time it exits, presumably so a
  # listener is available again for the next client run.
  ssh "${USER}@${SERVER_A}" "while true; do ib_write_bw -d ${DEV_A}; done" \
    >"server_${DEV_A}.log" &
  SERVER_PID=$!
  sleep 8 # give the remote listener time to come up

  for DEV_B in "${IB_DEVICES[@]}"; do
    echo "[TEST] 机器B使用设备: ${DEV_B} -> 机器A设备: ${DEV_A}"

    # Run the client and capture its complete output. stderr is included so
    # that the failure log contains the actual error text.
    ssh "${USER}@${SERVER_B}" "ib_write_bw -d ${DEV_B} ${SERVER_A} -D 5" \
      >"client_${DEV_A}_${DEV_B}.log" 2>&1
    process_result "$DEV_A" "$DEV_B"
    sleep 2
  done

  # Stop the local ssh first, then remove whatever is still running remotely
  # for this device.
  kill -15 "$SERVER_PID" 2>/dev/null
  wait "$SERVER_PID" 2>/dev/null
  ssh "${USER}@${SERVER_A}" "pkill -f 'ib_write_bw -d ${DEV_A}'" >/dev/null 2>&1
done
67+
#######################################
# Print a summary of the failed device pairs and append it to the log.
# With no failures only a success message is printed and the log file is
# left untouched.
# Globals:   FAILED_PAIRS, FAIL_LOG, RED, GREEN, NC (read)
# Arguments: none
# Outputs:   summary on stdout; failed pairs appended to FAIL_LOG
#######################################
summarize_failures() {
  local pair

  if (( ${#FAILED_PAIRS[@]} == 0 )); then
    printf '%b所有组合测试成功!%b\n' "$GREEN" "$NC"
    return
  fi

  printf '\n%b===== 失败组合总结 =====%b\n' "$RED" "$NC"
  printf '共 %s 组失败:\n' "${#FAILED_PAIRS[@]}"
  for pair in "${FAILED_PAIRS[@]}"; do
    printf '%b%s%b\n' "$RED" "$pair" "$NC"
  done

  # Append the summary to the log file.
  {
    printf '\n===== %s 失败组合汇总 =====\n' "$(date)"
    printf '%s\n' "${FAILED_PAIRS[@]}"
  } >>"$FAIL_LOG"
  printf '\n详细日志见: %b%s%b\n' "$RED" "$FAIL_LOG" "$NC"
}

summarize_failures
# (web-page residue "0 commit comments" -- not part of the script)