diff options
author | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2023-02-28 14:54:27 +0000 |
---|---|---|
committer | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2023-02-28 14:54:27 +0000 |
commit | a17746a3b9065a1957ee737eecd7824741f82a73 (patch) | |
tree | b46e24c894f60d4abda014c908caa444d639c409 /tcwg-benchmark.sh | |
parent | b981a3550bc047f0a858fd302fc71e04d9f47d90 (diff) |
tcwg-benchmark.sh: Improve reboot and recovery sequence again
Change-Id: I69cae2f56f936d647f44fdacf87c7a5495f5d350
Diffstat (limited to 'tcwg-benchmark.sh')
-rwxr-xr-x | tcwg-benchmark.sh | 51 |
1 files changed, 28 insertions, 23 deletions
```diff
diff --git a/tcwg-benchmark.sh b/tcwg-benchmark.sh
index feb6671a..4c7e6b3b 100755
--- a/tcwg-benchmark.sh
+++ b/tcwg-benchmark.sh
@@ -310,55 +310,60 @@ while [ $tries_left != 0 ]; do
     if timeout 1m ssh "$boardname" true; then
         ssh_cmd="ssh"
+        wait_opts=""
     elif timeout 1m ssh -p22 -lroot "$boardname" true; then
         ssh_cmd="ssh -p22 -lroot"
+        wait_opts="22 -lroot"
     else
-        ssh_cmd="ssh -p22 -lroot"
+        ssh_cmd="false"
+        wait_opts="22 -lroot"
+        reboot=true
         force_power_cycle=true
+        tries_left=0
     fi
-    if ! $reboot && $prepare_board; then
-        # Check board for kernel panics and reboot, if any.
-        dmesg_file="$boardname.dmesg-$(date +%s)"
-        timeout 1m $ssh_cmd "$boardname" dmesg -l emerg 2>&1 \
-            | tee "$dmesg_file-emerg"
-        if [ x"$(cat "$dmesg_file-emerg" | wc -l)" != x"0" ]; then
-            reboot=true
-            timeout 1m $ssh_cmd "$boardname" dmesg 2>&1 \
-                | tee "$dmesg_file"
-        else
-            # Remove empty dmesg reports, but keep non-empty ones for offline
-            # analysis -- e.g., to understand frequency and nature of kernel
-            # panics.
-            rm "$dmesg_file-emerg"
+    if $prepare_board; then
+        if ! $reboot; then
+            # Check board for kernel panics and reboot, if any.
+            dmesg_file="$boardname.dmesg-$(date +%s)"
+            timeout 1m $ssh_cmd "$boardname" dmesg -l emerg 2>&1 \
+                | tee "$dmesg_file-emerg"
+            if [ x"$(cat "$dmesg_file-emerg" | wc -l)" != x"0" ]; then
+                reboot=true
+                timeout 1m $ssh_cmd "$boardname" dmesg 2>&1 \
+                    | tee "$dmesg_file"
+            else
+                # Remove empty dmesg reports, but keep non-empty ones for
+                # offline analysis -- e.g., to understand frequency and
+                # nature of kernel panics.
+                rm "$dmesg_file-emerg"
+            fi
         fi
-    fi
-    if $reboot; then
         if $force_power_cycle; then
             echo "Trying to power-cycle $boardname"
             (
                 pdu_name=$(echo "${boardname%.tcwglab}" \
                     | sed -e 's/^tcwg-bmk-/tcwg-/')
                 nvidia-power-cycle.sh "$pdu_name"
-                wait_for_ssh_server "$boardname" "" 150
+                wait_for_ssh_server "$boardname" "$wait_opts" 150
             ) &
             wait $! || exit $EXTERNAL_FAIL
             echo "Successfully powered-cycled $boardname"
-        else
+        elif $reboot; then
             echo "Trying to reboot $boardname"
             # Reboot the board.
             # Ping board every second (ServerAliveInterval=1) to avoid
             # waiting [default] 5min for ssh to break connection.
             $ssh_cmd -Snone -oServerAliveInterval=1 $boardname \
                 sudo /sbin/reboot || true
+            # Wait until the ssh server is ready
             sleep 30 # Give time to the board to shutdown
-            ret=0
-            wait_for_ssh_server "$boardname" "" 150 || ret=$?
+            wait_for_ssh_server "$boardname" "$wait_opts" 150 &
+            ret=0 && wait $! || ret=$?
             if [ $ret != 0 ]; then
-                echo "SSH server did not respond after reboot, exiting."
-                exit $EXTERNAL_FAIL
+                echo "SSH server did not respond after reboot"
             fi
         fi
     fi
```