diff options
author | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2023-02-26 14:22:32 +0000 |
---|---|---|
committer | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2023-02-26 14:28:17 +0000 |
commit | 5116dcb7c4815af7fe9d90cd3feb379c70b139c1 (patch) | |
tree | 716179432c28f67dec86499aecdb20474fc0609a /tcwg-benchmark.sh | |
parent | d6f047a2b6fcaff124afe499aa1ccac25ddd398f (diff) |
tcwg-benchmark.sh: Reboot boards with kernel problems
The immediate problem is with tcwg-sq-02.tcwglab, which is accessible
but unusable.
Change-Id: I7756620e0d41b9e5c1262c472d21957c3c9f8e0b
Diffstat (limited to 'tcwg-benchmark.sh')
-rwxr-xr-x | tcwg-benchmark.sh | 32 |
1 file changed, 29 insertions, 3 deletions
diff --git a/tcwg-benchmark.sh b/tcwg-benchmark.sh index e88167ae..8664bee2 100755 --- a/tcwg-benchmark.sh +++ b/tcwg-benchmark.sh @@ -308,8 +308,33 @@ force_power_cycle=false while [ $tries_left != 0 ]; do tries_left=$(($tries_left-1)) + if ! $reboot && $prepare_board; then + # Check board for kernel panics and reboot, if any. + dmesg_file="$boardname.dmesg-$(date +%s)" + timeout 1m ssh "$boardname" dmesg -l emerg 2>&1 \ + | tee "$dmesg_file-emerg" + if [ x"$(cat "$dmesg_file-emerg" | wc -l)" != x"0" ]; then + reboot=true + timeout 1m ssh -p22 -lroot "$boardname" dmesg 2>&1 \ + | tee "$dmesg_file" + else + # Remove empty dmesg reports, but keep non-empty ones for offline + # analysis -- e.g., to understand frequency and nature of kernel + # panics. + rm "$dmesg_file-emerg" + fi + fi + if $reboot; then - if ! timeout 1m ssh "$boardname" true || $force_power_cycle; then + if timeout 1m ssh "$boardname" true; then + ssh_cmd="ssh" + elif timeout 1m ssh -p22 -lroot "$boardname" true; then + ssh_cmd="ssh -p22 -lroot" + else + force_power_cycle=true + fi + + if $force_power_cycle; then echo "Trying to power-cycle $boardname" ( pdu_name=$(echo "${boardname%.tcwglab}" \ @@ -320,11 +345,12 @@ while [ $tries_left != 0 ]; do wait $! || exit $EXTERNAL_FAIL echo "Successfully powered-cycled $boardname" else + echo "Trying to reboot $boardname" # Reboot the board. # Ping board every second (ServerAliveInterval=1) to avoid # waiting [default] 5min for ssh to break connection. - ssh -Snone -oServerAliveInterval=1 $boardname sudo /sbin/reboot \ - || true + $ssh_cmd -Snone -oServerAliveInterval=1 $boardname \ + sudo /sbin/reboot || true # Wait until the ssh server is ready sleep 30 # Give time to the board to shutdown ret=0 |