summary | refs | log | tree | commit | diff
path: root/tcwg-benchmark.sh
diff options
context:
space:
mode:
author: Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> 2023-02-26 14:22:32 +0000
committer: Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> 2023-02-26 14:28:17 +0000
commit: 5116dcb7c4815af7fe9d90cd3feb379c70b139c1 (patch)
tree: 716179432c28f67dec86499aecdb20474fc0609a /tcwg-benchmark.sh
parent: d6f047a2b6fcaff124afe499aa1ccac25ddd398f (diff)
tcwg-benchmark.sh: Reboot boards with kernel problems
The immediate problem is with tcwg-sq-02.tcwglab, which is accessible, but unusable.

Change-Id: I7756620e0d41b9e5c1262c472d21957c3c9f8e0b
Diffstat (limited to 'tcwg-benchmark.sh')
-rwxr-xr-x tcwg-benchmark.sh 32
1 files changed, 29 insertions, 3 deletions
diff --git a/tcwg-benchmark.sh b/tcwg-benchmark.sh
index e88167ae..8664bee2 100755
--- a/tcwg-benchmark.sh
+++ b/tcwg-benchmark.sh
@@ -308,8 +308,33 @@ force_power_cycle=false
while [ $tries_left != 0 ]; do
tries_left=$(($tries_left-1))
+ if ! $reboot && $prepare_board; then
+ # Check board for kernel panics and reboot, if any.
+ dmesg_file="$boardname.dmesg-$(date +%s)"
+ timeout 1m ssh "$boardname" dmesg -l emerg 2>&1 \
+ | tee "$dmesg_file-emerg"
+ if [ x"$(cat "$dmesg_file-emerg" | wc -l)" != x"0" ]; then
+ reboot=true
+ timeout 1m ssh -p22 -lroot "$boardname" dmesg 2>&1 \
+ | tee "$dmesg_file"
+ else
+ # Remove empty dmesg reports, but keep non-empty ones for offline
+ # analysis -- e.g., to understand frequency and nature of kernel
+ # panics.
+ rm "$dmesg_file-emerg"
+ fi
+ fi
+
if $reboot; then
- if ! timeout 1m ssh "$boardname" true || $force_power_cycle; then
+ if timeout 1m ssh "$boardname" true; then
+ ssh_cmd="ssh"
+ elif timeout 1m ssh -p22 -lroot "$boardname" true; then
+ ssh_cmd="ssh -p22 -lroot"
+ else
+ force_power_cycle=true
+ fi
+
+ if $force_power_cycle; then
echo "Trying to power-cycle $boardname"
(
pdu_name=$(echo "${boardname%.tcwglab}" \
@@ -320,11 +345,12 @@ while [ $tries_left != 0 ]; do
wait $! || exit $EXTERNAL_FAIL
echo "Successfully powered-cycled $boardname"
else
+ echo "Trying to reboot $boardname"
# Reboot the board.
# Ping board every second (ServerAliveInterval=1) to avoid
# waiting [default] 5min for ssh to break connection.
- ssh -Snone -oServerAliveInterval=1 $boardname sudo /sbin/reboot \
- || true
+ $ssh_cmd -Snone -oServerAliveInterval=1 $boardname \
+ sudo /sbin/reboot || true
# Wait until the ssh server is ready
sleep 30 # Give time to the board to shutdown
ret=0