diff options
author | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2020-12-11 06:05:50 +0000 |
---|---|---|
committer | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2020-12-11 06:06:46 +0000 |
commit | 333123515e7d6ad9b5bb5eaa931249672170b1ee (patch) | |
tree | c5a9cdd4c36534cb1720522e0fef3f73a7ca751f | |
parent | a20b051a8c0e2552eef636e5f86b19d6032135a7 (diff) |
tcwg-benchmark.sh: Improve board recovery to check that filesystem is working
This should, hopefully, let us recover from filesystem failures like the one below:
00:02:35.206 + ssh tcwg-tk1-06.tcwglab sudo /sbin/reboot
00:02:35.306 Failed to start reboot.target: Unit reboot.target is not loaded properly: Input/output error.
00:02:35.307 See system logs and 'systemctl status reboot.target' for details.
00:02:35.308 Failed to open /dev/initctl: No such device or address
00:02:35.308 Failed to talk to init daemon.
...
00:03:05.323 + wait_for_ssh_server tcwg-tk1-06.tcwglab 22 100
...
00:03:05.346 + rsync -az --delete bmk-scripts/ tcwg-tk1-06.tcwglab:bmk-scripts/
00:03:05.514 rsync: failed to set times on "/home/tcwg-benchmark/bmk-scripts/.git": Read-only file system (30)
Change-Id: I9c1b50d18034a7c6317505f0e770791bc178e8b7
-rwxr-xr-x | tcwg-benchmark.sh | 64 |
1 file changed, 42 insertions, 22 deletions
diff --git a/tcwg-benchmark.sh b/tcwg-benchmark.sh index cc533d51..48680384 100755 --- a/tcwg-benchmark.sh +++ b/tcwg-benchmark.sh @@ -229,31 +229,51 @@ case "$hw_tag" in ;; esac -if ! ssh "$boardname" true; then - echo "Trying to power-cycle $boardname" - ( - nvidia-power-cycle.sh "${boardname%.tcwglab}" - wait_for_ssh_server "$boardname" 22 100 - ) & - wait $! || exit $EXTERNAL_FAIL - echo "Successfull powered-cycled $boardname" - reboot=false -fi +# Check that we can ssh to the board and rsync scripts. This ensures that +# the board is online and filesystem is good condition. Try to reboot and/or +# power-cycle the board as needed. +tries_left=2 +force_power_cycle=false +while [ $tries_left != 0 ]; do + tries_left=$(($tries_left-1)) -if $reboot; then - # Reboot the board - ssh $boardname sudo /sbin/reboot || true - # Wait until the ssh server is ready - sleep 30 # Give time to the board to shutdown - ret=0 - wait_for_ssh_server $boardname 22 100 || ret=$? - if [ $ret != 0 ]; then - echo "SSH server did not respond after reboot, exiting." - exit $ret + if ! ssh "$boardname" true || $force_power_cycle; then + echo "Trying to power-cycle $boardname" + ( + nvidia-power-cycle.sh "${boardname%.tcwglab}" + wait_for_ssh_server "$boardname" 22 100 + ) & + wait $! || exit $EXTERNAL_FAIL + echo "Successfull powered-cycled $boardname" + reboot=false + fi + + if $reboot; then + # Reboot the board + ssh $boardname sudo /sbin/reboot || true + # Wait until the ssh server is ready + sleep 30 # Give time to the board to shutdown + ret=0 + wait_for_ssh_server $boardname 22 100 || ret=$? + if [ $ret != 0 ]; then + echo "SSH server did not respond after reboot, exiting." + exit $EXTERNAL_FAIL + fi fi -fi -rsync -az --delete bmk-scripts/ "$boardname:bmk-scripts/" + rsync -az --delete bmk-scripts/ "$boardname:bmk-scripts/" & + res=0 && wait $! || res=$? 
+ if [ $res = 0 ]; then + break + else + force_power_cycle=true + fi +done + +if [ res != 0 ]; then + echo "ERROR: Could not get board online" + exit $EXTERNAL_FAIL +fi case "$testmode" in build|verify) input_size="test" ;; |