diff options
author | Leonardo Sandoval <leonardo.sandoval@linaro.org> | 2021-02-12 13:42:55 -0600 |
---|---|---|
committer | Leonardo Sandoval <leonardo.sandoval@linaro.org> | 2021-02-16 09:07:04 -0600 |
commit | 73d301aa8d23fa8ea0404740b16c070f0c387663 (patch) | |
tree | bdac765f992e4190e4e106cc5744422e39e95c0f | |
parent | a8078d68f36c9f0ece15e13b3c534b273cb03473 (diff) |
squad.sh: resilient LAVA job ID retrieval
Under heavy load in the LAVA lab [1], job ID retrieval through SQUAD
may yield invalid job ID numbers, as seen in [2] and in the log excerpt below:
12:37:03 ++ curl --retry 4 'https://qa-reports.linaro.org/api/testjobs/4020286/?fields=job_id'
12:37:05 % Total % Received % Xferd Average Speed Time Time Time Current
12:37:05 Dload Upload Total Spent Left Speed
12:37:05
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
100 15 100 15 0 0 40 0 --:--:-- --:--:-- --:--:-- 40
100 15 100 15 0 0 40 0 --:--:-- --:--:-- --:--:-- 40
12:37:05 + LAVAJOB_ID='{"job_id":null}'
This commit introduces a simple loop-and-sleep sequence, breaking out of the
loop either when the ID is a valid number or when max_tries is reached.
[1] https://tf.validation.linaro.org/
[2] https://ci.staging.trustedfirmware.org/job/lsandov1-tf-a-builder/21995/console
Signed-off-by: Leonardo Sandoval <leonardo.sandoval@linaro.org>
Change-Id: I93c9e81a150f87c850f35dbca547482f906a3829
-rw-r--r-- | tf-a-builder/squad.sh | 53 |
1 files changed, 31 insertions, 22 deletions
diff --git a/tf-a-builder/squad.sh b/tf-a-builder/squad.sh index 7b3a650..93a19d6 100644 --- a/tf-a-builder/squad.sh +++ b/tf-a-builder/squad.sh @@ -51,34 +51,43 @@ if [ -n "${QA_SERVER_VERSION}" ]; then if [ -n "${TESTJOB_ID}" ]; then echo "TEST JOB URL: ${QA_SERVER}/testjob/${TESTJOB_ID} TEST JOB ID: ${TESTJOB_ID}" - # The below sleep command is intentional: LAVA could be under heavy load so previous job creation can - # take 'some' time - sleep 2 - LAVAJOB_ID=$(curl --retry 4 ${QA_SERVER}/api/testjobs/${TESTJOB_ID}/?fields=job_id) + # The below loop with a sleep is intentional: LAVA could be under heavy load so previous job creation can + # take 'some' time to get the right numeric LAVA JOB ID + renumber='^[0-9]+$' + LAVAJOB_ID="null" + iter=0 + max_tries=10 + while ! [[ $LAVAJOB_ID =~ $renumber ]]; do + if [ $iter -eq $max_tries ] ; then + LAVAJOB_ID='' + break + fi + sleep 2 + LAVAJOB_ID=$(curl --retry 4 ${QA_SERVER}/api/testjobs/${TESTJOB_ID}/?fields=job_id) + + # Get the job_id value (whatever it is) + LAVAJOB_ID=$(echo ${LAVAJOB_ID} | jq '.job_id') + LAVAJOB_ID="${LAVAJOB_ID//\"/}" + + iter=$(( iter + 1 )) + done # check that rest query at least get non-empty value if [ -n "${LAVAJOB_ID}" ]; then - # Get the numeric ID - LAVAJOB_ID=$(echo ${LAVAJOB_ID} | jq '.job_id') - LAVAJOB_ID="${LAVAJOB_ID//\"/}" - if [ -n "${LAVAJOB_ID}" ]; then - echo "LAVA URL: https://${LAVA_SERVER}/scheduler/job/${LAVAJOB_ID} LAVA JOB ID: ${LAVAJOB_ID}" - - resilient_cmd lavacli identities add --username ${LAVA_USER} --token ${LAVA_TOKEN} --uri "https://${LAVA_SERVER}/RPC2" default - resilient_cmd lavacli jobs wait ${LAVAJOB_ID} - resilient_cmd lavacli jobs logs ${LAVAJOB_ID} > "${WORKSPACE}/lava.log" - - # Fetch and store LAVA job result (1 failure, 0 success) - resilient_cmd lavacli results ${LAVAJOB_ID} | tee "${WORKSPACE}/lava.res" - if grep '\[fail\]' "${WORKSPACE}/lava.res"; then - echo "LAVA JOB RESULT: 1" - else - echo "LAVA JOB RESULT: 0" - fi + echo "LAVA URL: 
https://${LAVA_SERVER}/scheduler/job/${LAVAJOB_ID} LAVA JOB ID: ${LAVAJOB_ID}" + + resilient_cmd lavacli identities add --username ${LAVA_USER} --token ${LAVA_TOKEN} --uri "https://${LAVA_SERVER}/RPC2" default + resilient_cmd lavacli jobs wait ${LAVAJOB_ID} + resilient_cmd lavacli jobs logs ${LAVAJOB_ID} > "${WORKSPACE}/lava.log" + + # Fetch and store LAVA job result (1 failure, 0 success) + resilient_cmd lavacli results ${LAVAJOB_ID} | tee "${WORKSPACE}/lava.res" + if grep '\[fail\]' "${WORKSPACE}/lava.res"; then + echo "LAVA JOB RESULT: 1" else - echo "LAVA Job ID could not be obtained" + echo "LAVA JOB RESULT: 0" fi else echo "LAVA Job ID could not be obtained" |