summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Leonardo Sandoval <leonardo.sandoval@linaro.org> 2021-02-12 13:42:55 -0600
committer Leonardo Sandoval <leonardo.sandoval@linaro.org> 2021-02-16 09:07:04 -0600
commit 73d301aa8d23fa8ea0404740b16c070f0c387663 (patch)
tree bdac765f992e4190e4e106cc5744422e39e95c0f
parent a8078d68f36c9f0ece15e13b3c534b273cb03473 (diff)
squad.sh: resilient LAVA job ID retrieval
Under heavy load in the LAVA lab [1], job ID retrieval through squad may yield an invalid job ID, as seen in [2] and in the log excerpt below:

12:37:03 ++ curl --retry 4 'https://qa-reports.linaro.org/api/testjobs/4020286/?fields=job_id'
12:37:05 % Total % Received % Xferd Average Speed Time Time Time Current
12:37:05 Dload Upload Total Spent Left Speed
12:37:05 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0 100 15 100 15 0 0 40 0 --:--:-- --:--:-- --:--:-- 40 100 15 100 15 0 0 40 0 --:--:-- --:--:-- --:--:-- 40
12:37:05 + LAVAJOB_ID='{"job_id":null}'

This commit introduces a simple loop-and-sleep sequence that exits the loop either when the ID is a valid number or when the maximum number of tries is reached.

[1] https://tf.validation.linaro.org/
[2] https://ci.staging.trustedfirmware.org/job/lsandov1-tf-a-builder/21995/console

Signed-off-by: Leonardo Sandoval <leonardo.sandoval@linaro.org>
Change-Id: I93c9e81a150f87c850f35dbca547482f906a3829
-rw-r--r-- tf-a-builder/squad.sh 53
1 files changed, 31 insertions, 22 deletions
diff --git a/tf-a-builder/squad.sh b/tf-a-builder/squad.sh
index 7b3a650..93a19d6 100644
--- a/tf-a-builder/squad.sh
+++ b/tf-a-builder/squad.sh
@@ -51,34 +51,43 @@ if [ -n "${QA_SERVER_VERSION}" ]; then
if [ -n "${TESTJOB_ID}" ]; then
echo "TEST JOB URL: ${QA_SERVER}/testjob/${TESTJOB_ID} TEST JOB ID: ${TESTJOB_ID}"
- # The below sleep command is intentional: LAVA could be under heavy load so previous job creation can
- # take 'some' time
- sleep 2
- LAVAJOB_ID=$(curl --retry 4 ${QA_SERVER}/api/testjobs/${TESTJOB_ID}/?fields=job_id)
+ # The below loop with a sleep is intentional: LAVA could be under heavy load so previous job creation can
+ # take 'some' time to get the right numeric LAVA JOB ID
+ renumber='^[0-9]+$'
+ LAVAJOB_ID="null"
+ iter=0
+ max_tries=10
+ while ! [[ $LAVAJOB_ID =~ $renumber ]]; do
+ if [ $iter -eq $max_tries ] ; then
+ LAVAJOB_ID=''
+ break
+ fi
+ sleep 2
+ LAVAJOB_ID=$(curl --retry 4 ${QA_SERVER}/api/testjobs/${TESTJOB_ID}/?fields=job_id)
+
+ # Get the job_id value (whatever it is)
+ LAVAJOB_ID=$(echo ${LAVAJOB_ID} | jq '.job_id')
+ LAVAJOB_ID="${LAVAJOB_ID//\"/}"
+
+ iter=$(( iter + 1 ))
+ done
# check that rest query at least get non-empty value
if [ -n "${LAVAJOB_ID}" ]; then
- # Get the numeric ID
- LAVAJOB_ID=$(echo ${LAVAJOB_ID} | jq '.job_id')
- LAVAJOB_ID="${LAVAJOB_ID//\"/}"
- if [ -n "${LAVAJOB_ID}" ]; then
- echo "LAVA URL: https://${LAVA_SERVER}/scheduler/job/${LAVAJOB_ID} LAVA JOB ID: ${LAVAJOB_ID}"
-
- resilient_cmd lavacli identities add --username ${LAVA_USER} --token ${LAVA_TOKEN} --uri "https://${LAVA_SERVER}/RPC2" default
- resilient_cmd lavacli jobs wait ${LAVAJOB_ID}
- resilient_cmd lavacli jobs logs ${LAVAJOB_ID} > "${WORKSPACE}/lava.log"
-
- # Fetch and store LAVA job result (1 failure, 0 success)
- resilient_cmd lavacli results ${LAVAJOB_ID} | tee "${WORKSPACE}/lava.res"
- if grep '\[fail\]' "${WORKSPACE}/lava.res"; then
- echo "LAVA JOB RESULT: 1"
- else
- echo "LAVA JOB RESULT: 0"
- fi
+ echo "LAVA URL: https://${LAVA_SERVER}/scheduler/job/${LAVAJOB_ID} LAVA JOB ID: ${LAVAJOB_ID}"
+
+ resilient_cmd lavacli identities add --username ${LAVA_USER} --token ${LAVA_TOKEN} --uri "https://${LAVA_SERVER}/RPC2" default
+ resilient_cmd lavacli jobs wait ${LAVAJOB_ID}
+ resilient_cmd lavacli jobs logs ${LAVAJOB_ID} > "${WORKSPACE}/lava.log"
+
+ # Fetch and store LAVA job result (1 failure, 0 success)
+ resilient_cmd lavacli results ${LAVAJOB_ID} | tee "${WORKSPACE}/lava.res"
+ if grep '\[fail\]' "${WORKSPACE}/lava.res"; then
+ echo "LAVA JOB RESULT: 1"
else
- echo "LAVA Job ID could not be obtained"
+ echo "LAVA JOB RESULT: 0"
fi
else
echo "LAVA Job ID could not be obtained"