#!/bin/bash set -euf -o pipefail scripts=$(dirname "$0") # shellcheck source=jenkins-helpers.sh . "$scripts/jenkins-helpers.sh" convert_args_to_variables "$@" days="${days-10}" human="${human-true}" output="${output-/dev/null}" refs_prefix="${refs_prefix-refs/heads/linaro-local/ci/}" refs_bkp_url_prefix="${refs_bkp_url_prefix-ssh://bkp.tcwglab/home/tcwg-buildslave}" repos=("${repos[@]-default}") verbose="${verbose-false}" classify="${classify-false}" only="${only-false}" keep_tmp="${keep_tmp-false}" tmpdir="${tmpdir-""}" output=$(realpath $output) if [ $output != "/dev/null" ]; then rm -f $output fi # This represent the average number of days between the last commit # tested in the component, and the starting date of the jenkins test declare -A delay_per_component=([binutils]=0 [gcc]=0 [glibc]=0 [llvm]=0 [linux]=8 [qemu]=0) process_base_artifacts () { ( set -euf -o pipefail local baseartifacts_url="$1" local days_limit=$days while read -r ref; do local ci_project_config="${ref#refs/heads/linaro-local/ci/}" local remote_project_config="${ci_project_config/\//--}" local dst_ref="refs/remotes/$remote_project_config/linaro-local/ci/$ci_project_config" if [ $only != false ] && [[ ! $ci_project_config =~ $only ]]; then continue fi # -- get the studied base-artifact branch if ! [ "$(git -C "base-artifacts" remote get-url $remote_project_config 2> /dev/null)" ]; then git -C "base-artifacts" remote add $remote_project_config $baseartifacts_url git -C "base-artifacts" fetch -q "$baseartifacts_url" "+$ref:$dst_ref" 2> /dev/null fi git -C "base-artifacts" checkout -q $dst_ref # -- get components for this project local components components="$(get_baseline_manifest "{rr[components]}")" # -- check last update of that component declare -A commit_stamps for c in $components; do local tmproot="" last_changed_sha1="" local -a git_history readarray -t git_history < <(get_git_history 1 base-artifacts "git/${c}_rev") tmproot=${git_history[0]} if [ ${#git_history[@]} == 2 ]; then last_changed_sha1=${git_history[1]} last_changed_sha1="${last_changed_sha1#$tmproot/}" last_changed_sha1="${last_changed_sha1%/git/${c}_rev}" commit_stamps[$c]="$(git -C base-artifacts show --no-patch --pretty='%ct' $last_changed_sha1)" fi rm -rf "$tmproot" done # -- dump the messages for c in $components; do if [[ -v commit_stamps[$c] ]]; then days_ago=$((($(date +%s) - ${commit_stamps[$c]}) / (24 * 3600))) if [ "$days_ago" -gt "$((days_limit + delay_per_component[$c]))" ]; then echo "$c: $ci_project_config: last updated $days_ago days ago" |& tee -a $output fi else echo "$c: $ci_project_config: no date for this component" |& tee -a $output fi done done < <(git ls-remote "$baseartifacts_url" "${refs_prefix}*" | cut -f2) ) } report_old_backup_branches () { ( set -euf -o pipefail local ci_project_config while read -r ci_project_config; do # get the branches of this $ci_project_config git repository baseartifacts_url="$refs_bkp_url_prefix/base-artifacts/$ci_project_config.git" readarray -t git_branches < <(git -C base-artifacts ls-remote \ "$baseartifacts_url"|cut -f2) # retrieve the manifest of current branch to have minor/major vars git -C base-artifacts checkout FETCH_HEAD -- manifest.sh cur_major=$(("$(get_baseline_manifest "{rr[major]}")")) cur_minor=$(("$(get_baseline_manifest "{rr[minor]}")")) for br in "${git_branches[@]}"; do if [[ $br =~ refs/heads/linaro-local/ci/.* ]]; then continue elif [[ $br =~ refs/heads/linaro-local/v.*_to_v.*-.*/.* ]]; then br_revs=$(echo $br | \ sed -e 's|refs/heads/linaro-local/v.*_to_v\([0-9\.]*\)-.*/.*|\1|') br_major=$(echo $br_revs | cut -d. -f1) br_minor=$(echo $br_revs | cut -d. -f2) if [ "$cur_major" -eq "$br_major" ] \ && [ "$cur_minor" -eq "$br_minor" ]; then # Nothing to report. This is the last backup branch. true elif [ "$cur_major" -gt "$br_major" ] \ || { [ "$cur_major" -eq "$br_major" ] \ && [ "$cur_minor" -gt "$br_minor" ]; }; then echo "BRANCH $br : Too old. (v$br_major.$br_minor < v$cur_major.$cur_minor)" elif [ "$cur_major" -lt "$br_major" ] \ || { [ "$cur_major" -eq "$br_major" ] \ && [ "$cur_minor" -lt "$br_minor" ]; }; then echo "BRANCH $br : In advance (v$br_major.$br_minor > v$cur_major.$cur_minor)" else assert_with_msg "Internal error for branch $br" false fi else echo "BRANCH $br : Strangely formed" fi done done < <(ssh bkp.tcwglab 'cd /home/tcwg-buildslave/base-artifacts/; find . -type d -name "*.git"' | \ sed -e 's/.git$//' | cut -d/ -f2-3) ) } process_all_base_artifacts () { ( set -euf -o pipefail # -- Initialize base-artifacts repo (by cloning its "empty" branch). # FIXME: We need to add handling of /home/shared/git/base-artifacts to # tcwg-generate-source-cache.sh . clone_or_update_repo "base-artifacts" empty \ "$refs_bkp_url_prefix/base-artifacts/empty.git" auto empty git -C "base-artifacts" reset -q --hard # -- Fetching all remotes local nb_remote idx_remote echo "# Fetching all" git -C "base-artifacts" fetch --all 2> /dev/null nb_remote=$(git -C "base-artifacts" remote |wc -l) # -- Process every components dates echo "# Reporting component not updated recently" # process all unitary base-artifacts// local ci_project_config idx_remote=0 while read -r ci_project_config; do idx_remote=$((idx_remote+1)) verbose -en "# Processing base-artifacts $idx_remote / $nb_remote\r" process_base_artifacts "$refs_bkp_url_prefix/base-artifacts/$ci_project_config.git" done < <(ssh bkp.tcwglab 'cd /home/tcwg-buildslave/base-artifacts/; find . -type d -name "*.git"' | \ sed -e 's/.git$//' | cut -d/ -f2-3) # -- Check for old branches echo "# Reporting useless backup branches" report_old_backup_branches # -- clean local repository # Clean up the clone (this is supposed to re-share objects from # reference clone and keep the size of the clone minimal). # It's possible that previous GC process was interrupted and left # a lock. Use --force to workaround that. It should be safe # to override the lock since directories should not be shared # between concurrent builds. # # Also, prune all loose objects to avoid "git gc --auto" failing # and creating .git/gc.log to warn us. rm -f "base-artifacts/.git/gc.log" # Do not detach into background for GC. Running in the background may # cause a failure during bisect's rsync, which may see some of # the files disappering mid-rsync. git -C "base-artifacts" config gc.autoDetach false git -C "base-artifacts" gc --auto --force --prune=all # Delete stale locks -- especially .git/refs/remotes/REMOTE/BRANCH.lock # These occur when builds are aborted during "git remote update" or similar. find "base-artifacts/.git" -name "*.lock" -delete ) } jenkins_base_url="https://ci.linaro.org" use_last_build="${use_last_build-no}" count_all=0 list_err_noproject=() declare -A test declare -A alldiags ################## UTILITY FUNCTIONS classify_get_project() { verbose " * $1" # zero-initialize test var test=(['gitproject']="" ['jkproject']="" ['branch']="" ['last_updated']="" ['poll_date']="" # ['run_nb']="" ['run_date']="" ['run_status']="" ['run_title']="" ['run_check_regression']="" # ['last_run']="" ['diag']="" ) test['last_updated']=$(echo $1 | sed -e 's|.*: last updated ||' -e 's|.*: No successful run since ||') test['gitproject']=$(echo $1 |cut -d: -f 1) test['branch']=$(echo $1 |cut -d: -f 2) test['branch']=${test['branch']:1} test['jkproject']=$(echo ${test['branch']} | sed \ -e's|\(.*\)/\(.*\)|\1--\2-build|') verbose " : $jenkins_base_url/job/${test['jkproject']}" verbose " : $tmpdir/""${test['jkproject']}" mkdir -p $tmpdir/"${test['jkproject']}" ; cd $tmpdir/"${test['jkproject']}" } set_diag() { diag_error="$1" test['diag']="$diag_error" if [ "${test['diag']}" == "ERROR (project doesnot exist)" ] && [[ ! ${list_err_noproject[*]} =~ (^|[[:space:]])${test['branch']}($|[[:space:]]) ]]; then list_err_noproject+=("${test['branch']}"); fi [ -z "${alldiags["$diag_error"]+set}" ] && alldiags["$diag_error"]=0 alldiags["$diag_error"]="$(( alldiags["$diag_error"] + 1 ))" verbose " ==> diag=$diag_error" } verbose () { if [ $verbose != false ]; then echo "$@" fi } download_project_file () { local filename=$1 local local_file=$filename local remote_file remote_file="$(echo $filename | sed -e 's|__toppage__|.|')" cd $tmpdir/"${test['jkproject']}" [ -f "$local_file" ] && return mkdir -p "$(dirname "$local_file")" # echo $(pwd)/$local_file wget -O "$(pwd)/$local_file" -o /dev/null "$jenkins_base_url/job/${test['jkproject']}/$remote_file" || true } classify () { local condition="$1" local filename="$2" local expression="$3" local diag_error="$4" # Only if not already classified if [ ! -z "${test['diag']}" ]; then return; fi download_project_file "$filename" if [ "$condition" == "exist" ]; then if [ ! -s "$filename" ]; then set_diag "$diag_error" fi fi if [ "$condition" == "grep" ] && [ -f "$filename" ]; then nb=$(grep -c "$expression" $filename || true) if [ "$nb" != "0" ]; then set_diag "$diag_error" fi fi if [ "$condition" == "xzgrep" ] && [ -s "$filename" ]; then nb=$(xzcat $filename | grep -c "$expression" || true) if [ "$nb" != "0" ]; then set_diag "$diag_error" fi fi } ################## GET INFO FROM THE BUILD get_project_info () { count_all=$((count_all+1)) # Diag has been already classified. Probably means no project. don't go further if [ ! -z "${test['diag']}" ]; then return; fi # Last poll download_project_file scmPollLog test['poll_date']=$(grep 'Started on' scmPollLog | sed -e 's|.*Started on ||' -e 's|,||g' || true) test['poll_date']=$(echo ${test['poll_date']} | sed -e 's|,||g' -e 's| mo | month |g' -e 's| hr | hour |g' || true) test['poll_date']=$(date --date="${test['poll_date']}" +"%x %R") # LastBuild run date download_project_file lastBuild/__toppage__ test['run_date']=$(grep 'Started .* ago' lastBuild/__toppage__ | sed -e 's|.*Started \(.*\) ago.*|\1 ago|'|head -1) } get_artifact_dir () { lookfor=$1 download_project_file ${test['run_nb']}/artifact/artifacts/__toppage__ local i nb stepname for i in {1..15}; do stepname=$(printf "%02d" $i)-$lookfor nb=$(grep -c "href=\"$stepname\"" ${test['run_nb']}/artifact/artifacts/__toppage__) if [ $nb != 0 ]; then test["run_dir_$lookfor"]="$stepname" echo "$stepname" #echo "$jenkins_base_url/job/${test['jkproject']}/${test['run_nb']}/artifact/artifacts/${test[run_dir_$lookfor]}" break fi done } get_run_title_and_status () { run=$1 verbose " - get_run_title_and_status() : $run" # Last run download_project_file $run/__toppage__ if [ -s "$run/__toppage__" ]; then test['run_title']=$(grep '.*' $run/__toppage__ | head -1 | sed -e 's|.*||' -e 's|.*||'||true) test['run_title']=$(echo ${test['run_title']}|sed -e 's|.* #||' -e 's| \[Jenkins\].*||') test['run_nb']=$(echo ${test['run_title']}|sed -e 's|\([0-9]*\)-.*|\1|') test['run_status']=$(grep 'tooltip' $run/__toppage__ | head -1 | sed -e 's|.*tooltip="||' -e 's|"* .*||' ||true) fi verbose " > [${test['run_status']}] ${test['run_title']}" } get_last_interesting_run () { gitprojectshort=$(echo ${test['gitproject']}|cut -d- -f1) test['last_run']="lastBuild" get_run_title_and_status "lastBuild" [ "x${test['run_nb']}" = "x" ] && return [[ "${test['gitproject']}" =~ base-artifacts ]] && return verbose " . last interesting run() : ${test['run_nb']}" export r # to avoid shellcheck unused warning for r in {1..8}; do get_run_title_and_status ${test['run_nb']} if [[ "${test['run_title']}" =~ $gitprojectshort ]] || [ $gitprojectshort == "*all*" ]; then test['last_run']=${test['run_nb']} verbose " > ${test['run_nb']}" return fi test['run_nb']=$((test['run_nb']-1)) done verbose " > ${test['run_nb']}" } ################## CLASSIFY FUNCTIONS classify_polling_error () { if [ ! -z "${test['diag']}" ]; then return; fi verbose " - classify_polling_error()" classify grep scmPollLog "Connection timed out" "ERROR(timeout while polling)" classify grep scmPollLog "fatal: read error: Connection reset by peer" "ERROR(fatal polling error)" } classify_project_deleted () { if [ ! -z "${test['diag']}" ]; then return; fi verbose " - classify_project_deleted()" classify exist __toppage__ "x" "ERROR (project doesnot exist)" } classify_project_disabled () { if [ ! -z "${test['diag']}" ]; then return; fi verbose " - classify_project_disabled()" classify grep __toppage__ "Project DELETE ME" "ERROR (project disabled)" } classify_gcc_boostrap_timeout () { if [ ! -z "${test['diag']}" ]; then return; fi verbose " - classify_gcc_boostrap_timeout()" classify grep __toppage__ "tcwg_gcc_bootstrap" "ERROR (bootstrap timeout)" } classify_analyse_console () { if [ ! -z "${test['diag']}" ]; then return; fi verbose " - classify_analyse_console()" classify grep lastBuild/consoleText "Build timed out" "ERROR (build timeout)" classify grep lastBuild/consoleText "FATAL: \[ssh-agent\] Unable to start agent" "ERROR (cannot start ssh-agent)" } classify_analyse_result_file () { local stage if [ ! -z "${test['diag']}" ]; then return; fi if [ "${test['run_status']}" == "Success" ]; then return; fi verbose " - classify_analyse_result_file()" download_project_file ${test['run_nb']}/artifact/artifacts/results while read line do # stage line pat='^# .*(reset_artifacts|build_abe|build_bmk_llvm|benchmark|linux_n_obj)' if [[ $line =~ $pat ]]; then stage="$(echo $line|sed -e 's|# ||' -e 's| --.*||' -e 's|:.*||')" #echo " $line => $stage" fi # Clear error line pat='^# Benchmarking infra is offline' if [[ $line =~ $pat ]]; then set_diag "ERROR (infra offline)" fi pat='# .* error: patch failed' if [[ $line =~ $pat ]]; then set_diag "ERROR (git patch failed)" fi pat='# First few build errors in logs' if [[ $line =~ $pat ]]; then set_diag "ERROR ($stage build errors)" fi pat='^# .*grew in size.*' if [[ $line =~ $pat ]]; then set_diag "ERROR (grew in size)" fi pat='^# .*slowed down.*' if [[ $line =~ $pat ]]; then set_diag "ERROR (slowed down)" fi pat='^# .*reduced by.*' if [[ $line =~ $pat ]]; then set_diag "ERROR (reduced)" fi # single message before reset-artifact pat='^# FAILED' if [[ $line =~ $pat ]] && [ ! -v stage ]; then set_diag "ERROR (FAILED in reset_artifacts)" fi [ ! -z "${test['diag']}" ] && break done < "${test['run_nb']}/artifact/artifacts/results" # If diag is set if [ ! -z "${test['diag']}" ]; then return; fi # otherwise fill with the stage if [[ -v stage ]]; then set_diag "ERROR ($stage)"; fi } ## Detection quite fragile for the moment classify_no_change_in_sources () { local pjt days_limit if [ ! -z "${test['diag']}" ]; then return; fi verbose " - classify_no_change_in_sources()" # how many days if [[ ${test['jkproject']} =~ -release- ]]; then days_limit="$((days+days))" else days_limit="$days" fi # get date download_project_file ${test['run_nb']}/artifact/artifacts/jenkins/manifest.sh # shellcheck disable=SC2034 declare -A rr debug # shellcheck disable=SC1090 source ${test['run_nb']}/artifact/artifacts/jenkins/manifest.sh # set diag if appopriate pjt=$(echo ${test['gitproject']}|cut -d- -f1) if [ "${rr[debug_${pjt}_date]+abc}" ]; then local last_commit_date start_warn_date last_commit_date="${rr[debug_${pjt}_date]}" start_warn_date=$(date +%s --date="$days_limit days ago") if [ "$last_commit_date" -lt "$start_warn_date" ]; then set_diag "ERROR (no change in sources)"; fi fi } ################## PRINT SUGGESTIONS print_suggestions () { local gitbase="ssh://git.linaro.org/toolchain/ci" if [ ${#list_err_noproject[@]} -ne 0 ]; then echo "1) For deleted projects you may want to DELETE the stored results branches:" |& tee -a $output tmpdir=/tmp/empty_git echo "mkdir -p $tmpdir && cd $tmpdir && git init" |& tee -a $output echo "git push $gitbase/base-artifacts.git \\" |& tee -a $output for br in "${list_err_noproject[@]}"; do echo " --delete refs/heads/linaro-local/ci/$br \\" |& tee -a $output done echo "" |& tee -a $output echo "rm -rf $tmpdir" |& tee -a $output fi } ################## MAIN CLASSIFY FUNCTIONS classify_failures () { local stale_jobs_file stale_jobs_file="$(pwd)/$1" [ "x$tmpdir" = "x" ] && tmpdir="$(mktemp -d -t tmpdir-XXXXXXXXXX)" echo "working in $tmpdir" cd $tmpdir printf "\n" |& tee -a $output printf "%-45s | %-13s | %-16s | %-16s | %-50s\n" "AUTOMATIC DIAGNOSTIC" "LAST UPDATED" "LAST POLLING" "LAST RUN" "PROJECT NAME" |& tee -a $output printf "====================================================================================================================================================================\n" |& tee -a $output while read -r line; do classify_get_project "$line" if [ $only != false ] && [[ ! ${test['jkproject']} =~ $only ]]; then continue fi if [ $verbose == "*all*" ]; then set -x; fi # Check if project exist before getting the infos classify_project_deleted # Get info from project get_project_info # Classify classify_polling_error # is disabled ? classify_project_disabled # deeper analyse get_last_interesting_run classify_analyse_console classify_analyse_result_file classify_no_change_in_sources [ -z "${test['diag']}" ] && set_diag "-" printf "%-45s | %-13s | %-16s | %-16s | %-16s %-50s\n" "${test['diag']}" "${test['last_updated']}" "${test['poll_date']}" "${test['run_date']}" "[${test['gitproject']}]" "$jenkins_base_url/job/${test['jkproject']}/${test['run_nb']}" |& tee -a $output done < $stale_jobs_file printf "====================================================================================================================================================================\n" |& tee -a $output printf "SUMMARY : \n" |& tee -a $output for K in "${!alldiags[@]}"; do printf " %-28s : %-3s\n" "$K" "${alldiags[$K]}" |& tee -a $output done printf " %-28s : %-3s\n" "TOTAL FAILURES" "$count_all" |& tee -a $output printf "====================================================================================================================================================================\n" |& tee -a $output printf "SUGGESTIONS : \n" |& tee -a $output print_suggestions printf "====================================================================================================================================================================\n" |& tee -a $output printf "\n" |& tee -a $output [ $keep_tmp ] || rm -rf $tmpdir } # If classify is specified, classify the failures if [ $classify != false ]; then classify_failures $classify exit 0; fi process_all_base_artifacts