diff options
Diffstat (limited to 'tcwg-report-stale-rr-jobs.sh')
-rwxr-xr-x | tcwg-report-stale-rr-jobs.sh | 578 |
1 files changed, 547 insertions, 31 deletions
# NOTE(review): this text was recovered from a whitespace-collapsed diff view
# of tcwg-report-stale-rr-jobs.sh.  It is the post-change side (context and
# "+" lines) of the single hunk, which starts right after the script's
# 'convert_args_to_variables "$@"' call.  Removed ("-") lines are not kept.

################## PARAMETERS
# Every setting keeps a value that is already defined when this point is
# reached (presumably by convert_args_to_variables -- confirm) and otherwise
# falls back to the default shown here.
days="${days-10}"
human="${human-true}"
output="${output-/dev/null}"
refs_prefix="${refs_prefix-refs/heads/linaro-local/ci/}"
refs_bkp_url_prefix="${refs_bkp_url_prefix-ssh://tcwg-buildslave@bkp.tcwglab/home/tcwg-buildslave}"
repos=("${repos[@]-default}")
verbose="${verbose-false}"
classify="${classify-false}"
only="${only-false}"
keep_tmp="${keep_tmp-false}"
tmpdir="${tmpdir-""}"

# The report is appended to $output by every "tee -a" below, so start from an
# empty file (unless the report goes to /dev/null).
output=$(realpath "$output")
if [ "$output" != "/dev/null" ]; then
    rm -f -- "$output"
fi

# This represents the average number of days between the last commit
# tested in the component, and the starting date of the jenkins test
declare -A delay_per_component=([binutils]=0 [gcc]=0 [glibc]=0 [llvm]=0 [linux]=8 [qemu]=0)

################## BASE-ARTIFACTS REPORTS

# Print one "<ci_project>/<ci_config>" per line for every "*.git" directory
# found below /home/tcwg-buildslave/base-artifacts/ on bkp.tcwglab.
_list_ci_project_configs ()
{
    ssh bkp.tcwglab 'cd /home/tcwg-buildslave/base-artifacts/; find . -type d -name "*.git"' \
        | sed -e 's/\.git$//' \
        | cut -d/ -f2-3
}

# Report the components of every base-artifacts repository whose
# "git/<component>_rev" file is older than $days days (plus the per-component
# delay from delay_per_component).
# Globals read: output, refs_bkp_url_prefix, days, only, delay_per_component.
# Uses get_git_history and get_manifest, which are defined outside the
# visible part of this file.
# Output: report lines on stdout, also appended to $output.
report_not_updated_component ()
{
    (
    set -euf -o pipefail

    echo -e "\n### Reporting component not updated recently\n" |& tee -a "$output"

    # process all unitary base-artifacts/<ci_project>/<ci_config>
    local ci_project_config
    local idx_remote=0
    while read -r ci_project_config; do

        local baseartifacts_url="$refs_bkp_url_prefix/base-artifacts/$ci_project_config.git"
        local repo_url_dash_branch="$baseartifacts_url#linaro-local/ci/$ci_project_config"
        local days_limit=$days

        idx_remote=$((idx_remote+1))
        verbose -en "# Processing base-artifacts no. $idx_remote\r"

        if [ "$only" != false ] && [[ ! $ci_project_config =~ $only ]]; then
            continue
        fi

        # -- get components for this project
        local components
        local -a manifests=()
        readarray -t manifests < <(get_git_history 1 "$repo_url_dash_branch" manifest.sh)
        if [ "${#manifests[@]}" != 2 ]; then
            # The first entry is handled as a scratch directory everywhere
            # else in this function (presumably created by get_git_history --
            # confirm), so do not leak it when skipping the project.
            if [ "${#manifests[@]}" -gt 0 ] && [ -n "${manifests[0]}" ]; then
                rm -rf -- "${manifests[0]}"
            fi
            continue
        fi
        components="$(get_manifest "${manifests[1]}" "{rr[components]}")"
        rm -rf -- "${manifests[0]}"

        # -- check last update of each component
        # Reset for every project: a bare "declare -A" would keep the stamps
        # collected for the previous project.
        local -A commit_stamps=()
        local c
        # $components is split on whitespace on purpose (globbing is off, -f).
        for c in $components; do
            local tmproot=""
            local -a git_history=()

            readarray -t git_history < <(get_git_history 1 "$repo_url_dash_branch" "git/${c}_rev")
            # "-" default: an empty result must not trip "set -u".
            tmproot=${git_history[0]-}
            if [ "${#git_history[@]}" == 2 ]; then
                # Modification time of the fetched <component>_rev file.
                commit_stamps[$c]="$(date -r "${git_history[1]}" "+%s")"
            fi
            if [ -n "$tmproot" ]; then
                rm -rf -- "$tmproot"
            fi
        done

        # -- dump the messages
        local now days_ago
        now=$(date +%s)
        for c in $components; do
            if [[ -v commit_stamps[$c] ]]; then
                days_ago=$(( (now - ${commit_stamps[$c]}) / (24 * 3600) ))
                # Components missing from delay_per_component get no extra delay.
                if [ "$days_ago" -gt "$((days_limit + ${delay_per_component[$c]:-0}))" ]; then
                    echo "$c: $ci_project_config: last updated $days_ago days ago" |& tee -a "$output"
                fi
            else
                echo "$c: $ci_project_config: no date for this component" |& tee -a "$output"
            fi
        done

    done < <(_list_ci_project_configs)

    verbose -en "# Processed $idx_remote base-artifacts\n"
    )
}

# Report the backup branches (refs/heads/linaro-local/v*_to_v*-*/*) of every
# base-artifacts repository, except the first ref in reverse sort order.
# Globals read: output, refs_bkp_url_prefix, only.
# Output: "<config>: <ref> : to be removed" lines on stdout and in $output.
report_old_backup_branches ()
{
    (
    set -euf -o pipefail

    echo -e "\n### Reporting useless backup branches\n" |& tee -a "$output"

    local ci_project_config br
    while read -r ci_project_config; do

        if [ "$only" != false ] && [[ ! $ci_project_config =~ $only ]]; then
            continue
        fi

        # get the branches of this $ci_project_config git repository
        local baseartifacts_url="$refs_bkp_url_prefix/base-artifacts/$ci_project_config.git"
        local -a git_branches=()
        readarray -t git_branches < <(git ls-remote "$baseartifacts_url" | cut -f2 | sort -r)

        # Nothing listed: avoid expanding ${git_branches[0]} under "set -u".
        if [ "${#git_branches[@]}" -eq 0 ]; then
            verbose "# $ci_project_config: no branch found"
            continue
        fi

        verbose "# $ci_project_config: ${git_branches[0]} : branch kept"
        for br in "${git_branches[@]:1}"; do

            if [[ $br =~ refs/heads/linaro-local/ci/.* ]]; then
                continue

            elif [[ $br =~ refs/heads/linaro-local/v.*_to_v.*-.*/.* ]]; then
                echo "$ci_project_config: $br : to be removed" |& tee -a "$output"

            else
                verbose "# $ci_project_config: $br : strangely formed"
            fi
        done

    done < <(_list_ci_project_configs)
    )
}

# Run both base-artifacts reports, one after the other.
process_all_base_artifacts ()
{
    (
    set -euf -o pipefail

    # -- Process every components dates
    report_not_updated_component

    # -- Check for old branches
    report_old_backup_branches
    )
}

################## CLASSIFICATION STATE
jenkins_base_url="https://ci.linaro.org"
use_last_build="${use_last_build-no}"

count_all=0
list_err_noproject=()

# test:     fields describing the job currently being classified.
# alldiags: number of jobs per diagnostic string.
declare -A test
declare -A alldiags

################## UTILITY FUNCTIONS

# Reset the "test" record from one report line ($1, of the form
# "<gitproject>: <branch>: last updated ..."), then create and enter the
# per-job work directory below $tmpdir.
classify_get_project()
{
    verbose " * $1"

    # zero-initialize test var
    test=(['gitproject']="" ['jkproject']="" ['branch']="" ['last_updated']=""
          ['poll_date']=""
          ['run_nb']="" ['run_date']="" ['run_status']="" ['run_title']="" ['run_check_regression']=""
          ['last_run']="" ['diag']="" )

    test['last_updated']=$(printf '%s\n' "$1" | sed -e 's|.*: last updated ||' -e 's|.*: No successful run since ||')
    test['gitproject']=$(printf '%s\n' "$1" | cut -d: -f 1)
    test['branch']=$(printf '%s\n' "$1" | cut -d: -f 2)
    # Drop the blank that follows the ':' separator.
    test['branch']=${test['branch']:1}

    # "<project>/<config>" becomes the job name "<project>--<config>-build".
    test['jkproject']=$(printf '%s\n' "${test['branch']}" | sed \
                            -e's|\(.*\)/\(.*\)|\1--\2-build|')

    verbose " : $jenkins_base_url/job/${test['jkproject']}"
    verbose " : $tmpdir/""${test['jkproject']}"

    mkdir -p "$tmpdir/${test['jkproject']}"
    cd "$tmpdir/${test['jkproject']}" || return 1
}

# Record diagnostic $1 for the current job and count it in alldiags.
set_diag()
{
    local diag_error="$1"
    test['diag']="$diag_error"

    if [ "${test['diag']}" == "ERROR (project doesnot exist)" ] &&
           [[ ! ${list_err_noproject[*]} =~ (^|[[:space:]])${test['branch']}($|[[:space:]]) ]]; then
        list_err_noproject+=("${test['branch']}")
    fi

    if [ -z "${alldiags["$diag_error"]+set}" ]; then
        alldiags["$diag_error"]=0
    fi
    # Expand the element with ${...} rather than naming it inside $(( )):
    # the key contains blanks and parentheses.
    alldiags["$diag_error"]=$(( ${alldiags["$diag_error"]} + 1 ))
    verbose " ==> diag=$diag_error"
}

# echo the arguments (echo options such as -en included) unless the "verbose"
# setting is "false".
verbose ()
{
    if [ "$verbose" != false ]; then
        echo "$@"
    fi
}

# Fetch <jenkins job url>/$1 into the job's work directory, unless a file of
# that name is already there.  "__toppage__" in $1 stands for the page of the
# directory itself ("." in the URL).  Download failures are ignored.
download_project_file ()
{
    local filename=$1
    local local_file=$filename
    local remote_file

    remote_file="$(printf '%s\n' "$filename" | sed -e 's|__toppage__|.|')"

    cd "$tmpdir/${test['jkproject']}" || return 1
    if [ -f "$local_file" ]; then
        return
    fi

    mkdir -p "$(dirname "$local_file")"
    wget -O "$(pwd)/$local_file" -o /dev/null "$jenkins_base_url/job/${test['jkproject']}/$remote_file" || true
}

# Set diagnostic $4 for the current job when the check succeeds; does nothing
# if the job already has a diagnostic.
#   $1 condition : "exist"  - $2 is missing or empty
#                  "grep"   - $2 contains $3
#                  "xzgrep" - xz-compressed $2 contains $3
#   $2 filename  : job file, downloaded on demand
#   $3 expression: grep pattern (ignored for "exist")
#   $4 diag_error: diagnostic text
classify ()
{
    local condition="$1"
    local filename="$2"
    local expression="$3"
    local diag_error="$4"
    local nb

    # Only if not already classified
    if [ -n "${test['diag']}" ]; then return; fi

    download_project_file "$filename"

    case "$condition" in
        exist)
            if [ ! -s "$filename" ]; then
                set_diag "$diag_error"
            fi
            ;;
        grep)
            if [ -f "$filename" ]; then
                nb=$(grep -c -- "$expression" "$filename" || true)
                # No count at all is treated as "no match".
                if [ "${nb:-0}" != "0" ]; then
                    set_diag "$diag_error"
                fi
            fi
            ;;
        xzgrep)
            if [ -s "$filename" ]; then
                nb=$(xzcat "$filename" | grep -c -- "$expression" || true)
                if [ "${nb:-0}" != "0" ]; then
                    set_diag "$diag_error"
                fi
            fi
            ;;
    esac
}


################## GET INFO FROM THE BUILD

# Count the job and fill test['poll_date'] and test['run_date'] from the
# scmPollLog and lastBuild pages.
get_project_info ()
{
    local poll_raw

    count_all=$((count_all+1))

    # Diag has been already classified. Probably means no project. don't go further
    if [ -n "${test['diag']}" ]; then return; fi

    # Last poll
    download_project_file scmPollLog
    poll_raw=$(grep 'Started on' scmPollLog | sed -e 's|.*Started on ||' -e 's|,||g' || true)
    # Unquoted on purpose: several matching lines are folded into one.
    # shellcheck disable=SC2086
    poll_raw=$(echo $poll_raw | sed -e 's|,||g' -e 's| mo | month |g' -e 's| hr | hour |g' || true)
    # GNU date turns an empty --date into today at 00:00; leave the field
    # empty instead of reporting a poll that was never found.
    if [ -n "$poll_raw" ]; then
        test['poll_date']=$(date --date="$poll_raw" +"%x %R")
    else
        test['poll_date']=""
    fi

    # LastBuild run date
    download_project_file lastBuild/__toppage__
    test['run_date']=$(grep 'Started .* ago' lastBuild/__toppage__ | sed -e 's|.*Started \(.*\) ago.*|\1 ago|'|head -1)
}

# Print the first "NN-$1" artifact directory (NN = 01..15) linked from the
# artifacts page of run test['run_nb'], and store it in test["run_dir_$1"].
get_artifact_dir ()
{
    local lookfor=$1
    local index_page="${test['run_nb']}/artifact/artifacts/__toppage__"
    local i nb stepname

    download_project_file "$index_page"
    for i in {1..15}; do
        stepname=$(printf "%02d" "$i")-$lookfor
        nb=$(grep -c "href=\"$stepname\"" "$index_page" || true)
        # An unreadable page gives no count; treat it as "not found".
        if [ "${nb:-0}" != 0 ]; then
            test["run_dir_$lookfor"]="$stepname"
            echo "$stepname"
            break
        fi
    done
}

# Fill test['run_title'], test['run_nb'] and test['run_status'] from the page
# of run $1 (a build number or "lastBuild"), when that page could be fetched.
get_run_title_and_status ()
{
    local run=$1

    verbose " - get_run_title_and_status() : $run"
    # Last run
    download_project_file "$run/__toppage__"
    if [ -s "$run/__toppage__" ]; then
        test['run_title']=$(grep '<title>.*</title>' "$run/__toppage__" | head -1 | sed -e 's|</title>.*||' -e 's|.*<title>||'||true)
        test['run_title']=$(printf '%s\n' "${test['run_title']}"|sed -e 's|.* #||' -e 's| \[Jenkins\].*||')
        test['run_nb']=$(printf '%s\n' "${test['run_title']}"|sed -e 's|\([0-9]*\)-.*|\1|')
        test['run_status']=$(grep 'tooltip' "$run/__toppage__" | head -1 | sed -e 's|.*tooltip="||' -e 's|"* .*||' ||true)
    fi
    verbose " > [${test['run_status']}] ${test['run_title']}"
}

# Starting from lastBuild, look at up to 8 runs (going backwards) for one
# whose title mentions the job's component, and leave it in test['run_nb']
# and test['last_run'].
get_last_interesting_run ()
{
    local gitprojectshort
    gitprojectshort=$(printf '%s\n' "${test['gitproject']}"|cut -d- -f1)

    test['last_run']="lastBuild"
    get_run_title_and_status "lastBuild"

    if [ -z "${test['run_nb']}" ]; then return; fi
    if [[ "${test['gitproject']}" =~ base-artifacts ]]; then return; fi

    verbose " . last interesting run() : ${test['run_nb']}"

    for _ in {1..8}; do
        get_run_title_and_status "${test['run_nb']}"
        if [[ "${test['run_title']}" =~ $gitprojectshort ]] ||
               [ "$gitprojectshort" == "*all*" ]; then
            test['last_run']=${test['run_nb']}
            verbose " > ${test['run_nb']}"
            return
        fi
        # ${...} instead of naming the quoted-key element inside $(( )).
        test['run_nb']=$(( ${test['run_nb']} - 1 ))
    done
    verbose " > ${test['run_nb']}"
}

################## CLASSIFY FUNCTIONS
# Each classifier is a no-op once the current job has a diagnostic.

# Polling errors found in scmPollLog.
classify_polling_error ()
{
    if [ -n "${test['diag']}" ]; then return; fi
    verbose " - classify_polling_error()"
    classify grep scmPollLog "Connection timed out" "ERROR(timeout while polling)"
    classify grep scmPollLog "fatal: read error: Connection reset by peer" "ERROR(fatal polling error)"
}

# The job's top page is missing or empty.
classify_project_deleted ()
{
    if [ -n "${test['diag']}" ]; then return; fi
    verbose " - classify_project_deleted()"
    classify exist __toppage__ "x" "ERROR (project doesnot exist)"
}

# The job's top page says the project is disabled.
classify_project_disabled ()
{
    if [ -n "${test['diag']}" ]; then return; fi
    verbose " - classify_project_disabled()"
    classify grep __toppage__ "This project is currently disabled" "ERROR (project disabled)"
}

# The job's top page mentions tcwg_gcc_bootstrap.
classify_gcc_boostrap_timeout ()
{
    if [ -n "${test['diag']}" ]; then return; fi
    verbose " - classify_gcc_boostrap_timeout()"
    classify grep __toppage__ "tcwg_gcc_bootstrap" "ERROR (bootstrap timeout)"
}

# Known failure messages in the console text of lastBuild.
classify_analyse_console ()
{
    if [ -n "${test['diag']}" ]; then return; fi
    verbose " - classify_analyse_console()"
    classify grep lastBuild/consoleText "Build timed out" "ERROR (build timeout)"
    classify grep lastBuild/consoleText "FATAL: \[ssh-agent\] Unable to start agent" "ERROR (cannot start ssh-agent)"
}

# Derive a diagnostic from the "results" artifact of run test['run_nb'] when
# that run was not a success: either a known error line or, failing that, the
# last stage line seen.
classify_analyse_result_file ()
{
    local stage line pat
    local results_file

    if [ -n "${test['diag']}" ]; then return; fi
    if [ "${test['run_status']}" == "Success" ]; then return; fi

    verbose " - classify_analyse_result_file()"

    # Without a run number the artifact path would start at "/".
    if [ -z "${test['run_nb']}" ]; then return; fi
    results_file="${test['run_nb']}/artifact/artifacts/results"
    download_project_file "$results_file"
    if [ ! -f "$results_file" ]; then return; fi

    while read -r line
    do
        # stage line
        pat='^# .*(reset_artifacts|build_abe|build_bmk_llvm|benchmark|linux_n_obj)'
        if [[ $line =~ $pat ]]; then
            stage="$(printf '%s\n' "$line"|sed -e 's|# ||' -e 's| --.*||' -e 's|:.*||')"
        fi

        # Clear error line
        pat='^# Benchmarking infra is offline'
        if [[ $line =~ $pat ]]; then
            set_diag "ERROR (infra offline)"
        fi
        pat='# .* error: patch failed'
        if [[ $line =~ $pat ]]; then
            set_diag "ERROR (git patch failed)"
        fi
        pat='# First few build errors in logs'
        if [[ $line =~ $pat ]]; then
            set_diag "ERROR ($stage build errors)"
        fi
        pat='^# .*grew in size.*'
        if [[ $line =~ $pat ]]; then
            set_diag "ERROR (grew in size)"
        fi
        pat='^# .*slowed down.*'
        if [[ $line =~ $pat ]]; then
            set_diag "ERROR (slowed down)"
        fi
        pat='^# .*reduced by.*'
        if [[ $line =~ $pat ]]; then
            set_diag "ERROR (reduced)"
        fi

        # single message before reset-artifact
        pat='^# FAILED'
        if [[ $line =~ $pat ]] && [ ! -v stage ]; then
            set_diag "ERROR (FAILED in reset_artifacts)"
        fi

        if [ -n "${test['diag']}" ]; then
            break
        fi

    done < "$results_file"

    # If diag is set
    if [ -n "${test['diag']}" ]; then return; fi

    # otherwise fill with the stage
    if [[ -v stage ]]; then
        set_diag "ERROR ($stage)"
    fi
}

## Detection quite fragile for the moment
# Flag the job when the manifest of run test['run_nb'] records a commit date
# (rr[debug_<project>_date]) older than the limit: $days days, doubled for
# "-release-" jobs.
classify_no_change_in_sources ()
{
    local pjt days_limit manifest

    if [ -n "${test['diag']}" ]; then return; fi

    verbose " - classify_no_change_in_sources()"
    # how many days
    if [[ ${test['jkproject']} =~ -release- ]]; then
        days_limit="$((days+days))"
    else
        days_limit="$days"
    fi

    # Without a run number the artifact path would start at "/".
    if [ -z "${test['run_nb']}" ]; then return; fi

    # get date
    manifest="${test['run_nb']}/artifact/artifacts/jenkins/manifest.sh"
    download_project_file "$manifest"
    if [ ! -f "$manifest" ]; then return; fi

    # shellcheck disable=SC2034
    declare -A rr debug
    # NOTE(review): this executes a file downloaded from $jenkins_base_url in
    # the current shell; it is only as trustworthy as that server.
    # shellcheck disable=SC1090
    source "$manifest"

    # set diag if appropriate
    pjt=$(printf '%s\n' "${test['gitproject']}"|cut -d- -f1)
    if [ "${rr[debug_${pjt}_date]+abc}" ]; then
        local last_commit_date start_warn_date
        last_commit_date="${rr[debug_${pjt}_date]}"
        start_warn_date=$(date +%s --date="$days_limit days ago")

        if [ "$last_commit_date" -lt "$start_warn_date" ]; then
            set_diag "ERROR (no change in sources)"
        fi
    fi
}


################## PRINT SUGGESTIONS

# Read report file $1 and, from its "useless backup branches" section on,
# print one "git branch -D" command per "to be removed" line and echo the
# "branch kept" lines.  Output goes to stdout and is appended to $output.
print_suggestions ()
{
    local stale_jobs_file="$1"
    local line ci_project_config branch
    local useless_backup_branches=false

    while read -r line; do
        if [[ "$line" =~ 'useless backup branches' ]]; then
            echo "1) To delete the useless backup branches:" |& tee -a "$output"
            useless_backup_branches=true
        fi
        if $useless_backup_branches; then
            if [[ "$line" =~ 'to be removed' ]]; then
                # Line layout: "<ci_project>/<ci_config>: <ref> : to be removed"
                ci_project_config=${line%%:*}
                branch=${line#*: }
                branch=${branch%% : *}
                # "git branch -D" takes the branch name, not the full ref.
                branch=${branch#refs/heads/}
                echo "git -C /home/tcwg-buildslave/base-artifacts/$ci_project_config.git branch -D $branch" |& tee -a "$output"
            elif [[ "$line" =~ 'branch kept' ]]; then
                echo "$line" |& tee -a "$output"
            fi
        fi
    done < "$stale_jobs_file"
}

################## MAIN CLASSIFY FUNCTIONS

# Classify every job listed in the "not updated recently" part of report file
# $1 and print a table, a per-diagnostic summary and clean-up suggestions
# (stdout, also appended to $output).
# Work files go to $tmpdir; when $tmpdir is empty a directory is created with
# mktemp and removed at the end unless keep_tmp is set to something other
# than "false".  A caller-supplied $tmpdir is never deleted.
classify_failures ()
{
    local stale_jobs_file line K
    local created_tmpdir=false
    local separator="===================================================================================================================================================================="

    # Accept absolute as well as relative report paths.
    case "$1" in
        /*) stale_jobs_file="$1" ;;
        *)  stale_jobs_file="$(pwd)/$1" ;;
    esac

    if [ -z "$tmpdir" ]; then
        tmpdir="$(mktemp -d -t tmpdir-XXXXXXXXXX)"
        created_tmpdir=true
    fi

    echo "working in $tmpdir"
    cd "$tmpdir" || return 1

    printf "\n" |& tee -a "$output"
    printf "%-45s | %-13s | %-16s | %-16s | %-50s\n" "AUTOMATIC DIAGNOSTIC" "LAST UPDATED" "LAST POLLING" "LAST RUN" "PROJECT NAME" |& tee -a "$output"
    printf '%s\n' "$separator" |& tee -a "$output"

    while read -r line;
    do

        if [ "$line" = "" ] || [[ "$line" =~ 'not updated recently' ]]; then
            continue
        fi
        # The job list ends where the backup-branch section starts.
        if [[ "$line" =~ 'useless backup branches' ]]; then
            break
        fi

        classify_get_project "$line"

        if [ "$only" != false ] && [[ ! ${test['jkproject']} =~ $only ]]; then
            continue
        fi

        if [ "$verbose" == "*all*" ]; then set -x; fi

        # Check if project exist before getting the infos
        classify_project_deleted

        # Get info from project
        get_project_info

        # Classify
        classify_polling_error

        # is disabled ?
        classify_project_disabled

        # deeper analyse
        get_last_interesting_run
        classify_analyse_console
        classify_analyse_result_file
        classify_no_change_in_sources
        if [ -z "${test['diag']}" ]; then
            set_diag "-"
        fi

        printf "%-45s | %-13s | %-16s | %-16s | %-16s %-50s\n" "${test['diag']}" "${test['last_updated']}" "${test['poll_date']}" "${test['run_date']}" "[${test['gitproject']}]" "$jenkins_base_url/job/${test['jkproject']}/${test['run_nb']}" |& tee -a "$output"
    done < "$stale_jobs_file"

    printf '%s\n' "$separator" |& tee -a "$output"
    printf "SUMMARY : \n" |& tee -a "$output"
    for K in "${!alldiags[@]}"; do
        printf " %-28s : %-3s\n" "$K" "${alldiags[$K]}" |& tee -a "$output"
    done
    printf " %-28s : %-3s\n" "TOTAL FAILURES" "$count_all" |& tee -a "$output"
    printf '%s\n' "$separator" |& tee -a "$output"
    printf "SUGGESTIONS : \n" |& tee -a "$output"
    print_suggestions "$stale_jobs_file"
    printf '%s\n' "$separator" |& tee -a "$output"
    printf "\n" |& tee -a "$output"

    # "[ $keep_tmp ]" is true for any non-empty string ("false" included),
    # so compare the value explicitly.
    if [ "$keep_tmp" = false ] && $created_tmpdir; then
        rm -rf -- "${tmpdir:?}"
    fi
}


################## MAIN

# If classify is specified, classify the failures
if [ "$classify" != false ]; then
    classify_failures "$classify"
    exit 0
fi

process_all_base_artifacts