diff options
Diffstat (limited to 'round-robin-baseline.sh')
-rwxr-xr-x | round-robin-baseline.sh | 635 |
1 files changed, 635 insertions, 0 deletions
#!/bin/bash
# Provenance: diff --git a/round-robin-baseline.sh b/round-robin-baseline.sh
#             new file mode 100755, index 00000000..a0d4dfe2
#
# Commit the artifacts of the current build into the local base-artifacts
# repository and, optionally, trim, push and/or rewrite its history.
#
# Obligatory variables:
#   rr[top_artifacts]       directory with artifacts of the current build
#   rr[update_baseline]     baseline update mode; values handled by this
#                           script: init, force, onsuccess
# Optional variables (default):
#   push_base_artifacts     (false) trim history and push base-artifacts
#   rewrite_base_artifacts  (false) rewrite out-of-date history entries;
#                           requires build_script
#   rewrite_num             (1)     N+1 to rewrite N revisions, 0 for all
#   commit_artifacts        (true)  commit current artifacts first
#   verbose                 (true)  enable "set -x"
#   max_removed_revs        (10%)   max revisions a rewrite may remove,
#                           absolute or percentage of considered revisions
#   skip_annex_downloads    (false) skip git-annex download of old artifacts
#
# Helper functions used below (convert_args_to_variables,
# obligatory_variables, assert_with_msg, get_manifest,
# get_baseline_manifest, get_git_history, git_annex_download,
# git_annex_upload, git_clean, manifest_out, manifest_pop) and the
# $INTERNAL_FAIL / $EXTERNAL_FAIL exit codes are not defined in this file;
# presumably they come from jenkins-helpers.sh, which is sourced below.

set -euf -o pipefail

scripts=$(dirname "$0")
# shellcheck source=jenkins-helpers.sh
. "$scripts/jenkins-helpers.sh"

convert_args_to_variables "$@"

obligatory_variables rr[top_artifacts] rr[update_baseline]
declare -A rr

push_base_artifacts="${push_base_artifacts-false}"
rewrite_base_artifacts="${rewrite_base_artifacts-false}"
rewrite_num="${rewrite_num-1}"
commit_artifacts="${commit_artifacts-true}"
verbose="${verbose-true}"
max_removed_revs="${max_removed_revs-10%}"
skip_annex_downloads="${skip_annex_downloads-false}"

if $rewrite_base_artifacts; then
    obligatory_variables build_script
    declare build_script
fi

# To enable rewrite:
# - set rewrite_base_artifacts to true, and
# - set rewrite_num to N+1 to rewrite N oldest revisions (0 for all)

# To rewrite local base-artifacts (e.g., for testing of round-robin-notify.sh
# or bmk-scripts):
# 1. Increase rr[minor] or rr[major] in your local build script.
# 2. Do a baseline build (to clone all repos and checkout dependencies of
#    round-robin-notify.sh).
#    ~/jenkins-scripts/tcwg_bmk-build.sh '%%rr[top_artifacts]' artifacts \
#      '==rr[ci_project]' CI_PROJECT '==rr[ci_config]' CI_CONFIG
# 2a. If you know that you have all dependencies already present, then just
#     copy latest artifacts from base-artifacts's HEAD, and edit manifest
#     manually to increase rr[minor] or rr[major].
#     rsync -a --del --exclude /.git base-artifacts/ artifacts/
#     vi artifacts/manifest.sh
# 3. Run this script with "__push_base_artifacts false",
#    "__commit_artifacts false" and "__rewrite_num 0".
#    ~/jenkins-scripts/round-robin-baseline.sh \
#      '@@rr[top_artifacts]' artifacts __build_script tcwg_bmk-build.sh \
#      __push_base_artifacts false __commit_artifacts false \
#      __rewrite_base_artifacts true __rewrite_num 0
# 4. Note that the above will not change upstream base-artifacts, but notify
#    logic may push to interesting commits and/or update jira cards.

# The patch version represents the version of the generated notification
# files.  Upgrading it will automatically enable the rewrite process below.
rr[patch]=0

if $verbose; then
    set -x
fi

# Trim history of base-artifacts to keep repo size manageable.
# Takes no arguments; operates on the ./base-artifacts git repository and
# rewrites its history in place with "git filter-repo".
trim_base_artifacts ()
{
    (
    set -euf -o pipefail

    # - For the last 100 builds: keep everything
    # - For the next 100 builds: keep essential artifacts
    # -- NN-<step> directories are non-essential, the rest -- jenkins/,
    #    dashboard/, etc. -- are essential.
    # - For the rest of the builds: keep only "update_baseline==force" builds
    local old_commit
    old_commit=$(git -C base-artifacts rev-parse --verify HEAD~100 \
                     2>/dev/null || true)

    if [ "$old_commit" = "" ]; then
        # Fewer than 101 commits -- nothing to trim.
        return 0
    fi

    local head
    head=$(git -C base-artifacts rev-parse HEAD)

    # Remove step directories (start with a number) from $old_commit
    # and older.
    git -C base-artifacts checkout --detach "$old_commit"
    git -C base-artifacts filter-repo --force \
        --invert-paths --path-regex '^[0-9].*' \
        --refs HEAD

    local new_old_commit
    new_old_commit=$(git -C base-artifacts rev-parse HEAD)

    # Walk through even older history (starting with new_old_commit~100)
    # and leave only commits that have update_baseline=={force,init}.
    local child orig_parent new_parent
    child=$(git -C base-artifacts rev-parse --verify HEAD~100 \
                2>/dev/null || true)

    while [ "$child" != "" ]; do
        git -C base-artifacts checkout --detach "$child"

        orig_parent=$(git -C base-artifacts rev-parse --verify HEAD^ \
                          2>/dev/null || true)

        # Find new_parent -- commit that has update_baseline!=onsuccess.
        new_parent=""
        while true; do
            new_parent=$(git -C base-artifacts rev-parse --verify HEAD^ \
                             2>/dev/null || true)
            if [ "$new_parent" = "" ]; then
                break
            fi

            git -C base-artifacts checkout --detach "$new_parent"

            local u_b
            u_b=$(get_baseline_manifest "{rr[update_baseline]}")
            if [ "$u_b" != "onsuccess" ]; then
                break
            fi
        done

        # Replace $orig_parent with $new_parent, and update new_old_commit.
        if [ "$new_parent" != "$orig_parent" ]; then
            # Note that if $new_parent is empty, then $child will become
            # the root commit.  The ${var:+...} form passes no argument at
            # all in that case (an empty-string argument would be rejected
            # by git).
            git -C base-artifacts replace --force --graft "$child" \
                ${new_parent:+"$new_parent"}
            git -C base-artifacts checkout --detach "$new_old_commit"
            git -C base-artifacts filter-repo --force --refs HEAD
            git -C base-artifacts replace --delete "$child"
            new_old_commit=$(git -C base-artifacts rev-parse HEAD)
        fi

        # Proceed to the next commit in history.
        child="$new_parent"
    done

    git -C base-artifacts checkout --detach "$head"

    # Reparent history on the new version of $old_commit.
    if [ "$old_commit" != "$new_old_commit" ]; then
        git -C base-artifacts replace --force "$old_commit" "$new_old_commit"
        git -C base-artifacts filter-repo --force --refs HEAD
        git -C base-artifacts replace --delete "$old_commit"
    fi
    )
}

# Commit current result and artifacts to the baseline repository.
# Reads rr[top_artifacts], rr[update_baseline], rr[no_regression_result]
# and, when set, BUILD_URL.  With rr[update_baseline]==init the commit
# amends HEAD of base-artifacts instead of adding a new commit.
update_baseline ()
{
    (
    set -euf -o pipefail

    # Rsync current artifacts.  Make sure to use -I rsync option since
    # quite often size and timestamp on artifacts/results will be the same
    # as on base-artifacts/results due to "git reset --hard HEAD^" below.
    # This caused rsync's "quick check" heuristic to skip "results" file.
    # !!! From this point on, logs and other artifacts won't be included
    # in base-artifacts.git repo (though they will be uploaded to jenkins).
    rsync -aI --del --exclude /.git "${rr[top_artifacts]}/" base-artifacts/

    # Extra "git commit" options; kept in an array so that "no option"
    # expands to no argument.
    local -a commit_opts=()
    if [ x"${rr[update_baseline]}" = x"init" ]; then
        commit_opts+=(--amend)
    fi

    local msg_title="${rr[update_baseline]}"
    if [ x"${BUILD_URL+set}" = x"set" ]; then
        # Add build number
        msg_title="$msg_title: #$(basename "$BUILD_URL")"
    fi
    # Last non-comment line of the results file.
    msg_title="$msg_title: $(grep -v "^#" "${rr[top_artifacts]}/results" | tail -n1)"
    msg_title="$msg_title: [TCWG CI] ${BUILD_URL-$(pwd)}"

    git -C base-artifacts add .
    git -C base-artifacts commit "${commit_opts[@]}" -m "$msg_title

Results :
$(sed -e 's/^/ | /' "${rr[top_artifacts]}/results")

check_regression status : ${rr[no_regression_result]}
"
    )
}

# Re-generate one historical commit of base-artifacts with the current
# version of the scripts and splice the result back into history.
#   $1 - commit in base-artifacts to rewrite
#   $2 - path prefix for the log/diff files produced by this rewrite
# On success the history of base-artifacts is rewritten so that $1 is
# replaced either by a re-generated commit or, when the old result has to
# be dropped ("$2.removed" is created), by its parent.
rewrite_single_revision ()
{
    (
    set -euf -o pipefail
    local old_commit="$1"
    local log_prefix="$2"

    local orig_head
    orig_head=$(git -C base-artifacts rev-parse HEAD)

    # Return to $orig_head in case of an error.
    # $orig_head is expanded now, on purpose.
    # shellcheck disable=SC2064
    trap "echo CLEANUP; git -C base-artifacts reset --hard $orig_head" EXIT

    local -a fixup_opts=()

    echo "Rewriting revision $old_commit :"
    echo " $(git -C base-artifacts show --no-patch --oneline "$old_commit")"
    echo ""

    # Verify that parent of $old_commit is reasonable:
    # 1. it has manifest.sh;
    # 2. TODO: maybe check that git_annex_download() succeeds for
    #    base-artifacts?  Thinking about it, that should have been
    #    verified on the previous call of rewrite_single_revision()
    #    that processed the parent revision.
    #
    # At least in one case we had history starting with an empty commit, which
    # wasn't amended in update_baseline().  Scanning of history in
    # rewrite_base_artifacts() ignored the empty commit because it had no
    # manifest.sh file.  The loop below can be used to remove unwanted weird
    # commits from history.
    local old_parent="$old_commit"
    while true; do
        old_parent=$(git -C base-artifacts rev-parse --verify "$old_parent^" \
                         2>/dev/null || true)
        if [ "$old_parent" = "" ]; then
            # We reached beginning of history.
            break
        fi

        git -C base-artifacts checkout --detach "$old_parent"
        if ! [ -f base-artifacts/manifest.sh ]; then
            # Remove commits with no manifest.sh .
            continue
        fi

        break
    done

    if [ "$old_parent" != "" ]; then
        git_annex_download base-artifacts annex
    else
        # Initialize a new baseline when base-artifacts is empty.
        fixup_opts+=("==rr[update_baseline]" init)
        # FIXME: Move empty.git to bkp.tcwglab.
        git -C base-artifacts fetch \
            ssh://bkp.tcwglab/home/tcwg-buildslave/base-artifacts/empty.git \
            refs/heads/empty
        git -C base-artifacts checkout --detach FETCH_HEAD
    fi

    local old_artifacts="${rr[top_artifacts]}/99-rewrite/artifacts.old"

    # Fetch artifacts/ of old build
    rm -rf "$old_artifacts"
    mkdir -p "$old_artifacts"
    git -C base-artifacts archive "$old_commit" | tar x -C "$old_artifacts"

    # FIXME: Remove workarounds for out-dated files:
    # Remove .gitignore that ignores annex/bmk-data symlink.
    rm -f "$old_artifacts/.gitignore"
    # Remove results_id now that we use annex/bmk-data .  Note that we have
    # fetched the results pointed to by results_id in git_annex_download().
    rm -f "$old_artifacts/results_id"

    # Fetch old rr values before they are re-written
    local old_manifest="$old_artifacts/manifest.sh"
    local -A old
    old[major]=$(get_manifest "$old_manifest" "{rr[major]-0}")
    old[minor]=$(get_manifest "$old_manifest" "{rr[minor]-0}")
    old[patch]=$(get_manifest "$old_manifest" "{rr[patch]-0}")
    old[notify]=$(get_manifest "$old_manifest" "{notify-}")
    old[update_baseline]=$(get_manifest "$old_manifest" \
                                        "{rr[update_baseline]-}")
    old[ci_project]=$(get_manifest "$old_manifest" "{rr[ci_project]-}")
    old[ci_config]=$(get_manifest "$old_manifest" "{rr[ci_config]-}")

    # Download the annex, unless the user explicitly asked to skip it.
    local res force_remove=false
    if ! $skip_annex_downloads; then
        # Run in background and "wait" so that a failure is captured in
        # $res instead of terminating this subshell via "set -e".
        git_annex_download "$old_artifacts" annex &
        res=0 && wait $! || res=$?
        if [ $res != 0 ]; then
            # Something has happened to the annex'ed files.  Remove the result.
            force_remove=true
        fi
    fi

    case "${old[major]}.${old[minor]}" in
        "0."*)
            # FIXME: Workaround old/renamed names of ci_project/ci_config.
            # This is, mostly, for tcwg_bmk_tx1 and tcwg_bmk_tk1 projects.
            if [ "${old[ci_project]}" != "${rr[ci_project]}" ]; then
                fixup_opts+=("==rr[ci_project]" "${rr[ci_project]}")
            fi
            if [ "${old[ci_config]}" != "${rr[ci_config]}" ]; then
                fixup_opts+=("==rr[ci_config]" "${rr[ci_config]}")
            fi

            # FIXME: Remove old result with no git/ information.
            # We have switched to storing git information in artifacts/git/
            # directory long time ago, so it isn't worth the effort to
            # workaround such cases.  Just remove the result.
            # Note that this will remove the result even when only minor
            # (not major) version is increased.
            if ! [ -d "$old_artifacts/git" ]; then
                force_remove=true
            fi
            ;;
    esac

    res=0
    # If major and minor are the same, it means that check_regression stage
    # is already up-to-date.  Only append the manifest with patch version
    if [ "${rr[major]-0}.${rr[minor]-0}" == "${old[major]}.${old[minor]}" ]; then
        echo "rr[patch]=${rr[patch]}" | manifest_out
    else
        # otherwise run the check_regression stage
        "$scripts/$build_script" \
            @@rr[top_artifacts] "$old_artifacts" __start_at check_regression \
            "${fixup_opts[@]}" &
        res=0 && wait $! || res=$?
    fi

    if [ $res != 0 ]; then
        # check_regression() can fail for ${old[update_baseline]}=="onsuccess"
        # builds, and, in rare cases for ${old[update_baseline]}=="force"
        # builds.  So far we have encountered a case when "forced" entry
        # fails due to corrupted data, which we happened to accept in the past.
        # In this case we get $EXTERNAL_FAIL as the exit code.
        assert_with_msg "check_regression() failed on forced update_baseline" \
                        [ "${old[update_baseline]}" = "onsuccess" \
                          -o "$res" = "$EXTERNAL_FAIL" ]

        if [ "${rr[major]-0}" -gt "${old[major]}" ]; then
            # $build_script [somewhat expectedly] failed to process old results,
            # so remove it from history.
            # In this case $new_old_commit will be set to $old_commit's parent,
            # so $old_commit will be removed from history.
            force_remove=true
        fi
    fi

    if $force_remove; then
        res=1
    fi

    if [ $res = 0 ]; then
        local -a notify_opts=()

        case "${old[major]}.${old[minor]}" in
            "0."*)
                # FIXME: Workaround possible lack of "$notify" in v0.*
                # manifests.
                # Remove once there are no configurations with v0.0 manifests.
                case "${old[notify]}":"${old[update_baseline]}" in
                    "":"force") notify_opts=(--notify onregression) ;;
                    "":*) notify_opts=(--notify ignore) ;;
                esac
                ;;
        esac

        "$scripts/round-robin-notify.sh" \
            @@rr[top_artifacts] "$old_artifacts" __post_mail false \
            __post_jira_comment false "${notify_opts[@]}" \
            __build_script "$build_script" \
            __verbose "$verbose" &> "$log_prefix-notify.log"

        (
            # Re-read rr[] for the old artifacts in a nested subshell, so
            # that the caller's rr[] is left intact, and commit the
            # re-generated artifacts on top of the (checked out) parent.
            unset rr
            manifest_pop
            declare -A rr
            convert_args_to_variables @@rr[top_artifacts] "$old_artifacts"
            update_baseline

            local repo1="${rr[baseline_branch]#linaro-local/ci/}"
            git_annex_upload base-artifacts annex \
                             "$repo1/$(basename "${BUILD_URL-0}")-"
        )

        git -C base-artifacts diff "$old_commit" "HEAD" -- manifest.sh \
            &> "$log_prefix-manifest.diff"
        git -C base-artifacts diff "$old_commit" "HEAD" -- notify/ \
            &> "$log_prefix-notify.diff"
        git -C base-artifacts diff --stat -p "$old_commit" "HEAD" -- \
            ':(exclude)manifest.sh' ':(exclude)notify/' \
            &> "$log_prefix-other.diff"
    elif $force_remove; then
        touch "$log_prefix.removed"
        # Above "git_annex_download base-artifacts annex" may have changed
        # files in base-artifacts/annex/ directory.  Restore to pristine
        # state to avoid failure in "git -C base-artifacts checkout" below.
        git_clean base-artifacts
    else
        # $build_script [unexpectedly] failed to process old results,
        # so fail and notify developers (by sending error-mail).
        assert_with_msg "$build_script failed to process $old_commit" false
    fi

    local new_old_commit
    new_old_commit=$(git -C base-artifacts rev-parse HEAD)

    assert_with_msg "Rewritten commit did not change" \
                    [ "$old_commit" != "$new_old_commit" ]

    # Reparent history on the new version of $old_commit.
    # From here on the cleanup handler must not reset to the old head.
    trap "" EXIT
    git -C base-artifacts checkout --detach "$orig_head"
    git -C base-artifacts replace --force "$old_commit" "$new_old_commit"
    git -C base-artifacts filter-repo --force --refs HEAD
    git -C base-artifacts replace --delete "$old_commit"
    )
}

declare -g rewrite_base_artifacts_first=true
# Update history of base-artifacts: find one revision whose manifest
# version (major.minor.patch) is older than the current rr[] version and
# rewrite it with rewrite_single_revision().  Creates
# ${rr[top_artifacts]}/99-rewrite/more when a revision was rewritten, to
# tell the caller to scan again.  On the first call it also saves a backup
# branch of the pre-rewrite history.
rewrite_base_artifacts ()
{
    (
    set -euf -o pipefail

    # The scan below is too noisy to trace.
    set +x

    local n_rev=0 total_revs=-1

    # Scan manifests in base-artifacts history for the first entry that
    # needs updating.  The first line produced by get_git_history() is
    # treated as the root of its temporary output (removed below);
    # subsequent lines are manifest paths.
    local manifest history_root="" old_revision=""
    local -A old
    while read -r manifest; do
        total_revs=$(($total_revs + 1))
        if [ "$history_root" = "" ]; then
            history_root="$manifest"
            continue
        elif [ "$old_revision" != "" ]; then
            # Continue reading from get_git_history() to have it finish
            # gracefully.
            continue
        fi
        n_rev=$(($n_rev + 1))

        old[major]=$(get_manifest "$manifest" "{rr[major]-0}")
        old[minor]=$(get_manifest "$manifest" "{rr[minor]-0}")
        old[patch]=$(get_manifest "$manifest" "{rr[patch]-0}")

        # The version comparison below packs minor and patch into two
        # decimal digits each.
        assert_with_msg "rr[minor] should be less than 100" [ "${rr[minor]-0}" -lt 100 ]
        assert_with_msg "rr[patch] should be less than 100" [ "${rr[patch]-0}" -lt 100 ]

        if [ "$(( rr[major]*100*100 + rr[minor]*100 + rr[patch] ))" -gt \
             "$(( old[major]*100*100 + old[minor]*100 + old[patch] ))" ]; then
            # Found old entry to update;
            # directory name of $manifest is the revision
            old_revision=$(basename "$(dirname "$manifest")")
        fi
    done < <(get_git_history -0 base-artifacts manifest.sh)

    if $verbose; then
        set -x
    fi

    rm -rf "$history_root"

    if [ "$old_revision" = "" ]; then
        # Everything is up-to-date.
        return 0
    fi

    local rewrite_top="${rr[top_artifacts]}/99-rewrite"

    if $rewrite_base_artifacts_first; then
        local change_tag
        change_tag="v${old[major]}.${old[minor]}.${old[patch]}_to_v${rr[major]-0}.${rr[minor]-0}.${rr[patch]-0}"
        if [ "${BUILD_URL-}" != "" ]; then
            change_tag="$change_tag-$(basename "$BUILD_URL")"
        fi

        # Save pre-rewrite history under linaro-local/$change_tag/... .
        local backup_branch
        backup_branch=$(echo "${rr[baseline_branch]}" \
                            | sed -e "s#linaro-local/ci/#linaro-local/$change_tag/#")
        if $push_base_artifacts; then
            local repo="${rr[baseline_branch]#linaro-local/ci/}"
            repo="ssh://bkp.tcwglab/home/tcwg-buildslave/base-artifacts/$repo.git"

            git -C base-artifacts push --force \
                "$repo" "HEAD:refs/heads/$backup_branch"
        else
            git -C base-artifacts branch --force "$backup_branch" HEAD
        fi

        rm -rf "$rewrite_top"
    fi

    local log_prefix="$rewrite_top/$rewrite_num-$n_rev-$total_revs"
    mkdir -p "$(dirname "$log_prefix")"

    echo -e "\n"" Rewriting: $(git -C base-artifacts show --no-patch --oneline "$old_revision")""\n"

    rewrite_single_revision "$old_revision" "$log_prefix" \
                            &> "$log_prefix-rewrite.log"

    # Rescan base-artifacts again for another entry to update.
    touch "$rewrite_top/more"
    )
}

# Push base-artifacts, or, maybe, skip.
# The first push, which is outside of rewrite process, always happens.
# Subsequent pushes may be skipped, if the previous push is still running.
# This is an optimization to avoid re-pushing histories during rewrite,
# which are only to be discarded moments later.
# The "git push" itself is started in the background; its PID is left in
# $push_baseline_pid and the caller is responsible for waiting on it.
# $push_baseline_skipped counts pushes skipped since the last started one.
declare -g push_baseline_pid=0
declare -g push_baseline_skipped=0
push_baseline ()
{
    if [ "$push_baseline_pid" != "0" ]; then
        if ! ps -p "$push_baseline_pid" >/dev/null; then
            # Previous push has finished; reap it (and fail if it failed).
            wait "$push_baseline_pid"
            push_baseline_pid=0
        fi

        if [ "$push_baseline_pid" != "0" ]; then
            push_baseline_skipped=$(($push_baseline_skipped + 1))
            return 0
        fi
    fi

    push_baseline_skipped=0

    local repo1="${rr[baseline_branch]#linaro-local/ci/}"
    local repo="ssh://bkp.tcwglab/home/tcwg-buildslave/base-artifacts/$repo1.git"

    (
        set -euf -o pipefail

        git_annex_upload base-artifacts annex "$repo1/$(basename "${BUILD_URL-0}")-"

        # Create the remote repository if it does not exist yet.
        if ! git ls-remote --heads "$repo" &>/dev/null; then
            ssh bkp.tcwglab git init --bare \
                "/home/tcwg-buildslave/base-artifacts/$repo1.git"
        fi
    )

    git -C base-artifacts push --force \
        "$repo" "HEAD:refs/heads/${rr[baseline_branch]}" &
    push_baseline_pid=$!
}

if $commit_artifacts; then
    update_baseline
fi

# make sure base-artifact repository is clean to continue
git -C base-artifacts reset --hard

# Compute the maximum number of revisions that we accept to remove.  If we
# remove more than expected, this is suspicious: stop the rewriting process.
declare nb_revs nb_removed_revs
nb_revs=$(git -C base-artifacts rev-list --count HEAD)
# Only up to $rewrite_num revisions are considered.  rewrite_num <= 0 means
# "rewrite all", so in that case the whole history is the reference (taking
# the minimum with 0 would forbid removing any revision at all).
if (( rewrite_num > 0 && nb_revs > rewrite_num )); then
    nb_revs=$rewrite_num
fi
if [[ "$max_removed_revs" =~ .*% ]]; then
    # If max_removed_revs is expressed in percentage of the total revisions
    # convert max_removed_revs in term of number of revisions.
    # E.g., "10%" becomes "10 / 100", and then nb_revs * 10 / 100.
    max_removed_revs=${max_removed_revs/\%/ / 100}
    max_removed_revs=$((nb_revs * $max_removed_revs))
fi

while true; do
    if $push_base_artifacts; then
        if $rewrite_base_artifacts_first; then
            # Trimming base-artifacts takes a lot of time on big histories,
            # and it doesn't really do anything on repeat trimmings during
            # history rewrite.  Therefore, trim only on the first iteration
            # of this loop.
            trim_base_artifacts
        fi
        push_baseline
        if $rewrite_base_artifacts_first; then
            # We create a backup copy of the branch when rewriting the first
            # revision.  If we don't have the initial push done by that time
            # it would start to push a duplicate copy of baseline, thus slowing
            # the initial push.  Therefore, wait for the initial push here.
            wait "$push_baseline_pid"
            push_baseline_pid=0
        fi
    fi

    if $rewrite_base_artifacts; then
        rewrite_num=$(($rewrite_num - 1))
        if [ "$rewrite_num" = "0" ]; then
            break
        fi

        rm -f "${rr[top_artifacts]}/99-rewrite/more"
        # Run in background and "wait" to capture the exit status without
        # tripping "set -e".
        rewrite_base_artifacts &
        res=0 && wait $! || res=$?
        rewrite_base_artifacts_first=false

        if [ "$res" != "0" ]; then
            echo "WARNING: failed rewriting base-artifacts"
            # Write error-mail files into the same directory that is tested
            # for existence.
            if [ -d "${rr[top_artifacts]}/jenkins" ]; then
                echo "maxim.kuvyrkov@linaro.org, laurent.alfonsi@linaro.org" \
                     > "${rr[top_artifacts]}/jenkins/error-mail-recipients.txt"
                echo -e "${BUILD_URL-}\nWARNING: failed rewriting base-artifacts" \
                     >> "${rr[top_artifacts]}/jenkins/error-mail-body.txt"
            fi
            # If fail happens during a rewrite, we consider it as an internal_fail
            exit $INTERNAL_FAIL
        fi

        if [ -f "${rr[top_artifacts]}/99-rewrite/more" ]; then

            nb_removed_revs="$(find "${rr[top_artifacts]}/99-rewrite/" \
                                    -maxdepth 1 -name '*.removed' | wc -l)"

            if [ "$nb_removed_revs" -gt "$max_removed_revs" ]; then
                echo "WARNING: Too many revisions removed. Aborting."
                if [ -d "${rr[top_artifacts]}/jenkins" ]; then
                    echo "maxim.kuvyrkov@linaro.org, laurent.alfonsi@linaro.org" \
                         > "${rr[top_artifacts]}/jenkins/error-mail-recipients.txt"
                    echo -e "${BUILD_URL-}\nWARNING: Too many revisions removed while rewriting base-artifacts" \
                         >> "${rr[top_artifacts]}/jenkins/error-mail-body.txt"
                fi
                # If fail happens during a rewrite, we consider it as an internal_fail
                exit $INTERNAL_FAIL
            fi

            # Push current version and search for another revision to update.
            continue
        fi
    fi

    break
done

if [ "$push_baseline_pid" != "0" ]; then
    wait "$push_baseline_pid"
    push_baseline_pid=0
    if [ "$push_baseline_skipped" != "0" ]; then
        # Do the final push, which was previously skipped, and wait for it:
        # push_baseline only starts "git push" in the background, and the
        # script must neither exit before it completes nor miss its failure.
        push_baseline
        wait "$push_baseline_pid"
        push_baseline_pid=0
    fi
fi