diff options
Diffstat (limited to 'jenkins-helpers.sh')
-rw-r--r-- | jenkins-helpers.sh | 1334 |
1 files changed, 1087 insertions, 247 deletions
diff --git a/jenkins-helpers.sh b/jenkins-helpers.sh index 7feb3edf..c6e23431 100644 --- a/jenkins-helpers.sh +++ b/jenkins-helpers.sh @@ -29,12 +29,12 @@ abs_path () assert_with_msg () { ( - set -euf -o pipefail + set -euf -o pipefail +x local failure_message=$1 shift - eval "$@" || (echo "$failure_message" && exit 1) + eval "$*" || (echo "$failure_message" >&2 && exit 1) ) } @@ -42,9 +42,9 @@ assert_with_msg () assert () { ( - set -euf -o pipefail + set -euf -o pipefail +x - eval "$@" + eval "$*" ) } @@ -78,6 +78,9 @@ fresh_dir () done done + # Make sure we can delete the files + find "$dir" "${find_opts[@]}" -type d -exec chmod +rwx {} \; + chmod -R +rw "$dir" find "$dir" "${find_opts[@]}" -delete ) } @@ -123,7 +126,7 @@ print_node_with_least_containers () # Re. --random-sort below: shuffle node list to mitigate races # when starting multiple containers at the same time - testers=$(print_nodes_in_labels ${tester_labels[*]} | sort --random-sort) + testers=$(print_nodes_in_labels "${tester_labels[@]}" | sort --random-sort) for tester in $testers; do ret=0 tester_host=$(print_host_for_node $tester "ignore_fail") @@ -153,8 +156,8 @@ print_arch_for_label () case $label in tcwg-x86_64-*) echo amd64 ;; tcwg-x86_32-*) echo i386 ;; - tcwg-amp_64-*|tcwg-apm_64-*|tcwg-d05_64-*|tcwg-lc_64*|tcwg-sq_64-*|tcwg-thx1_64-*|tcwg-tx1_64-*) echo arm64 ;; - tcwg-amp_32-*|tcwg-apm_32-*|tcwg-d05_32-*|tcwg-sq_32-*|tcwg-tk1_32-*|tcwg-tx1_32-*) echo armhf ;; + tcwg-amp_64-*|tcwg-apm_64-*|tcwg-armv8_64|tcwg-d05_64-*|tcwg-lc_64*|tcwg-sq_64-*|tcwg-thx1_64-*|tcwg-tx1_64-*) echo arm64 ;; + tcwg-amp_32-*|tcwg-apm_32-*|tcwg-armv7|tcwg-armv8_32|tcwg-d05_32-*|tcwg-sq_32-*|tcwg-tk1_32-*|tcwg-tx1_32-*) echo armhf ;; *) echo "ERROR: Unsupported label: $label" >&2; exit 1 ;; esac ) @@ -185,7 +188,7 @@ print_host_for_node () # .ssh/config (in dockerfiles.git/tcwg-base/tcwg-buildslave/). for suffix in "" ".tcwglab"; do host="$1$suffix" - if timeout 30s ssh "$host" true >& /dev/null; then + if timeout 30s ssh "$host" true &> /dev/null; then break fi host="" @@ -249,12 +252,55 @@ print_tester_label_for_target () # for cross-testing. This means we no longer test on armv7 # hardware. aarch64-linux*) echo "tcwg-apm_64-test" ;; - armv8l-linux*) echo "tcwg-apm_32-test" ;; - arm-linux*) echo "tcwg-apm_32-test" ;; + armv8l-linux*) echo "tcwg-armv8_32" ;; + arm-linux*) echo "tcwg-armv7" ;; esac ) } +# Print number of busy executors on a jenkins node +# $1: node +print_number_of_busy_executors () +{ + ( + set -euf -o pipefail + local node="$1" + + local json + json=$(mktemp) + # shellcheck disable=SC2064 + trap "rm $json" EXIT + + curl -s "https://ci.linaro.org/computer/$node/api/json?depth=1" > "$json" + + local n n_busy idle + + n=$(jq -r ".numExecutors" < "$json") + n_busy="$n" + + while [ "$n" -gt "0" ]; do + n=$(($n - 1)) + idle=$(jq -r ".executors[$n].idle" < "$json") + if [ "$idle" = "true" ]; then + n_busy=$(($n_busy - 1)) + fi + done + + n=0 + while true; do + idle=$(jq -r ".oneOffExecutors[$n].idle" < "$json") + if [ "$idle" = "null" ]; then + break + elif [ "$idle" != "true" ]; then + n_busy=$(($n_busy + 1)) + fi + n=$(($n + 1)) + done + + echo "$n_busy" + ) +} + # Run command on remote machine in given directory via ssh on a given port # "$1" -- <host>[:<port>[:<dir>[:<ssh_opts>[:<env>]]]] # "$2, $3, etc" -- command and its arguments @@ -290,6 +336,18 @@ remote_exec () ) } +# run given git command inside component's $c repo +git_component_cmd () +{ + ( + set -euf -o pipefail + local c="$1" + shift + + git -C "$(get_component_dir "$c")" "$@" + ) +} + # Resolve git ref to sha1 # $1 -- repo directory # $2 -- branch, tag or refspec @@ -467,6 +525,9 @@ clone_or_update_repo_no_checkout () if git -C $ref_dir rev-parse --git-dir >/dev/null 2>&1; then refopt="--reference $ref_dir" break + elif [ -d $ref_dir ]; then + refopt="--reference-if-able $ref_dir" + break fi done ;; @@ -477,14 +538,21 @@ clone_or_update_repo_no_checkout () if ! git -C "$dir" status >/dev/null 2>&1; then # Git repo doesn't exist or is corrupted. Make a new clone. rm -rf "$dir" + fi - local single_branch_opt="" - if [ x"$single_branch" != x"" ]; then - single_branch_opt="--single-branch --branch $single_branch" - fi + if [ -d "$dir" ] && [ x"$refopt" != x"" ] \ + && [ "$(du -s "$dir/.git" | cut -f 1)" -gt $((1024*1024)) ]; then + # Current clone has grown above 1GB, and we have a reference repo, + # which should cut down the size significantly. + # Redo the clone to save disk space. + # PS: Unfortunately, I could not find a way to make the current clone + # use new objects from the reference repo without a full re-clone. + # Fortunately, a new clone with a reference repo is quick. + rm -rf "$dir" + fi - run_with_timeout_and_retry 1h 3 git clone $refopt $single_branch_opt "$url" "$dir" - else + # This is a not a real loop; just easier to skip parts with "break". + while [ -d "$dir" ]; do # Clean up the clone (this is supposed to re-share objects from # reference clone and keep the size of the clone minimal). # It's possible that previous GC process was interrupted and left @@ -495,46 +563,86 @@ clone_or_update_repo_no_checkout () # Also, prune all loose objects to avoid "git gc --auto" failing # and creating .git/gc.log to warn us. rm -f "$dir/.git/gc.log" - git -C "$dir" gc --auto --force --prune=all + # Do not detach into background for GC. Running in the background may + # cause a failure during bisect's rsync, which may see some of + # the files disappering mid-rsync. + git -C "$dir" config gc.autoDetach false + if ! git -C "$dir" gc --auto --force --prune=all; then + # "git gc" can fail due to corrupted packs. + rm -rf "$dir" + break + fi + # Delete stale locks -- especially .git/refs/remotes/REMOTE/BRANCH.lock - # These occur when builds are aborted during "git remote update" or similar. + # These occur when builds are aborted during "git remote update" or + # similar. find "$dir/.git" -name "*.lock" -delete - fi - git_set_remote "$dir" "$remote" "$url" "$single_branch" + # Recover from any previous am/cherry-pick/rebase. + # In pre-commit CI we apply patches with "git am", which can fail + # and leave clone in a bad state. + local i + for i in am cherry-pick rebase; do + git -C "$dir" "$i" --abort &>/dev/null || true + done + + break + done - local refspec - if [ x"$single_branch" = x"" ]; then - run_with_timeout_and_retry 1h 3 git -C "$dir" remote update -p "$remote" - refspec="+refs/changes/*:refs/changes/*" - else - refspec="+refs/heads/$single_branch:refs/remotes/$remote/$single_branch" - fi - run_with_timeout_and_retry 1h 3 git -C "$dir" fetch -q $remote $refspec --prune + local fresh_clone=false + while true; do + if ! [ -d "$dir" ]; then + local single_branch_opt="" + if [ x"$single_branch" != x"" ]; then + single_branch_opt="--single-branch --branch $single_branch" + fi + + run_with_timeout_and_retry 1h 3 git clone \ + $refopt $single_branch_opt "$url" "$dir" + fresh_clone=true + fi + + git_set_remote "$dir" "$remote" "$url" "$single_branch" + + local refspec + if [ x"$single_branch" = x"" ]; then + run_with_timeout_and_retry 1h 3 git -C "$dir" remote update -p \ + "$remote" 2>/dev/null + refspec="+refs/changes/*:refs/changes/*" + else + refspec="+refs/heads/$single_branch:refs/remotes/$remote/$single_branch" + fi + + if ! run_with_timeout_and_retry 1h 3 git -C "$dir" fetch -q \ + $remote $refspec --prune; then + # "git fetch --prune" can fail due to running out of memory space + # on 32-bit architectures on big repos. Remove the repo and retry + # with a fresh clone. + if $fresh_clone; then + return 1 + fi + + rm -rf "$dir" + continue + fi + + break + done ) } -# Clone or update a git repo +# Checkout branch/ref/SHA1 in a git repo # $1 -- repo directory # $2 -- ref to checkout -# $3 -- master git repo -# $4 -- optional reference git repo (to speedup initial cloning) -# $5 -- optional single-branch to reduce fetching from remote repo -# $6 -- optional name of remote (default is "origin") -clone_or_update_repo () +# $3 -- name of the git remote +git_checkout () { ( set -euf -o pipefail local dir="$1" local ref="$2" - local url="$3" - local reference="${4-auto}" - local single_branch="${5-}" - local remote="${6-origin}" - - clone_or_update_repo_no_checkout "$dir" "$url" "$reference" \ - "$single_branch" "$remote" + local remote="$3" git_clean "$dir" # Convert git branch/tag names into SHA1 @@ -545,35 +653,29 @@ clone_or_update_repo () ) } -# Print baseline git repo -# $1 -- project name -# $3 -- whether to make the new remote read-only or read-write. -print_baseline_repo () +# Clone or update a git repo +# $1 -- repo directory +# $2 -- ref to checkout +# $3 -- master git repo +# $4 -- optional reference git repo (to speedup initial cloning) +# $5 -- optional single-branch to reduce fetching from remote repo +# $6 -- optional name of remote (default is "origin") +clone_or_update_repo () { ( set -euf -o pipefail local dir="$1" - local read_only="$2" - - local repo - case "$dir" in - binutils) repo=binutils-gdb.git ;; - llvm) repo=llvm-project.git ;; - *) repo=$dir.git ;; - esac + local ref="$2" + local url="$3" + local reference="${4-auto}" + local single_branch="${5-}" + local remote="${6-origin}" - # Use git-us.l.o to avoid delays between review.l.o and git.l.o - local url="git-us.linaro.org/toolchain/ci/$repo" - if $read_only; then - url="https://$url" - else - # Use gitolite access. Gerrit's ssh access verifies pushed commits, - # which can slow-down server on big pushes. - url="ssh://$url" - fi + clone_or_update_repo_no_checkout "$dir" "$url" "$reference" \ + "$single_branch" "$remote" - echo "$url" + git_checkout "$dir" "$ref" "$remote" ) } @@ -649,27 +751,28 @@ untar_url () # Wait until the ssh server is ready to accept connexions # $1: host -# $2: port -# $3: retry count (optional) +# $2: retry count; use "" for the default +# $3+: ssh options # Returns 0 on success, 1 in case of error wait_for_ssh_server () { ( set -euf -o pipefail local session_host="$1" - local session_port="$2" - local count="${3-20}" + local count="${2:-20}" + shift 2 + local -a session_opts=("$@") while [ $count -gt 0 ] do - timeout 30s ssh -p $session_port $session_host true && break + timeout 30s ssh "${session_opts[@]}" $session_host true && break echo "SSH server not ready, waiting....." sleep 5 count=$((count - 1)) done if [ $count -eq 0 ]; then - echo "ERROR: SSH server did not respond ($session_host:$session_port)" + echo "ERROR: SSH server did not respond (ssh ${session_opts[*]} $session_host)" return 1 fi return 0 @@ -706,17 +809,24 @@ print_memory_limit () local memlimit="$4" local memory case "$task" in - build) - # 2GB per compilation core, with 4GB minimum and - # half of total system RAM maximum. - memory=$(( 2000 * $weight * $nproc )) - - memlimit=$(( $memlimit / 2 )) + build|precommit) if [ "$memlimit" -lt "4000" ]; then - # Don't limit memory on machines with less than 8GB RAM. + # Don't limit memory on machines with less than 4GB RAM. memory="unlimited" else - # Use at most half of RAM + # We want to have at least 2GB of RAM for every core. E.g., + # on a machine with 32 cores and 128GB RAM we can run + # 2 concurrent builds, while on a 32-core machine with + # 64GB RAM we can run only 1 build at a time. + # Note that number of concurrent builds is controlled by number + # of node executors in jenkins. + memory=$(( 2000 * $weight * $nproc )) + + # Also, trim 5% off total RAM to have a bit of RAM reserved + # for processes on the bare machine, which really helps when + # build container goes into swap. + memlimit=$(( $memlimit * 95 / 100 )) + if [ "$memory" -gt "$memlimit" ]; then memory="$memlimit" fi @@ -744,79 +854,110 @@ print_pids_limit () local task="$1" local weight="$2" local pids - pids=$(( $weight * 5000 )) # 5000 processes per executor + + # On startup of GCC's guality tests we have $NCPUs guality_check$PID.exe + # processes, each of which forks into GDB with ($NCPUs+1) threads. + # This means that on a 160-core system we need around 30k PID limit. + pids=$(nproc --all) + pids=$(( pids * (pids + 1) + 5000 )) + pids=$(( pids * weight )) + + # Make sure we are using at most half of system PID limit + local pid_max_2 + pid_max_2=$(cat /proc/sys/kernel/pid_max) + pid_max_2=$(( pid_max_2 / 2 )) + + if [ $pids -gt $pid_max_2 ]; then + pids=$pid_max_2 + fi + echo "$pids" ) } -# Print default bind mounts for $task +# Print default bind and volume mounts for $task and $job # $1: task -print_bind_mounts () +# $2: job +# $3: Suffix to be appended to the volume names (e.g., -$container_arch-$distro) +# $4+: ssh command +print_mounts () { ( set -euf -o pipefail local task="$1" - local ssh="$2" - local -a bind_mounts + local job="$2" + local suffix="$3" + shift 3 + local ssh=("$@") + + if [ "${WORKSPACE+set}" = "set" ]; then + case $task in + bench|build) + echo "$WORKSPACE:$WORKSPACE" + ;; + precommit) + # Note the difference between "-v $WORKSPACE:$WORKSPACE" above + # and "-v $WORKSPACE" here. In the above case $WORKSPACE is + # bind-mounted from the host; but in this case a scratch volume + # is created and mounted inside container. + echo "$WORKSPACE" + # The only reason why we are bind-mounting base-artifacts/ + # is that it can be very big. Rsync-ing tens of gigs back and + # forth can take as much time as the actual pre-commit test. + # As a nice side-effect having base-artifacts/ read-only checks + # that our build scripts don't try to modify it by mistake. + echo "$WORKSPACE/base-artifacts:$WORKSPACE/base-artifacts:ro" + ;; + esac + fi case $task in - bench|build) - if [ x"${WORKSPACE+set}" = x"set" ]; then - bind_mounts+=("$WORKSPACE") - fi + build|precommit) + echo /home/tcwg-buildslave/snapshots-ref:/home/tcwg-buildslave/snapshots-ref:ro + ;; + bench) + echo /home/shared/git:/home/shared/git:ro ;; - esac - - case $task in - build) bind_mounts+=(/home/tcwg-buildslave/snapshots-ref:ro) ;; - bench) bind_mounts+=(/home/shared/git:ro) ;; esac local key - for key in $($ssh find /etc/ssh/ -name "ssh_host_*_key" \ - -o -name "ssh_host_*_key.pub"); do - bind_mounts+=("$key:ro") + for key in $("${ssh[@]}" find /etc/ssh/ -name "ssh_host_*_key" \ + -o -name "ssh_host_*_key.pub"); do + echo "$key:$key:ro" done - echo "${bind_mounts[@]:+${bind_mounts[@]}}" - ) -} - -# Print default volume mounts for $job -# $1: job -# $2: Suffix to be appended to the volume names (e.g., -$container_arch-$distro) -print_volume_mounts () -{ - ( - set -euf -o pipefail - local job="$1" - local suffix="$2" - - local -a mounts - local volume_id - case "$job" in tcwg_*-*) # Add ccache volume for tcwg_* jobs. # These jobs depend on ccache for fast rebuilds of LLVM and GCC with # the host compiler. - # tcwg_* jobs use per-executor WORKSPACES, and ccache uses separate - # cache entries for different paths. Therefore we need to use - # separate caches for different $WORKSPACES. Otherwise we get - # a lot of cache polution on high-executor machines, e.g., for - # tcwg_bmk builds on tcwg-x86_64-dev-01 node. local prefix if [ x"${WORKSPACE+set}" = x"set" ]; then prefix=$(basename $WORKSPACE) else prefix=$(echo $job | cut -d- -f 1) fi + # tcwg_* jobs use per-executor WORKSPACES, and we configure ccache + # to use CCACHE_BASEDIR=$WORKSPACE so that ccache sees same paths + # for builds on different executors. + # Strip "_$EXECUTOR_NUMBER" from the job/workspace ID. + prefix="${prefix%_[0-9]*}" + + local volume_id volume_id=$(print_docker_name "$prefix$suffix") - mounts+=(ccache-"$volume_id":"$HOME"/.ccache) + + local readonly="" + if [ "$task" = "precommit" ]; then + readonly=":ro" + fi + echo "ccache-$volume_id:$HOME/.ccache$readonly" ;; esac + case "$job" in tcwg_bmk*) + assert_with_msg "Precommit benchmarking requires more thought" \ + [ "$task" != "precommit" ] # Add scratch mount for tcwg-benchmark's $HOME. # tcwg_bmk-* jobs trigger tcwg-benchmark jenkins jobs, which # then ssh to the build container to compile benchmark objects @@ -824,10 +965,9 @@ print_volume_mounts () # parameter -- see tcwg_bmk-build.sh:benchmark()). # This generates a fair bit of disk trafic on /home/tcwg-benchmark, # and it's best to use docker scratch volume, rather than overlayfs. - mounts+=(/home/tcwg-benchmark) + echo /home/tcwg-benchmark ;; esac - echo "${mounts[@]:+${mounts[@]}}" ) } @@ -851,9 +991,11 @@ __manifest_filename=("/dev/null") # Set new file name for manifest # $1: File name +# $2: Optional true/false on whether start a new manifest manifest_push () { local filename="$1" + local clean="${2-true}" # Resolve absolute path to manifest. local dir @@ -862,7 +1004,9 @@ manifest_push () dir=$(cd "$dir"; pwd) __manifest_filename=("$dir/$(basename "$filename")" "${__manifest_filename[@]}") - rm -f "${__manifest_filename[0]}" + if $clean; then + rm -f "${__manifest_filename[0]}" + fi } # Return to previous manifest filename @@ -881,17 +1025,457 @@ manifest_out () cat >> "${__manifest_filename[0]}" } +# Fetch and print value from manifest +# $1: Manifest file +# $2: Variable to fetch +# $3: Whether to ignore lack of the variable or lack of the manifest +get_manifest () +{ + ( + set +x + set -euf -o pipefail + local manifest="$1" + local var="$2" + local strict="${3-true}" + + # Emtpy result if no manifest found (udpate_baseline=init for instance) + if ! [ -f "$manifest" ]; then + if $strict; then + return 1 + fi + return 0 + fi + + # Unwrap $var down to variable name that we can unset. + local name="$var" + # {name} -> name + name=$(echo "$name" | sed -e 's/^{\(.*\)}$/\1/') + # Strip "+, -, :+, :-" suffixes + name=$(echo "$name" | sed -e 's/[-+:].*$//') + + # remove any existing declarations of $name + unset "$name" + + # FIXME: manifest should declare "rr" itself + declare -A rr + + # shellcheck disable=SC1090 + source "$manifest" + + if ! $strict; then + # Do not complain about unbound variables + set +u + fi + + eval echo "\$$var" + ) +} + +# Fetch and print value from manifest of a baseline build +# $1: Variable to fetch. +get_baseline_manifest () +{ + get_manifest base-artifacts/manifest.sh "$1" false +} + +# Fetch and print value from manifest of the current build +# $1: Variable to fetch. +get_current_manifest () +{ + get_manifest "${rr[top_artifacts]}/manifest.sh" "$1" +} + +get_baseline_git () +{ + ( + set -euf -o pipefail + local base_artifacts="base-artifacts" + # may not exist + if [ -f "$base_artifacts/git/$1" ]; then + cat "$base_artifacts/git/$1" + fi + ) +} + +get_current_git () +{ + ( + set -euf -o pipefail + assert_with_msg "ERROR: No $1 in current git" \ + [ -f "${rr[top_artifacts]}/git/$1" ] + cat "${rr[top_artifacts]}/git/$1" + ) +} + +set_current_git () +{ + ( + set -euf -o pipefail + mkdir -p ${rr[top_artifacts]}/git + cat > "${rr[top_artifacts]}/git/$1" + ) +} + +declare -A deps_url=( + ["dawn"]="https://dawn.googlesource.com/dawn.git" + ["chromium_variations"]="https://chromium.googlesource.com/chromium-variations.git" + ["catapult"]="https://chromium.googlesource.com/catapult.git" + ["perfetto"]="https://android.googlesource.com/platform/external/perfetto.git" + ["vulkan-deps"]="https://chromium.googlesource.com/vulkan-deps" + ["angle"]="https://chromium.googlesource.com/angle/angle.git" + ["skia"]="https://skia.googlesource.com/skia.git" + ["v8"]="https://chromium.googlesource.com/v8/v8.git" +) + +declare -A deps_dir=( + ["dawn"]="src/third_party/dawn" + ["chromium_variations"]="src/third_party/chromium-variations" + ["catapult"]="src/third_party/catapult" + ["perfetto"]="src/third_party/perfetto" + ["vulkan-deps"]="src/third_party/vulkan-deps" + ["angle"]="src/third_party/angle" + ["skia"]="src/third_party/skia" + ["v8"]="src/v8" +) + +get_component_url () +{ + ( + set -euf -o pipefail + + if [[ -v deps_url["${1}"] ]]; then + echo "${deps_url["${1}"]}" + fi + ) +} + +get_component_dir () +{ + ( + set -euf -o pipefail + + if [[ -v deps_dir["${1}"] ]]; then + echo "${deps_dir["${1}"]}" + else + echo "${1}" + fi + ) +} + +# returns the date of the last component ($1) commit +get_baseline_component_date () +{ + ( + set -euf -o pipefail + local base_artifacts="base-artifacts" + assert_with_msg "ERROR: No $1 in current git" \ + [ -f "$base_artifacts/git/${1}_rev" ] + + git_component_cmd "$1" show --no-patch --pretty=%ct "$(cat "$base_artifacts/git/${1}_rev")" + ) +} + +get_current_component_date () +{ + ( + set -euf -o pipefail + assert_with_msg "ERROR: No $1 in current git" \ + [ -f "${rr[top_artifacts]}/git/${1}_rev" ] + + git_component_cmd "$1" show --no-patch --pretty=%ct "$(cat "${rr[top_artifacts]}/git/${1}_rev")" + ) +} + +# Print round-robin components that are being updated in this build +# (the ones using non-baseline branches). +print_updated_components () +{ + ( + set -euf -o pipefail + + local c delim="" + for c in ${rr[components]}; do + if [ x"${rr[${c}_git]}" != x"baseline" ]; then + echo -ne "$delim$c" + delim=" " + fi + done + echo + ) +} + +# Print the single round-robin component being updated in this build. +# Print nothing if multiple components are being updated. +print_single_updated_component () +{ + ( + set -euf -o pipefail + + local -a updated_components + IFS=" " read -r -a updated_components <<< "$(print_updated_components)" + + if [ ${#updated_components[@]} -eq 1 ]; then + echo "${updated_components[0]}" + fi + ) +} + +# Print round-robin components that have new commits in this build +# compared to the baseline. +# This expects all components to be cloned and checked out at appropriate revs. +# During bisect we have only a single component updated by definition, and +# it is guaranteed to have clone_repo() called for it. +print_changed_components () +{ + ( + set -euf -o pipefail + + local c delim="" + for c in $(print_updated_components); do + if ! [ -f base-artifacts/git/${c}_rev ]; then + # $c not present in baseline: consider that it changed. + echo -ne "$delim$c" + delim=${1- } + elif [ x"$(get_current_git ${c}_rev)" \ + != x"$(get_baseline_git ${c}_rev)" ]; then + echo -ne "$delim$c" + delim=${1- } + fi + done + echo + ) +} + +# Breakup changed components into $culprit and the rest of components. +# This will reduce the number of builds when $culprit is responsible for +# majority of regressions. +breakup_changed_components () +{ + ( + set -euf -o pipefail + + local culprit="${1-}" + + if [ "$culprit" = "" ] \ + || ! print_changed_components "\n" \ + | grep "^$culprit\$" >/dev/null; then + print_changed_components "\n" + else + echo "$culprit" + print_changed_components "\n" | grep -v "^$culprit\$" | tr '\n' ' ' \ + | sed -e "s/ \$//g" + echo + fi + ) +} + +# Fetch paths from git history +# $1 -- number of versions to fetch; if none of the paths are present in +# a particular revision that revision doesn't count towards this number; +# positive values will fetch the most recent N revisions starting from +# most recent to less recent; +# negative values will fetch the oldest -N revisions starting from oldest +# to less old. +# As a special case "0" will fetch you all revisions in "positive" order, +# and "-0" will fetch you all revisions in "negative" order. +# $2 -- git repo +# $3+ -- paths in git repo; can be files or directories, only the 1st path +# existing in a revision is fetched (useful for renamed/moved files). +# $4+ -- [optional after "--" separator] paths in git repo, which should be +# fetched in addition to the "main" path specified in "$3". +# +# This function fetches files into a temporary directory (pointed to by the first +# line of output) and prints out paths under that temporary directory for subsequent +# fetches of ${paths[@]}" from appropriate revisions. +# Once one of the paths is found in a given revision, we check it out and +# move on to the next revision. +# Optional "--relative" option supplied as $3 will make get_git_history() +# output path names relative to the output directory. This is useful when +# output directory needs to be moved before processing. +get_git_history () +{ + ( + set -euf -o pipefail + + local n_revs="$1" + local repo="$2" + shift 2 + + local relative=false + if [ "$1" = "--relative" ]; then + relative=true + shift 1 + fi + + local -a paths=() + while [ $# != 0 ]; do + if [ "$1" = "--" ]; then + shift + break + fi + paths+=("$1") + shift + done + + local -a extra_paths=("$@") + + local repo_branch + repo_branch=$(echo "$repo" | cut -s -d# -f2) + if [ "$repo_branch" = "" ]; then + repo_branch="HEAD" + fi + repo=$(echo "$repo" | cut -d# -f1) + + local -a git_rev_list=() git_archive=() + + case "$repo" in + ssh://*) + local repo_host + repo_host=$(echo "$repo" | cut -s -d/ -f3) + # "ssh" will read from stdin, unless specifically forbidden with + # "-n". Without "-n" ssh would consume the output of git_rev_list + # in the "while read rev" loop below. + git_rev_list+=(ssh -n "$repo_host") + git_archive+=(ssh -n "$repo_host") + + repo=$(echo "$repo" | cut -s -d/ -f4-) + repo="/$repo" + ;; + esac + + git_rev_list+=(git -C "$repo" rev-list) + git_archive+=(git -C "$repo" archive) + + if [ "$n_revs" = "-0" ] || [ "$n_revs" -lt "0" ]; then + git_rev_list+=(--reverse) + n_revs=$((-$n_revs)) + fi + git_rev_list+=("$repo_branch" -- "${paths[@]}") + + local rev tmp_root + tmp_root=$(mktemp -d) + echo "$tmp_root" + + while read rev; do + local found path + mkdir "$tmp_root/$rev" + + found=false + for path in "${paths[@]}"; do + "${git_archive[@]}" "$rev" -- "$path" "${extra_paths[@]}" \ + | tar -x -C "$tmp_root/$rev" & + # "git archive" fails when $path was deleted in $rev. + if wait $!; then + found=true + break + fi + done + + if $found; then + if $relative; then + echo "$rev/$path" + else + echo "$tmp_root/$rev/$path" + fi + n_revs=$(($n_revs-1)) + if [ $n_revs = 0 ]; then + break + fi + else + # shellcheck disable=SC2115 + rm -r "$tmp_root/$rev" + fi + done < <("${git_rev_list[@]}") + ) +} + +convert_arg_var () +{ + declare -g "$1=$2" + cat <<EOF | manifest_out +declare -g "$1=$2" +EOF +} + +convert_arg_arr () +{ + if ! test_array $1; then + declare -ag $1 + cat <<EOF | manifest_out +declare -ga $1 +EOF + fi + eval "$1+=(\"$2\")" + cat <<EOF | manifest_out +$1+=("$2") +EOF +} + +convert_arg_declare () +{ + local name="$1" + + case "$name" in + *"["*"]") + local arr="${1%\[*\]}" + if ! test_array $arr; then + declare -Ag $arr + cat <<EOF | manifest_out +declare -gA $arr +EOF + fi + ;; + *) + declare -g "$name" + cat <<EOF | manifest_out +declare -g $name +EOF + ;; + esac +} + +convert_arg_set () +{ + eval "$1=\"$2\"" + cat <<EOF | manifest_out +$1="$2" +EOF +} + +convert_arg_assarr () +{ + convert_arg_declare "$1" + convert_arg_set "$1" "$2" +} + +convert_arg_source () +{ + assert_with_msg "ERROR: manifest/include does not exist: $1" \ + [ -f "$1" ] + # shellcheck disable=SC1090 + source "$1" + echo "# Start of include $1" | manifest_out + cat "$1" | manifest_out + echo "# End of include $1" | manifest_out +} + # Process "--var value" and "++arr elem" arguments and define corresponding # variables and arrays. # "--var value" defines shell variable "$var" to "value". +# "__var value" defines shell variable "$var" to "value", but doesn't store +# it to the manifest. This is useful for passing secrets. # "++arr elem" defines shell array "$arr[@]" and adds "elem" to it. # "==arr[key] value" defines shell associative array "$arr[@]" and sets # "${arr[key]}" to "value". # "@@ file" sources file. -# "%% file" starts manifest in file. Also see "^^ true". -# "^^ true/false %% manifest" whether to reproduce the build using manifest. -# If "true" -- source manifest instead of generating it, then discard -# all following options at to separator "--". +# "@@artifacts_var dir" defines artifacts directory and sources the manifest in +# from dir/manifest.sh. This is useful for reproducing +# builds. +# "%%artifacts_var dir" defines artifacts directory and starts manifest in +# dir/manifest.sh. Also see "^^ true". +# "^^ true/false %%artifacts_var dir" whether to reproduce the build using manifest. +# If "true" -- source dir/manifest.sh instead of generating it, then discard +# all following options up to separator "--". # If "false" -- do nothing and proceed as usual. # # Shell array $CONVERTED_ARGS is set to the arguments processed. @@ -911,73 +1495,91 @@ convert_args_to_variables () break ;; "--"*) - name="${1#--}" + assert_with_msg "ERROR: Parameter value not provided for $1." \ + [ $# -ge 2 ] + convert_arg_var "${1#--}" "$2" + num=2 + ;; + "__"*) + assert_with_msg "ERROR: Parameter value not provided for $1." \ + [ $# -ge 2 ] + name="${1#__}" + # FIXME: Can we add "set +x" here? declare -g "$name=$2" - cat <<EOF | manifest_out -declare -g "$name=$2" -EOF num=2 ;; "++"*) - name="${1#++}" - if ! test_array $name; then - declare -ag $name - cat <<EOF | manifest_out -declare -ga $name -EOF - fi - eval "$name+=(\"$2\")" - cat <<EOF | manifest_out -$name+=("$2") -EOF + assert_with_msg "ERROR: Parameter value not provided for $1." \ + [ $# -ge 2 ] + convert_arg_arr "${1#++}" "$2" num=2 ;; "=="*) - name="${1#==}" - arr="${name%\[*\]}" - if ! test_array $arr; then - declare -Ag $arr - cat <<EOF | manifest_out -declare -gA $arr -EOF - fi - if [ $# -lt 2 ]; then - echo "ERROR: Parameter value not provided for $1." - exit 1 - fi - eval "$name=\"$2\"" - cat <<EOF | manifest_out -$name="$2" -EOF + assert_with_msg "ERROR: Parameter value not provided for $1." \ + [ $# -ge 2 ] + convert_arg_assarr "${1#==}" "$2" num=2 ;; "@@") - # shellcheck disable=SC1090 - source "$2" - echo "# Start of include $2" | manifest_out - cat "$2" | manifest_out - echo "# End of include $2" | manifest_out + assert_with_msg "ERROR: Parameter value not provided for $1." \ + [ $# -ge 2 ] + convert_arg_source "$2" num=2 ;; - "%%") - manifest_push "$2" + "@@"*) + # TODO: It should be possible to simplify handling of "^^" + # now that we have @@artifacts dir. + assert_with_msg "ERROR: Parameter value not provided for $1." \ + [ $# -ge 2 ] + + name="${1#@@}" + + # FIXME: This should not be necessary since manifests should + # "declare -Ag rr" themselves, but current manifests don't + # do that, due to "declare -A rr" in round-robin.sh. That + # declaration makes convert_arg_declare think that rr was + # already added to the manifest. + convert_arg_declare "$name" + + convert_arg_source "$2/manifest.sh" + manifest_push "$2/manifest.sh" false + + # Builds are supposed to be re-runnable from different + # directories, so do not put artifacts directory into manifest. + eval "$name=\"$2\"" + + num=2 + ;; + "%%"*) + assert_with_msg "ERROR: Parameter value not provided for $1." \ + [ $# -ge 2 ] + manifest_push "$2/manifest.sh" cat <<EOF | manifest_out -# Start option processing -jenkins_scripts_rev=$(git -C "$(dirname "$0")" rev-parse HEAD) +declare -g "jenkins_scripts_rev=$(git -C "$(dirname "$0")" rev-parse HEAD)" +# Artifacts directory +EOF + name="${1#%%}" + convert_arg_declare "$name" + + # Builds are supposed to be re-runnable from different + # directories, so do not put artifacts directory into manifest. + eval "$name=\"$2\"" + + cat <<EOF | manifest_out +# Recording parameters to manifest: $2/manifest.sh EOF num=2 ;; "^^") + assert_with_msg "ERROR: Parameter value not provided for $1." \ + [ $# -ge 4 ] if [ x"$2" = x"true" ]; then - # Check that we have a manifest to reproduce - if [ x"$3" != x"%%" ] || [ ! -f "$4" ]; then - echo "ERROR: '^^ true' must be followed by '%% <MANIFEST>'" - exit 1 - fi - - # Source the manifest for reproduction. - # shellcheck disable=SC1090 - source "$4" + name="${3#%%}" + case "$name" in + *"["*"]") convert_arg_assarr "$name" "$4" ;; + *) convert_arg_var "$name" "$4" ;; + esac + convert_arg_source "$4/manifest.sh" # Skip processing all following arguments. num=0 @@ -1004,9 +1606,6 @@ EOF done done eval "SHIFT_CONVERTED_ARGS=$total" - cat <<EOF | manifest_out -# Processed $total options -EOF } # Check that varible names in "$@" are set @@ -1016,10 +1615,21 @@ obligatory_variables () ( set -euf -o pipefail for i in "$@"; do - if eval "[ x\"\${$i+set}\" != x\"set\" ]"; then - echo "ERROR: required parameter $i not set" - exit 1 - fi + case "$i" in + *"["*"]") + if eval "[ x\"\${$i+set}\" != x\"set\" ]"; then + echo "ERROR: required parameter $i not set" + exit 1 + fi + ;; + *) + if [[ "$(declare -p "$i" 2>/dev/null)" \ + != "declare "* ]]; then + echo "ERROR: required parameter $i not set" + exit 1 + fi + ;; + esac done ) } @@ -1043,14 +1653,25 @@ print_gnu_target () set -euf -o pipefail local target="$1" - if [ x"$target" = x"native" ]; then - target=$(uname -m) - fi case "$target" in "aarch64") target="aarch64-linux-gnu" ;; - "arm_eabi") target="arm-eabi" ;; + arm*_eabi) target="arm-eabi" ;; + thumb*_eabi) target="arm-eabi" ;; "arm"*) target="arm-linux-gnueabihf" ;; + "woa64") target="aarch64-w64-mingw32" ;; "x86_64") target="x86_64-linux-gnu" ;; + "native") + case "$(uname -m)" in + "aarch64") target="aarch64-unknown-linux-gnu" ;; + "armv7l") target="armv7l-unknown-linux-gnueabihf" ;; + "armv8l") target="armv8l-unknown-linux-gnueabihf" ;; + "x86_64") target="x86_64-pc-linux-gnu" ;; + *) + echo "ERROR: Unknown native target $(uname -m)" >&2 + exit 1 + ;; + esac + ;; *) echo "ERROR: Unknown target $target" >&2; exit 1 ;; esac echo "$target" @@ -1103,54 +1724,15 @@ print_kernel_target () git_clean () { ( set -euf -o pipefail - - fresh_dir "$1" "$1/.git/*" - git -C "$1" reset --hard - ) -} - -# Add git remote pointing to linaro's git repo/mirrors with writable -# toolchain/ci/* repo. Deduce repo's URL from URL of existing -# "origin" git remote. -# $1: Git clone directory (must have "origin" remote configured) -# $2: Name of the new remote. -# $3: Whether to make the new remote read-only or read-write. -git_init_linaro_local_remote () -{ - ( - set -euf -o pipefail local dir="$1" - local remote="$2" - local read_only="$3" - - local origin_url - local new_url - origin_url=$(git -C "$dir" remote get-url origin) - - # Figure out mirror repo on linaro's servers. - case "$origin_url" in - *"kernel.org/"*"/linux"*) - new_url="toolchain/ci/linux.git" - ;; - *"linaro.org/toolchain/gcc-compare-results.git") - new_url="toolchain/gcc-compare-results.git" - ;; - *) - new_url="toolchain/ci/$(basename $origin_url)" - ;; - esac + shift - # Use git-us.l.o to avoid delays between review.l.o and git.l.o - new_url="git-us.linaro.org/$new_url" - if $read_only; then - new_url="https://$new_url" - else - # Use gitolite access. Gerrit's ssh access verifies pushed commits, - # which can slow-down server on big pushes. - new_url="ssh://$new_url" + fresh_dir "$dir" "$dir/.git/*" + if ! git -C "$dir" reset -q --hard "$@"; then + # "git reset" may fail if index gets corrupted -- remove it and retry. + rm -f "$dir/.git/index" + git -C "$dir" reset -q --hard "$@" fi - - git_set_remote "$dir" "$remote" "$new_url" ) } @@ -1177,6 +1759,8 @@ git_push () # Initialize run_step state # $1: Step to start execution at (or "" to start at the very first step) +# Appending "+" to the step name, e.g., "__start_at reset_artifacts+" +# makes us start on the step right AFTER the specified step. # $2: Step to finish execution at (or "" to run till the very end) # $3: Top artifact directory # $4: Whether to enable "set -x" verbosity for execution steps. @@ -1202,7 +1786,33 @@ finishing at step \"$run_step_finish_at\"" run_step_top_artifacts=$(cd "$run_step_top_artifacts"; pwd) rm -f $run_step_top_artifacts/console.log + rm -f $run_step_top_artifacts/console.log.xz rm -f $run_step_top_artifacts/results + + # If no manifest file was provided, supply a default one. + if [ ${#__manifest_filename[@]} -eq 1 ]; then + manifest_push "$run_step_top_artifacts/manifest.sh" + fi +} + +# Patch environment for subsequent steps. This works by generating +# a source-able file patch-env.sh in the artifacts of the current step. +# Run_step() then sources this file to update the environment. +# Note that we build walls around individual steps on purpose. This allows +# us to SKIP several initial steps during bisect builds, and have a clear +# record of environment modifications in artifacts/NN-step/patch-env.sh +# scripts, which could be applied in correct order. +# +# $@: parameters in the format that convert_args_to_variables() understands. +run_step_patch_env () +{ + # !!! Each step is limited to a single invocation of run_step_patch_env() + # !!! due to manifest_push() re-writing the manifest. + assert_with_msg "patch-env.sh manifest already exists" \ + ! [ -e $run_step_artifacts/patch-env.sh ] + manifest_push $run_step_artifacts/patch-env.sh + convert_args_to_variables "$@" + manifest_pop } # Run execution step and handle its failure as requested @@ -1215,8 +1825,7 @@ finishing at step \"$run_step_finish_at\"" # Step commands have $run_step_artifacts pointing to artifact directory # for current step. # 3. logging -- dump stdout and and stderr output of step commands -# into per-step console.log files, and, also, into the top-level -# console.log file. +# into per-step console.log files # 4. result handling -- output provided success result to artifacts/results # for successful steps. Special value "x" means to let the step itself # update artifacts/results. Results are written to artifacts/results @@ -1244,6 +1853,12 @@ run_step () step=("$@") + if [ "$success_result" != "x" ]; then + cat >> $run_step_top_artifacts/results <<EOF +# ${step[@]}: +EOF + fi + pretty_step="$1" shift while [ $# -gt 0 ]; do @@ -1257,13 +1872,19 @@ run_step () run_step_count=$(($run_step_count+1)) + local full_step_name + full_step_name=$(printf "%02d" $run_step_count)-$pretty_step + # This is used when accessing the workspace + run_step_artifacts=$run_step_top_artifacts/$full_step_name + # Start running steps if: # the current step is the starting step OR # we haven't run any steps yet and # there is no set starting step - if [ x"$pretty_step" = x"$run_step_start_at" ] || \ - ( [ x"$run_step_start_at" = x"" ] && \ - [ x"$run_step_prev_step" = x"" ] ); then + if [ "$pretty_step" = "$run_step_start_at" ] \ + || [ "${run_step_prev_step}+" = "$run_step_start_at" ] \ + || ( [ "$run_step_start_at" = "" ] \ + && [ "$run_step_prev_step" = "" ] ); then run_step_active=true fi @@ -1271,7 +1892,7 @@ run_step () local skip=false case "$run_step_status:$run_mode" in 0:*) ;; - $EXTERNAL_FAIL:stop_on_fail) + "$EXTERNAL_FAIL:stop_on_fail") echo "STOPPING before ${step[*]} due to previous external failure" return $EXTERNAL_FAIL ;; @@ -1291,14 +1912,10 @@ run_step () esac if ! $skip; then - local full_step_name - full_step_name=$(printf "%02d" $run_step_count)-$pretty_step - # This is used when accessing the workspace - run_step_artifacts=$run_step_top_artifacts/$full_step_name local log_url="" if [ -v BUILD_URL ]; then # Link to jenkins, valid once the job has finished - log_url="(${BUILD_URL}artifact/artifacts/$full_step_name/console.log)" + log_url="(${BUILD_URL}artifact/artifacts/$full_step_name/console.log.xz)" fi rm -rf "$run_step_artifacts" @@ -1306,17 +1923,53 @@ run_step () echo "RUNNING ${step[*]}; see tail -f $run_step_artifacts/console.log" $log_url run_step_status=0 - eval "if $run_step_verbose; then set -x; else set +x; fi; ${step[*]}" 2>&1 | ts -s "%T" | tee -a $run_step_top_artifacts/console.log > $run_step_artifacts/console.log & - wait $! || run_step_status=$? + # We are running "${step[@]}" in a sub-shell, so that any + # modifications to environment will be lost. + # The steps can modify environment for subsequent steps by using + # run_step_patch_env(). + # We redirect stdout and stderr of "${step[@]} to a pipe, which + # is connected to timestamping console. Piping "step | ts -s" + # directly causes weird issue with failed exit code always being + # "1" instead of, e.g., 125. + + local pipe step_pid ts_pid + pipe=$(mktemp -u) + mkfifo "$pipe" + + ( + if $run_step_verbose; then + set -x + else + set +x + fi + "${step[@]}" + ) &> "$pipe" & + step_pid=$! + + ts -s "%T" < "$pipe" > $run_step_artifacts/console.log & + ts_pid=$! + + wait $step_pid || run_step_status=$? + wait $ts_pid + rm "$pipe" + + xz $run_step_artifacts/console.log + + if [ x"$success_result" != x"x" ] \ + && [ x"$run_step_status" != x"0" ]; then + cat >> $run_step_top_artifacts/results <<EOF +# FAILED +EOF + fi case "$run_step_status:$run_mode" in 0:*) ;; - $EXTERNAL_FAIL:stop_on_fail|$EXTERNAL_FAIL:reset_on_fail) + "$EXTERNAL_FAIL:stop_on_fail"|"$EXTERNAL_FAIL:reset_on_fail") echo "STOPPING at ${step[*]} due to external failure" return $EXTERNAL_FAIL ;; *:stop_on_fail|*:reset_on_fail) - echo "STOPPING at ${step[*]} due to internal failure" + echo "STOPPING at ${step[*]} due to failure" return $INTERNAL_FAIL ;; *:skip_on_fail) @@ -1330,13 +1983,17 @@ run_step () echo "SKIPPING ${step[*]}" fi - if [ x"$run_step_status" = x"0" ] && [ x"$success_result" != x"x" ]; then + if [ x"$success_result" != x"x" ] && [ x"$run_step_status" = x"0" ]; then cat >> $run_step_top_artifacts/results <<EOF -# ${step[@]}: $success_result EOF fi + if [ -f $run_step_artifacts/patch-env.sh ]; then + # shellcheck disable=SC1090 + source $run_step_artifacts/patch-env.sh + fi + if [ x"$pretty_step" = x"$run_step_finish_at" ]; then run_step_active=false fi @@ -1368,7 +2025,7 @@ print_traceback () { local exit_status=$? case $exit_status in - $INTERNAL_FAIL|$EXTERNAL_FAIL) ;; + "$INTERNAL_FAIL"|"$EXTERNAL_FAIL") ;; *) echo "ERROR Traceback (most recent call last):" # Show most recent calls last @@ -1388,3 +2045,186 @@ print_traceback () ;; esac } + +# Print destination sub-directory of interesting-commit.git for ... +# $1: component +# $2: sha1 of the commit +# $3: ci_project +# $4: ci_config +interesting_subdir () +{ + local dir="$1/sha1" # $component/sha1 + if [ $# -ge 2 ]; then dir="$dir/$2"; fi # /$sha1 + if [ $# -ge 3 ]; then dir="$dir/$3"; fi # /$ci_project + if [ $# -ge 4 ]; then dir="$dir/$4"; fi # /$ci_config + echo "$dir" +} + +# Print user-friendly "git describe" of a given commit +# $1: Component (gcc, llvm, etc.) +# $2: Commit hash +# $3: If "true", never fail to describe and print out something sensible. +# Otherwise return empty string on failure. +describe_sha1 () +{ + local component="$1" + local sha1="$2" + local anything="$3" + + local -a match=() + case "$component" in + gcc) match=(--match "basepoints/*" --match "releases/*") ;; + binutils) match=(--match "binutils*") ;; + gdb) match=(--match "gdb*") ;; + newlib) match=(--match "newlib*") ;; + esac + + if ! git -C "$component" describe "${match[@]}" $sha1 2>/dev/null \ + && $anything; then + echo "$component#$(git -C "$component" rev-parse --short $sha1)" + fi +} + +# To avoid committing unwanted files into git (e.g., raw benchmarking +# data) we implement "annex" support. Files in base-artifacts/annex +# can be either symlinks to directories or regular files containing +# rsync-able urls. +# +# Here we convert directory symlinks into tarballs, upload to bkp-01 +# and replace symlinks with files pointing to their uploaded location. +# +# In git_annex_download we do the opposite: download and extract tarball +# into a temporary directory, and replace the file with a symlink +# to that directory. +# +# The end result is that during a build base-artifacts/annex/bmk-data +# is a symlink with directory-like behavior. Outside of a build +# base-artifacts/ repo contains a regular file pointing to a tarball +# on a private fileserver. +# +# FIXME: We do not automatically remove annex tarballs when trimming +# or rewriting history. We rely on tcwg-cleanup-stale-results.sh for that. +# +# $1: git repo +# $2: annex directory inside the repo +# $3: tarball name prefix +git_annex_upload () +{ + ( + set -euf -o pipefail + local repo="$1" + local annex_dir="$2" + local pretty_id="$3" + + if ! [ -d "$repo/$annex_dir" ]; then + return 0 + fi + + local n_cpus=0 + if [ "$(getconf LONG_BIT)" = "32" ]; then + # XZ allocates few hundred megabytes per thread, which can easily + # exhaust VM in armhf containers on 160-core machines. Limit xz + # parallelism to 8. + n_cpus=$(nproc --all) + if [ "$n_cpus" -gt "8" ]; then + n_cpus=8 + fi + fi + + # Convert annex symlinks to remote links + local symlink dir md5 remote_path newlink + while IFS= read -r -d '' symlink; do + dir=$(readlink "$repo/$annex_dir/$symlink") + + # Generate MD5 hash of the contents of the annex: find all files + # and generate md5sum for each of them, and then generate md5sum + # of that list. + # We avoid using md5sum of the tarball because using tar with + # multi-threaded xz compression may produce different tarballs. + md5=$(cd "$dir"; find -L -type f -print0 | xargs -0 md5sum | sort \ + | md5sum - | awk '{ print $1 }') + + remote_path="$HOME/$repo/$annex_dir/${pretty_id}$md5.tar.xz" + newlink="bkp-01.tcwglab:$remote_path" + + # Check if bkp-01.tcwglab already has an annexed tarball with our data. + # When re-writing history in round-robin-baseline.sh we download and + # re-upload same data multiple times. This optimization saves up + # on compression and upload time. + if ! ssh -n bkp-01.tcwglab test -f "$remote_path"; then + local tarball + tarball=$(mktemp --suffix=.tar.xz) + chmod 0644 "$tarball" + # We have a local link to the annex -- make it remote. + XZ_OPT=-T$n_cpus tar cJf "$tarball" -C "$dir" . + + ssh -n bkp-01.tcwglab mkdir -p "$(dirname "$remote_path")" + rsync -a "$tarball" "$newlink" + + rm "$tarball" + fi + + # In normal builds files inside $dir will be owned by tcwg-benchmark, + # so we will fail trying to delete them. Still, try to delete + # the directory to avoid running of disk space when re-writing history. + rm -rf "$dir" &>/dev/null || true + + git -C "$repo" rm "$annex_dir/$symlink" + # if $symlink is the last file in $annex_dir, then "git rm" will + # remove the directory as well. Re-create it. + mkdir -p "$repo/$annex_dir" + echo "$newlink" > "$repo/$annex_dir/$symlink" + git -C "$repo" add "$annex_dir/$symlink" + done < <(cd "$repo/$annex_dir"; find . -type l -print0) + + # update commit with new links. + git -C "$repo" commit --amend -C HEAD + ) +} + +# $1: git repo +# $2: annex directory inside the repo +git_annex_download () +{ + ( + set -euf -o pipefail + local repo="$1" + local annex_dir="$2" + + # FIXME: Remove workaround for old-style bmk-data after history rewrite. + if [ -f "$repo/results_id" ]; then + local link dir + link=$(cat "$repo/results_id") + + dir=$(mktemp -d) + rsync -a --del "bkp-01.tcwglab:/home/tcwg-benchmark/results-$link/" \ + "$dir/" + + rm -rf "${repo:?}/$annex_dir" + mkdir "$repo/$annex_dir" + + ln -s "$dir" "$repo/$annex_dir/bmk-data" + fi + + if ! [ -d "$repo/$annex_dir" ]; then + return 0 + fi + + # Resolve annex links to local symlinks. + # See round-robin-baseline.sh:push_baseline() for details. + local linkfile link tarball dir + while IFS= read -r -d '' linkfile; do + link=$(cat "$repo/$annex_dir/$linkfile") + + tarball=$(mktemp --suffix=.tar.xz) + rsync -a "$link" "$tarball" + + dir=$(mktemp -d) + tar xf "$tarball" -C "$dir" + rm "$tarball" + + rm "$repo/$annex_dir/$linkfile" + ln -s "$dir" "$repo/$annex_dir/$linkfile" + done < <(cd "$repo/$annex_dir"; find . -type f -print0) + ) +} |