1 files changed, 1087 insertions, 247 deletions
diff --git a/jenkins-helpers.sh b/jenkins-helpers.sh
index 7feb3edf..c6e23431 100644
--- a/jenkins-helpers.sh
+++ b/jenkins-helpers.sh
@@ -29,12 +29,12 @@ abs_path ()
 assert_with_msg ()
 {
     (
-    set -euf -o pipefail
+    set -euf -o pipefail +x
 
     local failure_message=$1
     shift
 
-    eval "$@" || (echo "$failure_message" && exit 1)
+    eval "$*" || (echo "$failure_message" >&2 && exit 1)
     )
 }
 
@@ -42,9 +42,9 @@ assert_with_msg ()
 assert ()
 {
     (
-    set -euf -o pipefail
+    set -euf -o pipefail +x
 
-    eval "$@"
+    eval "$*"
     )
 }
 
@@ -78,6 +78,9 @@ fresh_dir ()
 	done
     done
 
+    # Make sure we can delete the files
+    find "$dir" "${find_opts[@]}" -type d -exec chmod +rwx {} \;
+    chmod -R +rw "$dir"
     find "$dir" "${find_opts[@]}" -delete
     )
 }
@@ -123,7 +126,7 @@ print_node_with_least_containers ()
 
     # Re. --random-sort below: shuffle node list to mitigate races
     # when starting multiple containers at the same time
-    testers=$(print_nodes_in_labels ${tester_labels[*]} | sort --random-sort)
+    testers=$(print_nodes_in_labels "${tester_labels[@]}" | sort --random-sort)
     for tester in $testers; do
         ret=0
 	tester_host=$(print_host_for_node $tester "ignore_fail")
@@ -153,8 +156,8 @@ print_arch_for_label ()
     case $label in
         tcwg-x86_64-*) echo amd64 ;;
         tcwg-x86_32-*) echo i386 ;;
-        tcwg-amp_64-*|tcwg-apm_64-*|tcwg-d05_64-*|tcwg-lc_64*|tcwg-sq_64-*|tcwg-thx1_64-*|tcwg-tx1_64-*) echo arm64 ;;
-        tcwg-amp_32-*|tcwg-apm_32-*|tcwg-d05_32-*|tcwg-sq_32-*|tcwg-tk1_32-*|tcwg-tx1_32-*) echo armhf ;;
+        tcwg-amp_64-*|tcwg-apm_64-*|tcwg-armv8_64|tcwg-d05_64-*|tcwg-lc_64*|tcwg-sq_64-*|tcwg-thx1_64-*|tcwg-tx1_64-*) echo arm64 ;;
+        tcwg-amp_32-*|tcwg-apm_32-*|tcwg-armv7|tcwg-armv8_32|tcwg-d05_32-*|tcwg-sq_32-*|tcwg-tk1_32-*|tcwg-tx1_32-*) echo armhf ;;
         *) echo "ERROR: Unsupported label: $label" >&2; exit 1 ;;
     esac
     )
@@ -185,7 +188,7 @@ print_host_for_node ()
     # .ssh/config (in dockerfiles.git/tcwg-base/tcwg-buildslave/).
     for suffix in "" ".tcwglab"; do
 	host="$1$suffix"
-	if timeout 30s ssh "$host" true >& /dev/null; then
+	if timeout 30s ssh "$host" true &> /dev/null; then
 	    break
 	fi
 	host=""
@@ -249,12 +252,55 @@ print_tester_label_for_target ()
 	# for cross-testing. This means we no longer test on armv7
 	# hardware.
         aarch64-linux*) echo "tcwg-apm_64-test" ;;
-        armv8l-linux*) echo "tcwg-apm_32-test" ;;
-        arm-linux*) echo "tcwg-apm_32-test" ;;
+        armv8l-linux*) echo "tcwg-armv8_32" ;;
+        arm-linux*) echo "tcwg-armv7" ;;
     esac
     )
 }
 
+# Print number of busy executors on a jenkins node
+# $1: node
+print_number_of_busy_executors ()
+{
+    (
+    set -euf -o pipefail
+    local node="$1"
+
+    local json
+    json=$(mktemp)
+    # shellcheck disable=SC2064
+    trap "rm $json" EXIT
+
+    curl -s "https://ci.linaro.org/computer/$node/api/json?depth=1" > "$json"
+
+    local n n_busy idle
+
+    n=$(jq -r ".numExecutors" < "$json")
+    n_busy="$n"
+
+    while [ "$n" -gt "0" ]; do
+        n=$(($n - 1))
+        idle=$(jq -r ".executors[$n].idle" < "$json")
+        if [ "$idle" = "true" ]; then
+            n_busy=$(($n_busy - 1))
+        fi
+    done
+
+    n=0
+    while true; do
+        idle=$(jq -r ".oneOffExecutors[$n].idle" < "$json")
+        if [ "$idle" = "null" ]; then
+            break
+        elif [ "$idle" != "true" ]; then
+            n_busy=$(($n_busy + 1))
+        fi
+        n=$(($n + 1))
+    done
+
+    echo "$n_busy"
+    )
+}
+
 # Run command on remote machine in given directory via ssh on a given port
 # "$1" -- <host>[:<port>[:<dir>[:<ssh_opts>[:<env>]]]]
 # "$2, $3, etc" -- command and its arguments
@@ -290,6 +336,18 @@ remote_exec ()
     )
 }
 
+# Run the given git command ($2+) inside the repo of component $1
+git_component_cmd ()
+{
+    (
+        set -euf -o pipefail
+        local c="$1"
+        shift
+
+        git -C "$(get_component_dir "$c")" "$@"
+    )
+}
+
 # Resolve git ref to sha1
 # $1 -- repo directory
 # $2 -- branch, tag or refspec
@@ -467,6 +525,9 @@ clone_or_update_repo_no_checkout ()
 		if git -C $ref_dir rev-parse --git-dir >/dev/null 2>&1; then
 		    refopt="--reference $ref_dir"
 		    break
+		elif [ -d $ref_dir ]; then
+		    refopt="--reference-if-able $ref_dir"
+		    break
 		fi
 	    done
 	    ;;
@@ -477,14 +538,21 @@ clone_or_update_repo_no_checkout ()
     if ! git -C "$dir" status >/dev/null 2>&1; then
 	# Git repo doesn't exist or is corrupted.  Make a new clone.
 	rm -rf "$dir"
+    fi
 
-	local single_branch_opt=""
-	if [ x"$single_branch" != x"" ]; then
-	    single_branch_opt="--single-branch --branch $single_branch"
-	fi
+    if [ -d "$dir" ] && [ x"$refopt" != x"" ] \
+	   && [ "$(du -s "$dir/.git" | cut -f 1)" -gt $((1024*1024)) ]; then
+	# Current clone has grown above 1GB, and we have a reference repo,
+	# which should cut down the size significantly.
+	# Redo the clone to save disk space.
+	# PS: Unfortunately, I could not find a way to make the current clone
+	# use new objects from the reference repo without a full re-clone.
+	# Fortunately, a new clone with a reference repo is quick.
+	rm -rf "$dir"
+    fi
 
-	run_with_timeout_and_retry 1h 3 git clone $refopt $single_branch_opt "$url" "$dir"
-    else
+    # This is not a real loop; it is just easier to skip parts with "break".
+    while [ -d "$dir" ]; do
 	# Clean up the clone (this is supposed to re-share objects from
 	# reference clone and keep the size of the clone minimal).
 	# It's possible that previous GC process was interrupted and left
@@ -495,46 +563,86 @@ clone_or_update_repo_no_checkout ()
 	# Also, prune all loose objects to avoid "git gc --auto" failing
 	# and creating .git/gc.log to warn us.
 	rm -f "$dir/.git/gc.log"
-	git -C "$dir" gc --auto --force --prune=all
+	# Do not detach into background for GC.  Running in the background may
+	# cause a failure during bisect's rsync, which may see some of
+	# the files disappearing mid-rsync.
+	git -C "$dir" config gc.autoDetach false
+	if ! git -C "$dir" gc --auto --force --prune=all; then
+	    # "git gc" can fail due to corrupted packs.
+	    rm -rf "$dir"
+	    break
+	fi
+
 	# Delete stale locks -- especially .git/refs/remotes/REMOTE/BRANCH.lock
-	# These occur when builds are aborted during "git remote update" or similar.
+	# These occur when builds are aborted during "git remote update" or
+	# similar.
 	find "$dir/.git" -name "*.lock" -delete
-    fi
 
-    git_set_remote "$dir" "$remote" "$url" "$single_branch"
+	# Recover from any previous am/cherry-pick/rebase.
+	# In pre-commit CI we apply patches with "git am", which can fail
+	# and leave clone in a bad state.
+	local i
+	for i in am cherry-pick rebase; do
+	    git -C "$dir" "$i" --abort &>/dev/null || true
+	done
+
+	break
+    done
 
-    local refspec
-    if [ x"$single_branch" = x"" ]; then
-	run_with_timeout_and_retry 1h 3 git -C "$dir" remote update -p "$remote"
-	refspec="+refs/changes/*:refs/changes/*"
-    else
-	refspec="+refs/heads/$single_branch:refs/remotes/$remote/$single_branch"
-    fi
-    run_with_timeout_and_retry 1h 3 git -C "$dir" fetch -q $remote $refspec --prune
+    local fresh_clone=false
+    while true; do
+	if ! [ -d "$dir" ]; then
+	    local single_branch_opt=""
+	    if [ x"$single_branch" != x"" ]; then
+		single_branch_opt="--single-branch --branch $single_branch"
+	    fi
+
+	    run_with_timeout_and_retry 1h 3 git clone \
+				       $refopt $single_branch_opt "$url" "$dir"
+	    fresh_clone=true
+	fi
+
+	git_set_remote "$dir" "$remote" "$url" "$single_branch"
+
+	local refspec
+	if [ x"$single_branch" = x"" ]; then
+	    run_with_timeout_and_retry 1h 3 git -C "$dir" remote update -p \
+				       "$remote" 2>/dev/null
+	    refspec="+refs/changes/*:refs/changes/*"
+	else
+	    refspec="+refs/heads/$single_branch:refs/remotes/$remote/$single_branch"
+	fi
+
+	if ! run_with_timeout_and_retry 1h 3 git -C "$dir" fetch -q \
+	     $remote $refspec --prune; then
+	    # "git fetch --prune" can fail due to running out of memory space
+	    # on 32-bit architectures on big repos.  Remove the repo and retry
+	    # with a fresh clone.
+	    if $fresh_clone; then
+		return 1
+	    fi
+
+	    rm -rf "$dir"
+	    continue
+	fi
+
+	break
+    done
     )
 }
 
-# Clone or update a git repo
+# Checkout branch/ref/SHA1 in a git repo
 # $1 -- repo directory
 # $2 -- ref to checkout
-# $3 -- master git repo
-# $4 -- optional reference git repo (to speedup initial cloning)
-# $5 -- optional single-branch to reduce fetching from remote repo
-# $6 -- optional name of remote (default is "origin")
-clone_or_update_repo ()
+# $3 -- name of the git remote
+git_checkout ()
 {
     (
     set -euf -o pipefail
 
     local dir="$1"
     local ref="$2"
-    local url="$3"
-    local reference="${4-auto}"
-    local single_branch="${5-}"
-    local remote="${6-origin}"
-
-    clone_or_update_repo_no_checkout "$dir" "$url" "$reference" \
-				     "$single_branch" "$remote"
+    local remote="$3"
 
     git_clean "$dir"
     # Convert git branch/tag names into SHA1
@@ -545,35 +653,29 @@ clone_or_update_repo ()
     )
 }
 
-# Print baseline git repo
-# $1 -- project name
-# $3 -- whether to make the new remote read-only or read-write.
-print_baseline_repo ()
+# Clone or update a git repo
+# $1 -- repo directory
+# $2 -- ref to checkout
+# $3 -- master git repo
+# $4 -- optional reference git repo (to speedup initial cloning)
+# $5 -- optional single-branch to reduce fetching from remote repo
+# $6 -- optional name of remote (default is "origin")
+clone_or_update_repo ()
 {
     (
     set -euf -o pipefail
 
     local dir="$1"
-    local read_only="$2"
-
-    local repo
-    case "$dir" in
-	binutils) repo=binutils-gdb.git ;;
-	llvm) repo=llvm-project.git ;;
-	*) repo=$dir.git ;;
-    esac
+    local ref="$2"
+    local url="$3"
+    local reference="${4-auto}"
+    local single_branch="${5-}"
+    local remote="${6-origin}"
 
-    # Use git-us.l.o to avoid delays between review.l.o and git.l.o
-    local url="git-us.linaro.org/toolchain/ci/$repo"
-    if $read_only; then
-	url="https://$url"
-    else
-	# Use gitolite access.  Gerrit's ssh access verifies pushed commits,
-	# which can slow-down server on big pushes.
-	url="ssh://$url"
-    fi
+    clone_or_update_repo_no_checkout "$dir" "$url" "$reference" \
+				     "$single_branch" "$remote"
 
-    echo "$url"
+    git_checkout "$dir" "$ref" "$remote"
     )
 }
 
@@ -649,27 +751,28 @@ untar_url ()
 
 # Wait until the ssh server is ready to accept connexions
 # $1: host
-# $2: port
-# $3: retry count (optional)
+# $2: retry count; use "" for the default
+# $3+: ssh options
 # Returns 0 on success, 1 in case of error
 wait_for_ssh_server ()
 {
     (
     set -euf -o pipefail
     local session_host="$1"
-    local session_port="$2"
-    local count="${3-20}"
+    local count="${2:-20}"
+    shift 2
+    local -a session_opts=("$@")
 
     while [ $count -gt 0 ]
     do
-	timeout 30s ssh -p $session_port $session_host true && break
+	timeout 30s ssh "${session_opts[@]}" $session_host true && break
 	echo "SSH server not ready, waiting....."
 	sleep 5
 	count=$((count - 1))
     done
 
     if [ $count -eq 0 ]; then
-	echo "ERROR: SSH server did not respond ($session_host:$session_port)"
+	echo "ERROR: SSH server did not respond (ssh ${session_opts[*]} $session_host)"
 	return 1
     fi
     return 0
@@ -706,17 +809,24 @@ print_memory_limit ()
     local memlimit="$4"
     local memory
     case "$task" in
-	build)
-	    # 2GB per compilation core, with 4GB minimum and
-	    # half of total system RAM maximum.
-	    memory=$(( 2000 * $weight * $nproc ))
-
-	    memlimit=$(( $memlimit / 2 ))
+	build|precommit)
 	    if [ "$memlimit" -lt "4000" ]; then
-		# Don't limit memory on machines with less than 8GB RAM.
+		# Don't limit memory on machines with less than 4GB RAM.
 		memory="unlimited"
 	    else
-		# Use at most half of RAM
+		# We want to have at least 2GB of RAM for every core.  E.g.,
+		# on a machine with 32 cores and 128GB RAM we can run
+		# 2 concurrent builds, while on a 32-core machine with
+		# 64GB RAM we can run only 1 build at a time.
+		# Note that number of concurrent builds is controlled by number
+		# of node executors in jenkins.
+		memory=$(( 2000 * $weight * $nproc ))
+
+		# Also, trim 5% off total RAM to have a bit of RAM reserved
+		# for processes on the bare machine, which really helps when
+		# build container goes into swap.
+		memlimit=$(( $memlimit * 95 / 100 ))
+
 		if [ "$memory" -gt "$memlimit" ]; then
 		    memory="$memlimit"
 		fi
@@ -744,79 +854,110 @@ print_pids_limit ()
     local task="$1"
     local weight="$2"
     local pids
-    pids=$(( $weight * 5000 )) # 5000 processes per executor
+
+    # On startup of GCC's guality tests we have $NCPUs guality_check$PID.exe
+    # processes, each of which forks into GDB with ($NCPUs+1) threads.
+    # This means that on a 160-core system we need around 30k PID limit.
+    pids=$(nproc --all)
+    pids=$(( pids * (pids + 1) + 5000 ))
+    pids=$(( pids * weight ))
+
+    # Make sure we are using at most half of system PID limit
+    local pid_max_2
+    pid_max_2=$(cat /proc/sys/kernel/pid_max)
+    pid_max_2=$(( pid_max_2 / 2 ))
+
+    if [ $pids -gt $pid_max_2 ]; then
+	pids=$pid_max_2
+    fi
+
     echo "$pids"
     )
 }
 
-# Print default bind mounts for $task
+# Print default bind and volume mounts for $task and $job
 # $1: task
-print_bind_mounts ()
+# $2: job
+# $3: Suffix to be appended to the volume names (e.g., -$container_arch-$distro)
+# $4+: ssh command
+print_mounts ()
 {
     (
     set -euf -o pipefail
     local task="$1"
-    local ssh="$2"
-    local -a bind_mounts
+    local job="$2"
+    local suffix="$3"
+    shift 3
+    local ssh=("$@")
+
+    if [ "${WORKSPACE+set}" = "set" ]; then
+	case $task in
+	    bench|build)
+		echo "$WORKSPACE:$WORKSPACE"
+		;;
+	    precommit)
+		# Note the difference between "-v $WORKSPACE:$WORKSPACE" above
+		# and "-v $WORKSPACE" here.  In the above case $WORKSPACE is
+		# bind-mounted from the host; but in this case a scratch volume
+		# is created and mounted inside container.
+		echo "$WORKSPACE"
+		# The only reason why we are bind-mounting base-artifacts/
+		# is that it can be very big.  Rsync-ing tens of gigs back and
+		# forth can take as much time as the actual pre-commit test.
+		# As a nice side-effect having base-artifacts/ read-only checks
+		# that our build scripts don't try to modify it by mistake.
+		echo "$WORKSPACE/base-artifacts:$WORKSPACE/base-artifacts:ro"
+		;;
+	esac
+    fi
 
     case $task in
-	bench|build)
-	    if [ x"${WORKSPACE+set}" = x"set" ]; then
-		bind_mounts+=("$WORKSPACE")
-	    fi
+	build|precommit)
+	    echo /home/tcwg-buildslave/snapshots-ref:/home/tcwg-buildslave/snapshots-ref:ro
+	    ;;
+	bench)
+	    echo /home/shared/git:/home/shared/git:ro
 	    ;;
-    esac
-
-    case $task in
-	build) bind_mounts+=(/home/tcwg-buildslave/snapshots-ref:ro) ;;
-	bench) bind_mounts+=(/home/shared/git:ro) ;;
     esac
 
     local key
-    for key in $($ssh find /etc/ssh/ -name "ssh_host_*_key" \
-		      -o -name "ssh_host_*_key.pub"); do
-	bind_mounts+=("$key:ro")
+    for key in $("${ssh[@]}" find /etc/ssh/ -name "ssh_host_*_key" \
+			     -o -name "ssh_host_*_key.pub"); do
+	echo "$key:$key:ro"
     done
 
-    echo "${bind_mounts[@]:+${bind_mounts[@]}}"
-    )
-}
-
-# Print default volume mounts for $job
-# $1: job
-# $2: Suffix to be appended to the volume names (e.g., -$container_arch-$distro)
-print_volume_mounts ()
-{
-    (
-    set -euf -o pipefail
-    local job="$1"
-    local suffix="$2"
-
-    local -a mounts
-    local volume_id
-
     case "$job" in
 	tcwg_*-*)
 	    # Add ccache volume for tcwg_* jobs.
 	    # These jobs depend on ccache for fast rebuilds of LLVM and GCC with
 	    # the host compiler.
-	    # tcwg_* jobs use per-executor WORKSPACES, and ccache uses separate
-	    # cache entries for different paths.  Therefore we need to use
-	    # separate caches for different $WORKSPACES.  Otherwise we get
-	    # a lot of cache polution on high-executor machines, e.g., for
-	    # tcwg_bmk builds on tcwg-x86_64-dev-01 node.
 	    local prefix
 	    if [ x"${WORKSPACE+set}" = x"set" ]; then
 		prefix=$(basename $WORKSPACE)
 	    else
 		prefix=$(echo $job | cut -d- -f 1)
 	    fi
+	    # tcwg_* jobs use per-executor WORKSPACES, and we configure ccache
+	    # to use CCACHE_BASEDIR=$WORKSPACE so that ccache sees same paths
+	    # for builds on different executors.
+	    # Strip "_$EXECUTOR_NUMBER" from the job/workspace ID.
+	    prefix="${prefix%_[0-9]*}"
+
+	    local volume_id
 	    volume_id=$(print_docker_name "$prefix$suffix")
-	    mounts+=(ccache-"$volume_id":"$HOME"/.ccache)
+
+	    local readonly=""
+	    if [ "$task" = "precommit" ]; then
+		readonly=":ro"
+	    fi
+	    echo "ccache-$volume_id:$HOME/.ccache$readonly"
 	    ;;
     esac
+
     case "$job" in
 	tcwg_bmk*)
+	    assert_with_msg "Precommit benchmarking requires more thought" \
+			    [ "$task" != "precommit" ]
 	    # Add scratch mount for tcwg-benchmark's $HOME.
 	    # tcwg_bmk-* jobs trigger tcwg-benchmark jenkins jobs, which
 	    # then ssh to the build container to compile benchmark objects
@@ -824,10 +965,9 @@ print_volume_mounts ()
 	    # parameter -- see tcwg_bmk-build.sh:benchmark()).
 	    # This generates a fair bit of disk trafic on /home/tcwg-benchmark,
 	    # and it's best to use docker scratch volume, rather than overlayfs.
-	    mounts+=(/home/tcwg-benchmark)
+	    echo /home/tcwg-benchmark
 	;;
     esac
-    echo "${mounts[@]:+${mounts[@]}}"
     )
 }
 
@@ -851,9 +991,11 @@ __manifest_filename=("/dev/null")
 
 # Set new file name for manifest
 # $1: File name
+# $2: Optional true/false on whether to start a new manifest (default: true)
 manifest_push ()
 {
     local filename="$1"
+    local clean="${2-true}"
 
     # Resolve absolute path to manifest.
     local dir
@@ -862,7 +1004,9 @@ manifest_push ()
     dir=$(cd "$dir"; pwd)
 
     __manifest_filename=("$dir/$(basename "$filename")" "${__manifest_filename[@]}")
-    rm -f "${__manifest_filename[0]}"
+    if $clean; then
+	rm -f "${__manifest_filename[0]}"
+    fi
 }
 
 # Return to previous manifest filename
@@ -881,17 +1025,457 @@ manifest_out ()
     cat >> "${__manifest_filename[0]}"
 }
 
+# Fetch and print value from manifest
+# $1: Manifest file
+# $2: Variable to fetch
+# $3: Strict mode (default "true"): fail if the manifest or the variable is missing; "false" ignores both
+get_manifest ()
+{
+    (
+    set +x
+    set -euf -o pipefail
+    local manifest="$1"
+    local var="$2"
+    local strict="${3-true}"
+
+    # No manifest found (e.g., update_baseline=init): fail if strict, else print nothing
+    if ! [ -f "$manifest" ]; then
+	if $strict; then
+	    return 1
+	fi
+	return 0
+    fi
+
+    # Unwrap $var down to variable name that we can unset.
+    local name="$var"
+    # {name} -> name
+    name=$(echo "$name" | sed -e 's/^{\(.*\)}$/\1/')
+    # Strip "+, -, :+, :-" suffixes
+    name=$(echo "$name" | sed -e 's/[-+:].*$//')
+
+    # remove any existing declarations of $name
+    unset "$name"
+
+    # FIXME: manifest should declare "rr" itself
+    declare -A rr
+
+    # shellcheck disable=SC1090
+    source "$manifest"
+
+    if ! $strict; then
+	# Do not complain about unbound variables
+	set +u
+    fi
+
+    eval echo "\$$var"
+    )
+}
+
+# Fetch and print value from manifest of a baseline build
+# $1: Variable to fetch.
+get_baseline_manifest ()
+{
+    get_manifest base-artifacts/manifest.sh "$1" false
+}
+
+# Fetch and print value from manifest of the current build
+# $1: Variable to fetch.
+get_current_manifest ()
+{
+    get_manifest "${rr[top_artifacts]}/manifest.sh" "$1"
+}
+
+get_baseline_git ()
+{
+    (
+    set -euf -o pipefail
+    local base_artifacts="base-artifacts"
+    # may not exist
+    if [ -f "$base_artifacts/git/$1" ]; then
+        cat "$base_artifacts/git/$1"
+    fi
+    )
+}
+
+get_current_git ()
+{
+    (
+    set -euf -o pipefail
+    assert_with_msg "ERROR: No $1 in current git" \
+		    [ -f "${rr[top_artifacts]}/git/$1" ]
+    cat "${rr[top_artifacts]}/git/$1"
+    )
+}
+
+set_current_git ()
+{
+    (
+    set -euf -o pipefail
+    mkdir -p ${rr[top_artifacts]}/git
+    cat > "${rr[top_artifacts]}/git/$1"
+    )
+}
+
+declare -A deps_url=(
+    ["dawn"]="https://dawn.googlesource.com/dawn.git"
+    ["chromium_variations"]="https://chromium.googlesource.com/chromium-variations.git"
+    ["catapult"]="https://chromium.googlesource.com/catapult.git"
+    ["perfetto"]="https://android.googlesource.com/platform/external/perfetto.git"
+    ["vulkan-deps"]="https://chromium.googlesource.com/vulkan-deps"
+    ["angle"]="https://chromium.googlesource.com/angle/angle.git"
+    ["skia"]="https://skia.googlesource.com/skia.git"
+    ["v8"]="https://chromium.googlesource.com/v8/v8.git"
+)
+
+declare -A deps_dir=(
+    ["dawn"]="src/third_party/dawn"
+    ["chromium_variations"]="src/third_party/chromium-variations"
+    ["catapult"]="src/third_party/catapult"
+    ["perfetto"]="src/third_party/perfetto"
+    ["vulkan-deps"]="src/third_party/vulkan-deps"
+    ["angle"]="src/third_party/angle"
+    ["skia"]="src/third_party/skia"
+    ["v8"]="src/v8"
+)
+
+get_component_url ()
+{
+    (
+    set -euf -o pipefail
+
+    if [[ -v deps_url["${1}"] ]]; then
+        echo "${deps_url["${1}"]}"
+    fi
+    )
+}
+
+get_component_dir ()
+{
+    (
+    set -euf -o pipefail
+
+    if [[ -v deps_dir["${1}"] ]]; then
+        echo "${deps_dir["${1}"]}"
+    else
+        echo "${1}"
+    fi
+    )
+}
+
+# Print committer timestamp (git "%ct") of the baseline revision of component $1
+get_baseline_component_date ()
+{
+    (
+    set -euf -o pipefail
+    local base_artifacts="base-artifacts"
+    assert_with_msg "ERROR: No $1 in baseline git" \
+		    [ -f "$base_artifacts/git/${1}_rev" ]
+    # The revision to query is the one recorded in $base_artifacts/git/${1}_rev.
+    git_component_cmd "$1" show --no-patch --pretty=%ct "$(cat "$base_artifacts/git/${1}_rev")"
+    )
+}
+
+get_current_component_date ()
+{
+    (
+    set -euf -o pipefail
+    assert_with_msg "ERROR: No $1 in current git" \
+		    [ -f "${rr[top_artifacts]}/git/${1}_rev" ]
+
+    git_component_cmd "$1" show --no-patch --pretty=%ct "$(cat "${rr[top_artifacts]}/git/${1}_rev")"
+    )
+}
+
+# Print round-robin components that are being updated in this build
+# (the ones using non-baseline branches).
+print_updated_components ()
+{
+    (
+    set -euf -o pipefail
+
+    local c delim=""
+    for c in ${rr[components]}; do
+	if [ x"${rr[${c}_git]}" != x"baseline" ]; then
+	    echo -ne "$delim$c"
+	    delim=" "
+	fi
+    done
+    echo
+    )
+}
+
+# Print the single round-robin component being updated in this build.
+# Print nothing if multiple components are being updated.
+print_single_updated_component ()
+{
+    (
+    set -euf -o pipefail
+
+    local -a updated_components
+    IFS=" " read -r -a updated_components <<< "$(print_updated_components)"
+
+    if [ ${#updated_components[@]} -eq 1 ]; then
+	echo "${updated_components[0]}"
+    fi
+    )
+}
+
+# Print round-robin components that have new commits in this build
+# compared to the baseline.
+# This expects all components to be cloned and checked out at appropriate revs.
+# During bisect we have only a single component updated by definition, and
+# it is guaranteed to have clone_repo() called for it.
+print_changed_components ()
+{
+    (
+    set -euf -o pipefail
+
+    local c delim=""
+    for c in $(print_updated_components); do
+	if ! [ -f base-artifacts/git/${c}_rev ]; then
+	    # $c not present in baseline: consider that it changed.
+	    echo -ne "$delim$c"
+	    delim=${1- }
+	elif [ x"$(get_current_git ${c}_rev)" \
+	     != x"$(get_baseline_git ${c}_rev)" ]; then
+	    echo -ne "$delim$c"
+	    delim=${1- }
+	fi
+    done
+    echo
+    )
+}
+
+# Breakup changed components into $culprit and the rest of components.
+# This will reduce the number of builds when $culprit is responsible for
+# majority of regressions.
+breakup_changed_components ()
+{
+    (
+    set -euf -o pipefail
+
+    local culprit="${1-}"
+
+    if [ "$culprit" = "" ] \
+	   || ! print_changed_components "\n" \
+	       | grep "^$culprit\$" >/dev/null; then
+	print_changed_components "\n"
+    else
+	echo "$culprit"
+	print_changed_components "\n" | grep -v "^$culprit\$" | tr '\n' ' ' \
+	    | sed -e "s/ \$//g"
+	echo
+    fi
+    )
+}
+
+# Fetch paths from git history
+# $1 -- number of versions to fetch; if none of the paths are present in
+#       a particular revision that revision doesn't count towards this number;
+#       positive values will fetch the most recent N revisions starting from
+#       most recent to less recent;
+#       negative values will fetch the oldest -N revisions starting from oldest
+#       to less old.
+#       As a special case "0" will fetch you all revisions in "positive" order,
+#       and "-0" will fetch you all revisions in "negative" order.
+# $2 -- git repo
+# $3+ -- paths in git repo; can be files or directories, only the 1st path
+#        existing in a revision is fetched (useful for renamed/moved files).
+# $4+ -- [optional after "--" separator] paths in git repo, which should be
+#        fetched in addition to the "main" path specified in "$3".
+#
+# This function fetches files into a temporary directory (pointed to by the first
+# line of output) and prints out paths under that temporary directory for subsequent
+# fetches of "${paths[@]}" from appropriate revisions.
+# Once one of the paths is found in a given revision, we check it out and
+# move on to the next revision.
+# Optional "--relative" option supplied as $3 will make get_git_history()
+# output path names relative to the output directory.  This is useful when
+# output directory needs to be moved before processing.
+get_git_history ()
+{
+    (
+    set -euf -o pipefail
+
+    local n_revs="$1"
+    local repo="$2"
+    shift 2
+
+    local relative=false
+    if [ "$1" = "--relative" ]; then
+	relative=true
+	shift 1
+    fi
+
+    local -a paths=()
+    while [ $# != 0 ]; do
+	if [ "$1" = "--" ]; then
+	    shift
+	    break
+	fi
+	paths+=("$1")
+	shift
+    done
+
+    local -a extra_paths=("$@")
+
+    local repo_branch
+    repo_branch=$(echo "$repo" | cut -s -d# -f2)
+    if [ "$repo_branch" = "" ]; then
+	repo_branch="HEAD"
+    fi
+    repo=$(echo "$repo" | cut -d# -f1)
+
+    local -a git_rev_list=() git_archive=()
+
+    case "$repo" in
+	ssh://*)
+	    local repo_host
+	    repo_host=$(echo "$repo" | cut -s -d/ -f3)
+	    # "ssh" will read from stdin, unless specifically forbidden with
+	    # "-n".  Without "-n" ssh would consume the output of git_rev_list
+	    # in the "while read rev" loop below.
+	    git_rev_list+=(ssh -n "$repo_host")
+	    git_archive+=(ssh -n "$repo_host")
+
+	    repo=$(echo "$repo" | cut -s -d/ -f4-)
+	    repo="/$repo"
+	    ;;
+    esac
+
+    git_rev_list+=(git -C "$repo" rev-list)
+    git_archive+=(git -C "$repo" archive)
+
+    if [ "$n_revs" = "-0" ] || [ "$n_revs" -lt "0" ]; then
+	git_rev_list+=(--reverse)
+	n_revs=$((-$n_revs))
+    fi
+    git_rev_list+=("$repo_branch" -- "${paths[@]}")
+
+    local rev tmp_root
+    tmp_root=$(mktemp -d)
+    echo "$tmp_root"
+
+    while read rev; do
+	local found path
+	mkdir "$tmp_root/$rev"
+
+	found=false
+	for path in "${paths[@]}"; do
+	    "${git_archive[@]}" "$rev" -- "$path" "${extra_paths[@]}" \
+		| tar -x -C "$tmp_root/$rev" &
+	    # "git archive" fails when $path was deleted in $rev.
+	    if wait $!; then
+		found=true
+		break
+	    fi
+	done
+
+	if $found; then
+	    if $relative; then
+		echo "$rev/$path"
+	    else
+		echo "$tmp_root/$rev/$path"
+	    fi
+	    n_revs=$(($n_revs-1))
+	    if [ $n_revs = 0 ]; then
+		break
+	    fi
+	else
+	    # shellcheck disable=SC2115
+	    rm -r "$tmp_root/$rev"
+	fi
+    done < <("${git_rev_list[@]}")
+    )
+}
+
+convert_arg_var ()
+{
+    declare -g "$1=$2"
+    cat <<EOF | manifest_out
+declare -g "$1=$2"
+EOF
+}
+
+convert_arg_arr ()
+{
+    if ! test_array $1; then
+	declare -ag $1
+	cat <<EOF | manifest_out
+declare -ga $1
+EOF
+    fi
+    eval "$1+=(\"$2\")"
+    cat <<EOF | manifest_out
+$1+=("$2")
+EOF
+}
+
+convert_arg_declare ()
+{
+    local name="$1"
+
+    case "$name" in
+	*"["*"]")
+	    local arr="${1%\[*\]}"
+	    if ! test_array $arr; then
+		declare -Ag $arr
+		cat <<EOF | manifest_out
+declare -gA $arr
+EOF
+	    fi
+	    ;;
+	*)
+	    declare -g "$name"
+	    cat <<EOF | manifest_out
+declare -g $name
+EOF
+	    ;;
+    esac
+}
+
+convert_arg_set ()
+{
+    eval "$1=\"$2\""
+    cat <<EOF | manifest_out
+$1="$2"
+EOF
+}
+
+convert_arg_assarr ()
+{
+    convert_arg_declare "$1"
+    convert_arg_set "$1" "$2"
+}
+
+convert_arg_source ()
+{
+    assert_with_msg "ERROR: manifest/include does not exist: $1" \
+		    [ -f "$1" ]
+    # shellcheck disable=SC1090
+    source "$1"
+    echo "# Start of include $1" | manifest_out
+    cat "$1" | manifest_out
+    echo "# End of include $1" | manifest_out
+}
+
 # Process "--var value" and "++arr elem" arguments and define corresponding
 # variables and arrays.
 # "--var value" defines shell variable "$var" to "value".
+# "__var value" defines shell variable "$var" to "value", but doesn't store
+#               it to the manifest.  This is useful for passing secrets.
 # "++arr elem" defines shell array "$arr[@]" and adds "elem" to it.
 # "==arr[key] value" defines shell associative array "$arr[@]" and sets
 #                    "${arr[key]}" to "value".
 # "@@ file" sources file.
-# "%% file" starts manifest in file.  Also see "^^ true".
-# "^^ true/false %% manifest" whether to reproduce the build using manifest.
-#     If "true" -- source manifest instead of generating it, then discard
-#     all following options at to separator "--".
+# "@@artifacts_var dir" defines artifacts directory and sources the manifest
+#                       from dir/manifest.sh.  This is useful for reproducing
+#                       builds.
+# "%%artifacts_var dir" defines artifacts directory and starts manifest in
+#                       dir/manifest.sh.  Also see "^^ true".
+# "^^ true/false %%artifacts_var dir" whether to reproduce the build using manifest.
+#     If "true" -- source dir/manifest.sh instead of generating it, then discard
+#     all following options up to separator "--".
 #     If "false" -- do nothing and proceed as usual.
 #
 # Shell array $CONVERTED_ARGS is set to the arguments processed.
@@ -911,73 +1495,91 @@ convert_args_to_variables ()
 		break
 		;;
 	    "--"*)
-		name="${1#--}"
+		assert_with_msg "ERROR: Parameter value not provided for $1." \
+				[ $# -ge 2 ]
+		convert_arg_var "${1#--}" "$2"
+		num=2
+		;;
+	    "__"*)
+		assert_with_msg "ERROR: Parameter value not provided for $1." \
+				[ $# -ge 2 ]
+		name="${1#__}"
+		# FIXME: Can we add "set +x" here?
 		declare -g "$name=$2"
-		cat <<EOF | manifest_out
-declare -g "$name=$2"
-EOF
 		num=2
 		;;
 	    "++"*)
-		name="${1#++}"
-		if ! test_array $name; then
-		    declare -ag $name
-		    cat <<EOF | manifest_out
-declare -ga $name
-EOF
-		fi
-		eval "$name+=(\"$2\")"
-		cat <<EOF | manifest_out
-$name+=("$2")
-EOF
+		assert_with_msg "ERROR: Parameter value not provided for $1." \
+				[ $# -ge 2 ]
+		convert_arg_arr "${1#++}" "$2"
 		num=2
 		;;
 	    "=="*)
-		name="${1#==}"
-		arr="${name%\[*\]}"
-		if ! test_array $arr; then
-		    declare -Ag $arr
-		    cat <<EOF | manifest_out
-declare -gA $arr
-EOF
-		fi
-		if [ $# -lt 2 ]; then
-		    echo "ERROR: Parameter value not provided for $1."
-		    exit 1
-		fi
-		eval "$name=\"$2\""
-		cat <<EOF | manifest_out
-$name="$2"
-EOF
+		assert_with_msg "ERROR: Parameter value not provided for $1." \
+				[ $# -ge 2 ]
+		convert_arg_assarr "${1#==}" "$2"
 		num=2
 		;;
 	    "@@")
-		# shellcheck disable=SC1090
-		source "$2"
-		echo "# Start of include $2" | manifest_out
-		cat "$2" | manifest_out
-		echo "# End of include $2" | manifest_out
+		assert_with_msg "ERROR: Parameter value not provided for $1." \
+				[ $# -ge 2 ]
+		convert_arg_source "$2"
 		num=2
 		;;
-	    "%%")
-		manifest_push "$2"
+	    "@@"*)
+		# TODO: It should be possible to simplify handling of "^^"
+		# now that we have @@artifacts dir.
+		assert_with_msg "ERROR: Parameter value not provided for $1." \
+				[ $# -ge 2 ]
+
+		name="${1#@@}"
+
+		# FIXME: This should not be necessary since manifests should
+		# "declare -Ag rr" themselves, but current manifests don't
+		# do that, due to "declare -A rr" in round-robin.sh.  That
+		# declaration makes convert_arg_declare think that rr was
+		# already added to the manifest.
+		convert_arg_declare "$name"
+
+		convert_arg_source "$2/manifest.sh"
+		manifest_push "$2/manifest.sh" false
+
+		# Builds are supposed to be re-runnable from different
+		# directories, so do not put artifacts directory into manifest.
+		eval "$name=\"$2\""
+
+		num=2
+		;;
+	    "%%"*)
+		assert_with_msg "ERROR: Parameter value not provided for $1." \
+				[ $# -ge 2 ]
+		manifest_push "$2/manifest.sh"
 		cat <<EOF | manifest_out
-# Start option processing
-jenkins_scripts_rev=$(git -C "$(dirname "$0")" rev-parse HEAD)
+declare -g "jenkins_scripts_rev=$(git -C "$(dirname "$0")" rev-parse HEAD)"
+# Artifacts directory
+EOF
+		name="${1#%%}"
+		convert_arg_declare "$name"
+
+		# Builds are supposed to be re-runnable from different
+		# directories, so do not put artifacts directory into manifest.
+		eval "$name=\"$2\""
+
+		cat <<EOF | manifest_out
+# Recording parameters to manifest: $2/manifest.sh
 EOF
 		num=2
 		;;
 	    "^^")
+		assert_with_msg "ERROR: Parameter value not provided for $1." \
+				[ $# -ge 4 ]
 		if [ x"$2" = x"true" ]; then
-		    # Check that we have a manifest to reproduce
-		    if [ x"$3" != x"%%"  ] || [ ! -f "$4" ]; then
-			echo "ERROR: '^^ true' must be followed by '%% <MANIFEST>'"
-			exit 1
-		    fi
-
-		    # Source the manifest for reproduction.
-		    # shellcheck disable=SC1090
-		    source "$4"
+		    name="${3#%%}"
+		    case "$name" in
+			*"["*"]") convert_arg_assarr "$name" "$4" ;;
+			*) convert_arg_var "$name" "$4" ;;
+		    esac
+		    convert_arg_source "$4/manifest.sh"
 
 		    # Skip processing all following arguments.
 		    num=0
@@ -1004,9 +1606,6 @@ EOF
 	done
     done
     eval "SHIFT_CONVERTED_ARGS=$total"
-    cat <<EOF | manifest_out
-# Processed $total options
-EOF
 }
 
 # Check that varible names in "$@" are set
@@ -1016,10 +1615,21 @@ obligatory_variables ()
     (
 	set -euf -o pipefail
 	for i in "$@"; do
-	    if eval "[ x\"\${$i+set}\" != x\"set\" ]"; then
-		echo "ERROR: required parameter $i not set"
-		exit 1
-	    fi
+	    case "$i" in
+		*"["*"]")
+		    if eval "[ x\"\${$i+set}\" != x\"set\" ]"; then
+			echo "ERROR: required parameter $i not set"
+			exit 1
+		    fi
+		    ;;
+		*)
+		    if [[ "$(declare -p "$i" 2>/dev/null)" \
+			      != "declare "* ]]; then
+			echo "ERROR: required parameter $i not set"
+			exit 1
+		    fi
+		    ;;
+	    esac
 	done
     )
 }
@@ -1043,14 +1653,25 @@ print_gnu_target ()
     set -euf -o pipefail
     local target="$1"
 
-    if [ x"$target" = x"native" ]; then
-	target=$(uname -m)
-    fi
     case "$target" in
 	"aarch64") target="aarch64-linux-gnu" ;;
-	"arm_eabi") target="arm-eabi" ;;
+        arm*_eabi) target="arm-eabi" ;;
+        thumb*_eabi) target="arm-eabi" ;;
 	"arm"*) target="arm-linux-gnueabihf" ;;
+	"woa64") target="aarch64-w64-mingw32" ;;
 	"x86_64") target="x86_64-linux-gnu" ;;
+	"native")
+	    case "$(uname -m)" in
+		"aarch64") target="aarch64-unknown-linux-gnu" ;;
+		"armv7l") target="armv7l-unknown-linux-gnueabihf" ;;
+		"armv8l") target="armv8l-unknown-linux-gnueabihf" ;;
+		"x86_64") target="x86_64-pc-linux-gnu" ;;
+		*)
+		    echo "ERROR: Unknown native target $(uname -m)" >&2
+		    exit 1
+		    ;;
+	    esac
+	    ;;
 	*) echo "ERROR: Unknown target $target" >&2; exit 1 ;;
     esac
     echo "$target"
@@ -1103,54 +1724,15 @@ print_kernel_target ()
 git_clean () {
     (
     set -euf -o pipefail
-
-    fresh_dir "$1" "$1/.git/*"
-    git -C "$1" reset --hard
-    )
-}
-
-# Add git remote pointing to linaro's git repo/mirrors with writable
-# toolchain/ci/* repo.  Deduce repo's URL from URL of existing
-# "origin" git remote.
-# $1: Git clone directory (must have "origin" remote configured)
-# $2: Name of the new remote.
-# $3: Whether to make the new remote read-only or read-write.
-git_init_linaro_local_remote ()
-{
-    (
-    set -euf -o pipefail
     local dir="$1"
-    local remote="$2"
-    local read_only="$3"
-
-    local origin_url
-    local new_url
-    origin_url=$(git -C "$dir" remote get-url origin)
-
-    # Figure out mirror repo on linaro's servers.
-    case "$origin_url" in
-	*"kernel.org/"*"/linux"*)
-	    new_url="toolchain/ci/linux.git"
-	    ;;
-	*"linaro.org/toolchain/gcc-compare-results.git")
-	    new_url="toolchain/gcc-compare-results.git"
-	    ;;
-	*)
-	    new_url="toolchain/ci/$(basename $origin_url)"
-	    ;;
-    esac
+    shift
 
-    # Use git-us.l.o to avoid delays between review.l.o and git.l.o
-    new_url="git-us.linaro.org/$new_url"
-    if $read_only; then
-	new_url="https://$new_url"
-    else
-	# Use gitolite access.  Gerrit's ssh access verifies pushed commits,
-	# which can slow-down server on big pushes.
-	new_url="ssh://$new_url"
+    fresh_dir "$dir" "$dir/.git/*"
+    if ! git -C "$dir" reset -q --hard "$@"; then
+	# "git reset" may fail if index gets corrupted -- remove it and retry.
+	rm -f "$dir/.git/index"
+	git -C "$dir" reset -q --hard "$@"
     fi
-
-    git_set_remote "$dir" "$remote" "$new_url"
     )
 }
 
@@ -1177,6 +1759,8 @@ git_push ()
 
 # Initialize run_step state
 # $1: Step to start execution at (or "" to start at the very first step)
+#     Appending "+" to the step name, e.g., "__start_at reset_artifacts+"
+#     makes us start on the step right AFTER the specified step.
 # $2: Step to finish execution at (or "" to run till the very end)
 # $3: Top artifact directory
 # $4: Whether to enable "set -x" verbosity for execution steps.
@@ -1202,7 +1786,33 @@ finishing at step \"$run_step_finish_at\""
     run_step_top_artifacts=$(cd "$run_step_top_artifacts"; pwd)
 
     rm -f $run_step_top_artifacts/console.log
+    rm -f $run_step_top_artifacts/console.log.xz
     rm -f $run_step_top_artifacts/results
+
+    # If no manifest file was provided, supply a default one.
+    if [ ${#__manifest_filename[@]} -eq 1 ]; then
+	manifest_push "$run_step_top_artifacts/manifest.sh"
+    fi
+}
+
+# Patch environment for subsequent steps.  This works by generating
+# a source-able file patch-env.sh in the artifacts of the current step.
+# Run_step() then sources this file to update the environment.
+# Note that we build walls around individual steps on purpose.  This allows
+# us to SKIP several initial steps during bisect builds, and have a clear
+# record of environment modifications in artifacts/NN-step/patch-env.sh
+# scripts, which could be applied in correct order.
+#
+# $@: parameters in the format that convert_args_to_variables() understands.
+run_step_patch_env ()
+{
+    # !!! Each step is limited to a single invocation of run_step_patch_env()
+    # !!! due to manifest_push() re-writing the manifest.
+    assert_with_msg "patch-env.sh manifest already exists" \
+		    ! [ -e $run_step_artifacts/patch-env.sh ]
+    manifest_push $run_step_artifacts/patch-env.sh
+    convert_args_to_variables "$@"
+    manifest_pop
 }
 
 # Run execution step and handle its failure as requested
@@ -1215,8 +1825,7 @@ finishing at step \"$run_step_finish_at\""
 #      Step commands have $run_step_artifacts pointing to artifact directory
 #      for current step.
 #   3. logging -- dump stdout and and stderr output of step commands
-#      into per-step console.log files, and, also, into the top-level
-#      console.log file.
+#      into per-step console.log files
 #   4. result handling -- output provided success result to artifacts/results
 #      for successful steps.  Special value "x" means to let the step itself
 #      update artifacts/results.  Results are written to artifacts/results
@@ -1244,6 +1853,12 @@ run_step ()
 
     step=("$@")
 
+    if [ "$success_result" != "x" ]; then
+	cat >> $run_step_top_artifacts/results <<EOF
+# ${step[@]}:
+EOF
+    fi
+
     pretty_step="$1"
     shift
     while [ $# -gt 0 ]; do
@@ -1257,13 +1872,19 @@ run_step ()
 
     run_step_count=$(($run_step_count+1))
 
+    local full_step_name
+    full_step_name=$(printf "%02d" $run_step_count)-$pretty_step
+    # This is used when accessing the workspace
+    run_step_artifacts=$run_step_top_artifacts/$full_step_name
+
     # Start running steps if:
     # the current step is the starting step OR
     # we haven't run any steps yet and
     # there is no set starting step
-    if [ x"$pretty_step" = x"$run_step_start_at" ] || \
-	     ( [ x"$run_step_start_at" = x"" ] && \
-         [ x"$run_step_prev_step" = x"" ] ); then
+    if [ "$pretty_step" = "$run_step_start_at" ] \
+	   || [ "${run_step_prev_step}+" = "$run_step_start_at" ] \
+	   || ( [ "$run_step_start_at" = "" ] \
+		    && [ "$run_step_prev_step" = "" ] ); then
 	run_step_active=true
     fi
 
@@ -1271,7 +1892,7 @@ run_step ()
 	local skip=false
 	case "$run_step_status:$run_mode" in
 	    0:*) ;;
-	    $EXTERNAL_FAIL:stop_on_fail)
+	    "$EXTERNAL_FAIL:stop_on_fail")
 		echo "STOPPING before ${step[*]} due to previous external failure"
 		return $EXTERNAL_FAIL
 		;;
@@ -1291,14 +1912,10 @@ run_step ()
 	esac
 
 	if ! $skip; then
-	    local full_step_name
-	    full_step_name=$(printf "%02d" $run_step_count)-$pretty_step
-	    # This is used when accessing the workspace
-	    run_step_artifacts=$run_step_top_artifacts/$full_step_name
 	    local log_url=""
 	    if [ -v BUILD_URL ]; then
 	        # Link to jenkins, valid once the job has finished
-	        log_url="(${BUILD_URL}artifact/artifacts/$full_step_name/console.log)"
+	        log_url="(${BUILD_URL}artifact/artifacts/$full_step_name/console.log.xz)"
 	    fi
 
 	    rm -rf "$run_step_artifacts"
@@ -1306,17 +1923,53 @@ run_step ()
 
 	    echo "RUNNING ${step[*]}; see tail -f $run_step_artifacts/console.log" $log_url
 	    run_step_status=0
-	    eval "if $run_step_verbose; then set -x; else set +x; fi; ${step[*]}" 2>&1 | ts -s "%T" | tee -a $run_step_top_artifacts/console.log > $run_step_artifacts/console.log &
-	    wait $! || run_step_status=$?
+	    # We are running "${step[@]}" in a sub-shell, so that any
+	    # modifications to environment will be lost.
+	    # The steps can modify environment for subsequent steps by using
+	    # run_step_patch_env().
+	    # We redirect stdout and stderr of "${step[@]}" to a pipe, which
+	    # is read by the timestamping "ts -s" process.  Piping "step | ts -s"
+	    # directly causes a weird issue with the failed exit code always
+	    # being "1" instead of, e.g., 125.
+
+	    local pipe step_pid ts_pid
+	    pipe=$(mktemp -u)
+	    mkfifo "$pipe"
+
+	    (
+		if $run_step_verbose; then
+		    set -x
+		else
+		    set +x
+		fi
+		"${step[@]}"
+	    ) &> "$pipe" &
+	    step_pid=$!
+
+	    ts -s "%T" < "$pipe" > $run_step_artifacts/console.log &
+	    ts_pid=$!
+
+	    wait $step_pid || run_step_status=$?
+	    wait $ts_pid
+	    rm "$pipe"
+
+	    xz $run_step_artifacts/console.log
+
+	    if [ x"$success_result" != x"x" ] \
+		   && [ x"$run_step_status" != x"0" ]; then
+		cat >> $run_step_top_artifacts/results <<EOF
+# FAILED
+EOF
+	    fi
 
 	    case "$run_step_status:$run_mode" in
 		0:*) ;;
-		$EXTERNAL_FAIL:stop_on_fail|$EXTERNAL_FAIL:reset_on_fail)
+		"$EXTERNAL_FAIL:stop_on_fail"|"$EXTERNAL_FAIL:reset_on_fail")
 		    echo "STOPPING at ${step[*]} due to external failure"
 		    return $EXTERNAL_FAIL
 		    ;;
 		*:stop_on_fail|*:reset_on_fail)
-		    echo "STOPPING at ${step[*]} due to internal failure"
+		    echo "STOPPING at ${step[*]} due to failure"
 		    return $INTERNAL_FAIL
 		    ;;
 		*:skip_on_fail)
@@ -1330,13 +1983,17 @@ run_step ()
 	echo "SKIPPING ${step[*]}"
     fi
 
-    if [ x"$run_step_status" = x"0" ] && [ x"$success_result" != x"x" ]; then
+    if [ x"$success_result" != x"x" ] && [ x"$run_step_status" = x"0" ]; then
 	cat >> $run_step_top_artifacts/results <<EOF
-# ${step[@]}:
 $success_result
 EOF
     fi
 
+    if [ -f $run_step_artifacts/patch-env.sh ]; then
+	# shellcheck disable=SC1090
+	source $run_step_artifacts/patch-env.sh
+    fi
+
     if [ x"$pretty_step" = x"$run_step_finish_at" ]; then
 	run_step_active=false
     fi
@@ -1368,7 +2025,7 @@ print_traceback ()
 {
   local exit_status=$?
   case $exit_status in
-      $INTERNAL_FAIL|$EXTERNAL_FAIL) ;;
+      "$INTERNAL_FAIL"|"$EXTERNAL_FAIL") ;;
       *)
 	  echo "ERROR Traceback (most recent call last):"
 	  # Show most recent calls last
@@ -1388,3 +2045,186 @@ print_traceback ()
 	  ;;
   esac
 }
+
+# Print interesting-commit.git sub-directory: $1/sha1[/$2[/$3[/$4]]]
+# $1: component
+# $2: sha1 of the commit
+# $3: ci_project
+# $4: ci_config
+interesting_subdir ()
+{
+    local dir="$1/sha1"				# $component/sha1
+    if [ $# -ge 2 ]; then dir="$dir/$2"; fi	# /$sha1
+    if [ $# -ge 3 ]; then dir="$dir/$3"; fi	# /$ci_project
+    if [ $# -ge 4 ]; then dir="$dir/$4"; fi	# /$ci_config
+    echo "$dir"
+}
+
+# Print user-friendly "git describe" of a given commit
+# $1: Component (gcc, llvm, etc.)
+# $2: Commit hash
+# $3: If "true", never fail to describe and print out something sensible.
+#     Otherwise return empty string on failure.
+describe_sha1 ()
+{
+    local component="$1"
+    local sha1="$2"
+    local anything="$3"
+
+    local -a match=()
+    case "$component" in
+	gcc) match=(--match "basepoints/*" --match "releases/*") ;;
+	binutils) match=(--match "binutils*") ;;
+	gdb) match=(--match "gdb*") ;;
+	newlib) match=(--match "newlib*") ;;
+    esac
+
+    if ! git -C "$component" describe "${match[@]}" $sha1 2>/dev/null \
+	    && $anything; then
+	echo "$component#$(git -C "$component" rev-parse --short $sha1)"
+    fi
+}
+
+# To avoid committing unwanted files into git (e.g., raw benchmarking
+# data) we implement "annex" support.  Files in base-artifacts/annex
+# can be either symlinks to directories or regular files containing
+# rsync-able urls.
+#
+# Here we convert directory symlinks into tarballs, upload to bkp-01
+# and replace symlinks with files pointing to their uploaded location.
+#
+# In git_annex_download we do the opposite: download and extract tarball
+# into a temporary directory, and replace the file with a symlink
+# to that directory.
+#
+# The end result is that during a build base-artifacts/annex/bmk-data
+# is a symlink with directory-like behavior.  Outside of a build
+# base-artifacts/ repo contains a regular file pointing to a tarball
+# on a private fileserver.
+#
+# FIXME: We do not automatically remove annex tarballs when trimming
+# or rewriting history.  We rely on tcwg-cleanup-stale-results.sh for that.
+#
+# $1: git repo
+# $2: annex directory inside the repo
+# $3: tarball name prefix
+git_annex_upload ()
+{
+    (
+    set -euf -o pipefail
+    local repo="$1"
+    local annex_dir="$2"
+    local pretty_id="$3"
+
+    if ! [ -d "$repo/$annex_dir" ]; then
+	return 0
+    fi
+
+    local n_cpus=0
+    if [ "$(getconf LONG_BIT)" = "32" ]; then
+	# XZ allocates a few hundred megabytes per thread, which can easily
+	# exhaust VM in armhf containers on 160-core machines.  Limit xz
+	# parallelism to 8.
+	n_cpus=$(nproc --all)
+	if [ "$n_cpus" -gt "8" ]; then
+	    n_cpus=8
+	fi
+    fi
+
+    # Convert annex symlinks to remote links
+    local symlink dir md5 remote_path newlink
+    while IFS= read -r -d '' symlink; do
+	dir=$(readlink "$repo/$annex_dir/$symlink")
+
+	# Generate MD5 hash of the contents of the annex: find all files
+	# and generate md5sum for each of them, and then generate md5sum
+	# of that list.
+	# We avoid using md5sum of the tarball because using tar with
+	# multi-threaded xz compression may produce different tarballs.
+	md5=$(cd "$dir"; find -L -type f -print0 | xargs -0 md5sum | sort \
+		  | md5sum - | awk '{ print $1 }')
+
+	remote_path="$HOME/$repo/$annex_dir/${pretty_id}$md5.tar.xz"
+	newlink="bkp-01.tcwglab:$remote_path"
+
+	# Check if bkp-01.tcwglab already has an annexed tarball with our data.
+	# When re-writing history in round-robin-baseline.sh we download and
+	# re-upload same data multiple times.  This optimization saves up
+	# on compression and upload time.
+	if ! ssh -n bkp-01.tcwglab test -f "$remote_path"; then
+	    local tarball
+	    tarball=$(mktemp --suffix=.tar.xz)
+	    chmod 0644 "$tarball"
+	    # We have a local link to the annex -- make it remote.
+	    XZ_OPT=-T$n_cpus tar cJf "$tarball" -C "$dir" .
+
+	    ssh -n bkp-01.tcwglab mkdir -p "$(dirname "$remote_path")"
+	    rsync -a "$tarball" "$newlink"
+
+	    rm "$tarball"
+	fi
+
+	# In normal builds files inside $dir will be owned by tcwg-benchmark,
+	# so we will fail trying to delete them.  Still, try to delete
+	# the directory to avoid running out of disk space when re-writing history.
+	rm -rf "$dir" &>/dev/null || true
+
+	git -C "$repo" rm "$annex_dir/$symlink"
+	# if $symlink is the last file in $annex_dir, then "git rm" will
+	# remove the directory as well.  Re-create it.
+	mkdir -p "$repo/$annex_dir"
+	echo "$newlink" > "$repo/$annex_dir/$symlink"
+	git -C "$repo" add "$annex_dir/$symlink"
+    done < <(cd "$repo/$annex_dir"; find . -type l -print0)
+
+    # update commit with new links.
+    git -C "$repo" commit --amend -C HEAD
+    )
+}
+
+# $1: git repo
+# $2: annex directory inside the repo
+git_annex_download ()
+{
+    (
+    set -euf -o pipefail
+    local repo="$1"
+    local annex_dir="$2"
+
+    # FIXME: Remove workaround for old-style bmk-data after history rewrite.
+    if [ -f "$repo/results_id" ]; then
+	local link dir
+	link=$(cat "$repo/results_id")
+
+	dir=$(mktemp -d)
+	rsync -a --del "bkp-01.tcwglab:/home/tcwg-benchmark/results-$link/" \
+	      "$dir/"
+
+	rm -rf "${repo:?}/$annex_dir"
+	mkdir "$repo/$annex_dir"
+
+	ln -s "$dir" "$repo/$annex_dir/bmk-data"
+    fi
+
+    if ! [ -d "$repo/$annex_dir" ]; then
+	return 0
+    fi
+
+    # Resolve annex links to local symlinks.
+    # See round-robin-baseline.sh:push_baseline() for details.
+    local linkfile link tarball dir
+    while IFS= read -r -d '' linkfile; do
+	link=$(cat "$repo/$annex_dir/$linkfile")
+
+	tarball=$(mktemp --suffix=.tar.xz)
+	rsync -a "$link" "$tarball"
+
+	dir=$(mktemp -d)
+	tar xf "$tarball" -C "$dir"
+	rm "$tarball"
+
+	rm "$repo/$annex_dir/$linkfile"
+	ln -s "$dir" "$repo/$annex_dir/$linkfile"
+    done < <(cd "$repo/$annex_dir"; find . -type f -print0)
+    )
+}