summaryrefslogtreecommitdiff
path: root/tcwg_kernel-bisect.sh
blob: c96da8984488b0f56b7c28407222e1ee0b68e577 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#!/bin/bash

# Bisect a build regression in the git checkout $current_project between a
# baseline revision ($baseline_rev) and a failing revision ($bad_rev), using
# tcwg_kernel-build.sh to judge each revision.
#
# All output (build artifacts, bisect logs, trigger files, mail parameters)
# is written under ./artifacts, which is wiped and re-created on every run.

# -e: abort on unhandled errors; -f: disable pathname expansion;
# pipefail: a pipeline fails if any of its stages fails.
set -ef -o pipefail

# Directory holding this script and its helpers.
scripts=$(dirname "$0")
. "$scripts/jenkins-helpers.sh"

# config[] is read below as ${config[gnu]}, ${config[llvm]}, ${config[linux]}.
# NOTE(review): presumably these arrays are filled in by
# convert_args_to_variables from the "@@..." arguments -- confirm in
# jenkins-helpers.sh.
declare -A config
declare -A git_repo
declare -A git_branch

convert_args_to_variables "@@gnu-default" "@@llvm-default" "@@linux-default" "$@"

obligatory_variables current_project toolchain

# Defaults for optional parameters.
bad_rev="${bad_rev:-default}"
baseline_rev="${baseline_rev:-baseline}"
BUILD_URL="${BUILD_URL:-$(pwd)}"
confirm_failure="${confirm_failure:-false}"
linux_config="${linux_config:-defconfig}"
target="${target:-native}"
verbose="${verbose:-true}"

# All optional variables have values now, so from here on treat any
# reference to an unset variable as an error.
set -u

if $verbose; then set -x; fi

# Start from an empty artifacts directory (absolute path, so it stays valid
# after the "cd" into $current_project later on).
artifacts=$(pwd)/artifacts
rm -rf "$artifacts"
mkdir "$artifacts"

# On any exit, write an error marker to $artifacts/failures.  Successful
# exit paths disarm this with 'trap "" EXIT' first.  $artifacts is expanded
# now, when the trap is installed; the escaped \${...} references are
# expanded when the trap fires.
trap "eval \"echo ERROR at \${FUNCNAME[0]}:\${BASH_LINENO[0]}\" > $artifacts/failures" EXIT

if $confirm_failure; then
    # Make sure we are not about to bisect a sporadic failure.
    # The build is started in the background and reaped with "wait", so its
    # exit status is captured in $res without aborting under "set -e".
    "$scripts/tcwg_kernel-build.sh" \
	"@@gnu-${config[gnu]}" "@@llvm-${config[llvm]}" "@@linux-${config[linux]}" \
	--linux_config "$linux_config" \
	--mode "baseline" \
	--current_project "$current_project" \
	--current_rev "$bad_rev" \
	--target "$target" \
	--tidy false \
	--toolchain "$toolchain" \
	--top_artifacts "$artifacts/build-bad" \
	--verbose "$verbose" &
    res=0 && wait $! || res=$?

    # Pin $bad_rev to the commit checked out in $current_project after the
    # build above.
    bad_rev=$(git -C "$current_project" rev-parse HEAD)
    if [ x"$res" = x"0" ]; then
	# Build job had a spurious failure.  Re-try.
	# Write the trigger-2-build-master file for this revision and stop
	# without bisecting; the EXIT trap is disarmed so that no failure
	# marker is written.
	cat > "$artifacts/trigger-2-build-master" <<EOF
current_project=$current_project
current_rev=$bad_rev
EOF
	trap "" EXIT
	exit 0
    fi
elif [ x"$bad_rev" = x"default" ]; then
    # bad_rev still holds its placeholder default value.
    echo "ERROR: Need explicit --bad_rev"
    exit 1
else
    bad_rev=$(git_rev_parse "$current_project" "$bad_rev")
fi

# Build baseline that we are going to re-use to speed-up bisection.
# (This also confirms that infrastructure is OK.)
# This build is run in the foreground: under "set -e" a non-zero exit status
# aborts the whole script.
"$scripts/tcwg_kernel-build.sh" \
    "@@gnu-${config[gnu]}" "@@llvm-${config[llvm]}" "@@linux-${config[linux]}" \
    --linux_config "$linux_config" \
    --mode "baseline" \
    --current_project "$current_project" \
    --current_rev "$baseline_rev" \
    --reset_baseline true \
    --target "$target" \
    --tidy false \
    --toolchain "$toolchain" \
    --top_artifacts "$artifacts/build-baseline" \
    --verbose "$verbose"

# NOTE(review): the build above writes to $artifacts/build-baseline, whereas
# $artifacts/failures is, within this script, only written by the EXIT trap.
# Confirm whether $artifacts/build-baseline/failures was the intended file.
assert ! [ -f "$artifacts/failures" ]

# The rest of the bisect runs from inside the project checkout.  The paths
# "../bisect-run.sh" and "cd .." below assume that $current_project is a
# direct child of the starting directory.
cd "$current_project"

# Bisect script.
#
# With this script we find the first commit that has regressed compared
# to baseline, but not necessarily the commit that caused the regression in
# $bad_rev.  Consider the scenario:
# - rev_10 produced good result "2000" -- this is current baseline
# - rev_20 completely broke the build (say, result "10")
# - rev_22 fixed the build
# - rev_30 regressed the build to result "1000" -- this is the regression we
#   detected vs "2000" baseline.
#
# The script will identify rev_20 as the first failing commit, which will
# cause the baseline to be reset to rev_20 with metric "10".  When we then
# rebuild master (at rev_30) we will see a /progression/ from "10" to "1000",
# thus missing the regression of "2000" to "1000".
#
# To catch the "2000" to "1000" regression someone would need to manually
# trigger bisect between rev_22 and rev_30.
#
# TODO: We could skip revisions (exit 125) that are worse than metric
# for $bad_rev (result metric <1000 in the above scenario), so we would
# skip revisions between rev_20 and rev_22.  This might cause other edge
# cases to be handled sub-optimally, though.
#
# Notes on the here-document below:
# - Unescaped expansions ($scripts, $artifacts, ${config[...]}, ...) are
#   substituted now, while the script is generated; "\$" expansions are
#   left for the generated script to evaluate at run time.
# - Exit status of the generated script, as interpreted by "git bisect run":
#   1   -- the build left a "failures" file: revision is bad,
#   125 -- the build failed without a "failures" file: skip the revision,
#   0   -- revision is good.
cat > ../bisect-run.sh <<EOF
#!/bin/sh
rev=\$(git rev-parse HEAD)
cd ..
"$scripts/tcwg_kernel-build.sh" \
  "@@gnu-${config[gnu]}" "@@llvm-${config[llvm]}" "@@linux-${config[linux]}" \
  --linux_config "$linux_config" \
  --mode bisect \
  --current_project "$current_project" \
  --target "$target" \
  --tidy false \
  --toolchain "$toolchain" \
  --top_artifacts "$artifacts/build-\$rev" \
  --verbose "$verbose" &
res=0 && wait \$! || res=\$?
if [ -f "$artifacts/build-\$rev/failures" ]; then
  exit 1
elif [ x"\$res" != x"0" ]; then
  exit 125
else
  exit 0
fi
EOF
chmod +x ../bisect-run.sh

# Remember $good_rev from the baseline build above.
good_rev=$(git rev-parse HEAD)

# Workaround linux-next/master rebasing on top of linux-next/stable.
# Try to find $good_rev that is "good" compared to baseline
# and that is an ancestor of both $baseline_rev and $bad_rev.
merge_base=$(git merge-base "$bad_rev" HEAD)
origin=$(git remote get-url origin)
origin=$(basename "$origin")
# Only applies when the baseline is not itself the merge-base and the
# checkout's origin URL ends in "linux-next.git".
if [ x"$merge_base" != x"$good_rev" ] && [ x"$origin" = x"linux-next.git" ]; then
    git checkout --detach "$merge_base"
    ../bisect-run.sh &
    res=0 && wait $! || res=$?
    if [ x"$res" != x"0" ]; then
	# With bad merge-base below bisect will fail.
	# Check if we can use linux-next/stable as our last resort.
	git checkout --detach refs/remotes/origin/stable
	../bisect-run.sh &
	res=0 && wait $! || res=$?
	if [ x"$res" = x"0" ]; then
	    good_rev=$(git rev-parse HEAD)
	fi
	# If stable is not good either, $good_rev is left unchanged and
	# below bisect will fail :-(
    fi
    # With good merge-base $good_rev is left unchanged and below bisect
    # should succeed.
fi

# When the failure was not already confirmed at the top of the script,
# check here that $bad_rev really fails before spending time on a bisect.
if ! $confirm_failure; then
    git checkout --detach "$bad_rev"
    ../bisect-run.sh &
    res=0 && wait $! || res=$?

    if [ x"$res" = x"0" ]; then
	echo "ERROR: build for bad_rev $bad_rev succeeded"
	exit 1
    fi

    # Move this build's artifacts aside under a "-bad" suffix, freeing
    # the build-$bad_rev name.
    mv "$artifacts/build-$bad_rev" "$artifacts/build-$bad_rev-bad"
fi

git bisect start "$bad_rev" "$good_rev" 2>&1 | tee "$artifacts/bisect.log"

# "git bisect run" can fail (exit with non-zero) in a number of cases:
# - on trivial bisects (e.g., between HEAD^ and HEAD),
# - when merge-base between baseline and bad is worse than baseline,
# - something else?
# In all these cases we want to reset baseline to HEAD, so that we catch
# most of the commits that introduced change in the result metric.
#
# NOTE(review): $! is the PID of the last pipeline stage (tee); capturing
# the failure of "git bisect run" in $res relies on "wait" reporting the
# pipefail status of the whole background job -- confirm.
git bisect run ../bisect-run.sh 2>&1 | tee -a "$artifacts/bisect.log" &
res=0 && wait $! || res=$?

if [ x"$res" = x"0" ]; then
    # The first line of .git/BISECT_RUN is expected to read
    # "<sha> is the first bad commit"; keep only the sha.
    first_bad=$(head -n 1 .git/BISECT_RUN | grep "is the first bad commit" | cut -d" " -f 1)
    assert [ x"$first_bad" != x"" ]
    echo "$first_bad" > "$artifacts/first-bad"
else
    # Bisect did not complete: fall back to the currently checked-out commit.
    # No first-bad file is written in this case unless the check below fires.
    first_bad=$(git rev-parse HEAD)
    if ! [ -f .git/BISECT_LOG ]; then
	# It seems this was a trivial bisect with $bad_rev^ == $good_rev.
	first_bad=$bad_rev
	echo "$first_bad" > "$artifacts/first-bad"
    fi
fi
cd ..

# Save BISECT_* logs
# "find" is used instead of a shell glob because pathname expansion is
# disabled in this script ("set -f").
mkdir "$artifacts/git-logs"
find "$current_project" -path "$current_project/.git/BISECT_*" -print0 | xargs -0 -I@ mv @ "$artifacts/git-logs/"

# Reset baseline to the regressed commit so that we will catch subsequent
# regressions (worse than $bad_rev).
cat > "$artifacts/trigger-1-reset-baseline" <<EOF
current_project=$current_project
current_rev=$first_bad
reset_baseline=true
EOF

# A first-bad file exists only when the bisect identified a commit; in that
# case tag the build name and prepare the notification mail parameters.
if [ -f "$artifacts/first-bad" ]; then
    first_bad=$(cat "$artifacts/first-bad")
    # Append "-$first_bad" to the end of every line of ./build-name.
    # "touch" only guarantees the file exists; a freshly created (empty)
    # file has no lines and therefore stays empty.
    touch build-name
    sed -i -e "s/\$/-$first_bad/" build-name

    # Email developers on successful bisects.
    CI_MAIL_RECIPIENTS="tcwg-validation@linaro.org, maxim.kuvyrkov@linaro.org"
    # Extra recipients depending on the toolchain and the bisected project.
    case "$toolchain:$current_project" in
        gnu:*) CI_MAIL_RECIPIENTS="$CI_MAIL_RECIPIENTS, christophe.lyon@linaro.org" ;;
        llvm:linux) CI_MAIL_RECIPIENTS="$CI_MAIL_RECIPIENTS, arnd@linaro.org, mark.brown@linaro.org, ndesaulniers@google.com" ;;
        llvm:llvm) CI_MAIL_RECIPIENTS="$CI_MAIL_RECIPIENTS, adhemerval.zanella@linaro.org, ndesaulniers@google.com, yvan.roux@linaro.org" ;;
    esac

    # The URLs below append path components directly to $BUILD_URL, so
    # they are only well-formed when $BUILD_URL ends with a slash.
    cat > "$artifacts/ci_mail_parameters" <<EOF
CI_MAIL_RECIPIENTS=$CI_MAIL_RECIPIENTS
CI_MAIL_BODY="Bisected $current_project on $target @ $toolchain-${config[$toolchain]} @ linux-${config[linux]}-$linux_config between bad $bad_rev and baseline $baseline_rev .

Bisect log: ${BUILD_URL}artifact/artifacts/bisect.log/*view*/
Artifacts: ${BUILD_URL}artifact/artifacts/
Build URL: $BUILD_URL
Build log: ${BUILD_URL}consoleText
"
EOF
fi

# Trigger master build now instead of waiting for next timed SCM trigger.
cat > "$artifacts/trigger-2-build-master" <<EOF
current_project=$current_project
EOF

# Normal completion: disarm the EXIT trap so that no failure marker is
# written to $artifacts/failures.
trap "" EXIT