summaryrefslogtreecommitdiff
path: root/tcwg_kernel-bisect.sh
blob: 136dc24c1c4e661e9fafd43ce62739aea2c5e916 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
#!/bin/bash

# Abort on the first failing command (-e), disable pathname expansion (-f)
# so that words such as rr[ci_project] are passed on literally, and make a
# pipeline fail when any of its stages fails.  "set -u" is only enabled
# further down, after the command line has been converted into variables.
set -ef -o pipefail

# Shared helpers live next to this script.  They presumably provide
# fresh_dir, convert_args_to_variables, obligatory_variables, manifest_out,
# manifest_pop, assert, clone_or_update_repo and git_rev_parse_long, none of
# which is defined in this file -- confirm in jenkins-helpers.sh.
scripts=$(dirname "$0")
. "$scripts/jenkins-helpers.sh"

# Relative artifacts are used for generation of manifests and reproduction
# instructions.
rel_artifacts=artifacts
artifacts=$(pwd)/$rel_artifacts

# NOTE(review): presumably resets $artifacts while preserving the listed
# manifests/ and jenkins/ entries -- confirm against fresh_dir.
fresh_dir $artifacts "$artifacts/manifests/*" "$artifacts/jenkins/*"

# Process bisect-only args.
# SHIFT_CONVERTED_ARGS is not assigned in this file; presumably
# convert_args_to_variables sets it to the number of arguments it consumed.
convert_args_to_variables "$@"
shift "$SHIFT_CONVERTED_ARGS"

obligatory_variables bad_branch baseline_branch build_script current_project

# Default BUILD_URL to the current directory when it is unset or empty.
BUILD_URL="${BUILD_URL:-$(pwd)}"
reproduce_bisect="${reproduce_bisect:-false}"

# Process build args and record them in build-parameters.sh
convert_args_to_variables ^^ $reproduce_bisect %% $artifacts/manifests/build-parameters.sh "$@"
$reproduce_bisect || manifest_pop

obligatory_variables rr[ci_project] rr[ci_config]

verbose="${verbose:-true}"

# From here on, referencing an unset variable is an error.
set -u

if $verbose; then set -x; fi

# Unless a later "trap "" EXIT" clears it, any exit records where the script
# stopped in $artifacts/failures.  FUNCNAME exists only while a shell
# function is executing, so with "set -u" a bare ${FUNCNAME[0]} would abort
# the handler itself when the failure happens at top level and leave the
# file empty; the ":-" defaults keep the message intact in that case.
trap "eval \"echo ERROR at \${FUNCNAME[0]:-main}:\${BASH_LINENO[0]:-0}\" > $artifacts/failures" EXIT

# The rebase workaround is off unless the configuration below enables it.
rebase_workaround=false

if [[ "${rr[ci_project]}/${rr[ci_config]}:$current_project" == tcwg_kernel/*-next-*:linux ]]; then
    # Workaround linux-next/master rebasing on top of linux-next/stable.
    # Search for regressions against linux-mainline:master (aka linux-next:stable).
    clone_or_update_repo $current_project stable ${rr[linux_url]}

    # Just in case linux-next:stable has advanced between the build and
    # bisect jobs, use merge base between linux-next:stable and $bad_branch.
    # Values that are already set (even if empty) are kept as they are.
    if [ -z "${bad_rev+set}" ]; then
        bad_rev=$(git_rev_parse_long $current_project $bad_branch)
    fi
    if [ -z "${linux_next_stable+set}" ]; then
        linux_next_stable=$(git -C $current_project merge-base HEAD $bad_rev)
    fi

    # Record the chosen revision in the manifest.
    echo "declare -g linux_next_stable=$linux_next_stable" | manifest_out

    echo "Rebase workaround: forcing baseline_rev to $linux_next_stable"
    baseline_rev=$linux_next_stable
    rebase_workaround=true
fi

# Build the baseline first: later bisection steps re-use it to save time,
# and a successful baseline build also confirms that infrastructure is OK.
echo "Testing baseline_branch $baseline_branch (should be success)"
baseline_build_args=(
    ^^ $reproduce_bisect
    %% $rel_artifacts/manifests/build-baseline.sh
    @@ $rel_artifacts/manifests/build-parameters.sh
    ==rr[mode] "baseline"
    ==rr[${current_project}_branch] "$baseline_branch"
    ==rr[reset_baseline] true
    ==rr[top_artifacts] "$rel_artifacts/build-baseline"
    --verbose "$verbose"
)
$build_script "${baseline_build_args[@]}"

# Keep a baseline_rev that was set earlier; otherwise use the revision that
# is checked out in the project directory after the baseline build.
if [ -z "${baseline_rev+set}" ]; then
    baseline_rev=$(git -C ${current_project} rev-parse HEAD)
fi
echo "declare -g baseline_rev=$baseline_rev" | manifest_out

# Make the baseline build reachable under its revision name as well, and
# remember the revision as known-good so that test.sh does not rebuild it.
ln -f -s "build-baseline" "$artifacts/build-$baseline_rev"
ln -f -s "build-baseline.sh" "$artifacts/manifests/build-$baseline_rev.sh"
printf '%s\n' "$baseline_rev" >> $artifacts/good_revs

# Bisect script.
#
# The heredoc below generates $artifacts/test.sh, which "git bisect run"
# executes for each candidate revision.  test.sh exits 0 when the revision
# is good and 1 when it is bad, and caches every verdict in good_revs /
# bad_revs so that a revision is never built twice.
#
# With this script we find the first commit that has regressed compared
# to baseline, but not necessarily the commit that caused the regression in
# $bad_rev.  Consider the scenario:
# - rev_10 produced good result "2000" -- this is the current baseline
# - rev_20 completely broke the build (say, result "10")
# - rev_22 fixed the build
# - rev_30 regressed the build to result "1000" -- this is the regression we
#   detected vs the "2000" baseline.
#
# The script will identify rev_20 as the first failing commit, which will
# cause the baseline to be reset to rev_20 with metric "10".  When we then
# rebuild master (at rev_30) we will see a /progression/ from "10" to "1000",
# thus missing the regression from "2000" to "1000".
#
# To catch the "2000" to "1000" regression someone would need to manually
# trigger a bisect between rev_22 and rev_30.
#
# TODO: We could skip revisions (exit 125) that are worse than the metric
# for $bad_rev (result metric <1000 in the above scenario), so we would
# skip revisions between rev_20 and rev_22.  This might cause other edge
# cases to be handled sub-optimally, though.
#
# Quoting notes for the heredoc: the delimiter is unquoted, so plain
# expansions ($build_script, $rel_artifacts, $verbose, ...) are substituted
# now, while generating test.sh; expansions written as \$... are emitted
# literally and evaluated when test.sh runs.  Backslash-newline pairs are
# removed during this expansion, so the build command ends up on a single
# line in the generated file.
cat > $artifacts/test.sh <<EOF
#!/bin/bash

set -euf -o pipefail

rev=\$(git rev-parse HEAD)
cd ..

touch $rel_artifacts/bad_revs $rel_artifacts/good_revs

if grep -q \$rev $rel_artifacts/bad_revs; then
  exit 1
elif grep -q \$rev $rel_artifacts/good_revs; then
  exit 0
fi

$build_script \
  ^^ $reproduce_bisect \
  %% $rel_artifacts/manifests/build-\$rev.sh \
  @@ $rel_artifacts/manifests/build-parameters.sh \
  ==rr[mode] bisect \
  ==rr[${current_project}_branch] \$rev \
  ==rr[top_artifacts] $rel_artifacts/build-\$rev \
  --verbose "$verbose" &
res=0 && wait \$! || res=\$?

if [ x"\$res" != x"0" ]; then
  echo "\$rev" >> $rel_artifacts/bad_revs
  exit 1
else
  echo "\$rev" >> $rel_artifacts/good_revs
  exit 0
fi
EOF
chmod +x $artifacts/test.sh

# Resolve the revision under suspicion unless it was already set, and
# record it in the manifest.
if [ -z "${bad_rev+set}" ]; then
    bad_rev=$(git_rev_parse_long $current_project $bad_branch)
fi
echo "declare -g bad_rev=$bad_rev" | manifest_out

cd $current_project

# Confirm that $bad_rev really fails before bisecting.  test.sh runs in the
# background and is waited for so that its exit status can be captured
# without tripping "set -e".
git checkout --detach $bad_rev
$artifacts/test.sh &
res=0
wait $! || res=$?

if [ "$res" = "0" ]; then
    # $bad_rev built fine, so there is nothing to bisect: tag the build
    # name, request a follow-up build, clear the mail recipients and stop.
    build_name=$artifacts/jenkins/build-name
    touch $build_name
    if $rebase_workaround; then
        echo "Rebase workaround: no regression between $baseline_rev and $bad_rev"
        sed -i -e 's/$/-bad_rev-good/' $build_name
        rebase_trigger=$artifacts/trigger-build-rebase
        case "${rr[ci_project]}/${rr[ci_config]}:$current_project" in
            tcwg_kernel/llvm-*-next-*:linux)
                printf '%s\n' "llvm_branch=baseline" > $rebase_trigger
                ;;
            tcwg_kernel/gnu-*-next-*:linux)
                printf '%s\n' "binutils_branch=baseline" "gcc_branch=baseline" > $rebase_trigger
                ;;
            *) assert false ;;
        esac
        printf '%s\n' "linux_branch=$baseline_rev" "reset_baseline=true" >> $rebase_trigger
    else
        echo "WARNING: build for bad_rev $bad_rev succeeded"
        sed -i -e 's/$/-spurious/' $build_name
        # Retry build with default parameters (an empty trigger file).
        : > $artifacts/trigger-build-retry
    fi
    echo > $artifacts/jenkins/mail-recipients.txt
    trap "" EXIT
    exit 0
fi

# Expose the build of $bad_rev and its manifest under the fixed name
# "build-bad".
ln -f -s "build-$bad_rev" "$artifacts/build-bad"
ln -f -s "build-$bad_rev.sh" "$artifacts/manifests/build-bad.sh"

# Start bisecting between the bad revision and the baseline; all git output
# is collected in bisect.log.
git bisect start $bad_rev $baseline_rev 2>&1 | tee $artifacts/bisect.log

# "git bisect run" can fail (exit with non-zero) in a number of cases:
# - on trivial bisects (e.g., between HEAD^ and HEAD),
# - when merge-base between baseline and bad is worse than baseline,
# - something else?
# In all these cases we want to reset baseline to HEAD, so that we catch
# most of the commits that introduced change in the result metric.
#
# The pipeline runs in the background and is waited for so that a failure
# is captured in $res instead of aborting the script under "set -e".
git bisect run $artifacts/test.sh 2>&1 | tee -a $artifacts/bisect.log &
res=0 && wait $! || res=$?

if [ x"$res" = x"0" ]; then
    # The first line of .git/BISECT_RUN is expected to read
    # "<sha1> is the first bad commit"; take the sha1 from it.
    first_bad=$(cat .git/BISECT_RUN | head -n 1 | grep "is the first bad commit" | cut -d" " -f 1)
    assert [ x"$first_bad" != x"" ]

    # Look for a parent of $first_bad that tests good; stop at the first one.
    # NOTE(review): if $first_bad has no parents the loop body never runs and
    # last_good stays unset, which the later unguarded uses of $last_good
    # would trip over under "set -u" -- confirm whether that can happen.
    res=0
    for last_good in $(git rev-parse $first_bad^@); do
	# It seems that git-bisect assumes parent commit as "good" on
	# the basis of one of its children being "good".  Therefore we
	# can have a situation when we have parent P with children C1 and C2,
	# and child C1 has a child of its own CB.  Git-bisect tests C2 as
	# "good", and CB as "bad".  From C2 being good it assumes P as "good",
	# and it knows CB is "bad", so git-bisect returns C1 as the first bad
	# commit.
	# To simplify investigations we explicitly test parent of $first_bad.
	echo "Testing first_bad's parent $last_good (hoping for success)"
	git checkout --detach "$last_good"
	$artifacts/test.sh &
	res=0 && wait $! || res=$?
	if [ x"$res" = x"0" ]; then
	    break
	fi
    done
    if [ x"$res" != x"0" ]; then
	# It seems $last_good was on a path that tested good, even though
	# it itself is bad.  We re-trigger the bisection job with updated
	# parameters.
	# We need to be careful to avoid re-trigger loops, so verify that
	# last_good is an ancestor of bad_rev.
	assert git merge-base --is-ancestor $last_good $bad_rev
	cat > $artifacts/trigger-bisect <<EOF
current_project=$current_project
baseline_branch=$baseline_rev
bad_branch=$last_good
EOF
	# Don't send any emails.
	echo > $artifacts/jenkins/mail-recipients.txt
	touch $artifacts/jenkins/build-name
	sed -i -e "s/\$/-last_good-bad/" $artifacts/jenkins/build-name
	trap "" EXIT
	exit 0
    fi

    # A good parent was found: record the culprit for the reporting below.
    echo $first_bad > $artifacts/first-bad
else
    # "git bisect run" failed: fall back to the currently checked-out commit.
    # first-bad is only written for the trivial case below, so otherwise the
    # report will say that the regression could not be identified.
    first_bad=$(git rev-parse HEAD)
    if ! [ -f .git/BISECT_LOG ]; then
	# It seems this was a trivial bisect with $bad_rev^ == $baseline_rev.
	first_bad=$bad_rev
	last_good=$(git rev-parse $first_bad^)
	assert [ x"$last_good" = x"$baseline_rev" ]

	echo $first_bad > $artifacts/first-bad
    fi
fi
# Leave the project checkout; subsequent paths are relative to its parent.
cd ..

# Move git's BISECT_* state files out of the checkout and keep them with
# the other artifacts.
mkdir $artifacts/git-logs
find "$current_project" -path "$current_project/.git/BISECT_*" -print0 \
    | xargs -0 -I{} mv {} $artifacts/git-logs/

# Start the mail body and choose which pair of builds the report compares:
# last_good vs first_bad when a culprit was found, baseline vs bad otherwise.
if [ -f $artifacts/first-bad ]; then
    mkdir -p $artifacts/jenkins
    touch $artifacts/jenkins/build-name
    sed -i -e "s/\$/-${first_bad}/" $artifacts/jenkins/build-name

    # Stable names for the two builds and their manifests; ${!role} expands
    # to the revision held in $first_bad and $last_good respectively.
    for role in first_bad last_good; do
        ln -f -s "build-${!role}" "$artifacts/build-$role"
        ln -f -s "build-${!role}.sh" "$artifacts/manifests/build-$role.sh"
    done

    good_name="last_good"
    good_sha1="$last_good"
    bad_name="first_bad"
    bad_sha1="$first_bad"

    cat >> $artifacts/jenkins/mail-body.txt <<EOF
Successfully identified regression in $current_project for CI configuration ${rr[ci_config]}.

Culprit:
<cut>
$(git -C $current_project log -n 1 $first_bad)
</cut>

EOF
else
    good_name="baseline_rev"
    good_sha1="$baseline_rev"
    bad_name="bad"
    bad_sha1="$bad_rev"

    cat >> $artifacts/jenkins/mail-body.txt <<EOF
Could not identify regression in $current_project for CI configuration ${rr[ci_config]}.  See 'Bisect log' in the links below for bisection details.

EOF
fi

# Append the main part of the report to the mail body: configuration
# details, results of the good and bad builds, the first few errors from the
# bad build's console log, links to artifacts, and copy-and-paste
# instructions for reproducing both builds.  $good_name/$good_sha1 and
# $bad_name/$bad_sha1 select which pair of builds is described.
#
# In the log scan for errors below:
#   - ": error:" detects compiler errors from GCC and Clang
#   - "^ERROR:" detects linker errors
#   - "] Error " detects GNU make errors
#
# The artifact links are formed by appending "artifact/..." directly to
# $BUILD_URL, so they rely on BUILD_URL ending with a slash.
cat >> $artifacts/jenkins/mail-body.txt <<EOF
Configuration details:
$(cat $artifacts/manifests/build-baseline.sh | grep '_url]\|_branch]' | grep -v '="no_')

Results regressed from (for $good_name == $good_sha1)
$(cat $artifacts/build-$good_sha1/results)

to (for $bad_name == $bad_sha1)
$(cat $artifacts/build-$bad_sha1/results)

First few errors in logs of $bad_name:
$(grep ": error:\|^ERROR:\|\] Error " $artifacts/build-$bad_sha1/console.log | head)

Artifacts of $good_name build: ${BUILD_URL}artifact/$rel_artifacts/build-$good_sha1/
Artifacts of $bad_name build: ${BUILD_URL}artifact/$rel_artifacts/build-$bad_sha1/

Reproduce builds:
<cut>
mkdir investigate-$current_project-$bad_sha1
cd investigate-$current_project-$bad_sha1

git clone https://git.linaro.org/toolchain/jenkins-scripts

mkdir -p $rel_artifacts/manifests
curl -o $rel_artifacts/manifests/build-baseline.sh ${BUILD_URL}artifact/$rel_artifacts/manifests/build-baseline.sh
curl -o $rel_artifacts/manifests/build-parameters.sh ${BUILD_URL}artifact/$rel_artifacts/manifests/build-parameters.sh
curl -o $rel_artifacts/test.sh ${BUILD_URL}artifact/$rel_artifacts/test.sh
chmod +x $rel_artifacts/test.sh

# Reproduce the baseline build (build all pre-requisites)
$build_script @@ $rel_artifacts/manifests/build-baseline.sh

cd $current_project

# Reproduce $bad_name build
git checkout --detach $bad_sha1
../$rel_artifacts/test.sh

# Reproduce $good_name build
git checkout --detach $good_sha1
../$rel_artifacts/test.sh

cd ..
</cut>

History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/ci/${rr[ci_project]}/${rr[ci_config]}

Bisect log: ${BUILD_URL}artifact/$rel_artifacts/bisect.log/*view*/
Artifacts: ${BUILD_URL}artifact/$rel_artifacts/
Build URL: $BUILD_URL
Build log: ${BUILD_URL}consoleText
EOF

# When a culprit was identified, finish the mail with the commit itself
# (stat and patch), truncated to the first 1000 lines.
if [ -f $artifacts/first-bad ]; then
    cat >> $artifacts/jenkins/mail-body.txt <<EOF

Full commit:
<cut>
$(git -C $current_project show --stat --patch $first_bad | head -n 1000)
</cut>
EOF
fi

# Set mail recipients last to preserve catch-error value from .yaml file.
# Email developers: the validation list always gets the report; further
# addresses depend on the CI configuration and on the bisected project.
extra_recipients=""
case "${rr[ci_project]}/${rr[ci_config]}:$current_project" in
    tcwg_kernel/gnu-*:linux)
        ;;
    tcwg_kernel/gnu-*:*)
        extra_recipients="christophe.lyon@linaro.org, maxim.kuvyrkov@linaro.org"
        ;;
    tcwg_kernel/llvm-*:linux)
        extra_recipients="arnd@linaro.org, mark.brown@linaro.org, ndesaulniers@google.com, trong@google.com"
        ;;
    tcwg_kernel/llvm-*:llvm)
        extra_recipients="adhemerval.zanella@linaro.org, maxim.kuvyrkov@linaro.org, ndesaulniers@google.com, trong@google.com, yvan.roux@linaro.org"
        ;;
esac

CI_MAIL_RECIPIENTS="tcwg-validation@linaro.org"
if [ -n "$extra_recipients" ]; then
    CI_MAIL_RECIPIENTS="$CI_MAIL_RECIPIENTS, $extra_recipients"
fi
printf '%s\n' "$CI_MAIL_RECIPIENTS" > $artifacts/jenkins/mail-recipients.txt

# Reset baseline to the regressed commit so that we will catch subsequent
# regressions (worse than $bad_rev): reuse the trigger file produced by the
# build of $first_bad and add the reset flag to it.
reset_trigger=$artifacts/trigger-build-1-reset
cp $artifacts/build-$first_bad/trigger-build-$current_project $reset_trigger
printf '%s\n' "reset_baseline=true" >> $reset_trigger

# Trigger master build now instead of waiting for next timed SCM trigger.
default_trigger=$artifacts/trigger-build-2-default
cp $artifacts/build-$bad_rev/trigger-build-$current_project $default_trigger

# Normal completion: drop the EXIT trap so that no failure is recorded.
trap '' EXIT