summaryrefslogtreecommitdiff
path: root/tcwg_kernel-bisect.sh
blob: 24a0917b448ec57b9c6dbfa75a114fe76461bb30 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
#!/bin/bash

# Bisect a regression in $current_project between $baseline_rev (expected to
# build successfully) and $bad_rev (expected to fail) by repeatedly running
# $build_script.  Logs, manifests, the notification e-mail and trigger files
# for follow-up jobs are written under ./artifacts.

# -e: exit on the first failing command; -f: disable pathname expansion;
# pipefail: a pipeline fails if any of its stages fails.
set -ef -o pipefail

# Helpers used below (fresh_dir, convert_args_to_variables,
# obligatory_variables, manifest_pop, manifest_out, assert,
# clone_or_update_repo) are not defined in this file; presumably they all
# come from jenkins-helpers.sh -- confirm there.
scripts=$(dirname $0)
. $scripts/jenkins-helpers.sh

# Relative artifacts are used for generation of manifests and reproduction
# instructions.
rel_artifacts=artifacts
artifacts=$(pwd)/$rel_artifacts

# NOTE(review): fresh_dir presumably resets $artifacts while keeping entries
# matching the two patterns (passed literally because they are quoted and
# globbing is off) -- confirm in jenkins-helpers.sh.
fresh_dir $artifacts "$artifacts/manifests/*" "$artifacts/jenkins/*"

# Process bisect-only args
# The helper is expected to report in SHIFT_CONVERTED_ARGS how many leading
# arguments it consumed; they are dropped so that "$@" holds the rest.
convert_args_to_variables "$@"
shift "$SHIFT_CONVERTED_ARGS"

# NOTE(review): presumably aborts if any listed variable is unset -- confirm.
obligatory_variables bad_rev baseline_rev build_script ci_project ci_config

# Defaults for optional settings.  BUILD_URL falls back to the current
# directory; the mail body later appends "artifact/..." directly to it.
BUILD_URL="${BUILD_URL:-$(pwd)}"
reproduce_bisect="${reproduce_bisect:-false}"

# Process build args and record them in build-parameters.sh
# NOTE(review): "^^" and "%%" are markers interpreted by the helper; going by
# the comment above, "%% FILE" names the manifest to record into -- confirm.
convert_args_to_variables ^^ $reproduce_bisect %% $artifacts/manifests/build-parameters.sh "$@"
# $reproduce_bisect is executed as a command (true/false): manifest_pop runs
# only when we are not reproducing a previous bisect.
$reproduce_bisect || manifest_pop

obligatory_variables current_project

# $toolchain is required only for the tcwg_kernel project (it selects mail
# recipients at the end of the script).
if [ x"$ci_project" = x"tcwg_kernel" ]; then
    obligatory_variables toolchain
fi

verbose="${verbose:-true}"

# Treat unset variables as errors from here on, now that the optional
# variables above have received their defaults.
set -u

# $verbose is executed as a command (true/false) to decide on tracing.
if $verbose; then set -x; fi

# On any exit write "ERROR at <function>:<line>" to $artifacts/failures
# ($artifacts is expanded now, FUNCNAME/BASH_LINENO when the trap fires).
# Paths that finish successfully disarm this with 'trap "" EXIT'.
trap "eval \"echo ERROR at \${FUNCNAME[0]}:\${BASH_LINENO[0]}\" > $artifacts/failures" EXIT

# Build the baseline first: its results are re-used by every bisection step,
# and a successful build doubles as a sanity check of the infrastructure.
echo "Testing baseline_rev $baseline_rev (should be success)"
baseline_cmd=(
    $build_script
    ^^ $reproduce_bisect
    %% $rel_artifacts/manifests/build-baseline.sh
    @@ $rel_artifacts/manifests/build-parameters.sh
    --mode "baseline"
    --current_branch "$baseline_rev"
    --reset_baseline true
    --top_artifacts "$rel_artifacts/build-baseline"
    --verbose "$verbose"
)
"${baseline_cmd[@]}"

# The top-level failures file must not exist at this point.
assert ! [ -f $artifacts/failures ]

# Everything below runs from inside the project checkout.
cd $current_project

# Expose the baseline build under its revision name as well, and record the
# baseline as a known-good revision.
ln -sf "build-baseline" "$artifacts/build-$baseline_rev"
ln -sf "build-baseline.sh" "$artifacts/manifests/build-$baseline_rev.sh"
echo "$baseline_rev" >> $artifacts/good_revs

# Record the bisect parameters in the manifest.
printf '%s\n' \
    "declare -g reproduce_bisect=true" \
    "declare -g bad_rev=$bad_rev" \
    "declare -g baseline_rev=$baseline_rev" \
    | manifest_out

# Bisect script.
#
# With this script we find the first commit that has regressed compared
# to baseline, but not necessarily the commit that caused the regression in
# $bad_rev.  Consider the scenario:
# - rev_10 produced good result "2000" -- this is current baseline
# - rev_20 completely broke the build (say, result "10")
# - rev_22 fixed the build
# - rev_30 regressed the build to result "1000" -- this is the regression we
#   detected vs "2000" baseline.
#
# The script will identify rev_20 as the first failing commit, which will
# cause the baseline to be reset to rev_20 with metric "10".  When we then
# rebuild master (at rev_30) we will see a /progression/ from "10" to "1000",
# thus missing the regression of "2000" to "1000".
#
# To catch the "2000" to "1000" regression someone would need to manually
# trigger bisect between rev_22 and rev_30.
#
# TODO: We could skip revisions (exit 125) that are worse than metric
# for $bad_rev (result metric <1000 in the above scenario), so we would
# skip revisions between rev_20 and rev_22.  This might cause other edge
# cases to be handled sub-optimally, though.
#
# Notes on the generated file:
# - The here-document delimiter is unquoted, so $build_script,
#   $reproduce_bisect, $rel_artifacts and $verbose are substituted now, while
#   the escaped \$rev, \$res, \$! and \$? are left for test.sh to evaluate.
#   Backslash-newline pairs are removed by the shell, so the build command
#   ends up on a single line in test.sh.
# - test.sh is run from inside the project checkout: it records HEAD, steps
#   up one directory and uses paths relative to $rel_artifacts from there.
# - Exit status: 1 (bad) if the build left a "failures" file, 125 (skip) if
#   the build command failed without one, 0 (good) otherwise.  The revision is
#   appended to bad_revs, skipped_revs or good_revs accordingly.
cat > $artifacts/test.sh <<EOF
#!/bin/sh
rev=\$(git rev-parse HEAD)
cd ..
$build_script \
  ^^ $reproduce_bisect \
  %% $rel_artifacts/manifests/build-\$rev.sh \
  @@ $rel_artifacts/manifests/build-parameters.sh \
  --mode bisect \
  --top_artifacts $rel_artifacts/build-\$rev \
  --verbose "$verbose" &
res=0 && wait \$! || res=\$?
if [ -f $rel_artifacts/build-\$rev/failures ]; then
  echo "\$rev" >> $rel_artifacts/bad_revs
  exit 1
elif [ x"\$res" != x"0" ]; then
  echo "\$rev" >> $rel_artifacts/skipped_revs
  exit 125
else
  echo "\$rev" >> $rel_artifacts/good_revs
  exit 0
fi
EOF
chmod +x $artifacts/test.sh

# Workaround for linux-next/master being rebased on top of linux-next/stable:
# look for a revision that tests "good" compared to baseline and that is an
# ancestor of both $baseline_rev and $bad_rev.
merge_base=$(git merge-base $bad_rev $baseline_rev)
origin=$(git remote get-url origin)
origin=$(basename "$origin")

# Unless the workaround below finds a better starting point, bisect against
# the baseline itself.
good_rev=$baseline_rev

if [ x"$merge_base" != x"$baseline_rev" ] && [ x"$origin" = x"linux-next.git" ]; then
    # 1st candidate: the merge base (just like git-bisect would pick).
    candidate_revs=($merge_base)

    # 2nd candidate: linux-next/stable, unless a value was already provided.
    linux_next_stable="${linux_next_stable-$(git rev-parse refs/remotes/origin/stable)}"
    printf 'declare -g linux_next_stable=%s\n' "$linux_next_stable" | manifest_out
    candidate_revs+=($linux_next_stable)

    # Last resort: the merge base of $bad_rev with linux-mainline/master.
    url=git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
    # Adding the remote fails if it already exists; set-url then makes sure it
    # points at the right place either way.
    git remote add mainline "$url" > /dev/null 2>&1 || true
    git remote set-url mainline "$url"
    clone_or_update_repo . refs/remotes/mainline/master "$url"
    mainline_master="${mainline_master-$(git merge-base $bad_rev HEAD)}"
    printf 'declare -g mainline_master=%s\n' "$mainline_master" | manifest_out
    candidate_revs+=($mainline_master)

    # Test the candidates in order and keep the first one that builds fine.
    found_good=false
    for candidate in "${candidate_revs[@]}"; do
	echo "Linux-next rebase workaround: testing potential good_rev $candidate (hoping for success)"
	git checkout --detach "$candidate"
	$artifacts/test.sh &
	res=0
	wait $! || res=$?
	if [ x"$res" = x"0" ]; then
	    good_rev=$candidate
	    found_good=true
	    break
	fi
    done

    if ! $found_good; then
	# Below bisect will fail :-(
	echo "Linux-next rebase workaround: could not find a good parent, below bisect will fail"
    fi
fi

# Publish the chosen good build under a fixed name.
ln -sf "build-$good_rev" "$artifacts/build-good"
ln -sf "build-$good_rev.sh" "$artifacts/manifests/build-good.sh"

# Confirm that $bad_rev really fails before spending time on a bisect.
git checkout --detach $bad_rev
$artifacts/test.sh &
res=0
wait $! || res=$?

case "$res" in
    0)
	echo "WARNING: build for bad_rev $bad_rev succeeded"
	# Build job had a spurious failure.  Re-try: ask for a new master
	# build of $bad_rev, send no e-mail, and finish successfully.
	printf 'current_project=%s\ncurrent_branch=%s\n' \
	    "$current_project" "$bad_rev" > $artifacts/trigger-2-build-master
	rm -f $artifacts/jenkins/mail-recipients.txt
	trap '' EXIT
	exit 0
	;;
esac

# Publish the failing build under a fixed name.
ln -sf "build-$bad_rev" "$artifacts/build-bad"
ln -sf "build-$bad_rev.sh" "$artifacts/manifests/build-bad.sh"

# Start bisecting between the bad and the good revision; bisect.log collects
# the output of both "git bisect start" and "git bisect run".
git bisect start $bad_rev $good_rev 2>&1 | tee $artifacts/bisect.log

# "git bisect run" can fail (exit with non-zero) in a number of cases:
# - on trivial bisects (e.g., between HEAD^ and HEAD),
# - when merge-base between baseline and bad is worse than baseline,
# - something else?
# In all these cases we want to reset baseline to HEAD, so that we catch
# most of the commits that introduced change in the result metric.
git bisect run $artifacts/test.sh 2>&1 | tee -a $artifacts/bisect.log &
res=0 && wait $! || res=$?

if [ x"$res" = x"0" ]; then
    # The first line of .git/BISECT_RUN is expected to read
    # "<sha1> is the first bad commit".  head reads the file directly: with
    # "cat FILE | head -n 1" a large file can kill cat with SIGPIPE, which
    # pipefail would turn into a failure.  "|| first_bad=" keeps "set -e"
    # from aborting silently, so that the assert below reports the problem.
    first_bad=$(head -n 1 .git/BISECT_RUN | grep "is the first bad commit" | cut -d" " -f 1) || first_bad=""
    assert [ x"$first_bad" != x"" ]

    # Look for an immediate parent of $first_bad that is already recorded in
    # good_revs.  last_good ends up empty if there is none.
    last_good=""
    for last_good in $(git rev-parse $first_bad^@); do
	if grep -q $last_good "$artifacts/good_revs"; then
	    # One of immediate parents of $first_bad tested good.
	    break
	fi
	last_good=""
    done
    if [ x"$last_good" = x"" ]; then
	# None of immediate parents of $first_bad tested as good.
	# It seems that git-bisect assumes parent commit as "good" on
	# the basis of one of its children being "good".  Therefore we
	# can have a situation when we have parent P with children C1 and C2,
	# and child C1 has a child of its own CB.  Git-bisect tests C2 as
	# "good", and CB as "bad".  From C2 being good it assumes P as "good",
	# and it knows CB is "bad", so git-bisect returns C1 as the first bad
	# commit.
	# To simplify investigations we explicitly test parent of $first_bad.
	res=0
	# Last parent that we tested ourselves and that did not succeed.
	failed_parent=""
	for last_good in $(git rev-parse $first_bad^@); do
	    if grep -q $last_good "$artifacts/bad_revs" "$artifacts/skipped_revs"; then
		echo "First_bad's parent $last_good has tested bad or skipped"
		last_good=""
		continue
	    fi
	    if [ x"$(git merge-base $merge_base $last_good)" != x"$merge_base" ]; then
		echo "First_bad's parent $last_good is outside of bisect scope"
		last_good=""
		continue
	    fi
	    echo "Testing first_bad's parent $last_good (hoping for success)"
	    git checkout --detach "$last_good"
	    $artifacts/test.sh &
	    res=0 && wait $! || res=$?
	    if [ x"$res" = x"0" ]; then
		break
	    fi
	    failed_parent=$last_good
	done
	# NOTE(review): if every parent was passed over above, we get here
	# with res=0 and an empty last_good; the report generated later then
	# refers to an empty last_good revision.  Left as is -- the intended
	# handling of that case is not clear from this script.
	if [ x"$res" != x"0" ]; then
	    # It seems $last_good was on a path that tested good, even though
	    # it itself is bad.  We re-trigger the bisection job with updated
	    # parameters.
	    # A parent passed over after the failing one leaves last_good
	    # empty, so use the parent that actually failed the test.  It is
	    # non-empty because res can only be non-zero after a test.
	    last_good=$failed_parent
	    # We need to be careful to avoid re-trigger loops, so verify that
	    # last_good is an ancestor of bad_rev.
	    assert [ x"$(git rev-list $last_good ^$bad_rev)" = x"" ]
	    cat > $artifacts/trigger-0-bisect-again <<EOF
current_project=$current_project
baseline_rev=$baseline_rev
bad_rev=$last_good
EOF
	    # Don't send any emails.
	    echo > $artifacts/jenkins/mail-recipients.txt
	    # Append "-retry" to every line of the build name.
	    touch $artifacts/jenkins/build-name
	    sed -i -e "s/\$/-retry/" $artifacts/jenkins/build-name
	    trap "" EXIT
	    exit 0
	fi
    fi

    # The presence of first-bad tells the reporting code further down that a
    # single culprit commit was identified.
    echo $first_bad > $artifacts/first-bad
else
    # "git bisect run" failed: fall back to HEAD, which the end of the script
    # uses as the revision to reset the baseline to.
    first_bad=$(git rev-parse HEAD)
    if ! [ -f .git/BISECT_LOG ]; then
	# It seems this was a trivial bisect with $bad_rev^ == $good_rev.
	first_bad=$bad_rev
	last_good=$(git rev-parse $first_bad^)
	assert [ x"$last_good" = x"$good_rev" ]

	echo $first_bad > $artifacts/first-bad
    fi
fi
# Leave the project checkout.
cd ..

# Save BISECT_* logs
# Move git's bisect state files out of the checkout into the artifacts.
mkdir $artifacts/git-logs
find "$current_project" -path "$current_project/.git/BISECT_*" -print0 | xargs -0 -I@ mv @ $artifacts/git-logs/

# Both branches below, and the code after them, write into
# $artifacts/jenkins, so create it up front rather than only when a culprit
# was found.
mkdir -p $artifacts/jenkins

if [ -f $artifacts/first-bad ]; then
    # A single culprit commit was identified.
    # Append "-<first_bad>" to every line of the build name; touch makes sure
    # the file exists so that sed does not fail.
    touch $artifacts/jenkins/build-name
    sed -i -e "s/\$/-$first_bad/" $artifacts/jenkins/build-name

    ln -f -s "build-$first_bad" "$artifacts/build-first_bad"
    ln -f -s "build-$first_bad.sh" "$artifacts/manifests/build-first_bad.sh"

    # Names and revisions used by the common part of the e-mail.
    good_name="last_good"
    good_sha1="$last_good"
    bad_name="first_bad"
    bad_sha1="$first_bad"

    ln -f -s "build-$last_good" "$artifacts/build-last_good"
    ln -f -s "build-$last_good.sh" "$artifacts/manifests/build-last_good.sh"

    cat >> $artifacts/jenkins/mail-body.txt <<EOF
Successfully identified regression in CI configuration $ci_config for $current_project between bad_rev $bad_rev and baseline_rev $baseline_rev .

Culprit:
<cut>
$(git -C $current_project log -n 1 $first_bad)
</cut>

EOF
else
    # No single culprit: report the whole good_rev..bad_rev range instead.
    good_name="good_rev"
    good_sha1="$good_rev"
    bad_name="bad"
    bad_sha1="$bad_rev"
    cat >> $artifacts/jenkins/mail-body.txt <<EOF
Could not identify regression in CI configuration $ci_config for $current_project between bad_rev $bad_rev and baseline_rev $baseline_rev down to a single commit.  See 'Bisect log' in the links below for bisection details.

EOF
fi

# Append the part of the e-mail that is common to both outcomes: the result
# files of the good and the bad build, links to their artifacts and
# step-by-step instructions for reproducing both builds locally.
# $good_name/$good_sha1 and $bad_name/$bad_sha1 were chosen above, depending
# on whether a single culprit commit was found.
# The here-document is unquoted, so variables and \$(...) command
# substitutions are expanded now, while the mail is generated.  Links are
# built by appending directly to $BUILD_URL, which is therefore expected to
# end with "/".
cat >> $artifacts/jenkins/mail-body.txt <<EOF
Results regressed from (for $good_name == $good_sha1)

<cut>
$(cat $artifacts/build-$good_sha1/results)
</cut>

to (for $bad_name == $bad_sha1)

<cut>
$(cat $artifacts/build-$bad_sha1/results)
</cut>

Artifacts of $good_name build: ${BUILD_URL}artifact/$rel_artifacts/build-$good_sha1/
Artifacts of $bad_name build: ${BUILD_URL}artifact/$rel_artifacts/build-$bad_sha1/

Reproduce builds:
<cut>
mkdir investigate-$current_project-$bad_sha1
cd investigate-$current_project-$bad_sha1

git clone https://git.linaro.org/toolchain/jenkins-scripts

mkdir -p $rel_artifacts/manifests
curl -o $rel_artifacts/manifests/build-baseline.sh ${BUILD_URL}artifact/$rel_artifacts/manifests/build-baseline.sh
curl -o $rel_artifacts/manifests/build-parameters.sh ${BUILD_URL}artifact/$rel_artifacts/manifests/build-parameters.sh
curl -o $rel_artifacts/test.sh ${BUILD_URL}artifact/$rel_artifacts/test.sh
chmod +x $rel_artifacts/test.sh

# Reproduce the baseline build (build all pre-requisites)
$build_script @@ $rel_artifacts/manifests/build-baseline.sh

cd $current_project

# Reproduce $bad_name build
git checkout --detach $bad_sha1
../$rel_artifacts/test.sh

# Reproduce $good_name build
git checkout --detach $good_sha1
../$rel_artifacts/test.sh

cd ..
</cut>

History of pending regressions and results: https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/ci/$ci_project/$ci_config

Bisect log: ${BUILD_URL}artifact/$rel_artifacts/bisect.log/*view*/
Artifacts: ${BUILD_URL}artifact/$rel_artifacts/
Build URL: $BUILD_URL
Build log: ${BUILD_URL}consoleText
EOF

# When a single culprit commit was identified, also append the commit itself
# (diffstat and patch), limited to its first 1000 lines.
if [ -f $artifacts/first-bad ]; then
    cat >> $artifacts/jenkins/mail-body.txt <<EOF

Full commit:
<cut>
$(git -C $current_project show --stat --patch $first_bad | head -n 1000)
</cut>
EOF
fi

# Set mail recipients last to preserve catch-error value from .yaml file.
# The validation list always gets the mail; for tcwg_kernel the developers
# responsible for the toolchain/project combination are added.
developers=""
if [ x"$ci_project" = x"tcwg_kernel" ]; then
    case "$toolchain:$current_project" in
	gnu:*)
	    developers="christophe.lyon@linaro.org, maxim.kuvyrkov@linaro.org"
	    ;;
	llvm:linux)
	    developers="arnd@linaro.org, mark.brown@linaro.org, ndesaulniers@google.com, trong@google.com"
	    ;;
	llvm:llvm)
	    developers="adhemerval.zanella@linaro.org, maxim.kuvyrkov@linaro.org, ndesaulniers@google.com, trong@google.com, yvan.roux@linaro.org"
	    ;;
    esac
fi
CI_MAIL_RECIPIENTS="tcwg-validation@linaro.org${developers:+, $developers}"
printf '%s\n' "$CI_MAIL_RECIPIENTS" > $artifacts/jenkins/mail-recipients.txt

# Reset baseline to the regressed commit so that we will catch subsequent
# regressions (worse than $bad_rev).
printf '%s\n' \
    "current_project=$current_project" \
    "current_branch=$first_bad" \
    "reset_baseline=true" \
    > $artifacts/trigger-1-reset-baseline

# Trigger master build now instead of waiting for next timed SCM trigger.
printf '%s\n' "current_project=$current_project" > $artifacts/trigger-2-build-master

# Finished successfully: disarm the EXIT trap.
trap '' EXIT