diff options
author | Mathieu Poirier <mathieu.poirier@linaro.org> | 2020-12-15 10:59:39 -0700 |
---|---|---|
committer | Mathieu Poirier <mathieu.poirier@linaro.org> | 2020-12-16 11:39:08 -0700 |
commit | 7ac865480d014bd2a3137196727bbffe4348af6e (patch) | |
tree | 994c93114d7dc5cb48ed894f9099f09c7d38b3cf |
Initial commit
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
-rw-r--r-- | README.txt | 133 | ||||
-rwxr-xr-x | get-mainline-commit | 48 | ||||
-rwxr-xr-x | get-mainline-feature | 50 | ||||
-rwxr-xr-x | negative-set | 45 | ||||
-rwxr-xr-x | patchnet-set | 53 | ||||
-rwxr-xr-x | patchnet-write | 19 | ||||
-rwxr-xr-x | positive-set | 60 | ||||
-rwxr-xr-x | testing-set | 50 |
8 files changed, 458 insertions, 0 deletions
diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..50627fb --- /dev/null +++ b/README.txt @@ -0,0 +1,133 @@ +# +# SPDX-License-Identifier: GPL-2.0 +# (c) 2020,2022, Mathieu Poirier <mathieu.poirier@linaro.org> +# + +1 ## Install GNU's parallel utility: + +mpoirier@xps15:~/work$ sudo apt-get install parallel + +2 ## Install git: + +mpoirier@xps15:~/work$ sudo apt-get install git + +3 ## Clone the tools and add them to the system PATH: + +mpoirier@xps15:~/work$ git clone https://git.linaro.org/people/mathieu.poirier/tooling.git +mpoirier@xps15:~/work$ cd tooling +mpoirier@xps15:~/work$ ls +mpoirier@xps15:~/work/kwg-523/test$ ls tooling/ +get-mainline-commit get-mainline-feature negative-set patchnet-set patchnet-write positive-set README.txt +mpoirier@xps15:~/work/tooling$ export PATH=$PATH:/home/mpoirier/work/tooling +mpoirier@xps15:~/work/tooling$ chmod 755 * + +4 ## Download the PatchNetTool model: + +mpoirier@xps15:~/work$ mkdir patchnet +mpoirier@xps15:~/work$ cd patchnet +mpoirier@xps15:~/work/patchnet$ git init . +mpoirier@xps15:~/work/patchnet$ git remote add hvdthong/PatchNetTool.git https://github.com/hvdthong/PatchNetTool.git + +mpoirier@xps15:~/work/patchnet$ git fetch hvdthong/PatchNetTool.git +mpoirier@xps15:~/work/patchnet$ git checkout -b PatchNetTool.git/master hvdthong/PatchNetTool.git/master + +5 ## Download the PatchNet_updated model: + +mpoirier@xps15:~/work$ mkdir patchnet_updated +mpoirier@xps15:~/work$ cd patchnet_updated +mpoirier@xps15:~/work/patchnet_updated$ git init . 
+mpoirier@xps15:~/work/patchnet_updated$ git remote add Xin-Zhou-smu/PatchNet_updated.git https://github.com/Xin-Zhou-smu/PatchNet_updated.git + +mpoirier@xps15:~/work/patchnet_updated$ git fetch Xin-Zhou-smu/PatchNet_updated.git +mpoirier@xps15:~/work/patchnet_updated$ git checkout -b PatchNet_updated.git/patch-1 Xin-Zhou-smu/PatchNet_updated.git/patch-1 + +6 ## Create kernel directory and download source: + +mpoirier@xps15:~/work$ mkdir kernel +mpoirier@xps15:~/work$ cd kernel +mpoirier@xps15:~/work/kernel$ git init . +mpoirier@xps15:~/work/kernel$ git remote add torvalds/linux.git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git +mpoirier@xps15:~/work/kernel$ git remote +torvalds/linux.git +mpoirier@xps15:~/work/kernel$ git fetch torvalds/linux.git + +7 ## Checkout the latest version: + +mpoirier@xps15:~/work/kernel$ git branch -r +torvalds/linux.git/master +mpoirier@xps15:~/work/kernel$ git checkout -b torvalds/linux.git/master-v5.8 v5.8 +Switched to a new branch 'torvalds/linux.git/master-v5.8' + +8 ## Get a handle on the stable trees: + +IMPORTANT: For the time being the name used for the stable trees must be the +same as what follows. See script "positive-set" for details. + +mpoirier@xps15:~/work/kernel$ git remote add stable/linux-stable.git git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git +mpoirier@xps15:~/work/kernel$ git fetch stable/linux-stable.git +mpoirier@xps15:~/work/kernel$ git branch -r | grep linux-stable + stable/linux-stable.git/linux-2.6.11.y + stable/linux-stable.git/linux-2.6.12.y + stable/linux-stable.git/linux-2.6.13.y +... +... +... 
+ stable/linux-stable.git/linux-5.5.y + stable/linux-stable.git/linux-5.6.y + stable/linux-stable.git/linux-5.7.y + stable/linux-stable.git/master + +9 ## Generate the positive training set by gathering all the patches that have + been backported to stable kernels: + +mpoirier@xps15:~/work/kernel$ positive-set positive.txt +(depending on the machine, this can take hours to complete) + +mpoirier@xps15:~/work/kernel$ wc -l positive.txt +44397 positive.txt + +10 ## Generate the negative training set using the output, i.e file positive.txt, + from the above command: + +mpoirier@xps15:~/work/kernel$ negative-set positive.txt negative.txt v5.0..v5.8 +(depending on the machine, this can take hours to complete) + +mpoirier@xps15:~/work/kernel$ wc -l negative.txt +82121 negative.txt + +11 ## Generate the input file to train PatchNetTool by using the output of the above + two commands: + +mpoirier@xps15:~/work/kernel$ patchnet-set -p positive.txt -n negative.txt train.txt +mpoirier@xps15:~/work/kernel$ wc -l train.txt +126518 train.txt + +12 ## Generate the input file to test new patches: + +mpoirier@xps15:~/work/kernel$ testing-set testing.txt v5.8..v5.9 + +13 ## Run the training and test sets through the preprocessing phase: + +mpoirier@xps15:~/work$ cp kernel/train.txt patchnet/preprocessing/ +mpoirier@xps15:~/work$ cp kernel/testing.txt patchnet/preprocessing/ +mpoirier@xps15:~/work$ cd patchnet/preprocessing +mpoirier@xps15:~/work/patchnet/preprocessing$ ./getinfo -j 36 --commit-list train.txt --git /home/linaro/kernel -o train +(Depending on the machine and the size of the training set, this can take hours) +mpoirier@xps15:~/work/patchnet/preprocessing$ ./getinfo -j 36 --commit-list testing.txt --git /home/linaro/kernel -o testing +(Depending on the machine and the size of the training set, this can take hours) + +14 ## Generate .pkl data from .out files: + +mpoirier@xps15:~/work/patchnet/preprocessing$ cp train.out testing.out ../../patchnet_updated/ 
+mpoirier@xps15:~/work/patchnet$ cd ../../patchnet_updated/ + +mpoirier@xps15:~/work/patchnet_updated$ python text2dict.py -text_path train.out -dict_path train.pkl + +mpoirier@xps15:~/work/patchnet_updated$ python text2dict.py -text_path 'testing.out' -dict_path 'test.pkl' + +mpoirier@xps15:~/work/patchnet_updated$ python generate_dict.py -text_path1 train.out -text_path2 testing.out -dict_path dict.pkl + +15 ## Train the model: + +mpoirier@xps15:~/work/patchnet_updated$ python main.py -train -train_data train.pkl -dictionary_data dict.pkl + diff --git a/get-mainline-commit b/get-mainline-commit new file mode 100755 index 0000000..7262e3f --- /dev/null +++ b/get-mainline-commit @@ -0,0 +1,48 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# (c) 2020,2022, Mathieu Poirier <mathieu.poirier@linaro.org> +# +# For each *stable* commit in range, output their mainline equivalent +# +# USAGE: +# $ git log --format="%H" v4.14..v4.14.186 | parallel --link get-mainline-commit :::: - ::: $1 +# or +# cat data.txt | parallel --link get-mainline-commit :::: - ::: output.file +# +# INPUT: +# $1: A git generated SHA1, i.e "bcf876870b95592b52519ed4aafcf9d95999bc9c" +# $2: Name of the output file +# + +# Needed if input is coming from a file +sha1=$(echo $1 | awk {'print $1'}) + +# tagged commits don't carry useful information, simply return +git describe --exact-match $sha1 &> /dev/null +if [ $? 
-eq 0 ]; then + exit 0 +fi + +while read line ; do + # Look for the upstream commit at the top of the changelog + if [ "$(echo $line | grep -i "upstream" | wc -l)" -eq 0 ]; then + continue + fi + + # Look for the SHA1, it is 40 characters long + for word in $line; do + if [ ${#word} -eq 40 ]; then + # We have the SHA1, add it to the list only if it is + # not in there already + if [ "$(grep $word $2 | wc -l)" -eq 0 ]; then + git log -1 --format="%H %s" $word >> $2 + fi + + # No need to go further + exit 0 + fi + done +done < <(git log -1 --format="%b" $sha1) + +exit 0 diff --git a/get-mainline-feature b/get-mainline-feature new file mode 100755 index 0000000..c8ffd72 --- /dev/null +++ b/get-mainline-feature @@ -0,0 +1,50 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# (c) 2020,2022, Mathieu Poirier <mathieu.poirier@linaro.org> +# +# Find all the commits that: +# 1) Have a title that doesn't include any of the words in the dictionary +# 2) Have not been back ported to any of the stable trees +# +# USAGE: +# $ git log --no-merges --no-renames --format="%H" v5.6..v5.7 | parallel --link get-mainline-feature :::: - ::: stable.file ::: output.file +# $ cat data.txt | parallel --link get-mainline-feature :::: - ::: stable.file ::: output.file +# +# INPUT: +# $1: A git generated SHA1, i.e "bcf876870b95592b52519ed4aafcf9d95999bc9c" +# $2: Output file from the command "positive-set" +# $3: Name of the output file +# + +dictionary=(MAINTAINER tag defconfig dts selftest script doc + bindings kbuild kconfig coccinelle mailmap checkpatch) + +# Needed if input is coming from a file +sha1=$(echo $1 | awk {'print $1'}) + +subj=$(git log -1 --pretty="%s" $sha1) +if [ $? -gt 0 ]; then + exit 1 +fi + +# No point in going further if the commit is a tag +git describe --exact-match $sha1 &> /dev/null +if [ $? 
-eq 0 ]; then + exit 0 +fi + +# Skip patches with a title that includes one of the words in the dictionary +for token in "${dictionary[@]}"; do + if [ "$(echo $subj | grep -i $token | wc -l)" -ne 0 ]; then + exit 0 + fi +done + +# See if $sha1 is part of the stable set in $2 +if [ "$(grep $sha1 $2 | wc -l)" -eq 0 ]; then + # This patch looks like a feature, add it to the log + echo "$sha1 $subj" >> $3 +fi + +exit 0 diff --git a/negative-set b/negative-set new file mode 100755 index 0000000..14c8714 --- /dev/null +++ b/negative-set @@ -0,0 +1,45 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# (c) 2020,2022, Mathieu Poirier <mathieu.poirier@linaro.org> +# +# Finds all the commits within the commit range(s) that have not been back +# ported to a stable kernel tree. +# +# USAGE: +# $ negative-set <positive.set> <outputfilename.out> <commit range> <commit range> ... +# +# INPUT: +# $1: Output file from the command "positive-set" +# $2: Name of the output file +# $3: A valid kernel commit range, i.e v5.0..v5.2 or bcf876870b95..65550098c1c4 +# + +if [ $# -lt 3 ]; then + echo "Script needs at least two arguments" + exit 1 +fi + +# Make a backup of the output file if it already exists +if [ -e "$2" ]; then + mv $2 $2.bkp +fi + +# Needed by grep command in get-mainline-feature +touch $2 + +declare -i count=1 + +for arg in "$@"; do + # Skip over the stable and filename arguments + if [ $count -lt 3 ]; then + count+=1 + continue + fi + echo processing $arg + git log --no-merges --no-renames --format="%H" $arg | parallel --link get-mainline-feature :::: - ::: $1 ::: $2 +done + +exit + + diff --git a/patchnet-set b/patchnet-set new file mode 100755 index 0000000..5a4e7c4 --- /dev/null +++ b/patchnet-set @@ -0,0 +1,53 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# (c) 2020,2022, Mathieu Poirier <mathieu.poirier@linaro.org> +# +# Takes the content of the positive and negative set and builds an input +# set that can be processed by PatchNet model. 
The order of the positive +# and negative file sets matters. +# +# USAGE: +# $ patchnet-set -p positive.txt -n negative.txt output.txt (training set) +# $ patchnet-set -n negative.txt output.txt (evaluation set) +# +# INPUT: +# $1: Name of the positive set file generated by "positive-set" +# $2: Name of the negative set file generated by "negative-set" +# $3: Name of output file +# + +usage() { echo "usage: patchnet-set [-p <positive.txt>] -n <negative.txt> <output.txt>" 1>&2; exit 1; } + +while getopts ":p:n:" opt; do + case ${opt} in + n) negative=${OPTARG} + ;; + p) positive=${OPTARG} + ;; + *) + usage + ;; + esac +done +shift $((OPTIND-1)) + +if [ -z "${negative}" ]; then + usage +fi + +# Make a backup of the output file if it already exists +# After getopts above the last argument, i.e output.txt is in $1 +if [ -e "$1" ]; then + mv $1 $1.bkp +fi + +if [ -z "${positive}" ]; then + cat $negative | parallel --link patchnet-write :::: - ::: false ::: $1 +else + cat $positive | parallel --link patchnet-write :::: - ::: true ::: $1 + cat $negative | parallel --link patchnet-write :::: - ::: false ::: $1 +fi + +exit + + diff --git a/patchnet-write b/patchnet-write new file mode 100755 index 0000000..e18a403 --- /dev/null +++ b/patchnet-write @@ -0,0 +1,19 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# (c) 2020,2022, Mathieu Poirier <mathieu.poirier@linaro.org> +# +# Outputs a SHA1 in a way that it can be digested by the PatchNet model +# +# USAGE: +# cat positive-set.txt | parallel --link patchnet-write :::: - ::: true ::: $1 +# cat negative-set.txt | parallel --link patchnet-write :::: - ::: false ::: $1 +# +# INPUT: +# $1: A git generated SHA1, i.e "39030e1351aa security: Add LSM hooks to set*gid syscalls" +# $2: true if $1 should be a stable patch, false otherwise +# $3: Name of the output file +# + +sha1=$(echo $1 | awk {'print $1'}) +echo $sha1: $2 >> $3 diff --git a/positive-set b/positive-set new file mode 100755 index 0000000..13dc57e --- /dev/null +++ 
b/positive-set @@ -0,0 +1,60 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# (c) 2020,2022, Mathieu Poirier <mathieu.poirier@linaro.org> +# +# Goes through all the commits in @stable and outputs their mainline equivalent. +# Since the same commit can be found in more than one tree, the script also +# checks for redundancy. +# +# USAGE: +# $ positive-set <outputfilename.out> +# +# INPUT: +# $1: Name of the output file +# + +if [ $# -ne 1 ]; then + echo "Script only takes one argument" + exit 1 +fi + +# Make a backup of the output file if it already exists +if [ -e "$1" ]; then + mv $1 $1.bkp +fi + +# Needed by 'grep' command in get-mainline-commit script +touch $1 + +stable=(stable/linux-stable.git/linux-5.4.y + stable/linux-stable.git/linux-4.19.y + stable/linux-stable.git/linux-4.14.y + stable/linux-stable.git/linux-4.9.y + stable/linux-stable.git/linux-4.4.y + stable/linux-stable.git/linux-3.16.y) + +for tree in "${stable[@]}"; do + # Get the top commit in each stable tree + top_commit=$(git log -1 --format="%H" $tree) + + # Theoretically the top commit has a tag - skip to the next + # tree if a tag can't be found. + $(git describe --exact-match $top_commit) &> /dev/null + if [ $? 
-eq 0 ]; then + echo "Skipping $tree, can't find stable tag" + continue + fi + + # Get the base and top commits + # $top_tag will end up being something like 4.18.23 + # $base_tag will end up being something like 4.18 + top_tag=$(git describe --exact-match $top_commit) + base_tag=${top_tag%.*} + + echo Processing $base_tag to $top_tag + + # For each commit, get the mainline equivalent + git log --format="%H" $base_tag..$top_tag | parallel --link get-mainline-commit :::: - ::: $1 +done + diff --git a/testing-set b/testing-set new file mode 100755 index 0000000..0235a52 --- /dev/null +++ b/testing-set @@ -0,0 +1,50 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# (c) 2020,2022, Mathieu Poirier <mathieu.poirier@linaro.org> +# +# Finds all the commits within the commit range(s) that have not been back +# ported to a stable kernel tree. +# +# USAGE: +# $ testing-set <outputfilename.out> <commit range> <commit range> ... +# +# INPUT: +# $1: Name of the output file +# $2: A valid kernel commit range, i.e v5.0..v5.2 or bcf876870b95..65550098c1c4 +# + +empty=$(mktemp) +commits=$(mktemp) + +if [ $# -lt 2 ]; then + echo "Script needs at least two arguments" + exit 1 +fi + +# Make a backup of the output file if it already exists +if [ -e "$1" ]; then + mv $1 $1.bkp +fi + +touch $empty $commits + +declare -i count=1 + +for arg in "$@"; do + # Skip over the filename argument + if [ $count -lt 2 ]; then + count+=1 + continue + fi + echo processing $arg + git log --no-merges --no-renames --format="%H" $arg | parallel --link get-mainline-feature :::: - ::: $empty ::: $commits +done + +cat $commits | parallel --link patchnet-write :::: - ::: false ::: $1 + +rm $empty $commits + +exit + + |