path: root/mt-job-parser.py
blob: dd7374384ff2c8ccac51a0f1f5a5f6237f84dd10 (plain)
#!/usr/bin/python3
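"""Mbed TLS CI job parser for the trustedfirmware.org staging Jenkins.

Collects PR-head and nightly build results over a given period of days,
prints duration statistics, and can optionally update a Google spreadsheet
and download the nightly timestamps.json artifacts.
"""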

import sys
import os
import argparse
import requests
import json
from datetime import timezone
import datetime
import math
from pprint import pprint
from operator import itemgetter
import threading
import gspread
from bs4 import BeautifulSoup
import statistics
import urllib.request

MBEDTLS_PR_HEAD_JOB_URL = "https://ci.staging.trustedfirmware.org/job/mbed-tls-pr-head/view/change-requests/api/json"
nightly_job = { "name": "mbed-tls-nightly-tests",
                "url": "https://ci.staging.trustedfirmware.org/view/Mbed-TLS/job/mbed-tls-nightly-tests/" }

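# Shared state populated by the parser threads and read by the reporting helpers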
hr_sec = (60 * 60)
nightly_jobs = {"development": [], "mbedtls-2.28": []}
nj_result = {"ABORTED": "Timeout",
             "FAILURE": "Failed",
             "SUCCESS": "Pass"}
time_trigger_jobs = []
interactive_jobs = []
gs_color = {"FAILURE": {"backgroundColor": {"red": 1}},
            "SUCCESS": {"backgroundColor": {"green": 1}},
            "ABORTED": {"backgroundColor": {"red": 0.8, "green": 0.8, "blue": 0.8}}}

def parse_job(job_url, period):
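    """Collect the finished builds of one Jenkins job that started after `period`.

    PR builds are appended to time_trigger_jobs (branch indexing) or
    interactive_jobs; timer-started nightly builds go into nightly_jobs,
    keyed by branch.
    """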
    api_url = job_url + "api/json"
    job_info = json.loads(requests.get(api_url).text)
    job_name = job_url.split('/')[-2]
#    print("Start {} thread".format(job_name))

    for build in job_info["builds"]:
        try:
            url = build["url"] + "api/json"
            build_info = json.loads(requests.get(url).text)
            # Check whether this build started within the requested period
            build_time = math.floor(build_info["timestamp"]/1000)
            # Only collect builds that have already finished within the period
            if build_time > period:
                if not build_info["inProgress"]:
                    # print(json.dumps(build, indent=4))
                    duration_sec = math.floor(build_info["duration"]/1000)
                    build_des = build_info["actions"][0]["causes"][-1]["shortDescription"]
                    build_job = {"Name": job_name,
                                 "Build_No": build_info["number"],
                                 "URL": build_info["url"],
                                 "Result": build_info["result"],
                                 "Duration_hr": sec_to_hr(duration_sec),
                                 "Duration_sec": duration_sec,
                                 "Total_waiting_time": get_total_waiting_time(build_info["url"]),
                                 "Build-time": "{}".format(datetime.datetime.utcfromtimestamp(math.floor(build_info["timestamp"]/1000)))}
                    print(build_job)
                    if "PR-" in job_name:
                        # PR head jobs
                        if "Branch indexing" in build_des:
                            time_trigger_jobs.append(build_job)
                        else:
                            interactive_jobs.append(build_job)
                    else:
                        # nightly build job
                        # only parse time trigger nightly build jobs
                        if "Started by timer" in build_des:
                            # Extract the branch name from a description like "... {BRANCH=development}"
                            build_branch = build_des.replace('}', '').split('=')[1]
                            nightly_jobs[build_branch].append(build_job)
            else:
                break
        except Exception:
            print("Cannot parse build #{} of job {}.\n URL: {}".format(build.get("number"), job_name, build.get("url")))

def get_total_waiting_time(job_url):
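    """Scrape the build's timings page and return the "Total" waiting time cell."""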
    timing_page_url = f"{job_url}timings/"

    response = requests.get(timing_page_url)
    bs = BeautifulSoup(response.text, "html.parser")

    twt = ""
    for row in bs.find_all('tr'):
        if "Total" in row.get_text():
            twt = row.find_all('td')[-1].get_text()

    return twt

def sec_to_hr(job_sec):
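    """Convert a duration in seconds to hours, rounded up to the next quarter hour."""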
    job_hr = math.floor(job_sec / hr_sec)
    quarter_hr = round((job_sec - (job_hr * hr_sec))/hr_sec, 2)

    if job_sec == 0:
        remainder = 0
    else:
        if quarter_hr <= 0.25:
            remainder = 0.25
        elif quarter_hr <= 0.5:
            remainder = 0.5
        elif quarter_hr <= 0.75:
            remainder = 0.75
        else:
            remainder = 1

    return (job_hr + remainder)
 
def get_median_avg_value(job_list):
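    """Return the median and average build time, in hours, of a list of builds."""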
    if len(job_list) == 0:
        return {"median": 0, "avg": 0}

    total_sec = sum(j["Duration_sec"] for j in job_list)
    buildtime_hr = [j["Duration_hr"] for j in job_list]

    return {"median": statistics.median(buildtime_hr), "avg": sec_to_hr(total_sec/len(job_list))}

def update_gsheet(period_date):
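    """Append a summary row for period_date to the tracking spreadsheet.

    Nightly results get a colour-coded, hyperlinked cell; PR jobs are
    reported as median/average durations with a pass/fail/not-built count.
    """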
    date = period_date.strftime("%Y/%m/%d")
    new_row = [date, "", "", ""]
    nj_builds = nightly_jobs.keys()

    print("Update google spreadsheet")
    for n in nj_builds:
        if len(nightly_jobs[n]) == 0:
            b = ["NOT_BUILT", ""]
        else:
            nj = nightly_jobs[n][0]
            b = [f"#{nj['Build_No']} {nj_result[nj['Result']]} ({nj['Duration_hr']} hr)", nj["Total_waiting_time"]]
        new_row.extend(b)
    for j in [time_trigger_jobs, interactive_jobs]:
        if len(j) == 0:
            pr_build = "NOT_BUILD"
        else:
            v = get_median_avg_value(j)
            n_builds = len([b for b in j if b["Result"] == "NOT_BUILT"])
            f_builds = len([b for b in j if b["Result"] == "FAILURE"])
            p_builds = len([b for b in j if b["Result"] == "SUCCESS"])
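            # Format: median hr / average hr (total builds: passed/failed/not-built)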
            pr_build = f"{v['median']} hr / {v['avg']} hr ({len(j)} builds: {p_builds}/{f_builds}/{n_builds})"
        new_row.append(pr_build)

    # Load the Google service account credentials
    credential = "{}/g-service-account.json".format(os.path.dirname(os.path.realpath(__file__)))
    gs = gspread.service_account(filename=credential)
    sh = gs.open("Mbedtls jobs on the staging")
    ws = sh.get_worksheet(0)
    ws_update = ws.append_row(new_row)
    # Get the number of the row we just appended,
    # e.g. "'Sheet1'!A5:J5" -> "5" (assumes single-letter column names)
    updated_row = ws_update["updates"]["updatedRange"].split(':')[1][1:]
    last_row = int(updated_row) - 1
    nj_cell = {"development":  f"E{updated_row}",
               "mbedtls-2.28": f"G{updated_row}"}
    for i in nj_builds:
        if len(nightly_jobs[i]) != 0:
            # Set the background color for the nightly jobs result
            print(f"Update cell {nj_cell[i]} background to color {gs_color[nightly_jobs[i][0]['Result']]}")
            ws.format(nj_cell[i], gs_color[nightly_jobs[i][0]["Result"]])
            # Add build link
            cell_content = ws.acell(nj_cell[i]).value
            ws.update_acell(nj_cell[i], f'=HYPERLINK("{nightly_jobs[i][0]["URL"]}", "{cell_content}")')
    # Copy the master node and executor info from the previous row
    ws.copy_range(f'B{last_row}:D{last_row}', f'B{updated_row}:D{updated_row}')

def download_nightly_timestamp_file():
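    """Download the timestamps.json artifact of each branch's latest nightly build."""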
    for j in nightly_jobs:
        if len(nightly_jobs[j]) == 0:
            print(f"No nightly build found for {j}, skip the timestamp download")
            continue
        url = f"{nightly_jobs[j][0]['URL']}artifact/timestamps.json"
        file_name = f"timestamp-{nightly_jobs[j][0]['Build_No']}.json"
        print(f"Download {file_name} from {url}")
        try:
            urllib.request.urlretrieve(url, file_name)
        except Exception:
            print(f"Cannot download timestamp file from {url}!!")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Mbed TLS job parser')
    parser.add_argument("-d", "--day", type=int, default=1, help='Number of past days to collect jobs from')
    parser.add_argument("-g", "--update-google-spreadsheet",
                        action='store_true', default=False, help='Update the Google spreadsheet')
    parser.add_argument("-t", "--download-timestamp-file",
                        action='store_true', default=False, help='Download the nightly job timestamps.json file')
    args = parser.parse_args()

    days_period = args.day
    update_gs = args.update_google_spreadsheet
    download_timestamp = args.download_timestamp_file

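    # Cutoff: builds that started before this UTC timestamp are ignored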
    period_date = (datetime.datetime.now(timezone.utc) - datetime.timedelta(days = days_period))
    period = period_date.timestamp()

    print(f"-------------- Get {days_period} day of jobs from {period_date.strftime('%Y/%m/%d')} ---------------\n \
           Update Google spreadsheet: {update_gs}\n\t Download timestamp file: {download_timestamp}" )

    pr_head_jobs = json.loads(requests.get(MBEDTLS_PR_HEAD_JOB_URL).text)
    #print(json.dumps(pr_head_jobs, indent=4))
    build_jobs = pr_head_jobs["jobs"]
    build_jobs.append(nightly_job)
    threads = []

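    # Parse each job in its own thread; results land in the module-level lists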
    for job in build_jobs:
        t = threading.Thread(target=parse_job, args=(job["url"], period))
        t.start()
        threads.append(t)

    print("Created {} threads".format(len(threads)))

    for t in threads:
        t.join()


    print("Time trigger: {} builds".format(len(time_trigger_jobs)))
    pprint(sorted(time_trigger_jobs, key=itemgetter("Duration_hr")))
    print("Interactive: {} builds".format(len(interactive_jobs)))
    pprint(sorted(interactive_jobs, key=itemgetter("Duration_hr")))

    print(f"Time Trigger: {get_median_avg_value(time_trigger_jobs)}")
    print(f"Interactive: {get_median_avg_value(interactive_jobs)}")
    pprint(nightly_jobs)

    if update_gs:
        update_gsheet(period_date)

    if download_timestamp:
        download_nightly_timestamp_file()

    print("----------------- END --------------------\n")