summaryrefslogtreecommitdiff
path: root/check-link.py
blob: d09fd062801c549db3e62b7a14a8a60cdba61887 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/python

import re
import requests
import subprocess
import sys

OUTPUT_FILE = "./link_check_result.txt"


def build_linkchecker_argv(args):
    """Return the linkchecker command line derived from this script's argv.

    The program name (``args[0]``) is replaced by ``'linkchecker'`` and the
    ``-F`` option is forced to ``text``; every other argument is passed
    through unchanged.  ``args`` itself is not modified.

    NOTE(review): ``-F text`` is presumably what makes linkchecker produce
    the ``./linkchecker-out.txt`` file read later in this script -- confirm
    against the linkchecker manual.

    :param args: argument list in ``sys.argv`` layout (program name first).
    :returns: a new list suitable for ``subprocess.call``.
    """
    cmd = ['linkchecker'] + list(args[1:])
    if '-F' in cmd:
        value_at = cmd.index('-F') + 1
        if value_at < len(cmd):
            # Override whatever output type the caller asked for.
            cmd[value_at] = 'text'
        else:
            # '-F' was the last argument and had no value to override.
            cmd.append('text')
    else:
        # Keep the final argument last, but never insert in front of the
        # program name when no other arguments were given.
        insert_at = max(len(cmd) - 1, 1)
        cmd[insert_at:insert_at] = ['-F', 'text']
    return cmd


if __name__ == "__main__":
    # Run linkchecker here
    ret = subprocess.call(build_linkchecker_argv(sys.argv))
    # NOTE(review): exit status 2 is taken to mean linkchecker itself failed
    # (as opposed to "broken links found") -- confirm against its man page.
    if ret == 2:
        sys.exit(1)

# One linkchecker text-report entry whose result is a ConnectionError.
# Named groups: real_url (the URL to probe again) and reason (the error text).
PATTERN = '^URL.*$\n(^Name.*$\n)?(^Parent URL.*$\n)?^Real URL\\s+(?P<real_url>.*)$\n^Check time\\s+.*$\n(^Size.*$\n)?^Result\\s+Error:\\s+(?P<reason>ConnectionError:.*)$\n^$\n'
pattern = re.compile(PATTERN, re.MULTILINE)

# requests waits indefinitely unless a timeout is given, so one stalled
# server would otherwise hang the whole run.
REQUEST_TIMEOUT = 60

# Verdict marker for a URL whose second probe failed as well.
_UNREACHABLE = object()


def _http_status(url):
    """Fetch *url* with requests and return ``(status_code, reason)``."""
    resp = requests.get(url, timeout=REQUEST_TIMEOUT)
    return resp.status_code, resp.reason


def _recheck(url, fetch_status):
    """Probe one URL and return its verdict.

    :returns: ``None`` when the URL answers 200, the string
        ``"<code> <reason>"`` for any other HTTP status, or ``_UNREACHABLE``
        when ``fetch_status`` raised.
    """
    print("Checking %s" % url)
    try:
        status_code, reason = fetch_status(url)
    except Exception as err:
        # Deliberately broad: this is a best-effort second opinion, and any
        # failure simply means the original report entry is kept.
        print("Still failing: %s" % err)
        return _UNREACHABLE
    print("Req status code: %d" % status_code)
    if status_code == 200:
        return None
    print("Got Error: %d %s" % (status_code, reason))
    return "%d %s" % (status_code, reason)


def recheck_connection_errors(report, fetch_status):
    """Re-probe every ConnectionError entry of a linkchecker text report.

    Each distinct real URL is probed once and the verdict is reused for
    every entry that mentions it:

    * 200: the entry is removed from the report;
    * other HTTP status: the entry is kept, with the ConnectionError text
      replaced by ``"<code> <reason>"``;
    * probe failed: the entry is kept unchanged and not counted as fixed.

    Text outside the matched entries is copied through untouched.

    :param report: full text of the linkchecker report.
    :param fetch_status: callable taking a URL and returning
        ``(status_code, reason)``; it may raise to signal failure.
    :returns: ``(new_report, fixed_count)``.
    """
    pieces = []
    verdicts = {}
    fixed = 0
    cursor = 0
    for match in pattern.finditer(report):
        pieces.append(report[cursor:match.start()])
        cursor = match.end()
        url = match.group('real_url')
        if url not in verdicts:
            verdicts[url] = _recheck(url, fetch_status)
        verdict = verdicts[url]
        entry = match.group(0)
        if verdict is _UNREACHABLE:
            pieces.append(entry)
            continue
        fixed += 1
        if verdict is not None:
            pieces.append(entry.replace(match.group('reason'), verdict))
    pieces.append(report[cursor:])
    return "".join(pieces), fixed


if __name__ == "__main__":
    # Have the final output of linkchecker
    with open('./linkchecker-out.txt', 'r') as ifile:
        report_text = ifile.read()

    fixed_report, match_cnt = recheck_connection_errors(report_text, _http_status)

    with open(OUTPUT_FILE, 'w') as ofile:
        ofile.write(fixed_report)
    print("--------------------------")
    print("Fix %d ConnectionError\nFinal report: %s" % (match_cnt, OUTPUT_FILE))