diff options
-rwxr-xr-x | check-link.py | 60 |
1 files changed, 60 insertions, 0 deletions
#!/usr/bin/python
"""Run linkchecker and re-test every link it reported as a ConnectionError.

The script forwards its command-line arguments to ``linkchecker``, forcing
``-F text`` so a plain-text report file is produced.  Every report entry
whose result is ``Error: ConnectionError: ...`` is then re-requested with
``requests``:

* the URL answers 200       -> the entry is removed from the final report;
* it answers another status -> the ConnectionError text in the entry is
                               replaced by ``<status> <reason>``;
* the request itself fails  -> the entry is kept unchanged.

The rewritten report is written to ``OUTPUT_FILE``.
"""

import re
import subprocess
import sys

OUTPUT_FILE = "./link_check_result.txt"

# Report file read back after linkchecker has run with ``-F text``.
LINKCHECKER_REPORT = './linkchecker-out.txt'

# Seconds to wait for a re-check request; without a timeout a single
# unresponsive host would block the whole run indefinitely.
REQUEST_TIMEOUT = 30

# One report entry whose result is a ConnectionError.  The Name, Parent URL
# and Size lines are optional; the entry is terminated by an empty line.
PATTERN = (
    '^URL.*$\n'
    '(^Name.*$\n)?'
    '(^Parent URL.*$\n)?'
    '^Real URL\\s+(?P<real_url>.*)$\n'
    '^Check time\\s+.*$\n'
    '(^Size.*$\n)?'
    '^Result\\s+Error:\\s+(?P<reason>ConnectionError:.*)$\n'
    '^$\n'
)


def build_command(argv):
    """Return the linkchecker command line derived from *argv*.

    ``argv[0]`` (this script's name) is replaced by ``linkchecker``.  An
    existing ``-F`` option has its value overwritten with ``text``;
    otherwise ``-F text`` is inserted just before the last argument.  The
    input list is not modified.
    """
    command = ['linkchecker'] + list(argv[1:])
    if '-F' in command:
        value_at = command.index('-F') + 1
        if value_at < len(command):
            command[value_at] = 'text'
        else:
            # ``-F`` was the last argument and had no value to overwrite.
            command.append('text')
    else:
        # Insert before the final argument, but never before the program
        # name when no arguments were given at all.
        insert_at = max(len(command) - 1, 1)
        command[insert_at:insert_at] = ['-F', 'text']
    return command


def probe_url(url):
    """Request *url* and return ``(status_code, reason)``.

    Returns ``None`` when the request could not be completed.
    """
    # Imported here so the report-rewriting helpers stay importable on a
    # machine where ``requests`` is not installed.
    import requests

    print("Checking %s" % url)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except Exception as err:
        # Best effort: one URL that cannot be probed must not abort the
        # run; the caller keeps the original report entry in that case.
        print("Request failed: %s" % err)
        return None
    print("Req status code: %d" % response.status_code)
    if response.status_code != 200:
        print("Got Error: %d %s" % (response.status_code, response.reason))
    return response.status_code, response.reason


def rewrite_report(report, probe=probe_url):
    """Re-check the ConnectionError entries of *report*.

    *probe* is called once per distinct ``Real URL`` and must return
    ``(status_code, reason)`` or ``None`` if the request failed.  The
    outcome is applied to every entry sharing that URL, so repeated
    entries are treated exactly like the first one.

    Returns ``(new_report, fixed)`` where *fixed* is the number of entries
    that were removed or had their error text replaced.
    """
    entry_re = re.compile(PATTERN, re.MULTILINE)
    outcomes = {}   # real URL -> probe result, so each URL is requested once
    pieces = []
    fixed = 0
    cursor = 0
    for match in entry_re.finditer(report):
        # Text between two matched entries is copied through untouched.
        pieces.append(report[cursor:match.start()])
        cursor = match.end()

        url = match.group('real_url')
        if url not in outcomes:
            outcomes[url] = probe(url)
        outcome = outcomes[url]

        entry = match.group(0)
        if outcome is None:
            # Could not re-check: keep the original ConnectionError entry.
            pieces.append(entry)
            continue

        fixed += 1
        status, reason = outcome
        if status != 200:
            pieces.append(
                entry.replace(match.group('reason'),
                              "%d %s" % (status, reason)))
        # A 200 answer means the ConnectionError was spurious, so the
        # entry is left out of the final report.
    pieces.append(report[cursor:])
    return ''.join(pieces), fixed


def main(argv=None):
    """Run linkchecker, post-process its report and write OUTPUT_FILE.

    Exits with status 1 when linkchecker itself returns 2.
    """
    command = build_command(sys.argv if argv is None else argv)

    # Run linkchecker here
    ret = subprocess.call(command)
    # NOTE(review): 2 is presumably linkchecker's "program error" status;
    # confirm against the linkchecker manual.
    if ret == 2:
        sys.exit(1)

    # Have the final output of linkchecker
    with open(LINKCHECKER_REPORT, 'r') as ifile:
        report = ifile.read()

    rewritten, fixed = rewrite_report(report)

    with open(OUTPUT_FILE, 'w') as ofile:
        ofile.write(rewritten)

    print("--------------------------")
    print("Fix %d ConnectionError\nFinal report: %s" % (fixed, OUTPUT_FILE))


if __name__ == "__main__":
    main()