import difflib
import json
import sys

import requests
from bs4 import BeautifulSoup as bs

# Seconds to wait for a single HTTP response. Without a timeout,
# requests.get() can block forever on a server that stops answering.
REQUEST_TIMEOUT = 30

# The configuration is read at import time from the JSON file named by the
# first command-line argument; every function below uses this global.
with open(sys.argv[1]) as config_file:
    config = json.load(config_file)


def get_file(urlpart, prefixed=True):
    """Download one page of the checked site and return its text.

    The URL is config['source']['path'] (surrounding slashes stripped)
    plus '/', then config['source']['prefix'] when ``prefixed`` is true,
    then ``urlpart``.

    Raises whatever ``requests`` raises for a failed request, including
    the error from ``raise_for_status()`` on an error HTTP status and a
    timeout after REQUEST_TIMEOUT seconds.
    """
    url = config['source']['path'].strip('/') + '/'
    if prefixed:
        url += config['source']['prefix']
    url += urlpart
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    # Decode the body as UTF-8 regardless of what the response declares.
    r.encoding = 'utf8'
    return r.text


def check_round_date(round_no, round_date):
    """Print a message if the date on a round page is not ``round_date``.

    The page fetched is 'runda<round_no>.html'. The text of its first
    ``td.bdnt12`` cell is split on a double non-breaking space and the
    second part is compared with ``round_date``.
    """
    round_content = bs(get_file('runda%d.html' % round_no), 'lxml')
    date_cell = round_content.select('td.bdnt12')[0].text.split('\xa0\xa0')
    if date_cell[1] != round_date:
        print('Round date for round %d differs: expected "%s" got "%s"' % (
            round_no, round_date, date_cell[1]))


def check_dates():
    """Check every entry of config['round_dates'] against its round page.

    Rounds are numbered from 1 in list order.
    """
    for round_no, round_date in enumerate(config['round_dates'], start=1):
        check_round_date(round_no, round_date)


def check_logoh():
    """Print the differences between the expected and the published logo.

    The published HTML is config['logoh']['custom_file'] (fetched without
    the source prefix) when that key is present, otherwise 'logo.html'.
    The expected text is the template file under 'config/' with every
    '%name%' placeholder replaced by the matching value from
    config['logoh']['variables'] (optional). Both texts are compared line
    by line with surrounding whitespace stripped; only differing lines of
    the difflib output are printed.
    """
    logoh_config = config['logoh']
    if 'custom_file' in logoh_config:
        html_logoh = get_file(logoh_config['custom_file'], prefixed=False)
    else:
        html_logoh = get_file('logo.html')
    with open('config/' + logoh_config['template']) as logoh_file:
        template_logoh = logoh_file.read()
    for var, val in logoh_config.get('variables', {}).items():
        template_logoh = template_logoh.replace('%' + var + '%', val)
    expected_lines = [line.strip() for line in template_logoh.splitlines()]
    actual_lines = [line.strip() for line in html_logoh.splitlines()]
    for diff in difflib.Differ().compare(expected_lines, actual_lines):
        # Differ prefixes unchanged lines with a space; skip those.
        if not diff.startswith(' '):
            print(diff)


def check_vp_table():
    """Compare the published VP table with the expected one from config.

    The expected table is read from 'config/' + config['vp_table']: one
    entry per non-blank line, split on ' - ' into the two compared
    fields. On the page ('vptable.html') each ``td.bdc1`` cell is paired
    with the ``td.bdc`` cell at the same position of the same row, and
    the pairs are matched against the expected entries in document order.
    Every mismatch is printed; so is every pair found on the page after
    the expected entries have run out.
    """
    with open('config/' + config['vp_table']) as vp_file:
        vp_table = [
            line.strip().split(' - ') for line in vp_file if line.strip()
        ]
    vp_html = bs(get_file('vptable.html'), 'lxml')
    vp_row = 0
    for row in vp_html.select('tr'):
        imp_cells = row.select('td.bdc1')
        vp_cells = row.select('td.bdc')
        # Only rows holding a non-empty, equal number of both cell kinds.
        if not imp_cells or len(imp_cells) != len(vp_cells):
            continue
        # select() also returns cells of nested rows; handle only the row
        # that is the direct parent of the cells so nothing is seen twice.
        if imp_cells[0].parent != row:
            continue
        for imp_cell, vp_cell in zip(imp_cells, vp_cells):
            imp_text = imp_cell.text.strip()
            vp_text = vp_cell.text.strip()
            if vp_row >= len(vp_table):
                # The page has more entries than expected: report it
                # instead of failing with an IndexError.
                print(
                    'VP table differs: expected no more entries, '
                    'got (%s - %s)' % (imp_text, vp_text))
            else:
                expected = vp_table[vp_row]
                if (imp_text.replace('\xa0', ' ') != expected[0].strip()
                        or vp_text.replace('\xa0', ' ')
                        != expected[1].strip()):
                    print(
                        'VP table differs: expected (%s - %s), '
                        'got (%s - %s)' % (*expected, imp_text, vp_text))
            vp_row += 1


def check_page_language():
    """Print a message if the leaderboard header is not config['language'].

    The text compared is that of the first ``tr > td.bdcc12`` cell of
    'leaderb.html'.
    """
    leaderboard = bs(get_file('leaderb.html'), 'lxml')
    place_str = leaderboard.select('tr > td.bdcc12')[0].text
    if place_str != config['language']:
        print('Page language does not match: expected "%s", got "%s"' % (
            config['language'], place_str))


def get_cezar_team(team_id):
    """Return the team name shown on the msc.com.pl/cezar page for an id.

    The name is the first child of the first ``p.msc_pagetitle`` element,
    stripped, with the 'Drużyna ' text removed.
    """
    r = requests.get(
        'https://msc.com.pl/cezar/?p=213&action=1&id=%d' % (team_id),
        timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    r.encoding = 'utf8'
    cezar_html = bs(r.text, 'lxml')
    page_title = cezar_html.select('p.msc_pagetitle')[0].contents[0]
    return page_title.strip().replace('Drużyna ', '')


def get_cezar_names():
    """Return a dict mapping team number to its name fetched from Cezar.

    The file named by config['team_names'] is read; each non-blank line
    holds whitespace-separated integers, the first being the team number
    and the second the id passed to get_cezar_team().
    """
    teams = {}
    with open(config['team_names']) as team_file:
        for line in team_file:
            fields = line.split()
            if fields:
                teams[int(fields[0])] = int(fields[1])
    return {
        team_no: get_cezar_team(team_id)
        for team_no, team_id in teams.items()
    }


def get_html_names():
    """Return a dict mapping team number to the name on the leaderboard.

    Looks at links carrying both ``onmouseover`` and ``href`` attributes
    inside ``td.bd`` cells of 'leaderb.html'. A link counts as a team
    link when its href starts with the source prefix followed by 'T';
    the number is what sits between that marker and the first '.'.
    """
    team_marker = config['source']['prefix'] + 'T'
    leaderboard = bs(get_file('leaderb.html'), 'lxml')
    teams = {}
    for cell in leaderboard.select('td.bd'):
        for link in cell.select('a[onmouseover][href]'):
            href = link['href']
            if href.startswith(team_marker):
                team_id = int(href.replace(team_marker, '').split('.')[0])
                teams[team_id] = link.text.strip()
    return teams


def check_team_names():
    """Print a message for every team whose published name is wrong.

    Names from get_cezar_names() are the expected values; names from
    get_html_names() are the published ones. A team absent from the
    leaderboard is reported as well rather than raising KeyError.
    """
    cezar_names = get_cezar_names()
    html_names = get_html_names()
    for team_id, team_name in cezar_names.items():
        if team_id not in html_names:
            print(
                'Team name differs: expected "%s", but team %d is missing '
                'from the leaderboard' % (team_name, team_id))
        elif html_names[team_id] != team_name:
            print('Team name differs: expected "%s", got "%s"' % (
                team_name, html_names[team_id]))


print('Checking %s' % (sys.argv[1]))
check_dates()
check_logoh()
check_vp_table()
check_page_language()
check_team_names()