"""Consistency checker for a tournament results website.

Usage: python check.py CONFIG.json CHECK [CHECK ...]

CONFIG.json describes the deployed site (source path/prefix, expected round
dates, template file names, team id mapping).  Each CHECK argument names a
``check_*`` function below, which fetches pages from the live site and prints
a tab-indented report line for every discrepancy found.
"""
import difflib
import io
import json
import sys

import requests
from bs4 import BeautifulSoup as bs

# Module-level side effect (intentional: this is a script, not a library):
# the first CLI argument is the JSON config consulted by every check below.
with open(sys.argv[1]) as config_file:
    config = json.load(config_file)


def get_file(urlpart, prefixed=True):
    """Fetch *urlpart* from the configured site and return its text.

    When *prefixed* is true the configured per-event prefix is inserted
    between the base path and *urlpart*.  Raises ``requests.HTTPError``
    on a non-2xx response.  The response is decoded as UTF-8 regardless
    of the server's declared charset.
    """
    url = config['source']['path'].strip('/') + '/'
    if prefixed:
        url += config['source']['prefix']
    url += urlpart
    r = requests.get(url)
    r.raise_for_status()
    r.encoding = 'utf8'
    return r.text


def check_round_date(round_no, round_date):
    """Compare the date shown on round *round_no*'s page with *round_date*."""
    round_html = get_file('runda%d.html' % round_no)
    round_content = bs(round_html, 'lxml')
    # The header cell holds "<round label>\xa0\xa0<date>"; the date is field 1.
    date_cell = round_content.select('td.bdnt12')[0].text.split('\xa0\xa0')
    if date_cell[1] != round_date:
        print('\tRound date for round %d differs: expected "%s" got "%s"' % (
            round_no, round_date, date_cell[1]))


def check_dates():
    """Verify every round page shows the date listed in the config."""
    for round_no, round_date in enumerate(config['round_dates'], start=1):
        check_round_date(round_no, round_date)


def print_diff(str_1, str_2):
    """Print (tab-indented) only the differing lines between two texts.

    Lines are compared stripped of surrounding whitespace; unchanged lines
    (Differ output starting with a space) are suppressed.
    """
    d = difflib.Differ()
    for diff in d.compare(
            [l.strip() for l in str_1.splitlines()],
            [l.strip() for l in str_2.splitlines()]):
        if not diff.startswith(' '):
            print('\t' + diff)


def check_logoh():
    """Diff the site's logo/header HTML against the local template.

    The config may name a custom server-side file; otherwise the standard
    ``logo.html`` is fetched.  ``%var%`` placeholders in the local template
    are substituted from ``config['logoh']['variables']`` before diffing.
    """
    if 'custom_file' in config['logoh']:
        html_logoh = get_file(config['logoh']['custom_file'], prefixed=False)
    else:
        html_logoh = get_file('logo.html')
    with open('config/' + config['logoh']['template']) as logoh_file:
        template_logoh = logoh_file.read()
    for var, val in config['logoh'].get('variables', {}).items():
        template_logoh = template_logoh.replace('%' + var + '%', val)
    print_diff(template_logoh, html_logoh)


def check_kolorki():
    """Diff the deployed stylesheet against the local template copy."""
    server_kolorki = get_file('css/kolorki.css', prefixed=False)
    # The local template is stored in ISO-8859-2 (Central European) encoding.
    with io.open('config/' + config['kolorki'],
                 encoding='iso-8859-2') as kolorki_file:
        template_kolorki = kolorki_file.read()
    print_diff(template_kolorki, server_kolorki)


def check_vp_table():
    """Compare the published IMP->VP conversion table with the local one.

    The local file has one "IMPS - VPS" pair per non-blank line.  On the
    page, IMP cells (``td.bdc1``) and VP cells (``td.bdc``) appear in
    matched rows; rows are consumed in order against the local table.
    """
    with open('config/' + config['vp_table']) as vp_file:
        vp_table = [l.strip().split(' - ')
                    for l in vp_file.readlines() if l.strip()]
    vp_html = bs(get_file('vptable.html'), 'lxml')
    vp_row = 0
    for row in vp_html.select('tr'):
        imp_cells = row.select('td.bdc1')
        vp_cells = row.select('td.bdc')
        if len(imp_cells) == len(vp_cells) and len(imp_cells) > 0:
            # select() also returns descendants of nested tables; only
            # process cells that belong directly to this row.
            if imp_cells[0].parent == row:
                for i in range(0, len(imp_cells)):
                    if (imp_cells[i].text.strip().replace('\xa0', ' ')
                            != vp_table[vp_row][0].strip()) or \
                       (vp_cells[i].text.strip().replace('\xa0', ' ')
                            != vp_table[vp_row][1].strip()):
                        print('\tVP table differs: '
                              'expected (%s - %s), got (%s - %s)' % (
                                  *vp_table[vp_row],
                                  imp_cells[i].text.strip(),
                                  vp_cells[i].text.strip()))
                    vp_row += 1


def check_page_language():
    """Verify the leaderboard's place-column header matches the config.

    The first ``td.bdcc12`` cell holds a language-specific label, used
    here as a proxy for the page language setting.
    """
    leaderboard = bs(get_file('leaderb.html'), 'lxml')
    place_str = leaderboard.select('tr > td.bdcc12')[0].text
    if place_str != config['language']:
        print('\tPage language does not match: expected "%s", got "%s"' % (
            config['language'], place_str))


def get_cezar_data(team_id):
    """Fetch team *team_id* from the Cezar registry.

    Returns ``(title, player_ids)``: the team's display name (with the
    'Drużyna ' prefix stripped and whitespace normalized) and the list of
    registered player ids scraped from the page's player links.
    """
    r = requests.get(
        'https://msc.com.pl/cezar/?p=213&action=1&id=%d' % (team_id))
    r.raise_for_status()
    r.encoding = 'utf8'
    cezar_html = bs(r.text, 'lxml')
    # NOTE(review): the second replace collapses double spaces left over
    # after \xa0 normalization — reconstructed from whitespace-damaged
    # source; confirm against the original file.
    title = cezar_html.select('p.msc_pagetitle')[0].contents[0].strip() \
        .replace('Drużyna ', '').replace('\xa0', ' ').replace('  ', ' ')
    player_ids = []
    for link in cezar_html.select('a[href]'):
        if link['href'].startswith('?p=21&pid='):
            player_ids.append(int(link['href'].replace('?p=21&pid=', '')))
    return (title, player_ids)


def get_cezar_names():
    """Return {team_no: (title, player_ids)} for every configured team.

    The config's team_names file maps local team numbers to Cezar team
    ids, one "TEAM_NO TEAM_ID" pair per non-blank line.
    """
    with open(config['team_names']) as team_file:
        teams_lines = team_file.readlines()
    teams = {}
    for t in teams_lines:
        t = t.strip()
        if t:
            t = t.split()
            teams[int(t[0])] = int(t[1])
    return {
        team_no: get_cezar_data(team_id)
        for team_no, team_id in teams.items()
    }


def get_html_names():
    """Scrape the leaderboard for {team_id: (team_name, player_ids)}.

    Team links on the leaderboard point at ``<prefix>T<id>.html`` pages;
    each team page is fetched in turn and its Cezar player links parsed
    for player ids.
    """
    leaderboard = bs(get_file('leaderb.html'), 'lxml')
    cells = leaderboard.select('td.bd')
    teams = {}
    for cell in cells:
        links = cell.select('a[onmouseover][href]')
        for link in links:
            if link['href'].startswith(config['source']['prefix'] + 'T'):
                team_id = int(link['href'].replace(
                    config['source']['prefix'] + 'T', '').split('.')[0])
                team_name = link.text.strip()
                team_page = bs(get_file('T%d.html' % (team_id)), 'lxml')
                team_players = []
                # Renamed from 'link' to avoid shadowing the outer loop var.
                for player_link in team_page.select('a[href]'):
                    if player_link['href'].startswith(
                            'http://www.msc.com.pl/cezar/?mycl=1&p=21&r='):
                        team_players.append(int(player_link['href'].replace(
                            'http://www.msc.com.pl/cezar/?mycl=1&p=21&r=',
                            '')))
                teams[team_id] = (team_name, team_players)
    return teams


def check_team_names():
    """Cross-check site team names and rosters against the Cezar registry.

    Reports teams whose displayed name differs, teams with no player links
    on their page, and players linked on the site but not registered for
    that team in Cezar.
    """
    cezar_names = get_cezar_names()
    html_names = get_html_names()
    for team_id, team_name in cezar_names.items():
        if html_names[team_id][0] != cezar_names[team_id][0]:
            print('\tTeam name differs: expected "%s", got "%s"' % (
                cezar_names[team_id][0], html_names[team_id][0]))
        if not html_names[team_id][1]:
            print('\tNo player links found for team "%s"' % (
                html_names[team_id][0]))
        else:
            for pid in html_names[team_id][1]:
                if pid not in cezar_names[team_id][1]:
                    print('\tPlayer #%d not a member of team "%s"' % (
                        pid, html_names[team_id][0]))


# Script entry: remaining CLI arguments name the checks to run, dispatched
# by name to the corresponding check_* function above.
print('Checking %s' % (sys.argv[1]))
for check in sys.argv[2:]:
    print(' ' + check)
    globals()['check_' + check]()