preseason/checks.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

import difflib, json, sys

import requests
from bs4 import BeautifulSoup as bs


# Load the run configuration from the JSON file named by the first
# command-line argument.  The file is opened in binary mode so that
# json.load() detects the encoding itself (UTF-8/16/32, with or without a
# BOM) instead of relying on the platform's default text encoding.
with open(sys.argv[1], 'rb') as config_file:
    config = json.load(config_file)


def get_file(urlpart, prefixed=True, timeout=30):
    """Download a page from the configured source and return its text.

    The URL is built from ``config['source']['path']`` (leading/trailing
    slashes removed, a single trailing slash appended), optionally followed
    by ``config['source']['prefix']``, followed by ``urlpart``.

    Args:
        urlpart: File name appended to the end of the URL.
        prefixed: When true, the configured prefix is inserted directly
            before ``urlpart``.
        timeout: Number of seconds passed to ``requests.get`` as its
            timeout.

    Returns:
        The response body, decoded as UTF-8.

    Raises:
        requests.HTTPError: If the response carries an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = config['source']['path'].strip('/') + '/'
    if prefixed:
        url += config['source']['prefix']
    url += urlpart
    # requests never times out on its own; without an explicit timeout an
    # unresponsive server would block the whole check run indefinitely.
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    # Force UTF-8 rather than trusting the encoding guessed from headers.
    r.encoding = 'utf8'
    return r.text


def check_round_date(round_no, round_date):
    """Report when the date shown on a round page differs from the expected one.

    Fetches ``runda<round_no>.html``, takes the first ``td.bdnt12`` cell,
    splits its text on a double non-breaking space and compares the second
    piece with ``round_date``.  A mismatch is printed; a match is silent.

    Args:
        round_no: Round number used to build the page name.
        round_date: Expected date string.
    """
    page = bs(get_file('runda%d.html' % round_no), 'lxml')
    header_cell = page.select('td.bdnt12')[0]
    header_parts = header_cell.text.split('\xa0\xa0')
    actual_date = header_parts[1]
    if actual_date == round_date:
        return
    print('Round date for round %d differs: expected "%s" got "%s"' % (
        round_no, round_date, actual_date))


def check_dates():
    """Check every date in ``config['round_dates']`` against its round page.

    Rounds are numbered from 1 in the order the dates appear in the config.
    """
    for round_no, expected_date in enumerate(config['round_dates'], start=1):
        check_round_date(round_no, expected_date)


def check_logoh():
    """Print the differences between the published logo page and its template.

    The published page is ``config['logoh']['custom_file']`` (fetched without
    the source prefix) when that key is present, otherwise ``logo.html``.
    The template is read from ``config/<template>``, and every
    ``%name%`` placeholder listed in ``config['logoh']['variables']`` is
    substituted before comparing.  Lines are compared with surrounding
    whitespace stripped; only differing lines are printed.
    """
    logoh_config = config['logoh']
    if 'custom_file' in logoh_config:
        published = get_file(logoh_config['custom_file'], prefixed=False)
    else:
        published = get_file('logo.html')

    with open('config/' + logoh_config['template']) as template_file:
        expected = template_file.read()
    for name, value in logoh_config.get('variables', {}).items():
        placeholder = '%' + name + '%'
        expected = expected.replace(placeholder, value)

    expected_lines = [line.strip() for line in expected.splitlines()]
    published_lines = [line.strip() for line in published.splitlines()]
    for entry in difflib.Differ().compare(expected_lines, published_lines):
        # Differ prefixes unchanged lines with a space; skip those.
        if entry.startswith(' '):
            continue
        print(entry)


def check_vp_table():
    """Compare the published VP table with the expected one and print diffs.

    The expected table is read from ``config/<config['vp_table']>``: each
    non-blank line is split on ``' - '`` into an IMP field and a VP field.
    The published table comes from ``vptable.html``; in every table row
    whose ``td.bdc1`` and ``td.bdc`` cells pair up one-to-one, each pair is
    matched, in document order, against the next expected line.  Cell text
    is stripped and non-breaking spaces are treated as ordinary spaces for
    the comparison.

    Besides mismatching entries, the function also reports entries that
    appear on the page beyond the end of the expected table (previously an
    ``IndexError``) and expected entries that never appear on the page
    (previously unreported).
    """
    with open('config/' + config['vp_table']) as vp_file:
        vp_table = [
            line.strip().split(' - ') for line in vp_file if line.strip()]
    vp_html = bs(get_file('vptable.html'), 'lxml')
    vp_row = 0
    for row in vp_html.select('tr'):
        imp_cells = row.select('td.bdc1')
        vp_cells = row.select('td.bdc')
        if not imp_cells or len(imp_cells) != len(vp_cells):
            continue
        # Only handle rows that directly own the cells; presumably this
        # skips outer rows that merely wrap a nested table -- TODO confirm.
        if imp_cells[0].parent != row:
            continue
        for imp_cell, vp_cell in zip(imp_cells, vp_cells):
            imp_text = imp_cell.text.strip()
            vp_text = vp_cell.text.strip()
            if vp_row >= len(vp_table):
                # The page holds more entries than the expected table.
                print('VP table differs: expected nothing, got (%s - %s)' % (
                    imp_text, vp_text))
            elif (imp_text.replace('\xa0', ' ') != vp_table[vp_row][0].strip()) or \
                 (vp_text.replace('\xa0', ' ') != vp_table[vp_row][1].strip()):
                print('VP table differs: expected (%s - %s), got (%s - %s)' % (
                    *vp_table[vp_row],
                    imp_text, vp_text))
            vp_row += 1
    # Expected entries that were never matched against a page entry.
    for missing in vp_table[vp_row:]:
        print('VP table differs: expected (%s), got nothing' % (
            ' - '.join(missing)))


def check_page_language():
    """Report when the leaderboard header text differs from ``config['language']``.

    The text of the first ``tr > td.bdcc12`` cell of ``leaderb.html`` is
    compared with the configured string; a mismatch is printed.
    """
    page = bs(get_file('leaderb.html'), 'lxml')
    header_text = page.select('tr > td.bdcc12')[0].text
    if header_text == config['language']:
        return
    print('Page language does not match: expected "%s", got "%s"' % (
        config['language'],
        header_text))


def get_cezar_team(team_id, timeout=30):
    """Return the team name shown on the Cezar page for ``team_id``.

    Fetches ``https://msc.com.pl/cezar/?p=213&action=1&id=<team_id>``, takes
    the first child of the first ``p.msc_pagetitle`` element, strips it and
    removes the ``'Drużyna '`` label from it.

    Args:
        team_id: Integer team identifier placed in the URL.
        timeout: Number of seconds passed to ``requests.get`` as its
            timeout.

    Returns:
        The team name as a string.

    Raises:
        requests.HTTPError: If the response carries an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # requests never times out on its own; without an explicit timeout an
    # unresponsive server would block the whole check run indefinitely.
    r = requests.get(
        'https://msc.com.pl/cezar/?p=213&action=1&id=%d' % (team_id),
        timeout=timeout)
    r.raise_for_status()
    # Force UTF-8 rather than trusting the encoding guessed from headers.
    r.encoding = 'utf8'
    cezar_html = bs(r.text, 'lxml')
    title = cezar_html.select('p.msc_pagetitle')[0].contents[0].strip().replace('Drużyna ', '')
    return title


def get_cezar_names():
    """Map team numbers to the team names registered in Cezar.

    Reads the file named by ``config['team_names']``.  Every non-blank line
    is split on whitespace; its first two fields are parsed as integers and
    used as the team number and the Cezar team id.  Each id is then resolved
    with ``get_cezar_team``.

    Returns:
        A dict of ``{team number: Cezar team name}``.
    """
    with open(config['team_names']) as team_file:
        raw_lines = team_file.readlines()

    # First parse the whole file, then hit the network: a later line with the
    # same team number overrides an earlier one before anything is fetched.
    cezar_ids = {}
    for raw_line in raw_lines:
        fields = raw_line.split()
        if not fields:
            continue
        cezar_ids[int(fields[0])] = int(fields[1])

    names = {}
    for team_no, cezar_id in cezar_ids.items():
        names[team_no] = get_cezar_team(cezar_id)
    return names


def get_html_names():
    """Map team numbers to the team names shown on the leaderboard page.

    Scans every ``a[onmouseover][href]`` link inside ``td.bd`` cells of
    ``leaderb.html``.  Links whose ``href`` starts with the configured source
    prefix followed by ``'T'`` are kept: the team number is the integer
    left after removing that marker and cutting at the first dot, and the
    name is the stripped link text.

    Returns:
        A dict of ``{team number: team name}``.
    """
    leaderboard = bs(get_file('leaderb.html'), 'lxml')
    team_marker = config['source']['prefix'] + 'T'
    names = {}
    for cell in leaderboard.select('td.bd'):
        for anchor in cell.select('a[onmouseover][href]'):
            href = anchor['href']
            if not href.startswith(team_marker):
                continue
            number_part = href.replace(team_marker, '').split('.')[0]
            team_no = int(number_part)
            names[team_no] = anchor.text.strip()
    return names


def check_team_names():
    """Report teams whose leaderboard name differs from the Cezar name.

    Names from ``get_cezar_names`` are treated as the expected values and
    are looked up by team number in the result of ``get_html_names``.  A
    team that is absent from the leaderboard is reported instead of
    aborting the run with a ``KeyError``.
    """
    cezar_names = get_cezar_names()
    html_names = get_html_names()
    for team_id, expected_name in cezar_names.items():
        if team_id not in html_names:
            print('Team %d not found on leaderboard: expected "%s"' % (
                team_id, expected_name))
            continue
        if html_names[team_id] != expected_name:
            print('Team name differs: expected "%s", got "%s"' % (expected_name, html_names[team_id]))


# Announce which config is being verified, then run every check in order.
print('Checking %s' % (sys.argv[1]))
for run_check in (
        check_dates,
        check_logoh,
        check_vp_table,
        check_page_language,
        check_team_names,
):
    run_check()