# Source path: jfr_playoff/data/match/jfrhtml.py
# (repository blob a86981137936f632ff305c0ef62cc7083b7aef85)
import copy
import re
from urlparse import urljoin

from jfr_playoff.data.match import MatchInfoClient
from jfr_playoff.remote import RemoteUrl as p_remote
from jfr_playoff.logger import PlayoffLogger


class JFRHtmlMatchInfo(MatchInfoClient):
    @property
    def priority(self):
        return 30

    def is_capable(self):
        return ('link' in self.settings) and ('#' not in self.settings['link'])

    def get_exceptions(self, method):
        return (TypeError, IndexError, KeyError, IOError, ValueError)

    def get_match_link(self):
        """Log and return the link pre-defined in the match settings."""
        logger = PlayoffLogger.get('match.jfrhtml')
        match_id = self.settings['id']
        predefined_link = self.settings['link']
        logger.info(
            'match #%d link pre-defined: %s', match_id, predefined_link)
        return predefined_link

    def _find_table_row(self, url):
        """Fetch ``url`` and locate the row of the configured table.

        Scans nested rows (``tr tr``) in document order and returns the
        first one having a ``td.t1`` cell whose stripped text equals
        ``str(settings['table'])``. Returns None when no row matches.
        """
        page = p_remote.fetch(url)
        for candidate in page.select('tr tr'):
            is_wanted = any(
                label_cell.text.strip() == str(self.settings['table'])
                for label_cell in candidate.select('td.t1'))
            if not is_wanted:
                continue
            PlayoffLogger.get('match.jfrhtml').debug(
                'HTML row for table %d found: %s',
                self.settings['table'], candidate)
            return candidate
        PlayoffLogger.get('match.jfrhtml').debug(
            'HTML row for table %d not found',
            self.settings['table'])
        return None

    def fetch_teams(self, teams):
        """Fill the first two ``teams`` entries from the match HTML row.

        Scores are read from the text nodes of the last ``td.bdc`` cell.
        When those do not parse as numbers, the match is handled as a
        single-segment one: scores come from the link inside the last
        ``td.bdcg`` / ``td.bdc`` cell (or default to 0.0), and the
        carry-over read from the first ``td.bdc`` cell is added to them.
        Team names are the first text node of every ``a[onmouseover]``
        link in the row.

        Args:
            teams: indexable collection; items 0 and 1 get their ``name``,
                ``known_teams`` and ``score`` attributes assigned.

        Returns:
            The same ``teams`` object.

        Raises:
            ValueError: when the link is not set, the row is not found,
                or a score text cannot be parsed.
            IndexError: when expected cells, scores or names are missing.
        """
        def text_nodes(node):
            # direct children of the node that are plain text, not tags
            return [
                child for child in node.contents
                if isinstance(child, unicode)]

        match_link = self.settings['link']
        if match_link is None:
            raise ValueError('link not set')
        row = self._find_table_row(match_link)
        if row is None:
            raise ValueError('table row not found')
        try:
            scores = [
                float(chunk)
                for chunk in text_nodes(row.select('td.bdc')[-1])]
        except ValueError:
            # single-segment match: try the running segment link first,
            # then the static one
            for selector in ('td.bdcg a', 'td.bdc a'):
                try:
                    scores = [
                        float(chunk.strip())
                        for chunk in text_nodes(row.select(selector)[-1])]
                except IndexError:
                    continue
                break
            else:
                # toweled single-segment
                scores = [0.0, 0.0]
            # carry-over
            carry_over = []
            for chunk in text_nodes(row.select('td.bdc')[0]):
                stripped = chunk.strip()
                carry_over.append(float(stripped) if stripped else 0.0)
            if len(carry_over) < 2:
                # no carry-over, possibly no carry-over cells or empty
                carry_over = [0.0, 0.0]
            for side in (0, 1):
                scores[side] += carry_over[side]
        team_names = []
        for anchor in row.select('a[onmouseover]'):
            labels = text_nodes(anchor)
            if labels:
                team_names.append(labels[0].strip(u'\xa0'))
        for side in (0, 1):
            side_name = team_names[side]
            team = teams[side]
            team.name = [side_name]
            team.known_teams = 1
            team.score = scores[side]
        PlayoffLogger.get('match.jfrhtml').info(
            'scores fetched: %s', teams)
        return teams


    def _has_segment_link(self, cell):
        links = [link for link in cell.select('a[href]')
                 if re.match(r'^.*\d+t\d+-\d+\.htm$', link['href'])]
        return len(links) > 0

    def _has_towel_image(self, cell):
        return len(cell.select('img[alt="towel"]')) > 0

    def _get_html_running_boards(self, cell):
        return int(cell.contents[-1].strip())

    def segment_board_count(self, segment_url):
        """Count the boards listed on a segment page.

        A table row counts as a board when it contains a ``td.bdcc a.zb``
        element; the board counts as played when any of its ``td.bdc``
        cells has non-blank text.

        Returns:
            tuple: ``(played_boards, board_count)``.
        """
        page = p_remote.fetch(segment_url)
        total = 0
        played = 0
        for row in page.find_all('tr'):
            if len(row.select('td.bdcc a.zb')) == 0:
                # not a board row
                continue
            total += 1
            if any(cell.text.strip() for cell in row.select('td.bdc')):
                played += 1
        return played, total

    def _get_finished_info(self, cell):
        """Inspect the segment linked from ``cell``.

        The first ``a[href]`` of the cell is resolved against the match
        link, its ``.htm`` suffix is turned into ``.html`` and the boards
        of that page are counted.

        Returns:
            tuple: ``(board_count, is_finished)``; ``(0, False)`` when the
            cell has no link or the segment page cannot be fetched.
        """
        anchors = cell.select('a[href]')
        if len(anchors) == 0:
            return 0, False
        absolute_url = urljoin(self.settings['link'], anchors[0]['href'])
        segment_url = re.sub(r'\.htm$', '.html', absolute_url)
        outcome = (0, False)
        try:
            played, total = self.segment_board_count(segment_url)
            PlayoffLogger.get('match.jfrhtml').info(
                'HTML played boards count for segment: %d/%d',
                played, total)
            outcome = (total, played >= total)
        except IOError as error:
            PlayoffLogger.get('match.jfrhtml').info(
                'cannot fetch played boards count for segment: %s(%s)',
                type(error).__name__, str(error))
        return outcome

    def board_count(self):
        """Work out how many boards of the match were played, and of how many.

        Segment cells are looked up among ``td.bdc`` cells first and, when
        none of them holds a segment link, among ``td.bdcg`` cells.
        Running segments are the ``td.bdca`` cells of the row.

        Returns:
            tuple: ``(played_boards, total_boards)``; ``(1, 1)`` when no
            segment link is found but the inspected cells hold towel images.

        Raises:
            ValueError: when the table row or the segment cells are not
                found, or when the number of boards per segment cannot be
                established from any non-running segment.
        """
        row = self._find_table_row(self.settings['link'])
        if row is None:
            raise ValueError('table row not found')
        for selector in ['td.bdc', 'td.bdcg']:
            cells = row.select(selector)
            segments = [cell for cell in cells if self._has_segment_link(cell)]
            towels = [cell for cell in cells if self._has_towel_image(cell)]
            if len(segments) == 0:
                # in single-segment match,
                # there are no td.bdc cells with segment links,
                # but maybe it's a multi-segment match with towels
                if len(towels) > 0:
                    PlayoffLogger.get('match.jfrhtml').info(
                        'board count: all towels')
                    # entire match is toweled, so mark as finished
                    return 1, 1
            else:
                # not a single-segment match
                # no need to look for td.bdcg cells
                break
        if len(segments) == 0:
            raise ValueError('segments not found')
        running_segments = row.select('td.bdca')
        running_boards = sum([
            self._get_html_running_boards(segment)
            for segment
            in running_segments])
        finished_segments = []
        boards_in_segment = None
        for segment in segments:
            if segment not in running_segments:
                boards, is_finished = self._get_finished_info(segment)
                if is_finished:
                    finished_segments.append(segment)
                # the first segment with a known size sets the segment size
                if boards_in_segment is None and boards > 0:
                    boards_in_segment = boards
        if boards_in_segment is None:
            # No segment page yielded a board count (e.g. none could be
            # fetched). Fail explicitly here instead of formatting None
            # with %d and multiplying by None further down.
            raise ValueError('boards per segment count not found')
        if 'bdcg' in segments[0]['class']:
            # only a single-segment match will yield
            # td.bdcg cells with segment scores
            total_boards = boards_in_segment
        else:
            PlayoffLogger.get('match.jfrhtml').info(
                'board count, found: %d finished segments, %d towels, '
                '%d boards per segment and %d boards in running segment',
                len(finished_segments), len(towels),
                boards_in_segment, running_boards)
            total_boards = (
                len(segments) + len(towels) + len(running_segments)) \
                * boards_in_segment
        played_boards = (len(towels) + len(finished_segments)) \
            * boards_in_segment \
            + running_boards
        PlayoffLogger.get('match.jfrhtml').info(
            'board count: %d/%d', played_boards, total_boards)
        return played_boards, total_boards

    def running_link(self):
        """Return the link of the running segment, if it is live.

        The first ``td.bdcg a[href]`` link of the match row is resolved
        against the match link. When the segment page behind it (with
        ``.htm`` turned into ``.html``) shows no played boards, or cannot
        be processed, the match link itself is returned instead.

        Returns:
            The running segment URL, or ``settings['link']`` as fallback.

        Raises:
            ValueError: when the table row or the running link is not found.
        """
        row = self._find_table_row(self.settings['link'])
        if row is None:
            # same check as in fetch_teams() and board_count(); without it
            # row.select() raises AttributeError, which get_exceptions()
            # does not list
            raise ValueError('table row not found')
        running_link = row.select('td.bdcg a[href]')
        if len(running_link) == 0:
            raise ValueError('running link not found')
        PlayoffLogger.get('match.jfrhtml').info(
            'fetched running link from HTML: %s', running_link)
        match_link = urljoin(self.settings['link'], running_link[0]['href'])
        try:
            boards_played = self.segment_board_count(
                re.sub(r'\.htm$', '.html', match_link))[0]
        except Exception as e:
            # best effort: any failure means "not live", but leave a trace
            PlayoffLogger.get('match.jfrhtml').info(
                'cannot fetch played boards count for running link: %s(%s)',
                type(e).__name__, str(e))
            boards_played = 0
        if not boards_played:
            PlayoffLogger.get('match.jfrhtml').info(
                'running link is not live - reverting to match link (%s)',
                self.settings['link'])
            match_link = self.settings['link']
        return match_link