1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
import copy
import re
from urlparse import urljoin
from jfr_playoff.data.match import MatchInfoClient
from jfr_playoff.remote import RemoteUrl as p_remote
from jfr_playoff.logger import PlayoffLogger
class JFRHtmlMatchInfo(MatchInfoClient):
@property
def priority(self):
return 30
def is_capable(self):
return ('link' in self.settings) and ('#' not in self.settings['link'])
def get_exceptions(self, method):
return (TypeError, IndexError, KeyError, IOError, ValueError)
def get_match_link(self):
PlayoffLogger.get('match.jfrhtml').info(
'match #%d link pre-defined: %s',
self.settings['id'], self.settings['link'])
return self.settings['link']
def _find_table_row(self, url):
html_content = p_remote.fetch(url)
for row in html_content.select('tr tr'):
for cell in row.select('td.t1'):
if cell.text.strip() == str(self.settings['table']):
PlayoffLogger.get('match.jfrhtml').debug(
'HTML row for table %d found: %s',
self.settings['table'], row)
return row
PlayoffLogger.get('match.jfrhtml').debug(
'HTML row for table %d not found',
self.settings['table'])
return None
def fetch_teams(self, teams):
if self.settings['link'] is None:
raise ValueError('link not set')
row = self._find_table_row(self.settings['link'])
if row is None:
raise ValueError('table row not found')
try:
scores = [
float(text) for text
in row.select('td.bdc')[-1].contents
if isinstance(text, unicode)]
except ValueError:
# single-segment match
try:
# running single-segment
scores = [
float(text.strip()) for text
in row.select('td.bdcg a')[-1].contents
if isinstance(text, unicode)]
except IndexError:
try:
# static single-segment
scores = [
float(text.strip()) for text
in row.select('td.bdc a')[-1].contents
if isinstance(text, unicode)]
except IndexError:
# toweled single-segment
scores = [0.0, 0.0]
# carry-over
carry_over = [
float(text.strip()) if len(text.strip()) > 0 else 0.0 for text
in row.select('td.bdc')[0].contents
if isinstance(text, unicode)]
if len(carry_over) < 2:
# no carry-over, possibly no carry-over cells or empty
carry_over = [0.0, 0.0]
for i in range(0, 2):
scores[i] += carry_over[i]
team_names = [[text for text in link.contents
if isinstance(text, unicode)][0].strip(u'\xa0')
for link in row.select('a[onmouseover]')]
for i in range(0, 2):
teams[i].name = [team_names[i]]
teams[i].known_teams = 1
teams[i].score = scores[i]
PlayoffLogger.get('match.jfrhtml').info(
'scores fetched: %s', teams)
return teams
def _has_segment_link(self, cell):
links = [link for link in cell.select('a[href]')
if re.match(r'^.*\d+t\d+-\d+\.htm$', link['href'])]
return len(links) > 0
def _has_towel_image(self, cell):
return len(cell.select('img[alt="towel"]')) > 0
def _get_html_running_boards(self, cell):
return int(cell.contents[-1].strip())
def segment_board_count(self, segment_url):
segment_content = p_remote.fetch(segment_url)
board_rows = [
row for row
in segment_content.find_all('tr')
if len(row.select('td.bdcc a.zb')) > 0]
board_count = len(board_rows)
played_boards = len([
row for row
in board_rows
if len(''.join([
cell.text.strip()
for cell in row.select('td.bdc')])) > 0])
return played_boards, board_count
def _get_finished_info(self, cell):
segment_link = cell.select('a[href]')
if len(segment_link) > 0:
segment_url = re.sub(
r'\.htm$', '.html',
urljoin(self.settings['link'], segment_link[0]['href']))
try:
played_boards, board_count = \
self.segment_board_count(segment_url)
PlayoffLogger.get('match.jfrhtml').info(
'HTML played boards count for segment: %d/%d',
played_boards, board_count)
return board_count, played_boards >= board_count
except IOError as e:
PlayoffLogger.get('match.jfrhtml').info(
'cannot fetch played boards count for segment: %s(%s)',
type(e).__name__, str(e))
return 0, False
return 0, False
def board_count(self):
row = self._find_table_row(self.settings['link'])
if row is None:
raise ValueError('table row not found')
for selector in ['td.bdc', 'td.bdcg']:
cells = row.select(selector)
segments = [cell for cell in cells if self._has_segment_link(cell)]
towels = [cell for cell in cells if self._has_towel_image(cell)]
if len(segments) == 0:
# in single-segment match,
# there are no td.bdc cells with segment links,
# but maybe it's a multi-segment match with towels
if len(towels) > 0:
PlayoffLogger.get('match.jfrhtml').info(
'board count: all towels')
return 1, 1 # entire match is toweled, so mark as finished
else:
# not a single-segment match
# no need to look for td.bdcg cells
break
if len(segments) == 0:
raise ValueError('segments not found')
running_segments = row.select('td.bdca')
running_boards = sum([
self._get_html_running_boards(segment)
for segment
in running_segments])
finished_segments = []
boards_in_segment = None
for segment in segments:
if segment not in running_segments:
boards, is_finished = self._get_finished_info(segment)
if is_finished:
finished_segments.append(segment)
if boards_in_segment is None and boards > 0:
boards_in_segment = boards
if 'bdcg' in segments[0]['class']:
# only a single-segment match will yield
# td.bdcg cells with segment scores
total_boards = boards_in_segment
else:
PlayoffLogger.get('match.jfrhtml').info(
'board count, found: %d finished segments, %d towels, ' \
+ '%d boards per segment and %d boards in running segment',
len(finished_segments), len(towels),
boards_in_segment, running_boards)
total_boards = (
len(segments) + len(towels) + len(running_segments)) \
* boards_in_segment
played_boards = (len(towels) + len(finished_segments)) \
* boards_in_segment \
+ running_boards
PlayoffLogger.get('match.jfrhtml').info(
'board count: %d/%d', played_boards, total_boards)
return played_boards, total_boards
def running_link(self):
row = self._find_table_row(self.settings['link'])
running_link = row.select('td.bdcg a[href]')
if len(running_link) == 0:
raise ValueError('running link not found')
PlayoffLogger.get('match.jfrhtml').info(
'fetched running link from HTML: %s', running_link)
match_link = urljoin(self.settings['link'], running_link[0]['href'])
try:
boards_played, board_count = self.segment_board_count(
re.sub('\.htm$', '.html', match_link))
except Exception as e:
boards_played = 0
if not boards_played:
PlayoffLogger.get('match.jfrhtml').info(
'running link is not live - reverting to match link (%s)',
self.settings['link'])
match_link = self.settings['link']
return match_link
|