diff options
Diffstat (limited to 'jfr_playoff')
-rw-r--r-- | jfr_playoff/generator.py | 5 | ||||
-rw-r--r-- | jfr_playoff/matchinfo.py | 66 | ||||
-rw-r--r-- | jfr_playoff/remote.py | 3 |
3 files changed, 57 insertions, 17 deletions
diff --git a/jfr_playoff/generator.py b/jfr_playoff/generator.py index 1daae1c..679fb13 100644 --- a/jfr_playoff/generator.py +++ b/jfr_playoff/generator.py @@ -57,7 +57,10 @@ class PlayoffGenerator(object): team_label = ' / '.join([ self.data.get_shortname(name) for name in team.name.split('<br />')]) - team_label = team_label[:30] + (team_label[30:] and '(...)') + label_max_length = self.page['label_length_limit'] \ + if 'label_length_limit' in self.page else 0 + if label_max_length: + team_label = team_label[:label_max_length] + (team_label[label_max_length:] and '(...)') team_html = self.p_temp.get( 'MATCH_TEAM_LINK', match.link, team.name, team_label) \ diff --git a/jfr_playoff/matchinfo.py b/jfr_playoff/matchinfo.py index 4de2044..1357ba3 100644 --- a/jfr_playoff/matchinfo.py +++ b/jfr_playoff/matchinfo.py @@ -118,6 +118,7 @@ class MatchInfo: in row.select('td.bdc')[-1].contents if isinstance(text, unicode)] except ValueError: + # single-segment match try: # running single-segment scores = [ @@ -125,16 +126,23 @@ class MatchInfo: in row.select('td.bdcg a')[-1].contents if isinstance(text, unicode)] except IndexError: - # static single-segment - scores = [ - float(text.strip()) for text - in row.select('td.bdc a')[-1].contents - if isinstance(text, unicode)] + try: + # static single-segment + scores = [ + float(text.strip()) for text + in row.select('td.bdc a')[-1].contents + if isinstance(text, unicode)] + except IndexError: + # toweled single-segment + scores = [0.0, 0.0] # carry-over carry_over = [ float(text.strip()) if len(text.strip()) > 0 else 0.0 for text in row.select('td.bdc')[0].contents if isinstance(text, unicode)] + if len(carry_over) < 2: + # no carry-over, possibly no carry-over cells or empty + carry_over = [0.0, 0.0] for i in range(0, 2): scores[i] += carry_over[i] team_names = [[text for text in link.contents @@ -243,6 +251,15 @@ class MatchInfo: def __get_html_running_boards(self, cell): return int(cell.contents[-1].strip()) + def __get_html_segment_board_count(self, segment_url): + segment_content = p_remote.fetch(segment_url) + board_rows = [row for row in segment_content.find_all('tr') if len(row.select('td.bdcc a.zb')) > 0] + board_count = len(board_rows) + played_boards = len([ + row for row in board_rows if len( + ''.join([cell.text.strip() for cell in row.select('td.bdc')])) > 0]) + return played_boards, board_count + def __get_finished_info(self, cell): segment_link = cell.select('a[href]') if len(segment_link) > 0: @@ -250,12 +267,7 @@ class MatchInfo: r'\.htm$', '.html', urljoin(self.info.link, segment_link[0]['href'])) try: - segment_content = p_remote.fetch(segment_url) - board_rows = [row for row in segment_content.find_all('tr') if len(row.select('td.bdcc a.zb')) > 0] - board_count = len(board_rows) - played_boards = len([ - row for row in board_rows if len( - ''.join([cell.text.strip() for cell in row.select('td.bdc')])) > 0]) + played_boards, board_count = self.__get_html_segment_board_count(segment_url) PlayoffLogger.get('matchinfo').info( 'HTML played boards count for segment: %d/%d', played_boards, board_count) @@ -278,16 +290,18 @@ class MatchInfo: segments = [cell for cell in cells if self.__has_segment_link(cell)] towels = [cell for cell in cells if self.__has_towel_image(cell)] if len(segments) == 0: + # in single-segment match, there are no td.bdc cells with segment links + # but maybe it's a multi-segment match with towels if len(towels) > 0: PlayoffLogger.get('matchinfo').info( 'HTML board count for match #%d: all towels', self.info.id) return 1, 1 # entire match is toweled, so mark as finished else: + # not a single-segment match, no need to look for td.bdcg cells break if len(segments) == 0: raise ValueError('segments not found') running_segments = row.select('td.bdca') - # FIXME: running single-segment match board count running_boards = sum([self.__get_html_running_boards(segment) for segment in running_segments]) finished_segments = [] boards_in_segment = None @@ -298,10 +312,14 @@ class MatchInfo: finished_segments.append(segment) if boards_in_segment is None and boards > 0: boards_in_segment = boards - PlayoffLogger.get('matchinfo').info( - 'HTML board count for match #%d, found: %d finished segments, %d towels, %d boards per segment and %d boards in running segment', - self.info.id, len(finished_segments), len(towels), boards_in_segment, running_boards) - total_boards = (len(segments) + len(towels) + len(running_segments)) * boards_in_segment + if 'bdcg' in segments[0]['class']: + # only a single-segment match will yield td.bdcg cells with segment scores + total_boards = boards_in_segment + else: + PlayoffLogger.get('matchinfo').info( + 'HTML board count for match #%d, found: %d finished segments, %d towels, %d boards per segment and %d boards in running segment', + self.info.id, len(finished_segments), len(towels), boards_in_segment, running_boards) + total_boards = (len(segments) + len(towels) + len(running_segments)) * boards_in_segment played_boards = (len(towels) + len(finished_segments)) * boards_in_segment + running_boards PlayoffLogger.get('matchinfo').info( 'HTML board count for match #%d: %d/%d', @@ -365,6 +383,7 @@ class MatchInfo: def __determine_running_link(self): if self.info.link is None: return + match_link = self.info.link link_match = re.match(r'^(.*)runda(\d+)\.html$', self.info.link) if link_match: try: @@ -382,6 +401,21 @@ class MatchInfo: PlayoffLogger.get('matchinfo').warning( 'cannot determine running link from HTML for match #%d: %s(%s)', self.info.id, type(e).__name__, str(e)) + if self.info.link != match_link: + # we've detected a running segment link + # we should check if the segment's uploaded live + try: + boards_played, board_count = self.__get_html_segment_board_count(re.sub('\.htm$', '.html', self.info.link)) + except IOError as e: + PlayoffLogger.get('matchinfo').warning( + 'cannot determine running link (%s) board count for match #%d: %s(%s)', + self.info.link, self.info.id, type(e).__name__, str(e)) + boards_played = 0 + if not boards_played: + PlayoffLogger.get('matchinfo').warning( + 'running link (%s) for match #%d is not live, reverting to match link (%s)', + self.info.link, self.info.id, match_link) + self.info.link = match_link def set_phase_link(self, phase_link): if self.info.link is None: diff --git a/jfr_playoff/remote.py b/jfr_playoff/remote.py index 2deb7ce..f0bf6c2 100644 --- a/jfr_playoff/remote.py +++ b/jfr_playoff/remote.py @@ -21,4 +21,7 @@ class RemoteUrl: if encoding_match: request.encoding = encoding_match.group(2) cls.url_cache[url] = request.text + PlayoffLogger.get('remote').info( + 'fetched %d bytes from remote location', + len(cls.url_cache[url])) return bs(cls.url_cache[url], 'lxml') |