summaryrefslogtreecommitdiff
path: root/jfr_playoff/data/match
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2019-12-31 13:19:48 +0100
committer emkael <emkael@tlen.pl> 2019-12-31 13:19:48 +0100
commit 7d3a08128bcadd562e7eaa48644c830ed166c98c (patch)
tree f9f3a33c67a3cf0fd85175d346c36883e02fe0bf /jfr_playoff/data/match
parent 009c26df9ccc65e3c58f62154a63498debd8e5b3 (diff)
Migrating JFR match results to result info client model
Diffstat (limited to 'jfr_playoff/data/match')
-rw-r--r-- jfr_playoff/data/match/__init__.py 9
-rw-r--r-- jfr_playoff/data/match/jfrdb.py 75
-rw-r--r-- jfr_playoff/data/match/jfrhtml.py 192
3 files changed, 276 insertions, 0 deletions
diff --git a/jfr_playoff/data/match/__init__.py b/jfr_playoff/data/match/__init__.py
index 7db05c2..6137078 100644
--- a/jfr_playoff/data/match/__init__.py
+++ b/jfr_playoff/data/match/__init__.py
@@ -4,3 +4,12 @@ from jfr_playoff.data.info import ResultInfoClient
class MatchInfoClient(ResultInfoClient):
def get_match_link(self):
raise NotImplementedError
+
+    def fetch_teams(self, teams):
+        """Fill ``teams`` with match data; concrete clients must override.
+
+        :param teams: team objects to be updated by the implementation
+        :raises NotImplementedError: always, in this base class
+        """
+        raise NotImplementedError()
+
+    def board_count(self):
+        """Report board progress of the match; concrete clients must override.
+
+        The implementations visible in this change return a
+        ``(played, total)`` pair.
+
+        :raises NotImplementedError: always, in this base class
+        """
+        raise NotImplementedError()
+
+    def running_link(self):
+        """Return the link to follow for a running match; must be overridden.
+
+        :raises NotImplementedError: always, in this base class
+        """
+        raise NotImplementedError()
diff --git a/jfr_playoff/data/match/jfrdb.py b/jfr_playoff/data/match/jfrdb.py
index 7d9e067..8dc57ab 100644
--- a/jfr_playoff/data/match/jfrdb.py
+++ b/jfr_playoff/data/match/jfrdb.py
@@ -1,3 +1,6 @@
+import re
+
+import jfr_playoff.sql as p_sql
from jfr_playoff.data import TournamentInfo
from jfr_playoff.data.match import MatchInfoClient
from jfr_playoff.logger import PlayoffLogger
@@ -26,3 +29,75 @@ class JFRDbMatchInfo(MatchInfoClient):
PlayoffLogger.get('match.jfrdb').info(
'match #%d link fetched: %s', self.settings['id'], link)
return link
+
+    def fetch_teams(self, teams):
+        """Populate both teams of the match from the tournament database.
+
+        Runs the ``MATCH_RESULTS`` query for the configured table and round,
+        stores the first two result columns as team names and builds each
+        score from the remaining columns.
+
+        :param teams: sequence of at least two team objects; ``name``,
+            ``known_teams`` and ``score`` of the first two are overwritten
+        :return: the same ``teams`` object
+        """
+        result = self.database.fetch(
+            self.settings['database'], p_sql.MATCH_RESULTS,
+            (self.settings['table'], self.settings['round']))
+        for index in (0, 1):
+            teams[index].name = [result[index]]
+            teams[index].known_teams = 1
+        first, second = teams[0], teams[1]
+        first.score = result[3] + result[5]
+        second.score = result[4] + result[6]
+        # column 2 is signed (presumably the carry-over - confirm against
+        # the MATCH_RESULTS query): a positive value is credited to the
+        # first team, anything else is credited, negated, to the second one
+        adjustment = result[2]
+        if adjustment > 0:
+            first.score += adjustment
+        else:
+            second.score -= adjustment
+        PlayoffLogger.get('match.jfrdb').info(
+            'scores fetched: %s', teams)
+        return teams
+
+    def board_count(self):
+        """Return ``(boards_played, boards_to_play)`` from the database.
+
+        Runs the ``TOWEL_COUNT`` and ``BOARD_COUNT`` queries for the
+        configured table and round. Missing (``None``) board count columns
+        are treated as zero, a negative played count is clamped to zero,
+        and the towel count is added to the played boards only when there
+        are boards to play at all.
+        """
+        database = self.settings['database']
+        query_args = (self.settings['table'], self.settings['round'])
+        towel_row = self.database.fetch(
+            database, p_sql.TOWEL_COUNT, query_args)
+        count_row = self.database.fetch(
+            database, p_sql.BOARD_COUNT, query_args)
+        counts = [0 if value is None else value for value in count_row]
+        boards_to_play = int(counts[0])
+        boards_played = int(counts[1])
+        if boards_played < 0:
+            boards_played = 0
+        if boards_to_play > 0:
+            boards_played += int(towel_row[0])
+        PlayoffLogger.get('match.jfrdb').info(
+            'board count: %d/%d', boards_played, boards_to_play)
+        return boards_played, boards_to_play
+
+    def running_link(self):
+        """Return the link to the currently played segment, if it is live.
+
+        When the configured match link looks like ``...runda<N>.html``, the
+        current segment number is read from the database and a segment link
+        (``...<N>t<table>-<segment>.html``) is built from it. That link is
+        used only if its page reports some, but not all, boards as played;
+        in every other case the configured match link is returned.
+
+        :return: the segment link or ``settings['link']``
+        """
+        logger = PlayoffLogger.get('match.jfrdb')
+        base_link = self.settings['link']
+        match_link = base_link
+        link_match = re.match(r'^(.*)runda(\d+)\.html$', base_link)
+        if link_match:
+            current_segment = int(
+                self.database.fetch(
+                    self.settings['database'], p_sql.CURRENT_SEGMENT,
+                    (self.settings['round'],))[0])
+            logger.info('fetched running segment: %d', current_segment)
+            match_link = '%s%st%d-%d.html' % (
+                link_match.group(1), link_match.group(2),
+                self.settings['table'], current_segment)
+        if match_link == base_link:
+            # nothing was substituted, so there is nothing to verify
+            return match_link
+        logger.info('checking if running link %s is live', match_link)
+        # kept as a function-level import, exactly as in the original code
+        from jfr_playoff.data.match.jfrhtml import JFRHtmlMatchInfo
+        client = JFRHtmlMatchInfo(self.settings)
+        board_count = 0  # bound up-front so it exists on the failure path
+        try:
+            boards_played, board_count = client.segment_board_count(
+                re.sub(r'\.htm$', '.html', match_link))
+        except Exception as e:
+            # best effort: any failure means the segment page is not usable
+            logger.info(
+                'unable to determine confirm link: %s(%s)',
+                type(e).__name__, str(e))
+            boards_played = 0
+        if not boards_played:
+            logger.info(
+                'running link is not live - reverting to match link (%s)',
+                base_link)
+            return base_link
+        if boards_played == board_count:
+            logger.info(
+                'running link is finished - reverting to match link (%s)',
+                base_link)
+            return base_link
+        return match_link
diff --git a/jfr_playoff/data/match/jfrhtml.py b/jfr_playoff/data/match/jfrhtml.py
index 40f1395..b4a8808 100644
--- a/jfr_playoff/data/match/jfrhtml.py
+++ b/jfr_playoff/data/match/jfrhtml.py
@@ -1,4 +1,9 @@
+import copy
+import re
+from urlparse import urljoin
+
from jfr_playoff.data.match import MatchInfoClient
+from jfr_playoff.remote import RemoteUrl as p_remote
from jfr_playoff.logger import PlayoffLogger
@@ -18,3 +23,190 @@ class JFRHtmlMatchInfo(MatchInfoClient):
'match #%d link pre-defined: %s',
self.settings['id'], self.settings['link'])
return self.settings['link']
+
+    def _find_table_row(self, url):
+        """Fetch ``url`` and return the results row of the configured table.
+
+        Nested rows (``tr tr``) are scanned in document order; the first one
+        with a ``td.t1`` cell whose stripped text equals the configured
+        table number is returned.
+
+        :param url: address of the results page
+        :return: the matching row element, or ``None`` if there is none
+        """
+        page = p_remote.fetch(url)
+        for candidate in page.select('tr tr'):
+            labels = [cell.text.strip() for cell in candidate.select('td.t1')]
+            if str(self.settings['table']) not in labels:
+                continue
+            PlayoffLogger.get('match.jfrhtml').debug(
+                'HTML row for table %d found: %s',
+                self.settings['table'], candidate)
+            return candidate
+        PlayoffLogger.get('match.jfrhtml').debug(
+            'HTML row for table %d not found',
+            self.settings['table'])
+        return None
+
+ def fetch_teams(self, teams):
+ """Read team names and scores for this match from its HTML results page.
+
+ Sets ``name``, ``known_teams`` and ``score`` on ``teams[0]`` and
+ ``teams[1]`` and returns ``teams``.
+
+ Raises ``ValueError`` when ``settings['link']`` is ``None`` or when no
+ row for the configured table is found on the page.
+ """
+ if self.settings['link'] is None:
+ raise ValueError('link not set')
+ row = self._find_table_row(self.settings['link'])
+ if row is None:
+ raise ValueError('table row not found')
+ # first attempt: take the text nodes of the last td.bdc cell as scores;
+ # text that is not numeric makes float() raise ValueError
+ # NOTE(review): only ValueError is handled here - an IndexError from a row
+ # without any td.bdc cell would propagate to the caller
+ try:
+ scores = [
+ float(text) for text
+ in row.select('td.bdc')[-1].contents
+ if isinstance(text, unicode)]
+ except ValueError:
+ # single-segment match
+ try:
+ # running single-segment
+ scores = [
+ float(text.strip()) for text
+ in row.select('td.bdcg a')[-1].contents
+ if isinstance(text, unicode)]
+ except IndexError:
+ try:
+ # static single-segment
+ scores = [
+ float(text.strip()) for text
+ in row.select('td.bdc a')[-1].contents
+ if isinstance(text, unicode)]
+ except IndexError:
+ # toweled single-segment
+ scores = [0.0, 0.0]
+ # carry-over
+ # NOTE(review): indentation is lost in this rendering; the carry-over
+ # block presumably belongs to the single-segment (except ValueError)
+ # branch - confirm against the repository
+ # text nodes of the first td.bdc cell; empty text counts as 0.0
+ carry_over = [
+ float(text.strip()) if len(text.strip()) > 0 else 0.0 for text
+ in row.select('td.bdc')[0].contents
+ if isinstance(text, unicode)]
+ if len(carry_over) < 2:
+ # no carry-over, possibly no carry-over cells or empty
+ carry_over = [0.0, 0.0]
+ for i in range(0, 2):
+ scores[i] += carry_over[i]
+ # team name: first unicode text node of every a[onmouseover] link,
+ # with non-breaking spaces stripped from both ends
+ team_names = [[text for text in link.contents
+ if isinstance(text, unicode)][0].strip(u'\xa0')
+ for link in row.select('a[onmouseover]')]
+ for i in range(0, 2):
+ teams[i].name = [team_names[i]]
+ teams[i].known_teams = 1
+ teams[i].score = scores[i]
+ PlayoffLogger.get('match.jfrhtml').info(
+ 'scores fetched: %s', teams)
+ return teams
+
+
+    def _has_segment_link(self, cell):
+        """Tell whether ``cell`` contains a link to a segment page.
+
+        A segment link is an ``a[href]`` whose target matches
+        ``<digits>t<digits>-<digits>.htm``.
+        """
+        return any(
+            re.match(r'^.*\d+t\d+-\d+\.htm$', anchor['href'])
+            for anchor in cell.select('a[href]'))
+
+    def _has_towel_image(self, cell):
+        """Tell whether ``cell`` contains an image with ``alt="towel"``."""
+        towel_images = cell.select('img[alt="towel"]')
+        return bool(towel_images)
+
+    def _get_html_running_boards(self, cell):
+        """Return the integer held in the last content node of ``cell``.
+
+        The last node is expected to be text; it is stripped of surrounding
+        whitespace before the conversion.
+        """
+        last_node = cell.contents[-1]
+        return int(last_node.strip())
+
+    def segment_board_count(self, segment_url):
+        """Count played and total boards listed on a segment page.
+
+        A board row is any ``tr`` containing a ``td.bdcc a.zb`` element; it
+        counts as played when at least one of its ``td.bdc`` cells has
+        non-blank text.
+
+        :param segment_url: address of the segment page to fetch
+        :return: ``(played_boards, board_count)``
+        """
+        page = p_remote.fetch(segment_url)
+        played_boards = 0
+        board_count = 0
+        for row in page.find_all('tr'):
+            if not row.select('td.bdcc a.zb'):
+                continue
+            board_count += 1
+            if any(cell.text.strip() for cell in row.select('td.bdc')):
+                played_boards += 1
+        return played_boards, board_count
+
+    def _get_finished_info(self, cell):
+        """Inspect the segment linked from ``cell``.
+
+        Follows the first ``a[href]`` of the cell (resolved against the
+        match link, with a trailing ``.htm`` turned into ``.html``) and
+        counts its boards.
+
+        :return: ``(board_count, is_finished)``; ``(0, False)`` when the
+            cell has no link or the segment page cannot be fetched
+        """
+        anchors = cell.select('a[href]')
+        if not anchors:
+            return 0, False
+        target = urljoin(self.settings['link'], anchors[0]['href'])
+        segment_url = re.sub(r'\.htm$', '.html', target)
+        try:
+            played_boards, board_count = \
+                self.segment_board_count(segment_url)
+            PlayoffLogger.get('match.jfrhtml').info(
+                'HTML played boards count for segment: %d/%d',
+                played_boards, board_count)
+            return board_count, played_boards >= board_count
+        except IOError as e:
+            PlayoffLogger.get('match.jfrhtml').info(
+                'cannot fetch played boards count for segment: %s(%s)',
+                type(e).__name__, str(e))
+            return 0, False
+
+    def board_count(self):
+        """Return ``(played_boards, total_boards)`` read from the HTML results.
+
+        Segment cells are looked up among ``td.bdc`` cells first and, if
+        none of them carries a segment link, among ``td.bdcg`` cells. A row
+        that has towel images but no segment links is reported as ``(1, 1)``.
+        Otherwise the size of a segment is taken from the first non-running
+        segment page that reports any boards, and running segments
+        (``td.bdca``) contribute the board number shown in their cell.
+
+        When no segment page yields a board count, the segment size falls
+        back to 0 instead of failing on arithmetic with ``None``.
+
+        :raises ValueError: when the table row or the segment cells are
+            not found
+        """
+        logger = PlayoffLogger.get('match.jfrhtml')
+        row = self._find_table_row(self.settings['link'])
+        if row is None:
+            raise ValueError('table row not found')
+        segments = []
+        towels = []
+        for selector in ['td.bdc', 'td.bdcg']:
+            cells = row.select(selector)
+            segments = [cell for cell in cells if self._has_segment_link(cell)]
+            towels = [cell for cell in cells if self._has_towel_image(cell)]
+            if segments:
+                # not a single-segment match
+                # no need to look for td.bdcg cells
+                break
+            # in single-segment match,
+            # there are no td.bdc cells with segment links,
+            # but maybe it's a multi-segment match with towels
+            if towels:
+                logger.info('board count: all towels')
+                return 1, 1  # entire match is toweled, so mark as finished
+        if not segments:
+            raise ValueError('segments not found')
+        running_segments = row.select('td.bdca')
+        running_boards = sum(
+            self._get_html_running_boards(segment)
+            for segment in running_segments)
+        finished_segments = []
+        boards_in_segment = None
+        for segment in segments:
+            if segment in running_segments:
+                continue
+            boards, is_finished = self._get_finished_info(segment)
+            if is_finished:
+                finished_segments.append(segment)
+            if boards_in_segment is None and boards > 0:
+                boards_in_segment = boards
+        if boards_in_segment is None:
+            # no segment page could tell how many boards a segment has;
+            # use 0 so the arithmetic below does not fail on None
+            logger.info('board count: boards per segment unknown, assuming 0')
+            boards_in_segment = 0
+        if 'bdcg' in segments[0]['class']:
+            # only a single-segment match will yield
+            # td.bdcg cells with segment scores
+            total_boards = boards_in_segment
+        else:
+            logger.info(
+                'board count, found: %d finished segments, %d towels, '
+                '%d boards per segment and %d boards in running segment',
+                len(finished_segments), len(towels),
+                boards_in_segment, running_boards)
+            total_boards = (
+                len(segments) + len(towels) + len(running_segments)) \
+                * boards_in_segment
+        played_boards = (len(towels) + len(finished_segments)) \
+            * boards_in_segment \
+            + running_boards
+        logger.info('board count: %d/%d', played_boards, total_boards)
+        return played_boards, total_boards
+
+    def running_link(self):
+        """Return the running segment link from the HTML row, if it is live.
+
+        The first ``td.bdcg a[href]`` of the match row is resolved against
+        the match link. If the segment page cannot be read or reports no
+        played boards, the configured match link is returned instead.
+
+        :raises ValueError: when the table row or the running link is
+            not found
+        """
+        logger = PlayoffLogger.get('match.jfrhtml')
+        row = self._find_table_row(self.settings['link'])
+        if row is None:
+            # same failure mode as fetch_teams() and board_count()
+            raise ValueError('table row not found')
+        running_link = row.select('td.bdcg a[href]')
+        if len(running_link) == 0:
+            raise ValueError('running link not found')
+        logger.info('fetched running link from HTML: %s', running_link)
+        match_link = urljoin(self.settings['link'], running_link[0]['href'])
+        try:
+            boards_played, _ = self.segment_board_count(
+                re.sub(r'\.htm$', '.html', match_link))
+        except Exception as e:
+            # best effort: report the reason, then treat the link as not live
+            logger.info(
+                'unable to check running link: %s(%s)',
+                type(e).__name__, str(e))
+            boards_played = 0
+        if not boards_played:
+            logger.info(
+                'running link is not live - reverting to match link (%s)',
+                self.settings['link'])
+            match_link = self.settings['link']
+        return match_link