summaryrefslogtreecommitdiff
path: root/jfr_playoff/matchinfo.py
blob: fad81552c0b559d5a3f704d49301f69812bc36eb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
import re
from urlparse import urljoin

import jfr_playoff.sql as p_sql
from jfr_playoff.dto import Match, Team
from jfr_playoff.remote import RemoteUrl as p_remote
from jfr_playoff.tournamentinfo import TournamentInfo
from jfr_playoff.logger import PlayoffLogger

class MatchInfo:

    matches = {}

    def __init__(self, match_config, teams, database, aliases=None):
        """Set up the match from its configuration.

        Stores the arguments, builds the alias lookup, creates the match
        info object and immediately initialises it and its link.

        :param match_config: configuration of this match, read by key
            throughout the class
        :param teams: team list; entries are read as ``teams[n][0]``
            whenever a team name is needed
        :param database: database handle, or None when there is none
        :param aliases: optional mapping of team name to an iterable of
            its aliases; it is stored inverted (alias -> team name)
        """
        self.config = match_config
        self.teams = teams
        self.database = database
        # invert the mapping so that every alias can be looked up directly
        self.aliases = {
            alias: team_name
            for team_name, alias_list in (aliases or {}).iteritems()
            for alias in alias_list}
        self.info = Match()
        self.__init_info()
        self.__fetch_match_link()

    def __init_info(self):
        self.info.id = self.config['id']
        MatchInfo.matches[self.info.id] = self.info
        self.info.running = 0
        self.info.winner_matches = []
        self.info.loser_matches = []
        for i in range(0, 2):
            if 'winner' in self.config['teams'][i]:
                self.info.winner_matches += self.config['teams'][i]['winner']
            if 'loser' in self.config['teams'][i]:
                self.info.loser_matches += self.config['teams'][i]['loser']
        self.info.winner_matches = list(set(self.info.winner_matches))
        self.info.loser_matches = list(set(self.info.loser_matches))
        self.info.winner_place = self.config.get('winner', [])
        self.info.loser_place = self.config.get('loser', [])
        self.info.teams = []

    def __fetch_match_link(self):
        """Determine the results link of the match, if one can be found.

        A ``'link'`` entry in the config takes precedence.  Otherwise, when
        both ``'round'`` and ``'database'`` are configured, the link is
        obtained from ``TournamentInfo.get_results_link`` for the round
        page.  With neither, ``self.info.link`` is not assigned here.
        """
        config = self.config
        if 'link' in config:
            self.info.link = config['link']
            PlayoffLogger.get('matchinfo').info(
                'match #%d link pre-defined: %s', self.info.id, self.info.link)
            return
        if 'round' not in config or 'database' not in config:
            PlayoffLogger.get('matchinfo').info('match #%d link empty', self.info.id)
            return
        tournament = TournamentInfo(config, self.database)
        self.info.link = tournament.get_results_link(
            'runda%d.html' % (config['round']))
        PlayoffLogger.get('matchinfo').info(
            'match #%d link fetched: %s', self.info.id, self.info.link)

    def __get_predefined_scores(self):
        """Read scores (and possibly team names) hard-coded in the config.

        ``config['score']`` may be a mapping or a plain sequence.  For a
        mapping, each key identifies a team: keys convertible to ``int``
        are 1-based indices into ``self.teams``, any other key is used as
        the team name itself.  For a sequence, the entries are just scores.
        At most two entries are consumed.

        :return: tuple ``(scores_fetched, teams_fetched, teams)`` where
            ``teams`` is a list of two freshly created team objects
        """
        teams = [Team(), Team()]
        if 'score' not in self.config:
            return False, False, teams
        predefined = self.config['score']
        keyed_by_team = isinstance(predefined, dict)
        teams_fetched = False
        # zip() stops after the two team slots have been filled
        for team, entry in zip(teams, predefined):
            if not keyed_by_team:
                team.score = entry
                continue
            team.score = predefined[entry]
            try:
                team.name = [self.teams[int(entry) - 1][0]]
            except ValueError:
                # not a number, so the key is the team name
                team.name = [entry]
            teams_fetched = True
        PlayoffLogger.get('matchinfo').info(
            'pre-defined scores for match #%d: %s',
            self.info.id, teams)
        return True, teams_fetched, teams

    def __get_db_teams(self, teams, fetch_scores):
        """Fill team names, and optionally scores, from the database.

        Runs the ``MATCH_RESULTS`` query for the configured table and
        round.  Columns 0 and 1 of the result become the team names; when
        scores are requested, columns 3+5 and 4+6 are summed per team and
        column 2 is applied as a signed adjustment (presumably the
        carry-over - confirm against the query): a positive value is
        credited to the first team, otherwise its negation is credited to
        the second one.

        :param teams: two team objects, updated in place
        :param fetch_scores: whether scores should be set as well
        :return: the same ``teams`` list
        """
        db_name = self.config['database']
        query_params = (self.config['table'], self.config['round'])
        row = self.database.fetch(db_name, p_sql.MATCH_RESULTS, query_params)
        for slot in (0, 1):
            team = teams[slot]
            team.name = [row[slot]]
            team.known_teams = 1
        if fetch_scores:
            home, away = teams[0], teams[1]
            home.score = row[3] + row[5]
            away.score = row[4] + row[6]
            adjustment = row[2]
            if adjustment > 0:
                home.score += adjustment
            else:
                away.score -= adjustment
        PlayoffLogger.get('matchinfo').info(
            'db scores for match #%d: %s', self.info.id, teams)
        return teams

    def __find_table_row(self, url):
        """Find the row of the configured table on a fetched results page.

        A row (matched by the ``tr tr`` selector) qualifies when any of
        its ``td.t1`` cells holds, after stripping whitespace, the string
        form of ``config['table']``.

        :param url: address of the page to fetch
        :return: the first matching row, or None when there is none
        """
        page = p_remote.fetch(url)
        for row in page.select('tr tr'):
            table_label = str(self.config['table'])
            if any(cell.text.strip() == table_label
                   for cell in row.select('td.t1')):
                PlayoffLogger.get('matchinfo.html').debug(
                    'HTML row for table %d found: %s',
                    self.config['table'], row)
                return row
        PlayoffLogger.get('matchinfo.html').debug(
            'HTML row for table %d not found',
            self.config['table'])
        return None

    def __get_html_teams(self, teams, fetch_score):
        if self.info.link is None:
            raise ValueError('link not set')
        row = self.__find_table_row(self.info.link)
        if row is None:
            raise ValueError('table row not found')
        try:
            scores = [
                float(text) for text
                in row.select('td.bdc')[-1].contents
                if isinstance(text, unicode)]
        except ValueError:
            # single-segment match
            try:
                # running single-segment
                scores = [
                    float(text.strip()) for text
                    in row.select('td.bdcg a')[-1].contents
                    if isinstance(text, unicode)]
            except IndexError:
                try:
                    # static single-segment
                    scores = [
                        float(text.strip()) for text
                        in row.select('td.bdc a')[-1].contents
                        if isinstance(text, unicode)]
                except IndexError:
                    # toweled single-segment
                    scores = [0.0, 0.0]
            # carry-over
            carry_over = [
                float(text.strip()) if len(text.strip()) > 0 else 0.0 for text
                in row.select('td.bdc')[0].contents
                if isinstance(text, unicode)]
            if len(carry_over) < 2:
                # no carry-over, possibly no carry-over cells or empty
                carry_over = [0.0, 0.0]
            for i in range(0, 2):
                scores[i] += carry_over[i]
        team_names = [[text for text in link.contents
                       if isinstance(text, unicode)][0].strip(u'\xa0')
                      for link in row.select('a[onmouseover]')]
        for i in range(0, 2):
            teams[i].name = [team_names[i]]
            teams[i].known_teams = 1
            teams[i].score = scores[i]
        PlayoffLogger.get('matchinfo').info(
            'HTML scores for match #%d: %s',
            self.info.id, teams)
        return teams

    def __get_config_teams(self, teams):
        """Fill the team names of both slots purely from the configuration.

        Each of the first two entries of ``config['teams']`` is either a
        single name, a list of names, or a mapping.  A mapping may refer
        to other matches through ``'winner'``/``'loser'`` (ids looked up
        in ``MatchInfo.matches``) and to entries of ``self.teams`` through
        ``'place'`` (1-based indices).  Only match references contribute
        to the predicted names.

        :param teams: two team objects, updated in place
        :return: the same ``teams`` list
        """
        for slot in (0, 1):
            slot_config = self.config['teams'][slot]
            names = []
            predicted = []
            if isinstance(slot_config, basestring):
                names = [slot_config]
            elif isinstance(slot_config, list):
                # the configured list itself is used, not a copy
                names = slot_config
            else:
                if 'winner' in slot_config:
                    feeders = [MatchInfo.matches[match_id]
                               for match_id in slot_config['winner']]
                    names.extend([feeder.winner for feeder in feeders])
                    predicted.extend(
                        [feeder.possible_winner for feeder in feeders])
                if 'loser' in slot_config:
                    feeders = [MatchInfo.matches[match_id]
                               for match_id in slot_config['loser']]
                    names.extend([feeder.loser for feeder in feeders])
                    predicted.extend(
                        [feeder.possible_loser for feeder in feeders])
                if 'place' in slot_config:
                    names.extend([self.teams[place - 1][0]
                                  for place in slot_config['place']])
            team = teams[slot]
            team.name = names
            team.possible_name = predicted
            # names that are still None do not count as known
            team.known_teams = sum(1 for name in names if name is not None)
            if 'selected_teams' in self.config:
                team.selected_team = self.config['selected_teams'][slot]
            else:
                team.selected_team = -1
        PlayoffLogger.get('matchinfo').info(
            'config scores for match #%d: %s',
            self.info.id, teams)
        return teams

    def __resolve_team_aliases(self, teams):
        """Replace every known alias in ``teams`` with its team name.

        :param teams: iterable of team names (entries without an alias,
            including None, are passed through unchanged)
        :return: new list with aliases resolved
        """
        lookup = self.aliases
        return [lookup.get(name, name) for name in teams]

    def __fetch_teams_with_scores(self):
        """Populate ``self.info.teams`` with names and scores.

        Pre-defined scores from the config are read first; when they are
        present, the running state is taken from ``config['running']``
        (default -1).  Unless the config also named the teams, they are
        fetched from the database, falling back to the results page when
        that fails and to the plain config when the page fails as well.
        Finally the configured ``'place'`` is applied to dict-shaped team
        entries and aliases are resolved in both name lists.
        """
        scores_fetched, teams_fetched, self.info.teams = \
            self.__get_predefined_scores()
        need_scores = not scores_fetched
        if scores_fetched:
            PlayoffLogger.get('matchinfo').info(
                'pre-defined scores for match #%d fetched', self.info.id)
            self.info.running = int(self.config.get('running', -1))
        if not teams_fetched:
            try:
                try:
                    if self.database is None or 'database' not in self.config:
                        raise KeyError('database not configured')
                    self.info.teams = self.__get_db_teams(
                        self.info.teams, need_scores)
                except (IOError, TypeError, IndexError, KeyError) as db_error:
                    PlayoffLogger.get('matchinfo').warning(
                        'fetching DB scores for match #%d failed: %s(%s)',
                        self.info.id, type(db_error).__name__, str(db_error))
                    self.info.teams = self.__get_html_teams(
                        self.info.teams, need_scores)
            except (TypeError, IndexError, KeyError, IOError, ValueError) as html_error:
                PlayoffLogger.get('matchinfo').warning(
                    'fetching HTML scores for match #%d failed: %s(%s)',
                    self.info.id, type(html_error).__name__, str(html_error))
                self.info.teams = self.__get_config_teams(self.info.teams)
        for slot, team in enumerate(self.info.teams):
            slot_config = self.config['teams'][slot]
            if isinstance(slot_config, dict):
                team.place = slot_config.get('place', team.place)
            team.name = self.__resolve_team_aliases(team.name)
            PlayoffLogger.get('matchinfo').info('team list after resolving aliases: %s', team.name)
            team.possible_name = self.__resolve_team_aliases(team.possible_name)
            PlayoffLogger.get('matchinfo').info('predicted team list after resolving aliases: %s', team.possible_name)


    def __get_db_board_count(self):
        """Read the played/total board counts of the match from the database.

        Runs the ``TOWEL_COUNT`` and ``BOARD_COUNT`` queries for the
        configured table and round.  ``None`` values in the board-count
        row are treated as 0 and a negative played count is clamped to 0.
        The towel count is added to the played boards only when there are
        boards to play at all.

        :return: tuple ``(boards_played, boards_to_play)``
        """
        db_name = self.config['database']
        query_params = (self.config['table'], self.config['round'])
        towels = self.database.fetch(db_name, p_sql.TOWEL_COUNT, query_params)
        db_name = self.config['database']
        query_params = (self.config['table'], self.config['round'])
        counts = [
            value if value is not None else 0
            for value in self.database.fetch(
                db_name, p_sql.BOARD_COUNT, query_params)]
        boards_to_play = int(counts[0])
        boards_played = max(int(counts[1]), 0)
        if boards_to_play > 0:
            boards_played += int(towels[0])
        PlayoffLogger.get('matchinfo').info(
            'DB board count for match #%d: %d/%d',
            self.info.id, boards_played, boards_to_play)
        return boards_played, boards_to_play

    def __has_segment_link(self, cell):
        links = [link for link in cell.select('a[href]')
                 if re.match(r'^.*\d+t\d+-\d+\.htm$', link['href'])]
        return len(links) > 0

    def __has_towel_image(self, cell):
        return len(cell.select('img[alt="towel"]')) > 0

    def __get_html_running_boards(self, cell):
        return int(cell.contents[-1].strip())

    def __get_html_segment_board_count(self, segment_url):
        """Count the boards listed on a fetched segment page.

        Board rows are the ``tr`` elements containing a ``td.bdcc a.zb``
        link; a board counts as played when the concatenated, stripped
        text of its ``td.bdc`` cells is not empty.

        :param segment_url: address of the segment page
        :return: tuple ``(played_boards, board_count)``
        """
        page = p_remote.fetch(segment_url)
        board_rows = [
            row for row in page.find_all('tr')
            if row.select('td.bdcc a.zb')]
        played_boards = sum(
            1 for row in board_rows
            if ''.join([cell.text.strip() for cell in row.select('td.bdc')]))
        return played_boards, len(board_rows)

    def __get_finished_info(self, cell):
        segment_link = cell.select('a[href]')
        if len(segment_link) > 0:
            segment_url = re.sub(
                r'\.htm$', '.html',
                urljoin(self.info.link, segment_link[0]['href']))
            try:
                played_boards, board_count = self.__get_html_segment_board_count(segment_url)
                PlayoffLogger.get('matchinfo').info(
                    'HTML played boards count for segment: %d/%d',
                    played_boards, board_count)
                return board_count, played_boards >= board_count
            except IOError as e:
                PlayoffLogger.get('matchinfo').info(
                    'cannot fetch HTML played boards count for segment: %s(%s)',
                    self.info.id, type(e).__name__, str(e))
                return 0, False
        return 0, False

    def __get_html_board_count(self):
        if self.info.link is None:
            raise ValueError('link not set')
        row = self.__find_table_row(self.info.link)
        if row is None:
            raise ValueError('table row not found')
        for selector in ['td.bdc', 'td.bdcg']:
            cells = row.select(selector)
            segments = [cell for cell in cells if self.__has_segment_link(cell)]
            towels = [cell for cell in cells if self.__has_towel_image(cell)]
            if len(segments) == 0:
                # in single-segment match, there are no td.bdc cells with segment links
                # but maybe it's a multi-segment match with towels
                if len(towels) > 0:
                    PlayoffLogger.get('matchinfo').info(
                        'HTML board count for match #%d: all towels', self.info.id)
                    return 1, 1 # entire match is toweled, so mark as finished
            else:
                # not a single-segment match, no need to look for td.bdcg cells
                break
        if len(segments) == 0:
            raise ValueError('segments not found')
        running_segments = row.select('td.bdca')
        running_boards = sum([self.__get_html_running_boards(segment) for segment in running_segments])
        finished_segments = []
        boards_in_segment = None
        for segment in segments:
            if segment not in running_segments:
                boards, is_finished = self.__get_finished_info(segment)
                if is_finished:
                    finished_segments.append(segment)
                if boards_in_segment is None and boards > 0:
                    boards_in_segment = boards
        if 'bdcg' in segments[0]['class']:
            # only a single-segment match will yield td.bdcg cells with segment scores
            total_boards = boards_in_segment
        else:
            PlayoffLogger.get('matchinfo').info(
                'HTML board count for match #%d, found: %d finished segments, %d towels, %d boards per segment and %d boards in running segment',
                self.info.id, len(finished_segments), len(towels), boards_in_segment, running_boards)
            total_boards = (len(segments) + len(towels) + len(running_segments)) * boards_in_segment
        played_boards = (len(towels) + len(finished_segments)) * boards_in_segment + running_boards
        PlayoffLogger.get('matchinfo').info(
            'HTML board count for match #%d: %d/%d',
            self.info.id, played_boards, total_boards)
        return played_boards, total_boards

    def __fetch_board_count(self):
        """Update ``self.info.running`` from the number of boards played.

        The counts come from the database or, when that fails, from the
        results page; when both fail they stay at zero.  With at least one
        board played, ``running`` becomes -1 once all boards are played
        and the number of played boards otherwise.  With no boards played
        it is left unchanged.
        """
        boards_played, boards_to_play = 0, 0
        try:
            if self.database is None:
                raise KeyError('database not configured')
            boards_played, boards_to_play = self.__get_db_board_count()
        except (IOError, TypeError, IndexError, KeyError) as db_error:
            PlayoffLogger.get('matchinfo').warning(
                'fetching board count from DB for match #%d failed: %s(%s)',
                self.info.id, type(db_error).__name__, str(db_error))
            try:
                boards_played, boards_to_play = self.__get_html_board_count()
            except (TypeError, IndexError, KeyError, IOError, ValueError) as html_error:
                PlayoffLogger.get('matchinfo').warning(
                    'fetching board count from HTML for match #%d failed: %s(%s)',
                    self.info.id, type(html_error).__name__, str(html_error))
        if boards_played > 0:
            if boards_played >= boards_to_play:
                self.info.running = -1
            else:
                self.info.running = boards_played

    def __determine_outcome(self):
        if (self.info.teams[0].known_teams == 1) \
           and (self.info.teams[1].known_teams == 1):
            if self.info.running == -1:
                if self.info.teams[0].score > self.info.teams[1].score:
                    self.info.winner = self.info.teams[0].name[0]
                    self.info.loser = self.info.teams[1].name[0]
                else:
                    self.info.loser = self.info.teams[0].name[0]
                    self.info.winner = self.info.teams[1].name[0]
            elif self.info.running > 0:
                if self.info.teams[0].score > self.info.teams[1].score:
                    self.info.possible_winner = self.info.teams[0].name[0]
                    self.info.possible_loser = self.info.teams[1].name[0]
                elif self.info.teams[0].score < self.info.teams[1].score:
                    self.info.possible_loser = self.info.teams[0].name[0]
                    self.info.possible_winner = self.info.teams[1].name[0]

    def __get_db_running_link(self, prefix, round_no):
        """Build the link of the running segment from the database.

        The segment number is the first column of the ``CURRENT_SEGMENT``
        query result.

        :param prefix: part of the match link preceding ``runda``
        :param round_no: round number as it appears in the match link
        :return: ``<prefix><round>t<table>-<segment>.html``
        """
        segment_row = self.database.fetch(
            self.config['database'], p_sql.CURRENT_SEGMENT, ())
        current_segment = int(segment_row[0])
        PlayoffLogger.get('matchinfo').info(
            'fetched running segment from DB for match #%d: %d',
            self.info.id, current_segment)
        link_parts = (prefix, round_no, self.config['table'], current_segment)
        return '%s%st%d-%d.html' % link_parts

    def __get_html_running_link(self):
        if self.info.link is None:
            raise ValueError('link not set')
        row = self.__find_table_row(self.info.link)
        running_link = row.select('td.bdcg a[href]')
        if len(running_link) == 0:
            raise ValueError('running link not found')
        PlayoffLogger.get('matchinfo').info(
            'fetched running link from HTML for match #%d: %s',
            self.info.id, running_link)
        return urljoin(self.info.link, running_link[0]['href'])

    def __determine_running_link(self):
        if self.info.link is None:
            return
        match_link = self.info.link
        link_match = re.match(r'^(.*)runda(\d+)\.html$', self.info.link)
        if link_match:
            try:
                if self.database is None:
                    raise KeyError('database not configured')
                self.info.link = self.__get_db_running_link(
                    link_match.group(1), link_match.group(2))
            except (IOError, TypeError, IndexError, KeyError) as e:
                PlayoffLogger.get('matchinfo').warning(
                    'cannot determine running link from DB for match #%d: %s(%s)',
                    self.info.id, type(e).__name__, str(e))
                try:
                    self.info.link = self.__get_html_running_link()
                except (TypeError, IndexError, KeyError, IOError, ValueError) as e:
                    PlayoffLogger.get('matchinfo').warning(
                        'cannot determine running link from HTML for match #%d: %s(%s)',
                        self.info.id, type(e).__name__, str(e))
            if self.info.link != match_link:
                # we've detected a running segment link
                # we should check if the segment's uploaded live
                try:
                    boards_played, board_count = self.__get_html_segment_board_count(re.sub('\.htm$', '.html', self.info.link))
                except IOError as e:
                    PlayoffLogger.get('matchinfo').warning(
                        'cannot determine running link (%s) board count for match #%d: %s(%s)',
                        self.info.link, self.info.id, type(e).__name__, str(e))
                    boards_played = 0
                if not boards_played:
                    PlayoffLogger.get('matchinfo').warning(
                        'running link (%s) for match #%d is not live, reverting to match link (%s)',
                        self.info.link, self.info.id, match_link)
                    self.info.link = match_link

    def set_phase_link(self, phase_link):
        """Combine the link of the enclosing phase with the match link.

        A match without a link takes the phase link as is; a link equal to
        ``'#'`` is left alone; any other link is resolved against the
        phase link.

        :param phase_link: link of the phase this match belongs to
        """
        current_link = self.info.link
        if current_link is None:
            self.info.link = phase_link
        elif current_link != '#':
            self.info.link = urljoin(phase_link, current_link)
        PlayoffLogger.get('matchinfo').info(
            'applying phase link %s to match #%d: %s',
            phase_link, self.info.id, self.info.link)

    def get_info(self):
        """Gather all data of the match and return its info object.

        Teams and scores are fetched first, then the board count, then the
        outcome is determined; the running link is looked up only while
        ``running`` is positive.

        :return: ``self.info``
        """
        self.__fetch_teams_with_scores()
        self.__fetch_board_count()
        self.__determine_outcome()
        match_in_progress = self.info.running > 0
        if match_in_progress:
            self.__determine_running_link()
        return self.info