diff options
author | emkael <emkael@tlen.pl> | 2017-11-22 14:09:17 +0100 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2017-11-22 14:09:17 +0100 |
commit | 3e9bdf760c812f7e2a539144ebbe39a5d4bc730f (patch) | |
tree | 142ebfd95fc46fa1f60a3554c44b68d62ff59c26 /scrape-scores.py |
Initial commit.
Board scraper:
* takes traveller file
* produces complete PBN for all boards in the segment
Scores scraper:
* takes board file, round, segment and board number (1..n, not physical board number)
* produces SQL that UPDATEs the scores table (so the rows must already exist in scores)
* does not support ARB/Axx scores
* probably won't support wrong lines
Diffstat (limited to 'scrape-scores.py')
-rw-r--r-- | scrape-scores.py | 78 |
1 files changed, 78 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""Scrape the results of one board from an HTML file and print SQL UPDATEs.

Usage: scrape-scores.py BOARD_FILE ROUND SEGMENT BOARD

Every ``<tr>`` containing ``td.bdc`` cells is treated as one result row.
Rows are assumed to alternate between room 1 and room 2 (first row: room 1,
table 1; second row: room 2, table 1; and so on).  For each row one line is
written to standard output, UTF-8 encoded:

* an ``UPDATE scores ...`` statement for a played contract,
* an ``UPDATE scores ...`` statement with NULLs for a passed-out board,
* an SQL comment (``-- ->...``) for anything that could not be parsed.
"""
import re
import sys

# contract level, denomination, optional double, declarer, lead card, result.
# NOTE: the result part only recognises 1-7 over/undertricks and the pattern
# is not anchored at the end, so e.g. "-10" is read as "-1".
score_regex = re.compile(r'^([1-7])([cdhns])(x?)([ENSW])([cdhs]([AKQJ2-9]|10))(=|(-|\+)[1-7])')

_USAGE = 'usage: scrape-scores.py BOARD_FILE ROUND SEGMENT BOARD\n'


def _flatten(text):
    """Collapse all whitespace runs so *text* fits on one SQL comment line."""
    return ' '.join(text.split())


def build_statement(score, point_result, rnd, segment, room, tabl, board):
    """Return the output line for a single scraped result row.

    score        -- concatenated text of the row's result cells, e.g. "4sxNhA-1"
    point_result -- text of the points cell; must be an integer literal
    rnd, segment, room, tabl, board -- integers identifying the scores row

    A row whose score matches neither a contract nor "PAS...", or whose
    points text is not an integer, yields an SQL comment instead of an
    UPDATE, so scraped text is never copied verbatim into a statement.
    """
    where = 'rnd = %d AND segment = %d AND room = %d AND tabl = %d AND board = %d' % (
        rnd, segment, room, tabl, board)
    parsed_score = score_regex.match(score)
    if parsed_score:
        try:
            points = int(point_result)
        except ValueError:
            return '-- ->' + _flatten(score) + ' [bad score: ' + _flatten(point_result) + ']'
        contract = ('%s %s %s' % (
            parsed_score.group(1),
            parsed_score.group(2).replace('n', 'nt').upper(),
            parsed_score.group(3)
        )).strip()
        declarer = parsed_score.group(4)
        lead = parsed_score.group(5).upper()
        result = parsed_score.group(7)
        result = 0 if result == '=' else int(result)
        return ('UPDATE scores SET '
                + 'declarer = "' + declarer + '", '
                + 'contract = "' + contract + '", '
                + 'result = ' + str(result) + ', '
                + 'score = ' + str(points) + ', '
                + 'lead = "' + lead + '" WHERE '
                + where + ';')
    if score.startswith('PAS'):
        return ('UPDATE scores SET '
                + 'declarer = NULL, '
                + 'contract = "PASS", '
                + 'result = NULL, '
                + 'score = 0, '
                + 'lead = NULL WHERE '
                + where + ';')
    # A "--" comment ends at the first newline, so keep it on one line.
    return '-- ->' + _flatten(score)


def _scrape_results(input_file):
    """Return two parallel lists ``(scores, points)``, one entry per result row.

    The score string is built from every ``td.bdc`` cell of the row except
    the last one: tags contribute their ``alt`` attribute, text nodes
    contribute their text with the multiplication sign replaced by "x".
    The points entry is the text of the first ``td.zno``/``td.zeo`` cell,
    or '0' when the row has none.
    """
    # Imported here so that build_statement() stays importable without
    # bs4/lxml being installed.
    import bs4

    # Binary mode: let BeautifulSoup detect the document encoding itself.
    with open(input_file, 'rb') as handle:
        content = bs4.BeautifulSoup(handle, 'lxml')

    scores = []
    points = []
    for row in content.select('tr'):
        cells = row.select('td.bdc')
        if not cells:
            continue
        parts = []
        for cell in cells[:-1]:
            for element in cell.contents:
                if isinstance(element, bs4.element.Tag):
                    # A tag without "alt" contributes nothing; the row then
                    # fails to parse and is reported as a comment instead of
                    # aborting the whole run with a KeyError.
                    parts.append(element.get('alt', ''))
                else:
                    parts.append(element.replace(u'\u00d7', 'x'))
        scores.append(''.join(parts))
        # NOTE(review): the cell text is used as-is, whichever of the two
        # classes it came from -- confirm the sign convention of the source.
        point_cells = row.select('td.zno') + row.select('td.zeo')
        points.append(point_cells[0].text if point_cells else '0')
    return scores, points


def _emit(line):
    """Write *line* plus a newline to stdout as UTF-8 (Python 2 and 3)."""
    data = (line + u'\n').encode('utf8')
    getattr(sys.stdout, 'buffer', sys.stdout).write(data)


def main(argv):
    """Run the scraper for command line *argv*; return the exit status."""
    if len(argv) < 5:
        sys.stderr.write(_USAGE)
        return 2
    input_file = argv[1]
    rnd = int(argv[2])
    segment = int(argv[3])
    board = int(argv[4])

    scores, points = _scrape_results(input_file)

    # Even-indexed rows belong to room 1, odd-indexed rows to room 2.  The
    # number of tables follows from the number of rows instead of being
    # fixed at five.
    for room, first in ((1, 0), (2, 1)):
        room_rows = zip(scores[first::2], points[first::2])
        for tabl, (score, point_result) in enumerate(room_rows, 1):
            _emit(build_statement(
                score, point_result, rnd, segment, room, tabl, board))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))