From 3e9bdf760c812f7e2a539144ebbe39a5d4bc730f Mon Sep 17 00:00:00 2001
From: emkael
Date: Wed, 22 Nov 2017 14:09:17 +0100
Subject: Initial commit.

Board scraper:
 * takes traveller file
 * complete produces PBN for all boards in segment

Scores scraper:
 * takes board file, round, segment and board number (1..n, not physical board number)
 * produces SQL that UPDATES scores table (so scores needs to have rows)
 * does not support ARB/Axx scores
 * probably won't support wrong lines
---
Reviewer notes (outside the commit message; text between the three-dash
line and the first "diff --git" is skipped when the patch is applied):
 * Line structure of this patch was restored from a whitespace-collapsed
   copy. Leading indentation of the payload was reconstructed from syntax
   (4 spaces assumed); tokens and hunk line counts are unchanged.
 * Both Python scripts are Python 2 only: they use print statements, the
   file() builtin and str.decode().
 * scrape-boards.py never closes the traveller or board file handles.
 * scrape-scores.py indexes exactly 10 entries per board (2 rooms x 5
   tables); a page yielding fewer td.bdc rows raises IndexError.
 * scrape-scores.py inserts the td.zno/td.zeo cell text into the UPDATE
   statement unquoted and unvalidated; the other values come from the
   character classes of score_regex.
 * scrape.sh has no shebang although {1..3} brace ranges are not POSIX sh,
   and its rm calls report errors on a first run when no outputs exist.

 scrape-boards.py | 51 ++++++++++++++++++++++++++++++++++++
 scrape-scores.py | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 scrape.sh        | 13 ++++++++++
 3 files changed, 142 insertions(+)
 create mode 100644 scrape-boards.py
 create mode 100644 scrape-scores.py
 create mode 100644 scrape.sh

diff --git a/scrape-boards.py b/scrape-boards.py
new file mode 100644
index 0000000..f9e02cd
--- /dev/null
+++ b/scrape-boards.py
@@ -0,0 +1,51 @@
+from bs4 import BeautifulSoup as bs
+import bs4
+import os
+import sys
+
+traveller_file = file(sys.argv[1])
+traveller = bs(traveller_file, 'lxml')
+
+print '% PBN 1.0'
+print '[Generator "JFRTeamy-restorerer"]'
+print '[Event "%s"]' % (traveller_file.name)
+
+board_links = traveller.select('td.bdcc a.zb')
+for board_link in board_links:
+    if board_link.has_attr('href'):
+        board_number = board_link.text.strip()
+        board_file = open(
+            os.path.join(
+                os.path.dirname(traveller_file.name),
+                board_link['href']
+            )
+        )
+        board = bs(board_file, 'lxml')
+        conditions = [
+            c for c in
+            board.select('td[valign="top"] h4')[0].contents
+            if type(c) == bs4.element.NavigableString
+        ]
+        dealer = conditions[0]
+        vulnerability = conditions[1].title()
+        if len(vulnerability) < 3:
+            vulnerability = vulnerability.upper()
+        card_cells = board.select('td.w')
+        if len(card_cells) == 4:
+            cards = [
+                [
+                    line.replace('10', 'T').replace(' ', '').strip()
+                    for line in c
+                    if type(line) == bs4.element.NavigableString
+                ][1::2] for c in card_cells
+            ]
+            print '[Board "%s"]' % board_number
+            print '[Dealer "%s"]' % dealer
+            print '[Vulnerable "%s"]' % vulnerability
+            print '[Deal "N:%s %s %s %s"]' % (
+                '.'.join(cards[0]),
+                '.'.join(cards[2]),
+                '.'.join(cards[3]),
+                '.'.join(cards[1])
+            )
+            print
diff --git a/scrape-scores.py b/scrape-scores.py
new file mode 100644
index 0000000..05517e6
--- /dev/null
+++ b/scrape-scores.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+from bs4 import BeautifulSoup as bs
+import bs4
+import re
+import sys
+
+input_file = sys.argv[1]
+rnd = int(sys.argv[2])
+segment = int(sys.argv[3])
+board = int(sys.argv[4])
+
+content = bs(file(input_file), 'lxml')
+rows = content.select('tr')
+
+scores = []
+points = []
+
+for row in rows:
+    cells = row.select('td.bdc')
+    if len(cells) > 0:
+        score = ''
+        cells = cells[0:-1]
+        for cell in cells:
+            for element in cell.contents:
+                if type(element) is bs4.element.Tag:
+                    score += element['alt']
+                else:
+                    score += element.replace('×'.decode('utf8'), 'x')
+        scores.append(score)
+        point_cells = row.select('td.zno') + row.select('td.zeo')
+        points.append((point_cells)[0].text if len(point_cells) > 0 else '0')
+
+
+sorted_scores = scores[0::2] + scores[1::2]
+sorted_points = points[0::2] + points[1::2]
+
+score_regex = re.compile(r'^([1-7])([cdhns])(x?)([ENSW])([cdhs]([AKQJ2-9]|10))(=|(-|\+)[1-7])')
+
+for room in [1, 2]:
+    for tabl in range(1, 6):
+        score = sorted_scores[(room - 1) * 5 + tabl - 1]
+        point_result = sorted_points[(room - 1) * 5 + tabl - 1]
+        parsed_score = re.match(score_regex, score)
+        if parsed_score:
+            contract = ('%s %s %s' % (
+                parsed_score.group(1),
+                parsed_score.group(2).replace('n', 'nt').upper(),
+                parsed_score.group(3)
+            )).strip()
+            declarer = parsed_score.group(4)
+            lead = parsed_score.group(5).upper()
+            result = parsed_score.group(7)
+            result = 0 if result == '=' else int(result)
+            print ('UPDATE scores SET ' \
+                   'declarer = "' + declarer + '", ' \
+                   'contract = "' + contract + '", ' \
+                   'result = ' + str(result) + ', ' \
+                   'score = ' + point_result + ', ' \
+                   'lead = "' + lead + '" WHERE ' \
+                   'rnd = ' + str(rnd) + ' AND ' \
+                   'segment = ' + str(segment) + ' AND ' \
+                   'room = ' + str(room) + ' AND ' \
+                   'tabl = ' + str(tabl) + ' AND ' \
+                   'board = ' + str(board) + ';').encode('utf8')
+        elif score[0:3] == 'PAS':
+            print ('UPDATE scores SET ' \
+                   'declarer = NULL, ' \
+                   'contract = "PASS", ' \
+                   'result = NULL, ' \
+                   'score = 0, ' \
+                   'lead = NULL WHERE ' \
+                   'rnd = ' + str(rnd) + ' AND ' \
+                   'segment = ' + str(segment) + ' AND ' \
+                   'room = ' + str(room) + ' AND ' \
+                   'tabl = ' + str(tabl) + ' AND ' \
+                   'board = ' + str(board) + ';').encode('utf8')
+        else:
+            print '-- ->' + score.encode('utf8')
diff --git a/scrape.sh b/scrape.sh
new file mode 100644
index 0000000..489ecf4
--- /dev/null
+++ b/scrape.sh
@@ -0,0 +1,13 @@
+rm scores.sql
+rm *.pbn
+for RND in {1..3}
+do
+    for SEGMENT in {1..2}
+    do
+        for BOARD in {1..8}
+        do
+            python scrape-scores.py ../ivld_www/ivld_rr1_${RND}b-$((($SEGMENT - 1) * 8 + $BOARD)).html $RND $SEGMENT $BOARD >> scores.sql
+        done
+        python scrape-boards.py ../ivld_www/ivld_rr1_${RND}t1-${SEGMENT}.html > ${RND}-${SEGMENT}.pbn
+    done
+done
-- 
cgit v1.2.3