summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2017-11-22 14:09:17 +0100
committer emkael <emkael@tlen.pl> 2017-11-22 14:09:17 +0100
commit 3e9bdf760c812f7e2a539144ebbe39a5d4bc730f (patch)
tree 142ebfd95fc46fa1f60a3554c44b68d62ff59c26
Initial commit.
Board scraper:
 * takes traveller file
 * produces complete PBN for all boards in segment
Scores scraper:
 * takes board file, round, segment and board number (1..n, not physical board number)
 * produces SQL that UPDATES scores table (so scores needs to have rows)
 * does not support ARB/Axx scores
 * probably won't support wrong lines
-rw-r--r-- scrape-boards.py 51
-rw-r--r-- scrape-scores.py 78
-rw-r--r-- scrape.sh 13
3 files changed, 142 insertions, 0 deletions
diff --git a/scrape-boards.py b/scrape-boards.py
new file mode 100644
index 0000000..f9e02cd
--- /dev/null
+++ b/scrape-boards.py
@@ -0,0 +1,51 @@
+from bs4 import BeautifulSoup as bs
+import bs4
+import os
+import sys
+
+# Usage: scrape-boards.py TRAVELLER_HTML (Python 2). Prints one PBN record per linked board.
+traveller_path = sys.argv[1]
+with open(traveller_path) as traveller_file:  # release the handle once the page is parsed
+    traveller = bs(traveller_file, 'lxml')
+
+print '% PBN 1.0'
+print '[Generator "JFRTeamy-restorerer"]'
+print '[Event "%s"]' % (traveller_path)
+
+# Board pages are resolved relative to the directory of the traveller page.
+base_dir = os.path.dirname(traveller_path)
+for board_link in traveller.select('td.bdcc a.zb'):
+    if not board_link.has_attr('href'):
+        continue
+    board_number = board_link.text.strip()
+    with open(os.path.join(base_dir, board_link['href'])) as board_file:
+        board = bs(board_file, 'lxml')
+    # Bare text nodes of the first matching <h4>: the first is used as dealer, the second as vulnerability.
+    conditions = [
+        c for c in board.select('td[valign="top"] h4')[0].contents
+        if type(c) is bs4.element.NavigableString
+    ]
+    dealer = conditions[0]
+    vulnerability = conditions[1].title()
+    if len(vulnerability) < 3:
+        vulnerability = vulnerability.upper()  # values shorter than 3 characters are upper-cased
+    card_cells = board.select('td.w')
+    if len(card_cells) != 4:
+        continue  # anything but exactly four td.w cells: the board is skipped silently
+    # Per cell: every second bare text node (presumably the suit holdings), with "10" written as "T".
+    cards = [
+        [
+            line.replace('10', 'T').replace(' ', '').strip()
+            for line in c
+            if type(line) is bs4.element.NavigableString
+        ][1::2] for c in card_cells
+    ]
+    print '[Board "%s"]' % board_number
+    print '[Dealer "%s"]' % dealer
+    print '[Vulnerable "%s"]' % vulnerability
+    # Cells are emitted in the order 0, 2, 3, 1 after "N:" (presumably N, E, S, W -- confirm).
+    print '[Deal "N:%s %s %s %s"]' % (
+        '.'.join(cards[0]), '.'.join(cards[2]),
+        '.'.join(cards[3]), '.'.join(cards[1])
+    )
+    print
diff --git a/scrape-scores.py b/scrape-scores.py
new file mode 100644
index 0000000..05517e6
--- /dev/null
+++ b/scrape-scores.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+from bs4 import BeautifulSoup as bs
+import bs4
+import re
+import sys
+
+# Usage: scrape-scores.py BOARD_HTML ROUND SEGMENT BOARD (Python 2 script).
+# Prints one SQL UPDATE for the scores table per result row found in BOARD_HTML.
+input_file = sys.argv[1]
+rnd = int(sys.argv[2])
+segment = int(sys.argv[3])
+board = int(sys.argv[4])
+
+with open(input_file) as board_page:  # release the handle once the page is parsed
+    content = bs(board_page, 'lxml')
+
+times_sign = '×'.decode('utf8')  # decoded once, not once per text node
+scores = []
+points = []
+for row in content.select('tr'):
+    cells = row.select('td.bdc')
+    if not cells:
+        continue
+    # Concatenate every cell but the last; tags contribute their alt text.
+    score = ''
+    for cell in cells[0:-1]:
+        for element in cell.contents:
+            if type(element) is bs4.element.Tag:
+                score += element['alt']
+            else:
+                score += element.replace(times_sign, 'x')
+    scores.append(score)
+    point_cells = row.select('td.zno') + row.select('td.zeo')
+    points.append(point_cells[0].text if point_cells else '0')
+
+# Rows alternate between rooms (even index: room 1, odd index: room 2).
+# Pairing them per room works for any number of tables, not only five.
+rooms = [
+    zip(scores[0::2], points[0::2]),
+    zip(scores[1::2], points[1::2]),
+]
+
+# Groups: 1 level, 2 denomination, 3 double, 4 declarer, 5 lead, 7 result.
+# NOTE(review): no end anchor, so text after the first result digit is ignored
+# (a two-digit result such as "-10" would be read as -1) -- confirm page format.
+score_regex = re.compile(r'^([1-7])([cdhns])(x?)([ENSW])([cdhs]([AKQJ2-9]|10))(=|(-|\+)[1-7])')
+
+
+def update_statement(declarer, contract, result, score, lead, room, tabl):
+    """Return the UTF-8 encoded UPDATE statement for a single table."""
+    # Values are inserted verbatim: string values must arrive already quoted.
+    return (
+        'UPDATE scores SET declarer = %s, contract = %s, result = %s, '
+        'score = %s, lead = %s WHERE rnd = %d AND segment = %d AND '
+        'room = %d AND tabl = %d AND board = %d;'
+        % (declarer, contract, result, score, lead,
+           rnd, segment, room, tabl, board)
+    ).encode('utf8')
+
+
+for room, results in enumerate(rooms, 1):
+    for tabl, (score, point_result) in enumerate(results, 1):
+        parsed_score = score_regex.match(score)
+        if parsed_score:
+            contract = ('%s %s %s' % (
+                parsed_score.group(1),
+                parsed_score.group(2).replace('n', 'nt').upper(),
+                parsed_score.group(3)
+            )).strip()
+            result = parsed_score.group(7)
+            print update_statement(
+                '"%s"' % parsed_score.group(4), '"%s"' % contract,
+                0 if result == '=' else int(result), point_result,
+                '"%s"' % parsed_score.group(5).upper(), room, tabl)
+        elif score[0:3] == 'PAS':
+            print update_statement('NULL', '"PASS"', 'NULL', '0', 'NULL', room, tabl)
+        else:
+            print '-- ->' + score.encode('utf8')
diff --git a/scrape.sh b/scrape.sh
new file mode 100644
index 0000000..489ecf4
--- /dev/null
+++ b/scrape.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+rm -f scores.sql ./*.pbn  # regenerate all outputs; -f because a first run has nothing to remove
+for RND in {1..3}
+do
+    for SEGMENT in {1..2}
+    do
+        for BOARD in {1..8}  # position within the segment; page names number boards across segments
+        do
+            python scrape-scores.py "../ivld_www/ivld_rr1_${RND}b-$((($SEGMENT - 1) * 8 + $BOARD)).html" "$RND" "$SEGMENT" "$BOARD" >> scores.sql
+        done
+        python scrape-boards.py "../ivld_www/ivld_rr1_${RND}t1-${SEGMENT}.html" > "${RND}-${SEGMENT}.pbn"
+    done
+done