summaryrefslogtreecommitdiff
path: root/boards/scrapers
diff options
context:
space:
mode:
Diffstat (limited to 'boards/scrapers')
-rw-r--r--boards/scrapers/jfrteamy/.gitignore3
-rw-r--r--boards/scrapers/jfrteamy/requirements.txt2
-rw-r--r--boards/scrapers/jfrteamy/scrape-boards.py51
-rwxr-xr-xboards/scrapers/jfrteamy/scrape.sh45
4 files changed, 101 insertions, 0 deletions
diff --git a/boards/scrapers/jfrteamy/.gitignore b/boards/scrapers/jfrteamy/.gitignore
new file mode 100644
index 0000000..b983d81
--- /dev/null
+++ b/boards/scrapers/jfrteamy/.gitignore
@@ -0,0 +1,3 @@
+*.html
+*.htm
+*.pbn
diff --git a/boards/scrapers/jfrteamy/requirements.txt b/boards/scrapers/jfrteamy/requirements.txt
new file mode 100644
index 0000000..83780ba
--- /dev/null
+++ b/boards/scrapers/jfrteamy/requirements.txt
@@ -0,0 +1,2 @@
+beautifulsoup4==4.6.0
+lxml
diff --git a/boards/scrapers/jfrteamy/scrape-boards.py b/boards/scrapers/jfrteamy/scrape-boards.py
new file mode 100644
index 0000000..b3f806a
--- /dev/null
+++ b/boards/scrapers/jfrteamy/scrape-boards.py
@@ -0,0 +1,51 @@
+from bs4 import BeautifulSoup as bs
+import bs4
+import os
+import sys
+
+# Convert a JFR Teamy traveller page (argv[1]) and the board pages it links
+# to into PBN records written to stdout.
+if len(sys.argv) < 2:
+    sys.exit('usage: scrape-boards.py TRAVELLER_FILE')
+traveller_path = sys.argv[1]
+# parse inside a with-block so the handle is closed even if parsing raises
+with open(traveller_path, encoding='utf8') as traveller_file:
+    traveller = bs(traveller_file, 'lxml')
+
+print('% PBN 1.0')
+print('[Generator "JFRTeamy-restorerer"]')
+print('[Event "%s"]' % traveller_path)
+
+for board_link in traveller.select('td.bdcc a.zb'):
+    if not board_link.has_attr('href'):
+        continue
+    board_number = board_link.text.strip()
+    # indexed by board number modulo 4: 1 -> N, 2 -> E, 3 -> S, 0 -> W
+    dealer = ['W', 'N', 'E', 'S'][int(board_number) % 4]
+    # board pages are resolved relative to the traveller file's directory
+    board_path = os.path.join(os.path.dirname(traveller_path), board_link['href'])
+    with open(board_path, encoding='utf8') as board_file:
+        board = bs(board_file, 'lxml')
+    card_cells = board.select('td.w')  # ordinary JFR
+    if len(card_cells) != 4:  # ukrywacz'ed JFR (TDD with a single table)
+        # also take every <td> that is the direct parent of a <br>
+        for br in board.select('td br'):
+            cell = br.parent
+            if cell.name == 'td' and cell not in card_cells:
+                card_cells.append(cell)
+    # keep only direct text children; the exact type test (not isinstance)
+    # skips NavigableString subclasses such as Comment
+    card_strings = [
+        [
+            line.replace('10', 'T').replace(' ', '').strip()
+            for line in c
+            if type(line) is bs4.element.NavigableString
+        ] for c in card_cells
+    ]
+    # ordinary JFR has 8 strings per cell, TDD makes it 4
+    cards = [c if len(c) == 4 else c[1::2] for c in card_strings]
+    print('[Board "%s"]' % board_number)
+    print('[Dealer "%s"]' % dealer)
+    # NOTE(review): cells are presumably in N, W, E, S order; PBN wants N E S W
+    print('[Deal "N:%s %s %s %s"]' % tuple('.'.join(cards[i]) for i in (0, 2, 3, 1)))
+    print()
diff --git a/boards/scrapers/jfrteamy/scrape.sh b/boards/scrapers/jfrteamy/scrape.sh
new file mode 100755
index 0000000..1ab16e1
--- /dev/null
+++ b/boards/scrapers/jfrteamy/scrape.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# usage: ./scrape.sh URL ROUND_FROM ROUND_TO SEGMENT_FROM SEGMENT_TO
+# URL should be *rundaX.html or *leaderb.html full URL
+# script scrapes only single-hand-record protocols: non-TDD files or TDD-files in ukrywacz-only mode (with a single board layout)
+
+set -u
+shopt -s extglob # needed for the +([[:digit:]]) pattern below
+
+# remember the caller's directory: the resulting .pbn files are written there
+CURRDIR=$(pwd)
+
+# quoted so that a script path containing spaces survives word splitting
+cd "$(dirname "$0")" || exit 1
+
+URL=$1
+URLDIR=${URL%/*}
+URLPATH=${URL##*/}
+
+# file-name prefix: drop ".html" and what follows, trailing digits, then a trailing "runda..."/"leaderb..." part
+PREFIX=${URLPATH%.html*}
+PREFIX=${PREFIX%%+([[:digit:]])}
+PREFIX=${PREFIX%runda*}
+PREFIX=${PREFIX%leaderb*}
+
+# pages are downloaded into tmp/ next to this script and reused on later runs
+mkdir -p tmp
+cd tmp || exit 1
+
+for RND in $(seq "$2" "$3"); do
+    for SEGMENT in $(seq "$4" "$5"); do
+        SEGMENTPATH="${PREFIX}${RND}t1-${SEGMENT}.html"
+        if [ ! -f "$SEGMENTPATH" ]; then
+            curl -s "${URLDIR}/${SEGMENTPATH}" > "$SEGMENTPATH"
+        fi
+        # 12 boards per segment, numbered consecutively across segments
+        for BOARD in {1..12}; do
+            BOARDPATH="${PREFIX}${RND}b-$(( (SEGMENT-1) * 12 + BOARD)).html"
+            if [ ! -f "$BOARDPATH" ]; then
+                curl -s "${URLDIR}/${BOARDPATH}" > "$BOARDPATH"
+            fi
+        done
+        python3 ../scrape-boards.py "$SEGMENTPATH" > "${CURRDIR}/${PREFIX}_${RND}-${SEGMENT}.pbn"
+    done
+done