diff options
author | emkael <emkael@tlen.pl> | 2022-04-04 01:24:36 +0200 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2022-04-04 01:27:46 +0200 |
commit | cd418f19e42c946c7216005d3dce97c545d120fc (patch) | |
tree | 23a6c01bca7b96e406844fdd848ae10300781a75 /boards/scrapers/jfrteamy/scrape.sh | |
parent | 0ebeac6121c10b41ac6060f06c6de50e49f929dd (diff) |
JFR Teamy board scraper
Diffstat (limited to 'boards/scrapers/jfrteamy/scrape.sh')
-rwxr-xr-x | boards/scrapers/jfrteamy/scrape.sh | 45 |
1 files changed, 45 insertions, 0 deletions
#!/bin/bash
#
# boards/scrapers/jfrteamy/scrape.sh -- JFR Teamy board scraper
#
# usage: ./scrape.sh URL ROUND_FROM ROUND_TO SEGMENT_FROM SEGMENT_TO
# URL should be the full URL of a *rundaX.html or *leaderb.html page
# script scrapes only single-hand-record protocols: non-TDD files or TDD-files
# in ukrywacz-only mode (with a single board layout)
#
# Downloaded pages are cached in a tmp/ directory next to this script and are
# not fetched again once present. For every round/segment pair one file named
# ${PREFIX}_${ROUND}-${SEGMENT}.pbn is written to the directory the script was
# started from, holding the stdout of scrape-boards.py for that segment.

set -u
shopt -s extglob # needed for the +([[:digit:]]) pattern below

# Print the command-line synopsis to stderr and exit with status 2.
usage() {
  printf 'usage: %s URL ROUND_FROM ROUND_TO SEGMENT_FROM SEGMENT_TO\n' \
    "${0##*/}" >&2
  exit 2
}

# Print an error message to stderr and exit with status 1.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

#######################################
# Download one page into the current directory unless it is already there.
# Globals:   URLDIR (read)
# Arguments: $1 - page file name; used both as the local file name and as the
#                 last component of the remote URL
# Returns:   0 if the file already existed or was downloaded, 1 otherwise
#######################################
fetch() {
  local page=$1
  [[ -f "$page" ]] && return 0
  # Download to a side file first: a failed transfer must not leave an empty
  # "$page" behind, or the existence check above would treat it as cached
  # forever. HTTP error responses are still stored as-is (no --fail), exactly
  # like the plain redirection did.
  if curl -s "${URLDIR}/${page}" > "${page}.part"; then
    mv -- "${page}.part" "$page"
  else
    rm -f -- "${page}.part"
    printf '%s: failed to download %s\n' "${0##*/}" "${URLDIR}/${page}" >&2
    return 1
  fi
}

# Extra arguments beyond the fifth are ignored.
(( $# >= 5 )) || usage

for BOUND in "$2" "$3" "$4" "$5"; do
  [[ "$BOUND" =~ ^[0-9]+$ ]] || die "not a non-negative integer: ${BOUND}"
done

# Force base 10 so zero-padded values such as 08 are not parsed as octal.
ROUND_FROM=$((10#$2))
ROUND_TO=$((10#$3))
SEGMENT_FROM=$((10#$4))
SEGMENT_TO=$((10#$5))

CURRDIR=$(pwd)

cd -- "$(dirname -- "$0")" || die "cannot enter the script directory"

URL=$1
URLDIR=${URL%/*}
URLPATH=${URL##*/}

# Reduce the page name to the tournament prefix: drop ".html" and whatever
# follows it, then trailing digits, then the "runda"/"leaderb" page suffix.
PREFIX=${URLPATH%.html*}
PREFIX=${PREFIX%%+([[:digit:]])}
PREFIX=${PREFIX%runda*}
PREFIX=${PREFIX%leaderb*}

mkdir -p tmp || die "cannot create the tmp directory"
cd tmp || die "cannot enter the tmp directory"

for ((RND = ROUND_FROM; RND <= ROUND_TO; RND++)); do
  for ((SEGMENT = SEGMENT_FROM; SEGMENT <= SEGMENT_TO; SEGMENT++)); do
    SEGMENTPATH="${PREFIX}${RND}t1-${SEGMENT}.html"
    if ! fetch "$SEGMENTPATH"; then
      # Without the segment page there is nothing to parse; do not emit a
      # bogus .pbn file for it.
      printf '%s: skipping round %s segment %s\n' \
        "${0##*/}" "$RND" "$SEGMENT" >&2
      continue
    fi
    # Board pages are numbered continuously within a round, 12 per segment.
    for BOARD in {1..12}; do
      BOARDPATH="${PREFIX}${RND}b-$(((SEGMENT - 1) * 12 + BOARD)).html"
      # Best effort: fetch reports the failure, the remaining boards are
      # still attempted.
      fetch "$BOARDPATH"
    done
    python3 ../scrape-boards.py "$SEGMENTPATH" \
      > "${CURRDIR}/${PREFIX}_${RND}-${SEGMENT}.pbn"
  done
done