#!/bin/bash
# usage: ./scrape.sh URL ROUND_FROM ROUND_TO SEGMENT_FROM SEGMENT_TO
#
# URL should be the full URL of a *rundaX.html or *leaderb.html page.
# The script scrapes only single-hand-record protocols: non-TDD files or
# TDD files in ukrywacz-only mode (with a single board layout).
#
# For every round and segment in the given inclusive ranges the script:
#   1. downloads the segment page and its 12 board pages into ./tmp next to
#      this script (files already present there are reused, not re-fetched),
#   2. runs scrape-boards.py on the segment page and writes its output to
#      <PREFIX>_<ROUND>-<SEGMENT>.pbn in the directory the script was
#      invoked from.
#
# Exit status: 0 on success, 1 if any download or parse step failed,
# 2 on invalid arguments.

set -u
shopt -s extglob

readonly BOARDS_PER_SEGMENT=12

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the usage line to stderr and abort with status 2.
usage() {
  printf 'usage: %s URL ROUND_FROM ROUND_TO SEGMENT_FROM SEGMENT_TO\n' "${0##*/}" >&2
  exit 2
}

# Download URL ($1) into file ($2) unless the file already exists.
# The body is written to "<file>.part" first and renamed only when curl
# succeeds, so a failed transfer never leaves an empty file behind that
# later runs would mistake for a cached page.
# curl is still run without --fail, so HTTP error pages are saved exactly as
# before. NOTE(review): switch to `curl -fsS` if the parser never needs them.
# Returns 0 when the file is available afterwards, 1 otherwise.
fetch_cached() {
  local url=$1 dest=$2
  if [[ -f "$dest" ]]; then
    return 0
  fi
  if curl -s "$url" > "${dest}.part"; then
    mv -- "${dest}.part" "$dest"
  else
    rm -f -- "${dest}.part"
    printf 'warning: failed to download %s\n' "$url" >&2
    return 1
  fi
}

(( $# >= 5 )) || usage
for ARG in "$2" "$3" "$4" "$5"; do
  [[ "$ARG" =~ ^[0-9]+$ ]] || usage
done

URL=$1
# Force base 10 so that bounds such as "08" are not parsed as octal.
ROUND_FROM=$((10#$2))
ROUND_TO=$((10#$3))
SEGMENT_FROM=$((10#$4))
SEGMENT_TO=$((10#$5))

# Remember where we were called from: the .pbn files are written there.
CURRDIR=$PWD
cd -- "$(dirname -- "$0")" || die "cannot cd to script directory"

URLDIR=${URL%/*}
URLPATH=${URL##*/}
# Derive the tournament prefix from the page name: drop ".html" and anything
# after it, then trailing digits, then a trailing "runda..." / "leaderb..."
# part.
PREFIX=${URLPATH%.html*}
PREFIX=${PREFIX%%+([[:digit:]])}
PREFIX=${PREFIX%runda*}
PREFIX=${PREFIX%leaderb*}

mkdir -p tmp || die "cannot create tmp directory"
cd tmp || die "cannot cd to tmp directory"

STATUS=0
for (( RND = ROUND_FROM; RND <= ROUND_TO; RND++ )); do
  for (( SEGMENT = SEGMENT_FROM; SEGMENT <= SEGMENT_TO; SEGMENT++ )); do
    SEGMENT_OK=1

    SEGMENTPATH="${PREFIX}${RND}t1-${SEGMENT}.html"
    fetch_cached "${URLDIR}/${SEGMENTPATH}" "$SEGMENTPATH" || SEGMENT_OK=0

    # Board pages are numbered continuously across the segments of a round.
    for (( BOARD = 1; BOARD <= BOARDS_PER_SEGMENT; BOARD++ )); do
      BOARDPATH="${PREFIX}${RND}b-$(( (SEGMENT - 1) * BOARDS_PER_SEGMENT + BOARD )).html"
      fetch_cached "${URLDIR}/${BOARDPATH}" "$BOARDPATH" || SEGMENT_OK=0
    done

    if (( ! SEGMENT_OK )); then
      printf 'warning: skipping round %s segment %s (incomplete download)\n' \
        "$RND" "$SEGMENT" >&2
      STATUS=1
      continue
    fi

    OUTPUT="${CURRDIR}/${PREFIX}_${RND}-${SEGMENT}.pbn"
    if ! python3 ../scrape-boards.py "$SEGMENTPATH" > "$OUTPUT"; then
      printf 'warning: scrape-boards.py failed for %s\n' "$SEGMENTPATH" >&2
      STATUS=1
    fi
  done
done

exit "$STATUS"