import sys
import glob
import re
import math
import copy
from os import path
from bs4 import BeautifulSoup as bs4
from bs4.element import NavigableString
class JFRVirtualTable:
    """Hide "virtual" pairs in the result pages of a JFR Pary tournament.

    Given a tournament path prefix, the class locates the generated files
    (board travellers, pair records, short and full results, pair records
    list, collected scores) and rewrites them in place:

    * rows and cells of virtual pairs are removed from the result listings,
    * in every board traveller all virtual tables are collapsed into
      a single row, prefixed with an annotated header row.

    Only constructs valid on both Python 2 and Python 3 are used.
    """

    # files located by __parse_filepaths(); always re-bound per instance
    __traveller_files = []
    __pair_records_files = []
    __results_file = None
    __full_results_file = None
    __pair_records_list_file = None
    __collected_scores_file = None
    # text for traveller header row
    __header_text = ''

    def __init__(self, path_prefix, virtual_pairs=None, header_text=''):
        """Locate tournament files and establish the virtual pair numbers.

        :param path_prefix: tournament path including the file name prefix
                            (without the ``.html`` extension)
        :param virtual_pairs: list of virtual pair numbers; when empty or
                              ``None`` the pairs are auto-detected from the
                              pair record files
        :param header_text: text of the header row inserted into travellers
        """
        self.__parse_filepaths(path_prefix)
        if not virtual_pairs:
            virtual_pairs = self.__detect_virtual_pairs()
        self.__virtual_pairs = virtual_pairs
        self.__header_text = header_text

    def __parse_filepaths(self, prefix):
        """Work out the paths of all tournament files from the path prefix.

        Fills in the traveller and pair record file lists (by globbing the
        tournament directory) and the paths of the single-instance pages.
        """
        file_path = path.realpath(prefix + '.html')
        tournament_path = path.dirname(file_path)
        tournament_prefix = path.splitext(path.basename(file_path))[0]
        # the prefix is literal text, it must not be interpreted as a RegEx
        escaped_prefix = re.escape(tournament_prefix)

        # converts {prefix}{anything}.{ext} filename to full path
        def get_path(relative_path):
            return path.join(tournament_path, relative_path)

        # RegEx matching traveller files for each board
        traveller_files_match = re.compile(
            escaped_prefix + r'([0-9]{3})\.txt')
        # filtering out traveller files from all TXT files
        self.__traveller_files = [
            f for f
            in glob.glob(get_path(tournament_prefix + '*.txt'))
            if traveller_files_match.search(f)]

        # RegEx for matching pair record files
        records_files_match = re.compile(
            'H-' + escaped_prefix + r'-([0-9]{1,3})\.html')
        self.__pair_records_files = [
            f for f
            in glob.glob(get_path('H-' + tournament_prefix + '*.html'))
            if records_files_match.search(f)]

        # short result list, from side frame
        self.__results_file = get_path(tournament_prefix + 'WYN.txt')
        # full results page
        self.__full_results_file = get_path(
            'W-' + tournament_prefix + '.html')
        # list of pair records links page
        self.__pair_records_list_file = get_path(
            'H-' + tournament_prefix + '-lista.html')
        # collected scores page
        self.__collected_scores_file = get_path(
            tournament_prefix + 'zbior.html')

    def __detect_virtual_pairs(self):
        """Auto-detect virtual pairs by their record file header.

        A pair is considered virtual when the header of its record file
        matches ``number: name - name, ...`` with both names empty.

        :return: sorted list of detected virtual pair numbers
        """
        virtual_pairs = []
        # RegEx for matching pair number and names in pair record header
        pair_header_match = re.compile('([0-9]{1,}): (.*) - (.*), .*')
        for record_file_path in self.__pair_records_files:
            # binary mode: let BeautifulSoup detect the encoding itself
            with open(record_file_path, 'rb') as record_file:
                record = bs4(record_file)
            header_cells = record.select('td.o1')
            if not header_cells:
                # no header cell, nothing to decide on
                continue
            # first plain text node of the first td.o1 cell with content
            # matching pair header is what we're after
            for con in header_cells[0].contents:
                # exact type check on purpose: NavigableString subclasses
                # (e.g. comments) are not header text
                if type(con) is not NavigableString:
                    continue
                header_match = pair_header_match.match(con)
                if header_match is None:
                    continue
                names = [name for name in header_match.group(2, 3) if name]
                # virtual pair does not have any names filled
                if not names:
                    virtual_pairs.append(int(header_match.group(1)))
                break
        return sorted(virtual_pairs)

    @staticmethod
    def __pair_number(node):
        """Return the pair number held in the first child of ``node``.

        :return: the number as ``int`` or ``None`` when the node is empty
                 or its first child is not a number
        """
        try:
            return int(node.contents[0])
        except (IndexError, TypeError, ValueError):
            return None

    @staticmethod
    def __is_record_link(link):
        """Tell whether ``link`` points to a pair record page.

        Pair record links start with ``H-``; the link to the records list
        page (ending with ``lista.html``) is not one of them.
        """
        if not link.has_attr('href'):
            return False
        href = link['href']
        return href.startswith('H-') and not href.endswith('lista.html')

    # wrapper for DOM manipulation
    # wraps the inner function into BS4 invocation and file overwrite
    def __fix_file(worker):
        def file_wrapper(self, file_path, encoding='utf-8'):
            """Parse ``file_path``, run the worker, write the result back.

            The worker receives the parsed document and returns the node
            which replaces the file content (serialized in ``encoding``).
            """
            # binary mode: prettify() with an encoding yields encoded bytes
            with open(file_path, 'rb+') as content_file:
                content = bs4(content_file, from_encoding=encoding)
                content = worker(self, content)
                content_file.seek(0)
                content_file.write(
                    content.prettify(encoding, formatter='html'))
                # new content may be shorter than the old one
                content_file.truncate()
        return file_wrapper

    @__fix_file
    def __fix_results(self, content):
        """Fix simple results list by removing virtual pair rows.

        :return: the (only) table of the document
        """
        for row in content.select('tr'):
            cells = row.select('td')
            # 6 or more cells in a "proper" result row
            # (may contain carry over or penalties),
            # third cell in the row is pair number
            if len(cells) >= 6:
                if self.__pair_number(cells[2]) in self.__virtual_pairs:
                    row.extract()
        return content.table

    @__fix_file
    def __fix_full_results(self, content):
        """Fix full results file by removing virtual pair rows."""
        for row in content.select('tr'):
            # select rows by cells containing pair records links
            cell_links = [link for link
                          in row.select('td a')
                          if self.__is_record_link(link)]
            # remove these containing links to virtual pairs
            if cell_links:
                if self.__pair_number(cell_links[0]) in self.__virtual_pairs:
                    row.extract()
        return content

    @__fix_file
    def __fix_records_list(self, content):
        """Fix the page with pair records links list.

        Cells linking to virtual pairs are dropped and the remaining link
        cells are re-arranged into rows of the original width.

        :raises ValueError: if the column count read from the page is not
                            positive
        """
        # read the original column count
        row_cell_count = int(content.table.select('tr td.o')[0]['colspan'])
        if row_cell_count < 1:
            # would never finish arranging the rows below
            raise ValueError(
                'invalid column count: %d' % row_cell_count)
        # gather rows which contained any links
        link_rows = []
        # gather cells which should stay
        link_cells = []
        for row in content.select('tr'):
            cells_found = False
            for cell in row.select('td.u'):
                # select cells by pair records links inside
                cell_links = [link for link
                              in cell.select('a.pa')
                              if self.__is_record_link(link)]
                if not cell_links:
                    continue
                cells_found = True
                if self.__pair_number(cell_links[0]) in self.__virtual_pairs:
                    # delete virtual pair cells
                    cell.extract()
                else:
                    # store actual pair cells
                    link_cells.append(cell)
            # gather processed rows
            if cells_found:
                link_rows.append(row)
        # detach actual pair cells from the tree
        cells = [cell.extract() for cell in link_cells]
        for row in link_rows:
            row.extract()
        # first filler cell of each new row
        first_cell = content.new_tag('td', **{'class': 'n'})
        first_cell.string = u'\xa0'
        # arrange cells into rows, full rows first
        while len(cells) >= row_cell_count:
            new_row = content.new_tag('tr')
            new_row.append(copy.copy(first_cell))
            for cell in cells[0:row_cell_count]:
                new_row.append(cell)
            content.table.append(new_row)
            del cells[0:row_cell_count]
        # last row may or may not be full
        last_row = content.new_tag('tr')
        last_row.append(copy.copy(first_cell))
        for cell in cells:
            last_row.append(cell)
        # if it wasn't full, fill it with a col-spanned last cell
        if len(cells) < row_cell_count:
            last_cell = content.new_tag('td',
                                        colspan=row_cell_count-len(cells))
            last_cell.string = u'\xa0'
            last_row.append(last_cell)
        content.table.append(last_row)
        return content

    @__fix_file
    def __fix_collected(self, content):
        """Fix collected scores tables by removing virtual pair rows."""
        for row in content.select('tr'):
            cells = row.select('td')
            # "proper" rows should have 7 cells
            if len(cells) == 7:
                # cells without proper pair numbers are ignored,
                # both pairs have to be virtual
                if self.__pair_number(cells[1]) in self.__virtual_pairs:
                    if self.__pair_number(cells[2]) in self.__virtual_pairs:
                        row.extract()
            # there are some clearly broken table cells, just throw them away
            elif len(cells) == 1 and cells[0].get('colspan') == '7':
                if cells[0].contents and cells[0].contents[0] == ' ':
                    row.extract()
        return content

    def __is_virtual_table(self, cells):
        """Tell whether a traveller row describes a virtual table.

        It has to be a "proper" row, with at least 10 cells, and with both
        pair numbers (second and third cell) virtual.
        """
        if len(cells) < 10:
            return False
        if self.__pair_number(cells[1]) not in self.__virtual_pairs:
            return False
        return self.__pair_number(cells[2]) in self.__virtual_pairs

    @__fix_file
    def __fix_traveller(self, content):
        """Fix board traveller, leaving one, annotated virtual table.

        The first virtual table row gets its pair numbers cleared and is
        prefixed with a header row; it ends up below all the actual scores.
        Subsequent virtual tables are removed.

        :return: the (only) table of the document
        """
        # this should only happen if the traveller wasn't already processed
        # as it's the only operation that may yield any results on second run
        # and it might break stuff
        if not content.select('tr.virtualTable'):
            # looking for all the rows with more than 2 cells
            rows = [row for row
                    in content.select('tr')
                    if len(row.select('td')) >= 3]
            # only the first "virtual" row needs to be prefixed with a header
            virtual_row = None
            for row in rows:
                cells = row.select('td')
                is_virtual = self.__is_virtual_table(cells)
                if virtual_row is not None:
                    # we've already added a header, meaning we're below
                    # the first virtual table: the row is removed...
                    row.extract()
                    # ...and moved above the header only if it has
                    # meaningful information (10 cells) and is not
                    # yet another virtual table
                    if len(cells) >= 10 and not is_virtual:
                        virtual_row.insert_before(row)
                elif is_virtual:
                    # it's the first virtual table
                    # prefix it with a header
                    virtual_row = content.new_tag(
                        'tr',
                        **{'class': 'virtualTable'})
                    virtual_row.append(
                        content.new_tag('td', **{'class': 'n'}))
                    virtual_row_header = content.new_tag(
                        'td',
                        colspan=10, **{'class': 'noc'})
                    virtual_row_header.string = self.__header_text
                    virtual_row.append(virtual_row_header)
                    row.insert_before(virtual_row)
                    # clear pair numbers; clear() detaches the children
                    # properly, keeping the tree consistent
                    for cell in cells[1:3]:
                        cell.clear()
        return content.table

    def fix_results(self):
        """Remove virtual pairs from the short results list."""
        self.__fix_results(self.__results_file)

    def fix_full_results(self):
        """Remove virtual pairs from the full results page."""
        self.__fix_full_results(self.__full_results_file)

    def fix_collected_scores(self):
        """Remove virtual tables from the collected scores page."""
        self.__fix_collected(self.__collected_scores_file)

    def fix_records_list(self):
        """Remove virtual pairs from the pair records links page."""
        self.__fix_records_list(self.__pair_records_list_file)

    def fix_travellers(self):
        """Collapse virtual tables in every board traveller file."""
        for traveller_file in self.__traveller_files:
            self.__fix_traveller(traveller_file, encoding='iso-8859-2')
if __name__ == '__main__':
    # command line entry point: read the tournament prefix, optional header
    # text and optional virtual pair numbers, then run every fixer once
    import argparse

    cli = argparse.ArgumentParser(
        description='Fix display for virtual tables in JFR Pary result pages')
    cli.add_argument(
        'path',
        metavar='PATH',
        help='tournament path with JFR prefix')
    cli.add_argument(
        '--text',
        metavar='HEADER',
        default='Wirtualny stolik:',
        help='traveller header for virtual score')
    cli.add_argument(
        'pairs',
        metavar='PAIR_NO',
        nargs='*',
        type=int,
        help='virtual pair numbers')
    options = cli.parse_args()

    fixer = JFRVirtualTable(
        path_prefix=options.path,
        virtual_pairs=options.pairs,
        header_text=options.text)

    # the order of the steps is kept as it always was
    for fix_step in (fixer.fix_results,
                     fixer.fix_full_results,
                     fixer.fix_collected_scores,
                     fixer.fix_records_list,
                     fixer.fix_travellers):
        fix_step()
|