diff options
author | emkael <emkael@tlen.pl> | 2015-08-27 17:23:08 +0200 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2015-08-27 17:23:08 +0200 |
commit | fc137e95b3e2a1323262619a8cd18467b5f2fc1a (patch) | |
tree | 63d3fd580fca19fe59f052985e0f2a017c707d69 /virtual_table.py | |
parent | 3b46ca2c10088def567cbf4d2271c9d2ae6847ff (diff) |
* major refactoring
* additional files (pair records list, full results) processed
Diffstat (limited to 'virtual_table.py')
-rw-r--r-- | virtual_table.py | 249 |
1 files changed, 190 insertions, 59 deletions
diff --git a/virtual_table.py b/virtual_table.py index dc0f2a4..b910705 100644 --- a/virtual_table.py +++ b/virtual_table.py @@ -1,67 +1,198 @@ -import sys, glob, re, math +# coding=utf-8 + +import sys +import glob +import re +import math + from os import path from bs4 import BeautifulSoup as bs4 +from bs4.element import NavigableString + +class JFRVirtualTable: + + def __parse_filepaths(self, prefix): + file_path = path.realpath(prefix + '.html') + tournament_path = path.dirname(file_path) + tournament_prefix = path.splitext(path.basename(file_path))[0] + traveller_files_match = re.compile( + re.escape(tournament_prefix) + '([0-9]{3})\.txt' + ) + self.__traveller_files = [ + f for f + in glob.glob( + path.join(tournament_path, tournament_prefix) + '*.txt' + ) + if re.search(traveller_files_match, f) + ] + records_files_match = re.compile( + 'H-' + tournament_prefix + '-([0-9]{1,3})\.html' + ) + self.__pair_records_files = [ + f for f + in glob.glob( + path.join(tournament_path, 'H-' + tournament_prefix) + '*.html' + ) + if re.search(records_files_match, f) + ] + self.__results_file = path.join( + tournament_path, + tournament_prefix + 'WYN.txt' + ) + self.__full_results_file = path.join( + tournament_path, + 'W-' + tournament_prefix + '.html' + ) + self.__pair_records_list_file = path.join( + tournament_path, + 'H-' + tournament_prefix + '-lista.html' + ) + self.__collected_scores_file = path.join( + tournament_path, + tournament_prefix + 'zbior.html' + ) -tournament_path_prefix = sys.argv[1] + '.html' + def __detect_virtual_pairs(self): + virtual_pairs = [] + pair_header_match = re.compile('([0-9]{1,}): (.*) - (.*), .*') + for record_file_path in self.__pair_records_files: + with file(record_file_path) as record_file: + record = bs4(record_file) + header = [ + con for con in record.select('td.o1')[0].contents + if type(con) is NavigableString and re.match(pair_header_match, con) + ] + if len(header): + header_match = re.match(pair_header_match, header[0]) + pair_number = int(header_match.group(1)) + names = filter( + len, + [header_match.group(2), header_match.group(3)] + ) + if len(names) == 0: + virtual_pairs.append(pair_number) + return sorted(virtual_pairs) -output_path = path.dirname(tournament_path_prefix) -tournament_prefix = path.splitext(path.realpath(tournament_path_prefix))[0] + def __fix_file(worker): + def file_wrapper(self, file_path, encoding='utf-8'): + with file(file_path, 'r+') as content_file: + content = bs4(content_file, from_encoding=encoding) + content = worker(self, content) + content_file.seek(0) + content_file.write(content.prettify(encoding, formatter='html')) + content_file.truncate() + return file_wrapper -tournament_files_match = re.compile(re.escape(tournament_prefix) + '([0-9]{3})\.txt') -tournament_files = [f for f in glob.glob(tournament_prefix + '*.txt') if re.search(tournament_files_match, f)] + @__fix_file + def __fix_results(self, content): + rows = content.select('tr') + for row in rows: + cells = row.select('td') + if len(cells) >= 6: + try: + if int(cells[2].contents[0]) in self.__virtual_pairs: + row.extract() + except ValueError: + pass + return content.table -virtual_pairs = [] + @__fix_file + def __fix_full_results(self, content): + rows = content.select('tr') + for row in rows: + cell_links = [ + link for link + in row.select('td a') + if link['href'].startswith('H-') and not link['href'].endswith('lista.html') + ] + if len(cell_links) and int(cell_links[0].contents[0]) in self.__virtual_pairs: + row.extract() + return content -for tournament_file in tournament_files: - with file(tournament_file, 'r+') as board_text: - board_text_content = bs4(board_text, from_encoding='iso-8859-2') - if not len(board_text_content.select('tr.virtualTable')): - rows = [row for row in board_text_content.select('tr') if len(row.select('td')) == 11] - virtual_count = int(math.ceil(len(rows) / 2.0)) + @__fix_file + def __fix_records_list(self, content): + # TODO: ułożyć ponownie komórki w tabelce + cells = content.select('td.u') + for cell in cells: + cell_links = [ + link for link + in cell.select('a.pa') + if link['href'].startswith('H-') and not link['href'].endswith('lista.html') + ] + if len(cell_links) and int(cell_links[0].contents[0]) in self.__virtual_pairs: + cell.extract() + return content + + @__fix_file + def __fix_collected(self, content): + rows = content.select('tr') + for row in rows: + cells = row.select('td') + if len(cells) == 7: + try: + if int(cells[1].contents[0]) in self.__virtual_pairs and int(cells[2].contents[0]) in self.__virtual_pairs: + row.extract() + except ValueError: + pass + if len(cells) == 1 and cells[0]['colspan'] == '7' and cells[0].contents[0] == ' ': + row.extract() + return content + + @__fix_file + def __fix_traveller(self, content): + if not len(content.select('tr.virtualTable')): + rows = [row for row in content.select('tr') if len(row.select('td')) >= 10] header_added = False - for row in rows[virtual_count-1:]: - for cell in row.select('td')[1:3]: - pair_no = int(cell.contents[0]) - virtual_pairs.append(pair_no) - if header_added: - row.extract() - else: - row.insert_before(bs4('<tr class="virtualTable"><td class="n"></td><td class="noc" colspan="10">Wirtualny stolik:</td></tr>')) - for cell in row.select('td')[1:3]: - cell.contents = '' - header_added = True - board_text.seek(0) - board_text.write(board_text_content.table.prettify('iso-8859-2', formatter='html')) - board_text.truncate() - -with file(tournament_prefix + 'WYN.txt', 'r+') as results_file: - results_text_content = bs4(results_file, from_encoding='iso-8859-2') - rows = results_text_content.select('tr') - for row in rows: - cells = row.select('td') - if len(cells) == 6: - try: - if int(cells[2].contents[0]) in virtual_pairs: - row.extract() - except ValueError: - pass - results_file.seek(0) - results_file.write(results_text_content.table.prettify('iso-8859-2')) - results_file.truncate() - -with file(tournament_prefix + 'zbior.html', 'r+') as results_file: - results_content = bs4(results_file) - rows = results_content.select('tr') - for row in rows: - cells = row.select('td') - if len(cells) == 7: - try: - if int(cells[1].contents[0]) in virtual_pairs: - row.extract() - except ValueError: - pass - if len(cells) == 1 and cells[0]['colspan'] == '7' and cells[0].contents[0] == ' ': - row.extract() - results_file.seek(0) - results_file.write(results_content.prettify('utf-8')) - results_file.truncate() + for row in rows: + cells = row.select('td') + if int(cells[1].contents[0]) in self.__virtual_pairs and int(cells[2].contents[0]) in self.__virtual_pairs: + if header_added: + row.extract() + else: + row.insert_before( + bs4('<tr class="virtualTable"><td class="n"></td><td class="noc" colspan="10">Wirtualny stolik:</td></tr>') + ) + for cell in cells[1:3]: + cell.contents = '' + header_added = True + # TODO: przesunąć wiersz wirtualnego stolika na dół tabeli + return content.table + + __traveller_files = [] + __pair_records_files = [] + __results_file = None + __full_results_file = None + __pair_records_list_file = None + __collected_scores_file = None + + def __init__(self, path_prefix, virtual_pairs=None): + self.__parse_filepaths(path_prefix) + if virtual_pairs is None: + virtual_pairs = self.__detect_virtual_pairs() + self.__virtual_pairs = virtual_pairs + + def fix_results(self): + self.__fix_results(self.__results_file) + + def fix_full_results(self): + self.__fix_full_results(self.__full_results_file) + + def fix_collected_scores(self): + self.__fix_collected(self.__collected_scores_file) + + def fix_records_list(self): + self.__fix_records_list(self.__pair_records_list_file) + + def fix_travellers(self): + for traveller_file in self.__traveller_files: + self.__fix_traveller(traveller_file, encoding='iso-8859-2') + +table_parser = JFRVirtualTable( + path_prefix=sys.argv[1], + virtual_pairs=map(int, sys.argv[2:]) if len(sys.argv) > 2 else None +) +table_parser.fix_results() +table_parser.fix_full_results() +table_parser.fix_collected_scores() +table_parser.fix_records_list() +table_parser.fix_travellers() |