diff options
author | emkael <emkael@tlen.pl> | 2015-09-27 00:23:59 +0200 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2015-09-27 00:23:59 +0200 |
commit | 58fd84b32d7456fc9aae32d2e9b6cb97b9aab7b2 (patch) | |
tree | fe347ca51803635bfedb88d09d9947332cba42cb | |
parent | ae9f87cd3deaf002d6cdd2f1199243ac6e0c8dd9 (diff) |
* 10.00/10 pylint compatibility
-rw-r--r-- | dist/virtual_table.exe | bin | 6040729 -> 6170134 bytes | |||
-rw-r--r-- | src/virtual_table.py | 258 |
2 files changed, 152 insertions, 106 deletions
diff --git a/dist/virtual_table.exe b/dist/virtual_table.exe Binary files differindex e331ca4..c78cf18 100644 --- a/dist/virtual_table.exe +++ b/dist/virtual_table.exe diff --git a/src/virtual_table.py b/src/virtual_table.py index fd43726..a397bf3 100644 --- a/src/virtual_table.py +++ b/src/virtual_table.py @@ -1,7 +1,10 @@ +""" Virtual tables for JFR Pary result pages. +Utility to clean up pages generated by JFR Pary from data on virtual tables. +""" + import sys import glob import re -import math import copy import logging as log @@ -12,9 +15,67 @@ from bs4.element import NavigableString __version__ = '1.0.1' -class JFRVirtualTable: +def bs4_fix_file(worker_method): + """ Decorator for manipulating files with BeautifulSoup4 + """ + def file_wrapper(self, file_path, encoding='utf-8'): + """ Wrapper for DOM manipulation. + Wraps the inner function into BS4 invokation and file overwrite. + """ + with file(file_path, 'r+') as content_file: + content = bs4(content_file, 'lxml', from_encoding=encoding) + content = worker_method(self, content) + content_file.seek(0) + content_file.write( + content.prettify(encoding, formatter='html')) + content_file.truncate() + return file_wrapper + +def fill_pair_list_table(cells, row_cell_count=20): + """ Formats cell list into well-formed rows, aligned by column count. + """ + content = bs4('<table />', 'lxml') + content.append(content.new_tag('table')) + # first filler cell of each new row + first_cell = content.new_tag('td', **{'class': 'n'}) + first_cell.string = u'\xa0' + # arrange cells into rows, full rows first + while len(cells) >= row_cell_count: + new_row = content.new_tag('tr') + new_row.append(copy.copy(first_cell)) + for cell in cells[0:row_cell_count]: + new_row.append(cell) + content.table.append(new_row) + log.getLogger('rec_list').debug('aligning cells %s to %s in a row', + cells[0].a.contents, + cells[row_cell_count-1].a.contents) + del cells[0:row_cell_count] + # last row may or may not be full + last_row = content.new_tag('tr') + last_row.append(copy.copy(first_cell)) + for cell in cells: + last_row.append(cell) + log.getLogger('rec_list').debug('leaving cells %s to %s in last row', + cells[0].a.contents, + cells[-1].a.contents) + # if it wasn't full, fill it with a col-spanned last cell + if len(cells) < row_cell_count: + last_cell = content.new_tag('td', + colspan=row_cell_count-len(cells)) + last_cell.string = u'\xa0' + last_row.append(last_cell) + log.getLogger('rec_list').debug('filling last row with: %s', + last_cell) + content.table.append(last_row) + return content.table.contents + +class JFRVirtualTable(object): + """ Virtual tables formatter (for JFR Pary pages) + """ def __parse_filepaths(self, prefix): + """ Detects all filepaths for results pages. + """ file_path = path.realpath(prefix) log.getLogger('paths').debug('realpath = %s', file_path) tournament_path = path.dirname(file_path) @@ -25,12 +86,14 @@ class JFRVirtualTable: # RegEx matching traveller files for each board traveller_files_match = re.compile( - re.escape(tournament_prefix) + '([0-9]{3})\.txt') + re.escape(tournament_prefix) + r'([0-9]{3})\.txt') log.getLogger('paths').debug('traveller files regex = %s', traveller_files_match.pattern) - # converts {prefix}{anything}.{ext} filename to full path def get_path(relative_path): + """ Compiles full path for tournament file from base name. + Converts {prefix}{anything}.{ext} filename to full path. + """ return path.join(tournament_path, relative_path) # filtering out traveller files from all TXT files @@ -45,7 +108,7 @@ class JFRVirtualTable: # RegEx for matching pair record files records_files_match = re.compile( - 'H-' + tournament_prefix + '-([0-9]{1,3})\.html') + 'H-' + tournament_prefix + r'-([0-9]{1,3})\.html') log.getLogger('paths').debug('records files regex = %s', records_files_match.pattern) self.__pair_records_files = [ @@ -59,26 +122,29 @@ class JFRVirtualTable: 'record files:\n' + '\n'.join(self.__pair_records_files)) # short rersult list, from side frame - self.__results_file = get_path(tournament_prefix + 'WYN.txt') + self.__single_files['results'] = get_path( + tournament_prefix + 'WYN.txt') log.getLogger('paths').info('generated results path = %s', - self.__results_file) + self.__single_files['results']) # full results page - self.__full_results_file = get_path('W-' + tournament_prefix + '.html') + self.__single_files['full_results'] = get_path( + 'W-' + tournament_prefix + '.html') log.getLogger('paths').info('generated full results path = %s', - self.__full_results_file) + self.__single_files['full_results']) # list of pair records links page - self.__pair_records_list_file = get_path( + self.__single_files['pair_records_list'] = get_path( 'H-' + tournament_prefix + '-lista.html') log.getLogger('paths').info('generated records list path = %s', - self.__pair_records_list_file) + self.__single_files['pair_records_list']) # collected scores page - self.__collected_scores_file = get_path( + self.__single_files['collected_scores'] = get_path( tournament_prefix + 'zbior.html') log.getLogger('paths').info('generated collected scores path = %s', - self.__collected_scores_file) + self.__single_files['collected_scores']) - # auto-detect virtual pairs by their record file header def __detect_virtual_pairs(self): + """ Auto-detect virtual pairs by their record file header. + """ virtual_pairs = [] # RegEx for matching pair number and names in pair record header pair_header_match = re.compile('([0-9]{1,}): (.*) - (.*), .*') @@ -91,15 +157,15 @@ class JFRVirtualTable: # pair header is what we're after header = [con for con in record.select('td.o1')[0].contents - if type(con) is NavigableString and re.search( - pair_header_match, con)] + if isinstance(con, NavigableString) and re.search( + pair_header_match, con)] log.getLogger('detect').debug('detected header: %s', header) if len(header): header_match = re.match(pair_header_match, header[0]) pair_number = int(header_match.group(1)) - names = filter(len, - [header_match.group(2).strip(), - header_match.group(3).strip()]) + names = [name for name in [header_match.group(2).strip(), + header_match.group(3).strip()] + if len(name)] log.getLogger('detect').debug('parsed header: %d, %s', pair_number, names) # virtual pair does not have any names filled @@ -110,25 +176,13 @@ class JFRVirtualTable: else: log.getLogger('detect').info('virtual pairs: %s', ' '.join(sorted( - map(str, virtual_pairs)))) + [str(pair) for pair in virtual_pairs]))) return sorted(virtual_pairs) - # wrapper for DOM manipulation - # wraps the inner function into BS4 invokation and file overwrite - def __fix_file(worker): - def file_wrapper(self, file_path, encoding='utf-8'): - with file(file_path, 'r+') as content_file: - content = bs4(content_file, 'lxml', from_encoding=encoding) - content = worker(self, content) - content_file.seek(0) - content_file.write( - content.prettify(encoding, formatter='html')) - content_file.truncate() - return file_wrapper - - # fix simple results list by removing virtual pair rows - @__fix_file + @bs4_fix_file def __fix_results(self, content): + """ Fixes simple results list by removing virtual pair rows. + """ rows = content.select('tr') for row in rows: cells = row.select('td') @@ -146,12 +200,12 @@ class JFRVirtualTable: except ValueError: log.getLogger('results').debug( 'no pair number in cell: %s', cells[2].contents) - pass return content.table - # fix full results file by removing virtual pair rows - @__fix_file + @bs4_fix_file def __fix_full_results(self, content): + """ Fixes full results file by removing virtual pair rows. + """ rows = content.select('tr') for row in rows: # select rows by cells containing pair records links @@ -161,8 +215,7 @@ class JFRVirtualTable: link['href'].startswith('H-') and not link['href'].endswith('lista.html')] log.getLogger('f_result').debug('found pair links: %s', - map(lambda c: c['href'], - cell_links)) + [c['href'] for c in cell_links]) # remove these containing links to virtual pairs if len(cell_links): if int(cell_links[0].contents[0]) in self.__virtual_pairs: @@ -171,9 +224,10 @@ class JFRVirtualTable: cell_links[0].contents[0]) return content - # fix the page with pair records links list - @__fix_file + @bs4_fix_file def __fix_records_list(self, content): + """ Fixes the page with pair records links list. + """ # read the original column count row_cell_count = int(content.table.select('tr td.o')[0]['colspan']) log.getLogger('rec_list').debug('found %d cells in column', @@ -196,8 +250,8 @@ class JFRVirtualTable: link['href'].startswith('H-') and not link['href'].endswith('lista.html')] log.getLogger('rec_list').debug('found links in cell: %s', - map(lambda c: c['href'], - cell_links)) + [c['href'] for c + in cell_links]) if len(cell_links): # delete virtual pair cells if int(cell_links[0].contents[0]) in self.__virtual_pairs: @@ -212,48 +266,20 @@ class JFRVirtualTable: if cells_found: link_rows.append(row) # detach actual pair cells from the tree - cells = map(lambda cell: cell.extract(), link_cells) + cells = [cell.extract() for cell in link_cells] log.getLogger('rec_list').info('remaining cell count: %d', len(cells)) log.getLogger('rec_list').info('remaining row count: %d', len(link_rows)) for row in link_rows: row.extract() - # first filler cell of each new row - first_cell = content.new_tag('td', **{'class': 'n'}) - first_cell.string = u'\xa0' - # arrange cells into rows, full rows first - while len(cells) >= row_cell_count: - new_row = content.new_tag('tr') - new_row.append(copy.copy(first_cell)) - for cell in cells[0:row_cell_count]: - new_row.append(cell) - content.table.append(new_row) - log.getLogger('rec_list').debug('aligning cells %s to %s in a row', - cells[0].a.contents, - cells[row_cell_count-1].a.contents) - del cells[0:row_cell_count] - # last row may or may not be full - last_row = content.new_tag('tr') - last_row.append(copy.copy(first_cell)) - for cell in cells: - last_row.append(cell) - log.getLogger('rec_list').debug('leaving cells %s to %s in last row', - cells[0].a.contents, - cells[-1].a.contents) - # if it wasn't full, fill it with a col-spanned last cell - if len(cells) < row_cell_count: - last_cell = content.new_tag('td', - colspan=row_cell_count-len(cells)) - last_cell.string = u'\xa0' - last_row.append(last_cell) - log.getLogger('rec_list').debug('filling last row with: %s', - last_cell) - content.table.append(last_row) + for row in fill_pair_list_table(cells, row_cell_count): + content.table.append(row) return content - # fix collected scores tables by removing virtual pair rows - @__fix_file + @bs4_fix_file def __fix_collected(self, content): + """ Fixes collected scores tables by removing virtual pair rows. + """ rows = content.select('tr') for row in rows: cells = row.select('td') @@ -263,8 +289,7 @@ class JFRVirtualTable: log.getLogger('c_scores').debug( 'found collected scores row: %s', cells[1:3]) try: - pairs = map(lambda c: int(c.contents[0]), - cells[1:3]) + pairs = [int(c.contents[0]) for c in cells[1:3]] if int(cells[1].contents[0]) in self.__virtual_pairs: if int(cells[2].contents[0]) in self.__virtual_pairs: log.getLogger('c_scores').info('removed %s', pairs) @@ -272,7 +297,6 @@ class JFRVirtualTable: except ValueError: log.getLogger('c_scores').debug( 'pair numbers not found, ignoring') - pass # there are some clearly broken table cells, fix them if len(cells) == 1 and cells[0]['colspan'] == '7': if cells[0].contents[0] == ' ': @@ -281,12 +305,14 @@ class JFRVirtualTable: cells[0].contents[0] = u'\xa0' return content - # fix board travellers, removing virtual tables and leaving one, annotated - @__fix_file + @bs4_fix_file def __fix_traveller(self, content): - # this should only happen if the traveller wasn't already processed + """ Fixes board travellers, removing virtual tables and leaving one, + annotated. + """ + # This should only happen if the traveller wasn't already processed # as it's the only operaton that may yield any results on second run - # and it might break stuff + # and it might break stuff. if not len(content.select('tr.virtualTable')): # looking for all the rows with more than 2 cells rows = [row for row @@ -297,11 +323,10 @@ class JFRVirtualTable: virtual_row = None for row in rows: cells = row.select('td') - debug_string = ' '.join(map( - lambda c: ''.join(filter( - lambda cc: isinstance(cc, basestring), - c.contents)), - cells)) + debug_string = ' '.join([ + ''.join([cc for cc in c.contents + if isinstance(cc, basestring)]) + for c in cells]) # we're already added a header, meaning we're below the first # virtual table, we need to move the row above it # or remove it entirely @@ -352,12 +377,13 @@ class JFRVirtualTable: __traveller_files = [] __pair_records_files = [] - __results_file = None - __full_results_file = None - __pair_records_list_file = None - __collected_scores_file = None - # text for traveller header row - __header_text = '' + __single_files = { + 'results': None, + 'full_results': None, + 'pair_records_list': None, + 'collected_scores': None + } + __header_text = '' # text for traveller header row def __init__(self, path_prefix, virtual_pairs=None, header_text=''): log.getLogger('init').debug('parsing filepaths, prefix = %s', @@ -372,23 +398,33 @@ class JFRVirtualTable: self.__header_text = header_text def fix_results(self): - self.__fix_results(self.__results_file) + """ Fixes results for specific detected tournament files. + """ + self.__fix_results(self.__single_files['results']) def fix_full_results(self): - self.__fix_full_results(self.__full_results_file) + """ Fixes full results for specific detected tournament files. + """ + self.__fix_full_results(self.__single_files['full_results']) def fix_collected_scores(self): - if path.isfile(self.__collected_scores_file): - self.__fix_collected(self.__collected_scores_file) + """ Fixes collected scores for specific detected tournament files. + """ + if path.isfile(self.__single_files['collected_scores']): + self.__fix_collected(self.__single_files['collected_scores']) else: log.getLogger('c_scores').warning( 'Collected scores file %s not found', - self.__collected_scores_file) + self.__single_files['collected_scores']) def fix_records_list(self): - self.__fix_records_list(self.__pair_records_list_file) + """ Fixes pair records list for specific detected tournament files. + """ + self.__fix_records_list(self.__single_files['pair_records_list']) def fix_travellers(self): + """ Fixes board travellers for specific detected tournament files. + """ for traveller_file in self.__traveller_files: log.getLogger('traveller').info('fixing traveller: %s', traveller_file) @@ -398,13 +434,17 @@ class JFRVirtualTable: log.getLogger('traveller').warning('%s: %s', traveller_file, warn) -if __name__ == '__main__': +def main(): + """ Program entry point, invoked when __name__ is __main__ + """ import argparse argument_parser = argparse.ArgumentParser( description='Fix display for virtual tables in JFR Pary result pages') def file_path(filepath): + """ Sanitizes and validates file paths from input parameters. + """ filepath = unicode(filepath, sys.getfilesystemencoding()) if path.isfile(filepath): return filepath @@ -412,11 +452,13 @@ if __name__ == '__main__': argument_parser.error('File %s does not exist' % filepath) def decoded_text(text): + """ Sanitizes and normalizes command line input for parameters. + """ return unicode(text, sys.getfilesystemencoding()) argument_parser.add_argument('-V', '--version', action='version', version='%(prog)s {version}'.format( - version=__version__)) + version=__version__)) argument_parser.add_argument('path', metavar='PATH', help='tournament path with JFR prefix', @@ -475,7 +517,11 @@ if __name__ == '__main__': table_parser.fix_collected_scores() table_parser.fix_records_list() table_parser.fix_travellers() - except Exception as e: - log.getLogger('root').error(e.strerror) + except Exception as ex: + log.getLogger('root').error(ex.strerror) + raise ex log.info('--------- program ended ---------') + +if __name__ == '__main__': + main() |