diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/virtual_table.py | 116 |
1 files changed, 108 insertions, 8 deletions
diff --git a/src/virtual_table.py b/src/virtual_table.py index 7fb5f31..cd61c42 100644 --- a/src/virtual_table.py +++ b/src/virtual_table.py @@ -16,12 +16,18 @@ class JFRVirtualTable: def __parse_filepaths(self, prefix): file_path = path.realpath(prefix) + log.getLogger('paths').debug('realpath = %s', file_path) tournament_path = path.dirname(file_path) + log.getLogger('paths').info('tournament dir = %s', tournament_path) tournament_prefix = path.splitext(path.basename(file_path))[0] + log.getLogger('paths').info('tournament prefix = %s', + tournament_prefix) # RegEx matching traveller files for each board traveller_files_match = re.compile( re.escape(tournament_prefix) + '([0-9]{3})\.txt') + log.getLogger('paths').debug('traveller files regex = %s', + traveller_files_match.pattern) # converts {prefix}{anything}.{ext} filename to full path def get_path(relative_path): @@ -32,26 +38,44 @@ class JFRVirtualTable: in glob.glob( get_path(tournament_prefix + '*.txt')) if re.search(traveller_files_match, f)] + log.getLogger('paths').info('found %d traveller files', + len(self.__traveller_files)) + log.getLogger('paths').debug( + 'traveller files:\n' + '\n'.join(self.__traveller_files)) # RegEx for matching pair record files records_files_match = re.compile( 'H-' + tournament_prefix + '-([0-9]{1,3})\.html') + log.getLogger('paths').debug('records files regex = %s', + records_files_match.pattern) self.__pair_records_files = [ f for f in glob.glob(get_path('H-' + tournament_prefix + '*.html')) if re.search(records_files_match, f) ] + log.getLogger('paths').info('found %d records files', + len(self.__pair_records_files)) + log.getLogger('paths').debug( + 'record files:\n' + '\n'.join(self.__pair_records_files)) # short rersult list, from side frame self.__results_file = get_path(tournament_prefix + 'WYN.txt') + log.getLogger('paths').info('generated results path = %s', + self.__results_file) # full results page self.__full_results_file = get_path('W-' + tournament_prefix + '.html') + log.getLogger('paths').info('generated full results path = %s', + self.__full_results_file) # list of pair records links page self.__pair_records_list_file = get_path( 'H-' + tournament_prefix + '-lista.html') + log.getLogger('paths').info('generated records list path = %s', + self.__pair_records_list_file) # collected scores page self.__collected_scores_file = get_path( tournament_prefix + 'zbior.html') + log.getLogger('paths').info('generated collected scores path = %s', + self.__collected_scores_file) # auto-detect virtual pairs by their record file header def __detect_virtual_pairs(self): @@ -59,6 +83,8 @@ class JFRVirtualTable: # RegEx for matching pair number and names in pair record header pair_header_match = re.compile('([0-9]{1,}): (.*) - (.*), .*') for record_file_path in self.__pair_records_files: + log.getLogger('detect').debug('examining record file %s', + record_file_path) with file(record_file_path) as record_file: record = bs4(record_file, 'lxml') # first <td class="o1"> with content matching @@ -67,19 +93,22 @@ class JFRVirtualTable: in record.select('td.o1')[0].contents if type(con) is NavigableString and re.search( pair_header_match, con)] + log.getLogger('detect').debug('detected header: %s', header) if len(header): header_match = re.match(pair_header_match, header[0]) pair_number = int(header_match.group(1)) names = filter(len, [header_match.group(2).strip(), header_match.group(3).strip()]) + log.getLogger('detect').debug('parsed header: %d, %s', + pair_number, names) # virtual pair does not have any names filled if len(names) == 0: virtual_pairs.append(pair_number) if len(virtual_pairs) == 0: log.getLogger('detect').warning('No virtual pairs detected') else: - log.getLogger('detect').info('Virtual pairs: %s', + log.getLogger('detect').info('virtual pairs: %s', ' '.join(sorted( map(str, virtual_pairs)))) return sorted(virtual_pairs) @@ -107,10 +136,16 @@ class JFRVirtualTable: # (may contain carry over or penalties) if len(cells) >= 6: try: + log.getLogger('results').debug('table cell: %s', + cells[2].contents) # third cell in the row is pair number if int(cells[2].contents[0]) in self.__virtual_pairs: row.extract() + log.getLogger('results').info( + 'removed: %s', cells[2].contents[0]) except ValueError: + log.getLogger('results').debug( + 'no pair number in cell: %s', cells[2].contents) pass return content.table @@ -125,10 +160,15 @@ class JFRVirtualTable: if link.has_attr('href') and link['href'].startswith('H-') and not link['href'].endswith('lista.html')] + log.getLogger('f_result').debug('found pair links: %s', + map(lambda c: c['href'], + cell_links)) # remove these containing links to virtual pairs if len(cell_links): if int(cell_links[0].contents[0]) in self.__virtual_pairs: row.extract() + log.getLogger('f_result').info('removed: %s', + cell_links[0].contents[0]) return content # fix the page with pair records links list @@ -136,6 +176,8 @@ class JFRVirtualTable: def __fix_records_list(self, content): # read the original column count row_cell_count = int(content.table.select('tr td.o')[0]['colspan']) + log.getLogger('rec_list').debug('found %d cells in column', + row_cell_count) rows = content.select('tr') # gather rows which containted any links link_rows = [] @@ -144,6 +186,8 @@ class JFRVirtualTable: for row in rows: cells = row.select('td.u') cells_found = False + log.getLogger('rec_list').debug('found %d cells in a row', + len(cells)) for cell in cells: # select cells by pair records links inside cell_links = [link for link @@ -151,10 +195,15 @@ class JFRVirtualTable: if link.has_attr('href') and link['href'].startswith('H-') and not link['href'].endswith('lista.html')] + log.getLogger('rec_list').debug('found links in cell: %s', + map(lambda c: c['href'], + cell_links)) if len(cell_links): # delete virtual pair cells if int(cell_links[0].contents[0]) in self.__virtual_pairs: cell.extract() + log.getLogger('rec_list').info( + 'removed: %s', cell_links[0].contents[0]) # store actual pair cells else: link_cells.append(cell) @@ -164,6 +213,9 @@ class JFRVirtualTable: link_rows.append(row) # detach actual pair cells from the tree cells = map(lambda cell: cell.extract(), link_cells) + log.getLogger('rec_list').info('remaining cell count: %d', len(cells)) + log.getLogger('rec_list').info('remaining row count: %d', + len(link_rows)) for row in link_rows: row.extract() # first filler cell of each new row @@ -176,18 +228,26 @@ class JFRVirtualTable: for cell in cells[0:row_cell_count]: new_row.append(cell) content.table.append(new_row) + log.getLogger('rec_list').debug('aligning cells %s to %s in a row', + cells[0].a.contents, + cells[row_cell_count-1].a.contents) del cells[0:row_cell_count] # last row may or may not be full last_row = content.new_tag('tr') last_row.append(copy.copy(first_cell)) for cell in cells: last_row.append(cell) + log.getLogger('rec_list').debug('leaving cells %s to %s in last row', + cells[0].a.contents, + cells[-1].a.contents) # if it wasn't full, fill it with a col-spanned last cell if len(cells) < row_cell_count: last_cell = content.new_tag('td', colspan=row_cell_count-len(cells)) last_cell.string = u'\xa0' last_row.append(last_cell) + log.getLogger('rec_list').debug('filling last row with: %s', + last_cell) content.table.append(last_row) return content @@ -200,15 +260,24 @@ class JFRVirtualTable: # "proper" rows should have 7 cells if len(cells) == 7: # ignore cells without proper pair numbers + log.getLogger('c_scores').debug( + 'found collected scores row: %s', cells[1:3]) try: + pairs = map(lambda c: int(c.contents[0]), + cells[1:3]) if int(cells[1].contents[0]) in self.__virtual_pairs: if int(cells[2].contents[0]) in self.__virtual_pairs: + log.getLogger('c_scores').info('removed %s', pairs) row.extract() except ValueError: + log.getLogger('c_scores').debug( + 'pair numbers not found, ignoring') pass # there are some clearly broken table cells, fix them if len(cells) == 1 and cells[0]['colspan'] == '7': if cells[0].contents[0] == ' ': + log.getLogger('c_scores').debug('fixing cell: %s', + cells[0]) cells[0].contents[0] = u'\xa0' return content @@ -228,6 +297,11 @@ class JFRVirtualTable: virtual_row = None for row in rows: cells = row.select('td') + debug_string = ' '.join(map( + lambda c: ''.join(filter( + lambda cc: isinstance(cc, basestring), + c.contents)), + cells)) # we're already added a header, meaning we're below the first # virtual table, we need to move the row above it # or remove it entirely @@ -235,7 +309,12 @@ class JFRVirtualTable: row_below = row.extract() # only move it if it has meaningful information (10 cells) if len(cells) >= 10: + log.getLogger('traveller').debug( + 'row moved upwards: %s', debug_string) virtual_row.insert_before(row_below) + else: + log.getLogger('traveller').info( + 'removed row %s', debug_string) # we're looking for a "proper" row, with at least 10 cells if len(cells) >= 10: # and with both pair numbers virtual @@ -245,6 +324,8 @@ class JFRVirtualTable: # just drop subsequent virtual tables if header_added: row.extract() + log.getLogger('traveller').info( + 'removed row %s', debug_string) # it's the first virtual table # prefix it with a header else: @@ -259,10 +340,14 @@ class JFRVirtualTable: virtual_row_header.string = self.__header_text virtual_row.append(virtual_row_header) row.insert_before(virtual_row) + log.getLogger('traveller').info( + 'added header above row %s', debug_string) # clear pair numbers for cell in cells[1:3]: cell.contents = '' header_added = True + else: + raise UserWarning('already processed, skipping') return content.table __traveller_files = [] @@ -275,10 +360,15 @@ class JFRVirtualTable: __header_text = '' def __init__(self, path_prefix, virtual_pairs=None, header_text=''): + log.getLogger('init').debug('parsing filepaths, prefix = %s', + path_prefix) self.__parse_filepaths(path_prefix) + log.getLogger('init').debug('collecting virtual pairs, %s provided', + virtual_pairs) if virtual_pairs is None or len(virtual_pairs) == 0: virtual_pairs = self.__detect_virtual_pairs() self.__virtual_pairs = virtual_pairs + log.getLogger('init').debug('setting header text to "%s"', header_text) self.__header_text = header_text def fix_results(self): @@ -291,17 +381,22 @@ class JFRVirtualTable: if path.isfile(self.__collected_scores_file): self.__fix_collected(self.__collected_scores_file) else: - log.getLogger( - 'collected_scores').warning( - 'Collected scores file %s not found', - self.__collected_scores_file) + log.getLogger('c_scores').warning( + 'Collected scores file %s not found', + self.__collected_scores_file) def fix_records_list(self): self.__fix_records_list(self.__pair_records_list_file) def fix_travellers(self): for traveller_file in self.__traveller_files: - self.__fix_traveller(traveller_file) + log.getLogger('traveller').debug('fixing traveller: %s', + traveller_file) + try: + self.__fix_traveller(traveller_file) + except UserWarning as warn: + log.getLogger('traveller').warning('%s: %s', + traveller_file, warn) if __name__ == '__main__': import argparse @@ -355,7 +450,7 @@ if __name__ == '__main__': # primary logging facility - virtual_table.log file log.basicConfig( level=getattr(log, arguments.log_level), - format='%(asctime)s %(levelname)-8s %(name)-16s %(message)s', + format='%(asctime)s %(levelname)-8s %(name)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S', filename=arguments.log_file) @@ -364,9 +459,12 @@ if __name__ == '__main__': console_log.setLevel(log.INFO if arguments.verbose else ( log.ERROR if arguments.quiet else log.WARNING)) console_log.setFormatter(log.Formatter( - '%(levelname)-8s %(name)-16s: %(message)s')) + '%(levelname)-8s %(name)-8s: %(message)s')) log.getLogger().addHandler(console_log) + log.info('-------- program started --------') + log.debug('parsed arguments: %s', arguments) + table_parser = JFRVirtualTable( path_prefix=arguments.path, virtual_pairs=arguments.pairs, @@ -376,3 +474,5 @@ if __name__ == '__main__': table_parser.fix_collected_scores() table_parser.fix_records_list() table_parser.fix_travellers() + + log.info('--------- program ended ---------') |