From 1367feca21f425a0ee5a07173e8a1897ec427e20 Mon Sep 17 00:00:00 2001 From: emkael Date: Mon, 28 Sep 2015 13:21:26 +0200 Subject: * code reformatting --- rrb2txt.py | 229 +++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 148 insertions(+), 81 deletions(-) diff --git a/rrb2txt.py b/rrb2txt.py index 55e9f17..7b5bea4 100644 --- a/rrb2txt.py +++ b/rrb2txt.py @@ -1,14 +1,20 @@ # coding=utf-8 +import sys +import os +import re +import urlparse + from bs4 import BeautifulSoup, Comment -import sys, os, re +from glob import glob +from itertools import chain, cycle -dir = sys.argv[1] if len(sys.argv) > 1 else '.' -file = os.path.join(dir, 'pary.txt') +directory = sys.argv[1] if len(sys.argv) > 1 else '.' +filepath = os.path.join(directory, 'pary.txt') -content = '' -with open(file, 'r') as file: - content = BeautifulSoup(file.read()) +content = None +with open(filepath, 'r') as file_content: + content = BeautifulSoup(file_content.read()) pdf_fixed = False comments = content.findAll(text=lambda t: isinstance(t, Comment)) @@ -31,8 +37,9 @@ if not pdf_fixed: if score_cell: score_cell[0].insert_before(tag) - extra_headers = ['PKL','PDF','nagroda'] - extra_headers_present = [bool(header.find_all(text=h)) for h in extra_headers] + extra_headers = ['PKL', 'PDF', 'nagroda'] + extra_headers_present = [bool(header.find_all(text=h)) + for h in extra_headers] extra_headers_offset = 8 @@ -44,7 +51,8 @@ if not pdf_fixed: for row in body: cells = row.find_all('td') if len(cells) >= extra_headers_offset: - tag = content.new_tag('td', style='display:none', rowspan=2) + tag = content.new_tag( + 'td', style='display:none', rowspan=2) cells[extra_headers_offset].insert_after(tag) extra_headers_offset += 1 @@ -55,7 +63,7 @@ if not pdf_fixed: return 0 max_points_count = max([get_points_count(row) for row in body]) + 1 - + header.find_all('td')[10]['colspan'] = max_points_count for row in body: @@ -81,15 +89,12 @@ if not pdf_fixed: new_content = content.find('body').decode_contents() new_length = len(new_content) + 1 - file = open(file.name, 'wb') - file.write('%012d' % new_length) - file.write('\n') - file.write(new_content.encode('utf-8')) - file.write('\n') + output = open(filepath.name, 'wb') + output.write('%012d' % new_length) + output.write('\n') + output.write(new_content.encode('utf-8')) + output.write('\n') -from glob import glob -from itertools import chain, cycle -import urlparse, math def format_boards(rows): rows = rows[1:4] @@ -99,30 +104,35 @@ def format_boards(rows): .replace('obie po', 'OBIE') .split(' / '))) rows[1][1] = '' + def split_hand(hand): return hand.split(os.linesep) + rows[0][1] = split_hand(rows[0][1]) rows[1][0] = split_hand(rows[1][0]) rows[1][2] = split_hand(rows[1][2]) rows[2][1] = split_hand(rows[2][1]) + def side_rows(row): - ret =[ + ret = [ [row[0], row[1][0][2:], row[2]] ] - for i in range(1,4): + for i in range(1, 4): ret.append(['', row[1][i][2:] or '===', '']) return ret + def middle_rows(row): ret = [] - for i in range(0,4): + for i in range(0, 4): ret.append([row[0][i][2:] or '===', row[1], row[2][i][2:] or '===']) return ret + rows = side_rows(rows[0]) + middle_rows(rows[1]) + side_rows(rows[2]) header = 'ROZDANIE NR ' + header[0] output = [header, ''] @@ -132,6 +142,7 @@ def format_boards(rows): output.append('') return output + def format_protocols(rows): output = [' ZAPIS WYNIK', ' NS EW KONTRAKT WIST NS EW NS EW'] @@ -160,50 +171,75 @@ def format_protocols(rows): '{:.1f}'.format(float(row[8])) ] if content: - output.append(u'{:>3s} {:>3s} {:11s}{:^4s}{:>4s}{:>5s} {:>5s} {:>5s}'.format(*content)) + output.append( + u'{:>3s} {:>3s} {:11s}{:^4s}{:>4s}{:>5s} {:>5s} {:>5s}'.format( + *content)) elif len(row) != 4 and len(row) != 8: print 'protocols: row of unexpected length' print row output.append('') return output + def format_results(rows): rows.pop(0) content = [] - link_regex = re.compile('^http://www.msc.com.pl') - cezar_ids = ['{:05d}'.format(int(dict(urlparse.parse_qsl(urlparse.urlparse(row.pop()).query))['r'])) - if re.match(link_regex, row[-1]) - else '' - for row in rows] + link_regex = re.compile(r'^http://www\.msc\.com\.pl') + cezar_ids = [ + '{:05d}'.format(int( + dict(urlparse.parse_qsl(urlparse.urlparse(row.pop()).query))['r'])) + if re.match(link_regex, row[-1]) + else '' + for row in rows] pdf_columns = max([len(row) for row in rows]) - 11 for row in rows: length = len(row) if length > 5: content.append(row[0:3] + [cezar_ids.pop(0)] + row[3:]) elif length == 5: - content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:] + [''] * (3 + pdf_columns)) + content.append([''] * 2 + row[0:1] + [ + cezar_ids.pop(0)] + row[1:] + [''] * (3 + pdf_columns)) elif length == 4: if len(row[3]) != 2: - content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:3] + content[-1][6:7] + row[3:4] + [''] * (3 + pdf_columns)) + content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + + row[1:3] + content[-1][6:7] + + row[3:4] + [''] * (3 + pdf_columns)) else: - content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:4] + [''] * (4 + pdf_columns)) + content.append([''] * 2 + row[0:1] + [ + cezar_ids.pop(0)] + row[1:4] + [''] * (4 + pdf_columns)) elif length == 3: - content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:3] + content[-1][6:8] + [''] * (3 + pdf_columns)) + content.append([''] * 2 + row[0:1] + + [cezar_ids.pop(0)] + row[1:3] + content[-1][6:8] + + [''] * (3 + pdf_columns)) wk_sum = sum([float(c[5]) if len(c[5]) else 0.0 for c in content]) output = [] name_column = max([len(r[2]) for r in content]) - output.append('M-CE NR ' + ' ' * name_column + ' WK CEZAR +/- WYNIK PKL ' + ('{:^' + str(3 * pdf_columns) + 's}').format('PDF') + ' NAGRODA') + output.append('%s %s %s %s %s' % ( + 'M-CE NR', + ' ' * name_column, + 'WK CEZAR +/- WYNIK PKL', + ('{:^' + str(3 * pdf_columns) + 's}').format('PDF'), + 'NAGRODA' + )) output.append('-' * len(output[-1])) for c in content: - line = (u'{:>3s} {:>3s} {:' + unicode(name_column) + u's} {:>4s} {:2s} {:5s} {:2s} {:>5s} {:>6s} {:>3s}').format(*(c[0:3] + c[5:7] + c[3:5] + c[8:11])) - pdf = (u' {:' + unicode(3 * pdf_columns) + u's}').format(''.join([u'{:>3s}'.format(cc) for cc in c[11:-1]])) + line = ( + u'{:>3s} {:>3s} {:' + unicode(name_column) + + u's} {:>4s} {:2s} {:5s} {:2s} {:>5s} {:>6s} {:>3s}').format( + *(c[0:3] + c[5:7] + c[3:5] + c[8:11])) + pdf = ( + u' {:' + unicode(3 * pdf_columns) + u's}').format( + ''.join([u'{:>3s}'.format(cc) for cc in c[11:-1]])) line += pdf line += u' {:>6s}'.format(c[-1]) output.append(line) output.append(' ' * (8 + name_column) + '-----') - output.append(('{:>' + str(13 + name_column) + 's}').format('Suma WK = {:.1f}'.format(wk_sum))) + output.append( + ('{:>' + str(13 + name_column) + 's}').format( + 'Suma WK = {:.1f}'.format(wk_sum))) return output + def format_histories(rows): header = rows.pop(0)[0] rows.pop(0) @@ -216,38 +252,57 @@ def format_histories(rows): for row in rows: content = [] if len(row) == 11: - add_separator = (len(''.join(row[0:9])) == 0) and ((add_separator is False) or (row[-2] == 'miejsce')) - content = row[0:4] + [row[4] + ' ' + row[5] + ' ' + row[7]] + [row[6]] + row[8:11] + add_separator = ( + len(''.join(row[0:9])) == 0) and ( + (add_separator is False) or (row[-2] == 'miejsce')) + content = row[0:4] + [ + row[4] + ' ' + row[5] + ' ' + row[7] + ] + [row[6]] + row[8:11] elif len(row) == 10: - content = [''] + row[0:3] + [row[3] + ' ' + row[4] + ' ' + row[6]] + [row[5]] + row[7:10] + content = [''] + row[0:3] + [ + row[3] + ' ' + row[4] + ' ' + row[6] + ] + [row[5]] + row[7:10] elif len(row) == 9: - content = ['',''] + row[0:2] + [row[2] + ' ' + row[3] + ' ' + row[5]] + [row[4]] + row[6:9] + content = ['', ''] + row[0:2] + [ + row[2] + ' ' + row[3] + ' ' + row[5] + ] + [row[4]] + row[6:9] if content: if add_separator: - content_rows.append(['','','','','','','','-------','--------']) + content_rows.append( + ['', '', '', '', '', '', '', '-------', '--------']) content_rows.append(content) else: print 'histories: unexpected row length' print row column_width = max([len(r[1]) for r in content_rows]) - content_rows = [['RND', 'PRZECIWNIK', 'RZD', ' ', 'KONTRAKT', 'WIST', 'ZAPIS', 'WYNIK ', u'/ BIEŻĄCY']] + content_rows + content_rows = [[ + 'RND', 'PRZECIWNIK', 'RZD', ' ', 'KONTRAKT', 'WIST', + 'ZAPIS', 'WYNIK ', u'/ BIEŻĄCY' + ]] + content_rows for content in content_rows: if content[6]: - score_align = u'>' if content[6][0] == u'-' else (u'' if content[6][0] == u'+' else u'^') + score_align = u'>' if content[6][0] == u'-' else ( + u'' if content[6][0] == u'+' else u'^') else: score_align = u'' - output.append((u'{:>3s} {:' + unicode(column_width) + u's} {:>3s} {:2s} {:9s}{:^4s} {:' + score_align + u'7s} {:>7s}{:>8s}').format(*[c or ' ' for c in content])) + output.append( + (u'{:>3s} {:' + unicode(column_width) + + u's} {:>3s} {:2s} {:9s}{:^4s} {:' + + score_align + u'7s} {:>7s}{:>8s}').format( + *[c or ' ' for c in content])) output.insert(3, '-' * len(output[2])) output.append('') return output - -def format_rows(rows, type): - return globals()['format_' + type](rows) + + +def format_rows(rows, rowtype): + return globals()['format_' + rowtype](rows) + def get_rows(content): soup = BeautifulSoup(content) output = [] - link_regex = re.compile('^http://www.msc.com.pl') + link_regex = re.compile(r'^http://www\.msc\.com\.pl') header = soup.find('h2') if header: output.append([header.text]) @@ -261,57 +316,69 @@ def get_rows(content): output.append(row) return output -def get_content(file): + +def get_content(filepath): return re.sub('', lambda img: img.group(1)[0].capitalize(), - open(file, 'r').read()) + open(filepath, 'r').read()) + def get_header(directory): - soup = BeautifulSoup(open(os.path.join(directory, 'index.html'), 'r').read()) + soup = BeautifulSoup( + open(os.path.join(directory, 'index.html'), 'r').read()) return [node.text for node in soup.select('#header *')] + def get_files(directory): - return dict(map(lambda (key, val): (key, - reduce(list.__add__, map(lambda v: sorted(glob(os.path.join(directory, v))), val), [])), + return dict(map(lambda (key, val): ( + key, + reduce(list.__add__, map( + lambda v: sorted(glob(os.path.join(directory, v))), val), [])), { - 'boards': ['d?.txt','d??.txt'], - 'protocols': ['p?.txt','p??.txt'], - 'histories': ['h?.txt','h??.txt'], + 'boards': ['d?.txt', 'd??.txt'], + 'protocols': ['p?.txt', 'p??.txt'], + 'histories': ['h?.txt', 'h??.txt'], 'results': ['pary.txt'], }.items())) -def compile(directory): + +def compile_dir(directory): files = get_files(directory) - return dict(map(lambda (key, val): - (key, - list(chain(* - list( - i.next() for i in cycle(map(lambda v: - iter( - map(lambda file: - format_rows( - get_rows( - get_content(file) - ), - v), - files[v])), - val)) - ) - )) - ), - { - 'P': ['boards', 'protocols'], - 'H': ['histories'], - 'W': ['results'] - }.items())) + return dict( + map(lambda (key, val): + ( + key, + list( + chain( + *list( + i.next() for i in cycle( + map(lambda v: + iter( + map(lambda file: + format_rows( + get_rows( + get_content(file) + ), + v), + files[v])), + val)) + ) + ) + ) + ), + { + 'P': ['boards', 'protocols'], + 'H': ['histories'], + 'W': ['results'] + }.items())) directory = sys.argv[1] if len(sys.argv) > 1 else os.path.abspath('.') header = get_header(directory) + [''] -output = compile(directory) +output = compile_dir(directory) file_prefix = os.path.basename(directory) -for file, rows in output.iteritems(): - output_file = open(file_prefix + file + '.txt', 'w') +for filepath, rows in output.iteritems(): + output_file = open(file_prefix + filepath + '.txt', 'w') for line in header: output_file.write(line.encode('windows-1250') + '\n') for row in rows: -- cgit v1.2.3