* code reformatting

author: emkael <emkael@tlen.pl> 2015-09-28 13:21:26 +0200
committer: emkael <emkael@tlen.pl> 2015-09-28 13:21:26 +0200
commit: 1367feca21f425a0ee5a07173e8a1897ec427e20 (patch)
tree: 5c7ac717e9a9ae3e60376518926503102e6a532c
parent: 373a37d35e8326ad6e3a6732053bc50a5c0f8517 (diff)
1 file changed, 148 insertions, 81 deletions
diff --git a/rrb2txt.py b/rrb2txt.py
index 55e9f17..7b5bea4 100644
--- a/rrb2txt.py
+++ b/rrb2txt.py
@@ -1,14 +1,20 @@
 # coding=utf-8
 
+import sys
+import os
+import re
+import urlparse
+
 from bs4 import BeautifulSoup, Comment
-import sys, os, re
+from glob import glob
+from itertools import chain, cycle
 
-dir = sys.argv[1] if len(sys.argv) > 1 else '.'
-file = os.path.join(dir, 'pary.txt')
+directory = sys.argv[1] if len(sys.argv) > 1 else '.'
+filepath = os.path.join(directory, 'pary.txt')
 
-content = ''
-with open(file, 'r') as file:
-    content = BeautifulSoup(file.read())
+content = None
+with open(filepath, 'r') as file_content:
+    content = BeautifulSoup(file_content.read())
 
 pdf_fixed = False
 comments = content.findAll(text=lambda t: isinstance(t, Comment))
@@ -31,8 +37,9 @@ if not pdf_fixed:
             if score_cell:
                 score_cell[0].insert_before(tag)
 
-    extra_headers = ['PKL','PDF','nagroda']
-    extra_headers_present = [bool(header.find_all(text=h)) for h in extra_headers]
+    extra_headers = ['PKL', 'PDF', 'nagroda']
+    extra_headers_present = [bool(header.find_all(text=h))
+                             for h in extra_headers]
 
     extra_headers_offset = 8
 
@@ -44,7 +51,8 @@ if not pdf_fixed:
             for row in body:
                 cells = row.find_all('td')
                 if len(cells) >= extra_headers_offset:
-                    tag = content.new_tag('td', style='display:none', rowspan=2)
+                    tag = content.new_tag(
+                        'td', style='display:none', rowspan=2)
                     cells[extra_headers_offset].insert_after(tag)
         extra_headers_offset += 1
 
@@ -55,7 +63,7 @@ if not pdf_fixed:
             return 0
 
     max_points_count = max([get_points_count(row) for row in body]) + 1
-        
+
     header.find_all('td')[10]['colspan'] = max_points_count
 
     for row in body:
@@ -81,15 +89,12 @@ if not pdf_fixed:
     new_content = content.find('body').decode_contents()
     new_length = len(new_content) + 1
 
-    file = open(file.name, 'wb')
-    file.write('%012d' % new_length)
-    file.write('\n')
-    file.write(new_content.encode('utf-8'))
-    file.write('\n')
+    # filepath is a plain path string (it has no .name attribute); the
+    # with-block closes the rewritten file even if a write fails.
+    with open(filepath, 'wb') as output:
+        output.write('%012d\n' % new_length)
+        output.write(new_content.encode('utf-8') + '\n')
 
-from glob import glob
-from itertools import chain, cycle
-import urlparse, math
 
 def format_boards(rows):
     rows = rows[1:4]
@@ -99,30 +104,35 @@ def format_boards(rows):
                                    .replace('obie po', 'OBIE')
                                    .split(' / ')))
     rows[1][1] = ''
+
     def split_hand(hand):
         return hand.split(os.linesep)
+
     rows[0][1] = split_hand(rows[0][1])
     rows[1][0] = split_hand(rows[1][0])
     rows[1][2] = split_hand(rows[1][2])
     rows[2][1] = split_hand(rows[2][1])
+
     def side_rows(row):
-        ret =[
+        ret = [
             [row[0],
              row[1][0][2:],
              row[2]]
         ]
-        for i in range(1,4):
+        for i in range(1, 4):
             ret.append(['',
                         row[1][i][2:] or '===',
                         ''])
         return ret
+
     def middle_rows(row):
         ret = []
-        for i in range(0,4):
+        for i in range(0, 4):
             ret.append([row[0][i][2:] or '===',
                         row[1],
                         row[2][i][2:] or '==='])
         return ret
+
     rows = side_rows(rows[0]) + middle_rows(rows[1]) + side_rows(rows[2])
     header = 'ROZDANIE NR ' + header[0]
     output = [header, '']
@@ -132,6 +142,7 @@ def format_boards(rows):
     output.append('')
     return output
 
+
 def format_protocols(rows):
     output = ['                          ZAPIS      WYNIK',
               ' NS  EW  KONTRAKT  WIST  NS   EW    NS    EW']
@@ -160,50 +171,75 @@ def format_protocols(rows):
                 '{:.1f}'.format(float(row[8]))
             ]
         if content:
-            output.append(u'{:>3s} {:>3s} {:11s}{:^4s}{:>4s}{:>5s} {:>5s} {:>5s}'.format(*content))
+            output.append(
+                u'{:>3s} {:>3s} {:11s}{:^4s}{:>4s}{:>5s} {:>5s} {:>5s}'.format(
+                    *content))
         elif len(row) != 4 and len(row) != 8:
             print 'protocols: row of unexpected length'
             print row
     output.append('')
     return output
 
+
 def format_results(rows):
     rows.pop(0)
     content = []
-    link_regex = re.compile('^http://www.msc.com.pl')
-    cezar_ids = ['{:05d}'.format(int(dict(urlparse.parse_qsl(urlparse.urlparse(row.pop()).query))['r']))
-                 if re.match(link_regex, row[-1])
-                 else ''
-                 for row in rows]
+    link_regex = re.compile(r'^http://www\.msc\.com\.pl')
+    cezar_ids = [
+        '{:05d}'.format(int(
+            dict(urlparse.parse_qsl(urlparse.urlparse(row.pop()).query))['r']))
+        if re.match(link_regex, row[-1])
+        else ''
+        for row in rows]
     pdf_columns = max([len(row) for row in rows]) - 11
     for row in rows:
         length = len(row)
         if length > 5:
             content.append(row[0:3] + [cezar_ids.pop(0)] + row[3:])
         elif length == 5:
-            content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:] + [''] * (3 + pdf_columns))
+            content.append([''] * 2 + row[0:1] + [
+                cezar_ids.pop(0)] + row[1:] + [''] * (3 + pdf_columns))
         elif length == 4:
             if len(row[3]) != 2:
-                content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:3] + content[-1][6:7] + row[3:4] + [''] * (3 + pdf_columns))
+                content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] +
+                               row[1:3] + content[-1][6:7] +
+                               row[3:4] + [''] * (3 + pdf_columns))
             else:
-                content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:4] + [''] * (4 + pdf_columns))
+                content.append([''] * 2 + row[0:1] + [
+                    cezar_ids.pop(0)] + row[1:4] + [''] * (4 + pdf_columns))
         elif length == 3:
-            content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:3] + content[-1][6:8] + [''] * (3 + pdf_columns))
+            content.append([''] * 2 + row[0:1] +
+                           [cezar_ids.pop(0)] + row[1:3] + content[-1][6:8] +
+                           [''] * (3 + pdf_columns))
     wk_sum = sum([float(c[5]) if len(c[5]) else 0.0 for c in content])
     output = []
     name_column = max([len(r[2]) for r in content])
-    output.append('M-CE NR ' + ' ' * name_column + '  WK     CEZAR     +/-   WYNIK PKL ' + ('{:^' + str(3 * pdf_columns) + 's}').format('PDF') + ' NAGRODA')
+    output.append('%s %s  %s %s %s' % (
+        'M-CE NR',
+        ' ' * name_column,
+        'WK     CEZAR     +/-   WYNIK PKL',
+        ('{:^' + str(3 * pdf_columns) + 's}').format('PDF'),
+        'NAGRODA'
+    ))
     output.append('-' * len(output[-1]))
     for c in content:
-        line = (u'{:>3s} {:>3s} {:' + unicode(name_column) + u's} {:>4s} {:2s} {:5s} {:2s} {:>5s} {:>6s} {:>3s}').format(*(c[0:3] + c[5:7] + c[3:5] + c[8:11]))
-        pdf = (u' {:' + unicode(3 * pdf_columns) + u's}').format(''.join([u'{:>3s}'.format(cc) for cc in c[11:-1]]))
+        line = (
+            u'{:>3s} {:>3s} {:' + unicode(name_column) +
+            u's} {:>4s} {:2s} {:5s} {:2s} {:>5s} {:>6s} {:>3s}').format(
+                *(c[0:3] + c[5:7] + c[3:5] + c[8:11]))
+        pdf = (
+            u' {:' + unicode(3 * pdf_columns) + u's}').format(
+                ''.join([u'{:>3s}'.format(cc) for cc in c[11:-1]]))
         line += pdf
         line += u' {:>6s}'.format(c[-1])
         output.append(line)
     output.append(' ' * (8 + name_column) + '-----')
-    output.append(('{:>' + str(13 + name_column) + 's}').format('Suma WK = {:.1f}'.format(wk_sum)))
+    output.append(
+        ('{:>' + str(13 + name_column) + 's}').format(
+            'Suma WK = {:.1f}'.format(wk_sum)))
     return output
 
+
 def format_histories(rows):
     header = rows.pop(0)[0]
     rows.pop(0)
@@ -216,38 +252,57 @@ def format_histories(rows):
     for row in rows:
         content = []
         if len(row) == 11:
-            add_separator = (len(''.join(row[0:9])) == 0) and ((add_separator is False) or (row[-2] == 'miejsce'))
-            content = row[0:4] + [row[4] + ' ' + row[5] + ' ' + row[7]] + [row[6]] + row[8:11]
+            add_separator = (
+                len(''.join(row[0:9])) == 0) and (
+                    (add_separator is False) or (row[-2] == 'miejsce'))
+            content = row[0:4] + [
+                row[4] + ' ' + row[5] + ' ' + row[7]
+            ] + [row[6]] + row[8:11]
         elif len(row) == 10:
-            content = [''] + row[0:3] + [row[3] + ' ' + row[4] + ' ' + row[6]] + [row[5]] + row[7:10]
+            content = [''] + row[0:3] + [
+                row[3] + ' ' + row[4] + ' ' + row[6]
+            ] + [row[5]] + row[7:10]
         elif len(row) == 9:
-            content = ['',''] + row[0:2] + [row[2] + ' ' + row[3] + ' ' + row[5]] + [row[4]] + row[6:9]
+            content = ['', ''] + row[0:2] + [
+                row[2] + ' ' + row[3] + ' ' + row[5]
+            ] + [row[4]] + row[6:9]
         if content:
             if add_separator:
-                content_rows.append(['','','','','','','','-------','--------'])
+                content_rows.append(
+                    ['', '', '', '', '', '', '', '-------', '--------'])
             content_rows.append(content)
         else:
             print 'histories: unexpected row length'
             print row
     column_width = max([len(r[1]) for r in content_rows])
-    content_rows = [['RND', 'PRZECIWNIK', 'RZD', ' ', 'KONTRAKT', 'WIST', 'ZAPIS', 'WYNIK ', u'/ BIEŻĄCY']] + content_rows
+    content_rows = [[
+        'RND', 'PRZECIWNIK', 'RZD', ' ', 'KONTRAKT', 'WIST',
+        'ZAPIS', 'WYNIK ', u'/ BIEŻĄCY'
+    ]] + content_rows
     for content in content_rows:
         if content[6]:
-            score_align = u'>' if content[6][0] == u'-' else (u'' if content[6][0] == u'+' else u'^')
+            score_align = u'>' if content[6][0] == u'-' else (
+                u'' if content[6][0] == u'+' else u'^')
         else:
             score_align = u''
-        output.append((u'{:>3s} {:' + unicode(column_width) + u's} {:>3s} {:2s} {:9s}{:^4s} {:' + score_align + u'7s} {:>7s}{:>8s}').format(*[c or ' ' for c in content]))
+        output.append(
+            (u'{:>3s} {:' + unicode(column_width) +
+             u's} {:>3s} {:2s} {:9s}{:^4s} {:' +
+             score_align + u'7s} {:>7s}{:>8s}').format(
+                 *[c or ' ' for c in content]))
     output.insert(3, '-' * len(output[2]))
     output.append('')
     return output
-    
-def format_rows(rows, type):
-    return globals()['format_' + type](rows)
+
+
+def format_rows(rows, rowtype):
+    return globals()['format_' + rowtype](rows)
+
 
 def get_rows(content):
     soup = BeautifulSoup(content)
     output = []
-    link_regex = re.compile('^http://www.msc.com.pl')
+    link_regex = re.compile(r'^http://www\.msc\.com\.pl')
     header = soup.find('h2')
     if header:
         output.append([header.text])
@@ -261,57 +316,69 @@ def get_rows(content):
         output.append(row)
     return output
 
-def get_content(file):
+
+def get_content(filepath):
     return re.sub('<img src=".*/(.*).gif" ?/>',
                   lambda img: img.group(1)[0].capitalize(),
-                  open(file, 'r').read())
+                  open(filepath, 'r').read())
+
 
 def get_header(directory):
-    soup = BeautifulSoup(open(os.path.join(directory, 'index.html'), 'r').read())
+    soup = BeautifulSoup(
+        open(os.path.join(directory, 'index.html'), 'r').read())
     return [node.text for node in soup.select('#header *')]
 
+
 def get_files(directory):
-    return dict(map(lambda (key, val): (key,
-                                        reduce(list.__add__, map(lambda v: sorted(glob(os.path.join(directory, v))), val), [])),
+    return dict(map(lambda (key, val): (
+        key,
+        reduce(list.__add__, map(
+            lambda v: sorted(glob(os.path.join(directory, v))), val), [])),
                     {
-                        'boards': ['d?.txt','d??.txt'],
-                        'protocols': ['p?.txt','p??.txt'],
-                        'histories': ['h?.txt','h??.txt'],
+                        'boards': ['d?.txt', 'd??.txt'],
+                        'protocols': ['p?.txt', 'p??.txt'],
+                        'histories': ['h?.txt', 'h??.txt'],
                         'results': ['pary.txt'],
                     }.items()))
 
-def compile(directory):
+
+def compile_dir(directory):
     files = get_files(directory)
-    return dict(map(lambda (key, val):
-                    (key,
-                     list(chain(*
-                                list(
-                                    i.next() for i in cycle(map(lambda v:
-                                                                iter(
-                                                                    map(lambda file:
-                                                                        format_rows(
-                                                                            get_rows(
-                                                                                get_content(file)
-                                                                            ),
-                                                                            v),
-                                                                        files[v])),
-                                                                val))
-                                          )
-                                      ))
-                 ),
-                    {
-                        'P': ['boards', 'protocols'],
-                        'H': ['histories'],
-                        'W': ['results']
-                    }.items()))
+    return dict(
+        map(lambda (key, val):
+            (
+                key,
+                list(
+                    chain(
+                        *list(
+                            i.next() for i in cycle(
+                                map(lambda v:
+                                    iter(
+                                        map(lambda file:
+                                            format_rows(
+                                                get_rows(
+                                                    get_content(file)
+                                                ),
+                                                v),
+                                            files[v])),
+                                    val))
+                        )
+                    )
+                )
+            ),
+            {
+                'P': ['boards', 'protocols'],
+                'H': ['histories'],
+                'W': ['results']
+            }.items()))
 
 directory = sys.argv[1] if len(sys.argv) > 1 else os.path.abspath('.')
 header = get_header(directory) + ['']
-output = compile(directory)
+output = compile_dir(directory)
 file_prefix = os.path.basename(directory)
 
-for file, rows in output.iteritems():
-    output_file = open(file_prefix + file + '.txt', 'w')
+for filepath, rows in output.iteritems():
+    output_file = open(file_prefix + filepath + '.txt', 'w')
     for line in header:
         output_file.write(line.encode('windows-1250') + '\n')
     for row in rows:
author	emkael <emkael@tlen.pl>	2015-09-28 13:21:26 +0200
committer	emkael <emkael@tlen.pl>	2015-09-28 13:21:26 +0200
commit	1367feca21f425a0ee5a07173e8a1897ec427e20 (patch)
tree	5c7ac717e9a9ae3e60376518926503102e6a532c
parent	373a37d35e8326ad6e3a6732053bc50a5c0f8517 (diff)