* initial commit

author: emkael <emkael@tlen.pl> 2015-09-28 13:20:34 +0200
committer: emkael <emkael@tlen.pl> 2015-09-28 13:20:34 +0200
commit: 373a37d35e8326ad6e3a6732053bc50a5c0f8517 (patch)
tree: 33c609b5bfff5b4ef6452c46f442675617bd8e39
2 files changed, 342 insertions, 0 deletions
diff --git a/rrb2txt.py b/rrb2txt.py
new file mode 100644
index 0000000..55e9f17
--- /dev/null
+++ b/rrb2txt.py
@@ -0,0 +1,318 @@
+# coding=utf-8
+
+from bs4 import BeautifulSoup, Comment
+import sys, os, re
+
+dir = sys.argv[1] if len(sys.argv) > 1 else '.'  # results directory: first CLI argument, else the current directory
+file = os.path.join(dir, 'pary.txt')  # path of the page that may be rewritten in place further down
+# Parse pary.txt; `file` is rebound here from the path string to the (then closed) file object.
+content = ''
+with open(file, 'r') as file:
+    content = BeautifulSoup(file.read())
+# The rewrite further down appends a 'fixpdf.py' HTML comment; finding it means the file was already processed.
+pdf_fixed = False
+comments = content.findAll(text=lambda t: isinstance(t, Comment))
+for comment in comments:
+    if comment == 'fixpdf.py':
+        pdf_fixed = True
+        break
+
+if not pdf_fixed:  # one-off normalisation of the table in pary.txt
+    header = content.select('thead tr')[0]
+    body = content.select('tbody tr')
+    # Insert a hidden '+/-' column if the header has none (one header cell plus one cell per body row).
+    if not header.find_all(text='+/-'):
+        tag = content.new_tag('td', style='display:none')
+        tag.string = '+/-'
+        header.find(lambda n: n.text[0:5] == 'wynik').insert_before(tag)  # goes right before the 'wynik...' header cell
+        for row in body:
+            tag = content.new_tag('td', style='display:none', rowspan=2)
+            score_cell = row.select('td.right')
+            if score_cell:  # rows without a td.right cell are left untouched
+                score_cell[0].insert_before(tag)
+    # Insert hidden cells for whichever of the PKL / PDF / nagroda columns are missing.
+    extra_headers = ['PKL','PDF','nagroda']
+    extra_headers_present = [bool(header.find_all(text=h)) for h in extra_headers]
+
+    extra_headers_offset = 8  # index of the cell after which the next extra column is inserted
+
+    for i in range(0, len(extra_headers)):
+        if not extra_headers_present[i]:
+            tag = content.new_tag('td', style='display:none')
+            tag.string = extra_headers[i]
+            header.select('td')[extra_headers_offset].insert_after(tag)
+            for row in body:
+                cells = row.find_all('td')
+                if len(cells) > extra_headers_offset:  # strict: cells[extra_headers_offset] must exist
+                    tag = content.new_tag('td', style='display:none', rowspan=2)
+                    cells[extra_headers_offset].insert_after(tag)
+        extra_headers_offset += 1  # advances for every column, present or not
+    # Number of '|' separators in a row's cell 10 (0 when the row has no such cell).
+    def get_points_count(row):
+        try:
+            return row.find_all('td')[10].text.count('|')
+        except IndexError:
+            return 0
+
+    max_points_count = max([get_points_count(row) for row in body]) + 1  # separators + 1 = values in the longest cell
+    # The header cell at index 10 spans as many columns as the longest '|'-separated list.
+    header.find_all('td')[10]['colspan'] = max_points_count
+    # Replace each row's cell 10 by one cell per '|'-separated value.
+    for row in body:
+        cells = row.find_all('td')
+        if len(cells) > extra_headers_offset:  # offset is 11 here; strict because cells[11] is the insertion anchor
+            span = max_points_count
+            points = cells[10].text.split('|')
+            new_cells = []
+            for point in points:
+                tag = content.new_tag('td', rowspan=2)
+                tag.string = point
+                new_cells.append(tag)
+                span -= 1
+            if span > 0:
+                new_cells[-1]['colspan'] = span + 1  # last cell absorbs the columns this row does not use
+            for new_cell in new_cells:
+                cells[11].insert_before(new_cell)
+            cells[10].extract()
+    # Leave the marker comment so that the next run skips this whole rewrite.
+    content.body.append(content.new_string('fixpdf.py', Comment))
+
+    content.body.p.extract()  # drops the first <p> of <body>
+    new_content = content.find('body').decode_contents()
+    new_length = len(new_content) + 1  # NOTE(review): counts characters, not UTF-8 bytes - confirm what the reader of this header expects
+    # Written inside `with` so the data is flushed and closed before pary.txt is read back further down.
+    with open(file.name, 'wb') as file:
+        file.write('%012d' % new_length)
+        file.write('\n')
+        file.write(new_content.encode('utf-8'))
+        file.write('\n')
+
+from glob import glob
+from itertools import chain, cycle
+import urlparse, math
+
+def format_boards(rows):  # turn one board page (rows from get_rows) into the text lines of a hand diagram
+    rows = rows[1:4]  # only the rows at indexes 1-3 are used; each is indexed as three cells below
+    header = rows[0][0].split(os.linesep)  # first cell: header[0] is the board label, header[1] a ' / '-separated pair
+    rows[0][0] = '/'.join(reversed(header[1]  # the pair is reversed and re-joined with '/'
+                                   .replace('obie przed', 'NIKT')
+                                   .replace('obie po', 'OBIE')
+                                   .split(' / ')))
+    rows[1][1] = ''  # the centre cell of the middle row is blanked
+    def split_hand(hand):  # one list item per line of the cell text
+        return hand.split(os.linesep)
+    rows[0][1] = split_hand(rows[0][1])  # top hand (presumably North - confirm)
+    rows[1][0] = split_hand(rows[1][0])  # left hand
+    rows[1][2] = split_hand(rows[1][2])  # right hand
+    rows[2][1] = split_hand(rows[2][1])  # bottom hand
+    def side_rows(row):  # four output rows for a top/bottom table row: hand lines in the middle column
+        ret =[
+            [row[0],
+             row[1][0][2:],  # [2:] drops the first two characters of every hand line
+             row[2]]
+        ]
+        for i in range(1,4):
+            ret.append(['',
+                        row[1][i][2:] or '===',  # '===' stands in for an empty remainder
+                        ''])
+        return ret
+    def middle_rows(row):  # four output rows for the middle table row: left and right hands side by side
+        ret = []
+        for i in range(0,4):
+            ret.append([row[0][i][2:] or '===',
+                        row[1],
+                        row[2][i][2:] or '==='])
+        return ret
+    rows = side_rows(rows[0]) + middle_rows(rows[1]) + side_rows(rows[2])  # 12 three-column rows in total
+    header = 'ROZDANIE NR ' + header[0]
+    output = [header, '']
+    output.append('{:10s}{:6s}{:10s}'.format(*rows.pop(0)))  # first row starts at column 0 ...
+    for row in rows:
+        output.append('  {:8s}{:6s}{:10s}'.format(*row))  # ... the rest are indented by two spaces
+    output.append('')  # blank line after the diagram
+    return output
+
+def format_protocols(rows):  # turn one score-sheet page (rows from get_rows) into a list of text lines
+    output = ['                          ZAPIS      WYNIK',
+              ' NS  EW  KONTRAKT  WIST  NS   EW    NS    EW']
+    for row in rows:
+        content = []  # stays empty when the row matches neither layout below
+        if len(row) == 10:  # 10 cells: cells 6 and 7 feed the two ZAPIS columns
+            content = [
+                row[0],
+                row[1],
+                ' ' + row[2] + ' ' + row[3] + ' ' + row[5],  # cells 2, 3 and 5 are merged into the KONTRAKT column
+                row[4] or '',
+                row[6] or '',
+                '-' + row[7] if row[7] else '',  # a non-empty cell 7 is printed with a leading minus
+                '{:.1f}'.format(float(row[8])),
+                '{:.1f}'.format(float(row[9]))
+            ]
+        elif len(row) == 9:  # 9 cells: the ZAPIS columns get a fixed '0' and '' instead
+            content = [
+                row[0],
+                row[1],
+                ' ' + row[2] + ' ' + row[3] + ' ' + row[5],
+                row[4],
+                '0',
+                '',
+                '{:.1f}'.format(float(row[7])),
+                '{:.1f}'.format(float(row[8]))
+            ]
+        if content:
+            output.append(u'{:>3s} {:>3s} {:11s}{:^4s}{:>4s}{:>5s} {:>5s} {:>5s}'.format(*content))
+        elif len(row) != 4 and len(row) != 8:  # rows of 4 or 8 cells are skipped silently, others are reported
+            print 'protocols: row of unexpected length'
+            print row
+    output.append('')  # blank line after the table
+    return output
+
+def format_results(rows):  # turn the results page (rows from get_rows) into the text lines of the final table
+    rows.pop(0)  # the first row is discarded (this mutates the caller's list)
+    content = []
+    link_regex = re.compile('^http://www.msc.com.pl')
+    cezar_ids = ['{:05d}'.format(int(dict(urlparse.parse_qsl(urlparse.urlparse(row.pop()).query))['r']))  # pops the trailing link off the row; keeps its 'r' query value, zero-padded to 5 digits
+                 if re.match(link_regex, row[-1])
+                 else ''  # rows whose last cell is not such a link keep all their cells
+                 for row in rows]
+    pdf_columns = max([len(row) for row in rows]) - 11  # cells beyond 11 in the widest row (measured after the links were popped)
+    for row in rows:  # NOTE(review): rows shorter than 3 cells append nothing and pop no id, shifting later ids - confirm such rows cannot occur
+        length = len(row)
+        if length > 5:  # full row: the id is inserted after the first three cells
+            content.append(row[0:3] + [cezar_ids.pop(0)] + row[3:])
+        elif length == 5:  # shorter rows start with two blank cells and are padded on the right
+            content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:] + [''] * (3 + pdf_columns))
+        elif length == 4:
+            if len(row[3]) != 2:  # the length of the last cell decides which column it belongs to
+                content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:3] + content[-1][6:7] + row[3:4] + [''] * (3 + pdf_columns))  # cell 6 is copied from the previous output row
+            else:
+                content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:4] + [''] * (4 + pdf_columns))
+        elif length == 3:
+            content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:3] + content[-1][6:8] + [''] * (3 + pdf_columns))  # cells 6-7 are copied from the previous output row
+    wk_sum = sum([float(c[5]) if len(c[5]) else 0.0 for c in content])  # empty cells count as 0.0
+    output = []
+    name_column = max([len(r[2]) for r in content])  # width of the widest cell 2
+    output.append('M-CE NR ' + ' ' * name_column + '  WK     CEZAR     +/-   WYNIK PKL ' + ('{:^' + str(3 * pdf_columns) + 's}').format('PDF') + ' NAGRODA')
+    output.append('-' * len(output[-1]))  # rule as wide as the header line
+    for c in content:
+        line = (u'{:>3s} {:>3s} {:' + unicode(name_column) + u's} {:>4s} {:2s} {:5s} {:2s} {:>5s} {:>6s} {:>3s}').format(*(c[0:3] + c[5:7] + c[3:5] + c[8:11]))  # cells are reordered for printing; c[7] is not printed
+        pdf = (u' {:' + unicode(3 * pdf_columns) + u's}').format(''.join([u'{:>3s}'.format(cc) for cc in c[11:-1]]))  # cells from index 11 up to the last one, 3 characters each
+        line += pdf
+        line += u' {:>6s}'.format(c[-1])  # the last cell goes under NAGRODA
+        output.append(line)
+    output.append(' ' * (8 + name_column) + '-----')
+    output.append(('{:>' + str(13 + name_column) + 's}').format('Suma WK = {:.1f}'.format(wk_sum)))
+    return output
+
+def format_histories(rows):  # turn one history page (rows from get_rows) into a list of text lines
+    header = rows.pop(0)[0]  # first row: its first cell is used as the title text
+    rows.pop(0)  # the second row is discarded
+    if ' pauza ' in header:  # pages whose title contains ' pauza ' produce no output
+        return []
+    output = ['WYNIKI PARY NR ' + header,
+              '']
+    content_rows = []
+    add_separator = False  # only recomputed on 11-cell rows; it keeps its value for 10- and 9-cell rows
+    for row in rows:
+        content = []
+        if len(row) == 11:
+            add_separator = (len(''.join(row[0:9])) == 0) and ((add_separator is False) or (row[-2] == 'miejsce'))  # first 9 cells all empty, and either no separator was pending or cell -2 is 'miejsce'
+            content = row[0:4] + [row[4] + ' ' + row[5] + ' ' + row[7]] + [row[6]] + row[8:11]  # three cells merged into the KONTRAKT column
+        elif len(row) == 10:  # same layout with the first cell missing
+            content = [''] + row[0:3] + [row[3] + ' ' + row[4] + ' ' + row[6]] + [row[5]] + row[7:10]
+        elif len(row) == 9:  # same layout with the first two cells missing
+            content = ['',''] + row[0:2] + [row[2] + ' ' + row[3] + ' ' + row[5]] + [row[4]] + row[6:9]
+        if content:
+            if add_separator:
+                content_rows.append(['','','','','','','','-------','--------'])  # dashes under the last two columns
+            content_rows.append(content)
+        else:
+            print 'histories: unexpected row length'
+            print row
+    column_width = max([len(r[1]) for r in content_rows])  # NOTE(review): max() raises ValueError when no row matched
+    content_rows = [['RND', 'PRZECIWNIK', 'RZD', ' ', 'KONTRAKT', 'WIST', 'ZAPIS', 'WYNIK ', u'/ BIEŻĄCY']] + content_rows  # column titles go first
+    for content in content_rows:
+        if content[6]:  # alignment of column 6 depends on its first character
+            score_align = u'>' if content[6][0] == u'-' else (u'' if content[6][0] == u'+' else u'^')
+        else:
+            score_align = u''
+        output.append((u'{:>3s} {:' + unicode(column_width) + u's} {:>3s} {:2s} {:9s}{:^4s} {:' + score_align + u'7s} {:>7s}{:>8s}').format(*[c or ' ' for c in content]))  # empty cells are printed as a single space
+    output.insert(3, '-' * len(output[2]))  # rule under the column titles (output[2]), same width
+    output.append('')
+    return output
+    
+def format_rows(rows, type):  # dispatch to the module-level function named 'format_' + type
+    return globals()['format_' + type](rows)  # raises KeyError when no such function exists
+
+def get_rows(content):  # parse an HTML string into a list of rows, each a list of cell texts
+    soup = BeautifulSoup(content)
+    output = []
+    link_regex = re.compile('^http://www.msc.com.pl')
+    header = soup.find('h2')
+    if header:
+        output.append([header.text])  # the first <h2>, when present, becomes a one-cell leading row
+    for table_row in soup.find_all('tr'):
+        row = map(lambda t:  # text of every <td>: its stripped strings joined with os.linesep
+                  os.linesep.join(t.stripped_strings),
+                  table_row.find_all('td'))
+        row = row + map(lambda l:  # followed by the href of every link in the row that matches link_regex
+                        l['href'],
+                        table_row.find_all('a', {'href': link_regex}))
+        output.append(row)  # Python 2: map() returns lists, so `row` is a plain list
+    return output
+
+def get_content(file):  # return the file's text with every matched <img .../NAME.gif /> tag replaced by NAME's first letter, capitalised
+    with open(file, 'r') as source:  # `with` closes the handle; it used to be left open
+        return re.sub('<img src=".*/(.*).gif" ?/>',  # NOTE(review): greedy '.*' spans several tags on one line - confirm input has one image per line
+                      lambda img: img.group(1)[0].capitalize(), source.read())
+
+def get_header(directory):  # return the text of every element matched by '#header *' in <directory>/index.html
+    with open(os.path.join(directory, 'index.html'), 'r') as index:  # `with` closes the handle; it used to be left open
+        return [node.text for node in BeautifulSoup(index.read()).select('#header *')]
+
+def get_files(directory):  # map each section name to the list of its input files found in `directory`
+    return dict(map(lambda (key, val): (key,  # Python 2 tuple-parameter lambda, applied to the items of the dict below
+                                        reduce(list.__add__, map(lambda v: sorted(glob(os.path.join(directory, v))), val), [])),  # each pattern's matches are sorted, then the lists are concatenated in pattern order
+                    {
+                        'boards': ['d?.txt','d??.txt'],  # one-character names are listed before two-character ones
+                        'protocols': ['p?.txt','p??.txt'],
+                        'histories': ['h?.txt','h??.txt'],
+                        'results': ['pary.txt'],
+                    }.items()))
+
+def compile(directory):  # build the lines of every output report; returns a dict keyed 'P', 'H' and 'W'
+    files = get_files(directory)
+    return dict(map(lambda (key, val):  # key: report suffix, val: the section names that make up the report
+                    (key,
+                     list(chain(*
+                                list(
+                                    i.next() for i in cycle(map(lambda v:  # round-robin over the per-section iterators, one file's lines at a time
+                                                                iter(
+                                                                    map(lambda file:  # one formatted list of lines per input file of section v
+                                                                        format_rows(
+                                                                            get_rows(
+                                                                                get_content(file)
+                                                                            ),
+                                                                            v),
+                                                                        files[v])),
+                                                                val))
+                                          )  # the generator ends when an iterator's next() raises StopIteration (pre-PEP 479 behaviour)
+                                      ))  # chain(*...) flattens the per-file lists into one list of lines
+                 ),
+                    {
+                        'P': ['boards', 'protocols'],  # two sections: boards and protocols alternate file by file
+                        'H': ['histories'],
+                        'W': ['results']
+                    }.items()))
+
+directory = sys.argv[1] if len(sys.argv) > 1 else os.path.abspath('.')  # first CLI argument, else the absolute current directory
+header = get_header(directory) + ['']  # header lines from index.html plus one blank separator line
+output = compile(directory)  # report suffix ('P', 'H', 'W') -> list of lines
+file_prefix = os.path.basename(directory)  # NOTE(review): empty when the argument ends with a path separator
+# One file per report, named <prefix><suffix>.txt and created in the current working directory.
+for file, rows in output.iteritems():
+    with open(file_prefix + file + '.txt', 'w') as output_file:  # `with` closes each report; handles used to be left open
+        for line in header:
+            output_file.write(line.encode('windows-1250') + '\n')
+        for row in rows:
+            output_file.write(row.encode('windows-1250') + '\n')
diff --git a/rrb2txt.spec b/rrb2txt.spec
new file mode 100644
index 0000000..fb0c810
--- /dev/null
+++ b/rrb2txt.spec
@@ -0,0 +1,24 @@
+# -*- mode: python -*-
+# Build spec that packages rrb2txt.py as rrb2txt.exe (Analysis / PYZ / EXE look like PyInstaller spec names).
+block_cipher = None  # passed as cipher= to Analysis and PYZ below
+
+
+a = Analysis(['rrb2txt.py'],  # the script to package
+             pathex=['f:\\Brydz\\RRBridge'],  # NOTE(review): absolute path from one machine - confirm before building elsewhere
+             hiddenimports=[],
+             hookspath=None,
+             runtime_hooks=None,
+             excludes=None,
+             cipher=block_cipher)
+pyz = PYZ(a.pure,  # archive built from the pure part of the analysis
+             cipher=block_cipher)
+exe = EXE(pyz,
+          a.scripts,
+          a.binaries,
+          a.zipfiles,
+          a.datas,
+          name='rrb2txt.exe',
+          debug=False,
+          strip=None,
+          upx=True,
+          console=True )  # console=True: built as a console program
author	emkael <emkael@tlen.pl>	2015-09-28 13:20:34 +0200
committer	emkael <emkael@tlen.pl>	2015-09-28 13:20:34 +0200
commit	373a37d35e8326ad6e3a6732053bc50a5c0f8517 (patch)
tree	33c609b5bfff5b4ef6452c46f442675617bd8e39