diff options
author | emkael <emkael@tlen.pl> | 2015-09-28 13:21:26 +0200 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2015-09-28 13:21:26 +0200 |
commit | 1367feca21f425a0ee5a07173e8a1897ec427e20 (patch) | |
tree | 5c7ac717e9a9ae3e60376518926503102e6a532c | |
parent | 373a37d35e8326ad6e3a6732053bc50a5c0f8517 (diff) |
* code reformatting
-rw-r--r-- | rrb2txt.py | 229 |
1 files changed, 148 insertions, 81 deletions
@@ -1,14 +1,20 @@ # coding=utf-8
+import sys
+import os
+import re
+import urlparse
+
from bs4 import BeautifulSoup, Comment
-import sys, os, re
+from glob import glob
+from itertools import chain, cycle
-dir = sys.argv[1] if len(sys.argv) > 1 else '.'
-file = os.path.join(dir, 'pary.txt')
+directory = sys.argv[1] if len(sys.argv) > 1 else '.'
+filepath = os.path.join(directory, 'pary.txt')
-content = ''
-with open(file, 'r') as file:
- content = BeautifulSoup(file.read())
+content = None
+with open(filepath, 'r') as file_content:
+ content = BeautifulSoup(file_content.read())
pdf_fixed = False
comments = content.findAll(text=lambda t: isinstance(t, Comment))
@@ -31,8 +37,9 @@ if not pdf_fixed: if score_cell:
score_cell[0].insert_before(tag)
- extra_headers = ['PKL','PDF','nagroda']
- extra_headers_present = [bool(header.find_all(text=h)) for h in extra_headers]
+ extra_headers = ['PKL', 'PDF', 'nagroda']
+ extra_headers_present = [bool(header.find_all(text=h))
+ for h in extra_headers]
extra_headers_offset = 8
@@ -44,7 +51,8 @@ if not pdf_fixed: for row in body:
cells = row.find_all('td')
if len(cells) >= extra_headers_offset:
- tag = content.new_tag('td', style='display:none', rowspan=2)
+ tag = content.new_tag(
+ 'td', style='display:none', rowspan=2)
cells[extra_headers_offset].insert_after(tag)
extra_headers_offset += 1
@@ -55,7 +63,7 @@ if not pdf_fixed: return 0
max_points_count = max([get_points_count(row) for row in body]) + 1
-
+
header.find_all('td')[10]['colspan'] = max_points_count
for row in body:
@@ -81,15 +89,12 @@ if not pdf_fixed: new_content = content.find('body').decode_contents()
new_length = len(new_content) + 1
- file = open(file.name, 'wb')
- file.write('%012d' % new_length)
- file.write('\n')
- file.write(new_content.encode('utf-8'))
- file.write('\n')
+    # `filepath` is already the path string built with os.path.join above;
+    # the pre-rename code used `file.name` on a file object, and a str has
+    # no `.name` attribute.  The `with` block closes the handle so the
+    # rewritten pary.txt is flushed to disk before it is read again below
+    # (it is listed under 'results' in get_files()).
+    with open(filepath, 'wb') as output:
+        output.write('%012d' % new_length)
+        output.write('\n')
+        output.write(new_content.encode('utf-8'))
+        output.write('\n')
-from glob import glob
-from itertools import chain, cycle
-import urlparse, math
def format_boards(rows):
rows = rows[1:4]
@@ -99,30 +104,35 @@ def format_boards(rows): .replace('obie po', 'OBIE')
.split(' / ')))
rows[1][1] = ''
+
def split_hand(hand):
return hand.split(os.linesep)
+
rows[0][1] = split_hand(rows[0][1])
rows[1][0] = split_hand(rows[1][0])
rows[1][2] = split_hand(rows[1][2])
rows[2][1] = split_hand(rows[2][1])
+
def side_rows(row):
- ret =[
+ ret = [
[row[0],
row[1][0][2:],
row[2]]
]
- for i in range(1,4):
+ for i in range(1, 4):
ret.append(['',
row[1][i][2:] or '===',
''])
return ret
+
def middle_rows(row):
ret = []
- for i in range(0,4):
+ for i in range(0, 4):
ret.append([row[0][i][2:] or '===',
row[1],
row[2][i][2:] or '==='])
return ret
+
rows = side_rows(rows[0]) + middle_rows(rows[1]) + side_rows(rows[2])
header = 'ROZDANIE NR ' + header[0]
output = [header, '']
@@ -132,6 +142,7 @@ def format_boards(rows): output.append('')
return output
+
def format_protocols(rows):
output = [' ZAPIS WYNIK',
' NS EW KONTRAKT WIST NS EW NS EW']
@@ -160,50 +171,75 @@ def format_protocols(rows): '{:.1f}'.format(float(row[8]))
]
if content:
- output.append(u'{:>3s} {:>3s} {:11s}{:^4s}{:>4s}{:>5s} {:>5s} {:>5s}'.format(*content))
+ output.append(
+ u'{:>3s} {:>3s} {:11s}{:^4s}{:>4s}{:>5s} {:>5s} {:>5s}'.format(
+ *content))
elif len(row) != 4 and len(row) != 8:
print 'protocols: row of unexpected length'
print row
output.append('')
return output
+
def format_results(rows):
rows.pop(0)
content = []
- link_regex = re.compile('^http://www.msc.com.pl')
- cezar_ids = ['{:05d}'.format(int(dict(urlparse.parse_qsl(urlparse.urlparse(row.pop()).query))['r']))
- if re.match(link_regex, row[-1])
- else ''
- for row in rows]
+ link_regex = re.compile(r'^http://www\.msc\.com\.pl')
+ cezar_ids = [
+ '{:05d}'.format(int(
+ dict(urlparse.parse_qsl(urlparse.urlparse(row.pop()).query))['r']))
+ if re.match(link_regex, row[-1])
+ else ''
+ for row in rows]
pdf_columns = max([len(row) for row in rows]) - 11
for row in rows:
length = len(row)
if length > 5:
content.append(row[0:3] + [cezar_ids.pop(0)] + row[3:])
elif length == 5:
- content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:] + [''] * (3 + pdf_columns))
+ content.append([''] * 2 + row[0:1] + [
+ cezar_ids.pop(0)] + row[1:] + [''] * (3 + pdf_columns))
elif length == 4:
if len(row[3]) != 2:
- content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:3] + content[-1][6:7] + row[3:4] + [''] * (3 + pdf_columns))
+ content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] +
+ row[1:3] + content[-1][6:7] +
+ row[3:4] + [''] * (3 + pdf_columns))
else:
- content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:4] + [''] * (4 + pdf_columns))
+ content.append([''] * 2 + row[0:1] + [
+ cezar_ids.pop(0)] + row[1:4] + [''] * (4 + pdf_columns))
elif length == 3:
- content.append([''] * 2 + row[0:1] + [cezar_ids.pop(0)] + row[1:3] + content[-1][6:8] + [''] * (3 + pdf_columns))
+ content.append([''] * 2 + row[0:1] +
+ [cezar_ids.pop(0)] + row[1:3] + content[-1][6:8] +
+ [''] * (3 + pdf_columns))
wk_sum = sum([float(c[5]) if len(c[5]) else 0.0 for c in content])
output = []
name_column = max([len(r[2]) for r in content])
- output.append('M-CE NR ' + ' ' * name_column + ' WK CEZAR +/- WYNIK PKL ' + ('{:^' + str(3 * pdf_columns) + 's}').format('PDF') + ' NAGRODA')
+ output.append('%s %s %s %s %s' % (
+ 'M-CE NR',
+ ' ' * name_column,
+ 'WK CEZAR +/- WYNIK PKL',
+ ('{:^' + str(3 * pdf_columns) + 's}').format('PDF'),
+ 'NAGRODA'
+ ))
output.append('-' * len(output[-1]))
for c in content:
- line = (u'{:>3s} {:>3s} {:' + unicode(name_column) + u's} {:>4s} {:2s} {:5s} {:2s} {:>5s} {:>6s} {:>3s}').format(*(c[0:3] + c[5:7] + c[3:5] + c[8:11]))
- pdf = (u' {:' + unicode(3 * pdf_columns) + u's}').format(''.join([u'{:>3s}'.format(cc) for cc in c[11:-1]]))
+ line = (
+ u'{:>3s} {:>3s} {:' + unicode(name_column) +
+ u's} {:>4s} {:2s} {:5s} {:2s} {:>5s} {:>6s} {:>3s}').format(
+ *(c[0:3] + c[5:7] + c[3:5] + c[8:11]))
+ pdf = (
+ u' {:' + unicode(3 * pdf_columns) + u's}').format(
+ ''.join([u'{:>3s}'.format(cc) for cc in c[11:-1]]))
line += pdf
line += u' {:>6s}'.format(c[-1])
output.append(line)
output.append(' ' * (8 + name_column) + '-----')
- output.append(('{:>' + str(13 + name_column) + 's}').format('Suma WK = {:.1f}'.format(wk_sum)))
+ output.append(
+ ('{:>' + str(13 + name_column) + 's}').format(
+ 'Suma WK = {:.1f}'.format(wk_sum)))
return output
+
def format_histories(rows):
header = rows.pop(0)[0]
rows.pop(0)
@@ -216,38 +252,57 @@ def format_histories(rows): for row in rows:
content = []
if len(row) == 11:
- add_separator = (len(''.join(row[0:9])) == 0) and ((add_separator is False) or (row[-2] == 'miejsce'))
- content = row[0:4] + [row[4] + ' ' + row[5] + ' ' + row[7]] + [row[6]] + row[8:11]
+ add_separator = (
+ len(''.join(row[0:9])) == 0) and (
+ (add_separator is False) or (row[-2] == 'miejsce'))
+ content = row[0:4] + [
+ row[4] + ' ' + row[5] + ' ' + row[7]
+ ] + [row[6]] + row[8:11]
elif len(row) == 10:
- content = [''] + row[0:3] + [row[3] + ' ' + row[4] + ' ' + row[6]] + [row[5]] + row[7:10]
+ content = [''] + row[0:3] + [
+ row[3] + ' ' + row[4] + ' ' + row[6]
+ ] + [row[5]] + row[7:10]
elif len(row) == 9:
- content = ['',''] + row[0:2] + [row[2] + ' ' + row[3] + ' ' + row[5]] + [row[4]] + row[6:9]
+ content = ['', ''] + row[0:2] + [
+ row[2] + ' ' + row[3] + ' ' + row[5]
+ ] + [row[4]] + row[6:9]
if content:
if add_separator:
- content_rows.append(['','','','','','','','-------','--------'])
+ content_rows.append(
+ ['', '', '', '', '', '', '', '-------', '--------'])
content_rows.append(content)
else:
print 'histories: unexpected row length'
print row
column_width = max([len(r[1]) for r in content_rows])
- content_rows = [['RND', 'PRZECIWNIK', 'RZD', ' ', 'KONTRAKT', 'WIST', 'ZAPIS', 'WYNIK ', u'/ BIEŻĄCY']] + content_rows
+ content_rows = [[
+ 'RND', 'PRZECIWNIK', 'RZD', ' ', 'KONTRAKT', 'WIST',
+ 'ZAPIS', 'WYNIK ', u'/ BIEŻĄCY'
+ ]] + content_rows
for content in content_rows:
if content[6]:
- score_align = u'>' if content[6][0] == u'-' else (u'' if content[6][0] == u'+' else u'^')
+ score_align = u'>' if content[6][0] == u'-' else (
+ u'' if content[6][0] == u'+' else u'^')
else:
score_align = u''
- output.append((u'{:>3s} {:' + unicode(column_width) + u's} {:>3s} {:2s} {:9s}{:^4s} {:' + score_align + u'7s} {:>7s}{:>8s}').format(*[c or ' ' for c in content]))
+ output.append(
+ (u'{:>3s} {:' + unicode(column_width) +
+ u's} {:>3s} {:2s} {:9s}{:^4s} {:' +
+ score_align + u'7s} {:>7s}{:>8s}').format(
+ *[c or ' ' for c in content]))
output.insert(3, '-' * len(output[2]))
output.append('')
return output
-
-def format_rows(rows, type):
- return globals()['format_' + type](rows)
+
+
+def format_rows(rows, rowtype):
+ return globals()['format_' + rowtype](rows)
+
def get_rows(content):
soup = BeautifulSoup(content)
output = []
- link_regex = re.compile('^http://www.msc.com.pl')
+ link_regex = re.compile(r'^http://www\.msc\.com\.pl')
header = soup.find('h2')
if header:
output.append([header.text])
@@ -261,57 +316,69 @@ def get_rows(content): output.append(row)
return output
-def get_content(file):
+
+def get_content(filepath):
return re.sub('<img src=".*/(.*).gif" ?/>',
lambda img: img.group(1)[0].capitalize(),
- open(file, 'r').read())
+ open(filepath, 'r').read())
+
def get_header(directory):
- soup = BeautifulSoup(open(os.path.join(directory, 'index.html'), 'r').read())
+ soup = BeautifulSoup(
+ open(os.path.join(directory, 'index.html'), 'r').read())
return [node.text for node in soup.select('#header *')]
+
def get_files(directory):
- return dict(map(lambda (key, val): (key,
- reduce(list.__add__, map(lambda v: sorted(glob(os.path.join(directory, v))), val), [])),
+ return dict(map(lambda (key, val): (
+ key,
+ reduce(list.__add__, map(
+ lambda v: sorted(glob(os.path.join(directory, v))), val), [])),
{
- 'boards': ['d?.txt','d??.txt'],
- 'protocols': ['p?.txt','p??.txt'],
- 'histories': ['h?.txt','h??.txt'],
+ 'boards': ['d?.txt', 'd??.txt'],
+ 'protocols': ['p?.txt', 'p??.txt'],
+ 'histories': ['h?.txt', 'h??.txt'],
'results': ['pary.txt'],
}.items()))
-def compile(directory):
+
+def compile_dir(directory):
files = get_files(directory)
- return dict(map(lambda (key, val):
- (key,
- list(chain(*
- list(
- i.next() for i in cycle(map(lambda v:
- iter(
- map(lambda file:
- format_rows(
- get_rows(
- get_content(file)
- ),
- v),
- files[v])),
- val))
- )
- ))
- ),
- {
- 'P': ['boards', 'protocols'],
- 'H': ['histories'],
- 'W': ['results']
- }.items()))
+ # For every output key, build one iterator of formatted files per row
+ # type and pull from them round-robin via cycle(), so for 'P' each
+ # board is followed by its protocol.  The generator stops as soon as
+ # one iterator is exhausted (Python 2 lets the StopIteration from
+ # i.next() end the generator expression); chain() then flattens the
+ # per-file line lists into one list of lines.
+ return dict(
+ map(lambda (key, val):
+ (
+ key,
+ list(
+ chain(
+ *list(
+ i.next() for i in cycle(
+ map(lambda v:
+ iter(
+ map(lambda path:
+ format_rows(
+ get_rows(
+ get_content(path)
+ ),
+ v),
+ files[v])),
+ val))
+ )
+ )
+ )
+ ),
+ {
+ 'P': ['boards', 'protocols'],
+ 'H': ['histories'],
+ 'W': ['results']
+ }.items()))
directory = sys.argv[1] if len(sys.argv) > 1 else os.path.abspath('.')
header = get_header(directory) + ['']
-output = compile(directory)
+output = compile_dir(directory)
file_prefix = os.path.basename(directory)
-for file, rows in output.iteritems():
- output_file = open(file_prefix + file + '.txt', 'w')
+for filepath, rows in output.iteritems():
+ output_file = open(file_prefix + filepath + '.txt', 'w')
for line in header:
output_file.write(line.encode('windows-1250') + '\n')
for row in rows:
|