# -*- coding: utf-8 -*-
"""Turn a bridge board-results HTML page into SQL ``UPDATE`` statements.

Usage::

    python SCRIPT INPUT_HTML RND SEGMENT BOARD

The page is scanned row by row (``<tr>``).  A row containing an
``a[target="polew"]`` link starts a new table whose number is the link text;
every row that has ``td.bdc`` cells contributes one result to the table
currently being read.  The first result of a table is reported as room 1 and
the second as room 2.

One line is written to standard output (always UTF-8 encoded) per table and
room: an ``UPDATE scores ...`` statement when the result is understood, or an
SQL comment when it is not.

The code runs unchanged on Python 2 and Python 3.
"""
import re
import sys

# Contract, declarer, lead and result glued together, e.g. ``3nSh10=`` or
# ``4hxWsA-2``.  Groups: 1 level, 2 denomination, 3 double marker, 4 declarer,
# 5 lead card, 7 result.
# NOTE(review): the pattern is not anchored at the end and the result accepts
# a single digit 1-7 only, so a result such as ``-10`` would be read as ``-1``.
# Left untouched because what may follow the result in the cell text is not
# visible from this script -- confirm against real input before tightening.
SCORE_REGEX = re.compile(
    r'^([1-7])([cdhns])(x?)([ENSW])([cdhs]([AKQJ2-9]|10))(=|(-|\+)[1-7])')

# An optionally signed run of ASCII digits and nothing else.
POINTS_REGEX = re.compile(r'[+-]?[0-9]+\Z')

WHERE_CLAUSE = ('rnd = %d AND segment = %d AND room = %d AND tabl = %d '
                'AND board = %d;')


def parse_points(text):
    """Return *text* as an ``int``, or ``None`` if it is not a plain integer.

    The value comes straight from the HTML page and is inserted into SQL
    without quoting, so anything that is not an integer literal is rejected.
    """
    cleaned = text.strip()
    if POINTS_REGEX.match(cleaned):
        return int(cleaned)
    return None


def comment_line(text):
    """Return *text* as a one-line ``-- ->`` SQL comment.

    Line breaks are replaced by spaces so that scraped text can never end the
    comment early and continue as live SQL on the following line.
    """
    return '-- ->' + ' '.join(text.splitlines())


def sql_for_score(score, points, rnd, segment, room, tabl, board):
    """Build the output line for a single result.

    :param score: concatenated result text (see ``SCORE_REGEX``) or a text
        starting with ``PAS`` for a passed-out board.
    :param points: points text taken from the page; ignored for passed-out
        boards, which always get a score of 0.
    :param rnd, segment, room, tabl, board: integers identifying the row of
        the ``scores`` table to update.
    :returns: an ``UPDATE scores ...;`` statement, or an SQL comment when
        *score* is not understood or *points* is not an integer.
    """
    where = WHERE_CLAUSE % (rnd, segment, room, tabl, board)
    parsed = SCORE_REGEX.match(score)
    if parsed:
        value = parse_points(points)
        if value is None:
            return comment_line(
                '%s (unusable points: %s)' % (score, points))
        contract = ('%s %s %s' % (
            parsed.group(1),
            parsed.group(2).replace('n', 'nt').upper(),
            parsed.group(3),
        )).strip()
        declarer = parsed.group(4)
        lead = parsed.group(5).upper()
        result = parsed.group(7)
        result = 0 if result == '=' else int(result)
        # Every interpolated value is either regex-constrained or an int.
        return ('UPDATE scores SET '
                'declarer = "%s", contract = "%s", result = %d, '
                'score = %d, lead = "%s" WHERE %s'
                % (declarer, contract, result, value, lead, where))
    if score.startswith('PAS'):
        return ('UPDATE scores SET '
                'declarer = NULL, contract = "PASS", result = NULL, '
                'score = 0, lead = NULL WHERE ' + where)
    return comment_line(score)


def _score_text(cells, tag_type):
    """Concatenate the content of *cells* into a single score string.

    Tags contribute their ``alt`` attribute (falling back to their text when
    there is none); plain strings are used as they are, with the
    multiplication sign replaced by the letter ``x``.
    """
    parts = []
    for cell in cells:
        for element in cell.contents:
            if isinstance(element, tag_type):
                alt = element.get('alt')
                if alt is None:
                    alt = element.get_text()
                parts.append(alt.replace(u'\u00d7', 'x'))
            else:
                parts.append(element.replace(u'\u00d7', 'x'))
    return ''.join(parts)


def parse_scores(input_file):
    """Read *input_file* and return ``{table number: [result, ...]}``.

    Each result is a dict with the keys ``'score'`` (concatenated text of all
    ``td.bdc`` cells of the row except the last one) and ``'points'`` (text of
    the first ``td.zno``/``td.zeo`` cell of the row, or ``'0'`` when the row
    has none).  Result rows that appear before the first table link are
    ignored.  Requires ``bs4`` and the ``lxml`` parser.
    """
    # Imported here rather than at module level so that the pure SQL helpers
    # above stay importable on machines without bs4 installed.
    from bs4 import BeautifulSoup
    from bs4.element import Tag

    # Binary mode lets BeautifulSoup detect the page encoding itself; the
    # ``with`` block closes the handle once the markup has been read.
    with open(input_file, 'rb') as handle:
        content = BeautifulSoup(handle, 'lxml')

    scores = {}
    current_table = None
    for row in content.select('tr'):
        table_link = row.select('a[target="polew"]')
        if table_link:
            # Register the list right away; results are appended in place.
            current_table = []
            scores[int(table_link[0].text)] = current_table
        cells = row.select('td.bdc')
        if not cells or current_table is None:
            continue
        point_cells = row.select('td.zno') + row.select('td.zeo')
        current_table.append({
            'score': _score_text(cells[:-1], Tag),
            'points': point_cells[0].text if point_cells else '0',
        })
    return scores


def emit(line):
    """Write *line* plus a newline to standard output as UTF-8 bytes."""
    data = (line + '\n').encode('utf8')
    # Python 3 exposes the byte stream as ``sys.stdout.buffer``; on Python 2
    # ``sys.stdout`` accepts bytes directly.
    stream = getattr(sys.stdout, 'buffer', sys.stdout)
    stream.write(data)


def main(argv=None):
    """Command-line entry point: ``INPUT_HTML RND SEGMENT BOARD``."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 5:
        sys.stderr.write(
            'usage: %s INPUT_HTML RND SEGMENT BOARD\n' % argv[0])
        sys.exit(2)
    input_file = argv[1]
    rnd, segment, board = (int(arg) for arg in argv[2:5])

    scores = parse_scores(input_file)
    for room in (1, 2):
        # Sorted so that the output order does not depend on dict ordering.
        for tabl in sorted(scores):
            results = scores[tabl]
            if len(results) < room:
                emit('-- table %d: no result for room %d' % (tabl, room))
                continue
            entry = results[room - 1]
            emit(sql_for_score(entry['score'], entry['points'],
                               rnd, segment, room, tabl, board))


if __name__ == '__main__':
    main()