From 761e993ecf6eaea9627f370de1a5403998dfc2da Mon Sep 17 00:00:00 2001 From: emkael Date: Sun, 30 Aug 2015 19:22:52 +0200 Subject: * lxml parser used --- README.md | 1 + bidding_data.py | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 260eea4..6109ed9 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ Wymagania systemowe * python 2.x (testowane i tworzone w wersji 2.7.10) * BeautifulSoup4 +* lxml (jako parser dla BS4) * argparse Opcjonalnie, wrapper Basha konwertujący dane z BWS do CSV, używa `mdb-export` diff --git a/bidding_data.py b/bidding_data.py index 88f3fc1..cc7a4c4 100644 --- a/bidding_data.py +++ b/bidding_data.py @@ -68,7 +68,7 @@ class JFRBidding: # converts bidding data into HTML table def __format_bidding(self, bidding): bid_match = re.compile('(\d)([SHDCN])') - html_output = bs4('<table>') + html_output = bs4('<table>', 'lxml') header_row = html_output.new_tag('tr') html_output.table.append(header_row) for direction in self.__directions: @@ -132,7 +132,8 @@ class JFRBidding: self.__tournament_files_match, tournament_file).group(1) with file(tournament_file, 'r+') as board_html: - board_content = bs4(board_html, from_encoding='utf-8') + board_content = bs4( + board_html, 'lxml', from_encoding='utf-8') # first found

element should be actual board number board_number = re.sub( '[^0-9]', '', @@ -205,7 +206,7 @@ class JFRBidding: def write_bidding_scripts(self): for tournament_file in self.__tournament_files: with file(tournament_file, 'r+') as board_html: - board_content = bs4(board_html, from_encoding='utf-8') + board_content = bs4(board_html, 'lxml', from_encoding='utf-8') header_scripts = board_content.select('head script') # check for jQuery, append if necessary jquery_scripts = [script for script in header_scripts @@ -240,7 +241,7 @@ class JFRBidding: board_text_path = path.splitext(tournament_file)[0] + '.txt' with file(board_text_path, 'r+') as board_text: board_text_content = bs4( - board_text, from_encoding='iso-8859-2') + board_text, 'lxml', from_encoding='iso-8859-2') for row in board_text_content.select('tr'): cells = row.select('td') # traveller table rows for specific score entries -- cgit v1.2.3