diff options
author | emkael <emkael@tlen.pl> | 2015-08-30 19:22:52 +0200 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2015-08-30 19:22:52 +0200 |
commit | 761e993ecf6eaea9627f370de1a5403998dfc2da (patch) | |
tree | 6964cea50b3a46fac40b94aebe6a2d9e81b45b9d | |
parent | 7dc6f7799e1882fae42a9f0890b71e7e4d47e0d7 (diff) |
* lxml parser used
-rw-r--r-- | README.md | 1 | ||||
-rw-r--r-- | bidding_data.py | 9 |
2 files changed, 6 insertions, 4 deletions
@@ -12,6 +12,7 @@ Wymagania systemowe
 * python 2.x (testowane i tworzone w wersji 2.7.10)
 * BeautifulSoup4
+* lxml (jako parser dla BS4)
 * argparse
 Opcjonalnie, wrapper Basha konwertujący dane z BWS do CSV, używa `mdb-export`
diff --git a/bidding_data.py b/bidding_data.py
index 88f3fc1..cc7a4c4 100644
--- a/bidding_data.py
+++ b/bidding_data.py
@@ -68,7 +68,7 @@ class JFRBidding:
     # converts bidding data into HTML table
     def __format_bidding(self, bidding):
         bid_match = re.compile('(\d)([SHDCN])')
-        html_output = bs4('<table>')
+        html_output = bs4('<table>', 'lxml')
         header_row = html_output.new_tag('tr')
         html_output.table.append(header_row)
         for direction in self.__directions:
@@ -132,7 +132,8 @@ class JFRBidding:
                     self.__tournament_files_match,
                     tournament_file).group(1)
                 with file(tournament_file, 'r+') as board_html:
-                    board_content = bs4(board_html, from_encoding='utf-8')
+                    board_content = bs4(
+                        board_html, 'lxml', from_encoding='utf-8')
                     # first found <h4> element should be actual board number
                     board_number = re.sub(
                         '[^0-9]', '',
@@ -205,7 +206,7 @@ class JFRBidding:
     def write_bidding_scripts(self):
         for tournament_file in self.__tournament_files:
             with file(tournament_file, 'r+') as board_html:
-                board_content = bs4(board_html, from_encoding='utf-8')
+                board_content = bs4(board_html, 'lxml', from_encoding='utf-8')
                 header_scripts = board_content.select('head script')
                 # check for jQuery, append if necessary
                 jquery_scripts = [script for script in header_scripts
@@ -240,7 +241,7 @@ class JFRBidding:
             board_text_path = path.splitext(tournament_file)[0] + '.txt'
             with file(board_text_path, 'r+') as board_text:
                 board_text_content = bs4(
-                    board_text, from_encoding='iso-8859-2')
+                    board_text, 'lxml', from_encoding='iso-8859-2')
                 for row in board_text_content.select('tr'):
                     cells = row.select('td')
                     # traveller table rows for specific score entries