* lxml is now passed explicitly as the parser for BeautifulSoup4

author: emkael <emkael@tlen.pl> 2015-08-30 19:22:52 +0200
committer: emkael <emkael@tlen.pl> 2015-08-30 19:22:52 +0200
commit: 761e993ecf6eaea9627f370de1a5403998dfc2da (patch)
tree: 6964cea50b3a46fac40b94aebe6a2d9e81b45b9d
parent: 7dc6f7799e1882fae42a9f0890b71e7e4d47e0d7 (diff)
2 files changed, 6 insertions, 4 deletions
diff --git a/README.md b/README.md
index 260eea4..6109ed9 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ Wymagania systemowe
 
 * python 2.x (testowane i tworzone w wersji 2.7.10)
 * BeautifulSoup4
+* lxml (jako parser dla BS4)
 * argparse
 
 Opcjonalnie, wrapper Basha konwertujący dane z BWS do CSV, używa `mdb-export`
diff --git a/bidding_data.py b/bidding_data.py
index 88f3fc1..cc7a4c4 100644
--- a/bidding_data.py
+++ b/bidding_data.py
@@ -68,7 +68,7 @@ class JFRBidding:
     # converts bidding data into HTML table
     def __format_bidding(self, bidding):
         bid_match = re.compile('(\d)([SHDCN])')
-        html_output = bs4('<table>')
+        html_output = bs4('<table>', 'lxml')
         header_row = html_output.new_tag('tr')
         html_output.table.append(header_row)
         for direction in self.__directions:
@@ -132,7 +132,8 @@ class JFRBidding:
                     self.__tournament_files_match,
                     tournament_file).group(1)
                 with file(tournament_file, 'r+') as board_html:
-                    board_content = bs4(board_html, from_encoding='utf-8')
+                    board_content = bs4(
+                        board_html, 'lxml', from_encoding='utf-8')
                     # first found <h4> element should be actual board number
                     board_number = re.sub(
                         '[^0-9]', '',
@@ -205,7 +206,7 @@ class JFRBidding:
     def write_bidding_scripts(self):
         for tournament_file in self.__tournament_files:
             with file(tournament_file, 'r+') as board_html:
-                board_content = bs4(board_html, from_encoding='utf-8')
+                board_content = bs4(board_html, 'lxml', from_encoding='utf-8')
                 header_scripts = board_content.select('head script')
                 # check for jQuery, append if necessary
                 jquery_scripts = [script for script in header_scripts
@@ -240,7 +241,7 @@ class JFRBidding:
             board_text_path = path.splitext(tournament_file)[0] + '.txt'
             with file(board_text_path, 'r+') as board_text:
                 board_text_content = bs4(
-                    board_text, from_encoding='iso-8859-2')
+                    board_text, 'lxml', from_encoding='iso-8859-2')
                 for row in board_text_content.select('tr'):
                     cells = row.select('td')
                     # traveller table rows for specific score entries
author	emkael <emkael@tlen.pl>	2015-08-30 19:22:52 +0200
committer	emkael <emkael@tlen.pl>	2015-08-30 19:22:52 +0200
commit	761e993ecf6eaea9627f370de1a5403998dfc2da (patch)
tree	6964cea50b3a46fac40b94aebe6a2d9e81b45b9d
parent	7dc6f7799e1882fae42a9f0890b71e7e4d47e0d7 (diff)