summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- virtual_table.py 13
1 files changed, 7 insertions, 6 deletions
diff --git a/virtual_table.py b/virtual_table.py
index 482b57e..d947336 100644
--- a/virtual_table.py
+++ b/virtual_table.py
@@ -58,7 +58,7 @@ class JFRVirtualTable:
pair_header_match = re.compile('([0-9]{1,}): (.*) - (.*), .*')
for record_file_path in self.__pair_records_files:
with file(record_file_path) as record_file:
- record = bs4(record_file)
+ record = bs4(record_file, 'lxml')
# first <td class="o1"> with content matching
# pair header is what we're after
header = [con for con
@@ -69,11 +69,13 @@ class JFRVirtualTable:
header_match = re.match(pair_header_match, header[0])
pair_number = int(header_match.group(1))
names = filter(len,
- [header_match.group(2),
- header_match.group(3)])
+ [header_match.group(2).strip(),
+ header_match.group(3).strip()])
# virtual pair does not have any names filled
if len(names) == 0:
virtual_pairs.append(pair_number)
+ if len(virtual_pairs) == 0:
+ print 'Warning: no virtual pairs detected'
return sorted(virtual_pairs)
# wrapper for DOM manipulation
@@ -81,12 +83,11 @@ class JFRVirtualTable:
def __fix_file(worker):
def file_wrapper(self, file_path, encoding='utf-8'):
with file(file_path, 'r+') as content_file:
- content = bs4(content_file, from_encoding=encoding)
+ content = bs4(content_file, 'lxml', from_encoding=encoding)
content = worker(self, content)
content_file.seek(0)
content_file.write(
- content.prettify(encoding, formatter='html')
- )
+ content.prettify(encoding, formatter='html'))
content_file.truncate()
return file_wrapper