summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  http/index.py  6
1 files changed, 4 insertions, 2 deletions
diff --git a/http/index.py b/http/index.py
index cc21bfe..7b672b6 100644
--- a/http/index.py
+++ b/http/index.py
@@ -81,7 +81,8 @@ def handler(req):
return apache.OK
# we need to sanitize line breaks and double-closed anchors manually
- remote_content = bs4('<div>' + ' '.join(remote_content[delimiters[0]+1:delimiters[1]]).replace('<BR>', '<br />').replace('</a></a>', '</a>') + '</div>', 'html.parser')
+ remote_content = bs4('<div>' + ' '.join(remote_content[delimiters[0]+1:delimiters[1]]).replace('</SPAN><BR></SPAN>', '</SPAN><BR>').replace('<BR>', '<br />').replace('</a></a>', '</a>') + '</div>', 'html.parser')
+
# stub template for output page
page_content = bs4('''
<html><head>
@@ -108,7 +109,8 @@ def handler(req):
# unnecessary tables removed
for table in page_content.select('table > tr > td > table')[4:]:
table.extract()
- page_content.select('table > tr > td')[-1].extract()
+ if u'Lista' not in [b.text for b in page_content.select('b')]:
+ page_content.select('table > tr > td')[-1].extract()
# internal link targets (team, WK breakdown, etc.) removed
for internal_link in page_content.select('table > tr > td > table a'):