diff options
author | emkael <emkael@tlen.pl> | 2019-05-31 00:33:07 +0200 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2019-05-31 00:33:07 +0200 |
commit | f98d9bb01bc99689dabbf412b61e5e7d4deba55c (patch) | |
tree | b5b56d2a5fa0d5199e65bf037c821b58d421d498 | |
parent | 28324980f4edaa6cf0dcb341a202c52f11f2f050 (diff) |
Table cleanup fixes (probably the order of matched elements from CSS selector in html.parser changed)
-rw-r--r-- | http/index.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/http/index.py b/http/index.py
index 4699207..cc21bfe 100644
--- a/http/index.py
+++ b/http/index.py
@@ -106,9 +106,9 @@ def handler(req):
         garbage.extract()
 
     # unnecessary tables removed
-    page_content.select('table > tr > td')[1].extract()
-    for table in page_content.select('table > tr > td > table')[2:4]:
+    for table in page_content.select('table > tr > td > table')[4:]:
         table.extract()
+    page_content.select('table > tr > td')[-1].extract()
 
     # internal link targets (team, WK breakdown, etc.) removed
     for internal_link in page_content.select('table > tr > td > table a'):