diff options
author | emkael <emkael@tlen.pl> | 2018-02-21 00:42:00 +0100 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2018-02-21 00:42:00 +0100 |
commit | 83a00a66d80789aaaad9759c9a89d9980a6320ac (patch) | |
tree | 350dc7bb56a99707aebcebde91860808c16d5110 /jfr_playoff | |
parent | 95270b1f32ff946879cc9d8d7d4fe5f33b3a2868 (diff) |
Refactoring remote HTML retrieval to separate class
Diffstat (limited to 'jfr_playoff')
-rw-r--r-- | jfr_playoff/matchinfo.py | 13 | ||||
-rw-r--r-- | jfr_playoff/remote.py | 13 |
2 files changed, 16 insertions, 10 deletions
diff --git a/jfr_playoff/matchinfo.py b/jfr_playoff/matchinfo.py index e0994a8..ebae673 100644 --- a/jfr_playoff/matchinfo.py +++ b/jfr_playoff/matchinfo.py @@ -1,18 +1,16 @@ import re -import urllib from urlparse import urljoin import mysql -from bs4 import BeautifulSoup as bs import jfr_playoff.sql as p_sql from jfr_playoff.dto import Match, Team +from jfr_playoff.remote import RemoteUrl as p_remote from jfr_playoff.tournamentinfo import TournamentInfo class MatchInfo: matches = {} - url_cache = {} def __init__(self, match_config, teams, database): self.config = match_config @@ -83,13 +81,8 @@ class MatchInfo: teams[1].score -= row[2] return teams - def __fetch_url(self, url): - if url not in MatchInfo.url_cache: - MatchInfo.url_cache[url] = urllib.urlopen(url).read() - return MatchInfo.url_cache[url] - def __find_table_row(self, url): - html_content = bs(self.__fetch_url(url), 'lxml') + html_content = p_remote.fetch(url) for row in html_content.select('tr tr'): for cell in row.select('td.t1'): if cell.text.strip() == str(self.config['table']): @@ -200,7 +193,7 @@ class MatchInfo: r'\.htm$', '.html', urljoin(self.info.link, segment_link[0]['href'])) try: - segment_content = bs(self.__fetch_url(segment_url), 'lxml') + segment_content = p_remote.fetch(segment_url) board_rows = [row for row in segment_content.find_all('tr') if len(row.select('a.zb')) > 0] board_count = len(board_rows) played_boards = len([ diff --git a/jfr_playoff/remote.py b/jfr_playoff/remote.py new file mode 100644 index 0000000..b6d0a34 --- /dev/null +++ b/jfr_playoff/remote.py @@ -0,0 +1,13 @@ +import urllib + +from bs4 import BeautifulSoup as bs + +class RemoteUrl: + + url_cache = {} + + @classmethod + def fetch(cls, url): + if url not in cls.url_cache: + cls.url_cache[url] = urllib.urlopen(url).read() + return bs(cls.url_cache[url], 'lxml') |