From 83a00a66d80789aaaad9759c9a89d9980a6320ac Mon Sep 17 00:00:00 2001 From: emkael Date: Wed, 21 Feb 2018 00:42:00 +0100 Subject: Refactoring remote HTML retrieval to separate class --- jfr_playoff/matchinfo.py | 13 +++---------- jfr_playoff/remote.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 10 deletions(-) create mode 100644 jfr_playoff/remote.py diff --git a/jfr_playoff/matchinfo.py b/jfr_playoff/matchinfo.py index e0994a8..ebae673 100644 --- a/jfr_playoff/matchinfo.py +++ b/jfr_playoff/matchinfo.py @@ -1,18 +1,16 @@ import re -import urllib from urlparse import urljoin import mysql -from bs4 import BeautifulSoup as bs import jfr_playoff.sql as p_sql from jfr_playoff.dto import Match, Team +from jfr_playoff.remote import RemoteUrl as p_remote from jfr_playoff.tournamentinfo import TournamentInfo class MatchInfo: matches = {} - url_cache = {} def __init__(self, match_config, teams, database): self.config = match_config @@ -83,13 +81,8 @@ class MatchInfo: teams[1].score -= row[2] return teams - def __fetch_url(self, url): - if url not in MatchInfo.url_cache: - MatchInfo.url_cache[url] = urllib.urlopen(url).read() - return MatchInfo.url_cache[url] - def __find_table_row(self, url): - html_content = bs(self.__fetch_url(url), 'lxml') + html_content = p_remote.fetch(url) for row in html_content.select('tr tr'): for cell in row.select('td.t1'): if cell.text.strip() == str(self.config['table']): @@ -200,7 +193,7 @@ class MatchInfo: r'\.htm$', '.html', urljoin(self.info.link, segment_link[0]['href'])) try: - segment_content = bs(self.__fetch_url(segment_url), 'lxml') + segment_content = p_remote.fetch(segment_url) board_rows = [row for row in segment_content.find_all('tr') if len(row.select('a.zb')) > 0] board_count = len(board_rows) played_boards = len([ diff --git a/jfr_playoff/remote.py b/jfr_playoff/remote.py new file mode 100644 index 0000000..b6d0a34 --- /dev/null +++ b/jfr_playoff/remote.py @@ -0,0 +1,13 @@ +import urllib + +from bs4 import BeautifulSoup as bs + +class RemoteUrl: + + url_cache = {} + + @classmethod + def fetch(cls, url): + if url not in cls.url_cache: + cls.url_cache[url] = urllib.urlopen(url).read() + return bs(cls.url_cache[url], 'lxml') -- cgit v1.2.3