summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2018-02-21 00:42:00 +0100
committer emkael <emkael@tlen.pl> 2018-02-21 00:42:00 +0100
commit 83a00a66d80789aaaad9759c9a89d9980a6320ac (patch)
tree 350dc7bb56a99707aebcebde91860808c16d5110
parent 95270b1f32ff946879cc9d8d7d4fe5f33b3a2868 (diff)
Refactoring remote HTML retrieval to separate class
-rw-r--r-- jfr_playoff/matchinfo.py 13
-rw-r--r-- jfr_playoff/remote.py 13
2 files changed, 16 insertions, 10 deletions
diff --git a/jfr_playoff/matchinfo.py b/jfr_playoff/matchinfo.py
index e0994a8..ebae673 100644
--- a/jfr_playoff/matchinfo.py
+++ b/jfr_playoff/matchinfo.py
@@ -1,18 +1,16 @@
import re
-import urllib
from urlparse import urljoin
import mysql
-from bs4 import BeautifulSoup as bs
import jfr_playoff.sql as p_sql
from jfr_playoff.dto import Match, Team
+from jfr_playoff.remote import RemoteUrl as p_remote
from jfr_playoff.tournamentinfo import TournamentInfo
class MatchInfo:
matches = {}
- url_cache = {}
def __init__(self, match_config, teams, database):
self.config = match_config
@@ -83,13 +81,8 @@ class MatchInfo:
teams[1].score -= row[2]
return teams
- def __fetch_url(self, url):
- if url not in MatchInfo.url_cache:
- MatchInfo.url_cache[url] = urllib.urlopen(url).read()
- return MatchInfo.url_cache[url]
-
def __find_table_row(self, url):
- html_content = bs(self.__fetch_url(url), 'lxml')
+ html_content = p_remote.fetch(url)
for row in html_content.select('tr tr'):
for cell in row.select('td.t1'):
if cell.text.strip() == str(self.config['table']):
@@ -200,7 +193,7 @@ class MatchInfo:
r'\.htm$', '.html',
urljoin(self.info.link, segment_link[0]['href']))
try:
- segment_content = bs(self.__fetch_url(segment_url), 'lxml')
+ segment_content = p_remote.fetch(segment_url)
board_rows = [row for row in segment_content.find_all('tr') if len(row.select('a.zb')) > 0]
board_count = len(board_rows)
played_boards = len([
diff --git a/jfr_playoff/remote.py b/jfr_playoff/remote.py
new file mode 100644
index 0000000..b6d0a34
--- /dev/null
+++ b/jfr_playoff/remote.py
@@ -0,0 +1,13 @@
+import urllib
+
+from bs4 import BeautifulSoup as bs
+
+class RemoteUrl:
+
+ url_cache = {}
+
+ @classmethod
+ def fetch(cls, url):
+ if url not in cls.url_cache:
+ cls.url_cache[url] = urllib.urlopen(url).read()
+ return bs(cls.url_cache[url], 'lxml')