diff options
-rw-r--r-- | jfr_playoff/remote.py | 23 |
1 files changed, 22 insertions, 1 deletions
diff --git a/jfr_playoff/remote.py b/jfr_playoff/remote.py
index f02a8d4..3a519f6 100644
--- a/jfr_playoff/remote.py
+++ b/jfr_playoff/remote.py
@@ -1,3 +1,5 @@
+import re
+
 import requests
 
 from bs4 import BeautifulSoup as bs
@@ -9,5 +11,24 @@ class RemoteUrl:
     @classmethod
     def fetch(cls, url):
+        """Return the page at `url` parsed with BeautifulSoup/lxml.
+
+        The decoded body is kept in cls.url_cache, so each URL is
+        downloaded at most once.
+        """
         if url not in cls.url_cache:
-            cls.url_cache[url] = requests.get(url).text
+            request = requests.get(url)
+            # Prefer the charset declared in the page's <meta> tag over
+            # the encoding requests picked on its own.
+            # [^"]* and the explicit character class keep the match
+            # inside a single content="..." attribute; the pattern is
+            # bytes because request.content is the undecoded body.
+            encoding_match = re.search(
+                br'content="[^"]*;\s*charset=([\w.:+-]+)',
+                request.content, re.IGNORECASE)
+            if encoding_match:
+                request.encoding = encoding_match.group(1).decode('ascii')
+            cls.url_cache[url] = request.text
+            PlayoffLogger.get('remote').info(
+                'content for %s not in cache: retrieved %d bytes',
+                url, len(cls.url_cache[url]))
         return bs(cls.url_cache[url], 'lxml')