From 9febaf58536bb32a361d6126b8f1327f6a7dcde2 Mon Sep 17 00:00:00 2001 From: emkael Date: Sat, 24 Feb 2018 11:01:05 +0100 Subject: Fixing remote HTML encoding according to HTML content meta tags --- jfr_playoff/remote.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/jfr_playoff/remote.py b/jfr_playoff/remote.py index f02a8d4..3a519f6 100644 --- a/jfr_playoff/remote.py +++ b/jfr_playoff/remote.py @@ -1,3 +1,5 @@ +import re + import requests from bs4 import BeautifulSoup as bs @@ -9,5 +11,14 @@ class RemoteUrl: @classmethod def fetch(cls, url): if url not in cls.url_cache: - cls.url_cache[url] = requests.get(url).text + request = requests.get(url) + encoding_match = re.search( + 'content=".*;( )?charset=(.*)"', + request.content, re.IGNORECASE) + if encoding_match: + request.encoding = encoding_match.group(2) + cls.url_cache[url] = request.text + PlayoffLogger.get('remote').info( + 'content for %s not in cache: retrieved %d bytes', + url, len(cls.url_cache[url])) return bs(cls.url_cache[url], 'lxml') -- cgit v1.2.3 From f22476a36f2096628dc336f9adf0caa9a827dc11 Mon Sep 17 00:00:00 2001 From: emkael Date: Sat, 24 Feb 2018 11:02:01 +0100 Subject: Fix for rethrowing mysql.connector.Error as IOError --- jfr_playoff/db.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/jfr_playoff/db.py b/jfr_playoff/db.py index b7c3726..71f3dcb 100644 --- a/jfr_playoff/db.py +++ b/jfr_playoff/db.py @@ -29,9 +29,7 @@ class PlayoffDB(object): row = self.db_cursor.fetchone() return row except mysql.connector.Error as e: - raise IOError( - message=str(e), filename=db_name, - errno=e.errno, strerror=str(e)) + raise IOError(e.errno, str(e), db_name) def fetch_all(self, db_name, sql, params): import mysql.connector -- cgit v1.2.3