summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2021-06-13 12:50:35 +0200
committer emkael <emkael@tlen.pl> 2021-06-13 12:50:35 +0200
commit 6f11a6a00cdc7f96b91ded3ff313eabc57cc9f83 (patch)
tree 784ac7578fb9b45f4837f7d453934f1f2e8e8cf1
parent 047511b4be81344196c8afca4d7c67c07374e6e0 (diff)
Correctly detecting remote content encoding
Remember kids, never parse HTML with regex.
-rw-r--r-- jfr_playoff/remote.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/jfr_playoff/remote.py b/jfr_playoff/remote.py
index 74d75b5..b918ea1 100644
--- a/jfr_playoff/remote.py
+++ b/jfr_playoff/remote.py
@@ -16,8 +16,11 @@ class RemoteUrl:
if url not in cls.url_cache:
request = requests.get(url)
encoding_match = re.search(
- 'content=".*;( )?charset=(.*)"',
+ 'content=".*;( )?charset=(.*?)"',
request.content, re.IGNORECASE)
+ PlayoffLogger.get('remote').debug(
+ 'Content encoding: %s',
+ encoding_match.group(2))
if encoding_match:
request.encoding = encoding_match.group(2)
cls.url_cache[url] = request.text