diff options
author | emkael <emkael@tlen.pl> | 2021-06-13 12:50:35 +0200 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2021-06-13 12:50:35 +0200 |
commit | 6f11a6a00cdc7f96b91ded3ff313eabc57cc9f83 (patch) | |
tree | 784ac7578fb9b45f4837f7d453934f1f2e8e8cf1 | |
parent | 047511b4be81344196c8afca4d7c67c07374e6e0 (diff) |
Correctly detecting remote content encoding
Remember kids, never parse HTML with regex.
-rw-r--r-- | jfr_playoff/remote.py | 5 |
1 file changed, 4 insertions, 1 deletion
```diff
diff --git a/jfr_playoff/remote.py b/jfr_playoff/remote.py
index 74d75b5..b918ea1 100644
--- a/jfr_playoff/remote.py
+++ b/jfr_playoff/remote.py
@@ -16,8 +16,11 @@ class RemoteUrl:
         if url not in cls.url_cache:
             request = requests.get(url)
             encoding_match = re.search(
-                'content=".*;( )?charset=(.*)"',
+                'content=".*;( )?charset=(.*?)"',
                 request.content, re.IGNORECASE)
+            PlayoffLogger.get('remote').debug(
+                'Content encoding: %s',
+                encoding_match.group(2))
             if encoding_match:
                 request.encoding = encoding_match.group(2)
             cls.url_cache[url] = request.text
```