diff options
author | emkael <emkael@tlen.pl> | 2022-02-19 14:55:48 +0100 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2022-02-19 14:55:48 +0100 |
commit | c51a0a4fefb6fb9e8ca088244891ad0826ba514b (patch) | |
tree | feda7fc16a7b70bf08cdc3af3fcdde07448b8251 | |
parent | 66d95bb9795f058e67a842a9ba25f7dc3dc0fe47 (diff) |
PBN charset auto-detection using chardet (branch: pbn-charset-detection)
-rw-r--r-- | dealconvert/formats/html.py | 4 | ||||
-rw-r--r-- | requirements.txt | 1 |
2 files changed, 5 insertions, 0 deletions
```diff
diff --git a/dealconvert/formats/html.py b/dealconvert/formats/html.py
index 4185c40..efb16ff 100644
--- a/dealconvert/formats/html.py
+++ b/dealconvert/formats/html.py
@@ -1,6 +1,9 @@
 from collections import OrderedDict
+import sys
 import warnings
 
+import chardet
+
 from bcdd.BCalcWrapper import BCalcWrapper
 from bcdd.DDTable import DDTable
 from bcdd.Exceptions import FieldNotFoundException
@@ -181,6 +184,7 @@ class HTMLFormat(BinaryFormat):
     def get_html_content(self, dealset):
         deal_rows = []
         event_name = dealset[0].event
+        event_name = event_name.decode(chardet.detect(event_name)['encoding'])
         while len(dealset) > 0:
             deal_rows.append(dealset[0:self.deals_per_column])
             dealset = dealset[self.deals_per_column:]
diff --git a/requirements.txt b/requirements.txt
index 6d68c60..4c3b9ce 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
 pdfkit == 0.6.1
+chardet == 4.0.0
```