From bb8d5d0520e078f157448a7d3b4ce196f447a627 Mon Sep 17 00:00:00 2001
From: emkael
Date: Fri, 2 Apr 2021 03:37:29 +0200
Subject: Filtering out Unicode retard-speak

---
 bin/emoji-list.py | 8 ++++++++
 config/emoji.json | Bin 77700 -> 96131 bytes
 2 files changed, 8 insertions(+)

diff --git a/bin/emoji-list.py b/bin/emoji-list.py
index 31ba4b1..823347d 100644
--- a/bin/emoji-list.py
+++ b/bin/emoji-list.py
@@ -1,4 +1,5 @@
 import json, os, urllib
+import xml.etree.ElementTree as ET
 
 emoji_list = json.load(urllib.urlopen(
     'https://raw.githubusercontent.com/iamcal/emoji-data/master/emoji.json'
@@ -9,6 +10,13 @@ for emoji in emoji_list:
     name = emoji['name'].replace(' ', '_') if emoji['name'] else emoji['short_name'].upper().replace('-', '_')
     dictionary[character] = ' [%s] ' % (name)
 
+retardspeak_map = ET.fromstring(urllib.urlopen(
+    'http://slothsoft.net/getResource.php/slothsoft/unicode-mapper'
+).read())
+for letter in retardspeak_map.findall('.//letter'):
+    if letter.attrib['target'] != letter.attrib['source']:
+        dictionary[letter.attrib['target']] = letter.attrib['source']
+
 json.dump(dictionary, file(os.path.join(
     os.path.dirname(os.path.realpath(__file__)),
     '../config/emoji.json'
diff --git a/config/emoji.json b/config/emoji.json
index 161831d..f3992c2 100644
Binary files a/config/emoji.json and b/config/emoji.json differ
-- 
cgit v1.2.3