Update strip-icu to python 3

This commit is contained in:
Ryan Lucia 2018-12-05 15:50:46 -05:00
parent 129e748abd
commit 628962559d

View file

@@ -15,7 +15,7 @@
# Aegisub Project http://www.aegisub.org/ # Aegisub Project http://www.aegisub.org/
# A script to strip all of the data we don't use out of ICU's data files # A script to strip all of the data we don't use out of ICU's data files
# Run from $ICU_ROOT/source/data # Run from $ICU_ROOT/icu4c/source/data
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@@ -25,10 +25,10 @@ import os
def delete_matching(filename, strs): def delete_matching(filename, strs):
exprs = [re.compile(s) for s in strs] exprs = [re.compile(s) for s in strs]
with open(filename) as f: with open(filename, encoding='utf-8') as f:
lines = [line for line in f if not any(r.match(line.decode('utf-8')) for r in exprs)] lines = [line for line in f if not any(r.match(line) for r in exprs)]
with open(filename, 'w') as f: with open(filename, 'w', encoding='utf-8') as f:
for line in lines: for line in lines:
f.write(line) f.write(line)
@@ -36,7 +36,7 @@ REMOVE_SUBDIRS=['LOCSRCDIR', 'CURRSRCDIR', 'ZONESRCDIR', 'COLSRCDIR', 'RBNFSRCDI
delete_matching('Makefile.in', ['^-include .*%s' % s for s in REMOVE_SUBDIRS]) delete_matching('Makefile.in', ['^-include .*%s' % s for s in REMOVE_SUBDIRS])
delete_matching('Makefile.in', ['^CNV_FILES']) delete_matching('Makefile.in', ['^CNV_FILES'])
with open('misc/misclocal.mk', 'w') as f: with open('misc/misclocal.mk', 'w', encoding='utf-8') as f:
f.write('MISC_SOURCE = supplementalData.txt likelySubtags.txt icuver.txt icustd.txt metadata.txt') f.write('MISC_SOURCE = supplementalData.txt likelySubtags.txt icuver.txt icustd.txt metadata.txt')
# Remove data we don't need from the lang and region files # Remove data we don't need from the lang and region files
@@ -45,8 +45,7 @@ def parse_txt(filename):
cur = root cur = root
stack = [root] stack = [root]
comment = False comment = False
for line in open(filename): for line in open(filename, encoding='utf-8'):
line = line.decode('utf-8')
line = line.strip() line = line.strip()
if len(line) == 0: if len(line) == 0:
continue continue
@@ -72,15 +71,13 @@ def parse_txt(filename):
continue continue
m = re.match('(.*){"(.*)"}', line) m = re.match('(.*){"(.*)"}', line)
if not m: if m:
print line
else:
cur[m.group(1)] = m.group(2) cur[m.group(1)] = m.group(2)
return root return root
def remove_sections(root): def remove_sections(root):
for child in root.itervalues(): for child in root.values():
child.pop('Keys', None) child.pop('Keys', None)
child.pop('LanguagesShort', None) child.pop('LanguagesShort', None)
child.pop('Types', None) child.pop('Types', None)
@@ -91,7 +88,7 @@ def remove_sections(root):
child.pop('Scripts%stand-alone', None) child.pop('Scripts%stand-alone', None)
def remove_languages(root): def remove_languages(root):
for lang, child in root.iteritems(): for lang, child in root.items():
# We only care about a language's name in that language # We only care about a language's name in that language
lang = lang.split('_')[0] lang = lang.split('_')[0]
trimmed = {} trimmed = {}
@@ -103,7 +100,7 @@ def remove_languages(root):
# Scripts which are actually used by stuff # Scripts which are actually used by stuff
SCRIPTS = ['Cyrl', 'Latn', 'Arab', 'Vaii', 'Hans', 'Hant'] SCRIPTS = ['Cyrl', 'Latn', 'Arab', 'Vaii', 'Hans', 'Hant']
def remove_scripts(root): def remove_scripts(root):
for lang, child in root.iteritems(): for lang, child in root.items():
v = child.get('Scripts') v = child.get('Scripts')
if not v: if not v:
continue continue
@@ -121,20 +118,20 @@ def write_dict(name, value, out, indent):
child_indent = indent + ' ' child_indent = indent + ' '
out.write(indent) out.write(indent)
out.write(name.encode('utf-8')) out.write(name)
out.write('{\n') out.write('{\n')
for k in sorted(value.keys()): for k in sorted(value.keys()):
v = value[k] v = value[k]
if type(v) == dict: if type(v) == dict:
write_dict(k, v, out, child_indent) write_dict(k, v, out, child_indent)
else: else:
out.write(('%s%s{"%s"}\n' % (child_indent, k, v)).encode('utf-8')) out.write(('%s%s{"%s"}\n' % (child_indent, k, v)))
out.write(indent) out.write(indent)
out.write('}\n') out.write('}\n')
def write_file(root, filename): def write_file(root, filename):
with open(filename, 'w') as f: with open(filename, 'w', encoding='utf-8') as f:
for k, v in root.iteritems(): for k, v in root.items():
write_dict(k, v, f, '') write_dict(k, v, f, '')
def minify_lang(filename): def minify_lang(filename):
@@ -287,7 +284,7 @@ def gather_regions():
REGIONS = gather_regions() REGIONS = gather_regions()
def remove_countries(root): def remove_countries(root):
for lang, child in root.iteritems(): for lang, child in root.items():
v = child.get('Countries', {}) v = child.get('Countries', {})
if not v: continue if not v: continue