forked from mia/Aegisub
Update strip-icu to Python 3
This commit is contained in:
parent
129e748abd
commit
628962559d
1 changed file with 15 additions and 18 deletions
|
@ -15,7 +15,7 @@
|
||||||
# Aegisub Project http://www.aegisub.org/
|
# Aegisub Project http://www.aegisub.org/
|
||||||
|
|
||||||
# A script to strip all of the data we don't use out of ICU's data files
|
# A script to strip all of the data we don't use out of ICU's data files
|
||||||
# Run from $ICU_ROOT/source/data
|
# Run from $ICU_ROOT/icu4c/source/data
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
@ -25,10 +25,10 @@ import os
|
||||||
def delete_matching(filename, strs):
|
def delete_matching(filename, strs):
|
||||||
exprs = [re.compile(s) for s in strs]
|
exprs = [re.compile(s) for s in strs]
|
||||||
|
|
||||||
with open(filename) as f:
|
with open(filename, encoding='utf-8') as f:
|
||||||
lines = [line for line in f if not any(r.match(line.decode('utf-8')) for r in exprs)]
|
lines = [line for line in f if not any(r.match(line) for r in exprs)]
|
||||||
|
|
||||||
with open(filename, 'w') as f:
|
with open(filename, 'w', encoding='utf-8') as f:
|
||||||
for line in lines:
|
for line in lines:
|
||||||
f.write(line)
|
f.write(line)
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ REMOVE_SUBDIRS=['LOCSRCDIR', 'CURRSRCDIR', 'ZONESRCDIR', 'COLSRCDIR', 'RBNFSRCDI
|
||||||
delete_matching('Makefile.in', ['^-include .*%s' % s for s in REMOVE_SUBDIRS])
|
delete_matching('Makefile.in', ['^-include .*%s' % s for s in REMOVE_SUBDIRS])
|
||||||
delete_matching('Makefile.in', ['^CNV_FILES'])
|
delete_matching('Makefile.in', ['^CNV_FILES'])
|
||||||
|
|
||||||
with open('misc/misclocal.mk', 'w') as f:
|
with open('misc/misclocal.mk', 'w', encoding='utf-8') as f:
|
||||||
f.write('MISC_SOURCE = supplementalData.txt likelySubtags.txt icuver.txt icustd.txt metadata.txt')
|
f.write('MISC_SOURCE = supplementalData.txt likelySubtags.txt icuver.txt icustd.txt metadata.txt')
|
||||||
|
|
||||||
# Remove data we don't need from the lang and region files
|
# Remove data we don't need from the lang and region files
|
||||||
|
@ -45,8 +45,7 @@ def parse_txt(filename):
|
||||||
cur = root
|
cur = root
|
||||||
stack = [root]
|
stack = [root]
|
||||||
comment = False
|
comment = False
|
||||||
for line in open(filename):
|
for line in open(filename, encoding='utf-8'):
|
||||||
line = line.decode('utf-8')
|
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if len(line) == 0:
|
if len(line) == 0:
|
||||||
continue
|
continue
|
||||||
|
@ -72,15 +71,13 @@ def parse_txt(filename):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
m = re.match('(.*){"(.*)"}', line)
|
m = re.match('(.*){"(.*)"}', line)
|
||||||
if not m:
|
if m:
|
||||||
print line
|
|
||||||
else:
|
|
||||||
cur[m.group(1)] = m.group(2)
|
cur[m.group(1)] = m.group(2)
|
||||||
|
|
||||||
return root
|
return root
|
||||||
|
|
||||||
def remove_sections(root):
|
def remove_sections(root):
|
||||||
for child in root.itervalues():
|
for child in root.values():
|
||||||
child.pop('Keys', None)
|
child.pop('Keys', None)
|
||||||
child.pop('LanguagesShort', None)
|
child.pop('LanguagesShort', None)
|
||||||
child.pop('Types', None)
|
child.pop('Types', None)
|
||||||
|
@ -91,7 +88,7 @@ def remove_sections(root):
|
||||||
child.pop('Scripts%stand-alone', None)
|
child.pop('Scripts%stand-alone', None)
|
||||||
|
|
||||||
def remove_languages(root):
|
def remove_languages(root):
|
||||||
for lang, child in root.iteritems():
|
for lang, child in root.items():
|
||||||
# We only care about a language's name in that language
|
# We only care about a language's name in that language
|
||||||
lang = lang.split('_')[0]
|
lang = lang.split('_')[0]
|
||||||
trimmed = {}
|
trimmed = {}
|
||||||
|
@ -103,7 +100,7 @@ def remove_languages(root):
|
||||||
# Scripts which are actually used by stuff
|
# Scripts which are actually used by stuff
|
||||||
SCRIPTS = ['Cyrl', 'Latn', 'Arab', 'Vaii', 'Hans', 'Hant']
|
SCRIPTS = ['Cyrl', 'Latn', 'Arab', 'Vaii', 'Hans', 'Hant']
|
||||||
def remove_scripts(root):
|
def remove_scripts(root):
|
||||||
for lang, child in root.iteritems():
|
for lang, child in root.items():
|
||||||
v = child.get('Scripts')
|
v = child.get('Scripts')
|
||||||
if not v:
|
if not v:
|
||||||
continue
|
continue
|
||||||
|
@ -121,20 +118,20 @@ def write_dict(name, value, out, indent):
|
||||||
child_indent = indent + ' '
|
child_indent = indent + ' '
|
||||||
|
|
||||||
out.write(indent)
|
out.write(indent)
|
||||||
out.write(name.encode('utf-8'))
|
out.write(name)
|
||||||
out.write('{\n')
|
out.write('{\n')
|
||||||
for k in sorted(value.keys()):
|
for k in sorted(value.keys()):
|
||||||
v = value[k]
|
v = value[k]
|
||||||
if type(v) == dict:
|
if type(v) == dict:
|
||||||
write_dict(k, v, out, child_indent)
|
write_dict(k, v, out, child_indent)
|
||||||
else:
|
else:
|
||||||
out.write(('%s%s{"%s"}\n' % (child_indent, k, v)).encode('utf-8'))
|
out.write(('%s%s{"%s"}\n' % (child_indent, k, v)))
|
||||||
out.write(indent)
|
out.write(indent)
|
||||||
out.write('}\n')
|
out.write('}\n')
|
||||||
|
|
||||||
def write_file(root, filename):
|
def write_file(root, filename):
|
||||||
with open(filename, 'w') as f:
|
with open(filename, 'w', encoding='utf-8') as f:
|
||||||
for k, v in root.iteritems():
|
for k, v in root.items():
|
||||||
write_dict(k, v, f, '')
|
write_dict(k, v, f, '')
|
||||||
|
|
||||||
def minify_lang(filename):
|
def minify_lang(filename):
|
||||||
|
@ -287,7 +284,7 @@ def gather_regions():
|
||||||
|
|
||||||
REGIONS = gather_regions()
|
REGIONS = gather_regions()
|
||||||
def remove_countries(root):
|
def remove_countries(root):
|
||||||
for lang, child in root.iteritems():
|
for lang, child in root.items():
|
||||||
v = child.get('Countries', {})
|
v = child.get('Countries', {})
|
||||||
if not v: continue
|
if not v: continue
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue