diff options
author | Steven R. Loomis <srloomis@us.ibm.com> | 2019-09-30 11:17:49 -0700 |
---|---|---|
committer | Rich Trott <rtrott@gmail.com> | 2019-10-03 15:21:26 -0700 |
commit | 1a25e901b7c380929f0d08599f49dd77897a627f (patch) | |
tree | 3fdac9d1df5970cf8870bf5a9a5ae1cb45fa890a /tools/icu | |
parent | a71fb978a40b5251fdb012bab5e03598d5397a10 (diff) | |
download | android-node-v8-1a25e901b7c380929f0d08599f49dd77897a627f.tar.gz android-node-v8-1a25e901b7c380929f0d08599f49dd77897a627f.tar.bz2 android-node-v8-1a25e901b7c380929f0d08599f49dd77897a627f.zip |
tools: support full-icu by default
Instead of an English-only icudt64l.dat in the repo,
we now have icudt64l.dat.bz2 with all locales.
- updated READMEs and docs
- shrinker now copies source, and compresses (bzip2) the ICU data file
- configure expects deps/icu-small to be full ICU with a full
  compressed data file
Fixes: https://github.com/nodejs/node/issues/19214
Co-Authored-By: Richard Lau <riclau@uk.ibm.com>
Co-Authored-By: Jan Olaf Krems <jan.krems@gmail.com>
Co-Authored-By: James M Snell <jasnell@gmail.com>
PR-URL: https://github.com/nodejs/node/pull/29522
Reviewed-By: Jan Krems <jan.krems@gmail.com>
Reviewed-By: Jiawen Geng <technicalcute@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Diffstat (limited to 'tools/icu')
-rw-r--r-- | tools/icu/README.md | 8 | ||||
-rw-r--r-- | tools/icu/icu-generic.gyp | 37 | ||||
-rw-r--r-- | tools/icu/shrink-icu-src.py | 66 |
3 files changed, 65 insertions, 46 deletions
diff --git a/tools/icu/README.md b/tools/icu/README.md index 94eaf50a55..51b58455b4 100644 --- a/tools/icu/README.md +++ b/tools/icu/README.md @@ -27,7 +27,7 @@ internationalization functionality. ```shell ./configure \ - --with-intl=small-icu \ + --with-intl=full-icu \ --with-icu-source=http://download.icu-project.org/files/icu4c/58.1/icu4c-58_1-src.tgz make ``` @@ -54,7 +54,7 @@ Also running new Intl.DateTimeFormat('es', {month: 'long'}).format(new Date(9E8)); ``` -…Should return `January` not `enero`. +…Should return `enero` not `January`. * Now, copy `deps/icu` over to `deps/icu-small` @@ -94,12 +94,12 @@ tools/license-builder.sh * Update the URL and hash for the full ICU file in `tools/icu/current_ver.dep`. It should match the ICU URL used in the first step. When this is done, the -following should build with full ICU. +following should build with small ICU. ```shell # clean up rm -rf out deps/icu deps/icu4c* -./configure --with-intl=full-icu --download=all +./configure --with-intl=small-icu --download=all make make test-ci ``` diff --git a/tools/icu/icu-generic.gyp b/tools/icu/icu-generic.gyp index b8f0d13836..d2d0e5a318 100644 --- a/tools/icu/icu-generic.gyp +++ b/tools/icu/icu-generic.gyp @@ -212,16 +212,17 @@ 'conditions': [ [ 'icu_small == "false"', { # and OS=win # full data - just build the full data file, then we are done. - 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ], 'dependencies': [ 'genccode#host' ], 'actions': [ { 'action_name': 'icudata', 'msvs_quote_cmd': 0, 'inputs': [ '<(icu_data_in)' ], - 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ], + # on Windows, we can go directly to .obj file (-o) option. 
'action': [ '<(PRODUCT_DIR)/genccode', - '-o', + '<@(icu_asm_opts)', # -o '-d', '<(SHARED_INTERMEDIATE_DIR)', '-n', 'icudata', '-e', 'icudt<(icu_ver_major)', @@ -256,9 +257,9 @@ 'action_name': 'genccode', 'msvs_quote_cmd': 0, 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], - 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ], 'action': [ '<(PRODUCT_DIR)/genccode', - '-o', + '<@(icu_asm_opts)', # -o '-d', '<(SHARED_INTERMEDIATE_DIR)/', '-n', 'icudata', '-e', 'icusmdt<(icu_ver_major)', @@ -266,20 +267,20 @@ }, ], # This file contains the small ICU data. - 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ], } ] ], #end of OS==win and icu_small == true }, { # OS != win 'conditions': [ [ 'icu_small == "false"', { - # full data - just build the full data file, then we are done. - 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.c' ], + # full data - no trim needed + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.<(icu_asm_ext)' ], 'dependencies': [ 'genccode#host', 'icupkg#host', 'icu_implementation#host', 'icu_uconfig' ], 'include_dirs': [ '<(icu_path)/source/common', ], 'actions': [ { - # Swap endianness (if needed), or at least copy the file + # Copy the .dat file, swapping endianness if needed. 
'action_name': 'icupkg', 'inputs': [ '<(icu_data_in)' ], 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness).dat' ], @@ -290,7 +291,7 @@ ], }, { - # Rename without the endianness marker + # Rename without the endianness marker (icudt64l.dat -> icudt64.dat) 'action_name': 'copy', 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness).dat' ], 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major).dat' ], @@ -300,12 +301,14 @@ ], }, { + # convert full ICU data file to .c, or .S, etc. 'action_name': 'icudata', 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major).dat' ], - 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.c' ], + 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.<(icu_asm_ext)' ], 'action': [ '<(PRODUCT_DIR)/genccode', '-e', 'icudt<(icu_ver_major)', '-d', '<(SHARED_INTERMEDIATE_DIR)', + '<@(icu_asm_opts)', '-f', 'icudt<(icu_ver_major)_dat', '<@(_inputs)' ], }, @@ -318,7 +321,8 @@ 'export_dependent_settings': [ 'icustubdata' ], 'actions': [ { - # trim down ICU + # Trim down ICU. + # Note that icupkg is invoked automatically, swapping endianness if needed. 'action_name': 'icutrim', 'inputs': [ '<(icu_data_in)', 'icu_small.json' ], 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], @@ -333,7 +337,7 @@ '-v', '-L', '<(icu_locales)'], }, { - # rename to get the final entrypoint name right + # rename to get the final entrypoint name right (icudt64l.dat -> icusmdt64.dat) 'action_name': 'rename', 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icusmdt<(icu_ver_major).dat' ], @@ -342,17 +346,18 @@ '<@(_outputs)', ], }, { - # build final .dat -> .obj + # For icu-small, always use .c, don't try to use .S, etc. 
'action_name': 'genccode', 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icusmdt<(icu_ver_major).dat' ], - 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.c' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.<(icu_asm_ext)' ], 'action': [ '<(PRODUCT_DIR)/genccode', + '<@(icu_asm_opts)', '-d', '<(SHARED_INTERMEDIATE_DIR)', '<@(_inputs)' ], }, ], # This file contains the small ICU data - 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.c' ], + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.<(icu_asm_ext)' ], # for umachine.h 'include_dirs': [ '<(icu_path)/source/common', diff --git a/tools/icu/shrink-icu-src.py b/tools/icu/shrink-icu-src.py index 0df16cde21..c91472ed30 100644 --- a/tools/icu/shrink-icu-src.py +++ b/tools/icu/shrink-icu-src.py @@ -5,14 +5,15 @@ import os import re import sys import shutil +import bz2 parser = optparse.OptionParser() -parser.add_option('--icu-small', +parser.add_option('--icudst', action='store', - dest='icusmall', + dest='icudst', default='deps/icu-small', - help='path to target ICU directory to shrink. Will be deleted.') + help='path to target ICU directory. Will be deleted.') parser.add_option('--icu-src', action='store', @@ -26,18 +27,26 @@ parser.add_option('--icutmp', default='out/Release/obj/gen/icutmp', help='path to icutmp dir.') - (options, args) = parser.parse_args() -if os.path.isdir(options.icusmall): - print('Deleting existing icusmall %s' % (options.icusmall)) - shutil.rmtree(options.icusmall) +if os.path.isdir(options.icudst): + print('Deleting existing icudst %s' % (options.icudst)) + shutil.rmtree(options.icudst) if not os.path.isdir(options.icusrc): print('Missing source ICU dir --icusrc=%s' % (options.icusrc)) sys.exit(1) +# compression stuff. Keep the suffix and the compression function in sync. 
+compression_suffix = '.bz2' +def compress_data(infp, outfp): + with open(infp, 'rb') as inf: + with bz2.BZ2File(outfp, 'wb') as outf: + shutil.copyfileobj(inf, outf) +def print_size(fn): + size = (os.stat(fn).st_size) / 1024000 + print('%dM\t%s' % (size, fn)) ignore_regex = re.compile('^.*\.(vcxproj|filters|nrm|icu|dat|xml|txt|ac|guess|m4|in|sub|py|mak)$') @@ -90,36 +99,41 @@ def icu_info(icu_full_path): return (icu_ver_major, icu_endianness) (icu_ver_major, icu_endianness) = icu_info(options.icusrc) -print("icudt%s%s" % (icu_ver_major, icu_endianness)) +print("Data file root: icudt%s%s" % (icu_ver_major, icu_endianness)) +dst_datafile = os.path.join(options.icudst, "source","data","in", "icudt%s%s.dat" % (icu_ver_major, icu_endianness)) -src_datafile = os.path.join(options.icutmp, "icusmdt%s.dat" % (icu_ver_major)) -dst_datafile = os.path.join(options.icusmall, "source","data","in", "icudt%s%s.dat" % (icu_ver_major, icu_endianness)) +src_datafile = os.path.join(options.icusrc, "source/data/in/icudt%sl.dat" % (icu_ver_major)) +dst_cmp_datafile = "%s%s" % (dst_datafile, compression_suffix) if not os.path.isfile(src_datafile): - print("Could not find source datafile %s - did you build small-icu node?" 
% src_datafile) - sys.exit(1) -else: - print("will use small datafile %s" % (src_datafile)) -print('%s --> %s' % (options.icusrc, options.icusmall)) -shutil.copytree(options.icusrc, options.icusmall, ignore=icu_ignore) -print('%s --> %s' % (src_datafile, dst_datafile)) + print("Error: icu data file not found: %s" % src_datafile) + exit(1) + +print("will use datafile %s" % (src_datafile)) + +print('%s --> %s' % (options.icusrc, options.icudst)) +shutil.copytree(options.icusrc, options.icudst, ignore=icu_ignore) # now, make the data dir (since we ignored it) -os.mkdir(os.path.join(os.path.join(options.icusmall, "source", "data"))) -os.mkdir(os.path.join(os.path.join(options.icusmall, "source", "data", "in"))) +icudst_data = os.path.join(options.icudst, "source", "data") +icudst_in = os.path.join(icudst_data, "in") +os.mkdir(icudst_data) +os.mkdir(icudst_in) -# OK, now copy the data file -shutil.copy(src_datafile, dst_datafile) +print_size(src_datafile) -# Now, print a short notice -readme_name = os.path.join(options.icusmall, "README-SMALL-ICU.txt" ) +print('%s --compress-> %s' % (src_datafile, dst_cmp_datafile)) +compress_data(src_datafile, dst_cmp_datafile) +print_size(dst_cmp_datafile) +readme_name = os.path.join(options.icudst, "README-FULL-ICU.txt" ) +# Now, print a short notice fi = open(readme_name, 'wb') -print("Small ICU sources - auto generated by shrink-icu-src.py", file=fi) +print("ICU sources - auto generated by shrink-icu-src.py", file=fi) print("", file=fi) -print("This directory contains the ICU subset used by --with-intl=small-icu (the default)", file=fi) +print("This directory contains the ICU subset used by --with-intl=full-icu", file=fi) print("It is a strict subset of ICU %s source files with the following exception(s):" % (icu_ver_major), file=fi) -print("* %s : Reduced-size data file" % (dst_datafile), file=fi) +print("* %s : compressed data file" % (dst_cmp_datafile), file=fi) print("", file=fi) print("", file=fi) print("To rebuild this 
directory, see ../../tools/icu/README.md", file=fi) |