From a30839576c65b88e93bc915ae97b59874afde8ac Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Wed, 12 Nov 2014 17:13:14 -0800 Subject: build: i18n: add icu config options Make "--with-intl=none" the default and add "intl-none" option to vcbuild.bat. If icu data is missing print a warning unless either --download=all or --download=icu is set. If set then automatically download, verify (MD5) and unpack the ICU data if not already available. There's a "list" of URLs being used, but right now only the first is picked up. The logic works something like this: * If there is no directory deps/icu, * If no zip file (currently icu4c-54_1-src.zip), * Download zip file (icu-project.org -> sf.net) * Verify the MD5 sum of the zipfile * If bad, print error and exit * Unpack the zipfile into deps/icu * If deps/icu now exists, use it, else fail with help text Add the configuration option "--with-icu-source=..." Usage: * --with-icu-source=/path/to/my/other/icu * --with-icu-source=/path/to/icu54.zip * --with-icu-source=/path/to/icu54.tgz * --with-icu-source=http://example.com/icu54.tar.bz2 Add the configuration option "--with-icu-locals=...". Allows choosing which locales are used in the "small-icu" case. Example: configure --with-intl=small-icu --with-icu-locales=tlh,grc,nl (Also note that as of this writing, neither Klingon nor Ancient Greek are in upstream CLDR data. Serving suggestion only.) Don't use hard coded ../../out paths on windows. This was suggested by @misterdjules as it causes test failures. With this fix, "out" is no longer created on windows and the following can run properly: python tools/test.py simple Reduce space by about 1MB with ICU 54 (over without this patch). Also trims a few other source files, but only conditional on the exact ICU version used. This is to future-proof - a file that is unneeded now may be needed in future ICUs. Also: * Update distclean to remove icu related files * Refactor some code into tools/configure.d/nodedownload.py * Update docs * Add test PR-URL: https://github.com/joyent/node/pull/8719 Fixes: https://github.com/joyent/node/issues/7676#issuecomment-64704230 [trev.norris@gmail.com small change to test's whitespace and logic] Signed-off-by: Trevor Norris --- tools/configure.d/nodedownload.py | 127 ++++++++++++++++++++++++++++ tools/icu/icu-generic.gyp | 174 ++++++++++++++++++++++++++++++-------- tools/icu/icu_small.json | 15 ++-- tools/icu/icutrim.py | 13 +++ 4 files changed, 289 insertions(+), 40 deletions(-) create mode 100644 tools/configure.d/nodedownload.py (limited to 'tools') diff --git a/tools/configure.d/nodedownload.py b/tools/configure.d/nodedownload.py new file mode 100644 index 0000000000..e24efd865f --- /dev/null +++ b/tools/configure.d/nodedownload.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python +# Moved some utilities here from ../../configure + +import urllib +import hashlib +import sys +import zipfile +import tarfile +import fpformat +import contextlib + +def formatSize(amt): + """Format a size as a string in MB""" + return fpformat.fix(amt / 1024000., 1) + +def spin(c): + """print out an ASCII 'spinner' based on the value of counter 'c'""" + spin = ".:|'" + return (spin[c % len(spin)]) + +class ConfigOpener(urllib.FancyURLopener): + """fancy opener used by retrievefile. Set a UA""" + # append to existing version (UA) + version = '%s node.js/configure' % urllib.URLopener.version + +def reporthook(count, size, total): + """internal hook used by retrievefile""" + sys.stdout.write(' Fetch: %c %sMB total, %sMB downloaded \r' % + (spin(count), + formatSize(total), + formatSize(count*size))) + +def retrievefile(url, targetfile): + """fetch file 'url' as 'targetfile'. Return targetfile or throw.""" + try: + sys.stdout.write(' <%s>\nConnecting...\r' % url) + sys.stdout.flush() + msg = ConfigOpener().retrieve(url, targetfile, reporthook=reporthook) + print '' # clear the line + return targetfile + except: + print ' ** Error occurred while downloading\n <%s>' % url + raise + +def md5sum(targetfile): + """md5sum a file. Return the hex digest.""" + digest = hashlib.md5() + with open(targetfile, 'rb') as f: + chunk = f.read(1024) + while chunk != "": + digest.update(chunk) + chunk = f.read(1024) + return digest.hexdigest() + +def unpack(packedfile, parent_path): + """Unpacks packedfile into parent_path. Assumes .zip. Returns parent_path""" + if zipfile.is_zipfile(packedfile): + with contextlib.closing(zipfile.ZipFile(packedfile, 'r')) as icuzip: + print ' Extracting zipfile: %s' % packedfile + icuzip.extractall(parent_path) + return parent_path + elif tarfile.is_tarfile(packedfile): + with tarfile.TarFile.open(packedfile, 'r') as icuzip: + print ' Extracting tarfile: %s' % packedfile + icuzip.extractall(parent_path) + return parent_path + else: + packedsuffix = packedfile.lower().split('.')[-1] # .zip, .tgz etc + raise Exception('Error: Don\'t know how to unpack %s with extension %s' % (packedfile, packedsuffix)) + +# List of possible "--download=" types. +download_types = set(['icu']) + +# Default options for --download. +download_default = "none" + +def help(): + """This function calculates the '--help' text for '--download'.""" + return """Select which packages may be auto-downloaded. +valid values are: none, all, %s. (default is "%s").""" % (", ".join(download_types), download_default) + +def set2dict(keys, value=None): + """Convert some keys (iterable) to a dict.""" + return dict((key, value) for (key) in keys) + +def parse(opt): + """This function parses the options to --download and returns a set such as { icu: true }, etc. """ + if not opt: + opt = download_default + + theOpts = set(opt.split(',')) + + if 'all' in theOpts: + # all on + return set2dict(download_types, True) + elif 'none' in theOpts: + # all off + return set2dict(download_types, False) + + # OK. Now, process each of the opts. + theRet = set2dict(download_types, False) + for anOpt in opt.split(','): + if not anOpt or anOpt == "": + # ignore stray commas, etc. + continue + elif anOpt is 'all': + # all on + theRet = dict((key, True) for (key) in download_types) + else: + # turn this one on + if anOpt in download_types: + theRet[anOpt] = True + else: + # future proof: ignore unknown types + print 'Warning: ignoring unknown --download= type "%s"' % anOpt + # all done + return theRet + +def candownload(auto_downloads, package): + if not (package in auto_downloads.keys()): + raise Exception('Internal error: "%s" is not in the --downloads list. Check nodedownload.py' % package) + if auto_downloads[package]: + return True + else: + print """Warning: Not downloading package "%s". You could pass "--download=all" + (Windows: "download-all") to try auto-downloading it.""" % package + return False diff --git a/tools/icu/icu-generic.gyp b/tools/icu/icu-generic.gyp index 220d2c16a6..bb2b5e5e4d 100644 --- a/tools/icu/icu-generic.gyp +++ b/tools/icu/icu-generic.gyp @@ -11,6 +11,17 @@ }, 'includes': [ '../../icu_config.gypi' ], 'targets': [ + { + # a target for additional uconfig defines, target only + 'target_name': 'icu_uconfig_target', + 'type': 'none', + 'toolsets': [ 'target' ], + 'direct_dependent_settings': { + 'defines': [ + 'UCONFIG_NO_CONVERSION=1', + ] + }, + }, { # a target to hold uconfig defines. # for now these are hard coded, but could be defined. @@ -92,24 +103,74 @@ }, { 'target_name': 'icui18n', - 'type': '<(library)', - 'toolsets': [ 'target' ], - 'sources': [ - '<@(icu_src_i18n)' - ], - 'include_dirs': [ - '../../deps/icu/source/i18n', - ], - 'defines': [ - 'U_I18N_IMPLEMENTATION=1', + 'toolsets': [ 'target', 'host' ], + 'conditions' : [ + ['_toolset=="target"', { + 'type': '<(library)', + 'sources': [ + '<@(icu_src_i18n)' + ], + 'conditions': [ + [ 'icu_ver_major == 54', { 'sources!': [ + ## Strip out the following for ICU 54 only. + ## add more conditions in the future? + ## if your compiler can dead-strip, this will + ## make ZERO difference to binary size. + ## Made ICU-specific for future-proofing. + + # alphabetic index + '../../deps/icu/source/i18n/alphaindex.cpp', + # BOCSU + # misc + '../../deps/icu/source/i18n/regexcmp.cpp', + '../../deps/icu/source/i18n/regexcmp.h', + '../../deps/icu/source/i18n/regexcst.h', + '../../deps/icu/source/i18n/regeximp.cpp', + '../../deps/icu/source/i18n/regeximp.h', + '../../deps/icu/source/i18n/regexst.cpp', + '../../deps/icu/source/i18n/regexst.h', + '../../deps/icu/source/i18n/regextxt.cpp', + '../../deps/icu/source/i18n/regextxt.h', + '../../deps/icu/source/i18n/region.cpp', + '../../deps/icu/source/i18n/region_impl.h', + '../../deps/icu/source/i18n/reldatefmt.cpp', + '../../deps/icu/source/i18n/reldatefmt.h' + '../../deps/icu/source/i18n/scientificformathelper.cpp', + '../../deps/icu/source/i18n/tmunit.cpp', + '../../deps/icu/source/i18n/tmutamt.cpp', + '../../deps/icu/source/i18n/tmutfmt.cpp', + '../../deps/icu/source/i18n/uregex.cpp', + '../../deps/icu/source/i18n/uregexc.cpp', + '../../deps/icu/source/i18n/uregion.cpp', + '../../deps/icu/source/i18n/uspoof.cpp', + '../../deps/icu/source/i18n/uspoof_build.cpp', + '../../deps/icu/source/i18n/uspoof_conf.cpp', + '../../deps/icu/source/i18n/uspoof_conf.h', + '../../deps/icu/source/i18n/uspoof_impl.cpp', + '../../deps/icu/source/i18n/uspoof_impl.h', + '../../deps/icu/source/i18n/uspoof_wsconf.cpp', + '../../deps/icu/source/i18n/uspoof_wsconf.h', + ]}]], + 'include_dirs': [ + '../../deps/icu/source/i18n', + ], + 'defines': [ + 'U_I18N_IMPLEMENTATION=1', + ], + 'dependencies': [ 'icuucx', 'icu_implementation', 'icu_uconfig', 'icu_uconfig_target' ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../../deps/icu/source/i18n', + ], + }, + 'export_dependent_settings': [ 'icuucx', 'icu_uconfig_target' ], + }], + ['_toolset=="host"', { + 'type': 'none', + 'dependencies': [ 'icutools' ], + 'export_dependent_settings': [ 'icutools' ], + }], ], - 'dependencies': [ 'icuucx', 'icu_implementation', 'icu_uconfig' ], - 'direct_dependent_settings': { - 'include_dirs': [ - '../../deps/icu/source/i18n', - ], - }, - 'export_dependent_settings': [ 'icuucx' ], }, # This exports actual ICU data { @@ -146,32 +207,33 @@ # trim down ICU 'action_name': 'icutrim', 'inputs': [ '<(icu_data_in)', 'icu_small.json' ], - 'outputs': [ '../../out/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], 'action': [ 'python', 'icutrim.py', '-P', '../../<(CONFIGURATION_NAME)', '-D', '<(icu_data_in)', '--delete-tmp', - '-T', '../../out/icutmp', + '-T', '<(SHARED_INTERMEDIATE_DIR)/icutmp', '-F', 'icu_small.json', '-O', 'icudt<(icu_ver_major)<(icu_endianness).dat', - '-v' ], + '-v', + '-L', '<(icu_locales)'], }, { # build final .dat -> .obj 'action_name': 'genccode', - 'inputs': [ '../../out/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], - 'outputs': [ '../../out/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], 'action': [ '../../<(CONFIGURATION_NAME)/genccode', '-o', - '-d', '../../out/', + '-d', '<(SHARED_INTERMEDIATE_DIR)/', '-n', 'icudata', '-e', 'icusmdt<(icu_ver_major)', '<@(_inputs)' ], }, ], # This file contains the small ICU data. - 'sources': [ '../../out/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], } ] ], #end of OS==win and icu_small == true }, { # OS != win 'conditions': [ @@ -235,7 +297,8 @@ '-T', '<(SHARED_INTERMEDIATE_DIR)/icutmp', '-F', 'icu_small.json', '-O', 'icudt<(icu_ver_major)<(icu_endianness).dat', - '-v' ], + '-v', + '-L', '<(icu_locales)'], }, { # rename to get the final entrypoint name right 'action_name': 'rename', @@ -284,19 +347,51 @@ { 'target_name': 'icuuc', 'type': 'none', - 'toolsets': [ 'target' ], - 'dependencies': [ 'icuucx', 'icudata' ], - 'export_dependent_settings': [ 'icuucx', 'icudata' ], + 'toolsets': [ 'target', 'host' ], + 'conditions' : [ + ['_toolset=="host"', { + 'dependencies': [ 'icutools' ], + 'export_dependent_settings': [ 'icutools' ], + }], + ['_toolset=="target"', { + 'dependencies': [ 'icuucx', 'icudata' ], + 'export_dependent_settings': [ 'icuucx', 'icudata' ], + }], + ], }, # This is the 'real' icuuc. - # tools can depend on 'icuuc + stubdata' { 'target_name': 'icuucx', 'type': '<(library)', - 'dependencies': [ 'icu_implementation', 'icu_uconfig' ], + 'dependencies': [ 'icu_implementation', 'icu_uconfig', 'icu_uconfig_target' ], 'toolsets': [ 'target' ], 'sources': [ - '<@(icu_src_common)' + '<@(icu_src_common)', + ], + 'conditions': [ + [ 'icu_ver_major == 54', { 'sources!': [ + ## Strip out the following for ICU 54 only. + ## add more conditions in the future? + ## if your compiler can dead-strip, this will + ## make ZERO difference to binary size. + ## Made ICU-specific for future-proofing. + + # bidi- not needed (yet!) + '../../deps/icu/source/common/ubidi.c', + '../../deps/icu/source/common/ubidiimp.h', + '../../deps/icu/source/common/ubidiln.c', + '../../deps/icu/source/common/ubidiwrt.c', + #'../../deps/icu/source/common/ubidi_props.c', + #'../../deps/icu/source/common/ubidi_props.h', + #'../../deps/icu/source/common/ubidi_props_data.h', + # and the callers + '../../deps/icu/source/common/ushape.cpp', + '../../deps/icu/source/common/usprep.cpp', + '../../deps/icu/source/common/uts46.cpp', + ]}], + [ 'OS == "solaris"', { 'defines': [ + '_XOPEN_SOURCE_EXTENDED=0', + ]}], ], 'include_dirs': [ '../../deps/icu/source/common', @@ -304,7 +399,8 @@ 'defines': [ 'U_COMMON_IMPLEMENTATION=1', ], - 'export_dependent_settings': [ 'icu_uconfig' ], + 'cflags_c': ['-std=c99'], + 'export_dependent_settings': [ 'icu_uconfig', 'icu_uconfig_target' ], 'direct_dependent_settings': { 'include_dirs': [ '../../deps/icu/source/common', @@ -331,6 +427,12 @@ '<@(icu_src_io)', '<@(icu_src_stubdata)', ], + 'sources!': [ + '../../deps/icu/source/tools/toolutil/udbgutil.cpp', + '../../deps/icu/source/tools/toolutil/udbgutil.h', + '../../deps/icu/source/tools/toolutil/dbgutil.cpp', + '../../deps/icu/source/tools/toolutil/dbgutil.h', + ], 'include_dirs': [ '../../deps/icu/source/common', '../../deps/icu/source/i18n', @@ -344,6 +446,12 @@ 'U_TOOLUTIL_IMPLEMENTATION=1', #'DEBUG=0', # http://bugs.icu-project.org/trac/ticket/10977 ], + 'cflags_c': ['-std=c99'], + 'conditions': [ + ['OS == "solaris"', { + 'defines': [ '_XOPEN_SOURCE_EXTENDED=0' ] + }] + ], 'direct_dependent_settings': { 'include_dirs': [ '../../deps/icu/source/common', @@ -359,7 +467,7 @@ }], ], }, - 'export_dependent_settings': [ 'icu_implementation', 'icu_uconfig' ], + 'export_dependent_settings': [ 'icu_uconfig' ], }, # This tool is needed to rebuild .res files from .txt, # or to build index (res_index.txt) files for small-icu diff --git a/tools/icu/icu_small.json b/tools/icu/icu_small.json index ddf7d1204e..e434794e91 100644 --- a/tools/icu/icu_small.json +++ b/tools/icu/icu_small.json @@ -1,11 +1,11 @@ { "copyright": "Copyright (c) 2014 IBM Corporation and Others. All Rights Reserved.", - "comment": "icutrim.py config: Trim down ICU to just English, needed for node.js use.", + "comment": "icutrim.py config: Trim down ICU to just a certain locale set, needed for node.js use.", "variables": { "none": { "only": [] }, - "en_only": { + "locales": { "only": [ "root", "en" @@ -15,20 +15,21 @@ } }, "trees": { - "ROOT": "en_only", + "ROOT": "locales", "brkitr": "none", - "coll": "en_only", - "curr": "en_only", + "coll": "locales", + "curr": "locales", "lang": "none", "rbnf": "none", "region": "none", - "zone": "en_only", + "zone": "locales", "converters": "none", "stringprep": "none", "translit": "none", "brkfiles": "none", "brkdict": "none", - "confusables": "none" + "confusables": "none", + "unit": "none" }, "remove": [ "cnvalias.icu", diff --git a/tools/icu/icutrim.py b/tools/icu/icutrim.py index 7f0fb3752e..517bf39bad 100755 --- a/tools/icu/icutrim.py +++ b/tools/icu/icutrim.py @@ -65,6 +65,12 @@ parser.add_option("-v","--verbose", action="count", default=0) +parser.add_option('-L',"--locales", + action="store", + dest="locales", + help="sets the 'locales.only' variable", + default=None) + parser.add_option('-e', '--endian', action='store', dest='endian', help='endian, big, little or host, your default is "%s".' % endian, default=endian, metavar='endianness') (options, args) = parser.parse_args() @@ -147,6 +153,13 @@ fi= open(options.filterfile, "rb") config=json.load(fi) fi.close() +if (options.locales): + if not config.has_key("variables"): + config["variables"] = {} + if not config["variables"].has_key("locales"): + config["variables"]["locales"] = {} + config["variables"]["locales"]["only"] = options.locales.split(',') + if (options.verbose > 6): print config -- cgit v1.2.3