summaryrefslogtreecommitdiff
path: root/tools/icu/iculslocs.cc
diff options
context:
space:
mode:
authorSteven R. Loomis <srl@icu-project.org>2014-09-04 22:03:24 -0700
committerTrevor Norris <trev.norris@gmail.com>2014-10-01 12:16:51 -0700
commitac2857b12cd819b68405b15c3f8e95e48bcc32d8 (patch)
tree201772e4e1d201b8a84f584ff5155d36fb4082a0 /tools/icu/iculslocs.cc
parent95726b0fce0ae1ae60591f0a535515e8dabfd6df (diff)
downloadandroid-node-v8-ac2857b12cd819b68405b15c3f8e95e48bcc32d8.tar.gz
android-node-v8-ac2857b12cd819b68405b15c3f8e95e48bcc32d8.tar.bz2
android-node-v8-ac2857b12cd819b68405b15c3f8e95e48bcc32d8.zip
build, i18n: improve Intl build, add "--with-intl"
The two main goals of this change are: - To make it easier to build the Intl option using ICU (particularly, using a newer ICU than v8/Chromium's version) - To enable a much smaller ICU build with only English support The goal here is to get node.js binaries built this way by default so that the Intl API can be used. Additional data can be added at execution time (see Readme and wiki) More details are at https://github.com/joyent/node/pull/7719 In particular, this change adds the "--with-intl=" configure option to provide more ways of building "Intl": - "full-icu" picks up an ICU from deps/icu - "small-icu" is similar, but builds only English - "system-icu" uses pkg-config to find an installed ICU - "none" does nothing (no Intl) For Windows builds, the "full-icu" or "small-icu" options are added to vcbuild.bat. Note that the existing "--with-icu-path" option is not removed from configure, but may not be used alongside the new option. Wiki changes have already been made on https://github.com/joyent/node/wiki/Installation and a new page created at https://github.com/joyent/node/wiki/Intl (marked as provisional until this change lands.) Summary of changes: * README.md : doc updates * .gitignore : added "deps/icu" as this is the location where ICU is unpacked to. * Makefile : added the tools/icu/* files to cpplint, but excluded a problematic file. * configure : added the "--with-intl" option mentioned above. Calculate at config time the list of ICU source files to use and data packaging options. * node.gyp : add the new files src/node_i18n.cc/.h as well as ICU linkage. * src/node.cc : add call into node::i18n::InitializeICUDirectory(icu_data_dir) as well as new --icu-data-dir option and NODE_ICU_DATA env variable to configure ICU data loading. This loading is only relevant in the "small" configuration. * src/node_i18n.cc : new source file for the above Initialize.. function, to setup ICU as needed. * tools/icu : new directory with some tools needed for this build. * tools/icu/icu-generic.gyp : new .gyp file that builds ICU in some new ways, both on unix/mac and windows. * tools/icu/icu-system.gyp : new .gyp file to build node against a pkg-config detected ICU. * tools/icu/icu_small.json : new config file for the "English-only" small build. * tools/icu/icutrim.py : new tool for trimming down ICU data. Reads the above .json file. * tools/icu/iculslocs.cc : new tool for repairing ICU data manifests after trim operation. * tools/icu/no-op.cc : dummy file to force .gyp into using a C++ linker. * vcbuild.bat : added small-icu and full-icu options, to call into configure. * Fixed toolset dependencies, see https://github.com/joyent/node/pull/7719#issuecomment-54641687 Note that because of a bug in gyp {CC,CXX}_host must also be set. Otherwise gcc/g++ will be used by default for part of the build. Reviewed-by: Trevor Norris <trev.norris@gmail.com> Reviewed-by: Fedor Indutny <fedor@indutny.com>
Diffstat (limited to 'tools/icu/iculslocs.cc')
-rw-r--r--tools/icu/iculslocs.cc388
1 files changed, 388 insertions, 0 deletions
diff --git a/tools/icu/iculslocs.cc b/tools/icu/iculslocs.cc
new file mode 100644
index 0000000000..66becace0a
--- /dev/null
+++ b/tools/icu/iculslocs.cc
@@ -0,0 +1,388 @@
+/*
+**********************************************************************
+* Copyright (C) 2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* Created 2014-06-20 by Steven R. Loomis
+*
+* See: http://bugs.icu-project.org/trac/ticket/10922
+*
+*/
+
+/*
+WHAT IS THIS?
+
+Here's the problem: It's difficult to reconfigure ICU from the command
+line without using the full makefiles. You can do a lot, but not
+everything.
+
+Consider:
+
+ $ icupkg -r 'ja*' icudt53l.dat
+
+Great, you've now removed the (main) Japanese data. But something's
+still wrong-- res_index (and thus, getAvailable* functions) still
+claim the locale is present.
+
+You are reading the source to a tool (using only public API C code)
+that can solve this problem. Use as follows:
+
+ $ iculslocs -i . -N icudt53l -b res_index.txt
+
+.. Generates a NEW res_index.txt (by looking at the .dat file, and
+figuring out which locales are actually available. Has commented out
+the ones which are no longer available:
+
+ ...
+ it_SM {""}
+// ja {""}
+// ja_JP {""}
+ jgo {""}
+ ...
+
+Then you can build and in-place patch it with existing ICU tools:
+ $ genrb res_index.txt
+ $ icupkg -a res_index.res icudt53l.dat
+
+.. Now you have a patched icudt539.dat that not only doesn't have
+Japanese, it doesn't *claim* to have Japanese.
+
+*/
+
+#include "string.h"
+#include "charstr.h" // ICU internal header
+#include <unicode/ustdio.h>
+#include <unicode/ures.h>
+#include <unicode/udata.h>
+
+const char* PROG = "iculslocs";
+const char* NAME = U_ICUDATA_NAME; // assume ICU data
+const char* TREE = "ROOT";
+int VERBOSE = 0;
+
+#define RES_INDEX "res_index"
+#define INSTALLEDLOCALES "InstalledLocales"
+
+CharString packageName;
+const char* locale = RES_INDEX; // locale referring to our index
+
+void usage() {
+ u_printf("Usage: %s [options]\n", PROG);
+ u_printf(
+ "This program lists and optionally regenerates the locale "
+ "manifests\n"
+ " in ICU 'res_index.res' files.\n");
+ u_printf(
+ " -i ICUDATA Set ICUDATA dir to ICUDATA.\n"
+ " NOTE: this must be the first option given.\n");
+ u_printf(" -h This Help\n");
+ u_printf(" -v Verbose Mode on\n");
+ u_printf(" -l List locales to stdout\n");
+ u_printf(
+ " if Verbose mode, then missing (unopenable)"
+ "locales\n"
+ " will be listed preceded by a '#'.\n");
+ u_printf(
+ " -b res_index.txt Write 'corrected' bundle "
+ "to res_index.txt\n"
+ " missing bundles will be "
+ "OMITTED\n");
+ u_printf(
+ " -T TREE Choose tree TREE\n"
+ " (TREE should be one of: \n"
+ " ROOT, brkitr, coll, curr, lang, rbnf, region, zone)\n");
+ // see ureslocs.h and elsewhere
+ u_printf(
+ " -N NAME Choose name NAME\n"
+ " (default: '%s')\n",
+ U_ICUDATA_NAME);
+ u_printf(
+ "\nNOTE: for best results, this tool ought to be "
+ "linked against\n"
+ "stubdata. i.e. '%s -l' SHOULD return an error with "
+ " no data.\n",
+ PROG);
+}
+
+#define ASSERT_SUCCESS(what) \
+ if (U_FAILURE(status)) { \
+ u_printf("%s:%d: %s: ERROR: %s %s\n", \
+ __FILE__, \
+ __LINE__, \
+ PROG, \
+ u_errorName(status), \
+ what); \
+ return 1; \
+ }
+
+/**
+ * @param status changed from reference to pointer to match node.js style
+ */
+void calculatePackageName(UErrorCode* status) {
+ packageName.clear();
+ if (strcmp(NAME, "NONE")) {
+ packageName.append(NAME, *status);
+ if (strcmp(TREE, "ROOT")) {
+ packageName.append(U_TREE_SEPARATOR_STRING, *status);
+ packageName.append(TREE, *status);
+ }
+ }
+ if (VERBOSE) {
+ u_printf("packageName: %s\n", packageName.data());
+ }
+}
+
+/**
+ * Does the locale exist?
+ * return zero for false, or nonzero if it was openable.
+ * Assumes calculatePackageName was called.
+ * @param exists set to TRUE if exists, FALSE otherwise.
+ * Changed from reference to pointer to match node.js style
+ * @return 0 on "OK" (success or resource-missing),
+ * 1 on "FAILURE" (unexpected error)
+ */
+int localeExists(const char* loc, UBool* exists) {
+ UErrorCode status = U_ZERO_ERROR;
+ if (VERBOSE > 1) {
+ u_printf("Trying to open %s:%s\n", packageName.data(), loc);
+ }
+ LocalUResourceBundlePointer aResource(
+ ures_openDirect(packageName.data(), loc, &status));
+ *exists = FALSE;
+ if (U_SUCCESS(status)) {
+ *exists = true;
+ if (VERBOSE > 1) {
+ u_printf("%s:%s existed!\n", packageName.data(), loc);
+ }
+ return 0;
+ } else if (status == U_MISSING_RESOURCE_ERROR) {
+ *exists = false;
+ if (VERBOSE > 1) {
+ u_printf("%s:%s did NOT exist (%s)!\n",
+ packageName.data(),
+ loc,
+ u_errorName(status));
+ }
+ return 0; // "good" failure
+ } else {
+ // some other failure..
+ u_printf("%s:%d: %s: ERROR %s opening %s:%s for test.\n",
+ __FILE__,
+ __LINE__,
+ u_errorName(status),
+ packageName.data(),
+ loc);
+ return 1; // abort
+ }
+}
+
+void printIndent(const LocalUFILEPointer& bf, int indent) {
+ for (int i = 0; i < indent + 1; i++) {
+ u_fprintf(bf.getAlias(), " ");
+ }
+}
+
+/**
+ * Dumps a table resource contents
+ * if lev==0, skips INSTALLEDLOCALES
+ * @return 0 for OK, 1 for err
+ */
+int dumpAllButInstalledLocales(int lev,
+ LocalUResourceBundlePointer& bund,
+ LocalUFILEPointer& bf,
+ UErrorCode& status) {
+ ures_resetIterator(bund.getAlias());
+ const UBool isTable = (UBool)(ures_getType(bund.getAlias()) == URES_TABLE);
+ LocalUResourceBundlePointer t;
+ while (U_SUCCESS(status) && ures_hasNext(bund.getAlias())) {
+ t.adoptInstead(ures_getNextResource(bund.getAlias(), t.orphan(), &status));
+ ASSERT_SUCCESS("while processing table");
+ const char* key = ures_getKey(t.getAlias());
+ if (VERBOSE > 1) {
+ u_printf("dump@%d: got key %s\n", lev, key);
+ }
+ if (lev == 0 && !strcmp(key, INSTALLEDLOCALES)) {
+ if (VERBOSE > 1) {
+ u_printf("dump: skipping '%s' as it must be evaluated.\n", key);
+ }
+ } else {
+ printIndent(bf, lev);
+ u_fprintf(bf.getAlias(), "%s", key);
+ switch (ures_getType(t.getAlias())) {
+ case URES_STRING: {
+ int32_t len = 0;
+ const UChar* s = ures_getString(t.getAlias(), &len, &status);
+ ASSERT_SUCCESS("getting string");
+ u_fprintf(bf.getAlias(), ":string {\"");
+ u_file_write(s, len, bf.getAlias());
+ u_fprintf(bf.getAlias(), "\"}");
+ } break;
+ default: {
+ u_printf("ERROR: unhandled type in dumpAllButInstalledLocales().\n");
+ return 1;
+ } break;
+ }
+ u_fprintf(bf.getAlias(), "\n");
+ }
+ }
+ return 0;
+}
+
+int list(const char* toBundle) {
+ UErrorCode status = U_ZERO_ERROR;
+
+ LocalUFILEPointer bf;
+
+ if (toBundle != NULL) {
+ if (VERBOSE) {
+ u_printf("writing to bundle %s\n", toBundle);
+ }
+ // we write UTF-8 with BOM only. No exceptions.
+ bf.adoptInstead(u_fopen(toBundle, "w", "en_US_POSIX", "UTF-8"));
+ if (bf.isNull()) {
+ u_printf("ERROR: Could not open '%s' for writing.\n", toBundle);
+ return 1;
+ }
+ u_fputc(0xFEFF, bf.getAlias()); // write BOM
+ u_fprintf(bf.getAlias(), "// -*- Coding: utf-8; -*-\n//\n");
+ }
+
+ // first, calculate the bundle name.
+ calculatePackageName(&status);
+ ASSERT_SUCCESS("calculating package name");
+
+ if (VERBOSE) {
+ u_printf("\"locale\": %s\n", locale);
+ }
+
+ LocalUResourceBundlePointer bund(
+ ures_openDirect(packageName.data(), locale, &status));
+ ASSERT_SUCCESS("while opening the bundle");
+ LocalUResourceBundlePointer installedLocales(
+ ures_getByKey(bund.getAlias(), INSTALLEDLOCALES, NULL, &status));
+ ASSERT_SUCCESS("while fetching installed locales");
+
+ int32_t count = ures_getSize(installedLocales.getAlias());
+ if (VERBOSE) {
+ u_printf("Locales: %d\n", count);
+ }
+
+ if (bf.isValid()) {
+ // write the HEADER
+ u_fprintf(bf.getAlias(),
+ "// Warning this file is automatically generated\n"
+ "// Updated by %s based on %s:%s.txt\n",
+ PROG,
+ packageName.data(),
+ locale);
+ u_fprintf(bf.getAlias(),
+ "%s:table(nofallback) {\n"
+ " // First, everything besides InstalledLocales:\n",
+ locale);
+ if (dumpAllButInstalledLocales(0, bund, bf, status)) {
+ u_printf("Error dumping prolog for %s\n", toBundle);
+ return 1;
+ }
+ ASSERT_SUCCESS("while writing prolog"); // in case an error was missed
+
+ u_fprintf(bf.getAlias(),
+ " %s:table { // %d locales in input %s.res\n",
+ INSTALLEDLOCALES,
+ count,
+ locale);
+ }
+
+ // OK, now list them.
+ LocalUResourceBundlePointer subkey;
+
+ int validCount = 0;
+ for (int32_t i = 0; i < count; i++) {
+ subkey.adoptInstead(ures_getByIndex(
+ installedLocales.getAlias(), i, subkey.orphan(), &status));
+ ASSERT_SUCCESS("while fetching an installed locale's name");
+
+ const char* key = ures_getKey(subkey.getAlias());
+ if (VERBOSE > 1) {
+ u_printf("@%d: %s\n", i, key);
+ }
+ // now, see if the locale is installed..
+
+ UBool exists;
+ if (localeExists(key, &exists)) {
+ return 1; // get out.
+ }
+ if (exists) {
+ validCount++;
+ u_printf("%s\n", key);
+ if (bf.isValid()) {
+ u_fprintf(bf.getAlias(), " %s {\"\"}\n", key);
+ }
+ } else {
+ if (bf.isValid()) {
+ u_fprintf(bf.getAlias(), "// %s {\"\"}\n", key);
+ }
+ if (VERBOSE) {
+ u_printf("#%s\n", key); // verbosity one - '' vs '#'
+ }
+ }
+ }
+
+ if (bf.isValid()) {
+ u_fprintf(bf.getAlias(), " } // %d/%d valid\n", validCount, count);
+ // write the HEADER
+ u_fprintf(bf.getAlias(), "}\n");
+ }
+ return 0;
+}
+
+int main(int argc, const char* argv[]) {
+ PROG = argv[0];
+ for (int i = 1; i < argc; i++) {
+ const char* arg = argv[i];
+ int argsLeft = argc - i - 1; /* how many remain? */
+ if (!strcmp(arg, "-v")) {
+ VERBOSE++;
+ } else if (!strcmp(arg, "-i") && (argsLeft >= 1)) {
+ if (i != 1) {
+ u_printf("ERROR: -i must be the first argument given.\n");
+ usage();
+ return 1;
+ }
+ const char* dir = argv[++i];
+ u_setDataDirectory(dir);
+ if (VERBOSE) {
+ u_printf("ICUDATA is now %s\n", dir);
+ }
+ } else if (!strcmp(arg, "-T") && (argsLeft >= 1)) {
+ TREE = argv[++i];
+ if (VERBOSE) {
+ u_printf("TREE is now %s\n", TREE);
+ }
+ } else if (!strcmp(arg, "-N") && (argsLeft >= 1)) {
+ NAME = argv[++i];
+ if (VERBOSE) {
+ u_printf("NAME is now %s\n", NAME);
+ }
+ } else if (!strcmp(arg, "-?") || !strcmp(arg, "-h")) {
+ usage();
+ return 0;
+ } else if (!strcmp(arg, "-l")) {
+ if (list(NULL)) {
+ return 1;
+ }
+ } else if (!strcmp(arg, "-b") && (argsLeft >= 1)) {
+ if (list(argv[++i])) {
+ return 1;
+ }
+ } else {
+ u_printf("Unknown or malformed option: %s\n", arg);
+ usage();
+ return 1;
+ }
+ }
+}
+
+// Local Variables:
+// compile-command: "icurun iculslocs.cpp"
+// End: