diff options
Diffstat (limited to 'deps/icu-small/source/tools')
-rw-r--r-- | deps/icu-small/source/tools/escapesrc/escapesrc.cpp | 17 | ||||
-rw-r--r-- | deps/icu-small/source/tools/genrb/filterrb.cpp | 236 | ||||
-rw-r--r-- | deps/icu-small/source/tools/genrb/filterrb.h | 180 | ||||
-rw-r--r-- | deps/icu-small/source/tools/genrb/genrb.cpp | 176 | ||||
-rw-r--r-- | deps/icu-small/source/tools/genrb/parse.cpp | 2 | ||||
-rw-r--r-- | deps/icu-small/source/tools/genrb/reslist.cpp | 124 | ||||
-rw-r--r-- | deps/icu-small/source/tools/genrb/reslist.h | 30 | ||||
-rw-r--r-- | deps/icu-small/source/tools/pkgdata/pkgdata.cpp | 32 | ||||
-rw-r--r-- | deps/icu-small/source/tools/toolutil/pkg_genc.cpp | 42 | ||||
-rw-r--r-- | deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp | 8 | ||||
-rw-r--r-- | deps/icu-small/source/tools/toolutil/swapimpl.cpp | 105 | ||||
-rw-r--r-- | deps/icu-small/source/tools/toolutil/toolutil.cpp | 2 | ||||
-rw-r--r-- | deps/icu-small/source/tools/toolutil/ucbuf.cpp | 4 | ||||
-rw-r--r-- | deps/icu-small/source/tools/toolutil/ucmstate.cpp | 3 |
14 files changed, 813 insertions, 148 deletions
diff --git a/deps/icu-small/source/tools/escapesrc/escapesrc.cpp b/deps/icu-small/source/tools/escapesrc/escapesrc.cpp index f51a86ea96..a056098ece 100644 --- a/deps/icu-small/source/tools/escapesrc/escapesrc.cpp +++ b/deps/icu-small/source/tools/escapesrc/escapesrc.cpp @@ -327,6 +327,9 @@ bool fixLine(int /*no*/, std::string &linestr) { // start from the end and find all u" cases size_t pos = len = linestr.size(); + if(len>INT32_MAX/2) { + return true; + } while((pos>0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) { //printf("found doublequote at %d\n", pos); if(fixAt(linestr, pos)) return true; @@ -391,15 +394,19 @@ int convert(const std::string &infile, const std::string &outfile) { while( getline( inf, linestr)) { no++; if(fixLine(no, linestr)) { - outf.close(); - fprintf(stderr, "%s:%d: Fixup failed by %s\n", infile.c_str(), no, prog.c_str()); - cleanup(outfile); - return 1; + goto fail; } outf << linestr << '\n'; } - return 0; + if(inf.eof()) { + return 0; + } +fail: + outf.close(); + fprintf(stderr, "%s:%d: Fixup failed by %s\n", infile.c_str(), no, prog.c_str()); + cleanup(outfile); + return 1; } /** diff --git a/deps/icu-small/source/tools/genrb/filterrb.cpp b/deps/icu-small/source/tools/genrb/filterrb.cpp new file mode 100644 index 0000000000..d62d185d77 --- /dev/null +++ b/deps/icu-small/source/tools/genrb/filterrb.cpp @@ -0,0 +1,236 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include <iostream> +#include <stack> + +#include "filterrb.h" +#include "errmsg.h" + + +const char* PathFilter::kEInclusionNames[] = { + "INCLUDE", + "PARTIAL", + "EXCLUDE" +}; + + +ResKeyPath::ResKeyPath() {} + +ResKeyPath::ResKeyPath(const std::string& path, UErrorCode& status) { + if (path.empty() || path[0] != '/') { + std::cerr << "genrb error: path must start with /: " << path << std::endl; + status = U_PARSE_ERROR; + return; + } + size_t i; + size_t j = 0; + while (true) { + i = j + 1; + j = path.find('/', i); + std::string key = path.substr(i, j - i); + if (key.empty()) { + std::cerr << "genrb error: empty subpaths and trailing slashes are not allowed: " << path << std::endl; + status = U_PARSE_ERROR; + return; + } + push(key); + if (j == std::string::npos) { + break; + } + } +} + +void ResKeyPath::push(const std::string& key) { + fPath.push_back(key); +} + +void ResKeyPath::pop() { + fPath.pop_back(); +} + +const std::list<std::string>& ResKeyPath::pieces() const { + return fPath; +} + +std::ostream& operator<<(std::ostream& out, const ResKeyPath& value) { + if (value.pieces().empty()) { + out << "/"; + } else for (auto& key : value.pieces()) { + out << "/" << key; + } + return out; +} + + +PathFilter::~PathFilter() = default; + + +void SimpleRuleBasedPathFilter::addRule(const std::string& ruleLine, UErrorCode& status) { + if (ruleLine.empty()) { + std::cerr << "genrb error: empty filter rules are not allowed" << std::endl; + status = U_PARSE_ERROR; + return; + } + bool inclusionRule = false; + if (ruleLine[0] == '+') { + inclusionRule = true; + } else if (ruleLine[0] != '-') { + std::cerr << "genrb error: rules must start with + or -: " << ruleLine << std::endl; + status = U_PARSE_ERROR; + return; + } + ResKeyPath path(ruleLine.substr(1), status); + addRule(path, inclusionRule, status); +} + +void SimpleRuleBasedPathFilter::addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + fRoot.applyRule(path, path.pieces().begin(), inclusionRule, status); +} + +PathFilter::EInclusion SimpleRuleBasedPathFilter::match(const ResKeyPath& path) const { + const Tree* node = &fRoot; + + // defaultResult "bubbles up" the nearest "definite" inclusion/exclusion rule + EInclusion defaultResult = INCLUDE; + if (node->fIncluded != PARTIAL) { + // rules handled here: "+/" and "-/" + defaultResult = node->fIncluded; + } + + // isLeaf is whether the filter tree can provide no additional information + // even if additional subpaths are added to the given key + bool isLeaf = false; + + for (auto& key : path.pieces()) { + auto child = node->fChildren.find(key); + // Leaf case 1: input path descends outside the filter tree + if (child == node->fChildren.end()) { + if (node->fWildcard) { + // A wildcard pattern is present; continue checking + node = node->fWildcard.get(); + } else { + isLeaf = true; + break; + } + } else { + node = &child->second; + } + if (node->fIncluded != PARTIAL) { + defaultResult = node->fIncluded; + } + } + + // Leaf case 2: input path exactly matches a filter leaf + if (node->isLeaf()) { + isLeaf = true; + } + + // Always return PARTIAL if we are not at a leaf + if (!isLeaf) { + return PARTIAL; + } + + // If leaf node is PARTIAL, return the default + if (node->fIncluded == PARTIAL) { + return defaultResult; + } + + return node->fIncluded; +} + + +SimpleRuleBasedPathFilter::Tree::Tree(const Tree& other) + : fIncluded(other.fIncluded), fChildren(other.fChildren) { + // Note: can't use the default copy assignment because of the std::unique_ptr + if (other.fWildcard) { + fWildcard.reset(new Tree(*other.fWildcard)); + } +} + +bool SimpleRuleBasedPathFilter::Tree::isLeaf() const { + return fChildren.empty() && !fWildcard; +} + +void SimpleRuleBasedPathFilter::Tree::applyRule( + const ResKeyPath& path, + std::list<std::string>::const_iterator it, + bool inclusionRule, + UErrorCode& status) { + + // Base Case + if (it == path.pieces().end()) { + if (isVerbose() && (fIncluded != PARTIAL || !isLeaf())) { + std::cout << "genrb info: rule on path " << path + << " overrides previous rules" << std::endl; + } + fIncluded = inclusionRule ? INCLUDE : EXCLUDE; + fChildren.clear(); + fWildcard.reset(); + return; + } + + // Recursive Step + auto& key = *it; + if (key == "*") { + // Case 1: Wildcard + if (!fWildcard) { + fWildcard.reset(new Tree()); + } + // Apply the rule to fWildcard and also to all existing children. + it++; + fWildcard->applyRule(path, it, inclusionRule, status); + for (auto& child : fChildren) { + child.second.applyRule(path, it, inclusionRule, status); + } + it--; + + } else { + // Case 2: Normal Key + auto search = fChildren.find(key); + if (search == fChildren.end()) { + if (fWildcard) { + // Deep-copy the existing wildcard tree into the new key + search = fChildren.emplace(key, Tree(*fWildcard)).first; + } else { + search = fChildren.emplace(key, Tree()).first; + } + } + it++; + search->second.applyRule(path, it, inclusionRule, status); + it--; + } +} + +void SimpleRuleBasedPathFilter::Tree::print(std::ostream& out, int32_t indent) const { + for (int32_t i=0; i<indent; i++) out << "\t"; + out << "included: " << kEInclusionNames[fIncluded] << std::endl; + for (auto& child : fChildren) { + for (int32_t i=0; i<indent; i++) out << "\t"; + out << child.first << ": {" << std::endl; + child.second.print(out, indent + 1); + for (int32_t i=0; i<indent; i++) out << "\t"; + out << "}" << std::endl; + } + if (fWildcard) { + for (int32_t i=0; i<indent; i++) out << "\t"; + out << "* {" << std::endl; + fWildcard->print(out, indent + 1); + for (int32_t i=0; i<indent; i++) out << "\t"; + out << "}" << std::endl; + } +} + +void SimpleRuleBasedPathFilter::print(std::ostream& out) const { + out << "SimpleRuleBasedPathFilter {" << std::endl; + fRoot.print(out, 1); + out << "}" << std::endl; +} + +std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value) { + value.print(out); + return out; +} diff --git a/deps/icu-small/source/tools/genrb/filterrb.h b/deps/icu-small/source/tools/genrb/filterrb.h new file mode 100644 index 0000000000..cf54766041 --- /dev/null +++ b/deps/icu-small/source/tools/genrb/filterrb.h @@ -0,0 +1,180 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef __FILTERRB_H__ +#define __FILTERRB_H__ + +#include <list> +#include <map> +#include <memory> +#include <ostream> +#include <string> + +#include "unicode/utypes.h" + + +/** + * Represents an absolute path into a resource bundle. + * For example: "/units/length/meter" + */ +class ResKeyPath { +public: + /** Constructs an empty path (top of tree) */ + ResKeyPath(); + + /** Constructs from a string path */ + ResKeyPath(const std::string& path, UErrorCode& status); + + void push(const std::string& key); + void pop(); + + const std::list<std::string>& pieces() const; + + private: + std::list<std::string> fPath; +}; + +std::ostream& operator<<(std::ostream& out, const ResKeyPath& value); + + +/** + * Interface used to determine whether to include or reject pieces of a + * resource bundle based on their absolute path. + */ +class PathFilter { +public: + enum EInclusion { + INCLUDE, + PARTIAL, + EXCLUDE + }; + + static const char* kEInclusionNames[]; + + virtual ~PathFilter(); + + /** + * Returns an EInclusion on whether or not the given path should be included. + * + * INCLUDE = include the whole subtree + * PARTIAL = recurse into the subtree + * EXCLUDE = reject the whole subtree + */ + virtual EInclusion match(const ResKeyPath& path) const = 0; +}; + + +/** + * Implementation of PathFilter for a list of inclusion/exclusion rules. + * + * The wildcard pattern "*" means that the subsequent filters are applied to + * every other tree sharing the same parent. + * + * For example, given this list of filter rules: + */ +// -/alabama +// +/alabama/alaska/arizona +// -/fornia/hawaii +// -/mississippi +// +/mississippi/michigan +// +/mississippi/*/maine +// -/mississippi/*/iowa +// +/mississippi/louisiana/iowa +/* + * You get the following structure: + * + * SimpleRuleBasedPathFilter { + * included: PARTIAL + * alabama: { + * included: EXCLUDE + * alaska: { + * included: PARTIAL + * arizona: { + * included: INCLUDE + * } + * } + * } + * fornia: { + * included: PARTIAL + * hawaii: { + * included: EXCLUDE + * } + * } + * mississippi: { + * included: EXCLUDE + * louisiana: { + * included: PARTIAL + * iowa: { + * included: INCLUDE + * } + * maine: { + * included: INCLUDE + * } + * } + * michigan: { + * included: INCLUDE + * iowa: { + * included: EXCLUDE + * } + * maine: { + * included: INCLUDE + * } + * } + * * { + * included: PARTIAL + * iowa: { + * included: EXCLUDE + * } + * maine: { + * included: INCLUDE + * } + * } + * } + * } + */ +class SimpleRuleBasedPathFilter : public PathFilter { +public: + void addRule(const std::string& ruleLine, UErrorCode& status); + void addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status); + + EInclusion match(const ResKeyPath& path) const override; + + void print(std::ostream& out) const; + +private: + struct Tree { + + Tree() = default; + + /** Copy constructor */ + Tree(const Tree& other); + + /** + * Information on the USER-SPECIFIED inclusion/exclusion. + * + * INCLUDE = this path exactly matches a "+" rule + * PARTIAL = this path does not match any rule, but subpaths exist + * EXCLUDE = this path exactly matches a "-" rule + */ + EInclusion fIncluded = PARTIAL; + std::map<std::string, Tree> fChildren; + std::unique_ptr<Tree> fWildcard; + + void applyRule( + const ResKeyPath& path, + std::list<std::string>::const_iterator it, + bool inclusionRule, + UErrorCode& status); + + bool isLeaf() const; + + void print(std::ostream& out, int32_t indent) const; + }; + + Tree fRoot; +}; + +std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value); + + +#endif //__FILTERRB_H__ diff --git a/deps/icu-small/source/tools/genrb/genrb.cpp b/deps/icu-small/source/tools/genrb/genrb.cpp index c4fc462066..885f3039bf 100644 --- a/deps/icu-small/source/tools/genrb/genrb.cpp +++ b/deps/icu-small/source/tools/genrb/genrb.cpp @@ -18,6 +18,11 @@ ******************************************************************************* */ +#include <fstream> +#include <iostream> +#include <list> +#include <string> + #include <assert.h> #include "genrb.h" #include "unicode/localpointer.h" @@ -25,13 +30,15 @@ #include "unicode/utf16.h" #include "charstr.h" #include "cmemory.h" +#include "filterrb.h" #include "reslist.h" #include "ucmndata.h" /* TODO: for reading the pool bundle */ U_NAMESPACE_USE /* Protos */ -void processFile(const char *filename, const char* cp, const char *inputDir, const char *outputDir, +void processFile(const char *filename, const char* cp, + const char *inputDir, const char *outputDir, const char *filterDir, const char *packageName, SRBRoot *newPoolBundle, UBool omitBinaryCollation, UErrorCode &status); static char *make_res_filename(const char *filename, const char *outputDir, @@ -76,7 +83,8 @@ enum FORMAT_VERSION, WRITE_POOL_BUNDLE, USE_POOL_BUNDLE, - INCLUDE_UNIHAN_COLL + INCLUDE_UNIHAN_COLL, + FILTERDIR }; UOption options[]={ @@ -99,9 +107,10 @@ UOption options[]={ UOPTION_DEF("language", 'l', UOPT_REQUIRES_ARG), /* 16 */ UOPTION_DEF("omitCollationRules", 'R', UOPT_NO_ARG),/* 17 */ UOPTION_DEF("formatVersion", '\x01', UOPT_REQUIRES_ARG),/* 18 */ - UOPTION_DEF("writePoolBundle", '\x01', UOPT_NO_ARG),/* 19 */ + UOPTION_DEF("writePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 19 */ UOPTION_DEF("usePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 20 */ UOPTION_DEF("includeUnihanColl", '\x01', UOPT_NO_ARG),/* 21 */ /* temporary, don't display in usage info */ + UOPTION_DEF("filterDir", '\x01', UOPT_OPTIONAL_ARG), /* 22 */ }; static UBool write_java = FALSE; @@ -121,6 +130,7 @@ main(int argc, const char *arg = NULL; const char *outputDir = NULL; /* NULL = no output directory, use current */ const char *inputDir = NULL; + const char *filterDir = NULL; const char *encoding = ""; int i; UBool illegalArg = FALSE; @@ -224,10 +234,13 @@ main(int argc, "\t --formatVersion write a .res file compatible with the requested formatVersion (single digit);\n" "\t for example, --formatVersion 1\n"); fprintf(stderr, - "\t --writePoolBundle write a pool.res file with all of the keys of all input bundles\n" - "\t --usePoolBundle [path-to-pool.res] point to keys from the pool.res keys pool bundle if they are available there;\n" + "\t --writePoolBundle [directory] write a pool.res file with all of the keys of all input bundles\n" + "\t --usePoolBundle [directory] point to keys from the pool.res keys pool bundle if they are available there;\n" "\t makes .res files smaller but dependent on the pool bundle\n" "\t (--writePoolBundle and --usePoolBundle cannot be combined)\n"); + fprintf(stderr, + "\t --filterDir Input directory where filter files are available.\n" + "\t For more on filter files, see Python buildtool.\n"); return illegalArg ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } @@ -254,6 +267,10 @@ main(int argc, outputDir = options[DESTDIR].value; } + if (options[FILTERDIR].doesOccur) { + filterDir = options[FILTERDIR].value; + } + if(options[ENCODING].doesOccur) { encoding = options[ENCODING].value; } @@ -524,7 +541,7 @@ main(int argc, if (isVerbose()) { printf("Processing file \"%s\"\n", theCurrentFileName.data()); } - processFile(arg, encoding, inputDir, outputDir, NULL, + processFile(arg, encoding, inputDir, outputDir, filterDir, NULL, newPoolBundle.getAlias(), options[NO_BINARY_COLLATION].doesOccur, status); } @@ -532,8 +549,14 @@ main(int argc, poolBundle.close(); if(U_SUCCESS(status) && options[WRITE_POOL_BUNDLE].doesOccur) { + const char* writePoolDir; + if (options[WRITE_POOL_BUNDLE].value!=NULL) { + writePoolDir = options[WRITE_POOL_BUNDLE].value; + } else { + writePoolDir = outputDir; + } char outputFileName[256]; - newPoolBundle->write(outputDir, NULL, outputFileName, sizeof(outputFileName), status); + newPoolBundle->write(writePoolDir, NULL, outputFileName, sizeof(outputFileName), status); if(U_FAILURE(status)) { fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status)); } @@ -552,19 +575,17 @@ main(int argc, /* Process a file */ void processFile(const char *filename, const char *cp, - const char *inputDir, const char *outputDir, const char *packageName, + const char *inputDir, const char *outputDir, const char *filterDir, + const char *packageName, SRBRoot *newPoolBundle, UBool omitBinaryCollation, UErrorCode &status) { LocalPointer<SRBRoot> data; - UCHARBUF *ucbuf = NULL; - char *rbname = NULL; - char *openFileName = NULL; - char *inputDirBuf = NULL; - - char outputFileName[256]; + LocalUCHARBUFPointer ucbuf; + CharString openFileName; + CharString inputDirBuf; + char outputFileName[256]; int32_t dirlen = 0; - int32_t filelen = 0; if (U_FAILURE(status)) { return; @@ -572,14 +593,10 @@ processFile(const char *filename, const char *cp, if(filename==NULL){ status=U_ILLEGAL_ARGUMENT_ERROR; return; - }else{ - filelen = (int32_t)uprv_strlen(filename); } if(inputDir == NULL) { const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); - openFileName = (char *) uprv_malloc(dirlen + filelen + 2); - openFileName[0] = '\0'; if (filenameBegin != NULL) { /* * When a filename ../../../data/root.txt is specified, @@ -588,31 +605,15 @@ processFile(const char *filename, const char *cp, * another file, like UCARules.txt or thaidict.brk. */ int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); - inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize); - - /* test for NULL */ - if(inputDirBuf == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - goto finish; - } + inputDirBuf.append(filename, filenameSize, status); - inputDirBuf[filenameSize - 1] = 0; - inputDir = inputDirBuf; - dirlen = (int32_t)uprv_strlen(inputDir); + inputDir = inputDirBuf.data(); + dirlen = inputDirBuf.length(); } }else{ dirlen = (int32_t)uprv_strlen(inputDir); if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { - openFileName = (char *) uprv_malloc(dirlen + filelen + 2); - - /* test for NULL */ - if(openFileName == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - goto finish; - } - - openFileName[0] = '\0'; /* * append the input dir to openFileName if the first char in * filename is not file seperation char and the last char input directory is not '.'. @@ -625,49 +626,80 @@ processFile(const char *filename, const char *cp, * genrb -s. icu/data --- start from CWD and look in icu/data dir */ if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ - uprv_strcpy(openFileName, inputDir); - openFileName[dirlen] = U_FILE_SEP_CHAR; + openFileName.append(inputDir, status); } - openFileName[dirlen + 1] = '\0'; } else { - openFileName = (char *) uprv_malloc(dirlen + filelen + 1); - - /* test for NULL */ - if(openFileName == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - goto finish; - } - - uprv_strcpy(openFileName, inputDir); - + openFileName.append(inputDir, status); } } + openFileName.appendPathPart(filename, status); - uprv_strcat(openFileName, filename); + // Test for CharString failure + if (U_FAILURE(status)) { + return; + } - ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &status); + ucbuf.adoptInstead(ucbuf_open(openFileName.data(), &cp,getShowWarning(),TRUE, &status)); if(status == U_FILE_ACCESS_ERROR) { - fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName); - goto finish; + fprintf(stderr, "couldn't open file %s\n", openFileName.data()); + return; } - if (ucbuf == NULL || U_FAILURE(status)) { + if (ucbuf.isNull() || U_FAILURE(status)) { fprintf(stderr, "An error occurred processing file %s. Error: %s\n", - openFileName == NULL ? filename : openFileName, u_errorName(status)); - goto finish; + openFileName.data(), u_errorName(status)); + return; } /* auto detected popular encodings? */ if (cp!=NULL && isVerbose()) { printf("autodetected encoding %s\n", cp); } /* Parse the data into an SRBRoot */ - data.adoptInstead(parse(ucbuf, inputDir, outputDir, filename, + data.adoptInstead(parse(ucbuf.getAlias(), inputDir, outputDir, filename, !omitBinaryCollation, options[NO_COLLATION_RULES].doesOccur, &status)); if (data.isNull() || U_FAILURE(status)) { fprintf(stderr, "couldn't parse the file %s. Error:%s\n", filename, u_errorName(status)); - goto finish; + return; } + + // Run filtering before writing pool bundle + if (filterDir != nullptr) { + CharString filterFileName(filterDir, status); + filterFileName.appendPathPart(filename, status); + if (U_FAILURE(status)) { + return; + } + + // Open the file and read it into filter + SimpleRuleBasedPathFilter filter; + std::ifstream f(filterFileName.data()); + if (f.fail()) { + std::cerr << "genrb error: unable to open " << filterFileName.data() << std::endl; + status = U_FILE_ACCESS_ERROR; + return; + } + std::string currentLine; + while (std::getline(f, currentLine)) { + // Ignore # comments and empty lines + if (currentLine.empty() || currentLine[0] == '#') { + continue; + } + filter.addRule(currentLine, status); + if (U_FAILURE(status)) { + return; + } + } + + if (isVerbose()) { + filter.print(std::cout); + } + + // Apply the filter to the data + ResKeyPath path; + data->fRoot->applyFilter(filter, path, data.getAlias()); + } + if(options[WRITE_POOL_BUNDLE].doesOccur) { data->fWritePoolBundle = newPoolBundle; data->compactKeys(status); @@ -677,7 +709,7 @@ processFile(const char *filename, const char *cp, if(U_FAILURE(status)) { fprintf(stderr, "bundle_compactKeys(%s) or bundle_getKeyBytes() failed: %s\n", filename, u_errorName(status)); - goto finish; + return; } /* count the number of just-added key strings */ for(const char *newKeysLimit = newKeys + newKeysLength; newKeys < newKeysLimit; ++newKeys) { @@ -692,11 +724,11 @@ processFile(const char *filename, const char *cp, } /* Determine the target rb filename */ - rbname = make_res_filename(filename, outputDir, packageName, status); + uprv_free(make_res_filename(filename, outputDir, packageName, status)); if(U_FAILURE(status)) { fprintf(stderr, "couldn't make the res fileName for bundle %s. Error:%s\n", filename, u_errorName(status)); - goto finish; + return; } if(write_java== TRUE){ bundle_write_java(data.getAlias(), outputDir, outputEnc, @@ -713,24 +745,6 @@ processFile(const char *filename, const char *cp, if (U_FAILURE(status)) { fprintf(stderr, "couldn't write bundle %s. Error:%s\n", outputFileName, u_errorName(status)); } - -finish: - - if (inputDirBuf != NULL) { - uprv_free(inputDirBuf); - } - - if (openFileName != NULL) { - uprv_free(openFileName); - } - - if(ucbuf) { - ucbuf_close(ucbuf); - } - - if (rbname) { - uprv_free(rbname); - } } /* Generate the target .res file name from the input file name */ diff --git a/deps/icu-small/source/tools/genrb/parse.cpp b/deps/icu-small/source/tools/genrb/parse.cpp index 1f6246d3cf..884d5d5666 100644 --- a/deps/icu-small/source/tools/genrb/parse.cpp +++ b/deps/icu-small/source/tools/genrb/parse.cpp @@ -2000,6 +2000,8 @@ parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *fi if (state.bundle == NULL || U_FAILURE(*status)) { + delete state.bundle; + return NULL; } diff --git a/deps/icu-small/source/tools/genrb/reslist.cpp b/deps/icu-small/source/tools/genrb/reslist.cpp index 0493347ebe..bf57516047 100644 --- a/deps/icu-small/source/tools/genrb/reslist.cpp +++ b/deps/icu-small/source/tools/genrb/reslist.cpp @@ -28,13 +28,17 @@ #endif #include <assert.h> +#include <iostream> +#include <set> #include <stdio.h> + #include "unicode/localpointer.h" #include "reslist.h" #include "unewdata.h" #include "unicode/ures.h" #include "unicode/putil.h" #include "errmsg.h" +#include "filterrb.h" #include "uarrsort.h" #include "uelement.h" @@ -42,6 +46,8 @@ #include "uinvchar.h" #include "ustr_imp.h" #include "unicode/utf16.h" +#include "uassert.h" + /* * Align binary data at a 16-byte offset from the start of the resource bundle, * to be safe for any data type it may contain. @@ -921,9 +927,6 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg, if (f16BitUnits.length() & 1) { f16BitUnits.append((UChar)0xaaaa); /* pad to multiple of 4 bytes */ } - /* all keys have been mapped */ - uprv_free(fKeyMap); - fKeyMap = NULL; byteOffset = fKeysTop + f16BitUnits.length() * 2; fRoot->preWrite(&byteOffset); @@ -1037,14 +1040,15 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg, // Swap to big-endian so we get the same checksum on all platforms // (except for charset family, due to the key strings). UnicodeString s(f16BitUnits); - s.append((UChar)1); // Ensure that we own this buffer. assert(!s.isBogus()); - uint16_t *p = const_cast<uint16_t *>(reinterpret_cast<const uint16_t *>(s.getBuffer())); + // .getBuffer(capacity) returns a mutable buffer + char16_t* p = s.getBuffer(f16BitUnits.length()); for (int32_t count = f16BitUnits.length(); count > 0; --count) { uint16_t x = *p; *p++ = (uint16_t)((x << 8) | (x >> 8)); } - checksum = computeCRC((const char *)p, + s.releaseBuffer(f16BitUnits.length()); + checksum = computeCRC((const char *)s.getBuffer(), (uint32_t)f16BitUnits.length() * 2, checksum); } indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum; @@ -1127,7 +1131,8 @@ SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCo : fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE), fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle), fKeys(NULL), fKeyMap(NULL), - fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0), + fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), + fKeysCount(0), fLocalKeyLimit(0), f16BitUnits(), f16BitStringsLength(0), fUsePoolBundle(&kNoPoolBundle), fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0), @@ -1232,6 +1237,9 @@ int32_t SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) { int32_t keypos; + // It is not legal to add new key bytes after compactKeys is run! + U_ASSERT(fKeyMap == nullptr); + if (U_FAILURE(errorCode)) { return -1; } @@ -1333,11 +1341,35 @@ compareKeyOldpos(const void * /*context*/, const void *l, const void *r) { return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos); } +void SResource::collectKeys(std::function<void(int32_t)> collector) const { + collector(fKey); +} + +void ContainerResource::collectKeys(std::function<void(int32_t)> collector) const { + collector(fKey); + for (SResource* curr = fFirst; curr != NULL; curr = curr->fNext) { + curr->collectKeys(collector); + } +} + void SRBRoot::compactKeys(UErrorCode &errorCode) { KeyMapEntry *map; char *keys; int32_t i; + + // Except for pool bundles, keys might not be used. + // Do not add unused keys to the final bundle. + std::set<int32_t> keysInUse; + if (!fIsPoolBundle) { + fRoot->collectKeys([&keysInUse](int32_t key) { + if (key >= 0) { + keysInUse.insert(key); + } + }); + fKeysCount = static_cast<int32_t>(keysInUse.size()); + } + int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount; if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) { return; @@ -1356,11 +1388,23 @@ SRBRoot::compactKeys(UErrorCode &errorCode) { ++keys; /* skip the NUL */ } keys = fKeys + fKeysBottom; - for (; i < keysCount; ++i) { - map[i].oldpos = (int32_t)(keys - fKeys); - map[i].newpos = 0; - while (*keys != 0) { ++keys; } /* skip the key */ - ++keys; /* skip the NUL */ + while (i < keysCount) { + int32_t keyOffset = static_cast<int32_t>(keys - fKeys); + if (!fIsPoolBundle && keysInUse.count(keyOffset) == 0) { + // Mark the unused key as deleted + while (*keys != 0) { *keys++ = 1; } + *keys++ = 1; + } else { + map[i].oldpos = keyOffset; + map[i].newpos = 0; + while (*keys != 0) { ++keys; } /* skip the key */ + ++keys; /* skip the NUL */ + i++; + } + } + if (keys != fKeys + fKeysTop) { + // Throw away any unused keys from the end + fKeysTop = static_cast<int32_t>(keys - fKeys); } /* Sort the keys so that each one is immediately followed by all of its suffixes. */ uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry), @@ -1403,7 +1447,7 @@ SRBRoot::compactKeys(UErrorCode &errorCode) { for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {} if (suffix == suffixLimit && *k == *suffixLimit) { map[j].newpos = map[i].oldpos + offset; /* yes, point to the earlier key */ - /* mark the suffix as deleted */ + // Mark the suffix as deleted while (*suffix != 0) { *suffix++ = 1; } *suffix = 1; } else { @@ -1437,7 +1481,7 @@ SRBRoot::compactKeys(UErrorCode &errorCode) { keys[newpos++] = keys[oldpos++]; } } - assert(i == keysCount); + U_ASSERT(i == keysCount); } fKeysTop = newpos; /* Re-sort once more, by old offsets for binary searching. */ @@ -1691,3 +1735,55 @@ SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) { // +1 to account for the initial zero in f16BitUnits assert(f16BitUnits.length() <= (f16BitStringsLength + 1)); } + +void SResource::applyFilter( + const PathFilter& /*filter*/, + ResKeyPath& /*path*/, + const SRBRoot* /*bundle*/) { + // Only a few resource types (tables) are capable of being filtered. +} + +void TableResource::applyFilter( + const PathFilter& filter, + ResKeyPath& path, + const SRBRoot* bundle) { + SResource* prev = nullptr; + SResource* curr = fFirst; + for (; curr != nullptr;) { + path.push(curr->getKeyString(bundle)); + auto inclusion = filter.match(path); + if (inclusion == PathFilter::EInclusion::INCLUDE) { + // Include whole subtree + // no-op + if (isVerbose()) { + std::cout << "genrb subtree: " << bundle->fLocale << ": INCLUDE: " << path << std::endl; + } + } else if (inclusion == PathFilter::EInclusion::EXCLUDE) { + // Reject the whole subtree + // Remove it from the linked list + if (isVerbose()) { + std::cout << "genrb subtree: " << bundle->fLocale << ": DELETE: " << path << std::endl; + } + if (prev == nullptr) { + fFirst = curr->fNext; + } else { + prev->fNext = curr->fNext; + } + fCount--; + delete curr; + curr = prev; + } else { + U_ASSERT(inclusion == PathFilter::EInclusion::PARTIAL); + // Recurse into the child + curr->applyFilter(filter, path, bundle); + } + path.pop(); + + prev = curr; + if (curr == nullptr) { + curr = fFirst; + } else { + curr = curr->fNext; + } + } +} diff --git a/deps/icu-small/source/tools/genrb/reslist.h b/deps/icu-small/source/tools/genrb/reslist.h index 53ade5b82c..34b710c423 100644 --- a/deps/icu-small/source/tools/genrb/reslist.h +++ b/deps/icu-small/source/tools/genrb/reslist.h @@ -23,6 +23,8 @@ #define KEY_SPACE_SIZE 65536 #define RESLIST_MAX_INT_VECTOR 2048 +#include <functional> + #include "unicode/utypes.h" #include "unicode/unistr.h" #include "unicode/ures.h" @@ -36,7 +38,9 @@ U_CDECL_BEGIN +class PathFilter; class PseudoListResource; +class ResKeyPath; struct ResFile { ResFile() @@ -212,6 +216,19 @@ struct SResource { void write(UNewDataMemory *mem, uint32_t *byteOffset); virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset); + /** + * Applies the given filter with the given base path to this resource. + * Removes child resources rejected by the filter recursively. + * + * @param bundle Needed in order to access the key for this and child resources. + */ + virtual void applyFilter(const PathFilter& filter, ResKeyPath& path, const SRBRoot* bundle); + + /** + * Calls the given function for every key ID present in this tree. + */ + virtual void collectKeys(std::function<void(int32_t)> collector) const; + int8_t fType; /* nominal type: fRes (when != 0xffffffff) may use subtype */ UBool fWritten; /* res_write() can exit early */ uint32_t fRes; /* resource item word; RES_BOGUS=0xffffffff if not known yet */ @@ -231,7 +248,10 @@ public: fCount(0), fFirst(NULL) {} virtual ~ContainerResource(); - virtual void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode); + void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) override; + + void collectKeys(std::function<void(int32_t)> collector) const override; + protected: void writeAllRes16(SRBRoot *bundle); void preWriteAllRes(uint32_t *byteOffset); @@ -254,9 +274,11 @@ public: void add(SResource *res, int linenumber, UErrorCode &errorCode); - virtual void handleWrite16(SRBRoot *bundle); - virtual void handlePreWrite(uint32_t *byteOffset); - virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset); + void handleWrite16(SRBRoot *bundle) override; + void handlePreWrite(uint32_t *byteOffset) override; + void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) override; + + void applyFilter(const PathFilter& filter, ResKeyPath& path, const SRBRoot* bundle) override; int8_t fTableType; // determined by table_write16() for table_preWrite() & table_write() SRBRoot *fRoot; diff --git a/deps/icu-small/source/tools/pkgdata/pkgdata.cpp b/deps/icu-small/source/tools/pkgdata/pkgdata.cpp index d7e5721c2d..9d512a3ae5 100644 --- a/deps/icu-small/source/tools/pkgdata/pkgdata.cpp +++ b/deps/icu-small/source/tools/pkgdata/pkgdata.cpp @@ -504,7 +504,6 @@ main(int argc, char* argv[]) { if (o.files != NULL) { pkg_deleteList(o.files); } - return result; } @@ -544,6 +543,7 @@ normal_command_mode: int result = system(cmd); if (result != 0) { fprintf(stderr, "-- return status = %d\n", result); + result = 1; // system() result code is platform specific. } if (cmd != cmdBuffer && cmd != command) { @@ -1350,8 +1350,8 @@ static int32_t pkg_generateLibraryFile(const char *targetDir, const char mode, c if (IN_STATIC_MODE(mode)) { if (cmd == NULL) { - length = uprv_strlen(pkgDataFlags[AR]) + uprv_strlen(pkgDataFlags[ARFLAGS]) + uprv_strlen(targetDir) + - uprv_strlen(libFileNames[LIB_FILE_VERSION]) + uprv_strlen(objectFile) + uprv_strlen(pkgDataFlags[RANLIB]) + BUFFER_PADDING_SIZE; + length = static_cast<int32_t>(uprv_strlen(pkgDataFlags[AR]) + uprv_strlen(pkgDataFlags[ARFLAGS]) + uprv_strlen(targetDir) + + uprv_strlen(libFileNames[LIB_FILE_VERSION]) + uprv_strlen(objectFile) + uprv_strlen(pkgDataFlags[RANLIB]) + BUFFER_PADDING_SIZE); if ((cmd = (char *)uprv_malloc(sizeof(char) * length)) == NULL) { fprintf(stderr, "Unable to allocate memory for command.\n"); return -1; @@ -1376,15 +1376,15 @@ static int32_t pkg_generateLibraryFile(const char *targetDir, const char mode, c } } else /* if (IN_DLL_MODE(mode)) */ { if (cmd == NULL) { - length = uprv_strlen(pkgDataFlags[GENLIB]) + uprv_strlen(pkgDataFlags[LDICUDTFLAGS]) + + length = static_cast<int32_t>(uprv_strlen(pkgDataFlags[GENLIB]) + uprv_strlen(pkgDataFlags[LDICUDTFLAGS]) + ((uprv_strlen(targetDir) + uprv_strlen(libFileNames[LIB_FILE_VERSION_TMP])) * 2) + uprv_strlen(objectFile) + uprv_strlen(pkgDataFlags[LD_SONAME]) + uprv_strlen(pkgDataFlags[LD_SONAME][0] == 0 ? "" : libFileNames[LIB_FILE_VERSION_MAJOR]) + - uprv_strlen(pkgDataFlags[RPATH_FLAGS]) + uprv_strlen(pkgDataFlags[BIR_FLAGS]) + BUFFER_PADDING_SIZE; + uprv_strlen(pkgDataFlags[RPATH_FLAGS]) + uprv_strlen(pkgDataFlags[BIR_FLAGS]) + BUFFER_PADDING_SIZE); #if U_PLATFORM == U_PF_CYGWIN - length += uprv_strlen(targetDir) + uprv_strlen(libFileNames[LIB_FILE_CYGWIN_VERSION]); + length += static_cast<int32_t>(uprv_strlen(targetDir) + uprv_strlen(libFileNames[LIB_FILE_CYGWIN_VERSION])); #elif U_PLATFORM == U_PF_MINGW - length += uprv_strlen(targetDir) + uprv_strlen(libFileNames[LIB_FILE_MINGW]); + length += static_cast<int32_t>(uprv_strlen(targetDir) + uprv_strlen(libFileNames[LIB_FILE_MINGW])); #endif if ((cmd = (char *)uprv_malloc(sizeof(char) * length)) == NULL) { fprintf(stderr, "Unable to allocate memory for command.\n"); @@ -1516,8 +1516,8 @@ static int32_t pkg_createWithAssemblyCode(const char *targetDir, const char mode uprv_strcpy(tempObjectFile, gencFilePath); tempObjectFile[uprv_strlen(tempObjectFile)-1] = 'o'; - length = uprv_strlen(pkgDataFlags[COMPILER]) + uprv_strlen(pkgDataFlags[LIBFLAGS]) - + uprv_strlen(tempObjectFile) + uprv_strlen(gencFilePath) + BUFFER_PADDING_SIZE; + length = static_cast<int32_t>(uprv_strlen(pkgDataFlags[COMPILER]) + uprv_strlen(pkgDataFlags[LIBFLAGS]) + + uprv_strlen(tempObjectFile) + uprv_strlen(gencFilePath) + BUFFER_PADDING_SIZE); cmd = (char *)uprv_malloc(sizeof(char) * length); if (cmd == NULL) { @@ -1905,7 +1905,7 @@ static UPKGOptions *pkg_checkFlag(UPKGOptions *o) { char *tmpGenlibFlagBuffer = NULL; int32_t i, offset; - length = uprv_strlen(flag) + 1; + length = static_cast<int32_t>(uprv_strlen(flag) + 1); tmpGenlibFlagBuffer = (char *)uprv_malloc(length); if (tmpGenlibFlagBuffer == NULL) { /* Memory allocation error */ @@ -1915,7 +1915,7 @@ static UPKGOptions *pkg_checkFlag(UPKGOptions *o) { uprv_strcpy(tmpGenlibFlagBuffer, flag); - offset = uprv_strlen(rm_cmd); + offset = static_cast<int32_t>(uprv_strlen(rm_cmd)); for (i = 0; i < (length - offset); i++) { flag[i] = tmpGenlibFlagBuffer[offset + i]; @@ -1928,7 +1928,7 @@ static UPKGOptions *pkg_checkFlag(UPKGOptions *o) { } flag = pkgDataFlags[BIR_FLAGS]; - length = uprv_strlen(pkgDataFlags[BIR_FLAGS]); + length = static_cast<int32_t>(uprv_strlen(pkgDataFlags[BIR_FLAGS])); for (int32_t i = 0; i < length; i++) { if (flag[i] == MAP_FILE_EXT[count]) { @@ -1988,7 +1988,7 @@ static UPKGOptions *pkg_checkFlag(UPKGOptions *o) { int32_t length = 0; flag = pkgDataFlags[GENLIB]; - length = uprv_strlen(pkgDataFlags[GENLIB]); + length = static_cast<int32_t>(uprv_strlen(pkgDataFlags[GENLIB])); int32_t position = length - 1; @@ -2006,7 +2006,7 @@ static UPKGOptions *pkg_checkFlag(UPKGOptions *o) { int32_t length = 0; flag = pkgDataFlags[GENLIB]; - length = uprv_strlen(pkgDataFlags[GENLIB]); + length = static_cast<int32_t>(uprv_strlen(pkgDataFlags[GENLIB])); int32_t position = length - 1; @@ -2117,8 +2117,8 @@ static void loadLists(UPKGOptions *o, UErrorCode *status) fprintf(stderr, "pkgdata: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, s); exit(U_ILLEGAL_ARGUMENT_ERROR); } - tmpLength = uprv_strlen(o->srcDir) + - uprv_strlen(s) + 5; /* 5 is to add a little extra space for, among other things, PKGDATA_FILE_SEP_STRING */ + /* The +5 is to add a little extra space for, among other things, PKGDATA_FILE_SEP_STRING */ + tmpLength = static_cast<int32_t>(uprv_strlen(o->srcDir) + uprv_strlen(s) + 5); if((tmp = (char *)uprv_malloc(tmpLength)) == NULL) { fprintf(stderr, "pkgdata: Error: Unable to allocate tmp buffer size: %d\n", tmpLength); exit(U_MEMORY_ALLOCATION_ERROR); diff --git a/deps/icu-small/source/tools/toolutil/pkg_genc.cpp b/deps/icu-small/source/tools/toolutil/pkg_genc.cpp index 5ab0d84630..2a8425e334 100644 --- a/deps/icu-small/source/tools/toolutil/pkg_genc.cpp +++ b/deps/icu-small/source/tools/toolutil/pkg_genc.cpp @@ -309,16 +309,11 @@ writeAssemblyCode(const char *filename, const char *destdir, const char *optEntr T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine); for(;;) { + memset(buffer, 0, sizeof(buffer)); length=T_FileStream_read(in, buffer, sizeof(buffer)); if(length==0) { break; } - if (length != sizeof(buffer)) { - /* pad with extra 0's when at the end of the file */ - for(i=0; i < (length % sizeof(uint32_t)); ++i) { - buffer[length+i] = 0; - } - } for(i=0; i<(length/sizeof(buffer[0])); i++) { column = write32(out, buffer[i], column); } @@ -685,23 +680,30 @@ getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *pBits=32; *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB); #elif U_PLATFORM_HAS_WIN32_API -/* _M_IA64 should be defined in windows.h */ -# if defined(_M_IA64) - *pCPU=IMAGE_FILE_MACHINE_IA64; - *pBits = 64; -# elif defined(_M_AMD64) -// link.exe does not really care about the .obj machine type and this will -// allow us to build a dll for both ARM & x64 with an amd64 built tool -// ARM is same as x64 except for first 2 bytes of object file - *pCPU = IMAGE_FILE_MACHINE_UNKNOWN; - // *pCPU = IMAGE_FILE_MACHINE_ARMNT; // If we wanted to be explicit - // *pCPU = IMAGE_FILE_MACHINE_AMD64; // We would use one of these names - *pBits = 64; // Doesn't seem to be used for anything interesting? + // Windows always runs in little-endian mode. + *pIsBigEndian = FALSE; + + // Note: The various _M_<arch> macros are predefined by the MSVC compiler based + // on the target compilation architecture. + // https://docs.microsoft.com/cpp/preprocessor/predefined-macros + + // link.exe will link an IMAGE_FILE_MACHINE_UNKNOWN data-only .obj file + // no matter what architecture it is targeting (though other values are + // required to match). Unfortunately, the variable name decoration/mangling + // is slightly different on x86, which means we can't use the UNKNOWN type + // for all architectures though. +# if defined(_M_IX86) + *pCPU = IMAGE_FILE_MACHINE_I386; # else - *pCPU=IMAGE_FILE_MACHINE_I386; // We would use one of these names + *pCPU = IMAGE_FILE_MACHINE_UNKNOWN; +# endif +# if defined(_M_IA64) || defined(_M_AMD64) || defined (_M_ARM64) + *pBits = 64; // Doesn't seem to be used for anything interesting though? +# elif defined(_M_IX86) || defined(_M_ARM) *pBits = 32; +# else +# error "Unknown platform for CAN_GENERATE_OBJECTS." # endif - *pIsBigEndian=FALSE; #else # error "Unknown platform for CAN_GENERATE_OBJECTS." #endif diff --git a/deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp b/deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp index 423e4b7363..29a1f7bc18 100644 --- a/deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp +++ b/deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp @@ -379,14 +379,14 @@ createCommonDataFile(const char *destDir, const char *name, const char *entrypoi " {0, 0, 0, 0}\n" " },\n" " \"\", %lu, 0, {\n", - (unsigned long)32-4-sizeof(UDataInfo), - (unsigned long)fileCount, + static_cast<unsigned long>(32-4-sizeof(UDataInfo)), + static_cast<unsigned long>(fileCount), entrypointName, - (unsigned long)sizeof(UDataInfo), + static_cast<unsigned long>(sizeof(UDataInfo)), U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, U_SIZEOF_UCHAR, - (unsigned long)fileCount + static_cast<unsigned long>(fileCount) ); T_FileStream_writeLine(out, buffer); diff --git a/deps/icu-small/source/tools/toolutil/swapimpl.cpp b/deps/icu-small/source/tools/toolutil/swapimpl.cpp index e8850cb986..926755a2aa 100644 --- a/deps/icu-small/source/tools/toolutil/swapimpl.cpp +++ b/deps/icu-small/source/tools/toolutil/swapimpl.cpp @@ -41,6 +41,7 @@ #include "uarrsort.h" #include "ucmndata.h" #include "udataswp.h" +#include "ulayout_props.h" /* swapping implementations in common */ @@ -640,6 +641,106 @@ unorm_swap(const UDataSwapper *ds, #endif +// Unicode text layout properties data swapping -------------------------------- + +static int32_t U_CALLCONV +ulayout_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + // udata_swapDataHeader checks the arguments. + int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if (pErrorCode == nullptr || U_FAILURE(*pErrorCode)) { + return 0; + } + + // Check data format and format version. + const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4); + if (!( + pInfo->dataFormat[0] == ULAYOUT_FMT_0 && // dataFormat="Layo" + pInfo->dataFormat[1] == ULAYOUT_FMT_1 && + pInfo->dataFormat[2] == ULAYOUT_FMT_2 && + pInfo->dataFormat[3] == ULAYOUT_FMT_3 && + pInfo->formatVersion[0] == 1)) { + udata_printError(ds, + "ulayout_swap(): data format %02x.%02x.%02x.%02x (format version %02x) " + "is not recognized as text layout properties data\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0]); + *pErrorCode = U_UNSUPPORTED_ERROR; + return 0; + } + + const uint8_t *inBytes = (const uint8_t *)inData + headerSize; + uint8_t *outBytes = (uint8_t *)outData + headerSize; + + const int32_t *inIndexes = (const int32_t *)inBytes; + + if (length >= 0) { + length -= headerSize; + if (length < 12 * 4) { + udata_printError(ds, + "ulayout_swap(): too few bytes (%d after header) for text layout properties data\n", + length); + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + int32_t indexesLength = udata_readInt32(ds, inIndexes[ULAYOUT_IX_INDEXES_LENGTH]); + if (indexesLength < 12) { + udata_printError(ds, + "ulayout_swap(): too few indexes (%d) for text layout properties data\n", + indexesLength); + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + // Read the data offsets before swapping anything. + int32_t indexes[ULAYOUT_IX_TRIES_TOP + 1]; + for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) { + indexes[i] = udata_readInt32(ds, inIndexes[i]); + } + int32_t size = indexes[ULAYOUT_IX_TRIES_TOP]; + + if (length >= 0) { + if (length < size) { + udata_printError(ds, + "ulayout_swap(): too few bytes (%d after header) " + "for all of text layout properties data\n", + length); + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + // Copy the data for inaccessible bytes. + if (inBytes != outBytes) { + uprv_memcpy(outBytes, inBytes, size); + } + + // Swap the int32_t indexes[]. + int32_t offset = 0; + int32_t count = indexesLength * 4; + ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); + offset += count; + + // Swap each trie. + for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) { + int32_t top = indexes[i]; + count = top - offset; + U_ASSERT(count >= 0); + if (count >= 16) { + utrie_swapAnyVersion(ds, inBytes + offset, count, outBytes + offset, pErrorCode); + } + offset = top; + } + + U_ASSERT(offset == size); + } + + return headerSize + size; +} + /* Swap 'Test' data from gentest */ static int32_t U_CALLCONV test_swap(const UDataSwapper *ds, @@ -731,6 +832,10 @@ static const struct { { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ #endif + + { { ULAYOUT_FMT_0, ULAYOUT_FMT_1, ULAYOUT_FMT_2, ULAYOUT_FMT_3 }, + ulayout_swap }, // dataFormat="Layo" + #if !UCONFIG_NO_COLLATION { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ diff --git a/deps/icu-small/source/tools/toolutil/toolutil.cpp b/deps/icu-small/source/tools/toolutil/toolutil.cpp index 0f7d0984a8..21dca7fe5d 100644 --- a/deps/icu-small/source/tools/toolutil/toolutil.cpp +++ b/deps/icu-small/source/tools/toolutil/toolutil.cpp @@ -143,7 +143,7 @@ findDirname(const char *path, char *buffer, int32_t bufLen, UErrorCode* status) resultLen = 0; } else { resultPtr = path; - resultLen = basename - path; + resultLen = static_cast<int32_t>(basename - path); if(resultLen<1) { resultLen = 1; /* '/' or '/a' -> '/' */ } diff --git a/deps/icu-small/source/tools/toolutil/ucbuf.cpp b/deps/icu-small/source/tools/toolutil/ucbuf.cpp index 5269c8177c..9b5e615d25 100644 --- a/deps/icu-small/source/tools/toolutil/ucbuf.cpp +++ b/deps/icu-small/source/tools/toolutil/ucbuf.cpp @@ -178,7 +178,7 @@ ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); } -#if UCBUF_DEBUG +#ifdef UCBUF_DEBUG memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); #endif if(buf->isBuffered){ @@ -295,7 +295,7 @@ ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ } outputWritten = (int32_t)(target - pTarget); -#if UCBUF_DEBUG +#ifdef UCBUF_DEBUG { int i; target = pTarget; diff --git a/deps/icu-small/source/tools/toolutil/ucmstate.cpp b/deps/icu-small/source/tools/toolutil/ucmstate.cpp index 2776575229..206c2f172e 100644 --- a/deps/icu-small/source/tools/toolutil/ucmstate.cpp +++ b/deps/icu-small/source/tools/toolutil/ucmstate.cpp @@ -653,7 +653,8 @@ compactToUnicode2(UCMStates *states, /* for each lead byte */ for(i=0; i<256; ++i) { entry=states->stateTable[leadState][i]; - if(MBCS_ENTRY_IS_TRANSITION(entry) && (MBCS_ENTRY_TRANSITION_STATE(entry))==trailState) { + if(MBCS_ENTRY_IS_TRANSITION(entry) && + (MBCS_ENTRY_TRANSITION_STATE(entry))==static_cast<uint32_t>(trailState)) { /* the offset is different for each lead byte */ offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); /* for each trail byte for this lead byte */ |