From da736d8259331a8ef13bf4bbb10bbb8a5c0e5299 Mon Sep 17 00:00:00 2001 From: Florian Dold Date: Tue, 13 Aug 2019 12:29:07 +0200 Subject: remove node/v8 from source tree --- .../deps/icu-small/source/tools/genrb/parse.cpp | 2084 -------------------- 1 file changed, 2084 deletions(-) delete mode 100644 deps/node/deps/icu-small/source/tools/genrb/parse.cpp (limited to 'deps/node/deps/icu-small/source/tools/genrb/parse.cpp') diff --git a/deps/node/deps/icu-small/source/tools/genrb/parse.cpp b/deps/node/deps/icu-small/source/tools/genrb/parse.cpp deleted file mode 100644 index 1f6246d3..00000000 --- a/deps/node/deps/icu-small/source/tools/genrb/parse.cpp +++ /dev/null @@ -1,2084 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1998-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* -* File parse.cpp -* -* Modification History: -* -* Date Name Description -* 05/26/99 stephen Creation. -* 02/25/00 weiv Overhaul to write udata -* 5/10/01 Ram removed ustdio dependency -* 06/10/2001 Dominic Ludlam Rewritten -******************************************************************************* -*/ - -// Safer use of UnicodeString. -#ifndef UNISTR_FROM_CHAR_EXPLICIT -# define UNISTR_FROM_CHAR_EXPLICIT explicit -#endif - -// Less important, but still a good idea. -#ifndef UNISTR_FROM_STRING_EXPLICIT -# define UNISTR_FROM_STRING_EXPLICIT explicit -#endif - -#include -#include "parse.h" -#include "errmsg.h" -#include "uhash.h" -#include "cmemory.h" -#include "cstring.h" -#include "uinvchar.h" -#include "read.h" -#include "ustr.h" -#include "reslist.h" -#include "rbt_pars.h" -#include "genrb.h" -#include "unicode/stringpiece.h" -#include "unicode/unistr.h" -#include "unicode/ustring.h" -#include "unicode/uscript.h" -#include "unicode/utf16.h" -#include "unicode/putil.h" -#include "charstr.h" -#include "collationbuilder.h" -#include "collationdata.h" -#include "collationdatareader.h" -#include "collationdatawriter.h" -#include "collationfastlatinbuilder.h" -#include "collationinfo.h" -#include "collationroot.h" -#include "collationruleparser.h" -#include "collationtailoring.h" -#include - -/* Number of tokens to read ahead of the current stream position */ -#define MAX_LOOKAHEAD 3 - -#define CR 0x000D -#define LF 0x000A -#define SPACE 0x0020 -#define TAB 0x0009 -#define ESCAPE 0x005C -#define HASH 0x0023 -#define QUOTE 0x0027 -#define ZERO 0x0030 -#define STARTCOMMAND 0x005B -#define ENDCOMMAND 0x005D -#define OPENSQBRACKET 0x005B -#define CLOSESQBRACKET 0x005D - -using icu::CharString; -using icu::LocalMemory; -using icu::LocalPointer; -using icu::LocalUCHARBUFPointer; -using icu::StringPiece; -using icu::UnicodeString; - -struct Lookahead -{ - enum ETokenType type; - struct UString value; - struct UString comment; - uint32_t line; -}; - -/* keep in sync with token defines in read.h */ -const char *tokenNames[TOK_TOKEN_COUNT] = -{ - "string", /* A string token, such as "MonthNames" */ - "'{'", /* An opening brace character */ - "'}'", /* A closing brace character */ - "','", /* A comma */ - "':'", /* A colon */ - - "", /* End of the file has been reached successfully */ - "" -}; - -/* Just to store "TRUE" */ -//static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; - -typedef struct { - struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; - uint32_t lookaheadPosition; - UCHARBUF *buffer; - struct SRBRoot *bundle; - const char *inputdir; - uint32_t inputdirLength; - const char *outputdir; - uint32_t outputdirLength; - const char *filename; - UBool makeBinaryCollation; - UBool omitCollationRules; -} ParseState; - -typedef struct SResource * -ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status); - -static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status); - -/* The nature of the lookahead buffer: - There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides - MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. - When getToken is called, the current pointer is moved to the next slot and the - old slot is filled with the next token from the reader by calling getNextToken. - The token values are stored in the slot, which means that token values don't - survive a call to getToken, ie. - - UString *value; - - getToken(&value, NULL, status); - getToken(NULL, NULL, status); bad - value is now a different string -*/ -static void -initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status) -{ - static uint32_t initTypeStrings = 0; - uint32_t i; - - if (!initTypeStrings) - { - initTypeStrings = 1; - } - - state->lookaheadPosition = 0; - state->buffer = buf; - - resetLineNumber(); - - for (i = 0; i < MAX_LOOKAHEAD; i++) - { - state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); - if (U_FAILURE(*status)) - { - return; - } - } - - *status = U_ZERO_ERROR; -} - -static void -cleanupLookahead(ParseState* state) -{ - uint32_t i; - for (i = 0; i <= MAX_LOOKAHEAD; i++) - { - ustr_deinit(&state->lookahead[i].value); - ustr_deinit(&state->lookahead[i].comment); - } - -} - -static enum ETokenType -getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status) -{ - enum ETokenType result; - uint32_t i; - - result = state->lookahead[state->lookaheadPosition].type; - - if (tokenValue != NULL) - { - *tokenValue = &state->lookahead[state->lookaheadPosition].value; - } - - if (linenumber != NULL) - { - *linenumber = state->lookahead[state->lookaheadPosition].line; - } - - if (comment != NULL) - { - ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); - } - - i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1); - state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1); - ustr_setlen(&state->lookahead[i].comment, 0, status); - ustr_setlen(&state->lookahead[i].value, 0, status); - state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); - - /* printf("getToken, returning %s\n", tokenNames[result]); */ - - return result; -} - -static enum ETokenType -peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status) -{ - uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1); - - if (U_FAILURE(*status)) - { - return TOK_ERROR; - } - - if (lookaheadCount >= MAX_LOOKAHEAD) - { - *status = U_INTERNAL_PROGRAM_ERROR; - return TOK_ERROR; - } - - if (tokenValue != NULL) - { - *tokenValue = &state->lookahead[i].value; - } - - if (linenumber != NULL) - { - *linenumber = state->lookahead[i].line; - } - - if(comment != NULL){ - ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); - } - - return state->lookahead[i].type; -} - -static void -expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status) -{ - uint32_t line; - - enum ETokenType token = getToken(state, tokenValue, comment, &line, status); - - if (linenumber != NULL) - { - *linenumber = line; - } - - if (U_FAILURE(*status)) - { - return; - } - - if (token != expectedToken) - { - *status = U_INVALID_FORMAT_ERROR; - error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]); - } - else - { - *status = U_ZERO_ERROR; - } -} - -static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status) -{ - struct UString *tokenValue; - char *result; - uint32_t count; - - expect(state, TOK_STRING, &tokenValue, comment, line, status); - - if (U_FAILURE(*status)) - { - return NULL; - } - - count = u_strlen(tokenValue->fChars); - if(!uprv_isInvariantUString(tokenValue->fChars, count)) { - *status = U_INVALID_FORMAT_ERROR; - error(*line, "invariant characters required for table keys, binary data, etc."); - return NULL; - } - - result = static_cast(uprv_malloc(count+1)); - - if (result == NULL) - { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - u_UCharsToChars(tokenValue->fChars, result, count+1); - return result; -} - -static struct SResource * -parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) -{ - struct SResource *result = NULL; - struct UString *tokenValue; - FileStream *file = NULL; - char filename[256] = { '\0' }; - char cs[128] = { '\0' }; - uint32_t line; - UBool quoted = FALSE; - UCHARBUF *ucbuf=NULL; - UChar32 c = 0; - const char* cp = NULL; - UChar *pTarget = NULL; - UChar *target = NULL; - UChar *targetLimit = NULL; - int32_t size = 0; - - expect(state, TOK_STRING, &tokenValue, NULL, &line, status); - - if(isVerbose()){ - printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - if (U_FAILURE(*status)) - { - return NULL; - } - /* make the filename including the directory */ - if (state->inputdir != NULL) - { - uprv_strcat(filename, state->inputdir); - - if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) - { - uprv_strcat(filename, U_FILE_SEP_STRING); - } - } - - u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); - - expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); - - if (U_FAILURE(*status)) - { - return NULL; - } - uprv_strcat(filename, cs); - - if(state->omitCollationRules) { - return res_none(); - } - - ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); - - if (U_FAILURE(*status)) { - error(line, "An error occurred while opening the input file %s\n", filename); - return NULL; - } - - /* We allocate more space than actually required - * since the actual size needed for storing UChars - * is not known in UTF-8 byte stream - */ - size = ucbuf_size(ucbuf) + 1; - pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size); - uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); - target = pTarget; - targetLimit = pTarget+size; - - /* read the rules into the buffer */ - while (target < targetLimit) - { - c = ucbuf_getc(ucbuf, status); - if(c == QUOTE) { - quoted = (UBool)!quoted; - } - /* weiv (06/26/2002): adding the following: - * - preserving spaces in commands [...] - * - # comments until the end of line - */ - if (c == STARTCOMMAND && !quoted) - { - /* preserve commands - * closing bracket will be handled by the - * append at the end of the loop - */ - while(c != ENDCOMMAND) { - U_APPEND_CHAR32_ONLY(c, target); - c = ucbuf_getc(ucbuf, status); - } - } - else if (c == HASH && !quoted) { - /* skip comments */ - while(c != CR && c != LF) { - c = ucbuf_getc(ucbuf, status); - } - continue; - } - else if (c == ESCAPE) - { - c = unescape(ucbuf, status); - - if (c == (UChar32)U_ERR) - { - uprv_free(pTarget); - T_FileStream_close(file); - return NULL; - } - } - else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF)) - { - /* ignore spaces carriage returns - * and line feed unless in the form \uXXXX - */ - continue; - } - - /* Append UChar * after dissembling if c > 0xffff*/ - if (c != (UChar32)U_EOF) - { - U_APPEND_CHAR32_ONLY(c, target); - } - else - { - break; - } - } - - /* terminate the string */ - if(target < targetLimit){ - *target = 0x0000; - } - - result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status); - - - ucbuf_close(ucbuf); - uprv_free(pTarget); - T_FileStream_close(file); - - return result; -} - -static struct SResource * -parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) -{ - struct SResource *result = NULL; - struct UString *tokenValue; - FileStream *file = NULL; - char filename[256] = { '\0' }; - char cs[128] = { '\0' }; - uint32_t line; - UCHARBUF *ucbuf=NULL; - const char* cp = NULL; - UChar *pTarget = NULL; - const UChar *pSource = NULL; - int32_t size = 0; - - expect(state, TOK_STRING, &tokenValue, NULL, &line, status); - - if(isVerbose()){ - printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - if (U_FAILURE(*status)) - { - return NULL; - } - /* make the filename including the directory */ - if (state->inputdir != NULL) - { - uprv_strcat(filename, state->inputdir); - - if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) - { - uprv_strcat(filename, U_FILE_SEP_STRING); - } - } - - u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); - - expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); - - if (U_FAILURE(*status)) - { - return NULL; - } - uprv_strcat(filename, cs); - - - ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); - - if (U_FAILURE(*status)) { - error(line, "An error occurred while opening the input file %s\n", filename); - return NULL; - } - - /* We allocate more space than actually required - * since the actual size needed for storing UChars - * is not known in UTF-8 byte stream - */ - pSource = ucbuf_getBuffer(ucbuf, &size, status); - pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1)); - uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); - -#if !UCONFIG_NO_TRANSLITERATION - size = utrans_stripRules(pSource, size, pTarget, status); -#else - size = 0; - fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n"); -#endif - result = string_open(state->bundle, tag, pTarget, size, NULL, status); - - ucbuf_close(ucbuf); - uprv_free(pTarget); - T_FileStream_close(file); - - return result; -} -static ArrayResource* dependencyArray = NULL; - -static struct SResource * -parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) -{ - struct SResource *result = NULL; - struct SResource *elem = NULL; - struct UString *tokenValue; - uint32_t line; - char filename[256] = { '\0' }; - char cs[128] = { '\0' }; - - expect(state, TOK_STRING, &tokenValue, NULL, &line, status); - - if(isVerbose()){ - printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - if (U_FAILURE(*status)) - { - return NULL; - } - /* make the filename including the directory */ - if (state->outputdir != NULL) - { - uprv_strcat(filename, state->outputdir); - - if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR) - { - uprv_strcat(filename, U_FILE_SEP_STRING); - } - } - - u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); - - if (U_FAILURE(*status)) - { - return NULL; - } - uprv_strcat(filename, cs); - if(!T_FileStream_file_exists(filename)){ - if(isStrict()){ - error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); - }else{ - warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); - } - } - if(dependencyArray==NULL){ - dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status); - } - if(tag!=NULL){ - result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); - } - elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status); - - dependencyArray->add(elem); - - if (U_FAILURE(*status)) - { - return NULL; - } - expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); - return result; -} -static struct SResource * -parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) -{ - struct UString *tokenValue; - struct SResource *result = NULL; - -/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0) - { - return parseUCARules(tag, startline, status); - }*/ - if(isVerbose()){ - printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); - - if (U_SUCCESS(*status)) - { - /* create the string now - tokenValue doesn't survive a call to getToken (and therefore - doesn't survive expect either) */ - - result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); - if(U_SUCCESS(*status) && result) { - expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - } - } - - return result; -} - -static struct SResource * -parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) -{ - struct UString *tokenValue; - struct SResource *result = NULL; - - expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); - - if(isVerbose()){ - printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - if (U_SUCCESS(*status)) - { - /* create the string now - tokenValue doesn't survive a call to getToken (and therefore - doesn't survive expect either) */ - - result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); - - expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - } - - return result; -} - -#if !UCONFIG_NO_COLLATION - -namespace { - -static struct SResource* resLookup(struct SResource* res, const char* key){ - if (res == res_none() || !res->isTable()) { - return NULL; - } - - TableResource *list = static_cast(res); - SResource *current = list->fFirst; - while (current != NULL) { - if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { - return current; - } - current = current->fNext; - } - return NULL; -} - -class GenrbImporter : public icu::CollationRuleParser::Importer { -public: - GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {} - virtual ~GenrbImporter(); - virtual void getRules( - const char *localeID, const char *collationType, - UnicodeString &rules, - const char *&errorReason, UErrorCode &errorCode); - -private: - const char *inputDir; - const char *outputDir; -}; - -GenrbImporter::~GenrbImporter() {} - -void -GenrbImporter::getRules( - const char *localeID, const char *collationType, - UnicodeString &rules, - const char *& /*errorReason*/, UErrorCode &errorCode) { - CharString filename(localeID, errorCode); - for(int32_t i = 0; i < filename.length(); i++){ - if(filename[i] == '-'){ - filename.data()[i] = '_'; - } - } - filename.append(".txt", errorCode); - if (U_FAILURE(errorCode)) { - return; - } - CharString inputDirBuf; - CharString openFileName; - if(inputDir == NULL) { - const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR); - if (filenameBegin != NULL) { - /* - * When a filename ../../../data/root.txt is specified, - * we presume that the input directory is ../../../data - * This is very important when the resource file includes - * another file, like UCARules.txt or thaidict.brk. - */ - StringPiece dir = filename.toStringPiece(); - const char *filenameLimit = filename.data() + filename.length(); - dir.remove_suffix((int32_t)(filenameLimit - filenameBegin)); - inputDirBuf.append(dir, errorCode); - inputDir = inputDirBuf.data(); - } - }else{ - int32_t dirlen = (int32_t)uprv_strlen(inputDir); - - if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) { - /* - * append the input dir to openFileName if the first char in - * filename is not file separator char and the last char input directory is not '.'. - * This is to support : - * genrb -s. /home/icu/data - * genrb -s. icu/data - * The user cannot mix notations like - * genrb -s. /icu/data --- the absolute path specified. -s redundant - * user should use - * genrb -s. icu/data --- start from CWD and look in icu/data dir - */ - openFileName.append(inputDir, dirlen, errorCode); - if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { - openFileName.append(U_FILE_SEP_CHAR, errorCode); - } - } - } - openFileName.append(filename, errorCode); - if(U_FAILURE(errorCode)) { - return; - } - // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data()); - const char* cp = ""; - LocalUCHARBUFPointer ucbuf( - ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode)); - if(errorCode == U_FILE_ACCESS_ERROR) { - fprintf(stderr, "couldn't open file %s\n", openFileName.data()); - return; - } - if (ucbuf.isNull() || U_FAILURE(errorCode)) { - fprintf(stderr, "An error occurred processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode)); - return; - } - - /* Parse the data into an SRBRoot */ - LocalPointer data( - parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode)); - if (U_FAILURE(errorCode)) { - return; - } - - struct SResource *root = data->fRoot; - struct SResource *collations = resLookup(root, "collations"); - if (collations != NULL) { - struct SResource *collation = resLookup(collations, collationType); - if (collation != NULL) { - struct SResource *sequence = resLookup(collation, "Sequence"); - if (sequence != NULL && sequence->isString()) { - // No string pointer aliasing so that we need not hold onto the resource bundle. - StringResource *sr = static_cast(sequence); - rules = sr->fString; - } - } - } -} - -// Quick-and-dirty escaping function. -// Assumes that we are on an ASCII-based platform. -static void -escape(const UChar *s, char *buffer) { - int32_t length = u_strlen(s); - int32_t i = 0; - for (;;) { - UChar32 c; - U16_NEXT(s, i, length, c); - if (c == 0) { - *buffer = 0; - return; - } else if (0x20 <= c && c <= 0x7e) { - // printable ASCII - *buffer++ = (char)c; // assumes ASCII-based platform - } else { - buffer += sprintf(buffer, "\\u%04X", (int)c); - } - } -} - -} // namespace - -#endif // !UCONFIG_NO_COLLATION - -static TableResource * -addCollation(ParseState* state, TableResource *result, const char *collationType, - uint32_t startline, UErrorCode *status) -{ - // TODO: Use LocalPointer for result, or make caller close it when there is a failure. - struct SResource *member = NULL; - struct UString *tokenValue; - struct UString comment; - enum ETokenType token; - char subtag[1024]; - UnicodeString rules; - UBool haveRules = FALSE; - UVersionInfo version; - uint32_t line; - - /* '{' . (name resource)* '}' */ - version[0]=0; version[1]=0; version[2]=0; version[3]=0; - - for (;;) - { - ustr_init(&comment); - token = getToken(state, &tokenValue, &comment, &line, status); - - if (token == TOK_CLOSE_BRACE) - { - break; - } - - if (token != TOK_STRING) - { - res_close(result); - *status = U_INVALID_FORMAT_ERROR; - - if (token == TOK_EOF) - { - error(startline, "unterminated table"); - } - else - { - error(line, "Unexpected token %s", tokenNames[token]); - } - - return NULL; - } - - u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - - member = parseResource(state, subtag, NULL, status); - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - if (result == NULL) - { - // Ignore the parsed resources, continue parsing. - } - else if (uprv_strcmp(subtag, "Version") == 0 && member->isString()) - { - StringResource *sr = static_cast(member); - char ver[40]; - int32_t length = sr->length(); - - if (length >= UPRV_LENGTHOF(ver)) - { - length = UPRV_LENGTHOF(ver) - 1; - } - - sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV); - u_versionFromString(version, ver); - - result->add(member, line, *status); - member = NULL; - } - else if(uprv_strcmp(subtag, "%%CollationBin")==0) - { - /* discard duplicate %%CollationBin if any*/ - } - else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString()) - { - StringResource *sr = static_cast(member); - rules = sr->fString; - haveRules = TRUE; - // Defer building the collator until we have seen - // all sub-elements of the collation table, including the Version. - /* in order to achieve smaller data files, we can direct genrb */ - /* to omit collation rules */ - if(!state->omitCollationRules) { - result->add(member, line, *status); - member = NULL; - } - } - else // Just copy non-special items. - { - result->add(member, line, *status); - member = NULL; - } - res_close(member); // TODO: use LocalPointer - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - } - - if (!haveRules) { return result; } - -#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO - warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); - (void)collationType; -#else - // CLDR ticket #3949, ICU ticket #8082: - // Do not build collation binary data for for-import-only "private" collation rule strings. - if (uprv_strncmp(collationType, "private-", 8) == 0) { - if(isVerbose()) { - printf("Not building %s~%s collation binary\n", state->filename, collationType); - } - return result; - } - - if(!state->makeBinaryCollation) { - if(isVerbose()) { - printf("Not building %s~%s collation binary\n", state->filename, collationType); - } - return result; - } - UErrorCode intStatus = U_ZERO_ERROR; - UParseError parseError; - uprv_memset(&parseError, 0, sizeof(parseError)); - GenrbImporter importer(state->inputdir, state->outputdir); - const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus); - if(U_FAILURE(intStatus)) { - error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus)); - res_close(result); - return NULL; // TODO: use LocalUResourceBundlePointer for result - } - icu::CollationBuilder builder(base, intStatus); - if(uprv_strncmp(collationType, "search", 6) == 0) { - builder.disableFastLatin(); // build fast-Latin table unless search collator - } - LocalPointer t( - builder.parseAndBuild(rules, version, &importer, &parseError, intStatus)); - if(U_FAILURE(intStatus)) { - const char *reason = builder.getErrorReason(); - if(reason == NULL) { reason = ""; } - error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s", - state->filename, collationType, - (long)parseError.offset, u_errorName(intStatus), reason); - if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) { - // Print pre- and post-context. - char preBuffer[100], postBuffer[100]; - escape(parseError.preContext, preBuffer); - escape(parseError.postContext, postBuffer); - error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer); - } - if(isStrict() || t.isNull()) { - *status = intStatus; - res_close(result); - return NULL; - } - } - icu::LocalMemory buffer; - int32_t capacity = 100000; - uint8_t *dest = buffer.allocateInsteadAndCopy(capacity); - if(dest == NULL) { - fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n", - (long)capacity); - *status = U_MEMORY_ALLOCATION_ERROR; - res_close(result); - return NULL; - } - int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1]; - int32_t totalSize = icu::CollationDataWriter::writeTailoring( - *t, *t->settings, indexes, dest, capacity, intStatus); - if(intStatus == U_BUFFER_OVERFLOW_ERROR) { - intStatus = U_ZERO_ERROR; - capacity = totalSize; - dest = buffer.allocateInsteadAndCopy(capacity); - if(dest == NULL) { - fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n", - (long)capacity); - *status = U_MEMORY_ALLOCATION_ERROR; - res_close(result); - return NULL; - } - totalSize = icu::CollationDataWriter::writeTailoring( - *t, *t->settings, indexes, dest, capacity, intStatus); - } - if(U_FAILURE(intStatus)) { - fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n", - u_errorName(intStatus)); - res_close(result); - return NULL; - } - if(isVerbose()) { - printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType); - icu::CollationInfo::printSizes(totalSize, indexes); - if(t->settings->hasReordering()) { - printf("%s~%s collation reordering ranges:\n", state->filename, collationType); - icu::CollationInfo::printReorderRanges( - *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength); - } -#if 0 // debugging output - } else { - printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType); - icu::CollationInfo::printSizes(totalSize, indexes); -#endif - } - struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status); - result->add(collationBin, line, *status); - if (U_FAILURE(*status)) { - res_close(result); - return NULL; - } -#endif - return result; -} - -static UBool -keepCollationType(const char * /*type*/) { - return TRUE; -} - -static struct SResource * -parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status) -{ - TableResource *result = NULL; - struct SResource *member = NULL; - struct UString *tokenValue; - struct UString comment; - enum ETokenType token; - char subtag[1024], typeKeyword[1024]; - uint32_t line; - - result = table_open(state->bundle, tag, NULL, status); - - if (result == NULL || U_FAILURE(*status)) - { - return NULL; - } - if(isVerbose()){ - printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - if(!newCollation) { - return addCollation(state, result, "(no type)", startline, status); - } - else { - for(;;) { - ustr_init(&comment); - token = getToken(state, &tokenValue, &comment, &line, status); - - if (token == TOK_CLOSE_BRACE) - { - return result; - } - - if (token != TOK_STRING) - { - res_close(result); - *status = U_INVALID_FORMAT_ERROR; - - if (token == TOK_EOF) - { - error(startline, "unterminated table"); - } - else - { - error(line, "Unexpected token %s", tokenNames[token]); - } - - return NULL; - } - - u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - - if (uprv_strcmp(subtag, "default") == 0) - { - member = parseResource(state, subtag, NULL, status); - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - - result->add(member, line, *status); - } - else - { - token = peekToken(state, 0, &tokenValue, &line, &comment, status); - /* this probably needs to be refactored or recursively use the parser */ - /* first we assume that our collation table won't have the explicit type */ - /* then, we cannot handle aliases */ - if(token == TOK_OPEN_BRACE) { - token = getToken(state, &tokenValue, &comment, &line, status); - TableResource *collationRes; - if (keepCollationType(subtag)) { - collationRes = table_open(state->bundle, subtag, NULL, status); - } else { - collationRes = NULL; - } - // need to parse the collation data regardless - collationRes = addCollation(state, collationRes, subtag, startline, status); - if (collationRes != NULL) { - result->add(collationRes, startline, *status); - } - } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ - /* we could have a table too */ - token = peekToken(state, 1, &tokenValue, &line, &comment, status); - u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1); - if(uprv_strcmp(typeKeyword, "alias") == 0) { - member = parseResource(state, subtag, NULL, status); - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - - result->add(member, line, *status); - } else { - res_close(result); - *status = U_INVALID_FORMAT_ERROR; - return NULL; - } - } else { - res_close(result); - *status = U_INVALID_FORMAT_ERROR; - return NULL; - } - } - - /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ - - /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - } - } -} - -/* Necessary, because CollationElements requires the bundle->fRoot member to be present which, - if this weren't special-cased, wouldn't be set until the entire file had been processed. */ -static struct SResource * -realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status) -{ - struct SResource *member = NULL; - struct UString *tokenValue=NULL; - struct UString comment; - enum ETokenType token; - char subtag[1024]; - uint32_t line; - UBool readToken = FALSE; - - /* '{' . (name resource)* '}' */ - - if(isVerbose()){ - printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - for (;;) - { - ustr_init(&comment); - token = getToken(state, &tokenValue, &comment, &line, status); - - if (token == TOK_CLOSE_BRACE) - { - if (!readToken) { - warning(startline, "Encountered empty table"); - } - return table; - } - - if (token != TOK_STRING) - { - *status = U_INVALID_FORMAT_ERROR; - - if (token == TOK_EOF) - { - error(startline, "unterminated table"); - } - else - { - error(line, "unexpected token %s", tokenNames[token]); - } - - return NULL; - } - - if(uprv_isInvariantUString(tokenValue->fChars, -1)) { - u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); - } else { - *status = U_INVALID_FORMAT_ERROR; - error(line, "invariant characters required for table keys"); - return NULL; - } - - if (U_FAILURE(*status)) - { - error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status)); - return NULL; - } - - member = parseResource(state, subtag, &comment, status); - - if (member == NULL || U_FAILURE(*status)) - { - error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status)); - return NULL; - } - - table->add(member, line, *status); - - if (U_FAILURE(*status)) - { - error(line, "parse error. Stopped parsing table with %s", u_errorName(*status)); - return NULL; - } - readToken = TRUE; - ustr_deinit(&comment); - } - - /* not reached */ - /* A compiler warning will appear if all paths don't contain a return statement. */ -/* *status = U_INTERNAL_PROGRAM_ERROR; - return NULL;*/ -} - -static struct SResource * -parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) -{ - if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0) - { - return parseCollationElements(state, tag, startline, FALSE, status); - } - if (tag != NULL && uprv_strcmp(tag, "collations") == 0) - { - return parseCollationElements(state, tag, startline, TRUE, status); - } - if(isVerbose()){ - printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - TableResource *result = table_open(state->bundle, tag, comment, status); - - if (result == NULL || U_FAILURE(*status)) - { - return NULL; - } - return realParseTable(state, result, tag, startline, status); -} - -static struct SResource * -parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) -{ - struct SResource *member = NULL; - struct UString *tokenValue; - struct UString memberComments; - enum ETokenType token; - UBool readToken = FALSE; - - ArrayResource *result = array_open(state->bundle, tag, comment, status); - - if (result == NULL || U_FAILURE(*status)) - { - return NULL; - } - if(isVerbose()){ - printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - ustr_init(&memberComments); - - /* '{' . resource [','] '}' */ - for (;;) - { - /* reset length */ - ustr_setlen(&memberComments, 0, status); - - /* check for end of array, but don't consume next token unless it really is the end */ - token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status); - - - if (token == TOK_CLOSE_BRACE) - { - getToken(state, NULL, NULL, NULL, status); - if (!readToken) { - warning(startline, "Encountered empty array"); - } - break; - } - - if (token == TOK_EOF) - { - res_close(result); - *status = U_INVALID_FORMAT_ERROR; - error(startline, "unterminated array"); - return NULL; - } - - /* string arrays are a special case */ - if (token == TOK_STRING) - { - getToken(state, &tokenValue, &memberComments, NULL, status); - member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status); - } - else - { - member = parseResource(state, NULL, &memberComments, status); - } - - if (member == NULL || U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - - result->add(member); - - /* eat optional comma if present */ - token = peekToken(state, 0, NULL, NULL, NULL, status); - - if (token == TOK_COMMA) - { - getToken(state, NULL, NULL, NULL, status); - } - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - readToken = TRUE; - } - - ustr_deinit(&memberComments); - return result; -} - -static struct SResource * -parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) -{ - enum ETokenType token; - char *string; - int32_t value; - UBool readToken = FALSE; - char *stopstring; - uint32_t len; - struct UString memberComments; - - IntVectorResource *result = intvector_open(state->bundle, tag, comment, status); - - if (result == NULL || U_FAILURE(*status)) - { - return NULL; - } - - if(isVerbose()){ - printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - ustr_init(&memberComments); - /* '{' . string [','] '}' */ - for (;;) - { - ustr_setlen(&memberComments, 0, status); - - /* check for end of array, but don't consume next token unless it really is the end */ - token = peekToken(state, 0, NULL, NULL,&memberComments, status); - - if (token == TOK_CLOSE_BRACE) - { - /* it's the end, consume the close brace */ - getToken(state, NULL, NULL, NULL, status); - if (!readToken) { - warning(startline, "Encountered empty int vector"); - } - ustr_deinit(&memberComments); - return result; - } - - string = getInvariantString(state, NULL, NULL, status); - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - - /* For handling illegal char in the Intvector */ - value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ - len=(uint32_t)(stopstring-string); - - if(len==uprv_strlen(string)) - { - result->add(value, *status); - uprv_free(string); - token = peekToken(state, 0, NULL, NULL, NULL, status); - } - else - { - uprv_free(string); - *status=U_INVALID_CHAR_FOUND; - } - - if (U_FAILURE(*status)) - { - res_close(result); - return NULL; - } - - /* the comma is optional (even though it is required to prevent the reader from concatenating - consecutive entries) so that a missing comma on the last entry isn't an error */ - if (token == TOK_COMMA) - { - getToken(state, NULL, NULL, NULL, status); - } - readToken = TRUE; - } - - /* not reached */ - /* A compiler warning will appear if all paths don't contain a return statement. */ -/* intvector_close(result, status); - *status = U_INTERNAL_PROGRAM_ERROR; - return NULL;*/ -} - -static struct SResource * -parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) -{ - uint32_t line; - LocalMemory string(getInvariantString(state, &line, NULL, status)); - if (string.isNull() || U_FAILURE(*status)) - { - return NULL; - } - - expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); - if (U_FAILURE(*status)) - { - return NULL; - } - - if(isVerbose()){ - printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - uint32_t count = (uint32_t)uprv_strlen(string.getAlias()); - if (count > 0){ - if((count % 2)==0){ - LocalMemory value; - if (value.allocateInsteadAndCopy(count) == NULL) - { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - char toConv[3] = {'\0', '\0', '\0'}; - for (uint32_t i = 0; i < count; i += 2) - { - toConv[0] = string[i]; - toConv[1] = string[i + 1]; - - char *stopstring; - value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); - uint32_t len=(uint32_t)(stopstring-toConv); - - if(len!=2) - { - *status=U_INVALID_CHAR_FOUND; - return NULL; - } - } - - return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status); - } - else - { - *status = U_INVALID_CHAR_FOUND; - error(line, "Encountered invalid binary value (length is odd)"); - return NULL; - } - } - else - { - warning(startline, "Encountered empty binary value"); - return bin_open(state->bundle, tag, 0, NULL, "", comment, status); - } -} - -static struct SResource * -parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) -{ - struct SResource *result = NULL; - int32_t value; - char *string; - char *stopstring; - uint32_t len; - - string = getInvariantString(state, NULL, NULL, status); - - if (string == NULL || U_FAILURE(*status)) - { - return NULL; - } - - expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); - - if (U_FAILURE(*status)) - { - uprv_free(string); - return NULL; - } - - if(isVerbose()){ - printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - if (uprv_strlen(string) <= 0) - { - warning(startline, "Encountered empty integer. Default value is 0."); - } - - /* Allow integer support for hexdecimal, octal digit and decimal*/ - /* and handle illegal char in the integer*/ - value = uprv_strtoul(string, &stopstring, 0); - len=(uint32_t)(stopstring-string); - if(len==uprv_strlen(string)) - { - result = int_open(state->bundle, tag, value, comment, status); - } - else - { - *status=U_INVALID_CHAR_FOUND; - } - uprv_free(string); - - return result; -} - -static struct SResource * -parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) -{ - uint32_t line; - LocalMemory filename(getInvariantString(state, &line, NULL, status)); - if (U_FAILURE(*status)) - { - return NULL; - } - - expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); - - if (U_FAILURE(*status)) - { - return NULL; - } - - if(isVerbose()){ - printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - /* Open the input file for reading */ - CharString fullname; - if (state->inputdir != NULL) { - fullname.append(state->inputdir, *status); - } - fullname.appendPathPart(filename.getAlias(), *status); - if (U_FAILURE(*status)) { - return NULL; - } - - FileStream *file = T_FileStream_open(fullname.data(), "rb"); - if (file == NULL) - { - error(line, "couldn't open input file %s", filename.getAlias()); - *status = U_FILE_ACCESS_ERROR; - return NULL; - } - - int32_t len = T_FileStream_size(file); - LocalMemory data; - if(data.allocateInsteadAndCopy(len) == NULL) - { - *status = U_MEMORY_ALLOCATION_ERROR; - T_FileStream_close (file); - return NULL; - } - - /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len); - T_FileStream_close (file); - - return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status); -} - -static struct SResource * -parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) -{ - struct SResource *result; - int32_t len=0; - char *filename; - uint32_t line; - UChar *pTarget = NULL; - - UCHARBUF *ucbuf; - char *fullname = NULL; - int32_t count = 0; - const char* cp = NULL; - const UChar* uBuffer = NULL; - - filename = getInvariantString(state, &line, NULL, status); - count = (int32_t)uprv_strlen(filename); - - if (U_FAILURE(*status)) - { - return NULL; - } - - expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); - - if (U_FAILURE(*status)) - { - uprv_free(filename); - return NULL; - } - - if(isVerbose()){ - printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); - /* test for NULL */ - if(fullname == NULL) - { - *status = U_MEMORY_ALLOCATION_ERROR; - uprv_free(filename); - return NULL; - } - - if(state->inputdir!=NULL){ - if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) - { - - uprv_strcpy(fullname, state->inputdir); - - fullname[state->inputdirLength] = U_FILE_SEP_CHAR; - fullname[state->inputdirLength + 1] = '\0'; - - uprv_strcat(fullname, filename); - } - else - { - uprv_strcpy(fullname, state->inputdir); - uprv_strcat(fullname, filename); - } - }else{ - uprv_strcpy(fullname,filename); - } - - ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status); - - if (U_FAILURE(*status)) { - error(line, "couldn't open input file %s\n", filename); - return NULL; - } - - uBuffer = ucbuf_getBuffer(ucbuf,&len,status); - result = string_open(state->bundle, tag, uBuffer, len, comment, status); - - ucbuf_close(ucbuf); - - uprv_free(pTarget); - - uprv_free(filename); - uprv_free(fullname); - - return result; -} - - - - - -U_STRING_DECL(k_type_string, "string", 6); -U_STRING_DECL(k_type_binary, "binary", 6); -U_STRING_DECL(k_type_bin, "bin", 3); -U_STRING_DECL(k_type_table, "table", 5); -U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17); -U_STRING_DECL(k_type_int, "int", 3); -U_STRING_DECL(k_type_integer, "integer", 7); -U_STRING_DECL(k_type_array, "array", 5); -U_STRING_DECL(k_type_alias, "alias", 5); -U_STRING_DECL(k_type_intvector, "intvector", 9); -U_STRING_DECL(k_type_import, "import", 6); -U_STRING_DECL(k_type_include, "include", 7); - -/* Various non-standard processing plugins that create one or more special resources. */ -U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); -U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); -U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); -U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); - -typedef enum EResourceType -{ - RESTYPE_UNKNOWN, - RESTYPE_STRING, - RESTYPE_BINARY, - RESTYPE_TABLE, - RESTYPE_TABLE_NO_FALLBACK, - RESTYPE_INTEGER, - RESTYPE_ARRAY, - RESTYPE_ALIAS, - RESTYPE_INTVECTOR, - RESTYPE_IMPORT, - RESTYPE_INCLUDE, - RESTYPE_PROCESS_UCA_RULES, - RESTYPE_PROCESS_COLLATION, - RESTYPE_PROCESS_TRANSLITERATOR, - RESTYPE_PROCESS_DEPENDENCY, - RESTYPE_RESERVED -} EResourceType; - -static struct { - const char *nameChars; /* only used for debugging */ - const UChar *nameUChars; - ParseResourceFunction *parseFunction; -} gResourceTypes[] = { - {"Unknown", NULL, NULL}, - {"string", k_type_string, parseString}, - {"binary", k_type_binary, parseBinary}, - {"table", k_type_table, parseTable}, - {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */ - {"integer", k_type_integer, parseInteger}, - {"array", k_type_array, parseArray}, - {"alias", k_type_alias, parseAlias}, - {"intvector", k_type_intvector, parseIntVector}, - {"import", k_type_import, parseImport}, - {"include", k_type_include, parseInclude}, - {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, - {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */}, - {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator}, - {"process(dependency)", k_type_plugin_dependency, parseDependency}, - {"reserved", NULL, NULL} -}; - -void initParser() -{ - U_STRING_INIT(k_type_string, "string", 6); - U_STRING_INIT(k_type_binary, "binary", 6); - U_STRING_INIT(k_type_bin, "bin", 3); - U_STRING_INIT(k_type_table, "table", 5); - U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17); - U_STRING_INIT(k_type_int, "int", 3); - U_STRING_INIT(k_type_integer, "integer", 7); - U_STRING_INIT(k_type_array, "array", 5); - U_STRING_INIT(k_type_alias, "alias", 5); - U_STRING_INIT(k_type_intvector, "intvector", 9); - U_STRING_INIT(k_type_import, "import", 6); - U_STRING_INIT(k_type_include, "include", 7); - - U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18); - U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18); - U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23); - U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19); -} - -static inline UBool isTable(enum EResourceType type) { - return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK); -} - -static enum EResourceType -parseResourceType(ParseState* state, UErrorCode *status) -{ - struct UString *tokenValue; - struct UString comment; - enum EResourceType result = RESTYPE_UNKNOWN; - uint32_t line=0; - ustr_init(&comment); - expect(state, TOK_STRING, &tokenValue, &comment, &line, status); - - if (U_FAILURE(*status)) - { - return RESTYPE_UNKNOWN; - } - - *status = U_ZERO_ERROR; - - /* Search for normal types */ - result=RESTYPE_UNKNOWN; - while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) { - if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) { - break; - } - } - /* Now search for the aliases */ - if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { - result = RESTYPE_INTEGER; - } - else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { - result = RESTYPE_BINARY; - } - else if (result == RESTYPE_RESERVED) { - char tokenBuffer[1024]; - u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); - tokenBuffer[sizeof(tokenBuffer) - 1] = 0; - *status = U_INVALID_FORMAT_ERROR; - error(line, "unknown resource type '%s'", tokenBuffer); - } - - return result; -} - -/* parse a non-top-level resource */ -static struct SResource * -parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status) -{ - enum ETokenType token; - enum EResourceType resType = RESTYPE_UNKNOWN; - ParseResourceFunction *parseFunction = NULL; - struct UString *tokenValue; - uint32_t startline; - uint32_t line; - - - token = getToken(state, &tokenValue, NULL, &startline, status); - - if(isVerbose()){ - printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); - } - - /* name . [ ':' type ] '{' resource '}' */ - /* This function parses from the colon onwards. If the colon is present, parse the - type then try to parse a resource of that type. If there is no explicit type, - work it out using the lookahead tokens. */ - switch (token) - { - case TOK_EOF: - *status = U_INVALID_FORMAT_ERROR; - error(startline, "Unexpected EOF encountered"); - return NULL; - - case TOK_ERROR: - *status = U_INVALID_FORMAT_ERROR; - return NULL; - - case TOK_COLON: - resType = parseResourceType(state, status); - expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status); - - if (U_FAILURE(*status)) - { - return NULL; - } - - break; - - case TOK_OPEN_BRACE: - break; - - default: - *status = U_INVALID_FORMAT_ERROR; - error(startline, "syntax error while reading a resource, expected '{' or ':'"); - return NULL; - } - - - if (resType == RESTYPE_UNKNOWN) - { - /* No explicit type, so try to work it out. At this point, we've read the first '{'. - We could have any of the following: - { { => array (nested) - { :/} => array - { string , => string array - - { string { => table - - { string :/{ => table - { string } => string - */ - - token = peekToken(state, 0, NULL, &line, NULL,status); - - if (U_FAILURE(*status)) - { - return NULL; - } - - if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE ) - { - resType = RESTYPE_ARRAY; - } - else if (token == TOK_STRING) - { - token = peekToken(state, 1, NULL, &line, NULL, status); - - if (U_FAILURE(*status)) - { - return NULL; - } - - switch (token) - { - case TOK_COMMA: resType = RESTYPE_ARRAY; break; - case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break; - case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break; - case TOK_COLON: resType = RESTYPE_TABLE; break; - default: - *status = U_INVALID_FORMAT_ERROR; - error(line, "Unexpected token after string, expected ',', '{' or '}'"); - return NULL; - } - } - else - { - *status = U_INVALID_FORMAT_ERROR; - error(line, "Unexpected token after '{'"); - return NULL; - } - - /* printf("Type guessed as %s\n", resourceNames[resType]); */ - } else if(resType == RESTYPE_TABLE_NO_FALLBACK) { - *status = U_INVALID_FORMAT_ERROR; - error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); - return NULL; - } - - - /* We should now know what we need to parse next, so call the appropriate parser - function and return. */ - parseFunction = gResourceTypes[resType].parseFunction; - if (parseFunction != NULL) { - return parseFunction(state, tag, startline, comment, status); - } - else { - *status = U_INTERNAL_PROGRAM_ERROR; - error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars); - } - - return NULL; -} - -/* parse the top-level resource */ -struct SRBRoot * -parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename, - UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status) -{ - struct UString *tokenValue; - struct UString comment; - uint32_t line; - enum EResourceType bundleType; - enum ETokenType token; - ParseState state; - uint32_t i; - - - for (i = 0; i < MAX_LOOKAHEAD + 1; i++) - { - ustr_init(&state.lookahead[i].value); - ustr_init(&state.lookahead[i].comment); - } - - initLookahead(&state, buf, status); - - state.inputdir = inputDir; - state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0; - state.outputdir = outputDir; - state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0; - state.filename = filename; - state.makeBinaryCollation = makeBinaryCollation; - state.omitCollationRules = omitCollationRules; - - ustr_init(&comment); - expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); - - state.bundle = new SRBRoot(&comment, FALSE, *status); - - if (state.bundle == NULL || U_FAILURE(*status)) - { - return NULL; - } - - - state.bundle->setLocale(tokenValue->fChars, *status); - - /* The following code is to make Empty bundle work no matter with :table specifer or not */ - token = getToken(&state, NULL, NULL, &line, status); - if(token==TOK_COLON) { - *status=U_ZERO_ERROR; - bundleType=parseResourceType(&state, status); - - if(isTable(bundleType)) - { - expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status); - } - else - { - *status=U_PARSE_ERROR; - error(line, "parse error. Stopped parsing with %s", u_errorName(*status)); - } - } - else - { - /* not a colon */ - if(token==TOK_OPEN_BRACE) - { - *status=U_ZERO_ERROR; - bundleType=RESTYPE_TABLE; - } - else - { - /* neither colon nor open brace */ - *status=U_PARSE_ERROR; - bundleType=RESTYPE_UNKNOWN; - error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); - } - } - - if (U_FAILURE(*status)) - { - delete state.bundle; - return NULL; - } - - if(bundleType==RESTYPE_TABLE_NO_FALLBACK) { - /* - * Parse a top-level table with the table(nofallback) declaration. - * This is the same as a regular table, but also sets the - * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . - */ - state.bundle->fNoFallback=TRUE; - } - /* top-level tables need not handle special table names like "collations" */ - assert(!state.bundle->fIsPoolBundle); - assert(state.bundle->fRoot->fType == URES_TABLE); - TableResource *rootTable = static_cast(state.bundle->fRoot); - realParseTable(&state, rootTable, NULL, line, status); - if(dependencyArray!=NULL){ - rootTable->add(dependencyArray, 0, *status); - dependencyArray = NULL; - } - if (U_FAILURE(*status)) - { - delete state.bundle; - res_close(dependencyArray); - return NULL; - } - - if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF) - { - warning(line, "extraneous text after resource bundle (perhaps unmatched braces)"); - if(isStrict()){ - *status = U_INVALID_FORMAT_ERROR; - return NULL; - } - } - - cleanupLookahead(&state); - ustr_deinit(&comment); - return state.bundle; -} -- cgit v1.2.3