From da736d8259331a8ef13bf4bbb10bbb8a5c0e5299 Mon Sep 17 00:00:00 2001 From: Florian Dold Date: Tue, 13 Aug 2019 12:29:07 +0200 Subject: remove node/v8 from source tree --- .../deps/icu-small/source/tools/toolutil/ucm.cpp | 1195 -------------------- 1 file changed, 1195 deletions(-) delete mode 100644 deps/node/deps/icu-small/source/tools/toolutil/ucm.cpp (limited to 'deps/node/deps/icu-small/source/tools/toolutil/ucm.cpp') diff --git a/deps/node/deps/icu-small/source/tools/toolutil/ucm.cpp b/deps/node/deps/icu-small/source/tools/toolutil/ucm.cpp deleted file mode 100644 index 28c3f3f4..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/ucm.cpp +++ /dev/null @@ -1,1195 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucm.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003jun20 -* created by: Markus W. Scherer -* -* This file reads a .ucm file, stores its mappings and sorts them. -* It implements handling of Unicode conversion mappings from .ucm files -* for makeconv, canonucm, rptp2ucm, etc. -* -* Unicode code point sequences with a length of more than 1, -* as well as byte sequences with more than 4 bytes or more than one complete -* character sequence are handled to support m:n mappings. -*/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "cstring.h" -#include "cmemory.h" -#include "filestrm.h" -#include "uarrsort.h" -#include "ucnvmbcs.h" -#include "ucnv_bld.h" -#include "ucnv_ext.h" -#include "uparse.h" -#include "ucm.h" -#include - -#if !UCONFIG_NO_CONVERSION - -/* -------------------------------------------------------------------------- */ - -static void -printMapping(UCMapping *m, UChar32 *codePoints, uint8_t *bytes, FILE *f) { - int32_t j; - - for(j=0; juLen; ++j) { - fprintf(f, "", (long)codePoints[j]); - } - - fputc(' ', f); - - for(j=0; jbLen; ++j) { - fprintf(f, "\\x%02X", bytes[j]); - } - - if(m->f>=0) { - fprintf(f, " |%u\n", m->f); - } else { - fputs("\n", f); - } -} - -U_CAPI void U_EXPORT2 -ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f) { - printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), f); -} - -U_CAPI void U_EXPORT2 -ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) { - UCMapping *m; - int32_t i, length; - - m=table->mappings; - length=table->mappingsLength; - if(byUnicode) { - for(i=0; ireverseMap; - for(i=0; iuLen==1 && r->uLen==1) { - /* compare two single code points */ - return l->u-r->u; - } - - /* get pointers to the code point sequences */ - lu=UCM_GET_CODE_POINTS(lTable, l); - ru=UCM_GET_CODE_POINTS(rTable, r); - - /* get the minimum length */ - if(l->uLen<=r->uLen) { - length=l->uLen; - } else { - length=r->uLen; - } - - /* compare the code points */ - for(i=0; iuLen-r->uLen; -} - -static int32_t -compareBytes(UCMTable *lTable, const UCMapping *l, - UCMTable *rTable, const UCMapping *r, - UBool lexical) { - const uint8_t *lb, *rb; - int32_t result, i, length; - - /* - * A lexical comparison is used for sorting in the builder, to allow - * an efficient search for a byte sequence that could be a prefix - * of a previously entered byte sequence. - * - * Comparing by lengths first is for compatibility with old .ucm tools - * like canonucm and rptp2ucm. - */ - if(lexical) { - /* get the minimum length and continue */ - if(l->bLen<=r->bLen) { - length=l->bLen; - } else { - length=r->bLen; - } - } else { - /* compare lengths first */ - result=l->bLen-r->bLen; - if(result!=0) { - return result; - } else { - length=l->bLen; - } - } - - /* get pointers to the byte sequences */ - lb=UCM_GET_BYTES(lTable, l); - rb=UCM_GET_BYTES(rTable, r); - - /* compare the bytes */ - for(i=0; ibLen-r->bLen; -} - -/* compare UCMappings for sorting */ -static int32_t -compareMappings(UCMTable *lTable, const UCMapping *l, - UCMTable *rTable, const UCMapping *r, - UBool uFirst) { - int32_t result; - - /* choose which side to compare first */ - if(uFirst) { - /* Unicode then bytes */ - result=compareUnicode(lTable, l, rTable, r); - if(result==0) { - result=compareBytes(lTable, l, rTable, r, FALSE); /* not lexically, like canonucm */ - } - } else { - /* bytes then Unicode */ - result=compareBytes(lTable, l, rTable, r, TRUE); /* lexically, for builder */ - if(result==0) { - result=compareUnicode(lTable, l, rTable, r); - } - } - - if(result!=0) { - return result; - } - - /* compare the flags */ - return l->f-r->f; -} -U_CDECL_BEGIN -/* sorting by Unicode first sorts mappings directly */ -static int32_t U_CALLCONV -compareMappingsUnicodeFirst(const void *context, const void *left, const void *right) { - return compareMappings( - (UCMTable *)context, (const UCMapping *)left, - (UCMTable *)context, (const UCMapping *)right, TRUE); -} - -/* sorting by bytes first sorts the reverseMap; use indirection to mappings */ -static int32_t U_CALLCONV -compareMappingsBytesFirst(const void *context, const void *left, const void *right) { - UCMTable *table=(UCMTable *)context; - int32_t l=*(const int32_t *)left, r=*(const int32_t *)right; - return compareMappings( - table, table->mappings+l, - table, table->mappings+r, FALSE); -} -U_CDECL_END - -U_CAPI void U_EXPORT2 -ucm_sortTable(UCMTable *t) { - UErrorCode errorCode; - int32_t i; - - if(t->isSorted) { - return; - } - - errorCode=U_ZERO_ERROR; - - /* 1. sort by Unicode first */ - uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping), - compareMappingsUnicodeFirst, t, - FALSE, &errorCode); - - /* build the reverseMap */ - if(t->reverseMap==NULL) { - /* - * allocate mappingsCapacity instead of mappingsLength so that - * if mappings are added, the reverseMap need not be - * reallocated each time - * (see ucm_moveMappings() and ucm_addMapping()) - */ - t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t)); - if(t->reverseMap==NULL) { - fprintf(stderr, "ucm error: unable to allocate reverseMap\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - for(i=0; imappingsLength; ++i) { - t->reverseMap[i]=i; - } - - /* 2. sort reverseMap by mappings bytes first */ - uprv_sortArray(t->reverseMap, t->mappingsLength, sizeof(int32_t), - compareMappingsBytesFirst, t, - FALSE, &errorCode); - - if(U_FAILURE(errorCode)) { - fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n", - u_errorName(errorCode)); - exit(errorCode); - } - - t->isSorted=TRUE; -} - -/* - * remove mappings with their move flag set from the base table - * and move some of them (with UCM_MOVE_TO_EXT) to the extension table - */ -U_CAPI void U_EXPORT2 -ucm_moveMappings(UCMTable *base, UCMTable *ext) { - UCMapping *mb, *mbLimit; - int8_t flag; - - mb=base->mappings; - mbLimit=mb+base->mappingsLength; - - while(mbmoveFlag; - if(flag!=0) { - /* reset the move flag */ - mb->moveFlag=0; - - if(ext!=NULL && (flag&UCM_MOVE_TO_EXT)) { - /* add the mapping to the extension table */ - ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb), UCM_GET_BYTES(base, mb)); - } - - /* remove this mapping: move the last base mapping down and overwrite the current one */ - if(mb<(mbLimit-1)) { - uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping)); - } - --mbLimit; - --base->mappingsLength; - base->isSorted=FALSE; - } else { - ++mb; - } - } -} - -enum { - NEEDS_MOVE=1, - HAS_ERRORS=2 -}; - -static uint8_t -checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext, - UBool moveToExt, UBool intersectBase) { - (void)baseStates; - - UCMapping *mb, *me, *mbLimit, *meLimit; - int32_t cmp; - uint8_t result; - - mb=base->mappings; - mbLimit=mb+base->mappingsLength; - - me=ext->mappings; - meLimit=me+ext->mappingsLength; - - result=0; - - for(;;) { - /* skip irrelevant mappings on both sides */ - for(;;) { - if(mb==mbLimit) { - return result; - } - - if((0<=mb->f && mb->f<=2) || mb->f==4) { - break; - } - - ++mb; - } - - for(;;) { - if(me==meLimit) { - return result; - } - - if((0<=me->f && me->f<=2) || me->f==4) { - break; - } - - ++me; - } - - /* compare the base and extension mappings */ - cmp=compareUnicode(base, mb, ext, me); - if(cmp<0) { - if(intersectBase && (intersectBase!=2 || mb->bLen>1)) { - /* - * mapping in base but not in ext, move it - * - * if ext is DBCS, move DBCS mappings here - * and check SBCS ones for Unicode prefix below - */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - - /* does mb map from an input sequence that is a prefix of me's? */ - } else if( mb->uLenuLen && - 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) - ) { - if(moveToExt) { - /* mark this mapping to be moved to the extension table */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is a prefix of the input sequence of an extension mapping\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - } - - ++mb; - } else if(cmp==0) { - /* - * same output: remove the extension mapping, - * otherwise treat as an error - */ - if( mb->f==me->f && mb->bLen==me->bLen && - 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) - ) { - me->moveFlag|=UCM_REMOVE_MAPPING; - result|=NEEDS_MOVE; - } else if(intersectBase) { - /* mapping in base but not in ext, move it */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is the same as the input sequence of an extension mapping\n" - " but it maps differently\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - - ++mb; - } else /* cmp>0 */ { - ++me; - } - } -} - -static uint8_t -checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, - UBool moveToExt, UBool intersectBase) { - UCMapping *mb, *me; - int32_t *baseMap, *extMap; - int32_t b, e, bLimit, eLimit, cmp; - uint8_t result; - UBool isSISO; - - baseMap=base->reverseMap; - extMap=ext->reverseMap; - - b=e=0; - bLimit=base->mappingsLength; - eLimit=ext->mappingsLength; - - result=0; - - isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO); - - for(;;) { - /* skip irrelevant mappings on both sides */ - for(;; ++b) { - if(b==bLimit) { - return result; - } - mb=base->mappings+baseMap[b]; - - if(intersectBase==2 && mb->bLen==1) { - /* - * comparing a base against a DBCS extension: - * leave SBCS base mappings alone - */ - continue; - } - - if(mb->f==0 || mb->f==3) { - break; - } - } - - for(;;) { - if(e==eLimit) { - return result; - } - me=ext->mappings+extMap[e]; - - if(me->f==0 || me->f==3) { - break; - } - - ++e; - } - - /* compare the base and extension mappings */ - cmp=compareBytes(base, mb, ext, me, TRUE); - if(cmp<0) { - if(intersectBase) { - /* mapping in base but not in ext, move it */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - - /* - * does mb map from an input sequence that is a prefix of me's? - * for SI/SO tables, a single byte is never a prefix because it - * occurs in a separate single-byte state - */ - } else if( mb->bLenbLen && - (!isSISO || mb->bLen>1) && - 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) - ) { - if(moveToExt) { - /* mark this mapping to be moved to the extension table */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is a prefix of the input sequence of an extension mapping\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - } - - ++b; - } else if(cmp==0) { - /* - * same output: remove the extension mapping, - * otherwise treat as an error - */ - if( mb->f==me->f && mb->uLen==me->uLen && - 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) - ) { - me->moveFlag|=UCM_REMOVE_MAPPING; - result|=NEEDS_MOVE; - } else if(intersectBase) { - /* mapping in base but not in ext, move it */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is the same as the input sequence of an extension mapping\n" - " but it maps differently\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - - ++b; - } else /* cmp>0 */ { - ++e; - } - } -} - -U_CAPI UBool U_EXPORT2 -ucm_checkValidity(UCMTable *table, UCMStates *baseStates) { - UCMapping *m, *mLimit; - int32_t count; - UBool isOK; - - m=table->mappings; - mLimit=m+table->mappingsLength; - isOK=TRUE; - - while(mbLen); - if(count<1) { - ucm_printMapping(table, m, stderr); - isOK=FALSE; - } - ++m; - } - - return isOK; -} - -U_CAPI UBool U_EXPORT2 -ucm_checkBaseExt(UCMStates *baseStates, - UCMTable *base, UCMTable *ext, UCMTable *moveTarget, - UBool intersectBase) { - uint8_t result; - - /* if we have an extension table, we must always use precision flags */ - if(base->flagsType&UCM_FLAGS_IMPLICIT) { - fprintf(stderr, "ucm error: the base table contains mappings without precision flags\n"); - return FALSE; - } - if(ext->flagsType&UCM_FLAGS_IMPLICIT) { - fprintf(stderr, "ucm error: extension table contains mappings without precision flags\n"); - return FALSE; - } - - /* checking requires both tables to be sorted */ - ucm_sortTable(base); - ucm_sortTable(ext); - - /* check */ - result= - checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase)| - checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase); - - if(result&HAS_ERRORS) { - return FALSE; - } - - if(result&NEEDS_MOVE) { - ucm_moveMappings(ext, NULL); - ucm_moveMappings(base, moveTarget); - ucm_sortTable(base); - ucm_sortTable(ext); - if(moveTarget!=NULL) { - ucm_sortTable(moveTarget); - } - } - - return TRUE; -} - -/* merge tables for rptp2ucm ------------------------------------------------ */ - -U_CAPI void U_EXPORT2 -ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable, - const uint8_t *subchar, int32_t subcharLength, - uint8_t subchar1) { - UCMapping *fromUMapping, *toUMapping; - int32_t fromUIndex, toUIndex, fromUTop, toUTop, cmp; - - ucm_sortTable(fromUTable); - ucm_sortTable(toUTable); - - fromUMapping=fromUTable->mappings; - toUMapping=toUTable->mappings; - - fromUTop=fromUTable->mappingsLength; - toUTop=toUTable->mappingsLength; - - fromUIndex=toUIndex=0; - - while(fromUIndexcodepage - */ - if( (fromUMapping->bLen==subcharLength && - 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || - (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) - ) { - fromUMapping->f=2; /* SUB mapping */ - } else { - fromUMapping->f=1; /* normal fallback */ - } - - ++fromUMapping; - ++fromUIndex; - } else { - /* - * the toU mapping does not have a fromU counterpart: - * (reverse) fallback codepage->Unicode, copy it to the fromU table - */ - - /* ignore reverse fallbacks to Unicode SUB */ - if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { - toUMapping->f=3; /* reverse fallback */ - ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); - - /* the table may have been reallocated */ - fromUMapping=fromUTable->mappings+fromUIndex; - } - - ++toUMapping; - ++toUIndex; - } - } - - /* either one or both tables are exhausted */ - while(fromUIndexbLen==subcharLength && - 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || - (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) - ) { - fromUMapping->f=2; /* SUB mapping */ - } else { - fromUMapping->f=1; /* normal fallback */ - } - - ++fromUMapping; - ++fromUIndex; - } - - while(toUIndexuLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { - toUMapping->f=3; /* reverse fallback */ - ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); - } - - ++toUMapping; - ++toUIndex; - } - - fromUTable->isSorted=FALSE; -} - -/* separate extension mappings out of base table for rptp2ucm --------------- */ - -U_CAPI UBool U_EXPORT2 -ucm_separateMappings(UCMFile *ucm, UBool isSISO) { - UCMTable *table; - UCMapping *m, *mLimit; - int32_t type; - UBool needsMove, isOK; - - table=ucm->base; - m=table->mappings; - mLimit=m+table->mappingsLength; - - needsMove=FALSE; - isOK=TRUE; - - for(; mbLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { - fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n"); - ucm_printMapping(table, m, stderr); - m->moveFlag|=UCM_REMOVE_MAPPING; - needsMove=TRUE; - continue; - } - - type=ucm_mappingType( - &ucm->states, m, - UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m)); - if(type<0) { - /* illegal byte sequence */ - printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr); - isOK=FALSE; - } else if(type>0) { - m->moveFlag|=UCM_MOVE_TO_EXT; - needsMove=TRUE; - } - } - - if(!isOK) { - return FALSE; - } - if(needsMove) { - ucm_moveMappings(ucm->base, ucm->ext); - return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE); - } else { - ucm_sortTable(ucm->base); - return TRUE; - } -} - -/* ucm parser --------------------------------------------------------------- */ - -U_CAPI int8_t U_EXPORT2 -ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps) { - const char *s=*ps; - char *end; - uint8_t byte; - int8_t bLen; - - bLen=0; - for(;;) { - /* skip an optional plus sign */ - if(bLen>0 && *s=='+') { - ++s; - } - if(*s!='\\') { - break; - } - - if( s[1]!='x' || - (byte=(uint8_t)uprv_strtoul(s+2, &end, 16), end)!=s+4 - ) { - fprintf(stderr, "ucm error: byte must be formatted as \\xXX (2 hex digits) - \"%s\"\n", line); - return -1; - } - - if(bLen==UCNV_EXT_MAX_BYTES) { - fprintf(stderr, "ucm error: too many bytes on \"%s\"\n", line); - return -1; - } - bytes[bLen++]=byte; - s=end; - } - - *ps=s; - return bLen; -} - -/* parse a mapping line; must not be empty */ -U_CAPI UBool U_EXPORT2 -ucm_parseMappingLine(UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES], - const char *line) { - const char *s; - char *end; - UChar32 cp; - int32_t u16Length; - int8_t uLen, bLen, f; - - s=line; - uLen=bLen=0; - - /* parse code points */ - for(;;) { - /* skip an optional plus sign */ - if(uLen>0 && *s=='+') { - ++s; - } - if(*s!='<') { - break; - } - - if( s[1]!='U' || - (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 || - *end!='>' - ) { - fprintf(stderr, "ucm error: Unicode code point must be formatted as (1..6 hex digits) - \"%s\"\n", line); - return FALSE; - } - if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) { - fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line); - return FALSE; - } - - if(uLen==UCNV_EXT_MAX_UCHARS) { - fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line); - return FALSE; - } - codePoints[uLen++]=cp; - s=end+1; - } - - if(uLen==0) { - fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line); - return FALSE; - } else if(uLen==1) { - m->u=codePoints[0]; - } else { - UErrorCode errorCode=U_ZERO_ERROR; - u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode); - if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) || - u16Length>UCNV_EXT_MAX_UCHARS - ) { - fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line); - return FALSE; - } - } - - s=u_skipWhitespace(s); - - /* parse bytes */ - bLen=ucm_parseBytes(bytes, line, &s); - - if(bLen<0) { - return FALSE; - } else if(bLen==0) { - fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line); - return FALSE; - } else if(bLen<=4) { - uprv_memcpy(m->b.bytes, bytes, bLen); - } - - /* skip everything until the fallback indicator, even the start of a comment */ - for(;;) { - if(*s==0) { - f=-1; /* no fallback indicator */ - break; - } else if(*s=='|') { - f=(int8_t)(s[1]-'0'); - if((uint8_t)f>4) { - fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line); - return FALSE; - } - break; - } - ++s; - } - - m->uLen=uLen; - m->bLen=bLen; - m->f=f; - return TRUE; -} - -/* general APIs ------------------------------------------------------------- */ - -U_CAPI UCMTable * U_EXPORT2 -ucm_openTable() { - UCMTable *table=(UCMTable *)uprv_malloc(sizeof(UCMTable)); - if(table==NULL) { - fprintf(stderr, "ucm error: unable to allocate a UCMTable\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - memset(table, 0, sizeof(UCMTable)); - return table; -} - -U_CAPI void U_EXPORT2 -ucm_closeTable(UCMTable *table) { - if(table!=NULL) { - uprv_free(table->mappings); - uprv_free(table->codePoints); - uprv_free(table->bytes); - uprv_free(table->reverseMap); - uprv_free(table); - } -} - -U_CAPI void U_EXPORT2 -ucm_resetTable(UCMTable *table) { - if(table!=NULL) { - table->mappingsLength=0; - table->flagsType=0; - table->unicodeMask=0; - table->bytesLength=table->codePointsLength=0; - table->isSorted=FALSE; - } -} - -U_CAPI void U_EXPORT2 -ucm_addMapping(UCMTable *table, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]) { - UCMapping *tm; - UChar32 c; - int32_t idx; - - if(table->mappingsLength>=table->mappingsCapacity) { - /* make the mappings array larger */ - if(table->mappingsCapacity==0) { - table->mappingsCapacity=1000; - } else { - table->mappingsCapacity*=10; - } - table->mappings=(UCMapping *)uprv_realloc(table->mappings, - table->mappingsCapacity*sizeof(UCMapping)); - if(table->mappings==NULL) { - fprintf(stderr, "ucm error: unable to allocate %d UCMappings\n", - (int)table->mappingsCapacity); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - if(table->reverseMap!=NULL) { - /* the reverseMap must be reallocated in a new sort */ - uprv_free(table->reverseMap); - table->reverseMap=NULL; - } - } - - if(m->uLen>1 && table->codePointsCapacity==0) { - table->codePointsCapacity=10000; - table->codePoints=(UChar32 *)uprv_malloc(table->codePointsCapacity*4); - if(table->codePoints==NULL) { - fprintf(stderr, "ucm error: unable to allocate %d UChar32s\n", - (int)table->codePointsCapacity); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - - if(m->bLen>4 && table->bytesCapacity==0) { - table->bytesCapacity=10000; - table->bytes=(uint8_t *)uprv_malloc(table->bytesCapacity); - if(table->bytes==NULL) { - fprintf(stderr, "ucm error: unable to allocate %d bytes\n", - (int)table->bytesCapacity); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - - if(m->uLen>1) { - idx=table->codePointsLength; - table->codePointsLength+=m->uLen; - if(table->codePointsLength>table->codePointsCapacity) { - fprintf(stderr, "ucm error: too many code points in multiple-code point mappings\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - uprv_memcpy(table->codePoints+idx, codePoints, (size_t)m->uLen*4); - m->u=idx; - } - - if(m->bLen>4) { - idx=table->bytesLength; - table->bytesLength+=m->bLen; - if(table->bytesLength>table->bytesCapacity) { - fprintf(stderr, "ucm error: too many bytes in mappings with >4 charset bytes\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - uprv_memcpy(table->bytes+idx, bytes, m->bLen); - m->b.idx=idx; - } - - /* set unicodeMask */ - for(idx=0; idxuLen; ++idx) { - c=codePoints[idx]; - if(c>=0x10000) { - table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */ - } else if(U_IS_SURROGATE(c)) { - table->unicodeMask|=UCNV_HAS_SURROGATES; /* there are surrogate code points */ - } - } - - /* set flagsType */ - if(m->f<0) { - table->flagsType|=UCM_FLAGS_IMPLICIT; - } else { - table->flagsType|=UCM_FLAGS_EXPLICIT; - } - - tm=table->mappings+table->mappingsLength++; - uprv_memcpy(tm, m, sizeof(UCMapping)); - - table->isSorted=FALSE; -} - -U_CAPI UCMFile * U_EXPORT2 -ucm_open() { - UCMFile *ucm=(UCMFile *)uprv_malloc(sizeof(UCMFile)); - if(ucm==NULL) { - fprintf(stderr, "ucm error: unable to allocate a UCMFile\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - memset(ucm, 0, sizeof(UCMFile)); - - ucm->base=ucm_openTable(); - ucm->ext=ucm_openTable(); - - ucm->states.stateFlags[0]=MBCS_STATE_FLAG_DIRECT; - ucm->states.conversionType=UCNV_UNSUPPORTED_CONVERTER; - ucm->states.outputType=-1; - ucm->states.minCharLength=ucm->states.maxCharLength=1; - - return ucm; -} - -U_CAPI void U_EXPORT2 -ucm_close(UCMFile *ucm) { - if(ucm!=NULL) { - ucm_closeTable(ucm->base); - ucm_closeTable(ucm->ext); - uprv_free(ucm); - } -} - -U_CAPI int32_t U_EXPORT2 -ucm_mappingType(UCMStates *baseStates, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]) { - (void)codePoints; - /* check validity of the bytes and count the characters in them */ - int32_t count=ucm_countChars(baseStates, bytes, m->bLen); - if(count<1) { - /* illegal byte sequence */ - return -1; - } - - /* - * Suitable for an ICU conversion base table means: - * - a 1:1 mapping (1 Unicode code point : 1 byte sequence) - * - precision flag 0..3 - * - SBCS: any 1:1 mapping - * (the table stores additional bits to distinguish mapping types) - * - MBCS: not a |2 SUB mapping for - * - MBCS: not a |1 fallback to 0x00 - * - MBCS: not a multi-byte mapping with leading 0x00 bytes - * - * Further restrictions for fromUnicode tables - * are enforced in makeconv (MBCSOkForBaseFromUnicode()). - * - * All of the MBCS fromUnicode specific tests could be removed from here, - * but the ones above are for unusual mappings, and removing the tests - * from here would change canonucm output which seems gratuitous. - * (Markus Scherer 2006-nov-28) - * - * Exception: All implicit mappings (f<0) that need to be moved - * because of fromUnicode restrictions _must_ be moved here because - * makeconv uses a hack for moving mappings only for the fromUnicode table - * that only works with non-negative values of f. - */ - if( m->uLen==1 && count==1 && m->f<=3 && - (baseStates->maxCharLength==1 || - !((m->f==2 && m->bLen==1) || - (m->f==1 && bytes[0]==0) || - (m->f<=1 && m->bLen>1 && bytes[0]==0))) - ) { - return 0; /* suitable for a base table */ - } else { - return 1; /* needs to go into an extension table */ - } -} - -U_CAPI UBool U_EXPORT2 -ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]) { - int32_t type; - - if(m->f==2 && m->uLen>1) { - fprintf(stderr, "ucm error: illegal |2 mapping from multiple code points\n"); - printMapping(m, codePoints, bytes, stderr); - return FALSE; - } - - if(baseStates!=NULL) { - /* check validity of the bytes and count the characters in them */ - type=ucm_mappingType(baseStates, m, codePoints, bytes); - if(type<0) { - /* illegal byte sequence */ - printMapping(m, codePoints, bytes, stderr); - return FALSE; - } - } else { - /* not used - adding a mapping for an extension-only table before its base table is read */ - type=1; - } - - /* - * Add the mapping to the base table if this is requested and suitable. - * Otherwise, add it to the extension table. - */ - if(forBase && type==0) { - ucm_addMapping(ucm->base, m, codePoints, bytes); - } else { - ucm_addMapping(ucm->ext, m, codePoints, bytes); - } - - return TRUE; -} - -U_CAPI UBool U_EXPORT2 -ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates) { - UCMapping m={ 0, {0}, 0, 0, 0, 0 }; - UChar32 codePoints[UCNV_EXT_MAX_UCHARS]; - uint8_t bytes[UCNV_EXT_MAX_BYTES]; - - const char *s; - - /* ignore empty and comment lines */ - if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') { - return TRUE; - } - - return - ucm_parseMappingLine(&m, codePoints, bytes, line) && - ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes); -} - -U_CAPI void U_EXPORT2 -ucm_readTable(UCMFile *ucm, FileStream* convFile, - UBool forBase, UCMStates *baseStates, - UErrorCode *pErrorCode) { - char line[500]; - char *end; - UBool isOK; - - if(U_FAILURE(*pErrorCode)) { - return; - } - - isOK=TRUE; - - for(;;) { - /* read the next line */ - if(!T_FileStream_readLine(convFile, line, sizeof(line))) { - fprintf(stderr, "incomplete charmap section\n"); - isOK=FALSE; - break; - } - - /* remove CR LF */ - end=uprv_strchr(line, 0); - while(line