diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/tools/toolutil')
46 files changed, 0 insertions, 15036 deletions
diff --git a/deps/node/deps/icu-small/source/tools/toolutil/collationinfo.cpp b/deps/node/deps/icu-small/source/tools/toolutil/collationinfo.cpp deleted file mode 100644 index 6bad90e1..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/collationinfo.cpp +++ /dev/null @@ -1,152 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2013-2015, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* collationinfo.cpp -* -* created on: 2013aug05 -* created by: Markus W. Scherer -*/ - -#include <stdio.h> -#include <string.h> - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_COLLATION - -#include "collationdata.h" -#include "collationdatareader.h" -#include "collationinfo.h" -#include "uassert.h" -#include "uvectr32.h" - -U_NAMESPACE_BEGIN - -void -CollationInfo::printSizes(int32_t sizeWithHeader, const int32_t indexes[]) { - int32_t totalSize = indexes[CollationDataReader::IX_TOTAL_SIZE]; - if(sizeWithHeader > totalSize) { - printf(" header size: %6ld\n", (long)(sizeWithHeader - totalSize)); - } - - int32_t length = indexes[CollationDataReader::IX_INDEXES_LENGTH]; - printf(" indexes: %6ld *4 = %6ld\n", (long)length, (long)length * 4); - - length = getDataLength(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET); - if(length != 0) { - printf(" reorder codes: %6ld *4 = %6ld\n", (long)length / 4, (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET); - if(length != 0) { - U_ASSERT(length >= 256); - printf(" reorder table: %6ld\n", (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_TRIE_OFFSET); - if(length != 0) { - printf(" trie size: %6ld\n", (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_RESERVED8_OFFSET); - if(length != 0) { - printf(" reserved (offset 8): %6ld\n", (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_CES_OFFSET); - if(length != 0) { - printf(" CEs: %6ld *8 = %6ld\n", (long)length / 8, (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_RESERVED10_OFFSET); - if(length != 0) { - printf(" reserved (offset 10): %6ld\n", (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_CE32S_OFFSET); - if(length != 0) { - printf(" CE32s: %6ld *4 = %6ld\n", (long)length / 4, (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET); - if(length != 0) { - printf(" rootElements: %6ld *4 = %6ld\n", (long)length / 4, (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_CONTEXTS_OFFSET); - if(length != 0) { - printf(" contexts: %6ld *2 = %6ld\n", (long)length / 2, (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_UNSAFE_BWD_OFFSET); - if(length != 0) { - printf(" unsafeBwdSet: %6ld *2 = %6ld\n", (long)length / 2, (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET); - if(length != 0) { - printf(" fastLatin table: %6ld *2 = %6ld\n", (long)length / 2, (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_SCRIPTS_OFFSET); - if(length != 0) { - printf(" scripts data: %6ld *2 = %6ld\n", (long)length / 2, (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET); - if(length != 0) { - U_ASSERT(length >= 256); - printf(" compressibleBytes: %6ld\n", (long)length); - } - - length = getDataLength(indexes, CollationDataReader::IX_RESERVED18_OFFSET); - if(length != 0) { - printf(" reserved (offset 18): %6ld\n", (long)length); - } - - printf(" collator binary total size: %6ld\n", (long)sizeWithHeader); -} - -int32_t -CollationInfo::getDataLength(const int32_t indexes[], int32_t startIndex) { - return indexes[startIndex + 1] - indexes[startIndex]; -} - -void -CollationInfo::printReorderRanges(const CollationData &data, const int32_t *codes, int32_t length) { - UErrorCode errorCode = U_ZERO_ERROR; - UVector32 ranges(errorCode); - data.makeReorderRanges(codes, length, ranges, errorCode); - if(U_FAILURE(errorCode)) { - printf(" error building reorder ranges: %s\n", u_errorName(errorCode)); - return; - } - - int32_t start = 0; - for(int32_t i = 0; i < ranges.size(); ++i) { - int32_t pair = ranges.elementAti(i); - int32_t limit = (pair >> 16) & 0xffff; - int16_t offset = (int16_t)pair; - if(offset == 0) { - // [inclusive-start, exclusive-limit[ - printf(" [%04x, %04x[\n", start, limit); - } else if(offset > 0) { - printf(" reorder [%04x, %04x[ by offset %02x to [%04x, %04x[\n", - start, limit, offset, - start + (offset << 8), limit + (offset << 8)); - } else /* offset < 0 */ { - printf(" reorder [%04x, %04x[ by offset -%02x to [%04x, %04x[\n", - start, limit, -offset, - start + (offset << 8), limit + (offset << 8)); - } - start = limit; - } -} - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_COLLATION diff --git a/deps/node/deps/icu-small/source/tools/toolutil/collationinfo.h b/deps/node/deps/icu-small/source/tools/toolutil/collationinfo.h deleted file mode 100644 index 815b89d4..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/collationinfo.h +++ /dev/null @@ -1,42 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2013-2015, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* collationinfo.h -* -* created on: 2013aug05 -* created by: Markus W. Scherer -*/ - -#ifndef __COLLATIONINFO_H__ -#define __COLLATIONINFO_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_COLLATION - -U_NAMESPACE_BEGIN - -struct CollationData; - -/** - * Collation-related code for tools & demos. - */ -class U_TOOLUTIL_API CollationInfo /* all static */ { -public: - static void printSizes(int32_t sizeWithHeader, const int32_t indexes[]); - static void printReorderRanges(const CollationData &data, const int32_t *codes, int32_t length); - -private: - CollationInfo(); // no constructor - - static int32_t getDataLength(const int32_t indexes[], int32_t startIndex); -}; - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_COLLATION -#endif // __COLLATIONINFO_H__ diff --git a/deps/node/deps/icu-small/source/tools/toolutil/dbgutil.cpp b/deps/node/deps/icu-small/source/tools/toolutil/dbgutil.cpp deleted file mode 100644 index 29bab927..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/dbgutil.cpp +++ /dev/null @@ -1,160 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/******************************************************************** - * COPYRIGHT: - * Copyright (c) 2007-2012, International Business Machines Corporation and - * others. All Rights Reserved. - ********************************************************************/ - -#include "udbgutil.h" -#include "dbgutil.h" - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/unistr.h" -#include "unicode/ustring.h" -#include "util.h" -#include "ucln.h" - -#include <stdio.h> -#include <string.h> -#include <stdlib.h> - -U_NAMESPACE_USE - -static UnicodeString **strs = NULL; - -static const UnicodeString& _fieldString(UDebugEnumType type, int32_t field, UnicodeString& fillin) { - const char *str = udbg_enumName(type, field); - if(str == NULL) { - return fillin.remove(); - } else { - return fillin = UnicodeString(str, ""); // optimize? - } -} - -U_CDECL_BEGIN -static void udbg_cleanup(void) { - if(strs != NULL) { - for(int t=0;t<=UDBG_ENUM_COUNT;t++) { - delete [] strs[t]; - } - delete[] strs; - strs = NULL; - } -} - -static UBool tu_cleanup(void) -{ - udbg_cleanup(); - return TRUE; -} - -static void udbg_register_cleanup(void) { - ucln_registerCleanup(UCLN_TOOLUTIL, tu_cleanup); -} -U_CDECL_END - -static void udbg_setup(void) { - if(strs == NULL) { - udbg_register_cleanup(); - //fprintf(stderr,"Initializing string cache..\n"); - //fflush(stderr); - UnicodeString **newStrs = new UnicodeString*[UDBG_ENUM_COUNT+1]; - for(int t=0;t<UDBG_ENUM_COUNT;t++) { - int32_t c = udbg_enumCount((UDebugEnumType)t); - newStrs[t] = new UnicodeString[c+1]; - for(int f=0;f<=c;f++) { - _fieldString((UDebugEnumType)t, f, newStrs[t][f]); - } - } - newStrs[UDBG_ENUM_COUNT] = new UnicodeString[1]; // empty string - - strs = newStrs; - } -} - - - -U_TOOLUTIL_API const UnicodeString& U_EXPORT2 udbg_enumString(UDebugEnumType type, int32_t field) { - if(strs == NULL ) { - udbg_setup(); - } - if(type<0||type>=UDBG_ENUM_COUNT) { - // use UDBG_ENUM_COUNT,0 to mean an empty string - //fprintf(stderr, "** returning out of range on %d\n",type); - //fflush(stderr); - return strs[UDBG_ENUM_COUNT][0]; - } - int32_t count = udbg_enumCount(type); - //fprintf(stderr, "enumString [%d,%d]: typecount %d, fieldcount %d\n", type,field,UDBG_ENUM_COUNT,count); - //fflush(stderr); - if(field<0 || field > count) { - return strs[type][count]; - } else { return strs[type][field]; - } -} - -U_CAPI int32_t U_EXPORT2 udbg_enumByString(UDebugEnumType type, const UnicodeString& string) { - if(type<0||type>=UDBG_ENUM_COUNT) { - return -1; - } - // initialize array - udbg_enumString(type,0); - // search - /// printf("type=%d\n", type); fflush(stdout); - for(int i=0;i<udbg_enumCount(type);i++) { -// printf("i=%d/%d\n", i, udbg_enumCount(type)); fflush(stdout); - if(string == (strs[type][i])) { - return i; - } - } - return -1; -} - -// from DataMap::utoi -U_CAPI int32_t -udbg_stoi(const UnicodeString &s) -{ - char ch[256]; - const UChar *u = toUCharPtr(s.getBuffer()); - int32_t len = s.length(); - u_UCharsToChars(u, ch, len); - ch[len] = 0; /* include terminating \0 */ - return atoi(ch); -} - - -U_CAPI double -udbg_stod(const UnicodeString &s) -{ - char ch[256]; - const UChar *u = toUCharPtr(s.getBuffer()); - int32_t len = s.length(); - u_UCharsToChars(u, ch, len); - ch[len] = 0; /* include terminating \0 */ - return atof(ch); -} - -U_CAPI UnicodeString * -udbg_escape(const UnicodeString &src, UnicodeString *dst) -{ - dst->remove(); - for (int32_t i = 0; i < src.length(); ++i) { - UChar c = src[i]; - if(ICU_Utility::isUnprintable(c)) { - *dst += UnicodeString("["); - ICU_Utility::escapeUnprintable(*dst, c); - *dst += UnicodeString("]"); - } - else { - *dst += c; - } - } - - return dst; -} - - - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/dbgutil.h b/deps/node/deps/icu-small/source/tools/toolutil/dbgutil.h deleted file mode 100644 index 314a9ae8..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/dbgutil.h +++ /dev/null @@ -1,45 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -/* -************************************************************************ -* Copyright (c) 2007-2012, International Business Machines -* Corporation and others. All Rights Reserved. -************************************************************************ -*/ - -/** C++ Utilities to aid in debugging **/ - -#ifndef _DBGUTIL_H -#define _DBGUTIL_H - -#include "unicode/utypes.h" -#include "udbgutil.h" -#include "unicode/unistr.h" - -#if !UCONFIG_NO_FORMATTING - -U_TOOLUTIL_API const icu::UnicodeString& U_EXPORT2 -udbg_enumString(UDebugEnumType type, int32_t field); - -/** - * @return enum offset, or UDBG_INVALID_ENUM on error - */ -U_CAPI int32_t U_EXPORT2 -udbg_enumByString(UDebugEnumType type, const icu::UnicodeString& string); - -/** - * Convert a UnicodeString (with ascii digits) into a number. - * @param s string - * @return numerical value, or 0 on error - */ -U_CAPI int32_t U_EXPORT2 udbg_stoi(const icu::UnicodeString &s); - -U_CAPI double U_EXPORT2 udbg_stod(const icu::UnicodeString &s); - -U_CAPI icu::UnicodeString * U_EXPORT2 -udbg_escape(const icu::UnicodeString &s, icu::UnicodeString *dst); - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/denseranges.cpp b/deps/node/deps/icu-small/source/tools/toolutil/denseranges.cpp deleted file mode 100644 index f5e52b1b..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/denseranges.cpp +++ /dev/null @@ -1,160 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: denseranges.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010sep25 -* created by: Markus W. Scherer -* -* Helper code for finding a small number of dense ranges. -*/ - -#include "unicode/utypes.h" -#include "denseranges.h" - -// Definitions in the anonymous namespace are invisible outside this file. -namespace { - -/** - * Collect up to 15 range gaps and sort them by ascending gap size. - */ -class LargestGaps { -public: - LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {} - - void add(int32_t gapStart, int64_t gapLength) { - int32_t i=length; - while(i>0 && gapLength>gapLengths[i-1]) { - --i; - } - if(i<maxLength) { - // The new gap is now one of the maxLength largest. - // Insert the new gap, moving up smaller ones of the previous - // length largest. - int32_t j= length<maxLength ? length++ : maxLength-1; - while(j>i) { - gapStarts[j]=gapStarts[j-1]; - gapLengths[j]=gapLengths[j-1]; - --j; - } - gapStarts[i]=gapStart; - gapLengths[i]=gapLength; - } - } - - void truncate(int32_t newLength) { - if(newLength<length) { - length=newLength; - } - } - - int32_t count() const { return length; } - int32_t gapStart(int32_t i) const { return gapStarts[i]; } - int64_t gapLength(int32_t i) const { return gapLengths[i]; } - - int32_t firstAfter(int32_t value) const { - if(length==0) { - return -1; - } - int32_t minValue=0; - int32_t minIndex=-1; - for(int32_t i=0; i<length; ++i) { - if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) { - minValue=gapStarts[i]; - minIndex=i; - } - } - return minIndex; - } - -private: - static const int32_t kCapacity=15; - - int32_t maxLength; - int32_t length; - int32_t gapStarts[kCapacity]; - int64_t gapLengths[kCapacity]; -}; - -} // namespace - -/** - * Does it make sense to write 1..capacity ranges? - * Returns 0 if not, otherwise the number of ranges. - * @param values Sorted array of signed-integer values. - * @param length Number of values. - * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.) - * Should be 0x80..0x100, must be 1..0x100. - * @param ranges Output ranges array. - * @param capacity Maximum number of ranges. - * @return Minimum number of ranges (at most capacity) that have the desired density, - * or 0 if that density cannot be achieved. - */ -U_CAPI int32_t U_EXPORT2 -uprv_makeDenseRanges(const int32_t values[], int32_t length, - int32_t density, - int32_t ranges[][2], int32_t capacity) { - if(length<=2) { - return 0; - } - int32_t minValue=values[0]; - int32_t maxValue=values[length-1]; // Assume minValue<=maxValue. - // Use int64_t variables for intermediate-value precision and to avoid - // signed-int32_t overflow of maxValue-minValue. - int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1; - if(length>=(density*maxLength)/0x100) { - // Use one range. - ranges[0][0]=minValue; - ranges[0][1]=maxValue; - return 1; - } - if(length<=4) { - return 0; - } - // See if we can split [minValue, maxValue] into 2..capacity ranges, - // divided by the 1..(capacity-1) largest gaps. - LargestGaps gaps(capacity-1); - int32_t i; - int32_t expectedValue=minValue; - for(i=1; i<length; ++i) { - ++expectedValue; - int32_t actualValue=values[i]; - if(expectedValue!=actualValue) { - gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue); - expectedValue=actualValue; - } - } - // We know gaps.count()>=1 because we have fewer values (length) than - // the length of the [minValue..maxValue] range (maxLength). - // (Otherwise we would have returned with the one range above.) - int32_t num; - for(i=0, num=2;; ++i, ++num) { - if(i>=gaps.count()) { - // The values are too sparse for capacity or fewer ranges - // of the requested density. - return 0; - } - maxLength-=gaps.gapLength(i); - if(length>num*2 && length>=(density*maxLength)/0x100) { - break; - } - } - // Use the num ranges with the num-1 largest gaps. - gaps.truncate(num-1); - ranges[0][0]=minValue; - for(i=0; i<=num-2; ++i) { - int32_t gapIndex=gaps.firstAfter(minValue); - int32_t gapStart=gaps.gapStart(gapIndex); - ranges[i][1]=gapStart-1; - ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex)); - } - ranges[num-1][1]=maxValue; - return num; -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/denseranges.h b/deps/node/deps/icu-small/source/tools/toolutil/denseranges.h deleted file mode 100644 index c489ca47..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/denseranges.h +++ /dev/null @@ -1,41 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: denseranges.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010sep25 -* created by: Markus W. Scherer -* -* Helper code for finding a small number of dense ranges. -*/ - -#ifndef __DENSERANGES_H__ -#define __DENSERANGES_H__ - -#include "unicode/utypes.h" - -/** - * Does it make sense to write 1..capacity ranges? - * Returns 0 if not, otherwise the number of ranges. - * @param values Sorted array of signed-integer values. - * @param length Number of values. - * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.) - * Should be 0x80..0x100, must be 1..0x100. - * @param ranges Output ranges array. - * @param capacity Maximum number of ranges. - * @return Minimum number of ranges (at most capacity) that have the desired density, - * or 0 if that density cannot be achieved. - */ -U_CAPI int32_t U_EXPORT2 -uprv_makeDenseRanges(const int32_t values[], int32_t length, - int32_t density, - int32_t ranges[][2], int32_t capacity); - -#endif // __DENSERANGES_H__ diff --git a/deps/node/deps/icu-small/source/tools/toolutil/filestrm.cpp b/deps/node/deps/icu-small/source/tools/toolutil/filestrm.cpp deleted file mode 100644 index a170c7b0..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/filestrm.cpp +++ /dev/null @@ -1,227 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File FILESTRM.C -* -* @author Glenn Marcy -* -* Modification History: -* -* Date Name Description -* 5/8/98 gm Created -* 03/02/99 stephen Reordered params in ungetc to match stdio -* Added wopen -* 3/29/99 helena Merged Stephen and Bertrand's changes. -* -****************************************************************************** -*/ - -#include "filestrm.h" - -#include "cmemory.h" - -#include <stdio.h> - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_open(const char* filename, const char* mode) -{ - if(filename != NULL && *filename != 0 && mode != NULL && *mode != 0) { - FILE *file = fopen(filename, mode); - return (FileStream*)file; - } else { - return NULL; - } -} - -/* -U_CAPI FileStream* U_EXPORT2 -T_FileStream_wopen(const wchar_t* filename, const wchar_t* mode) -{ - // TBD: _wfopen is believed to be MS-specific? -#if U_PLATFORM_USES_ONLY_WIN32_API - FILE* result = _wfopen(filename, mode); - return (FileStream*)result; -#else - size_t fnMbsSize, mdMbsSize; - char *fn, *md; - FILE *result; - - // convert from wchar_t to char - fnMbsSize = wcstombs(NULL, filename, ((size_t)-1) >> 1); - fn = (char*)uprv_malloc(fnMbsSize+2); - wcstombs(fn, filename, fnMbsSize); - fn[fnMbsSize] = 0; - - mdMbsSize = wcstombs(NULL, mode, ((size_t)-1) >> 1); - md = (char*)uprv_malloc(mdMbsSize+2); - wcstombs(md, mode, mdMbsSize); - md[mdMbsSize] = 0; - - result = fopen(fn, md); - uprv_free(fn); - uprv_free(md); - return (FileStream*)result; -#endif -} -*/ -U_CAPI void U_EXPORT2 -T_FileStream_close(FileStream* fileStream) -{ - if (fileStream != 0) - fclose((FILE*)fileStream); -} - -U_CAPI UBool U_EXPORT2 -T_FileStream_file_exists(const char* filename) -{ - FILE* temp = fopen(filename, "r"); - if (temp) { - fclose(temp); - return TRUE; - } else - return FALSE; -} - -/*static const int32_t kEOF; -const int32_t FileStream::kEOF = EOF;*/ - -/* -U_CAPI FileStream* -T_FileStream_tmpfile() -{ - FILE* file = tmpfile(); - return (FileStream*)file; -} -*/ - -U_CAPI int32_t U_EXPORT2 -T_FileStream_read(FileStream* fileStream, void* addr, int32_t len) -{ - return static_cast<int32_t>(fread(addr, 1, len, (FILE*)fileStream)); -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_write(FileStream* fileStream, const void* addr, int32_t len) -{ - - return static_cast<int32_t>(fwrite(addr, 1, len, (FILE*)fileStream)); -} - -U_CAPI void U_EXPORT2 -T_FileStream_rewind(FileStream* fileStream) -{ - rewind((FILE*)fileStream); -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_putc(FileStream* fileStream, int32_t ch) -{ - int32_t c = fputc(ch, (FILE*)fileStream); - return c; -} - -U_CAPI int U_EXPORT2 -T_FileStream_getc(FileStream* fileStream) -{ - int c = fgetc((FILE*)fileStream); - return c; -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_ungetc(int32_t ch, FileStream* fileStream) -{ - - int32_t c = ungetc(ch, (FILE*)fileStream); - return c; -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_peek(FileStream* fileStream) -{ - int32_t c = fgetc((FILE*)fileStream); - return ungetc(c, (FILE*)fileStream); -} - -U_CAPI char* U_EXPORT2 -T_FileStream_readLine(FileStream* fileStream, char* buffer, int32_t length) -{ - return fgets(buffer, length, (FILE*)fileStream); -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_writeLine(FileStream* fileStream, const char* buffer) -{ - return fputs(buffer, (FILE*)fileStream); -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_size(FileStream* fileStream) -{ - int32_t savedPos = ftell((FILE*)fileStream); - int32_t size = 0; - - /*Changes by Bertrand A. D. doesn't affect the current position - goes to the end of the file before ftell*/ - fseek((FILE*)fileStream, 0, SEEK_END); - size = (int32_t)ftell((FILE*)fileStream); - fseek((FILE*)fileStream, savedPos, SEEK_SET); - return size; -} - -U_CAPI int U_EXPORT2 -T_FileStream_eof(FileStream* fileStream) -{ - return feof((FILE*)fileStream); -} - -/* - Warning - This function may not work consistently on all platforms - (e.g. HP-UX, FreeBSD and MacOSX don't return an error when - putc is used on a file opened as readonly) -*/ -U_CAPI int U_EXPORT2 -T_FileStream_error(FileStream* fileStream) -{ - return (fileStream == 0 || ferror((FILE*)fileStream)); -} - -/* This function doesn't work. */ -/* force the stream to set its error flag*/ -/*U_CAPI void U_EXPORT2 -T_FileStream_setError(FileStream* fileStream) -{ - fseek((FILE*)fileStream, 99999, SEEK_SET); -} -*/ - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_stdin(void) -{ - return (FileStream*)stdin; -} - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_stdout(void) -{ - return (FileStream*)stdout; -} - - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_stderr(void) -{ - return (FileStream*)stderr; -} - -U_CAPI UBool U_EXPORT2 -T_FileStream_remove(const char* fileName){ - return (remove(fileName) == 0); -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/filestrm.h b/deps/node/deps/icu-small/source/tools/toolutil/filestrm.h deleted file mode 100644 index 86fac306..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/filestrm.h +++ /dev/null @@ -1,106 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2005, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File FILESTRM.H -* -* Contains FileStream interface -* -* @author Glenn Marcy -* -* Modification History: -* -* Date Name Description -* 5/8/98 gm Created. -* 03/02/99 stephen Reordered params in ungetc to match stdio -* Added wopen -* -****************************************************************************** -*/ - -#ifndef FILESTRM_H -#define FILESTRM_H - -#include "unicode/utypes.h" - -typedef struct _FileStream FileStream; - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_open(const char* filename, const char* mode); - -/* -U_CAPI FileStream* U_EXPORT2 -T_FileStream_wopen(const wchar_t* filename, const wchar_t* mode); -*/ -U_CAPI void U_EXPORT2 -T_FileStream_close(FileStream* fileStream); - -U_CAPI UBool U_EXPORT2 -T_FileStream_file_exists(const char* filename); - -/* -U_CAPI FileStream* U_EXPORT2 -T_FileStream_tmpfile(void); -*/ - -U_CAPI int32_t U_EXPORT2 -T_FileStream_read(FileStream* fileStream, void* addr, int32_t len); - -U_CAPI int32_t U_EXPORT2 -T_FileStream_write(FileStream* fileStream, const void* addr, int32_t len); - -U_CAPI void U_EXPORT2 -T_FileStream_rewind(FileStream* fileStream); - -/*Added by Bertrand A. D. */ -U_CAPI char * U_EXPORT2 -T_FileStream_readLine(FileStream* fileStream, char* buffer, int32_t length); - -U_CAPI int32_t U_EXPORT2 -T_FileStream_writeLine(FileStream* fileStream, const char* buffer); - -U_CAPI int32_t U_EXPORT2 -T_FileStream_putc(FileStream* fileStream, int32_t ch); - -U_CAPI int U_EXPORT2 -T_FileStream_getc(FileStream* fileStream); - -U_CAPI int32_t U_EXPORT2 -T_FileStream_ungetc(int32_t ch, FileStream *fileStream); - -U_CAPI int32_t U_EXPORT2 -T_FileStream_peek(FileStream* fileStream); - -U_CAPI int32_t U_EXPORT2 -T_FileStream_size(FileStream* fileStream); - -U_CAPI int U_EXPORT2 -T_FileStream_eof(FileStream* fileStream); - -U_CAPI int U_EXPORT2 -T_FileStream_error(FileStream* fileStream); - -/* -U_CAPI void U_EXPORT2 -T_FileStream_setError(FileStream* fileStream); -*/ - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_stdin(void); - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_stdout(void); - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_stderr(void); - -U_CAPI UBool U_EXPORT2 -T_FileStream_remove(const char* fileName); - -#endif /* _FILESTRM*/ diff --git a/deps/node/deps/icu-small/source/tools/toolutil/filetools.cpp b/deps/node/deps/icu-small/source/tools/toolutil/filetools.cpp deleted file mode 100644 index 6e88c94b..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/filetools.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** - * Copyright (C) 2009-2013, International Business Machines - * Corporation and others. All Rights Reserved. - ******************************************************************************* - */ - -#include "unicode/platform.h" -#if U_PLATFORM == U_PF_MINGW -// *cough* - for struct stat -#ifdef __STRICT_ANSI__ -#undef __STRICT_ANSI__ -#endif -#endif - -#include "filetools.h" -#include "filestrm.h" -#include "charstr.h" -#include "cstring.h" -#include "unicode/putil.h" -#include "putilimp.h" - -#include <stdio.h> -#include <stdlib.h> -#include <sys/stat.h> -#include <time.h> -#include <string.h> - -#if U_HAVE_DIRENT_H -#include <dirent.h> -typedef struct dirent DIRENT; - -#define SKIP1 "." -#define SKIP2 ".." -#endif - -static int32_t whichFileModTimeIsLater(const char *file1, const char *file2); - -/* - * Goes through the given directory recursive to compare each file's modification time with that of the file given. - * Also can be given just one file to check against. Default value for isDir is FALSE. - */ -U_CAPI UBool U_EXPORT2 -isFileModTimeLater(const char *filePath, const char *checkAgainst, UBool isDir) { - UBool isLatest = TRUE; - - if (filePath == NULL || checkAgainst == NULL) { - return FALSE; - } - - if (isDir == TRUE) { -#if U_HAVE_DIRENT_H - DIR *pDir = NULL; - if ((pDir= opendir(checkAgainst)) != NULL) { - DIR *subDirp = NULL; - DIRENT *dirEntry = NULL; - - while ((dirEntry = readdir(pDir)) != NULL) { - if (uprv_strcmp(dirEntry->d_name, SKIP1) != 0 && uprv_strcmp(dirEntry->d_name, SKIP2) != 0) { - UErrorCode status = U_ZERO_ERROR; - icu::CharString newpath(checkAgainst, -1, status); - newpath.append(U_FILE_SEP_STRING, -1, status); - newpath.append(dirEntry->d_name, -1, status); - if (U_FAILURE(status)) { - fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, u_errorName(status)); - return FALSE; - }; - - if ((subDirp = opendir(newpath.data())) != NULL) { - /* If this new path is a directory, make a recursive call with the newpath. */ - closedir(subDirp); - isLatest = isFileModTimeLater(filePath, newpath.data(), isDir); - if (!isLatest) { - break; - } - } else { - int32_t latest = whichFileModTimeIsLater(filePath, newpath.data()); - if (latest < 0 || latest == 2) { - isLatest = FALSE; - break; - } - } - - } - } - closedir(pDir); - } else { - fprintf(stderr, "Unable to open directory: %s\n", checkAgainst); - return FALSE; - } -#endif - } else { - if (T_FileStream_file_exists(checkAgainst)) { - int32_t latest = whichFileModTimeIsLater(filePath, checkAgainst); - if (latest < 0 || latest == 2) { - isLatest = FALSE; - } - } else { - isLatest = FALSE; - } - } - - return isLatest; -} - -/* Compares the mod time of both files returning a number indicating which one is later. -1 if error ocurs. */ -static int32_t whichFileModTimeIsLater(const char *file1, const char *file2) { - int32_t result = 0; - struct stat stbuf1, stbuf2; - - if (stat(file1, &stbuf1) == 0 && stat(file2, &stbuf2) == 0) { - time_t modtime1, modtime2; - double diff; - - modtime1 = stbuf1.st_mtime; - modtime2 = stbuf2.st_mtime; - - diff = difftime(modtime1, modtime2); - if (diff < 0.0) { - result = 2; - } else if (diff > 0.0) { - result = 1; - } - - } else { - fprintf(stderr, "Unable to get stats from file: %s or %s\n", file1, file2); - result = -1; - } - - return result; -} - -/* Swap the file separater character given with the new one in the file path. */ -U_CAPI void U_EXPORT2 -swapFileSepChar(char *filePath, const char oldFileSepChar, const char newFileSepChar) { - for (int32_t i = 0, length = static_cast<int32_t>(uprv_strlen(filePath)); i < length; i++) { - filePath[i] = (filePath[i] == oldFileSepChar ) ? newFileSepChar : filePath[i]; - } -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/filetools.h b/deps/node/deps/icu-small/source/tools/toolutil/filetools.h deleted file mode 100644 index 6a25c360..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/filetools.h +++ /dev/null @@ -1,34 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: filetools.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009jan09 -* created by: Michael Ow -* -* Contains various functions to handle files. -* Not suitable for production use. Not supported. -* Not conformant. Not efficient. -*/ - -#ifndef __FILETOOLS_H__ -#define __FILETOOLS_H__ - -#include "unicode/utypes.h" - -U_CAPI UBool U_EXPORT2 -isFileModTimeLater(const char *filePath, const char *checkAgainst, UBool isDir=FALSE); - -U_CAPI void U_EXPORT2 -swapFileSepChar(char *filePath, const char oldFileSepChar, const char newFileSepChar); - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/flagparser.cpp b/deps/node/deps/icu-small/source/tools/toolutil/flagparser.cpp deleted file mode 100644 index c8d791c6..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/flagparser.cpp +++ /dev/null @@ -1,180 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** - * Copyright (C) 2009-2015, International Business Machines - * Corporation and others. All Rights Reserved. - ******************************************************************************* - */ - -#include "flagparser.h" -#include "filestrm.h" -#include "cstring.h" -#include "cmemory.h" - -#define DEFAULT_BUFFER_SIZE 512 - -static int32_t currentBufferSize = DEFAULT_BUFFER_SIZE; - -static int32_t extractFlag(char* buffer, int32_t bufferSize, char* flag, int32_t flagSize, const char ** flagNames, int32_t numOfFlags, UErrorCode *status); -static int32_t getFlagOffset(const char *buffer, int32_t bufferSize); - -/* - * Opens the given fileName and reads in the information storing the data in flagBuffer. - */ -U_CAPI int32_t U_EXPORT2 -parseFlagsFile(const char *fileName, char **flagBuffer, int32_t flagBufferSize, const char ** flagNames, int32_t numOfFlags, UErrorCode *status) { - char* buffer = NULL; - char* tmpFlagBuffer = NULL; - UBool allocateMoreSpace = FALSE; - int32_t idx, i; - int32_t result = 0; - - FileStream *f = T_FileStream_open(fileName, "r"); - if (f == NULL) { - *status = U_FILE_ACCESS_ERROR; - goto parseFlagsFile_cleanup; - } - - buffer = (char *)uprv_malloc(sizeof(char) * currentBufferSize); - tmpFlagBuffer = (char *)uprv_malloc(sizeof(char) * flagBufferSize); - - if (buffer == NULL || tmpFlagBuffer == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto parseFlagsFile_cleanup; - } - - do { - if (allocateMoreSpace) { - allocateMoreSpace = FALSE; - currentBufferSize *= 2; - uprv_free(buffer); - buffer = (char *)uprv_malloc(sizeof(char) * currentBufferSize); - if (buffer == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto parseFlagsFile_cleanup; - } - } - for (i = 0; i < numOfFlags;) { - if (T_FileStream_readLine(f, buffer, currentBufferSize) == NULL) { - /* End of file reached. */ - break; - } - if (buffer[0] == '#') { - continue; - } - - if ((int32_t)uprv_strlen(buffer) == (currentBufferSize - 1) && buffer[currentBufferSize-2] != '\n') { - /* Allocate more space for buffer if it didnot read the entrire line */ - allocateMoreSpace = TRUE; - T_FileStream_rewind(f); - break; - } else { - idx = extractFlag(buffer, currentBufferSize, tmpFlagBuffer, flagBufferSize, flagNames, numOfFlags, status); - if (U_FAILURE(*status)) { - if (*status == U_BUFFER_OVERFLOW_ERROR) { - result = currentBufferSize; - } else { - result = -1; - } - break; - } else { - if (flagNames != NULL) { - if (idx >= 0) { - uprv_strcpy(flagBuffer[idx], tmpFlagBuffer); - } else { - /* No match found. Skip it. */ - continue; - } - } else { - uprv_strcpy(flagBuffer[i++], tmpFlagBuffer); - } - } - } - } - } while (allocateMoreSpace && U_SUCCESS(*status)); - -parseFlagsFile_cleanup: - uprv_free(tmpFlagBuffer); - uprv_free(buffer); - - T_FileStream_close(f); - - if (U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) { - return -1; - } - - if (U_SUCCESS(*status) && result == 0) { - currentBufferSize = DEFAULT_BUFFER_SIZE; - } - - return result; -} - - -/* - * Extract the setting after the '=' and store it in flag excluding the newline character. - */ -static int32_t extractFlag(char* buffer, int32_t bufferSize, char* flag, int32_t flagSize, const char **flagNames, int32_t numOfFlags, UErrorCode *status) { - int32_t i, idx = -1; - char *pBuffer; - int32_t offset=0; - UBool bufferWritten = FALSE; - - if (buffer[0] != 0) { - /* Get the offset (i.e. position after the '=') */ - offset = getFlagOffset(buffer, bufferSize); - pBuffer = buffer+offset; - for(i = 0;;i++) { - if (i >= flagSize) { - *status = U_BUFFER_OVERFLOW_ERROR; - return -1; - } - if (pBuffer[i+1] == 0) { - /* Indicates a new line character. End here. */ - flag[i] = 0; - break; - } - - flag[i] = pBuffer[i]; - if (i == 0) { - bufferWritten = TRUE; - } - } - } - - if (!bufferWritten) { - flag[0] = 0; - } - - if (flagNames != NULL && offset>0) { - offset--; /* Move offset back 1 because of '='*/ - for (i = 0; i < numOfFlags; i++) { - if (uprv_strncmp(buffer, flagNames[i], offset) == 0) { - idx = i; - break; - } - } - } - - return idx; -} - -/* - * Get the position after the '=' character. - */ -static int32_t getFlagOffset(const char *buffer, int32_t bufferSize) { - int32_t offset = 0; - - for (offset = 0; offset < bufferSize;offset++) { - if (buffer[offset] == '=') { - offset++; - break; - } - } - - if (offset == bufferSize || (offset - 1) == bufferSize) { - offset = 0; - } - - return offset; -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/flagparser.h b/deps/node/deps/icu-small/source/tools/toolutil/flagparser.h deleted file mode 100644 index aa425471..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/flagparser.h +++ /dev/null @@ -1,32 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: flagparser.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009jan08 -* created by: Michael Ow -* -* Tiny flag file parser using ICU and intended for use in ICU tests and in build tools. -* Not suitable for production use. Not supported. -* Not conformant. Not efficient. -* But very small. -*/ - -#ifndef __FLAGPARSER_H__ -#define __FLAGPARSER_H__ - -#include "unicode/utypes.h" - -U_CAPI int32_t U_EXPORT2 -parseFlagsFile(const char *fileName, char **flagBuffer, int32_t flagBufferSize, const char ** flagNames, int32_t numOfFlags, UErrorCode *status); - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/package.cpp b/deps/node/deps/icu-small/source/tools/toolutil/package.cpp deleted file mode 100644 index f4e428a3..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/package.cpp +++ /dev/null @@ -1,1311 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: package.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2005aug25 -* created by: Markus W. Scherer -* -* Read, modify, and write ICU .dat data package files. -* This is an integral part of the icupkg tool, moved to the toolutil library -* because parts of tool implementations tend to be later shared by -* other tools. -* Subsumes functionality and implementation code from -* gencmn, decmn, and icuswap tools. -*/ - -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/udata.h" -#include "cstring.h" -#include "uarrsort.h" -#include "ucmndata.h" -#include "udataswp.h" -#include "swapimpl.h" -#include "toolutil.h" -#include "package.h" -#include "cmemory.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - - -static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */ - -// general definitions ----------------------------------------------------- *** - -/* UDataInfo cf. udata.h */ -static const UDataInfo dataInfo={ - (uint16_t)sizeof(UDataInfo), - 0, - - U_IS_BIG_ENDIAN, - U_CHARSET_FAMILY, - (uint8_t)sizeof(UChar), - 0, - - {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ - {1, 0, 0, 0}, /* formatVersion */ - {3, 0, 0, 0} /* dataVersion */ -}; - -U_CDECL_BEGIN -static void U_CALLCONV -printPackageError(void *context, const char *fmt, va_list args) { - vfprintf((FILE *)context, fmt, args); -} -U_CDECL_END - -static uint16_t -readSwapUInt16(uint16_t x) { - return (uint16_t)((x<<8)|(x>>8)); -} - -// platform types ---------------------------------------------------------- *** - -static const char *types="lb?e"; - -enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; - -static inline int32_t -makeTypeEnum(uint8_t charset, UBool isBigEndian) { - return 2*(int32_t)charset+isBigEndian; -} - -static inline int32_t -makeTypeEnum(char type) { - return - type == 'l' ? TYPE_L : - type == 'b' ? TYPE_B : - type == 'e' ? TYPE_E : - -1; -} - -static inline char -makeTypeLetter(uint8_t charset, UBool isBigEndian) { - return types[makeTypeEnum(charset, isBigEndian)]; -} - -static inline char -makeTypeLetter(int32_t typeEnum) { - return types[typeEnum]; -} - -static void -makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { - int32_t typeEnum=makeTypeEnum(type); - charset=(uint8_t)(typeEnum>>1); - isBigEndian=(UBool)(typeEnum&1); -} - -U_CFUNC const UDataInfo * -getDataInfo(const uint8_t *data, int32_t length, - int32_t &infoLength, int32_t &headerLength, - UErrorCode *pErrorCode) { - const DataHeader *pHeader; - const UDataInfo *pInfo; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } - if( data==NULL || - (length>=0 && length<(int32_t)sizeof(DataHeader)) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - pHeader=(const DataHeader *)data; - pInfo=&pHeader->info; - if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || - pHeader->dataHeader.magic1!=0xda || - pHeader->dataHeader.magic2!=0x27 || - pInfo->sizeofUChar!=2 - ) { - *pErrorCode=U_UNSUPPORTED_ERROR; - return NULL; - } - - if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { - headerLength=pHeader->dataHeader.headerSize; - infoLength=pInfo->size; - } else { - headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); - infoLength=readSwapUInt16(pInfo->size); - } - - if( headerLength<(int32_t)sizeof(DataHeader) || - infoLength<(int32_t)sizeof(UDataInfo) || - headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || - (length>=0 && length<headerLength) - ) { - *pErrorCode=U_UNSUPPORTED_ERROR; - return NULL; - } - - return pInfo; -} - -static int32_t -getTypeEnumForInputData(const uint8_t *data, int32_t length, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t infoLength, headerLength; - - /* getDataInfo() checks for illegal arguments */ - pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); - if(pInfo==NULL) { - return -1; - } - - return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); -} - -// file handling ----------------------------------------------------------- *** - -static void -extractPackageName(const char *filename, - char pkg[], int32_t capacity) { - const char *basename; - int32_t len; - - basename=findBasename(filename); - len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ - - if(len<=0 || 0!=strcmp(basename+len, ".dat")) { - fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", - basename); - exit(U_ILLEGAL_ARGUMENT_ERROR); - } - - if(len>=capacity) { - fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", - basename, (long)capacity); - exit(U_ILLEGAL_ARGUMENT_ERROR); - } - - memcpy(pkg, basename, len); - pkg[len]=0; -} - -static int32_t -getFileLength(FILE *f) { - int32_t length; - - fseek(f, 0, SEEK_END); - length=(int32_t)ftell(f); - fseek(f, 0, SEEK_SET); - return length; -} - -/* - * Turn tree separators and alternate file separators into normal file separators. - */ -#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR -#define treeToPath(s) -#else -static void -treeToPath(char *s) { - char *t; - - for(t=s; *t!=0; ++t) { - if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { - *t=U_FILE_SEP_CHAR; - } - } -} -#endif - -/* - * Turn file separators into tree separators. - */ -#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR -#define pathToTree(s) -#else -static void -pathToTree(char *s) { - char *t; - - for(t=s; *t!=0; ++t) { - if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { - *t=U_TREE_ENTRY_SEP_CHAR; - } - } -} -#endif - -/* - * Prepend the path (if any) to the name and run the name through treeToName(). - */ -static void -makeFullFilename(const char *path, const char *name, - char *filename, int32_t capacity) { - char *s; - - // prepend the path unless NULL or empty - if(path!=NULL && path[0]!=0) { - if((int32_t)(strlen(path)+1)>=capacity) { - fprintf(stderr, "pathname too long: \"%s\"\n", path); - exit(U_BUFFER_OVERFLOW_ERROR); - } - strcpy(filename, path); - - // make sure the path ends with a file separator - s=strchr(filename, 0); - if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { - *s++=U_FILE_SEP_CHAR; - } - } else { - s=filename; - } - - // turn the name into a filename, turn tree separators into file separators - if((int32_t)((s-filename)+strlen(name))>=capacity) { - fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); - exit(U_BUFFER_OVERFLOW_ERROR); - } - strcpy(s, name); - treeToPath(s); -} - -static void -makeFullFilenameAndDirs(const char *path, const char *name, - char *filename, int32_t capacity) { - char *sep; - UErrorCode errorCode; - - makeFullFilename(path, name, filename, capacity); - - // make tree directories - errorCode=U_ZERO_ERROR; - sep=strchr(filename, 0)-strlen(name); - while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { - if(sep!=filename) { - *sep=0; // truncate temporarily - uprv_mkdir(filename, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - } - *sep++=U_FILE_SEP_CHAR; // restore file separator character - } -} - -static uint8_t * -readFile(const char *path, const char *name, int32_t &length, char &type) { - char filename[1024]; - FILE *file; - UErrorCode errorCode; - int32_t fileLength, typeEnum; - - makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); - - /* open the input file, get its length, allocate memory for it, read the file */ - file=fopen(filename, "rb"); - if(file==NULL) { - fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - /* get the file length */ - fileLength=getFileLength(file); - if(ferror(file) || fileLength<=0) { - fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); - fclose(file); - exit(U_FILE_ACCESS_ERROR); - } - - /* allocate the buffer, pad to multiple of 16 */ - length=(fileLength+0xf)&~0xf; - icu::LocalMemory<uint8_t> data((uint8_t *)uprv_malloc(length)); - if(data.isNull()) { - fclose(file); - fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - /* read the file */ - if(fileLength!=(int32_t)fread(data.getAlias(), 1, fileLength, file)) { - fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); - fclose(file); - exit(U_FILE_ACCESS_ERROR); - } - - /* pad the file to a multiple of 16 using the usual padding byte */ - if(fileLength<length) { - memset(data.getAlias()+fileLength, 0xaa, length-fileLength); - } - - fclose(file); - - // minimum check for ICU-format data - errorCode=U_ZERO_ERROR; - typeEnum=getTypeEnumForInputData(data.getAlias(), length, &errorCode); - if(typeEnum<0 || U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); -#if !UCONFIG_NO_LEGACY_CONVERSION - exit(U_INVALID_FORMAT_ERROR); -#else - fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n"); - exit(0); -#endif - } - type=makeTypeLetter(typeEnum); - - return data.orphan(); -} - -// .dat package file representation ---------------------------------------- *** - -U_CDECL_BEGIN - -static int32_t U_CALLCONV -compareItems(const void * /*context*/, const void *left, const void *right) { - U_NAMESPACE_USE - - return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); -} - -U_CDECL_END - -U_NAMESPACE_BEGIN - -Package::Package() - : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) { - inPkgName[0]=0; - pkgPrefix[0]=0; - inData=NULL; - inLength=0; - inCharset=U_CHARSET_FAMILY; - inIsBigEndian=U_IS_BIG_ENDIAN; - - itemCount=0; - itemMax=0; - items=NULL; - - inStringTop=outStringTop=0; - - matchMode=0; - findPrefix=findSuffix=NULL; - findPrefixLength=findSuffixLength=0; - findNextIndex=-1; - - // create a header for an empty package - DataHeader *pHeader; - pHeader=(DataHeader *)header; - pHeader->dataHeader.magic1=0xda; - pHeader->dataHeader.magic2=0x27; - memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); - headerLength=(int32_t)(4+sizeof(dataInfo)); - if(headerLength&0xf) { - /* NUL-pad the header to a multiple of 16 */ - int32_t length=(headerLength+0xf)&~0xf; - memset(header+headerLength, 0, length-headerLength); - headerLength=length; - } - pHeader->dataHeader.headerSize=(uint16_t)headerLength; -} - -Package::~Package() { - int32_t idx; - - uprv_free(inData); - - for(idx=0; idx<itemCount; ++idx) { - if(items[idx].isDataOwned) { - uprv_free(items[idx].data); - } - } - - uprv_free((void*)items); -} - -void -Package::setPrefix(const char *p) { - if(strlen(p)>=sizeof(pkgPrefix)) { - fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p); - exit(U_ILLEGAL_ARGUMENT_ERROR); - } - strcpy(pkgPrefix, p); -} - -void -Package::readPackage(const char *filename) { - UDataSwapper *ds; - const UDataInfo *pInfo; - UErrorCode errorCode; - - const uint8_t *inBytes; - - int32_t length, offset, i; - int32_t itemLength, typeEnum; - char type; - - const UDataOffsetTOCEntry *inEntries; - - extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); - - /* read the file */ - inData=readFile(NULL, filename, inLength, type); - length=inLength; - - /* - * swap the header - even if the swapping itself is a no-op - * because it tells us the header length - */ - errorCode=U_ZERO_ERROR; - makeTypeProps(type, inCharset, inIsBigEndian); - ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", - filename, u_errorName(errorCode)); - exit(errorCode); - } - - ds->printError=printPackageError; - ds->printErrorContext=stderr; - - headerLength=sizeof(header); - if(length<headerLength) { - headerLength=length; - } - headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); - if(U_FAILURE(errorCode)) { - exit(errorCode); - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ - pInfo->dataFormat[1]==0x6d && - pInfo->dataFormat[2]==0x6e && - pInfo->dataFormat[3]==0x44 && - pInfo->formatVersion[0]==1 - )) { - fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - exit(U_UNSUPPORTED_ERROR); - } - inIsBigEndian=(UBool)pInfo->isBigEndian; - inCharset=pInfo->charsetFamily; - - inBytes=(const uint8_t *)inData+headerLength; - inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); - - /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ - length-=headerLength; - if(length<4) { - /* itemCount does not fit */ - offset=0x7fffffff; - } else { - itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); - setItemCapacity(itemCount); /* resize so there's space */ - if(itemCount==0) { - offset=4; - } else if(length<(4+8*itemCount)) { - /* ToC table does not fit */ - offset=0x7fffffff; - } else { - /* offset of the last item plus at least 20 bytes for its header */ - offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); - } - } - if(length<offset) { - fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", - (long)length); - exit(U_INDEX_OUTOFBOUNDS_ERROR); - } - /* do not modify the package length variable until the last item's length is set */ - - if(itemCount<=0) { - if(doAutoPrefix) { - fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n"); - exit(U_INVALID_FORMAT_ERROR); - } - } else { - char prefix[MAX_PKG_NAME_LENGTH+4]; - char *s, *inItemStrings; - - if(itemCount>itemMax) { - fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax); - exit(U_BUFFER_OVERFLOW_ERROR); - } - - /* swap the item name strings */ - int32_t stringsOffset=4+8*itemCount; - itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; - - // don't include padding bytes at the end of the item names - while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { - --itemLength; - } - - if((inStringTop+itemLength)>STRING_STORE_SIZE) { - fprintf(stderr, "icupkg: total length of item name strings too long\n"); - exit(U_BUFFER_OVERFLOW_ERROR); - } - - inItemStrings=inStrings+inStringTop; - ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); - exit(U_INVALID_FORMAT_ERROR); - } - inStringTop+=itemLength; - - // reset the Item entries - memset(items, 0, itemCount*sizeof(Item)); - - /* - * Get the common prefix of the items. - * New-style ICU .dat packages use tree separators ('/') between package names, - * tree names, and item names, - * while old-style ICU .dat packages (before multi-tree support) - * use an underscore ('_') between package and item names. - */ - offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; - s=inItemStrings+offset; // name of the first entry - int32_t prefixLength; - if(doAutoPrefix) { - // Use the first entry's prefix. Must be a new-style package. - const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR); - if(prefixLimit==NULL) { - fprintf(stderr, - "icupkg: --auto_toc_prefix[_with_type] but " - "the first entry \"%s\" does not contain a '%c'\n", - s, U_TREE_ENTRY_SEP_CHAR); - exit(U_INVALID_FORMAT_ERROR); - } - prefixLength=(int32_t)(prefixLimit-s); - if(prefixLength==0 || prefixLength>=UPRV_LENGTHOF(pkgPrefix)) { - fprintf(stderr, - "icupkg: --auto_toc_prefix[_with_type] but " - "the prefix of the first entry \"%s\" is empty or too long\n", - s); - exit(U_INVALID_FORMAT_ERROR); - } - if(prefixEndsWithType && s[prefixLength-1]!=type) { - fprintf(stderr, - "icupkg: --auto_toc_prefix_with_type but " - "the prefix of the first entry \"%s\" does not end with '%c'\n", - s, type); - exit(U_INVALID_FORMAT_ERROR); - } - memcpy(pkgPrefix, s, prefixLength); - pkgPrefix[prefixLength]=0; - memcpy(prefix, s, ++prefixLength); // include the / - } else { - // Use the package basename as prefix. - int32_t inPkgNameLength= static_cast<int32_t>(strlen(inPkgName)); - memcpy(prefix, inPkgName, inPkgNameLength); - prefixLength=inPkgNameLength; - - if( (int32_t)strlen(s)>=(inPkgNameLength+2) && - 0==memcmp(s, inPkgName, inPkgNameLength) && - s[inPkgNameLength]=='_' - ) { - // old-style .dat package - prefix[prefixLength++]='_'; - } else { - // new-style .dat package - prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; - // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR - // then the test in the loop below will fail - } - } - prefix[prefixLength]=0; - - /* read the ToC table */ - for(i=0; i<itemCount; ++i) { - // skip the package part of the item name, error if it does not match the actual package name - // or if nothing follows the package name - offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; - s=inItemStrings+offset; - if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { - fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", - s, prefix); - exit(U_INVALID_FORMAT_ERROR); - } - items[i].name=s+prefixLength; - - // set the item's data - items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); - if(i>0) { - items[i-1].length=(int32_t)(items[i].data-items[i-1].data); - - // set the previous item's platform type - typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); - if(typeEnum<0 || U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); - exit(U_INVALID_FORMAT_ERROR); - } - items[i-1].type=makeTypeLetter(typeEnum); - } - items[i].isDataOwned=FALSE; - } - // set the last item's length - items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); - - // set the last item's platform type - typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); - if(typeEnum<0 || U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[itemCount-1].name, filename); - exit(U_INVALID_FORMAT_ERROR); - } - items[itemCount-1].type=makeTypeLetter(typeEnum); - - if(type!=U_ICUDATA_TYPE_LETTER[0]) { - // sort the item names for the local charset - sortItems(); - } - } - - udata_closeSwapper(ds); -} - -char -Package::getInType() { - return makeTypeLetter(inCharset, inIsBigEndian); -} - -void -Package::writePackage(const char *filename, char outType, const char *comment) { - char prefix[MAX_PKG_NAME_LENGTH+4]; - UDataOffsetTOCEntry entry; - UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; - FILE *file; - Item *pItem; - char *name; - UErrorCode errorCode; - int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; - uint8_t outCharset; - UBool outIsBigEndian; - - extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); - - // if there is an explicit comment, then use it, else use what's in the current header - if(comment!=NULL) { - /* get the header size minus the current comment */ - DataHeader *pHeader; - int32_t length; - - pHeader=(DataHeader *)header; - headerLength=4+pHeader->info.size; - length=(int32_t)strlen(comment); - if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { - fprintf(stderr, "icupkg: comment too long\n"); - exit(U_BUFFER_OVERFLOW_ERROR); - } - memcpy(header+headerLength, comment, length+1); - headerLength+=length; - if(headerLength&0xf) { - /* NUL-pad the header to a multiple of 16 */ - length=(headerLength+0xf)&~0xf; - memset(header+headerLength, 0, length-headerLength); - headerLength=length; - } - pHeader->dataHeader.headerSize=(uint16_t)headerLength; - } - - makeTypeProps(outType, outCharset, outIsBigEndian); - - // open (TYPE_COUNT-2) swappers - // one is a no-op for local type==outType - // one type (TYPE_LE) is bogus - errorCode=U_ZERO_ERROR; - i=makeTypeEnum(outType); - ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); - ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); - ds[TYPE_LE]=NULL; - ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); - exit(errorCode); - } - for(i=0; i<TYPE_COUNT; ++i) { - if(ds[i]!=NULL) { - ds[i]->printError=printPackageError; - ds[i]->printErrorContext=stderr; - } - } - - dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; - - // create the file and write its contents - file=fopen(filename, "wb"); - if(file==NULL) { - fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - // swap and write the header - if(dsLocalToOut!=NULL) { - udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); - exit(errorCode); - } - } - length=(int32_t)fwrite(header, 1, headerLength, file); - if(length!=headerLength) { - fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - // prepare and swap the package name with a tree separator - // for prepending to item names - if(pkgPrefix[0]==0) { - prefixLength=(int32_t)strlen(prefix); - } else { - prefixLength=(int32_t)strlen(pkgPrefix); - memcpy(prefix, pkgPrefix, prefixLength); - if(prefixEndsWithType) { - prefix[prefixLength-1]=outType; - } - } - prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; - prefix[prefixLength]=0; - if(dsLocalToOut!=NULL) { - dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); - exit(errorCode); - } - - // swap and sort the item names (sorting needs to be done in the output charset) - dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); - exit(errorCode); - } - sortItems(); - } - - // create the output item names in sorted order, with the package name prepended to each - for(i=0; i<itemCount; ++i) { - length=(int32_t)strlen(items[i].name); - name=allocString(FALSE, length+prefixLength); - memcpy(name, prefix, prefixLength); - memcpy(name+prefixLength, items[i].name, length+1); - items[i].name=name; - } - - // calculate offsets for item names and items, pad to 16-align items - // align only the first item; each item's length is a multiple of 16 - basenameOffset=4+8*itemCount; - offset=basenameOffset+outStringTop; - if((length=(offset&15))!=0) { - length=16-length; - memset(allocString(FALSE, length-1), 0xaa, length); - offset+=length; - } - - // write the table of contents - // first the itemCount - outInt32=itemCount; - if(dsLocalToOut!=NULL) { - dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); - exit(errorCode); - } - } - length=(int32_t)fwrite(&outInt32, 1, 4, file); - if(length!=4) { - fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - // then write the item entries (and collect the maxItemLength) - maxItemLength=0; - for(i=0; i<itemCount; ++i) { - entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); - entry.dataOffset=(uint32_t)offset; - if(dsLocalToOut!=NULL) { - dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); - exit(errorCode); - } - } - length=(int32_t)fwrite(&entry, 1, 8, file); - if(length!=8) { - fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); - exit(U_FILE_ACCESS_ERROR); - } - - length=items[i].length; - if(length>maxItemLength) { - maxItemLength=length; - } - offset+=length; - } - - // write the item names - length=(int32_t)fwrite(outStrings, 1, outStringTop, file); - if(length!=outStringTop) { - fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - // write the items - for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { - int32_t type=makeTypeEnum(pItem->type); - if(ds[type]!=NULL) { - // swap each item from its platform properties to the desired ones - udata_swap( - ds[type], - pItem->data, pItem->length, pItem->data, - &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); - exit(errorCode); - } - } - length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); - if(length!=pItem->length) { - fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); - exit(U_FILE_ACCESS_ERROR); - } - } - - if(ferror(file)) { - fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - fclose(file); - for(i=0; i<TYPE_COUNT; ++i) { - udata_closeSwapper(ds[i]); - } -} - -int32_t -Package::findItem(const char *name, int32_t length) const { - int32_t i, start, limit; - int result; - - /* do a binary search for the string */ - start=0; - limit=itemCount; - while(start<limit) { - i=(start+limit)/2; - if(length>=0) { - result=strncmp(name, items[i].name, length); - } else { - result=strcmp(name, items[i].name); - } - - if(result==0) { - /* found */ - if(length>=0) { - /* - * if we compared just prefixes, then we may need to back up - * to the first item with this prefix - */ - while(i>0 && 0==strncmp(name, items[i-1].name, length)) { - --i; - } - } - return i; - } else if(result<0) { - limit=i; - } else /* result>0 */ { - start=i+1; - } - } - - return ~start; /* not found, return binary-not of the insertion point */ -} - -void -Package::findItems(const char *pattern) { - const char *wild; - - if(pattern==NULL || *pattern==0) { - findNextIndex=-1; - return; - } - - findPrefix=pattern; - findSuffix=NULL; - findSuffixLength=0; - - wild=strchr(pattern, '*'); - if(wild==NULL) { - // no wildcard - findPrefixLength=(int32_t)strlen(pattern); - } else { - // one wildcard - findPrefixLength=(int32_t)(wild-pattern); - findSuffix=wild+1; - findSuffixLength=(int32_t)strlen(findSuffix); - if(NULL!=strchr(findSuffix, '*')) { - // two or more wildcards - fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); - exit(U_PARSE_ERROR); - } - } - - if(findPrefixLength==0) { - findNextIndex=0; - } else { - findNextIndex=findItem(findPrefix, findPrefixLength); - } -} - -int32_t -Package::findNextItem() { - const char *name, *middle, *treeSep; - int32_t idx, nameLength, middleLength; - - if(findNextIndex<0) { - return -1; - } - - while(findNextIndex<itemCount) { - idx=findNextIndex++; - name=items[idx].name; - nameLength=(int32_t)strlen(name); - if(nameLength<(findPrefixLength+findSuffixLength)) { - // item name too short for prefix & suffix - continue; - } - if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { - // left the range of names with this prefix - break; - } - middle=name+findPrefixLength; - middleLength=nameLength-findPrefixLength-findSuffixLength; - if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { - // suffix does not match - continue; - } - // prefix & suffix match - - if(matchMode&MATCH_NOSLASH) { - treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); - if(treeSep!=NULL && (treeSep-middle)<middleLength) { - // the middle (matching the * wildcard) contains a tree separator / - continue; - } - } - - // found a matching item - return idx; - } - - // no more items - findNextIndex=-1; - return -1; -} - -void -Package::setMatchMode(uint32_t mode) { - matchMode=mode; -} - -void -Package::addItem(const char *name) { - addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); -} - -void -Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { - int32_t idx; - - idx=findItem(name); - if(idx<0) { - // new item, make space at the insertion point - ensureItemCapacity(); - // move the following items down - idx=~idx; - if(idx<itemCount) { - memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); - } - ++itemCount; - - // reset this Item entry - memset(items+idx, 0, sizeof(Item)); - - // copy the item's name - items[idx].name=allocString(TRUE, static_cast<int32_t>(strlen(name))); - strcpy(items[idx].name, name); - pathToTree(items[idx].name); - } else { - // same-name item found, replace it - if(items[idx].isDataOwned) { - uprv_free(items[idx].data); - } - - // keep the item's name since it is the same - } - - // set the item's data - items[idx].data=data; - items[idx].length=length; - items[idx].isDataOwned=isDataOwned; - items[idx].type=type; -} - -void -Package::addFile(const char *filesPath, const char *name) { - uint8_t *data; - int32_t length; - char type; - - data=readFile(filesPath, name, length, type); - // readFile() exits the tool if it fails - addItem(name, data, length, TRUE, type); -} - -void -Package::addItems(const Package &listPkg) { - const Item *pItem; - int32_t i; - - for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { - addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); - } -} - -void -Package::removeItem(int32_t idx) { - if(idx>=0) { - // remove the item - if(items[idx].isDataOwned) { - uprv_free(items[idx].data); - } - - // move the following items up - if((idx+1)<itemCount) { - memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); - } - --itemCount; - - if(idx<=findNextIndex) { - --findNextIndex; - } - } -} - -void -Package::removeItems(const char *pattern) { - int32_t idx; - - findItems(pattern); - while((idx=findNextItem())>=0) { - removeItem(idx); - } -} - -void -Package::removeItems(const Package &listPkg) { - const Item *pItem; - int32_t i; - - for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { - removeItems(pItem->name); - } -} - -void -Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { - char filename[1024]; - UDataSwapper *ds; - FILE *file; - Item *pItem; - int32_t fileLength; - uint8_t itemCharset, outCharset; - UBool itemIsBigEndian, outIsBigEndian; - - if(idx<0 || itemCount<=idx) { - return; - } - pItem=items+idx; - - // swap the data to the outType - // outType==0: don't swap - if(outType!=0 && pItem->type!=outType) { - // open the swapper - UErrorCode errorCode=U_ZERO_ERROR; - makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); - makeTypeProps(outType, outCharset, outIsBigEndian); - ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", - (long)idx, u_errorName(errorCode)); - exit(errorCode); - } - - ds->printError=printPackageError; - ds->printErrorContext=stderr; - - // swap the item from its platform properties to the desired ones - udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); - exit(errorCode); - } - udata_closeSwapper(ds); - pItem->type=outType; - } - - // create the file and write its contents - makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); - file=fopen(filename, "wb"); - if(file==NULL) { - fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); - - if(ferror(file) || fileLength!=pItem->length) { - fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - fclose(file); -} - -void -Package::extractItem(const char *filesPath, int32_t idx, char outType) { - extractItem(filesPath, items[idx].name, idx, outType); -} - -void -Package::extractItems(const char *filesPath, const char *pattern, char outType) { - int32_t idx; - - findItems(pattern); - while((idx=findNextItem())>=0) { - extractItem(filesPath, idx, outType); - } -} - -void -Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { - const Item *pItem; - int32_t i; - - for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { - extractItems(filesPath, pItem->name, outType); - } -} - -int32_t -Package::getItemCount() const { - return itemCount; -} - -const Item * -Package::getItem(int32_t idx) const { - if (0 <= idx && idx < itemCount) { - return &items[idx]; - } - return NULL; -} - -void -Package::checkDependency(void *context, const char *itemName, const char *targetName) { - // check dependency: make sure the target item is in the package - Package *me=(Package *)context; - if(me->findItem(targetName)<0) { - me->isMissingItems=TRUE; - fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); - } -} - -UBool -Package::checkDependencies() { - isMissingItems=FALSE; - enumDependencies(this, checkDependency); - return (UBool)!isMissingItems; -} - -void -Package::enumDependencies(void *context, CheckDependency check) { - int32_t i; - - for(i=0; i<itemCount; ++i) { - enumDependencies(items+i, context, check); - } -} - -char * -Package::allocString(UBool in, int32_t length) { - char *p; - int32_t top; - - if(in) { - top=inStringTop; - p=inStrings+top; - } else { - top=outStringTop; - p=outStrings+top; - } - top+=length+1; - - if(top>STRING_STORE_SIZE) { - fprintf(stderr, "icupkg: string storage overflow\n"); - exit(U_BUFFER_OVERFLOW_ERROR); - } - if(in) { - inStringTop=top; - } else { - outStringTop=top; - } - return p; -} - -void -Package::sortItems() { - UErrorCode errorCode=U_ZERO_ERROR; - uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); - exit(errorCode); - } -} - -void Package::setItemCapacity(int32_t max) -{ - if(max<=itemMax) { - return; - } - Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0])); - Item *oldItems = items; - if(newItems == NULL) { - fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n", - (unsigned long)(max*sizeof(items[0])), max); - exit(U_MEMORY_ALLOCATION_ERROR); - } - if(items && itemCount>0) { - uprv_memcpy(newItems, items, (size_t)itemCount*sizeof(items[0])); - } - itemMax = max; - items = newItems; - uprv_free(oldItems); -} - -void Package::ensureItemCapacity() -{ - if((itemCount+1)>itemMax) { - setItemCapacity(itemCount+kItemsChunk); - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/tools/toolutil/package.h b/deps/node/deps/icu-small/source/tools/toolutil/package.h deleted file mode 100644 index 3263c84f..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/package.h +++ /dev/null @@ -1,201 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2005-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: package.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2005aug25 -* created by: Markus W. Scherer -* -* Read, modify, and write ICU .dat data package files. -*/ - -#ifndef __PACKAGE_H__ -#define __PACKAGE_H__ - -#include "unicode/utypes.h" - -#include <stdio.h> - -// .dat package file representation ---------------------------------------- *** - -#define STRING_STORE_SIZE 100000 -#define MAX_PKG_NAME_LENGTH 64 - -typedef void CheckDependency(void *context, const char *itemName, const char *targetName); - -U_NAMESPACE_BEGIN - -struct Item { - char *name; - uint8_t *data; - int32_t length; - UBool isDataOwned; - char type; -}; - -class U_TOOLUTIL_API Package { -public: - /* - * Constructor. - * Prepare this object for a new, empty package. - */ - Package(); - - /* Destructor. */ - ~Package(); - - /** - * Uses the prefix of the first entry of the package in readPackage(), - * rather than the package basename. - */ - void setAutoPrefix() { doAutoPrefix=TRUE; } - /** - * Same as setAutoPrefix(), plus the prefix must end with the platform type letter. - */ - void setAutoPrefixWithType() { - doAutoPrefix=TRUE; - prefixEndsWithType=TRUE; - } - void setPrefix(const char *p); - - /* - * Read an existing .dat package file. - * The header and item name strings are swapped into this object, - * but the items are left unswapped. - */ - void readPackage(const char *filename); - /* - * Write a .dat package file with the items in this object. - * Swap all pieces to the desired output platform properties. - * The package becomes unusable: - * The item names are swapped and sorted in the outCharset rather than the local one. - * Also, the items themselves are swapped in-place - */ - void writePackage(const char *filename, char outType, const char *comment); - - /* - * Return the input data type letter (l, b, or e). - */ - char getInType(); - - // find the item in items[], return the non-negative index if found, else the binary-not of the insertion point - int32_t findItem(const char *name, int32_t length=-1) const; - - /* - * Set internal state for following calls to findNextItem() which will return - * indexes for items whose names match the pattern. - */ - void findItems(const char *pattern); - int32_t findNextItem(); - /* - * Set the match mode for findItems() & findNextItem(). - * @param mode 0=default - * MATCH_NOSLASH * does not match a '/' - */ - void setMatchMode(uint32_t mode); - - enum { - MATCH_NOSLASH=1 - }; - - void addItem(const char *name); - void addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type); - void addFile(const char *filesPath, const char *name); - void addItems(const Package &listPkg); - - void removeItem(int32_t itemIndex); - void removeItems(const char *pattern); - void removeItems(const Package &listPkg); - - /* The extractItem() functions accept outputType=0 to mean "don't swap the item". */ - void extractItem(const char *filesPath, int32_t itemIndex, char outType); - void extractItems(const char *filesPath, const char *pattern, char outType); - void extractItems(const char *filesPath, const Package &listPkg, char outType); - - /* This variant extracts an item to a specific filename. */ - void extractItem(const char *filesPath, const char *outName, int32_t itemIndex, char outType); - - int32_t getItemCount() const; - const Item *getItem(int32_t idx) const; - - /* - * Check dependencies and return TRUE if all dependencies are fulfilled. - */ - UBool checkDependencies(); - - /* - * Enumerate all the dependencies and give the results to context and call CheckDependency callback - * @param context user context (will be passed to check function) - * @param check will be called with context and any missing items - */ - void enumDependencies(void *context, CheckDependency check); - -private: - void enumDependencies(Item *pItem, void *context, CheckDependency check); - - /** - * Default CheckDependency function used by checkDependencies() - */ - static void checkDependency(void *context, const char *itemName, const char *targetName); - - /* - * Allocate a string in inStrings or outStrings. - * The length does not include the terminating NUL. - */ - char *allocString(UBool in, int32_t length); - - void sortItems(); - - // data fields - char inPkgName[MAX_PKG_NAME_LENGTH]; - char pkgPrefix[MAX_PKG_NAME_LENGTH]; - - uint8_t *inData; - uint8_t header[1024]; - int32_t inLength, headerLength; - uint8_t inCharset; - UBool inIsBigEndian; - UBool doAutoPrefix; - UBool prefixEndsWithType; - - int32_t itemCount; - int32_t itemMax; - Item *items; - - int32_t inStringTop, outStringTop; - char inStrings[STRING_STORE_SIZE], outStrings[STRING_STORE_SIZE]; - - // match mode for findItems(pattern) and findNextItem() - uint32_t matchMode; - - // state for findItems(pattern) and findNextItem() - const char *findPrefix, *findSuffix; - int32_t findPrefixLength, findSuffixLength; - int32_t findNextIndex; - - // state for checkDependencies() - UBool isMissingItems; - - /** - * Grow itemMax to new value - */ - void setItemCapacity(int32_t max); - - /** - * Grow itemMax to at least itemCount+1 - */ - void ensureItemCapacity(); -}; - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/pkg_genc.cpp b/deps/node/deps/icu-small/source/tools/toolutil/pkg_genc.cpp deleted file mode 100644 index 5ab0d846..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/pkg_genc.cpp +++ /dev/null @@ -1,1214 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** - * Copyright (C) 2009-2016, International Business Machines - * Corporation and others. All Rights Reserved. - ******************************************************************************* - */ -#include "unicode/utypes.h" - -#if U_PLATFORM_HAS_WIN32_API -# define VC_EXTRALEAN -# define WIN32_LEAN_AND_MEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -#include <windows.h> -#include <time.h> -# ifdef __GNUC__ -# define WINDOWS_WITH_GNUC -# endif -#endif - -#if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H -# define U_ELF -#endif - -#ifdef U_ELF -# include <elf.h> -# if defined(ELFCLASS64) -# define U_ELF64 -# endif - /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */ -# ifndef EM_X86_64 -# define EM_X86_64 62 -# endif -# define ICU_ENTRY_OFFSET 0 -#endif - -#include <stdio.h> -#include <stdlib.h> -#include "unicode/putil.h" -#include "cmemory.h" -#include "cstring.h" -#include "filestrm.h" -#include "toolutil.h" -#include "unicode/uclean.h" -#include "uoptions.h" -#include "pkg_genc.h" -#include "filetools.h" - -#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) - -#define HEX_0X 0 /* 0x1234 */ -#define HEX_0H 1 /* 01234h */ - -/* prototypes --------------------------------------------------------------- */ -static void -getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename); - -static uint32_t -write8(FileStream *out, uint8_t byte, uint32_t column); - -static uint32_t -write32(FileStream *out, uint32_t byte, uint32_t column); - -#if U_PLATFORM == U_PF_OS400 -static uint32_t -write8str(FileStream *out, uint8_t byte, uint32_t column); -#endif -/* -------------------------------------------------------------------------- */ - -/* -Creating Template Files for New Platforms - -Let the cc compiler help you get started. -Compile this program - const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16}; -with the -S option to produce assembly output. - -For example, this will generate array.s: -gcc -S array.c - -This will produce a .s file that may look like this: - - .file "array.c" - .version "01.01" -gcc2_compiled.: - .globl x - .section .rodata - .align 4 - .type x,@object - .size x,20 -x: - .long 1 - .long 2 - .long -559038737 - .long -1 - .long 16 - .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" - -which gives a starting point that will compile, and can be transformed -to become the template, generally with some consulting of as docs and -some experimentation. - -If you want ICU to automatically use this assembly, you should -specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file, -where the name is the compiler or platform that you used in this -assemblyHeader data structure. -*/ -static const struct AssemblyType { - const char *name; - const char *header; - const char *beginLine; - const char *footer; - int8_t hexType; /* HEX_0X or HEX_0h */ -} assemblyHeader[] = { - /* For gcc assemblers, the meaning of .align changes depending on the */ - /* hardware, so we use .balign 16 which always means 16 bytes. */ - /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */ - {"gcc", - ".globl %s\n" - "\t.section .note.GNU-stack,\"\",%%progbits\n" - "\t.section .rodata\n" - "\t.balign 16\n" - "#ifdef U_HIDE_DATA_SYMBOL\n" - "\t.hidden %s\n" - "#endif\n" - "\t.type %s,%%object\n" - "%s:\n\n", - - ".long ",".size %s, .-%s\n",HEX_0X - }, - {"gcc-darwin", - /*"\t.section __TEXT,__text,regular,pure_instructions\n" - "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/ - ".globl _%s\n" - "#ifdef U_HIDE_DATA_SYMBOL\n" - "\t.private_extern _%s\n" - "#endif\n" - "\t.data\n" - "\t.const\n" - "\t.balign 16\n" - "_%s:\n\n", - - ".long ","",HEX_0X - }, - {"gcc-cygwin", - ".globl _%s\n" - "\t.section .rodata\n" - "\t.balign 16\n" - "_%s:\n\n", - - ".long ","",HEX_0X - }, - {"gcc-mingw64", - ".globl %s\n" - "\t.section .rodata\n" - "\t.balign 16\n" - "%s:\n\n", - - ".long ","",HEX_0X - }, -/* 16 bytes alignment. */ -/* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */ - {"sun", - "\t.section \".rodata\"\n" - "\t.align 16\n" - ".globl %s\n" - "%s:\n", - - ".word ","",HEX_0X - }, -/* 16 bytes alignment for sun-x86. */ -/* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */ - {"sun-x86", - "Drodata.rodata:\n" - "\t.type Drodata.rodata,@object\n" - "\t.size Drodata.rodata,0\n" - "\t.globl %s\n" - "\t.align 16\n" - "%s:\n", - - ".4byte ","",HEX_0X - }, -/* 1<<4 bit alignment for aix. */ -/* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */ - {"xlc", - ".globl %s{RO}\n" - "\t.toc\n" - "%s:\n" - "\t.csect %s{RO}, 4\n", - - ".long ","",HEX_0X - }, - {"aCC-ia64", - "\t.file \"%s.s\"\n" - "\t.type %s,@object\n" - "\t.global %s\n" - "\t.secalias .abe$0.rodata, \".rodata\"\n" - "\t.section .abe$0.rodata = \"a\", \"progbits\"\n" - "\t.align 16\n" - "%s::\t", - - "data4 ","",HEX_0X - }, - {"aCC-parisc", - "\t.SPACE $TEXT$\n" - "\t.SUBSPA $LIT$\n" - "%s\n" - "\t.EXPORT %s\n" - "\t.ALIGN 16\n", - - ".WORD ","",HEX_0X - }, -/* align 16 bytes */ -/* http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */ - { "masm", - "\tTITLE %s\n" - "; generated by genccode\n" - ".386\n" - ".model flat\n" - "\tPUBLIC _%s\n" - "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n" - "\tALIGN 16\n" - "_%s\tLABEL DWORD\n", - "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H - } -}; - -static int32_t assemblyHeaderIndex = -1; -static int32_t hexType = HEX_0X; - -U_CAPI UBool U_EXPORT2 -checkAssemblyHeaderName(const char* optAssembly) { - int32_t idx; - assemblyHeaderIndex = -1; - for (idx = 0; idx < UPRV_LENGTHOF(assemblyHeader); idx++) { - if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) { - assemblyHeaderIndex = idx; - hexType = assemblyHeader[idx].hexType; /* set the hex type */ - return TRUE; - } - } - - return FALSE; -} - - -U_CAPI void U_EXPORT2 -printAssemblyHeadersToStdErr(void) { - int32_t idx; - fprintf(stderr, "%s", assemblyHeader[0].name); - for (idx = 1; idx < UPRV_LENGTHOF(assemblyHeader); idx++) { - fprintf(stderr, ", %s", assemblyHeader[idx].name); - } - fprintf(stderr, - ")\n"); -} - -U_CAPI void U_EXPORT2 -writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { - uint32_t column = MAX_COLUMN; - char entry[64]; - uint32_t buffer[1024]; - char *bufferStr = (char *)buffer; - FileStream *in, *out; - size_t i, length; - - in=T_FileStream_open(filename, "rb"); - if(in==NULL) { - fprintf(stderr, "genccode: unable to open input file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename); - out=T_FileStream_open(bufferStr, "w"); - if(out==NULL) { - fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); - exit(U_FILE_ACCESS_ERROR); - } - - if (outFilePath != NULL) { - uprv_strcpy(outFilePath, bufferStr); - } - -#if defined (WINDOWS_WITH_GNUC) && U_PLATFORM != U_PF_CYGWIN - /* Need to fix the file separator character when using MinGW. */ - swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); -#endif - - if(optEntryPoint != NULL) { - uprv_strcpy(entry, optEntryPoint); - uprv_strcat(entry, "_dat"); - } - - /* turn dashes or dots in the entry name into underscores */ - length=uprv_strlen(entry); - for(i=0; i<length; ++i) { - if(entry[i]=='-' || entry[i]=='.') { - entry[i]='_'; - } - } - - sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header, - entry, entry, entry, entry, - entry, entry, entry, entry); - T_FileStream_writeLine(out, bufferStr); - T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine); - - for(;;) { - length=T_FileStream_read(in, buffer, sizeof(buffer)); - if(length==0) { - break; - } - if (length != sizeof(buffer)) { - /* pad with extra 0's when at the end of the file */ - for(i=0; i < (length % sizeof(uint32_t)); ++i) { - buffer[length+i] = 0; - } - } - for(i=0; i<(length/sizeof(buffer[0])); i++) { - column = write32(out, buffer[i], column); - } - } - - T_FileStream_writeLine(out, "\n"); - - sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer, - entry, entry, entry, entry, - entry, entry, entry, entry); - T_FileStream_writeLine(out, bufferStr); - - if(T_FileStream_error(in)) { - fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - if(T_FileStream_error(out)) { - fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - T_FileStream_close(out); - T_FileStream_close(in); -} - -U_CAPI void U_EXPORT2 -writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) { - uint32_t column = MAX_COLUMN; - char buffer[4096], entry[64]; - FileStream *in, *out; - size_t i, length; - - in=T_FileStream_open(filename, "rb"); - if(in==NULL) { - fprintf(stderr, "genccode: unable to open input file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - if(optName != NULL) { /* prepend 'icudt28_' */ - strcpy(entry, optName); - strcat(entry, "_"); - } else { - entry[0] = 0; - } - - getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename); - if (outFilePath != NULL) { - uprv_strcpy(outFilePath, buffer); - } - out=T_FileStream_open(buffer, "w"); - if(out==NULL) { - fprintf(stderr, "genccode: unable to open output file %s\n", buffer); - exit(U_FILE_ACCESS_ERROR); - } - - /* turn dashes or dots in the entry name into underscores */ - length=uprv_strlen(entry); - for(i=0; i<length; ++i) { - if(entry[i]=='-' || entry[i]=='.') { - entry[i]='_'; - } - } - -#if U_PLATFORM == U_PF_OS400 - /* - TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c - - This is here because this platform can't currently put - const data into the read-only pages of an object or - shared library (service program). Only strings are allowed in read-only - pages, so we use char * strings to store the data. - - In order to prevent the beginning of the data from ever matching the - magic numbers we must still use the initial double. - [grhoten 4/24/2003] - */ - sprintf(buffer, - "#ifndef IN_GENERATED_CCODE\n" - "#define IN_GENERATED_CCODE\n" - "#define U_DISABLE_RENAMING 1\n" - "#include \"unicode/umachine.h\"\n" - "#endif\n" - "U_CDECL_BEGIN\n" - "const struct {\n" - " double bogus;\n" - " const char *bytes; \n" - "} %s={ 0.0, \n", - entry); - T_FileStream_writeLine(out, buffer); - - for(;;) { - length=T_FileStream_read(in, buffer, sizeof(buffer)); - if(length==0) { - break; - } - for(i=0; i<length; ++i) { - column = write8str(out, (uint8_t)buffer[i], column); - } - } - - T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n"); -#else - /* Function renaming shouldn't be done in data */ - sprintf(buffer, - "#ifndef IN_GENERATED_CCODE\n" - "#define IN_GENERATED_CCODE\n" - "#define U_DISABLE_RENAMING 1\n" - "#include \"unicode/umachine.h\"\n" - "#endif\n" - "U_CDECL_BEGIN\n" - "const struct {\n" - " double bogus;\n" - " uint8_t bytes[%ld]; \n" - "} %s={ 0.0, {\n", - (long)T_FileStream_size(in), entry); - T_FileStream_writeLine(out, buffer); - - for(;;) { - length=T_FileStream_read(in, buffer, sizeof(buffer)); - if(length==0) { - break; - } - for(i=0; i<length; ++i) { - column = write8(out, (uint8_t)buffer[i], column); - } - } - - T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n"); -#endif - - if(T_FileStream_error(in)) { - fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - if(T_FileStream_error(out)) { - fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - T_FileStream_close(out); - T_FileStream_close(in); -} - -static uint32_t -write32(FileStream *out, uint32_t bitField, uint32_t column) { - int32_t i; - char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */ - char *s = bitFieldStr; - uint8_t *ptrIdx = (uint8_t *)&bitField; - static const char hexToStr[16] = { - '0','1','2','3', - '4','5','6','7', - '8','9','A','B', - 'C','D','E','F' - }; - - /* write the value, possibly with comma and newline */ - if(column==MAX_COLUMN) { - /* first byte */ - column=1; - } else if(column<32) { - *(s++)=','; - ++column; - } else { - *(s++)='\n'; - uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine); - s+=uprv_strlen(s); - column=1; - } - - if (bitField < 10) { - /* It's a small number. Don't waste the space for 0x */ - *(s++)=hexToStr[bitField]; - } - else { - int seenNonZero = 0; /* This is used to remove leading zeros */ - - if(hexType==HEX_0X) { - *(s++)='0'; - *(s++)='x'; - } else if(hexType==HEX_0H) { - *(s++)='0'; - } - - /* This creates a 32-bit field */ -#if U_IS_BIG_ENDIAN - for (i = 0; i < sizeof(uint32_t); i++) -#else - for (i = sizeof(uint32_t)-1; i >= 0 ; i--) -#endif - { - uint8_t value = ptrIdx[i]; - if (value || seenNonZero) { - *(s++)=hexToStr[value>>4]; - *(s++)=hexToStr[value&0xF]; - seenNonZero = 1; - } - } - if(hexType==HEX_0H) { - *(s++)='h'; - } - } - - *(s++)=0; - T_FileStream_writeLine(out, bitFieldStr); - return column; -} - -static uint32_t -write8(FileStream *out, uint8_t byte, uint32_t column) { - char s[4]; - int i=0; - - /* convert the byte value to a string */ - if(byte>=100) { - s[i++]=(char)('0'+byte/100); - byte%=100; - } - if(i>0 || byte>=10) { - s[i++]=(char)('0'+byte/10); - byte%=10; - } - s[i++]=(char)('0'+byte); - s[i]=0; - - /* write the value, possibly with comma and newline */ - if(column==MAX_COLUMN) { - /* first byte */ - column=1; - } else if(column<16) { - T_FileStream_writeLine(out, ","); - ++column; - } else { - T_FileStream_writeLine(out, ",\n"); - column=1; - } - T_FileStream_writeLine(out, s); - return column; -} - -#if U_PLATFORM == U_PF_OS400 -static uint32_t -write8str(FileStream *out, uint8_t byte, uint32_t column) { - char s[8]; - - if (byte > 7) - sprintf(s, "\\x%X", byte); - else - sprintf(s, "\\%X", byte); - - /* write the value, possibly with comma and newline */ - if(column==MAX_COLUMN) { - /* first byte */ - column=1; - T_FileStream_writeLine(out, "\""); - } else if(column<24) { - ++column; - } else { - T_FileStream_writeLine(out, "\"\n\""); - column=1; - } - T_FileStream_writeLine(out, s); - return column; -} -#endif - -static void -getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) { - const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.'); - - /* copy path */ - if(destdir!=NULL && *destdir!=0) { - do { - *outFilename++=*destdir++; - } while(*destdir!=0); - if(*(outFilename-1)!=U_FILE_SEP_CHAR) { - *outFilename++=U_FILE_SEP_CHAR; - } - inFilename=basename; - } else { - while(inFilename<basename) { - *outFilename++=*inFilename++; - } - } - - if(suffix==NULL) { - /* the filename does not have a suffix */ - uprv_strcpy(entryName, inFilename); - if(optFilename != NULL) { - uprv_strcpy(outFilename, optFilename); - } else { - uprv_strcpy(outFilename, inFilename); - } - uprv_strcat(outFilename, newSuffix); - } else { - char *saveOutFilename = outFilename; - /* copy basename */ - while(inFilename<suffix) { - if(*inFilename=='-') { - /* iSeries cannot have '-' in the .o objects. */ - *outFilename++=*entryName++='_'; - inFilename++; - } - else { - *outFilename++=*entryName++=*inFilename++; - } - } - - /* replace '.' by '_' */ - *outFilename++=*entryName++='_'; - ++inFilename; - - /* copy suffix */ - while(*inFilename!=0) { - *outFilename++=*entryName++=*inFilename++; - } - - *entryName=0; - - if(optFilename != NULL) { - uprv_strcpy(saveOutFilename, optFilename); - uprv_strcat(saveOutFilename, newSuffix); - } else { - /* add ".c" */ - uprv_strcpy(outFilename, newSuffix); - } - } -} - -#ifdef CAN_GENERATE_OBJECTS -static void -getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) { - union { - char bytes[2048]; -#ifdef U_ELF - Elf32_Ehdr header32; - /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */ -#elif U_PLATFORM_HAS_WIN32_API - IMAGE_FILE_HEADER header; -#endif - } buffer; - - const char *filename; - FileStream *in; - int32_t length; - -#ifdef U_ELF - -#elif U_PLATFORM_HAS_WIN32_API - const IMAGE_FILE_HEADER *pHeader; -#else -# error "Unknown platform for CAN_GENERATE_OBJECTS." -#endif - - if(optMatchArch != NULL) { - filename=optMatchArch; - } else { - /* set defaults */ -#ifdef U_ELF - /* set EM_386 because elf.h does not provide better defaults */ - *pCPU=EM_386; - *pBits=32; - *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB); -#elif U_PLATFORM_HAS_WIN32_API -/* _M_IA64 should be defined in windows.h */ -# if defined(_M_IA64) - *pCPU=IMAGE_FILE_MACHINE_IA64; - *pBits = 64; -# elif defined(_M_AMD64) -// link.exe does not really care about the .obj machine type and this will -// allow us to build a dll for both ARM & x64 with an amd64 built tool -// ARM is same as x64 except for first 2 bytes of object file - *pCPU = IMAGE_FILE_MACHINE_UNKNOWN; - // *pCPU = IMAGE_FILE_MACHINE_ARMNT; // If we wanted to be explicit - // *pCPU = IMAGE_FILE_MACHINE_AMD64; // We would use one of these names - *pBits = 64; // Doesn't seem to be used for anything interesting? -# else - *pCPU=IMAGE_FILE_MACHINE_I386; // We would use one of these names - *pBits = 32; -# endif - *pIsBigEndian=FALSE; -#else -# error "Unknown platform for CAN_GENERATE_OBJECTS." -#endif - return; - } - - in=T_FileStream_open(filename, "rb"); - if(in==NULL) { - fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes)); - -#ifdef U_ELF - if(length<(int32_t)sizeof(Elf32_Ehdr)) { - fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); - exit(U_UNSUPPORTED_ERROR); - } - if( - buffer.header32.e_ident[0]!=ELFMAG0 || - buffer.header32.e_ident[1]!=ELFMAG1 || - buffer.header32.e_ident[2]!=ELFMAG2 || - buffer.header32.e_ident[3]!=ELFMAG3 || - buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64 - ) { - fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename); - exit(U_UNSUPPORTED_ERROR); - } - - *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */ -#ifdef U_ELF64 - if(*pBits!=32 && *pBits!=64) { - fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n"); - exit(U_UNSUPPORTED_ERROR); - } -#else - if(*pBits!=32) { - fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n"); - exit(U_UNSUPPORTED_ERROR); - } -#endif - - *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB); - if(*pIsBigEndian!=U_IS_BIG_ENDIAN) { - fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n"); - exit(U_UNSUPPORTED_ERROR); - } - /* TODO: Support byte swapping */ - - *pCPU=buffer.header32.e_machine; -#elif U_PLATFORM_HAS_WIN32_API - if(length<sizeof(IMAGE_FILE_HEADER)) { - fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); - exit(U_UNSUPPORTED_ERROR); - } - /* TODO: Use buffer.header. Keep aliasing legal. */ - pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes; - *pCPU=pHeader->Machine; - /* - * The number of bits is implicit with the Machine value. - * *pBits is ignored in the calling code, so this need not be precise. - */ - *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; - /* Windows always runs on little-endian CPUs. */ - *pIsBigEndian=FALSE; -#else -# error "Unknown platform for CAN_GENERATE_OBJECTS." -#endif - - T_FileStream_close(in); -} - -U_CAPI void U_EXPORT2 -writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) { - /* common variables */ - char buffer[4096], entry[96]={ 0 }; - FileStream *in, *out; - const char *newSuffix; - int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0; - - uint16_t cpu, bits; - UBool makeBigEndian; - - /* platform-specific variables and initialization code */ -#ifdef U_ELF - /* 32-bit Elf file header */ - static Elf32_Ehdr header32={ - { - /* e_ident[] */ - ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, - ELFCLASS32, - U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, - EV_CURRENT /* EI_VERSION */ - }, - ET_REL, - EM_386, - EV_CURRENT, /* e_version */ - 0, /* e_entry */ - 0, /* e_phoff */ - (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */ - 0, /* e_flags */ - (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */ - 0, /* e_phentsize */ - 0, /* e_phnum */ - (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */ - 5, /* e_shnum */ - 2 /* e_shstrndx */ - }; - - /* 32-bit Elf section header table */ - static Elf32_Shdr sectionHeaders32[5]={ - { /* SHN_UNDEF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }, - { /* .symtab */ - 1, /* sh_name */ - SHT_SYMTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */ - (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */ - 3, /* sh_link=sect hdr index of .strtab */ - 1, /* sh_info=One greater than the symbol table index of the last - * local symbol (with STB_LOCAL). */ - 4, /* sh_addralign */ - (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */ - }, - { /* .shstrtab */ - 9, /* sh_name */ - SHT_STRTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */ - 40, /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 1, /* sh_addralign */ - 0 /* sh_entsize */ - }, - { /* .strtab */ - 19, /* sh_name */ - SHT_STRTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */ - (Elf32_Word)sizeof(entry), /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 1, /* sh_addralign */ - 0 /* sh_entsize */ - }, - { /* .rodata */ - 27, /* sh_name */ - SHT_PROGBITS, - SHF_ALLOC, /* sh_flags */ - 0, /* sh_addr */ - (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */ - 0, /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 16, /* sh_addralign */ - 0 /* sh_entsize */ - } - }; - - /* symbol table */ - static Elf32_Sym symbols32[2]={ - { /* STN_UNDEF */ - 0, 0, 0, 0, 0, 0 - }, - { /* data entry point */ - 1, /* st_name */ - 0, /* st_value */ - 0, /* st_size */ - ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), - 0, /* st_other */ - 4 /* st_shndx=index of related section table entry */ - } - }; - - /* section header string table, with decimal string offsets */ - static const char sectionStrings[40]= - /* 0 */ "\0" - /* 1 */ ".symtab\0" - /* 9 */ ".shstrtab\0" - /* 19 */ ".strtab\0" - /* 27 */ ".rodata\0" - /* 35 */ "\0\0\0\0"; /* contains terminating NUL */ - /* 40: padded to multiple of 8 bytes */ - - /* - * Use entry[] for the string table which will contain only the - * entry point name. - * entry[0] must be 0 (NUL) - * The entry point name can be up to 38 characters long (sizeof(entry)-2). - */ - - /* 16-align .rodata in the .o file, just in case */ - static const char padding[16]={ 0 }; - int32_t paddingSize; - -#ifdef U_ELF64 - /* 64-bit Elf file header */ - static Elf64_Ehdr header64={ - { - /* e_ident[] */ - ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, - ELFCLASS64, - U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, - EV_CURRENT /* EI_VERSION */ - }, - ET_REL, - EM_X86_64, - EV_CURRENT, /* e_version */ - 0, /* e_entry */ - 0, /* e_phoff */ - (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */ - 0, /* e_flags */ - (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */ - 0, /* e_phentsize */ - 0, /* e_phnum */ - (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */ - 5, /* e_shnum */ - 2 /* e_shstrndx */ - }; - - /* 64-bit Elf section header table */ - static Elf64_Shdr sectionHeaders64[5]={ - { /* SHN_UNDEF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }, - { /* .symtab */ - 1, /* sh_name */ - SHT_SYMTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */ - (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */ - 3, /* sh_link=sect hdr index of .strtab */ - 1, /* sh_info=One greater than the symbol table index of the last - * local symbol (with STB_LOCAL). */ - 4, /* sh_addralign */ - (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */ - }, - { /* .shstrtab */ - 9, /* sh_name */ - SHT_STRTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */ - 40, /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 1, /* sh_addralign */ - 0 /* sh_entsize */ - }, - { /* .strtab */ - 19, /* sh_name */ - SHT_STRTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */ - (Elf64_Xword)sizeof(entry), /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 1, /* sh_addralign */ - 0 /* sh_entsize */ - }, - { /* .rodata */ - 27, /* sh_name */ - SHT_PROGBITS, - SHF_ALLOC, /* sh_flags */ - 0, /* sh_addr */ - (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */ - 0, /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 16, /* sh_addralign */ - 0 /* sh_entsize */ - } - }; - - /* - * 64-bit symbol table - * careful: different order of items compared with Elf32_sym! - */ - static Elf64_Sym symbols64[2]={ - { /* STN_UNDEF */ - 0, 0, 0, 0, 0, 0 - }, - { /* data entry point */ - 1, /* st_name */ - ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), - 0, /* st_other */ - 4, /* st_shndx=index of related section table entry */ - 0, /* st_value */ - 0 /* st_size */ - } - }; - -#endif /* U_ELF64 */ - - /* entry[] have a leading NUL */ - entryOffset=1; - - /* in the common code, count entryLength from after the NUL */ - entryLengthOffset=1; - - newSuffix=".o"; - -#elif U_PLATFORM_HAS_WIN32_API - struct { - IMAGE_FILE_HEADER fileHeader; - IMAGE_SECTION_HEADER sections[2]; - char linkerOptions[100]; - } objHeader; - IMAGE_SYMBOL symbols[1]; - struct { - DWORD sizeofLongNames; - char longNames[100]; - } symbolNames; - - /* - * entry sometimes have a leading '_' - * overwritten if entryOffset==0 depending on the target platform - * see check for cpu below - */ - entry[0]='_'; - - newSuffix=".obj"; -#else -# error "Unknown platform for CAN_GENERATE_OBJECTS." -#endif - - /* deal with options, files and the entry point name */ - getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch); - if (optMatchArch) - { - printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian); - } - else - { - printf("genccode: using architecture cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian); - } -#if U_PLATFORM_HAS_WIN32_API - if(cpu==IMAGE_FILE_MACHINE_I386) { - entryOffset=1; - } -#endif - - in=T_FileStream_open(filename, "rb"); - if(in==NULL) { - fprintf(stderr, "genccode: unable to open input file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - size=T_FileStream_size(in); - - getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename); - if (outFilePath != NULL) { - uprv_strcpy(outFilePath, buffer); - } - - if(optEntryPoint != NULL) { - uprv_strcpy(entry+entryOffset, optEntryPoint); - uprv_strcat(entry+entryOffset, "_dat"); - } - /* turn dashes in the entry name into underscores */ - entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset); - for(i=0; i<entryLength; ++i) { - if(entry[entryLengthOffset+i]=='-') { - entry[entryLengthOffset+i]='_'; - } - } - - /* open the output file */ - out=T_FileStream_open(buffer, "wb"); - if(out==NULL) { - fprintf(stderr, "genccode: unable to open output file %s\n", buffer); - exit(U_FILE_ACCESS_ERROR); - } - -#ifdef U_ELF - if(bits==32) { - header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; - header32.e_machine=cpu; - - /* 16-align .rodata in the .o file, just in case */ - paddingSize=sectionHeaders32[4].sh_offset & 0xf; - if(paddingSize!=0) { - paddingSize=0x10-paddingSize; - sectionHeaders32[4].sh_offset+=paddingSize; - } - - sectionHeaders32[4].sh_size=(Elf32_Word)size; - - symbols32[1].st_size=(Elf32_Word)size; - - /* write .o headers */ - T_FileStream_write(out, &header32, (int32_t)sizeof(header32)); - T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32)); - T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32)); - } else /* bits==64 */ { -#ifdef U_ELF64 - header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; - header64.e_machine=cpu; - - /* 16-align .rodata in the .o file, just in case */ - paddingSize=sectionHeaders64[4].sh_offset & 0xf; - if(paddingSize!=0) { - paddingSize=0x10-paddingSize; - sectionHeaders64[4].sh_offset+=paddingSize; - } - - sectionHeaders64[4].sh_size=(Elf64_Xword)size; - - symbols64[1].st_size=(Elf64_Xword)size; - - /* write .o headers */ - T_FileStream_write(out, &header64, (int32_t)sizeof(header64)); - T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64)); - T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64)); -#endif - } - - T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings)); - T_FileStream_write(out, entry, (int32_t)sizeof(entry)); - if(paddingSize!=0) { - T_FileStream_write(out, padding, paddingSize); - } -#elif U_PLATFORM_HAS_WIN32_API - /* populate the .obj headers */ - uprv_memset(&objHeader, 0, sizeof(objHeader)); - uprv_memset(&symbols, 0, sizeof(symbols)); - uprv_memset(&symbolNames, 0, sizeof(symbolNames)); - - /* write the linker export directive */ - uprv_strcpy(objHeader.linkerOptions, "-export:"); - length=8; - uprv_strcpy(objHeader.linkerOptions+length, entry); - length+=entryLength; - uprv_strcpy(objHeader.linkerOptions+length, ",data "); - length+=6; - - /* set the file header */ - objHeader.fileHeader.Machine=cpu; - objHeader.fileHeader.NumberOfSections=2; - objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL); - objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */ - objHeader.fileHeader.NumberOfSymbols=1; - - /* set the section for the linker options */ - uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8); - objHeader.sections[0].SizeOfRawData=length; - objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER; - objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES; - - /* set the data section */ - uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6); - objHeader.sections[1].SizeOfRawData=size; - objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length; - objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ; - - /* set the symbol table */ - if(entryLength<=8) { - uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength); - symbolNames.sizeofLongNames=4; - } else { - symbols[0].N.Name.Short=0; - symbols[0].N.Name.Long=4; - symbolNames.sizeofLongNames=4+entryLength+1; - uprv_strcpy(symbolNames.longNames, entry); - } - symbols[0].SectionNumber=2; - symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL; - - /* write the file header and the linker options section */ - T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData); -#else -# error "Unknown platform for CAN_GENERATE_OBJECTS." -#endif - - /* copy the data file into section 2 */ - for(;;) { - length=T_FileStream_read(in, buffer, sizeof(buffer)); - if(length==0) { - break; - } - T_FileStream_write(out, buffer, (int32_t)length); - } - -#if U_PLATFORM_HAS_WIN32_API - /* write the symbol table */ - T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL); - T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames); -#endif - - if(T_FileStream_error(in)) { - fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - if(T_FileStream_error(out)) { - fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - T_FileStream_close(out); - T_FileStream_close(in); -} -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/pkg_genc.h b/deps/node/deps/icu-small/source/tools/toolutil/pkg_genc.h deleted file mode 100644 index 5039f27d..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/pkg_genc.h +++ /dev/null @@ -1,86 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** - * Copyright (C) 2008-2011, International Business Machines - * Corporation and others. All Rights Reserved. - ******************************************************************************* - */ - -#ifndef __PKG_GENC_H__ -#define __PKG_GENC_H__ - -#include "unicode/utypes.h" -#include "toolutil.h" - -#include "unicode/putil.h" -#include "putilimp.h" - -/*** Platform #defines move here ***/ -#if U_PLATFORM_HAS_WIN32_API -#ifdef __GNUC__ -#define WINDOWS_WITH_GNUC -#else -#define WINDOWS_WITH_MSVC -#endif -#endif - - -#if !defined(WINDOWS_WITH_MSVC) -#define BUILD_DATA_WITHOUT_ASSEMBLY -#endif - -#ifndef U_DISABLE_OBJ_CODE /* testing */ -#if defined(WINDOWS_WITH_MSVC) || U_PLATFORM_IS_LINUX_BASED -#define CAN_WRITE_OBJ_CODE -#endif -#if U_PLATFORM_HAS_WIN32_API || defined(U_ELF) -#define CAN_GENERATE_OBJECTS -#endif -#endif - -#if U_PLATFORM == U_PF_CYGWIN || defined(CYGWINMSVC) -#define USING_CYGWIN -#endif - -/* - * When building the data library without assembly, - * some platforms use a single c code file for all of - * the data to generate the final data library. This can - * increase the performance of the pkdata tool. - */ -#if U_PLATFORM == U_PF_OS400 -#define USE_SINGLE_CCODE_FILE -#endif - -/* Need to fix the file seperator character when using MinGW. */ -#if defined(WINDOWS_WITH_GNUC) || defined(USING_CYGWIN) -#define PKGDATA_FILE_SEP_STRING "/" -#else -#define PKGDATA_FILE_SEP_STRING U_FILE_SEP_STRING -#endif - -#define LARGE_BUFFER_MAX_SIZE 2048 -#define SMALL_BUFFER_MAX_SIZE 512 -#define SMALL_BUFFER_FLAG_NAMES 32 -#define BUFFER_PADDING_SIZE 20 - -/** End platform defines **/ - - - -U_INTERNAL void U_EXPORT2 -printAssemblyHeadersToStdErr(void); - -U_INTERNAL UBool U_EXPORT2 -checkAssemblyHeaderName(const char* optAssembly); - -U_INTERNAL void U_EXPORT2 -writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath); - -U_INTERNAL void U_EXPORT2 -writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath); - -U_INTERNAL void U_EXPORT2 -writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath); - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp b/deps/node/deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp deleted file mode 100644 index 423e4b73..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp +++ /dev/null @@ -1,578 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** - * Copyright (C) 2008-2012, International Business Machines - * Corporation and others. All Rights Reserved. - ******************************************************************************* - */ -#include "unicode/utypes.h" - -#include <stdio.h> -#include <stdlib.h> -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "cmemory.h" -#include "cstring.h" -#include "filestrm.h" -#include "toolutil.h" -#include "unicode/uclean.h" -#include "unewdata.h" -#include "putilimp.h" -#include "pkg_gencmn.h" - -#define STRING_STORE_SIZE 200000 - -#define COMMON_DATA_NAME U_ICUDATA_NAME -#define DATA_TYPE "dat" - -/* ICU package data file format (.dat files) ------------------------------- *** - -Description of the data format after the usual ICU data file header -(UDataInfo etc.). - -Format version 1 - -A .dat package file contains a simple Table of Contents of item names, -followed by the items themselves: - -1. ToC table - -uint32_t count; - number of items -UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item: - uint32_t nameOffset; - offset of the item name - uint32_t dataOffset; - offset of the item data -both are byte offsets from the beginning of the data - -2. item name strings - -All item names are stored as char * strings in one block between the ToC table -and the data items. - -3. data items - -The data items are stored following the item names block. -Each data item is 16-aligned. -The data items are stored in the sorted order of their names. - -Therefore, the top of the name strings block is the offset of the first item, -the length of the last item is the difference between its offset and -the .dat file length, and the length of all previous items is the difference -between its offset and the next one. - ------------------------------------------------------------------------------ */ - -/* UDataInfo cf. udata.h */ -static const UDataInfo dataInfo={ - sizeof(UDataInfo), - 0, - - U_IS_BIG_ENDIAN, - U_CHARSET_FAMILY, - sizeof(UChar), - 0, - - {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ - {1, 0, 0, 0}, /* formatVersion */ - {3, 0, 0, 0} /* dataVersion */ -}; - -static uint32_t maxSize; - -static char stringStore[STRING_STORE_SIZE]; -static uint32_t stringTop=0, basenameTotal=0; - -typedef struct { - char *pathname, *basename; - uint32_t basenameLength, basenameOffset, fileSize, fileOffset; -} File; - -#define CHUNK_FILE_COUNT 256 -static File *files = NULL; -static uint32_t fileCount=0; -static uint32_t fileMax = 0; - - -static char *symPrefix = NULL; - -#define LINE_BUFFER_SIZE 512 -/* prototypes --------------------------------------------------------------- */ - -static void -addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose); - -static char * -allocString(uint32_t length); - -U_CDECL_BEGIN -static int -compareFiles(const void *file1, const void *file2); -U_CDECL_END - -static char * -pathToFullPath(const char *path, const char *source); - -/* map non-tree separator (such as '\') to tree separator ('/') inplace. */ -static void -fixDirToTreePath(char *s); -/* -------------------------------------------------------------------------- */ - -U_CAPI void U_EXPORT2 -createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight, - const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) { - static char buffer[4096]; - char *line; - char *linePtr; - char *s = NULL; - UErrorCode errorCode=U_ZERO_ERROR; - uint32_t i, fileOffset, basenameOffset, length, nread; - FileStream *in, *file; - - line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE); - if (line == NULL) { - fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - linePtr = line; - - maxSize = max_size; - - if (destDir == NULL) { - destDir = u_getDataDirectory(); - } - if (name == NULL) { - name = COMMON_DATA_NAME; - } - if (type == NULL) { - type = DATA_TYPE; - } - if (source == NULL) { - source = "."; - } - - if (dataFile == NULL) { - in = T_FileStream_stdin(); - } else { - in = T_FileStream_open(dataFile, "r"); - if(in == NULL) { - fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile); - exit(U_FILE_ACCESS_ERROR); - } - } - - if (verbose) { - if(sourceTOC) { - printf("generating %s_%s.c (table of contents source file)\n", name, type); - } else { - printf("generating %s.%s (common data file with table of contents)\n", name, type); - } - } - - /* read the list of files and get their lengths */ - while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr), - LINE_BUFFER_SIZE))!=NULL) { - /* remove trailing newline characters and parse space separated items */ - if (s != NULL && *s != 0) { - line=s; - } else { - s=line; - } - while(*s!=0) { - if(*s==' ') { - *s=0; - ++s; - break; - } else if(*s=='\r' || *s=='\n') { - *s=0; - break; - } - ++s; - } - - /* check for comment */ - - if (*line == '#') { - continue; - } - - /* add the file */ -#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) - { - char *t; - while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { - *t = U_FILE_SEP_CHAR; - } - } -#endif - addFile(getLongPathname(line), name, source, sourceTOC, verbose); - } - - uprv_free(linePtr); - - if(in!=T_FileStream_stdin()) { - T_FileStream_close(in); - } - - if(fileCount==0) { - fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile); - return; - } - - /* sort the files by basename */ - qsort(files, fileCount, sizeof(File), compareFiles); - - if(!sourceTOC) { - UNewDataMemory *out; - - /* determine the offsets of all basenames and files in this common one */ - basenameOffset=4+8*fileCount; - fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; - for(i=0; i<fileCount; ++i) { - files[i].fileOffset=fileOffset; - fileOffset+=(files[i].fileSize+15)&~0xf; - files[i].basenameOffset=basenameOffset; - basenameOffset+=files[i].basenameLength; - } - - /* create the output file */ - out=udata_create(destDir, type, name, - &dataInfo, - copyRight == NULL ? U_COPYRIGHT_STRING : copyRight, - &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n", - destDir, name, type, - u_errorName(errorCode)); - exit(errorCode); - } - - /* write the table of contents */ - udata_write32(out, fileCount); - for(i=0; i<fileCount; ++i) { - udata_write32(out, files[i].basenameOffset); - udata_write32(out, files[i].fileOffset); - } - - /* write the basenames */ - for(i=0; i<fileCount; ++i) { - udata_writeString(out, files[i].basename, files[i].basenameLength); - } - length=4+8*fileCount+basenameTotal; - - /* copy the files */ - for(i=0; i<fileCount; ++i) { - /* pad to 16-align the next file */ - length&=0xf; - if(length!=0) { - udata_writePadding(out, 16-length); - } - - if (verbose) { - printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); - } - - /* copy the next file */ - file=T_FileStream_open(files[i].pathname, "rb"); - if(file==NULL) { - fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname); - exit(U_FILE_ACCESS_ERROR); - } - for(nread = 0;;) { - length=T_FileStream_read(file, buffer, sizeof(buffer)); - if(length <= 0) { - break; - } - nread += length; - udata_writeBlock(out, buffer, length); - } - T_FileStream_close(file); - length=files[i].fileSize; - - if (nread != files[i].fileSize) { - fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); - exit(U_FILE_ACCESS_ERROR); - } - } - - /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */ - length&=0xf; - if(length!=0) { - udata_writePadding(out, 16-length); - } - - /* finish */ - udata_finish(out, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode)); - exit(errorCode); - } - } else { - /* write a .c source file with the table of contents */ - char *filename; - FileStream *out; - - /* create the output filename */ - filename=s=buffer; - uprv_strcpy(filename, destDir); - s=filename+uprv_strlen(filename); - if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) { - *s++=U_FILE_SEP_CHAR; - } - uprv_strcpy(s, name); - if(*(type)!=0) { - s+=uprv_strlen(s); - *s++='_'; - uprv_strcpy(s, type); - } - s+=uprv_strlen(s); - uprv_strcpy(s, ".c"); - - /* open the output file */ - out=T_FileStream_open(filename, "w"); - if (gencmnFileName != NULL) { - uprv_strcpy(gencmnFileName, filename); - } - if(out==NULL) { - fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - /* write the source file */ - sprintf(buffer, - "/*\n" - " * ICU common data table of contents for %s.%s\n" - " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" - " */\n\n" - "#include \"unicode/utypes.h\"\n" - "#include \"unicode/udata.h\"\n" - "\n" - "/* external symbol declarations for data (%d files) */\n", - name, type, fileCount); - T_FileStream_writeLine(out, buffer); - - sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname); - T_FileStream_writeLine(out, buffer); - for(i=1; i<fileCount; ++i) { - sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname); - T_FileStream_writeLine(out, buffer); - } - T_FileStream_writeLine(out, ";\n\n"); - - sprintf( - buffer, - "U_EXPORT struct {\n" - " uint16_t headerSize;\n" - " uint8_t magic1, magic2;\n" - " UDataInfo info;\n" - " char padding[%lu];\n" - " uint32_t count, reserved;\n" - " struct {\n" - " const char *name;\n" - " const void *data;\n" - " } toc[%lu];\n" - "} U_EXPORT2 %s_dat = {\n" - " 32, 0xda, 0x27, {\n" - " %lu, 0,\n" - " %u, %u, %u, 0,\n" - " {0x54, 0x6f, 0x43, 0x50},\n" - " {1, 0, 0, 0},\n" - " {0, 0, 0, 0}\n" - " },\n" - " \"\", %lu, 0, {\n", - (unsigned long)32-4-sizeof(UDataInfo), - (unsigned long)fileCount, - entrypointName, - (unsigned long)sizeof(UDataInfo), - U_IS_BIG_ENDIAN, - U_CHARSET_FAMILY, - U_SIZEOF_UCHAR, - (unsigned long)fileCount - ); - T_FileStream_writeLine(out, buffer); - - sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname); - T_FileStream_writeLine(out, buffer); - for(i=1; i<fileCount; ++i) { - sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname); - T_FileStream_writeLine(out, buffer); - } - - T_FileStream_writeLine(out, "\n }\n};\n"); - T_FileStream_close(out); - - uprv_free(symPrefix); - } -} - -static void -addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) { - char *s; - uint32_t length; - char *fullPath = NULL; - - if(fileCount==fileMax) { - fileMax += CHUNK_FILE_COUNT; - files = (File *)uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */ - if(files==NULL) { - fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax*sizeof(files[0])), fileCount); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - - if(!sourceTOC) { - FileStream *file; - - if(uprv_pathIsAbsolute(filename)) { - fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename); - exit(U_ILLEGAL_ARGUMENT_ERROR); - } - fullPath = pathToFullPath(filename, source); - /* store the pathname */ - length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); - s=allocString(length); - uprv_strcpy(s, name); - uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); - uprv_strcat(s, filename); - - /* get the basename */ - fixDirToTreePath(s); - files[fileCount].basename=s; - files[fileCount].basenameLength=length; - - files[fileCount].pathname=fullPath; - - basenameTotal+=length; - - /* try to open the file */ - file=T_FileStream_open(fullPath, "rb"); - if(file==NULL) { - fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath); - exit(U_FILE_ACCESS_ERROR); - } - - /* get the file length */ - length=T_FileStream_size(file); - if(T_FileStream_error(file) || length<=20) { - fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath); - exit(U_FILE_ACCESS_ERROR); - } - - T_FileStream_close(file); - - /* do not add files that are longer than maxSize */ - if(maxSize && length>maxSize) { - if (verbose) { - printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize); - } - return; - } - files[fileCount].fileSize=length; - } else { - char *t; - /* get and store the basename */ - /* need to include the package name */ - length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); - s=allocString(length); - uprv_strcpy(s, name); - uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); - uprv_strcat(s, filename); - fixDirToTreePath(s); - files[fileCount].basename=s; - /* turn the basename into an entry point name and store in the pathname field */ - t=files[fileCount].pathname=allocString(length); - while(--length>0) { - if(*s=='.' || *s=='-' || *s=='/') { - *t='_'; - } else { - *t=*s; - } - ++s; - ++t; - } - *t=0; - } - ++fileCount; -} - -static char * -allocString(uint32_t length) { - uint32_t top=stringTop+length; - char *p; - - if(top>STRING_STORE_SIZE) { - fprintf(stderr, "gencmn: out of memory\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - p=stringStore+stringTop; - stringTop=top; - return p; -} - -static char * -pathToFullPath(const char *path, const char *source) { - int32_t length; - int32_t newLength; - char *fullPath; - int32_t n; - - length = (uint32_t)(uprv_strlen(path) + 1); - newLength = (length + 1 + (int32_t)uprv_strlen(source)); - fullPath = (char *)uprv_malloc(newLength); - if(source != NULL) { - uprv_strcpy(fullPath, source); - uprv_strcat(fullPath, U_FILE_SEP_STRING); - } else { - fullPath[0] = 0; - } - n = (int32_t)uprv_strlen(fullPath); - fullPath[n] = 0; /* Suppress compiler warning for unused variable n */ - /* when conditional code below is not compiled. */ - uprv_strcat(fullPath, path); - -#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) -#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) - /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ - for(;fullPath[n];n++) { - if(fullPath[n] == U_FILE_ALT_SEP_CHAR) { - fullPath[n] = U_FILE_SEP_CHAR; - } - } -#endif -#endif -#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) - /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ - for(;fullPath[n];n++) { - if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) { - fullPath[n] = U_FILE_SEP_CHAR; - } - } -#endif - return fullPath; -} - -U_CDECL_BEGIN -static int -compareFiles(const void *file1, const void *file2) { - /* sort by basename */ - return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename); -} -U_CDECL_END - -static void -fixDirToTreePath(char *s) -{ - (void)s; -#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)) - char *t; -#endif -#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) - for(t=s;t=uprv_strchr(t,U_FILE_SEP_CHAR);) { - *t = U_TREE_ENTRY_SEP_CHAR; - } -#endif -#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) - for(t=s;t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR);) { - *t = U_TREE_ENTRY_SEP_CHAR; - } -#endif -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/pkg_gencmn.h b/deps/node/deps/icu-small/source/tools/toolutil/pkg_gencmn.h deleted file mode 100644 index 23823996..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/pkg_gencmn.h +++ /dev/null @@ -1,18 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** - * Copyright (C) 2008, International Business Machines - * Corporation and others. All Rights Reserved. - ******************************************************************************* - */ - -#ifndef __PKG_GENCMN_H__ -#define __PKG_GENCMN_H__ - -#include "unicode/utypes.h" - -U_CAPI void U_EXPORT2 -createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight, - const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName); - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/pkg_icu.cpp b/deps/node/deps/icu-small/source/tools/toolutil/pkg_icu.cpp deleted file mode 100644 index ce0bfc21..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/pkg_icu.cpp +++ /dev/null @@ -1,176 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** - * Copyright (C) 2008-2015, International Business Machines - * Corporation and others. All Rights Reserved. - ******************************************************************************* - */ -#include "unicode/utypes.h" -#include "unicode/localpointer.h" -#include "unicode/putil.h" -#include "cstring.h" -#include "toolutil.h" -#include "uoptions.h" -#include "uparse.h" -#include "package.h" -#include "pkg_icu.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -// read a file list -------------------------------------------------------- *** - -U_NAMESPACE_USE - -static const struct { - const char *suffix; - int32_t length; -} listFileSuffixes[]={ - { ".txt", 4 }, - { ".lst", 4 }, - { ".tmp", 4 } -}; - -/* check for multiple text file suffixes to see if this list name is a text file name */ -static UBool -isListTextFile(const char *listname) { - const char *listNameEnd=strchr(listname, 0); - const char *suffix; - int32_t i, length; - for(i=0; i<UPRV_LENGTHOF(listFileSuffixes); ++i) { - suffix=listFileSuffixes[i].suffix; - length=listFileSuffixes[i].length; - if((listNameEnd-listname)>length && 0==memcmp(listNameEnd-length, suffix, length)) { - return TRUE; - } - } - return FALSE; -} - -/* - * Read a file list. - * If the listname ends with ".txt", then read the list file - * (in the system/ invariant charset). - * If the listname ends with ".dat", then read the ICU .dat package file. - * Otherwise, read the file itself as a single-item list. - */ -U_CAPI Package * U_EXPORT2 -readList(const char *filesPath, const char *listname, UBool readContents, Package *listPkgIn) { - Package *listPkg = listPkgIn; - FILE *file; - const char *listNameEnd; - - if(listname==NULL || listname[0]==0) { - fprintf(stderr, "missing list file\n"); - return NULL; - } - - if (listPkg == NULL) { - listPkg=new Package(); - if(listPkg==NULL) { - fprintf(stderr, "icupkg: not enough memory\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - - listNameEnd=strchr(listname, 0); - if(isListTextFile(listname)) { - // read the list file - char line[1024]; - char *end; - const char *start; - - file=fopen(listname, "r"); - if(file==NULL) { - fprintf(stderr, "icupkg: unable to open list file \"%s\"\n", listname); - delete listPkg; - exit(U_FILE_ACCESS_ERROR); - } - - while(fgets(line, sizeof(line), file)) { - // remove comments - end=strchr(line, '#'); - if(end!=NULL) { - *end=0; - } else { - // remove trailing CR LF - end=strchr(line, 0); - while(line<end && (*(end-1)=='\r' || *(end-1)=='\n')) { - *--end=0; - } - } - - // check first non-whitespace character and - // skip empty lines and - // skip lines starting with reserved characters - start=u_skipWhitespace(line); - if(*start==0 || NULL!=strchr(U_PKG_RESERVED_CHARS, *start)) { - continue; - } - - // take whitespace-separated items from the line - for(;;) { - // find whitespace after the item or the end of the line - for(end=(char *)start; *end!=0 && *end!=' ' && *end!='\t'; ++end) {} - if(*end==0) { - // this item is the last one on the line - end=NULL; - } else { - // the item is terminated by whitespace, terminate it with NUL - *end=0; - } - if(readContents) { - listPkg->addFile(filesPath, start); - } else { - listPkg->addItem(start); - } - - // find the start of the next item or exit the loop - if(end==NULL || *(start=u_skipWhitespace(end+1))==0) { - break; - } - } - } - fclose(file); - } else if((listNameEnd-listname)>4 && 0==memcmp(listNameEnd-4, ".dat", 4)) { - // read the ICU .dat package - // Accept a .dat file whose name differs from the ToC prefixes. - listPkg->setAutoPrefix(); - listPkg->readPackage(listname); - } else { - // list the single file itself - if(readContents) { - listPkg->addFile(filesPath, listname); - } else { - listPkg->addItem(listname); - } - } - - return listPkg; -} - -U_CAPI int U_EXPORT2 -writePackageDatFile(const char *outFilename, const char *outComment, const char *sourcePath, const char *addList, Package *pkg, char outType) { - LocalPointer<Package> ownedPkg; - LocalPointer<Package> addListPkg; - - if (pkg == NULL) { - ownedPkg.adoptInstead(new Package); - if(ownedPkg.isNull()) { - fprintf(stderr, "icupkg: not enough memory\n"); - return U_MEMORY_ALLOCATION_ERROR; - } - pkg = ownedPkg.getAlias(); - - addListPkg.adoptInstead(readList(sourcePath, addList, TRUE, NULL)); - if(addListPkg.isValid()) { - pkg->addItems(*addListPkg); - } else { - return U_ILLEGAL_ARGUMENT_ERROR; - } - } - - pkg->writePackage(outFilename, outType, outComment); - return 0; -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/pkg_icu.h b/deps/node/deps/icu-small/source/tools/toolutil/pkg_icu.h deleted file mode 100644 index 638056e6..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/pkg_icu.h +++ /dev/null @@ -1,25 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** - * Copyright (C) 2008-2016, International Business Machines - * Corporation and others. All Rights Reserved. - ******************************************************************************* - */ - -#ifndef __PKG_ICU_H__ -#define __PKG_ICU_H__ - -#include "unicode/utypes.h" -#include "package.h" - -#define U_PKG_RESERVED_CHARS "\"%&'()*+,-./:;<=>?_" - -U_CAPI int U_EXPORT2 -writePackageDatFile(const char *outFilename, const char *outComment, - const char *sourcePath, const char *addList, icu::Package *pkg, - char outType); - -U_CAPI icu::Package * U_EXPORT2 -readList(const char *filesPath, const char *listname, UBool readContents, icu::Package *listPkgIn); - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/pkg_imp.h b/deps/node/deps/icu-small/source/tools/toolutil/pkg_imp.h deleted file mode 100644 index 29abd8d8..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/pkg_imp.h +++ /dev/null @@ -1,38 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2005-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: pkg_imp.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2005sep18 -* created by: Markus W. Scherer -* -* Implementation definitions for data package functions in toolutil. -*/ - -#ifndef __PKG_IMP_H__ -#define __PKG_IMP_H__ - -#include "unicode/utypes.h" -#include "unicode/udata.h" - -/* - * Read an ICU data item with any platform type, - * return the pointer to the UDataInfo in its header, - * and set the lengths of the UDataInfo and of the whole header. - * All data remains in its platform type. - */ -U_CFUNC const UDataInfo * -getDataInfo(const uint8_t *data, int32_t length, - int32_t &infoLength, int32_t &headerLength, - UErrorCode *pErrorCode); - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/pkgitems.cpp b/deps/node/deps/icu-small/source/tools/toolutil/pkgitems.cpp deleted file mode 100644 index dd414c2f..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/pkgitems.cpp +++ /dev/null @@ -1,634 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: pkgitems.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2005sep18 -* created by: Markus W. Scherer -* -* Companion file to package.cpp. Deals with details of ICU data item formats. -* Used for item dependencies. -* Contains adapted code from ucnv_bld.c (swapper code from 2003). -*/ - -#include "unicode/utypes.h" -#include "unicode/ures.h" -#include "unicode/putil.h" -#include "unicode/udata.h" -#include "cstring.h" -#include "uinvchar.h" -#include "ucmndata.h" -#include "udataswp.h" -#include "swapimpl.h" -#include "toolutil.h" -#include "package.h" -#include "pkg_imp.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -/* item formats in common */ - -#include "uresdata.h" -#include "ucnv_bld.h" -#include "ucnv_io.h" - -// general definitions ----------------------------------------------------- *** - -U_CDECL_BEGIN - -static void U_CALLCONV -printError(void *context, const char *fmt, va_list args) { - vfprintf((FILE *)context, fmt, args); -} - -U_CDECL_END - -// a data item in native-platform form ------------------------------------- *** - -U_NAMESPACE_BEGIN - -class NativeItem { -public: - NativeItem() : pItem(NULL), pInfo(NULL), bytes(NULL), swapped(NULL), length(0) {} - NativeItem(const Item *item, UDataSwapFn *swap) : swapped(NULL) { - setItem(item, swap); - } - ~NativeItem() { - delete [] swapped; - } - const UDataInfo *getDataInfo() const { - return pInfo; - } - const uint8_t *getBytes() const { - return bytes; - } - int32_t getLength() const { - return length; - } - - void setItem(const Item *item, UDataSwapFn *swap) { - pItem=item; - int32_t infoLength, itemHeaderLength; - UErrorCode errorCode=U_ZERO_ERROR; - pInfo=::getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode); - if(U_FAILURE(errorCode)) { - exit(errorCode); // should succeed because readFile() checks headers - } - length=pItem->length-itemHeaderLength; - - if(pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY) { - bytes=pItem->data+itemHeaderLength; - } else { - UDataSwapper *ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", - pItem->name, u_errorName(errorCode)); - exit(errorCode); - } - - ds->printError=printError; - ds->printErrorContext=stderr; - - swapped=new uint8_t[pItem->length]; - if(swapped==NULL) { - fprintf(stderr, "icupkg: unable to allocate memory for swapping \"%s\"\n", pItem->name); - exit(U_MEMORY_ALLOCATION_ERROR); - } - swap(ds, pItem->data, pItem->length, swapped, &errorCode); - pInfo=::getDataInfo(swapped, pItem->length, infoLength, itemHeaderLength, &errorCode); - bytes=swapped+itemHeaderLength; - udata_closeSwapper(ds); - } - } - -private: - const Item *pItem; - const UDataInfo *pInfo; - const uint8_t *bytes; - uint8_t *swapped; - int32_t length; -}; - -// check a dependency ------------------------------------------------------ *** - -/* - * assemble the target item name from the source item name, an ID - * and a suffix - */ -static void -makeTargetName(const char *itemName, const char *id, int32_t idLength, const char *suffix, - char *target, int32_t capacity, - UErrorCode *pErrorCode) { - const char *itemID; - int32_t treeLength, suffixLength, targetLength; - - // get the item basename - itemID=strrchr(itemName, '/'); - if(itemID!=NULL) { - ++itemID; - } else { - itemID=itemName; - } - - // build the target string - treeLength=(int32_t)(itemID-itemName); - if(idLength<0) { - idLength=(int32_t)strlen(id); - } - suffixLength=(int32_t)strlen(suffix); - targetLength=treeLength+idLength+suffixLength; - if(targetLength>=capacity) { - fprintf(stderr, "icupkg/makeTargetName(%s) target item name length %ld too long\n", - itemName, (long)targetLength); - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return; - } - - memcpy(target, itemName, treeLength); - memcpy(target+treeLength, id, idLength); - memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL -} - -static void -checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix, - CheckDependency check, void *context, - UErrorCode *pErrorCode) { - char target[200]; - makeTargetName(itemName, id, idLength, suffix, target, (int32_t)sizeof(target), pErrorCode); - if(U_SUCCESS(*pErrorCode)) { - check(context, itemName, target); - } -} - -/* assemble the target item name from the item's parent item name */ -static void -checkParent(const char *itemName, CheckDependency check, void *context, - UErrorCode *pErrorCode) { - const char *itemID, *parent, *parentLimit, *suffix; - int32_t parentLength; - - // get the item basename - itemID=strrchr(itemName, '/'); - if(itemID!=NULL) { - ++itemID; - } else { - itemID=itemName; - } - - // get the item suffix - suffix=strrchr(itemID, '.'); - if(suffix==NULL) { - // empty suffix, point to the end of the string - suffix=strrchr(itemID, 0); - } - - // get the position of the last '_' - for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {} - - if(parentLimit!=itemID) { - // get the parent item name by truncating the last part of this item's name */ - parent=itemID; - parentLength=(int32_t)(parentLimit-itemID); - } else { - // no '_' in the item name: the parent is the root bundle - parent="root"; - parentLength=4; - if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) { - // the item itself is "root", which does not depend on a parent - return; - } - } - checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode); -} - -// get dependencies from resource bundles ---------------------------------- *** - -static const UChar SLASH=0x2f; - -/* - * Check for the alias from the string or alias resource res. - */ -static void -checkAlias(const char *itemName, - Resource res, const UChar *alias, int32_t length, UBool useResSuffix, - CheckDependency check, void *context, UErrorCode *pErrorCode) { - int32_t i; - - if(!uprv_isInvariantUString(alias, length)) { - fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n", - itemName, res); - *pErrorCode=U_INVALID_CHAR_FOUND; - return; - } - - // extract the locale ID from alias strings like - // locale_ID/key1/key2/key3 - // locale_ID - - // search for the first slash - for(i=0; i<length && alias[i]!=SLASH; ++i) {} - - if(res_getPublicType(res)==URES_ALIAS) { - // ignore aliases with an initial slash: - // /ICUDATA/... and /pkgname/... go to a different package - // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle - if(i==0) { - return; // initial slash ('/') - } - - // ignore the intra-bundle path starting from the first slash ('/') - length=i; - } else /* URES_STRING */ { - // the whole string should only consist of a locale ID - if(i!=length) { - fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n", - itemName, res); - *pErrorCode=U_UNSUPPORTED_ERROR; - return; - } - } - - // convert the Unicode string to char * - char localeID[32]; - if(length>=(int32_t)sizeof(localeID)) { - fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n", - itemName, res, (long)length); - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return; - } - u_UCharsToChars(alias, localeID, length); - localeID[length]=0; - - checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode); -} - -/* - * Enumerate one resource item and its children and extract dependencies from - * aliases. - */ -static void -ures_enumDependencies(const char *itemName, - const ResourceData *pResData, - Resource res, const char *inKey, const char *parentKey, int32_t depth, - CheckDependency check, void *context, - Package *pkg, - UErrorCode *pErrorCode) { - switch(res_getPublicType(res)) { - case URES_STRING: - { - UBool useResSuffix = TRUE; - // Check for %%ALIAS - if(depth==1 && inKey!=NULL) { - if(0!=strcmp(inKey, "%%ALIAS")) { - break; - } - } - // Check for %%DEPENDENCY - else if(depth==2 && parentKey!=NULL) { - if(0!=strcmp(parentKey, "%%DEPENDENCY")) { - break; - } - useResSuffix = FALSE; - } else { - // we ignore all other strings - break; - } - int32_t length; - const UChar *alias=res_getString(pResData, res, &length); - checkAlias(itemName, res, alias, length, useResSuffix, check, context, pErrorCode); - } - break; - case URES_ALIAS: - { - int32_t length; - const UChar *alias=res_getAlias(pResData, res, &length); - checkAlias(itemName, res, alias, length, TRUE, check, context, pErrorCode); - } - break; - case URES_TABLE: - { - /* recurse */ - int32_t count=res_countArrayItems(pResData, res); - for(int32_t i=0; i<count; ++i) { - const char *itemKey; - Resource item=res_getTableItemByIndex(pResData, res, i, &itemKey); - ures_enumDependencies( - itemName, pResData, - item, itemKey, - inKey, depth+1, - check, context, - pkg, - pErrorCode); - if(U_FAILURE(*pErrorCode)) { - fprintf(stderr, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%s: %08x) failed\n", - itemName, res, i, itemKey, item); - break; - } - } - } - break; - case URES_ARRAY: - { - /* recurse */ - int32_t count=res_countArrayItems(pResData, res); - for(int32_t i=0; i<count; ++i) { - Resource item=res_getArrayItem(pResData, res, i); - ures_enumDependencies( - itemName, pResData, - item, NULL, - inKey, depth+1, - check, context, - pkg, - pErrorCode); - if(U_FAILURE(*pErrorCode)) { - fprintf(stderr, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n", - itemName, res, i, item); - break; - } - } - } - break; - default: - break; - } -} - -static void -ures_enumDependencies(const char *itemName, const UDataInfo *pInfo, - const uint8_t *inBytes, int32_t length, - CheckDependency check, void *context, - Package *pkg, - UErrorCode *pErrorCode) { - ResourceData resData; - - res_read(&resData, pInfo, inBytes, length, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - fprintf(stderr, "icupkg: .res format version %02x.%02x not supported, or bundle malformed\n", - pInfo->formatVersion[0], pInfo->formatVersion[1]); - exit(U_UNSUPPORTED_ERROR); - } - - /* - * if the bundle attributes are present and the nofallback flag is not set, - * then add the parent bundle as a dependency - */ - if(pInfo->formatVersion[0]>1 || (pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1)) { - if(!resData.noFallback) { - /* this bundle participates in locale fallback */ - checkParent(itemName, check, context, pErrorCode); - } - } - - icu::NativeItem nativePool; - - if(resData.usesPoolBundle) { - char poolName[200]; - makeTargetName(itemName, "pool", 4, ".res", poolName, (int32_t)sizeof(poolName), pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - check(context, itemName, poolName); - int32_t index=pkg->findItem(poolName); - if(index<0) { - // We cannot work with a bundle if its pool resource is missing. - // check() already printed a complaint. - return; - } - // TODO: Cache the native version in the Item itself. - nativePool.setItem(pkg->getItem(index), ures_swap); - const UDataInfo *poolInfo=nativePool.getDataInfo(); - if(poolInfo->formatVersion[0]<=1) { - fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName); - return; - } - const int32_t *poolRoot=(const int32_t *)nativePool.getBytes(); - const int32_t *poolIndexes=poolRoot+1; - int32_t poolIndexLength=poolIndexes[URES_INDEX_LENGTH]&0xff; - if(!(poolIndexLength>URES_INDEX_POOL_CHECKSUM && - (poolIndexes[URES_INDEX_ATTRIBUTES]&URES_ATT_IS_POOL_BUNDLE)) - ) { - fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName); - return; - } - if(resData.pRoot[1+URES_INDEX_POOL_CHECKSUM]==poolIndexes[URES_INDEX_POOL_CHECKSUM]) { - resData.poolBundleKeys=(const char *)(poolIndexes+poolIndexLength); - resData.poolBundleStrings=(const uint16_t *)(poolRoot+poolIndexes[URES_INDEX_KEYS_TOP]); - } else { - fprintf(stderr, "icupkg: %s has mismatched checksum for %s\n", poolName, itemName); - return; - } - } - - ures_enumDependencies( - itemName, &resData, - resData.rootRes, NULL, NULL, 0, - check, context, - pkg, - pErrorCode); -} - -// get dependencies from conversion tables --------------------------------- *** - -/* code adapted from ucnv_swap() */ -static void -ucnv_enumDependencies(const UDataSwapper *ds, - const char *itemName, const UDataInfo *pInfo, - const uint8_t *inBytes, int32_t length, - CheckDependency check, void *context, - UErrorCode *pErrorCode) { - uint32_t staticDataSize; - - const UConverterStaticData *inStaticData; - - const _MBCSHeader *inMBCSHeader; - uint8_t outputType; - - /* check format version */ - if(!( - pInfo->formatVersion[0]==6 && - pInfo->formatVersion[1]>=2 - )) { - fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n", - pInfo->formatVersion[0], pInfo->formatVersion[1]); - exit(U_UNSUPPORTED_ERROR); - } - - /* read the initial UConverterStaticData structure after the UDataInfo header */ - inStaticData=(const UConverterStaticData *)inBytes; - - if( length<(int32_t)sizeof(UConverterStaticData) || - (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) - ) { - udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - - inBytes+=staticDataSize; - length-=(int32_t)staticDataSize; - - /* check for supported conversionType values */ - if(inStaticData->conversionType==UCNV_MBCS) { - /* MBCS data */ - uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions; - int32_t extOffset; - - inMBCSHeader=(const _MBCSHeader *)inBytes; - - if(length<(int32_t)sizeof(_MBCSHeader)) { - udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { - mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; - } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && - ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))& - MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 - ) { - mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK; - } else { - udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n", - inMBCSHeader->version[0], inMBCSHeader->version[1]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return; - } - - mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags); - extOffset=(int32_t)(mbcsHeaderFlags>>8); - outputType=(uint8_t)mbcsHeaderFlags; - - if(outputType==MBCS_OUTPUT_EXT_ONLY) { - /* - * extension-only file, - * contains a base name instead of normal base table data - */ - char baseName[32]; - int32_t baseNameLength; - - /* there is extension data after the base data, see ucnv_ext.h */ - if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { - udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - - /* swap the base name, between the header and the extension data */ - const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4; - baseNameLength=(int32_t)strlen(inBaseName); - if(baseNameLength>=(int32_t)sizeof(baseName)) { - udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n", - itemName, baseNameLength); - *pErrorCode=U_UNSUPPORTED_ERROR; - return; - } - ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode); - - checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode); - } - } -} - -// ICU data formats -------------------------------------------------------- *** - -static const struct { - uint8_t dataFormat[4]; -} dataFormats[]={ - { { 0x52, 0x65, 0x73, 0x42 } }, /* dataFormat="ResB" */ - { { 0x63, 0x6e, 0x76, 0x74 } }, /* dataFormat="cnvt" */ - { { 0x43, 0x76, 0x41, 0x6c } } /* dataFormat="CvAl" */ -}; - -enum { - FMT_RES, - FMT_CNV, - FMT_ALIAS, - FMT_COUNT -}; - -static int32_t -getDataFormat(const uint8_t dataFormat[4]) { - int32_t i; - - for(i=0; i<FMT_COUNT; ++i) { - if(0==memcmp(dataFormats[i].dataFormat, dataFormat, 4)) { - return i; - } - } - return -1; -} - -// enumerate dependencies of a package item -------------------------------- *** - -void -Package::enumDependencies(Item *pItem, void *context, CheckDependency check) { - int32_t infoLength, itemHeaderLength; - UErrorCode errorCode=U_ZERO_ERROR; - const UDataInfo *pInfo=getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode); - if(U_FAILURE(errorCode)) { - return; // should not occur because readFile() checks headers - } - - // find the data format and call the corresponding function, if any - int32_t format=getDataFormat(pInfo->dataFormat); - if(format>=0) { - switch(format) { - case FMT_RES: - { - /* - * Swap the resource bundle (if necessary) so that we can use - * the normal runtime uresdata.c code to read it. - * We do not want to duplicate that code, especially not together with on-the-fly swapping. - */ - NativeItem nrb(pItem, ures_swap); - ures_enumDependencies(pItem->name, nrb.getDataInfo(), nrb.getBytes(), nrb.getLength(), check, context, this, &errorCode); - break; - } - case FMT_CNV: - { - // TODO: share/cache swappers - UDataSwapper *ds=udata_openSwapper( - (UBool)pInfo->isBigEndian, pInfo->charsetFamily, - U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, - &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", - pItem->name, u_errorName(errorCode)); - exit(errorCode); - } - - ds->printError=printError; - ds->printErrorContext=stderr; - - const uint8_t *inBytes=pItem->data+itemHeaderLength; - int32_t length=pItem->length-itemHeaderLength; - - ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode); - udata_closeSwapper(ds); - break; - } - default: - break; - } - - if(U_FAILURE(errorCode)) { - exit(errorCode); - } - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/tools/toolutil/ppucd.cpp b/deps/node/deps/icu-small/source/tools/toolutil/ppucd.cpp deleted file mode 100644 index b11efa7f..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/ppucd.cpp +++ /dev/null @@ -1,615 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011-2014, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: ppucd.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011dec11 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "charstr.h" -#include "cstring.h" -#include "ppucd.h" -#include "uassert.h" -#include "uparse.h" - -#include <stdio.h> -#include <string.h> - -U_NAMESPACE_BEGIN - -PropertyNames::~PropertyNames() {} - -int32_t -PropertyNames::getPropertyEnum(const char *name) const { - return u_getPropertyEnum(name); -} - -int32_t -PropertyNames::getPropertyValueEnum(int32_t property, const char *name) const { - return u_getPropertyValueEnum((UProperty)property, name); -} - -UniProps::UniProps() - : start(U_SENTINEL), end(U_SENTINEL), - bmg(U_SENTINEL), bpb(U_SENTINEL), - scf(U_SENTINEL), slc(U_SENTINEL), stc(U_SENTINEL), suc(U_SENTINEL), - digitValue(-1), numericValue(NULL), - name(NULL), nameAlias(NULL) { - memset(binProps, 0, sizeof(binProps)); - memset(intProps, 0, sizeof(intProps)); - memset(age, 0, 4); -} - -UniProps::~UniProps() {} - -const int32_t PreparsedUCD::kNumLineBuffers; - -PreparsedUCD::PreparsedUCD(const char *filename, UErrorCode &errorCode) - : icuPnames(new PropertyNames()), pnames(icuPnames), - file(NULL), - defaultLineIndex(-1), blockLineIndex(-1), lineIndex(0), - lineNumber(0), - lineType(NO_LINE), - fieldLimit(NULL), lineLimit(NULL) { - if(U_FAILURE(errorCode)) { return; } - - if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) { - filename=NULL; - file=stdin; - } else { - file=fopen(filename, "r"); - } - if(file==NULL) { - perror("error opening preparsed UCD"); - fprintf(stderr, "error opening preparsed UCD file %s\n", filename ? filename : "\"no file name given\""); - errorCode=U_FILE_ACCESS_ERROR; - return; - } - - memset(ucdVersion, 0, 4); - lines[0][0]=0; -} - -PreparsedUCD::~PreparsedUCD() { - if(file!=stdin) { - fclose(file); - } - delete icuPnames; -} - -// Same order as the LineType values. -static const char *lineTypeStrings[]={ - NULL, - NULL, - "ucd", - "property", - "binary", - "value", - "defaults", - "block", - "cp", - "unassigned", - "algnamesrange" -}; - -PreparsedUCD::LineType -PreparsedUCD::readLine(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return NO_LINE; } - // Select the next available line buffer. - while(!isLineBufferAvailable(lineIndex)) { - ++lineIndex; - if (lineIndex == kNumLineBuffers) { - lineIndex = 0; - } - } - char *line=lines[lineIndex]; - *line=0; - lineLimit=fieldLimit=line; - lineType=NO_LINE; - char *result=fgets(line, sizeof(lines[0]), file); - if(result==NULL) { - if(ferror(file)) { - perror("error reading preparsed UCD"); - fprintf(stderr, "error reading preparsed UCD before line %ld\n", (long)lineNumber); - errorCode=U_FILE_ACCESS_ERROR; - } - return NO_LINE; - } - ++lineNumber; - if(*line=='#') { - fieldLimit=strchr(line, 0); - return lineType=EMPTY_LINE; - } - // Remove trailing /r/n. - char c; - char *limit=strchr(line, 0); - while(line<limit && ((c=*(limit-1))=='\n' || c=='\r')) { --limit; } - // Remove trailing white space. - while(line<limit && ((c=*(limit-1))==' ' || c=='\t')) { --limit; } - *limit=0; - lineLimit=limit; - if(line==limit) { - fieldLimit=limit; - return lineType=EMPTY_LINE; - } - // Split by ';'. - char *semi=line; - while((semi=strchr(semi, ';'))!=NULL) { *semi++=0; } - fieldLimit=strchr(line, 0); - // Determine the line type. - int32_t type; - for(type=EMPTY_LINE+1;; ++type) { - if(type==LINE_TYPE_COUNT) { - fprintf(stderr, - "error in preparsed UCD: unknown line type (first field) '%s' on line %ld\n", - line, (long)lineNumber); - errorCode=U_PARSE_ERROR; - return NO_LINE; - } - if(0==strcmp(line, lineTypeStrings[type])) { - break; - } - } - lineType=(LineType)type; - if(lineType==UNICODE_VERSION_LINE && fieldLimit<lineLimit) { - u_versionFromString(ucdVersion, fieldLimit+1); - } - return lineType; -} - -const char * -PreparsedUCD::firstField() { - char *field=lines[lineIndex]; - fieldLimit=strchr(field, 0); - return field; -} - -const char * -PreparsedUCD::nextField() { - if(fieldLimit==lineLimit) { return NULL; } - char *field=fieldLimit+1; - fieldLimit=strchr(field, 0); - return field; -} - -const UniProps * -PreparsedUCD::getProps(UnicodeSet &newValues, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return NULL; } - newValues.clear(); - if(!lineHasPropertyValues()) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - firstField(); - const char *field=nextField(); - if(field==NULL) { - // No range field after the type. - fprintf(stderr, - "error in preparsed UCD: missing default/block/cp range field " - "(no second field) on line %ld\n", - (long)lineNumber); - errorCode=U_PARSE_ERROR; - return NULL; - } - UChar32 start, end; - if(!parseCodePointRange(field, start, end, errorCode)) { return NULL; } - UniProps *props; - UBool insideBlock=FALSE; // TRUE if cp or unassigned range inside the block range. - switch(lineType) { - case DEFAULTS_LINE: - // Should occur before any block/cp/unassigned line. - if(blockLineIndex>=0) { - fprintf(stderr, - "error in preparsed UCD: default line %ld after one or more block lines\n", - (long)lineNumber); - errorCode=U_PARSE_ERROR; - return NULL; - } - if(defaultLineIndex>=0) { - fprintf(stderr, - "error in preparsed UCD: second line with default properties on line %ld\n", - (long)lineNumber); - errorCode=U_PARSE_ERROR; - return NULL; - } - if(start!=0 || end!=0x10ffff) { - fprintf(stderr, - "error in preparsed UCD: default range must be 0..10FFFF, not '%s' on line %ld\n", - field, (long)lineNumber); - errorCode=U_PARSE_ERROR; - return NULL; - } - props=&defaultProps; - defaultLineIndex=lineIndex; - break; - case BLOCK_LINE: - blockProps=defaultProps; // Block inherits default properties. - props=&blockProps; - blockLineIndex=lineIndex; - break; - case CP_LINE: - case UNASSIGNED_LINE: - if(blockProps.start<=start && end<=blockProps.end) { - insideBlock=TRUE; - if(lineType==CP_LINE) { - // Code point range fully inside the last block inherits the block properties. - cpProps=blockProps; - } else { - // Unassigned line inside the block is based on default properties - // which override block properties. - cpProps=defaultProps; - newValues=blockValues; - // Except, it inherits the one blk=Block property. - int32_t blkIndex=UCHAR_BLOCK-UCHAR_INT_START; - cpProps.intProps[blkIndex]=blockProps.intProps[blkIndex]; - newValues.remove((UChar32)UCHAR_BLOCK); - } - } else if(start>blockProps.end || end<blockProps.start) { - // Code point range fully outside the last block inherits the default properties. - cpProps=defaultProps; - } else { - // Code point range partially overlapping with the last block is illegal. - fprintf(stderr, - "error in preparsed UCD: cp range %s on line %ld only " - "partially overlaps with block range %04lX..%04lX\n", - field, (long)lineNumber, (long)blockProps.start, (long)blockProps.end); - errorCode=U_PARSE_ERROR; - return NULL; - } - props=&cpProps; - break; - default: - // Will not occur because of the range check above. - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - props->start=start; - props->end=end; - while((field=nextField())!=NULL) { - if(!parseProperty(*props, field, newValues, errorCode)) { return NULL; } - } - if(lineType==BLOCK_LINE) { - blockValues=newValues; - } else if(lineType==UNASSIGNED_LINE && insideBlock) { - // Unset newValues for values that are the same as the block values. - for(int32_t prop=0; prop<UCHAR_BINARY_LIMIT; ++prop) { - if(newValues.contains(prop) && cpProps.binProps[prop]==blockProps.binProps[prop]) { - newValues.remove(prop); - } - } - for(int32_t prop=UCHAR_INT_START; prop<UCHAR_INT_LIMIT; ++prop) { - int32_t index=prop-UCHAR_INT_START; - if(newValues.contains(prop) && cpProps.intProps[index]==blockProps.intProps[index]) { - newValues.remove(prop); - } - } - } - return props; -} - -static const struct { - const char *name; - int32_t prop; -} ppucdProperties[]={ - { "Name_Alias", PPUCD_NAME_ALIAS }, - { "Conditional_Case_Mappings", PPUCD_CONDITIONAL_CASE_MAPPINGS }, - { "Turkic_Case_Folding", PPUCD_TURKIC_CASE_FOLDING } -}; - -// Returns TRUE for "ok to continue parsing fields". -UBool -PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, - UErrorCode &errorCode) { - CharString pBuffer; - const char *p=field; - const char *v=strchr(p, '='); - int binaryValue; - if(*p=='-') { - if(v!=NULL) { - fprintf(stderr, - "error in preparsed UCD: mix of binary-property-no and " - "enum-property syntax '%s' on line %ld\n", - field, (long)lineNumber); - errorCode=U_PARSE_ERROR; - return FALSE; - } - binaryValue=0; - ++p; - } else if(v==NULL) { - binaryValue=1; - } else { - binaryValue=-1; - // Copy out the property name rather than modifying the field (writing a NUL). - pBuffer.append(p, (int32_t)(v-p), errorCode); - p=pBuffer.data(); - ++v; - } - int32_t prop=pnames->getPropertyEnum(p); - if(prop<0) { - for(int32_t i=0;; ++i) { - if(i==UPRV_LENGTHOF(ppucdProperties)) { - // Ignore unknown property names. - return TRUE; - } - if(0==uprv_stricmp(p, ppucdProperties[i].name)) { - prop=ppucdProperties[i].prop; - U_ASSERT(prop>=0); - break; - } - } - } - if(prop<UCHAR_BINARY_LIMIT) { - if(binaryValue>=0) { - props.binProps[prop]=(UBool)binaryValue; - } else { - // No binary value for a binary property. - fprintf(stderr, - "error in preparsed UCD: enum-property syntax '%s' " - "for binary property on line %ld\n", - field, (long)lineNumber); - errorCode=U_PARSE_ERROR; - } - } else if(binaryValue>=0) { - // Binary value for a non-binary property. - fprintf(stderr, - "error in preparsed UCD: binary-property syntax '%s' " - "for non-binary property on line %ld\n", - field, (long)lineNumber); - errorCode=U_PARSE_ERROR; - } else if (prop < UCHAR_INT_START) { - fprintf(stderr, - "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n", - prop, (long)lineNumber); - errorCode=U_PARSE_ERROR; - } else if(prop<UCHAR_INT_LIMIT) { - int32_t value=pnames->getPropertyValueEnum(prop, v); - if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) { - // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work. - char *end; - unsigned long ccc=uprv_strtoul(v, &end, 10); - if(v<end && *end==0 && ccc<=254) { - value=(int32_t)ccc; - } - } - if(value==UCHAR_INVALID_CODE) { - fprintf(stderr, - "error in preparsed UCD: '%s' is not a valid value on line %ld\n", - field, (long)lineNumber); - errorCode=U_PARSE_ERROR; - } else { - props.intProps[prop-UCHAR_INT_START]=value; - } - } else if(*v=='<') { - // Do not parse default values like <code point>, just set null values. - switch(prop) { - case UCHAR_BIDI_MIRRORING_GLYPH: - props.bmg=U_SENTINEL; - break; - case UCHAR_BIDI_PAIRED_BRACKET: - props.bpb=U_SENTINEL; - break; - case UCHAR_SIMPLE_CASE_FOLDING: - props.scf=U_SENTINEL; - break; - case UCHAR_SIMPLE_LOWERCASE_MAPPING: - props.slc=U_SENTINEL; - break; - case UCHAR_SIMPLE_TITLECASE_MAPPING: - props.stc=U_SENTINEL; - break; - case UCHAR_SIMPLE_UPPERCASE_MAPPING: - props.suc=U_SENTINEL; - break; - case UCHAR_CASE_FOLDING: - props.cf.remove(); - break; - case UCHAR_LOWERCASE_MAPPING: - props.lc.remove(); - break; - case UCHAR_TITLECASE_MAPPING: - props.tc.remove(); - break; - case UCHAR_UPPERCASE_MAPPING: - props.uc.remove(); - break; - case UCHAR_SCRIPT_EXTENSIONS: - props.scx.clear(); - break; - default: - fprintf(stderr, - "error in preparsed UCD: '%s' is not a valid default value on line %ld\n", - field, (long)lineNumber); - errorCode=U_PARSE_ERROR; - } - } else { - char c; - switch(prop) { - case UCHAR_NUMERIC_VALUE: - props.numericValue=v; - c=*v; - if('0'<=c && c<='9' && v[1]==0) { - props.digitValue=c-'0'; - } else { - props.digitValue=-1; - } - break; - case UCHAR_NAME: - props.name=v; - break; - case UCHAR_AGE: - u_versionFromString(props.age, v); // Writes 0.0.0.0 if v is not numeric. - break; - case UCHAR_BIDI_MIRRORING_GLYPH: - props.bmg=parseCodePoint(v, errorCode); - break; - case UCHAR_BIDI_PAIRED_BRACKET: - props.bpb=parseCodePoint(v, errorCode); - break; - case UCHAR_SIMPLE_CASE_FOLDING: - props.scf=parseCodePoint(v, errorCode); - break; - case UCHAR_SIMPLE_LOWERCASE_MAPPING: - props.slc=parseCodePoint(v, errorCode); - break; - case UCHAR_SIMPLE_TITLECASE_MAPPING: - props.stc=parseCodePoint(v, errorCode); - break; - case UCHAR_SIMPLE_UPPERCASE_MAPPING: - props.suc=parseCodePoint(v, errorCode); - break; - case UCHAR_CASE_FOLDING: - parseString(v, props.cf, errorCode); - break; - case UCHAR_LOWERCASE_MAPPING: - parseString(v, props.lc, errorCode); - break; - case UCHAR_TITLECASE_MAPPING: - parseString(v, props.tc, errorCode); - break; - case UCHAR_UPPERCASE_MAPPING: - parseString(v, props.uc, errorCode); - break; - case PPUCD_NAME_ALIAS: - props.nameAlias=v; - break; - case PPUCD_CONDITIONAL_CASE_MAPPINGS: - case PPUCD_TURKIC_CASE_FOLDING: - // No need to parse their values: They are hardcoded in the runtime library. - break; - case UCHAR_SCRIPT_EXTENSIONS: - parseScriptExtensions(v, props.scx, errorCode); - break; - default: - // Ignore unhandled properties. - return TRUE; - } - } - if(U_SUCCESS(errorCode)) { - newValues.add((UChar32)prop); - return TRUE; - } else { - return FALSE; - } -} - -UBool -PreparsedUCD::getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return FALSE; } - if(lineType!=ALG_NAMES_RANGE_LINE) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - firstField(); - const char *field=nextField(); - if(field==NULL) { - // No range field after the type. - fprintf(stderr, - "error in preparsed UCD: missing algnamesrange range field " - "(no second field) on line %ld\n", - (long)lineNumber); - errorCode=U_PARSE_ERROR; - return FALSE; - } - return parseCodePointRange(field, start, end, errorCode); -} - -UChar32 -PreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) { - char *end; - uint32_t value=(uint32_t)uprv_strtoul(s, &end, 16); - if(end<=s || *end!=0 || value>=0x110000) { - fprintf(stderr, - "error in preparsed UCD: '%s' is not a valid code point on line %ld\n", - s, (long)lineNumber); - errorCode=U_PARSE_ERROR; - return U_SENTINEL; - } - return (UChar32)value; -} - -UBool -PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) { - uint32_t st, e; - u_parseCodePointRange(s, &st, &e, &errorCode); - if(U_FAILURE(errorCode)) { - fprintf(stderr, - "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n", - s, (long)lineNumber); - return FALSE; - } - start=(UChar32)st; - end=(UChar32)e; - return TRUE; -} - -void -PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) { - UChar *buffer=toUCharPtr(uni.getBuffer(-1)); - int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode); - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - errorCode=U_ZERO_ERROR; - uni.releaseBuffer(0); - buffer=toUCharPtr(uni.getBuffer(length)); - length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode); - } - uni.releaseBuffer(length); - if(U_FAILURE(errorCode)) { - fprintf(stderr, - "error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n", - s, (long)lineNumber); - } -} - -void -PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return; } - scx.clear(); - CharString scString; - for(;;) { - const char *scs; - const char *scLimit=strchr(s, ' '); - if(scLimit!=NULL) { - scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data(); - if(U_FAILURE(errorCode)) { return; } - } else { - scs=s; - } - int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs); - if(script==UCHAR_INVALID_CODE) { - fprintf(stderr, - "error in preparsed UCD: '%s' is not a valid script code on line %ld\n", - scs, (long)lineNumber); - errorCode=U_PARSE_ERROR; - return; - } else if(scx.contains(script)) { - fprintf(stderr, - "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n", - scs, (long)lineNumber); - errorCode=U_PARSE_ERROR; - return; - } else { - scx.add(script); - } - if(scLimit!=NULL) { - s=scLimit+1; - } else { - break; - } - } - if(scx.isEmpty()) { - fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber); - errorCode=U_PARSE_ERROR; - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/tools/toolutil/ppucd.h b/deps/node/deps/icu-small/source/tools/toolutil/ppucd.h deleted file mode 100644 index 1f9fb295..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/ppucd.h +++ /dev/null @@ -1,181 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011-2013, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: ppucd.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011dec11 -* created by: Markus W. Scherer -*/ - -#ifndef __PPUCD_H__ -#define __PPUCD_H__ - -#include "unicode/utypes.h" -#include "unicode/uniset.h" -#include "unicode/unistr.h" - -#include <stdio.h> - -/** Additions to the uchar.h enum UProperty. */ -enum { - /** Name_Alias */ - PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT, - PPUCD_CONDITIONAL_CASE_MAPPINGS, - PPUCD_TURKIC_CASE_FOLDING -}; - -U_NAMESPACE_BEGIN - -class U_TOOLUTIL_API PropertyNames { -public: - virtual ~PropertyNames(); - virtual int32_t getPropertyEnum(const char *name) const; - virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const; -}; - -struct U_TOOLUTIL_API UniProps { - UniProps(); - ~UniProps(); - - int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; } - - UChar32 start, end; - UBool binProps[UCHAR_BINARY_LIMIT]; - int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]; - UVersionInfo age; - UChar32 bmg, bpb; - UChar32 scf, slc, stc, suc; - int32_t digitValue; - const char *numericValue; - const char *name; - const char *nameAlias; - UnicodeString cf, lc, tc, uc; - UnicodeSet scx; -}; - -class U_TOOLUTIL_API PreparsedUCD { -public: - enum LineType { - /** No line, end of file. */ - NO_LINE, - /** Empty line. (Might contain a comment.) */ - EMPTY_LINE, - - /** ucd;6.1.0 */ - UNICODE_VERSION_LINE, - - /** property;Binary;Alpha;Alphabetic */ - PROPERTY_LINE, - /** binary;N;No;F;False */ - BINARY_LINE, - /** value;gc;Zs;Space_Separator */ - VALUE_LINE, - - /** defaults;0000..10FFFF;age=NA;bc=L;... */ - DEFAULTS_LINE, - /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */ - BLOCK_LINE, - /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */ - CP_LINE, - /** unassigned;E01F0..E0FFF;bc=BN;CWKCF;DI;GCB=CN;NFKC_CF= */ - UNASSIGNED_LINE, - - /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */ - ALG_NAMES_RANGE_LINE, - - LINE_TYPE_COUNT - }; - - /** - * Constructor. - * Prepare this object for a new, empty package. - */ - PreparsedUCD(const char *filename, UErrorCode &errorCode); - - /** Destructor. */ - ~PreparsedUCD(); - - /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */ - void setPropertyNames(const PropertyNames *pn) { pnames=pn; } - - /** - * Reads a line from the preparsed UCD file. - * Splits the line by replacing each ';' with a NUL. - */ - LineType readLine(UErrorCode &errorCode); - - /** Returns the number of the line read by readLine(). */ - int32_t getLineNumber() const { return lineNumber; } - - /** Returns the line's next field, or NULL. */ - const char *nextField(); - - /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */ - const UVersionInfo &getUnicodeVersion() const { return ucdVersion; } - - /** Returns TRUE if the current line has property values. */ - UBool lineHasPropertyValues() const { - return DEFAULTS_LINE<=lineType && lineType<=UNASSIGNED_LINE; - } - - /** - * Parses properties from the current line. - * Clears newValues and sets UProperty codes for property values mentioned - * on the current line (as opposed to being inherited). - * Returns a pointer to the filled-in UniProps, or NULL if something went wrong. - * The returned UniProps are usable until the next line of the same type is read. - */ - const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode); - - /** - * Returns the code point range for the current algnamesrange line. - * Calls & parses nextField(). - * Further nextField() calls will yield the range's type & prefix string. - * Returns U_SUCCESS(errorCode). - */ - UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode); - -private: - UBool isLineBufferAvailable(int32_t i) { - return defaultLineIndex!=i && blockLineIndex!=i; - } - - /** Resets the field iterator and returns the line's first field (the line type field). */ - const char *firstField(); - - UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, - UErrorCode &errorCode); - UChar32 parseCodePoint(const char *s, UErrorCode &errorCode); - UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode); - void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode); - void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode); - - static const int32_t kNumLineBuffers=3; - - PropertyNames *icuPnames; // owned - const PropertyNames *pnames; // aliased - FILE *file; - int32_t defaultLineIndex, blockLineIndex, lineIndex; - int32_t lineNumber; - LineType lineType; - char *fieldLimit; - char *lineLimit; - - UVersionInfo ucdVersion; - UniProps defaultProps, blockProps, cpProps; - UnicodeSet blockValues; - // Multiple lines so that default and block properties can maintain pointers - // into their line buffers. - char lines[kNumLineBuffers][4096]; -}; - -U_NAMESPACE_END - -#endif // __PPUCD_H__ diff --git a/deps/node/deps/icu-small/source/tools/toolutil/swapimpl.cpp b/deps/node/deps/icu-small/source/tools/toolutil/swapimpl.cpp deleted file mode 100644 index e8850cb9..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/swapimpl.cpp +++ /dev/null @@ -1,831 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2005-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: swapimpl.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2005may05 -* created by: Markus W. Scherer -* -* Data file swapping functions moved here from the common library -* because some data is hardcoded in ICU4C and needs not be swapped any more. -* Moving the functions here simplifies testing (for code coverage) because -* we need not jump through hoops (like adding snapshots of these files -* to testdata). -* -* The declarations for these functions remain in the internal header files -* in icu/source/common/ -*/ - -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/udata.h" - -/* Explicit include statement for std_string.h is needed - * for compilation on certain platforms. (e.g. AIX/VACPP) - */ -#include "unicode/std_string.h" - -#include "cmemory.h" -#include "cstring.h" -#include "uinvchar.h" -#include "uassert.h" -#include "uarrsort.h" -#include "ucmndata.h" -#include "udataswp.h" - -/* swapping implementations in common */ - -#include "uresdata.h" -#include "ucnv_io.h" -#include "uprops.h" -#include "ucase.h" -#include "ubidi_props.h" -#include "ucol_swp.h" -#include "ucnv_bld.h" -#include "unormimp.h" -#include "normalizer2impl.h" -#include "sprpimpl.h" -#include "propname.h" -#include "rbbidata.h" -#include "utrie.h" -#include "utrie2.h" -#include "dictionarydata.h" - -/* swapping implementations in i18n */ - -#if !UCONFIG_NO_NORMALIZATION -#include "uspoof_impl.h" -#endif - -U_NAMESPACE_USE - -/* definitions */ - -/* Unicode property (value) aliases data swapping --------------------------- */ - -static int32_t U_CALLCONV -upname_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - /* udata_swapDataHeader checks the arguments */ - int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - const UDataInfo *pInfo= - reinterpret_cast<const UDataInfo *>( - static_cast<const char *>(inData)+4); - if(!( - pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */ - pInfo->dataFormat[1]==0x6e && - pInfo->dataFormat[2]==0x61 && - pInfo->dataFormat[3]==0x6d && - pInfo->formatVersion[0]==2 - )) { - udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize; - uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize; - - if(length>=0) { - length-=headerSize; - // formatVersion 2 initially has indexes[8], 32 bytes. - if(length<32) { - udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n", - (int)length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes); - int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]); - if(length>=0) { - if(length<totalSize) { - udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) " - "for pnames.icu\n", - (int)length, (int)totalSize); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - int32_t numBytesIndexesAndValueMaps= - udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]); - - // Swap the indexes[] and the valueMaps[]. - ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode); - - // Copy the rest of the data. - if(inBytes!=outBytes) { - uprv_memcpy(outBytes+numBytesIndexesAndValueMaps, - inBytes+numBytesIndexesAndValueMaps, - totalSize-numBytesIndexesAndValueMaps); - } - - // We need not swap anything else: - // - // The ByteTries are already byte-serialized, and are fixed on ASCII. - // (On an EBCDIC machine, the input string is converted to lowercase ASCII - // while matching.) - // - // The name groups are mostly invariant characters, but since we only - // generate, and keep in subversion, ASCII versions of pnames.icu, - // and since only ICU4J uses the pnames.icu data file - // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files, - // we just copy those bytes too. - } - - return headerSize+totalSize; -} - -/* Unicode properties data swapping ----------------------------------------- */ - -static int32_t U_CALLCONV -uprops_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize, i; - - int32_t dataIndexes[UPROPS_INDEX_COUNT]; - const int32_t *inData32; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */ - pInfo->dataFormat[1]==0x50 && - pInfo->dataFormat[2]==0x72 && - pInfo->dataFormat[3]==0x6f && - (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) && - (pInfo->formatVersion[0]>=7 || - (pInfo->formatVersion[2]==UTRIE_SHIFT && - pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT)) - )) { - udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - /* the properties file must contain at least the indexes array */ - if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) { - udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", - length-headerSize); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - /* read the indexes */ - inData32=(const int32_t *)((const char *)inData+headerSize); - for(i=0; i<UPROPS_INDEX_COUNT; ++i) { - dataIndexes[i]=udata_readInt32(ds, inData32[i]); - } - - /* - * comments are copied from the data format description in genprops/store.c - * indexes[] constants are in uprops.h - */ - int32_t dataTop; - if(length>=0) { - int32_t *outData32; - - /* - * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size. - * In earlier formatVersions, it is 0 and a lower dataIndexes entry - * has the top of the last item. - */ - for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {} - - if((length-headerSize)<(4*dataTop)) { - udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", - length-headerSize); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - outData32=(int32_t *)((char *)outData+headerSize); - - /* copy everything for inaccessible data (padding) */ - if(inData32!=outData32) { - uprv_memcpy(outData32, inData32, 4*(size_t)dataTop); - } - - /* swap the indexes[16] */ - ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode); - - /* - * swap the main properties UTrie - * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) - */ - utrie_swapAnyVersion(ds, - inData32+UPROPS_INDEX_COUNT, - 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), - outData32+UPROPS_INDEX_COUNT, - pErrorCode); - - /* - * swap the properties and exceptions words - * P const uint32_t props32[i1-i0]; - * E const uint32_t exceptions[i2-i1]; - */ - ds->swapArray32(ds, - inData32+dataIndexes[UPROPS_PROPS32_INDEX], - 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]), - outData32+dataIndexes[UPROPS_PROPS32_INDEX], - pErrorCode); - - /* - * swap the UChars - * U const UChar uchars[2*(i3-i2)]; - */ - ds->swapArray16(ds, - inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], - 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]), - outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], - pErrorCode); - - /* - * swap the additional UTrie - * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties - */ - utrie_swapAnyVersion(ds, - inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], - 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), - outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], - pErrorCode); - - /* - * swap the properties vectors - * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4]; - */ - ds->swapArray32(ds, - inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], - 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]), - outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], - pErrorCode); - - // swap the Script_Extensions data - // SCX const uint16_t scriptExtensions[2*(i7-i6)]; - ds->swapArray16(ds, - inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], - 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]), - outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], - pErrorCode); - } - - /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */ - return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7]; -} - -/* Unicode case mapping data swapping --------------------------------------- */ - -static int32_t U_CALLCONV -ucase_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - - const uint8_t *inBytes; - uint8_t *outBytes; - - const int32_t *inIndexes; - int32_t indexes[16]; - - int32_t i, offset, count, size; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */ - pInfo->dataFormat[1]==UCASE_FMT_1 && - pInfo->dataFormat[2]==UCASE_FMT_2 && - pInfo->dataFormat[3]==UCASE_FMT_3 && - ((pInfo->formatVersion[0]==1 && - pInfo->formatVersion[2]==UTRIE_SHIFT && - pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || - (2<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=4)) - )) { - udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes=(const uint8_t *)inData+headerSize; - outBytes=(uint8_t *)outData+headerSize; - - inIndexes=(const int32_t *)inBytes; - - if(length>=0) { - length-=headerSize; - if(length<16*4) { - udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */ - for(i=0; i<16; ++i) { - indexes[i]=udata_readInt32(ds, inIndexes[i]); - } - - /* get the total length of the data */ - size=indexes[UCASE_IX_LENGTH]; - - if(length>=0) { - if(length<size) { - udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - /* copy the data for inaccessible bytes */ - if(inBytes!=outBytes) { - uprv_memcpy(outBytes, inBytes, size); - } - - offset=0; - - /* swap the int32_t indexes[] */ - count=indexes[UCASE_IX_INDEX_TOP]*4; - ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); - offset+=count; - - /* swap the UTrie */ - count=indexes[UCASE_IX_TRIE_SIZE]; - utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - offset+=count; - - /* swap the uint16_t exceptions[] and unfold[] */ - count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2; - ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - offset+=count; - - U_ASSERT(offset==size); - } - - return headerSize+size; -} - -/* Unicode bidi/shaping data swapping --------------------------------------- */ - -static int32_t U_CALLCONV -ubidi_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - - const uint8_t *inBytes; - uint8_t *outBytes; - - const int32_t *inIndexes; - int32_t indexes[16]; - - int32_t i, offset, count, size; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */ - pInfo->dataFormat[1]==UBIDI_FMT_1 && - pInfo->dataFormat[2]==UBIDI_FMT_2 && - pInfo->dataFormat[3]==UBIDI_FMT_3 && - ((pInfo->formatVersion[0]==1 && - pInfo->formatVersion[2]==UTRIE_SHIFT && - pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || - pInfo->formatVersion[0]==2) - )) { - udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes=(const uint8_t *)inData+headerSize; - outBytes=(uint8_t *)outData+headerSize; - - inIndexes=(const int32_t *)inBytes; - - if(length>=0) { - length-=headerSize; - if(length<16*4) { - udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */ - for(i=0; i<16; ++i) { - indexes[i]=udata_readInt32(ds, inIndexes[i]); - } - - /* get the total length of the data */ - size=indexes[UBIDI_IX_LENGTH]; - - if(length>=0) { - if(length<size) { - udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - /* copy the data for inaccessible bytes */ - if(inBytes!=outBytes) { - uprv_memcpy(outBytes, inBytes, size); - } - - offset=0; - - /* swap the int32_t indexes[] */ - count=indexes[UBIDI_IX_INDEX_TOP]*4; - ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); - offset+=count; - - /* swap the UTrie */ - count=indexes[UBIDI_IX_TRIE_SIZE]; - utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - offset+=count; - - /* swap the uint32_t mirrors[] */ - count=indexes[UBIDI_IX_MIRROR_LENGTH]*4; - ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - offset+=count; - - /* just skip the uint8_t jgArray[] and jgArray2[] */ - count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START]; - offset+=count; - count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2]; - offset+=count; - - U_ASSERT(offset==size); - } - - return headerSize+size; -} - -/* Unicode normalization data swapping -------------------------------------- */ - -#if !UCONFIG_NO_NORMALIZATION - -static int32_t U_CALLCONV -unorm_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - - const uint8_t *inBytes; - uint8_t *outBytes; - - const int32_t *inIndexes; - int32_t indexes[32]; - - int32_t i, offset, count, size; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */ - pInfo->dataFormat[1]==0x6f && - pInfo->dataFormat[2]==0x72 && - pInfo->dataFormat[3]==0x6d && - pInfo->formatVersion[0]==2 - )) { - udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes=(const uint8_t *)inData+headerSize; - outBytes=(uint8_t *)outData+headerSize; - - inIndexes=(const int32_t *)inBytes; - - if(length>=0) { - length-=headerSize; - if(length<32*4) { - udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */ - for(i=0; i<32; ++i) { - indexes[i]=udata_readInt32(ds, inIndexes[i]); - } - - /* calculate the total length of the data */ - size= - 32*4+ /* size of indexes[] */ - indexes[_NORM_INDEX_TRIE_SIZE]+ - indexes[_NORM_INDEX_UCHAR_COUNT]*2+ - indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+ - indexes[_NORM_INDEX_FCD_TRIE_SIZE]+ - indexes[_NORM_INDEX_AUX_TRIE_SIZE]+ - indexes[_NORM_INDEX_CANON_SET_COUNT]*2; - - if(length>=0) { - if(length<size) { - udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - /* copy the data for inaccessible bytes */ - if(inBytes!=outBytes) { - uprv_memcpy(outBytes, inBytes, size); - } - - offset=0; - - /* swap the indexes[] */ - count=32*4; - ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); - offset+=count; - - /* swap the main UTrie */ - count=indexes[_NORM_INDEX_TRIE_SIZE]; - utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - offset+=count; - - /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */ - count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2; - ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - offset+=count; - - /* swap the FCD UTrie */ - count=indexes[_NORM_INDEX_FCD_TRIE_SIZE]; - if(count!=0) { - utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - offset+=count; - } - - /* swap the aux UTrie */ - count=indexes[_NORM_INDEX_AUX_TRIE_SIZE]; - if(count!=0) { - utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - offset+=count; - } - - /* swap the uint16_t combiningTable[] */ - count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2; - ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - offset+=count; - } - - return headerSize+size; -} - -#endif - -/* Swap 'Test' data from gentest */ -static int32_t U_CALLCONV -test_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - - const uint8_t *inBytes; - uint8_t *outBytes; - - int32_t offset; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL"); - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */ - pInfo->dataFormat[1]==0x65 && - pInfo->dataFormat[2]==0x73 && - pInfo->dataFormat[3]==0x74 && - pInfo->formatVersion[0]==1 - )) { - udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes=(const uint8_t *)inData+headerSize; - outBytes=(uint8_t *)outData+headerSize; - - int32_t size16 = 2; // 16bit plus padding - int32_t sizeStr = 5; // 4 char inv-str plus null - int32_t size = size16 + sizeStr; - - if(length>=0) { - if(length<size) { - udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n", - length, size); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - offset =0; - /* swap a 1 entry array */ - ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode); - offset+=size16; - ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode); - } - - return headerSize+size; -} - -/* swap any data (except a .dat package) ------------------------------------ */ - -static const struct { - uint8_t dataFormat[4]; - UDataSwapFn *swapFn; -} swapFns[]={ - { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ -#if !UCONFIG_NO_LEGACY_CONVERSION - { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ -#endif -#if !UCONFIG_NO_CONVERSION - { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ -#endif -#if !UCONFIG_NO_IDNA - { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ -#endif - /* insert data formats here, descending by expected frequency of occurrence */ - { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ - - { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, - ucase_swap }, /* dataFormat="cAsE" */ - - { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, - ubidi_swap }, /* dataFormat="BiDi" */ - -#if !UCONFIG_NO_NORMALIZATION - { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ - { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ -#endif -#if !UCONFIG_NO_COLLATION - { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ - { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ -#endif -#if !UCONFIG_NO_BREAK_ITERATION - { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ - { { 0x44, 0x69, 0x63, 0x74 }, udict_swap }, /* dataFormat="Dict" */ -#endif - { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ - { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */ -#if !UCONFIG_NO_NORMALIZATION - { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */ -#endif - { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */ -}; - -U_CAPI int32_t U_EXPORT2 -udata_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - char dataFormatChars[4]; - const UDataInfo *pInfo; - int32_t i, swappedLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* - * Preflight the header first; checks for illegal arguments, too. - * Do not swap the header right away because the format-specific swapper - * will swap it, get the headerSize again, and also use the header - * information. Otherwise we would have to pass some of the information - * and not be able to use the UDataSwapFn signature. - */ - udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); - - /* - * If we wanted udata_swap() to also handle non-loadable data like a UTrie, - * then we could check here for further known magic values and structures. - */ - if(U_FAILURE(*pErrorCode)) { - return 0; /* the data format was not recognized */ - } - - pInfo=(const UDataInfo *)((const char *)inData+4); - - { - /* convert the data format from ASCII to Unicode to the system charset */ - UChar u[4]={ - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3] - }; - - if(uprv_isInvariantUString(u, 4)) { - u_UCharsToChars(u, dataFormatChars, 4); - } else { - dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; - } - } - - /* dispatch to the swap function for the dataFormat */ - for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) { - if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { - swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); - - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - dataFormatChars[0], dataFormatChars[1], - dataFormatChars[2], dataFormatChars[3], - u_errorName(*pErrorCode)); - } else if(swappedLength<(length-15)) { - /* swapped less than expected */ - udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", - swappedLength, length, - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - dataFormatChars[0], dataFormatChars[1], - dataFormatChars[2], dataFormatChars[3], - u_errorName(*pErrorCode)); - } - - return swappedLength; - } - } - - /* the dataFormat was not recognized */ - udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - dataFormatChars[0], dataFormatChars[1], - dataFormatChars[2], dataFormatChars[3]); - - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/swapimpl.h b/deps/node/deps/icu-small/source/tools/toolutil/swapimpl.h deleted file mode 100644 index 8c6474f6..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/swapimpl.h +++ /dev/null @@ -1,45 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2005, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: swapimpl.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2005jul29 -* created by: Markus W. Scherer -* -* Declarations for data file swapping functions not declared in internal -* library headers. -*/ - -#ifndef __SWAPIMPL_H__ -#define __SWAPIMPL_H__ - -#include "unicode/utypes.h" -#include "udataswp.h" - -/** - * Identifies and then transforms the ICU data piece in-place, or determines - * its length. See UDataSwapFn. - * This function handles single data pieces (but not .dat data packages) - * and internally dispatches to per-type swap functions. - * Sets a U_UNSUPPORTED_ERROR if the data format is not recognized. - * - * @see UDataSwapFn - * @see udata_openSwapper - * @see udata_openSwapperForInputData - * @internal ICU 2.8 - */ -U_CAPI int32_t U_EXPORT2 -udata_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/toolutil.cpp b/deps/node/deps/icu-small/source/tools/toolutil/toolutil.cpp deleted file mode 100644 index 0f7d0984..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/toolutil.cpp +++ /dev/null @@ -1,357 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: toolutil.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999nov19 -* created by: Markus W. Scherer -* -* 6/25/08 - Added Cygwin specific code in uprv_mkdir - Brian Rower -* -* This file contains utility functions for ICU tools like genccode. -*/ - -#include "unicode/platform.h" -#if U_PLATFORM == U_PF_MINGW -// *cough* - for struct stat -#ifdef __STRICT_ANSI__ -#undef __STRICT_ANSI__ -#endif -#endif - -#include <stdio.h> -#include <sys/stat.h> -#include "unicode/utypes.h" - -#ifndef U_TOOLUTIL_IMPLEMENTATION -#error U_TOOLUTIL_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu -#endif - -#if U_PLATFORM_USES_ONLY_WIN32_API -# define VC_EXTRALEAN -# define WIN32_LEAN_AND_MEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -# if U_PLATFORM == U_PF_MINGW -# define __NO_MINGW_LFS /* gets around missing 'off64_t' */ -# endif -# include <windows.h> -# include <direct.h> -#else -# include <sys/stat.h> -# include <sys/types.h> -#endif - -/* In MinGW environment, io.h needs to be included for _mkdir() */ -#if U_PLATFORM == U_PF_MINGW -#include <io.h> -#endif - -#include <errno.h> - -#include "unicode/errorcode.h" -#include "unicode/putil.h" -#include "cmemory.h" -#include "cstring.h" -#include "toolutil.h" -#include "unicode/ucal.h" - -U_NAMESPACE_BEGIN - -IcuToolErrorCode::~IcuToolErrorCode() { - // Safe because our handleFailure() does not throw exceptions. - if(isFailure()) { handleFailure(); } -} - -void IcuToolErrorCode::handleFailure() const { - fprintf(stderr, "error at %s: %s\n", location, errorName()); - exit(errorCode); -} - -U_NAMESPACE_END - -static int32_t currentYear = -1; - -U_CAPI int32_t U_EXPORT2 getCurrentYear() { -#if !UCONFIG_NO_FORMATTING - UErrorCode status=U_ZERO_ERROR; - UCalendar *cal = NULL; - - if(currentYear == -1) { - cal = ucal_open(NULL, -1, NULL, UCAL_TRADITIONAL, &status); - ucal_setMillis(cal, ucal_getNow(), &status); - currentYear = ucal_get(cal, UCAL_YEAR, &status); - ucal_close(cal); - } -#else - /* No formatting- no way to set the current year. */ -#endif - return currentYear; -} - - -U_CAPI const char * U_EXPORT2 -getLongPathname(const char *pathname) { -#if U_PLATFORM_USES_ONLY_WIN32_API - /* anticipate problems with "short" pathnames */ - static WIN32_FIND_DATAA info; - HANDLE file=FindFirstFileA(pathname, &info); - if(file!=INVALID_HANDLE_VALUE) { - if(info.cAlternateFileName[0]!=0) { - /* this file has a short name, get and use the long one */ - const char *basename=findBasename(pathname); - if(basename!=pathname) { - /* prepend the long filename with the original path */ - uprv_memmove(info.cFileName+(basename-pathname), info.cFileName, uprv_strlen(info.cFileName)+1); - uprv_memcpy(info.cFileName, pathname, basename-pathname); - } - pathname=info.cFileName; - } - FindClose(file); - } -#endif - return pathname; -} - -U_CAPI const char * U_EXPORT2 -findDirname(const char *path, char *buffer, int32_t bufLen, UErrorCode* status) { - if(U_FAILURE(*status)) return NULL; - const char *resultPtr = NULL; - int32_t resultLen = 0; - - const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR); -#if U_FILE_ALT_SEP_CHAR!=U_FILE_SEP_CHAR - const char *basenameAlt=uprv_strrchr(path, U_FILE_ALT_SEP_CHAR); - if(basenameAlt && (!basename || basename<basenameAlt)) { - basename = basenameAlt; - } -#endif - if(!basename) { - /* no basename - return ''. */ - resultPtr = ""; - resultLen = 0; - } else { - resultPtr = path; - resultLen = basename - path; - if(resultLen<1) { - resultLen = 1; /* '/' or '/a' -> '/' */ - } - } - - if((resultLen+1) <= bufLen) { - uprv_strncpy(buffer, resultPtr, resultLen); - buffer[resultLen]=0; - return buffer; - } else { - *status = U_BUFFER_OVERFLOW_ERROR; - return NULL; - } -} - -U_CAPI const char * U_EXPORT2 -findBasename(const char *filename) { - const char *basename=uprv_strrchr(filename, U_FILE_SEP_CHAR); - -#if U_FILE_ALT_SEP_CHAR!=U_FILE_SEP_CHAR -#if !(U_PLATFORM == U_PF_CYGWIN && U_PLATFORM_USES_ONLY_WIN32_API) - if(basename==NULL) -#endif - { - /* Use lenient matching on Windows, which can accept either \ or / - This is useful for environments like Win32+CygWin which have both. - */ - basename=uprv_strrchr(filename, U_FILE_ALT_SEP_CHAR); - } -#endif - - if(basename!=NULL) { - return basename+1; - } else { - return filename; - } -} - -U_CAPI void U_EXPORT2 -uprv_mkdir(const char *pathname, UErrorCode *status) { - - int retVal = 0; -#if U_PLATFORM_USES_ONLY_WIN32_API - retVal = _mkdir(pathname); -#else - retVal = mkdir(pathname, S_IRWXU | (S_IROTH | S_IXOTH) | (S_IROTH | S_IXOTH)); -#endif - if (retVal && errno != EEXIST) { -#if U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN - /*if using Cygwin and the mkdir says it failed...check if the directory already exists..*/ - /* if it does...don't give the error, if it does not...give the error - Brian Rower - 6/25/08 */ - struct stat st; - - if(stat(pathname,&st) != 0) - { - *status = U_FILE_ACCESS_ERROR; - } -#else - *status = U_FILE_ACCESS_ERROR; -#endif - } -} - -#if !UCONFIG_NO_FILE_IO -U_CAPI UBool U_EXPORT2 -uprv_fileExists(const char *file) { - struct stat stat_buf; - if (stat(file, &stat_buf) == 0) { - return TRUE; - } else { - return FALSE; - } -} -#endif - -/*U_CAPI UDate U_EXPORT2 -uprv_getModificationDate(const char *pathname, UErrorCode *status) -{ - if(U_FAILURE(*status)) { - return; - } - // TODO: handle case where stat is not available - struct stat st; - - if(stat(pathname,&st) != 0) - { - *status = U_FILE_ACCESS_ERROR; - } else { - return st.st_mtime; - } -} -*/ - -/* tool memory helper ------------------------------------------------------- */ - -struct UToolMemory { - char name[64]; - int32_t capacity, maxCapacity, size, idx; - void *array; - UAlignedMemory staticArray[1]; -}; - -U_CAPI UToolMemory * U_EXPORT2 -utm_open(const char *name, int32_t initialCapacity, int32_t maxCapacity, int32_t size) { - UToolMemory *mem; - - if(maxCapacity<initialCapacity) { - maxCapacity=initialCapacity; - } - - mem=(UToolMemory *)uprv_malloc(sizeof(UToolMemory)+initialCapacity*size); - if(mem==NULL) { - fprintf(stderr, "error: %s - out of memory\n", name); - exit(U_MEMORY_ALLOCATION_ERROR); - } - mem->array=mem->staticArray; - - uprv_strcpy(mem->name, name); - mem->capacity=initialCapacity; - mem->maxCapacity=maxCapacity; - mem->size=size; - mem->idx=0; - return mem; -} - -U_CAPI void U_EXPORT2 -utm_close(UToolMemory *mem) { - if(mem!=NULL) { - if(mem->array!=mem->staticArray) { - uprv_free(mem->array); - } - uprv_free(mem); - } -} - - -U_CAPI void * U_EXPORT2 -utm_getStart(UToolMemory *mem) { - return (char *)mem->array; -} - -U_CAPI int32_t U_EXPORT2 -utm_countItems(UToolMemory *mem) { - return mem->idx; -} - - -static UBool -utm_hasCapacity(UToolMemory *mem, int32_t capacity) { - if(mem->capacity<capacity) { - int32_t newCapacity; - - if(mem->maxCapacity<capacity) { - fprintf(stderr, "error: %s - trying to use more than maxCapacity=%ld units\n", - mem->name, (long)mem->maxCapacity); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - /* try to allocate a larger array */ - if(capacity>=2*mem->capacity) { - newCapacity=capacity; - } else if(mem->capacity<=mem->maxCapacity/3) { - newCapacity=2*mem->capacity; - } else { - newCapacity=mem->maxCapacity; - } - - if(mem->array==mem->staticArray) { - mem->array=uprv_malloc(newCapacity*mem->size); - if(mem->array!=NULL) { - uprv_memcpy(mem->array, mem->staticArray, (size_t)mem->idx*mem->size); - } - } else { - mem->array=uprv_realloc(mem->array, newCapacity*mem->size); - } - - if(mem->array==NULL) { - fprintf(stderr, "error: %s - out of memory\n", mem->name); - exit(U_MEMORY_ALLOCATION_ERROR); - } - mem->capacity=newCapacity; - } - - return TRUE; -} - -U_CAPI void * U_EXPORT2 -utm_alloc(UToolMemory *mem) { - char *p=NULL; - int32_t oldIndex=mem->idx; - int32_t newIndex=oldIndex+1; - if(utm_hasCapacity(mem, newIndex)) { - p=(char *)mem->array+oldIndex*mem->size; - mem->idx=newIndex; - uprv_memset(p, 0, mem->size); - } - return p; -} - -U_CAPI void * U_EXPORT2 -utm_allocN(UToolMemory *mem, int32_t n) { - char *p=NULL; - int32_t oldIndex=mem->idx; - int32_t newIndex=oldIndex+n; - if(utm_hasCapacity(mem, newIndex)) { - p=(char *)mem->array+oldIndex*mem->size; - mem->idx=newIndex; - uprv_memset(p, 0, n*mem->size); - } - return p; -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/toolutil.h b/deps/node/deps/icu-small/source/tools/toolutil/toolutil.h deleted file mode 100644 index be07787a..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/toolutil.h +++ /dev/null @@ -1,187 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: toolutil.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999nov19 -* created by: Markus W. Scherer -* -* This file defines utility functions for ICU tools like genccode. -*/ - -#ifndef __TOOLUTIL_H__ -#define __TOOLUTIL_H__ - -#include "unicode/utypes.h" - - -#ifdef __cplusplus - -#include "unicode/errorcode.h" - -U_NAMESPACE_BEGIN - -/** - * ErrorCode subclass for use in ICU command-line tools. - * The destructor calls handleFailure() which calls exit(errorCode) when isFailure(). - */ -class U_TOOLUTIL_API IcuToolErrorCode : public ErrorCode { -public: - /** - * @param loc A short string describing where the IcuToolErrorCode is used. - */ - IcuToolErrorCode(const char *loc) : location(loc) {} - virtual ~IcuToolErrorCode(); -protected: - virtual void handleFailure() const; -private: - const char *location; -}; - -U_NAMESPACE_END - -#endif - -/* - * For Windows, a path/filename may be the short (8.3) version - * of the "real", long one. In this case, the short one - * is abbreviated and contains a tilde etc. - * This function returns a pointer to the original pathname - * if it is the "real" one itself, and a pointer to a static - * buffer (not thread-safe) containing the long version - * if the pathname is indeed abbreviated. - * - * On platforms other than Windows, this function always returns - * the input pathname pointer. - * - * This function is especially useful in tools that are called - * by a batch file for loop, which yields short pathnames on Win9x. - */ -U_CAPI const char * U_EXPORT2 -getLongPathname(const char *pathname); - -/** - * Find the basename at the end of a pathname, i.e., the part - * after the last file separator, and return a pointer - * to this part of the pathname. - * If the pathname only contains a basename and no file separator, - * then the pathname pointer itself is returned. - **/ -U_CAPI const char * U_EXPORT2 -findBasename(const char *filename); - -/** - * Find the directory name of a pathname, that is, everything - * up to but not including the last file separator. - * - * If successful, copies the directory name into the output buffer along with - * a terminating NULL. - * - * If there isn't a directory name in the path, it returns an empty string. - * @param path the full pathname to inspect. - * @param buffer the output buffer - * @param bufLen the output buffer length - * @param status error code- may return U_BUFFER_OVERFLOW_ERROR if bufLen is too small. - * @return If successful, a pointer to the output buffer. If failure or bufLen is too small, NULL. - **/ -U_CAPI const char * U_EXPORT2 -findDirname(const char *path, char *buffer, int32_t bufLen, UErrorCode* status); - -/* - * Return the current year in the Gregorian calendar. Used for copyright generation. - */ -U_CAPI int32_t U_EXPORT2 -getCurrentYear(void); - -/* - * Creates a directory with pathname. - * - * @param status Set to an error code when mkdir failed. - */ -U_CAPI void U_EXPORT2 -uprv_mkdir(const char *pathname, UErrorCode *status); - -#if !UCONFIG_NO_FILE_IO -/** - * Return TRUE if the named item exists - * @param file filename - * @return TRUE if named item (file, dir, etc) exists, FALSE otherwise - */ -U_CAPI UBool U_EXPORT2 -uprv_fileExists(const char *file); -#endif - -/** - * Return the modification date for the specified file or directory. - * Return value is undefined if there was an error. - */ -/*U_CAPI UDate U_EXPORT2 -uprv_getModificationDate(const char *pathname, UErrorCode *status); -*/ -/* - * Returns the modification - * - * @param status Set to an error code when mkdir failed. - */ - -/* - * UToolMemory is used for generic, custom memory management. - * It is allocated with enough space for count*size bytes starting - * at array. - * The array is declared with a union of large data types so - * that its base address is aligned for any types. - * If size is a multiple of a data type size, then such items - * can be safely allocated inside the array, at offsets that - * are themselves multiples of size. - */ -struct UToolMemory; -typedef struct UToolMemory UToolMemory; - -/** - * Open a UToolMemory object for allocation of initialCapacity to maxCapacity - * items with size bytes each. - */ -U_CAPI UToolMemory * U_EXPORT2 -utm_open(const char *name, int32_t initialCapacity, int32_t maxCapacity, int32_t size); - -/** - * Close a UToolMemory object. - */ -U_CAPI void U_EXPORT2 -utm_close(UToolMemory *mem); - -/** - * Get the pointer to the beginning of the array of items. - * The pointer becomes invalid after allocation of new items. - */ -U_CAPI void * U_EXPORT2 -utm_getStart(UToolMemory *mem); - -/** - * Get the current number of items. - */ -U_CAPI int32_t U_EXPORT2 -utm_countItems(UToolMemory *mem); - -/** - * Allocate one more item and return the pointer to its start in the array. - */ -U_CAPI void * U_EXPORT2 -utm_alloc(UToolMemory *mem); - -/** - * Allocate n items and return the pointer to the start of the first one in the array. - */ -U_CAPI void * U_EXPORT2 -utm_allocN(UToolMemory *mem, int32_t n); - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/ucbuf.cpp b/deps/node/deps/icu-small/source/tools/toolutil/ucbuf.cpp deleted file mode 100644 index 5269c817..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/ucbuf.cpp +++ /dev/null @@ -1,788 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1998-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* -* File ucbuf.cpp -* -* Modification History: -* -* Date Name Description -* 05/10/01 Ram Creation. -******************************************************************************* -*/ - -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/uchar.h" -#include "unicode/ucnv.h" -#include "unicode/ucnv_err.h" -#include "unicode/ustring.h" -#include "unicode/utf16.h" -#include "filestrm.h" -#include "cstring.h" -#include "cmemory.h" -#include "ustrfmt.h" -#include "ucbuf.h" -#include <stdio.h> - -#if !UCONFIG_NO_CONVERSION - - -#define MAX_IN_BUF 1000 -#define MAX_U_BUF 1500 -#define CONTEXT_LEN 20 - -struct UCHARBUF { - UChar* buffer; - UChar* currentPos; - UChar* bufLimit; - int32_t bufCapacity; - int32_t remaining; - int32_t signatureLength; - FileStream* in; - UConverter* conv; - UBool showWarning; /* makes this API not produce any errors */ - UBool isBuffered; -}; - -U_CAPI UBool U_EXPORT2 -ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){ - char start[8]; - int32_t numRead; - - UChar target[1]={ 0 }; - UChar* pTarget; - const char* pStart; - - /* read a few bytes */ - numRead=T_FileStream_read(in, start, sizeof(start)); - - *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error); - - /* unread the bytes beyond what was consumed for U+FEFF */ - T_FileStream_rewind(in); - if (*signatureLength > 0) { - T_FileStream_read(in, start, *signatureLength); - } - - if(*cp==NULL){ - *conv =NULL; - return FALSE; - } - - /* open the converter for the detected Unicode charset */ - *conv = ucnv_open(*cp,error); - - /* convert and ignore initial U+FEFF, and the buffer overflow */ - pTarget = target; - pStart = start; - ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error); - *signatureLength = (int32_t)(pStart - start); - if(*error==U_BUFFER_OVERFLOW_ERROR) { - *error=U_ZERO_ERROR; - } - - /* verify that we successfully read exactly U+FEFF */ - if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) { - *error=U_INTERNAL_PROGRAM_ERROR; - } - - - return TRUE; -} -static UBool ucbuf_isCPKnown(const char* cp){ - if(ucnv_compareNames("UTF-8",cp)==0){ - return TRUE; - } - if(ucnv_compareNames("UTF-16BE",cp)==0){ - return TRUE; - } - if(ucnv_compareNames("UTF-16LE",cp)==0){ - return TRUE; - } - if(ucnv_compareNames("UTF-16",cp)==0){ - return TRUE; - } - if(ucnv_compareNames("UTF-32",cp)==0){ - return TRUE; - } - if(ucnv_compareNames("UTF-32BE",cp)==0){ - return TRUE; - } - if(ucnv_compareNames("UTF-32LE",cp)==0){ - return TRUE; - } - if(ucnv_compareNames("SCSU",cp)==0){ - return TRUE; - } - if(ucnv_compareNames("BOCU-1",cp)==0){ - return TRUE; - } - if(ucnv_compareNames("UTF-7",cp)==0){ - return TRUE; - } - return FALSE; -} - -U_CAPI FileStream * U_EXPORT2 -ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){ - FileStream* in=NULL; - if(error==NULL || U_FAILURE(*error)){ - return NULL; - } - if(conv==NULL || cp==NULL || fileName==NULL){ - *error = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - /* open the file */ - in= T_FileStream_open(fileName,"rb"); - - if(in == NULL){ - *error=U_FILE_ACCESS_ERROR; - return NULL; - } - - if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) { - return in; - } else { - ucnv_close(*conv); - *conv=NULL; - T_FileStream_close(in); - return NULL; - } -} - -/* fill the uchar buffer */ -static UCHARBUF* -ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ - UChar* pTarget=NULL; - UChar* target=NULL; - const char* source=NULL; - char carr[MAX_IN_BUF] = {'\0'}; - char* cbuf = carr; - int32_t inputRead=0; - int32_t outputWritten=0; - int32_t offset=0; - const char* sourceLimit =NULL; - int32_t cbufSize=0; - pTarget = buf->buffer; - /* check if we arrived here without exhausting the buffer*/ - if(buf->currentPos<buf->bufLimit){ - offset = (int32_t)(buf->bufLimit-buf->currentPos); - memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); - } - -#if UCBUF_DEBUG - memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); -#endif - if(buf->isBuffered){ - cbufSize = MAX_IN_BUF; - /* read the file */ - inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); - buf->remaining-=inputRead; - - }else{ - cbufSize = T_FileStream_size(buf->in); - cbuf = (char*)uprv_malloc(cbufSize); - if (cbuf == NULL) { - *error = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); - buf->remaining-=inputRead; - } - - /* just to be sure...*/ - if ( 0 == inputRead ) - buf->remaining = 0; - - target=pTarget; - /* convert the bytes */ - if(buf->conv){ - /* set the callback to stop */ - UConverterToUCallback toUOldAction ; - void* toUOldContext; - void* toUNewContext=NULL; - ucnv_setToUCallBack(buf->conv, - UCNV_TO_U_CALLBACK_STOP, - toUNewContext, - &toUOldAction, - (const void**)&toUOldContext, - error); - /* since state is saved in the converter we add offset to source*/ - target = pTarget+offset; - source = cbuf; - sourceLimit = source + inputRead; - ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), - &source,sourceLimit,NULL, - (UBool)(buf->remaining==0),error); - - if(U_FAILURE(*error)){ - char context[CONTEXT_LEN+1]; - char preContext[CONTEXT_LEN+1]; - char postContext[CONTEXT_LEN+1]; - int8_t len = CONTEXT_LEN; - int32_t start=0; - int32_t stop =0; - int32_t pos =0; - /* use erro1 to preserve the error code */ - UErrorCode error1 =U_ZERO_ERROR; - - if( buf->showWarning==TRUE){ - fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while" - " converting input stream to target encoding: %s\n", - u_errorName(*error)); - } - - - /* now get the context chars */ - ucnv_getInvalidChars(buf->conv,context,&len,&error1); - context[len]= 0 ; /* null terminate the buffer */ - - pos = (int32_t)(source - cbuf - len); - - /* for pre-context */ - start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1)); - stop = pos-len; - - memcpy(preContext,cbuf+start,stop-start); - /* null terminate the buffer */ - preContext[stop-start] = 0; - - /* for post-context */ - start = pos+len; - stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); - - memcpy(postContext,source,stop-start); - /* null terminate the buffer */ - postContext[stop-start] = 0; - - if(buf->showWarning ==TRUE){ - /* print out the context */ - fprintf(stderr,"\tPre-context: %s\n",preContext); - fprintf(stderr,"\tContext: %s\n",context); - fprintf(stderr,"\tPost-context: %s\n", postContext); - } - - /* reset the converter */ - ucnv_reset(buf->conv); - - /* set the call back to substitute - * and restart conversion - */ - ucnv_setToUCallBack(buf->conv, - UCNV_TO_U_CALLBACK_SUBSTITUTE, - toUNewContext, - &toUOldAction, - (const void**)&toUOldContext, - &error1); - - /* reset source and target start positions */ - target = pTarget+offset; - source = cbuf; - - /* re convert */ - ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), - &source,sourceLimit,NULL, - (UBool)(buf->remaining==0),&error1); - - } - outputWritten = (int32_t)(target - pTarget); - -#if UCBUF_DEBUG - { - int i; - target = pTarget; - for(i=0;i<numRead;i++){ - /* printf("%c", (char)(*target++));*/ - } - } -#endif - - }else{ - u_charsToUChars(cbuf,target+offset,inputRead); - outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset); - } - buf->currentPos = pTarget; - buf->bufLimit=pTarget+outputWritten; - *buf->bufLimit=0; /*NUL terminate*/ - if(cbuf!=carr){ - uprv_free(cbuf); - } - return buf; -} - - - -/* get a UChar from the stream*/ -U_CAPI int32_t U_EXPORT2 -ucbuf_getc(UCHARBUF* buf,UErrorCode* error){ - if(error==NULL || U_FAILURE(*error)){ - return FALSE; - } - if(buf->currentPos>=buf->bufLimit){ - if(buf->remaining==0){ - return U_EOF; - } - buf=ucbuf_fillucbuf(buf,error); - if(U_FAILURE(*error)){ - return U_EOF; - } - } - - return *(buf->currentPos++); -} - -/* get a UChar32 from the stream*/ -U_CAPI int32_t U_EXPORT2 -ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){ - int32_t retVal = (int32_t)U_EOF; - if(error==NULL || U_FAILURE(*error)){ - return FALSE; - } - if(buf->currentPos+1>=buf->bufLimit){ - if(buf->remaining==0){ - return U_EOF; - } - buf=ucbuf_fillucbuf(buf,error); - if(U_FAILURE(*error)){ - return U_EOF; - } - } - if(U16_IS_LEAD(*(buf->currentPos))){ - retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]); - buf->currentPos+=2; - }else{ - retVal = *(buf->currentPos++); - } - return retVal; -} - -/* u_unescapeAt() callback to return a UChar*/ -static UChar U_CALLCONV -_charAt(int32_t offset, void *context) { - return ((UCHARBUF*) context)->currentPos[offset]; -} - -/* getc and escape it */ -U_CAPI int32_t U_EXPORT2 -ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) { - int32_t length; - int32_t offset; - UChar32 c32,c1,c2; - if(error==NULL || U_FAILURE(*error)){ - return FALSE; - } - /* Fill the buffer if it is empty */ - if (buf->currentPos >=buf->bufLimit-2) { - ucbuf_fillucbuf(buf,error); - } - - /* Get the next character in the buffer */ - if (buf->currentPos < buf->bufLimit) { - c1 = *(buf->currentPos)++; - } else { - c1 = U_EOF; - } - - c2 = *(buf->currentPos); - - /* If it isn't a backslash, return it */ - if (c1 != 0x005C) { - return c1; - } - - /* Determine the amount of data in the buffer */ - length = (int32_t)(buf->bufLimit - buf->currentPos); - - /* The longest escape sequence is \Uhhhhhhhh; make sure - we have at least that many characters */ - if (length < 10) { - - /* fill the buffer */ - ucbuf_fillucbuf(buf,error); - length = (int32_t)(buf->bufLimit - buf->buffer); - } - - /* Process the escape */ - offset = 0; - c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf); - - /* check if u_unescapeAt unescaped and converted - * to c32 or not - */ - if(c32==(UChar32)0xFFFFFFFF){ - if(buf->showWarning) { - char context[CONTEXT_LEN+1]; - int32_t len = CONTEXT_LEN; - if(length < len) { - len = length; - } - context[len]= 0 ; /* null terminate the buffer */ - u_UCharsToChars( buf->currentPos, context, len); - fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context); - } - *error= U_ILLEGAL_ESCAPE_SEQUENCE; - return c1; - }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){ - /* Update the current buffer position */ - buf->currentPos += offset; - }else{ - /* unescaping failed so we just return - * c1 and not consume the buffer - * this is useful for rules with escapes - * in resouce bundles - * eg: \' \\ \" - */ - return c1; - } - - return c32; -} - -U_CAPI UCHARBUF* U_EXPORT2 -ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){ - - FileStream* in = NULL; - int32_t fileSize=0; - const char* knownCp; - if(error==NULL || U_FAILURE(*error)){ - return NULL; - } - if(cp==NULL || fileName==NULL){ - *error = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - if (!uprv_strcmp(fileName, "-")) { - in = T_FileStream_stdin(); - }else{ - in = T_FileStream_open(fileName, "rb"); - } - - if(in!=NULL){ - UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF)); - fileSize = T_FileStream_size(in); - if(buf == NULL){ - *error = U_MEMORY_ALLOCATION_ERROR; - T_FileStream_close(in); - return NULL; - } - buf->in=in; - buf->conv=NULL; - buf->showWarning = showWarning; - buf->isBuffered = buffered; - buf->signatureLength=0; - if(*cp==NULL || **cp=='\0'){ - /* don't have code page name... try to autodetect */ - ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error); - }else if(ucbuf_isCPKnown(*cp)){ - /* discard BOM */ - ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error); - } - if(U_SUCCESS(*error) && buf->conv==NULL) { - buf->conv=ucnv_open(*cp,error); - } - if(U_FAILURE(*error)){ - ucnv_close(buf->conv); - uprv_free(buf); - T_FileStream_close(in); - return NULL; - } - - if((buf->conv==NULL) && (buf->showWarning==TRUE)){ - fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n"); - } - buf->remaining=fileSize-buf->signatureLength; - if(buf->isBuffered){ - buf->bufCapacity=MAX_U_BUF; - }else{ - buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/; - } - buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity ); - if (buf->buffer == NULL) { - *error = U_MEMORY_ALLOCATION_ERROR; - ucbuf_close(buf); - return NULL; - } - buf->currentPos=buf->buffer; - buf->bufLimit=buf->buffer; - if(U_FAILURE(*error)){ - fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error)); - ucbuf_close(buf); - return NULL; - } - ucbuf_fillucbuf(buf,error); - if(U_FAILURE(*error)){ - ucbuf_close(buf); - return NULL; - } - return buf; - } - *error =U_FILE_ACCESS_ERROR; - return NULL; -} - - - -/* TODO: this method will fail if at the - * begining of buffer and the uchar to unget - * is from the previous buffer. Need to implement - * system to take care of that situation. - */ -U_CAPI void U_EXPORT2 -ucbuf_ungetc(int32_t c,UCHARBUF* buf){ - /* decrement currentPos pointer - * if not at the begining of buffer - */ - if(buf->currentPos!=buf->buffer){ - if(*(buf->currentPos-1)==c){ - buf->currentPos--; - } else { - /* ungetc failed - did not match. */ - } - } else { - /* ungetc failed - beginning of buffer. */ - } -} - -/* frees the resources of UChar* buffer */ -static void -ucbuf_closebuf(UCHARBUF* buf){ - uprv_free(buf->buffer); - buf->buffer = NULL; -} - -/* close the buf and release resources*/ -U_CAPI void U_EXPORT2 -ucbuf_close(UCHARBUF* buf){ - if(buf!=NULL){ - if(buf->conv){ - ucnv_close(buf->conv); - } - T_FileStream_close(buf->in); - ucbuf_closebuf(buf); - uprv_free(buf); - } -} - -/* rewind the buf and file stream */ -U_CAPI void U_EXPORT2 -ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){ - if(error==NULL || U_FAILURE(*error)){ - return; - } - if(buf){ - buf->currentPos=buf->buffer; - buf->bufLimit=buf->buffer; - T_FileStream_rewind(buf->in); - buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength; - - ucnv_resetToUnicode(buf->conv); - if(buf->signatureLength>0) { - UChar target[1]={ 0 }; - UChar* pTarget; - char start[8]; - const char* pStart; - int32_t numRead; - - /* read the signature bytes */ - numRead=T_FileStream_read(buf->in, start, buf->signatureLength); - - /* convert and ignore initial U+FEFF, and the buffer overflow */ - pTarget = target; - pStart = start; - ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error); - if(*error==U_BUFFER_OVERFLOW_ERROR) { - *error=U_ZERO_ERROR; - } - - /* verify that we successfully read exactly U+FEFF */ - if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) { - *error=U_INTERNAL_PROGRAM_ERROR; - } - } - } -} - - -U_CAPI int32_t U_EXPORT2 -ucbuf_size(UCHARBUF* buf){ - if(buf){ - if(buf->isBuffered){ - return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv); - }else{ - return (int32_t)(buf->bufLimit - buf->buffer); - } - } - return 0; -} - -U_CAPI const UChar* U_EXPORT2 -ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){ - if(error==NULL || U_FAILURE(*error)){ - return NULL; - } - if(buf==NULL || len==NULL){ - *error = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - *len = (int32_t)(buf->bufLimit - buf->buffer); - return buf->buffer; -} - -U_CAPI const char* U_EXPORT2 -ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){ - int32_t requiredLen = 0; - int32_t dirlen = 0; - int32_t filelen = 0; - if(status==NULL || U_FAILURE(*status)){ - return NULL; - } - - if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - - dirlen = (int32_t)uprv_strlen(inputDir); - filelen = (int32_t)uprv_strlen(fileName); - if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { - requiredLen = dirlen + filelen + 2; - if((*len < requiredLen) || target==NULL){ - *len = requiredLen; - *status = U_BUFFER_OVERFLOW_ERROR; - return NULL; - } - - target[0] = '\0'; - /* - * append the input dir to openFileName if the first char in - * filename is not file seperation char and the last char input directory is not '.'. - * This is to support : - * genrb -s. /home/icu/data - * genrb -s. icu/data - * The user cannot mix notations like - * genrb -s. /icu/data --- the absolute path specified. -s redundant - * user should use - * genrb -s. icu/data --- start from CWD and look in icu/data dir - */ - if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ - uprv_strcpy(target, inputDir); - target[dirlen] = U_FILE_SEP_CHAR; - } - target[dirlen + 1] = '\0'; - } else { - requiredLen = dirlen + filelen + 1; - if((*len < requiredLen) || target==NULL){ - *len = requiredLen; - *status = U_BUFFER_OVERFLOW_ERROR; - return NULL; - } - - uprv_strcpy(target, inputDir); - } - - uprv_strcat(target, fileName); - return target; -} -/* - * Unicode TR 13 says any of the below chars is - * a new line char in a readline function in addition - * to CR+LF combination which needs to be - * handled seperately - */ -static UBool ucbuf_isCharNewLine(UChar c){ - switch(c){ - case 0x000A: /* LF */ - case 0x000D: /* CR */ - case 0x000C: /* FF */ - case 0x0085: /* NEL */ - case 0x2028: /* LS */ - case 0x2029: /* PS */ - return TRUE; - default: - return FALSE; - } -} - -U_CAPI const UChar* U_EXPORT2 -ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){ - UChar* temp = buf->currentPos; - UChar* savePos =NULL; - UChar c=0x0000; - if(buf->isBuffered){ - /* The input is buffered we have to do more - * for returning a pointer U_TRUNCATED_CHAR_FOUND - */ - for(;;){ - c = *temp++; - if(buf->remaining==0){ - return NULL; /* end of file is reached return NULL */ - } - if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){ - *err= U_TRUNCATED_CHAR_FOUND; - return NULL; - }else{ - ucbuf_fillucbuf(buf,err); - if(U_FAILURE(*err)){ - return NULL; - } - } - /* - * Accoding to TR 13 readLine functions must interpret - * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators - */ - /* Windows CR LF */ - if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){ - *len = (int32_t)(temp++ - buf->currentPos); - savePos = buf->currentPos; - buf->currentPos = temp; - return savePos; - } - /* else */ - - if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */ - *len = (int32_t)(temp - buf->currentPos); - savePos = buf->currentPos; - buf->currentPos = temp; - return savePos; - } - } - }else{ - /* we know that all input is read into the internal - * buffer so we can safely return pointers - */ - for(;;){ - c = *temp++; - - if(buf->currentPos==buf->bufLimit){ - return NULL; /* end of file is reached return NULL */ - } - /* Windows CR LF */ - if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){ - *len = (int32_t)(temp++ - buf->currentPos); - savePos = buf->currentPos; - buf->currentPos = temp; - return savePos; - } - /* else */ - if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */ - *len = (int32_t)(temp - buf->currentPos); - savePos = buf->currentPos; - buf->currentPos = temp; - return savePos; - } - } - } - /* not reached */ - /* A compiler warning will appear if all paths don't contain a return statement. */ -/* return NULL;*/ -} -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/ucbuf.h b/deps/node/deps/icu-small/source/tools/toolutil/ucbuf.h deleted file mode 100644 index 48d41ef4..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/ucbuf.h +++ /dev/null @@ -1,217 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1998-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* -* File ucbuf.h -* -* Modification History: -* -* Date Name Description -* 05/10/01 Ram Creation. -* -* This API reads in files and returns UChars -******************************************************************************* -*/ - -#include "unicode/localpointer.h" -#include "unicode/ucnv.h" -#include "filestrm.h" - -#if !UCONFIG_NO_CONVERSION - -#ifndef UCBUF_H -#define UCBUF_H 1 - -typedef struct UCHARBUF UCHARBUF; -/** - * End of file value - */ -#define U_EOF 0xFFFFFFFF -/** - * Error value if a sequence cannot be unescaped - */ -#define U_ERR 0xFFFFFFFE - -typedef struct ULine ULine; - -struct ULine { - UChar *name; - int32_t len; -}; - -/** - * Opens the UCHARBUF with the given file stream and code page for conversion - * @param fileName Name of the file to open. - * @param codepage The encoding of the file stream to convert to Unicode. - * If *codepoge is NULL on input the API will try to autodetect - * popular Unicode encodings - * @param showWarning Flag to print out warnings to STDOUT - * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads - * the whole file into memory and converts it. - * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - * @return pointer to the newly opened UCHARBUF - */ -U_CAPI UCHARBUF* U_EXPORT2 -ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err); - -/** - * Gets a UTF-16 code unit at the current position from the converted buffer - * and increments the current position - * @param buf Pointer to UCHARBUF structure - * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - */ -U_CAPI int32_t U_EXPORT2 -ucbuf_getc(UCHARBUF* buf,UErrorCode* err); - -/** - * Gets a UTF-32 code point at the current position from the converted buffer - * and increments the current position - * @param buf Pointer to UCHARBUF structure - * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - */ -U_CAPI int32_t U_EXPORT2 -ucbuf_getc32(UCHARBUF* buf,UErrorCode* err); - -/** - * Gets a UTF-16 code unit at the current position from the converted buffer after - * unescaping and increments the current position. If the escape sequence is for UTF-32 - * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned - * @param buf Pointer to UCHARBUF structure - * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - */ -U_CAPI int32_t U_EXPORT2 -ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err); - -/** - * Gets a pointer to the current position in the internal buffer and length of the line. - * It imperative to make a copy of the returned buffer before performing operations on it. - * @param buf Pointer to UCHARBUF structure - * @param len Output param to receive the len of the buffer returned till end of the line - * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - * Error: U_TRUNCATED_CHAR_FOUND - * @return Pointer to the internal buffer, NULL if EOF - */ -U_CAPI const UChar* U_EXPORT2 -ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err); - - -/** - * Resets the buffers and the underlying file stream. - * @param buf Pointer to UCHARBUF structure - * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - */ -U_CAPI void U_EXPORT2 -ucbuf_rewind(UCHARBUF* buf,UErrorCode* err); - -/** - * Returns a pointer to the internal converted buffer - * @param buf Pointer to UCHARBUF structure - * @param len Pointer to int32_t to receive the lenth of buffer - * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - * @return Pointer to internal UChar buffer - */ -U_CAPI const UChar* U_EXPORT2 -ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err); - -/** - * Closes the UCHARBUF structure members and cleans up the malloc'ed memory - * @param buf Pointer to UCHARBUF structure - */ -U_CAPI void U_EXPORT2 -ucbuf_close(UCHARBUF* buf); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUCHARBUFPointer - * "Smart pointer" class, closes a UCHARBUF via ucbuf_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close); - -U_NAMESPACE_END - -#endif - -/** - * Rewinds the buffer by one codepoint. Does not rewind over escaped characters. - */ -U_CAPI void U_EXPORT2 -ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf); - - -/** - * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected. - * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring - * the converter to correct state for converting the rest of the stream. So the UConverter parameter - * is necessary. - * If the charset was autodetected, the caller must close both the input FileStream - * and the converter. - * - * @param fileName The file name to be opened and encoding autodected - * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. - * @param cp Output param to receive the detected encoding - * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - * @return The input FileStream if its charset was autodetected; NULL otherwise. - */ -U_CAPI FileStream * U_EXPORT2 -ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, -int32_t* signatureLength, UErrorCode* status); - -/** - * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected. - * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring - * the converter to correct state for converting the rest of the stream. So the UConverter parameter - * is necessary. - * If the charset was autodetected, the caller must close the converter. - * - * @param fileStream The file stream whose encoding is to be detected - * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. - * @param cp Output param to receive the detected encoding - * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - * @return Boolean whether the Unicode charset was autodetected. - */ - -U_CAPI UBool U_EXPORT2 -ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status); - -/** - * Returns the approximate size in UChars required for converting the file to UChars - */ -U_CAPI int32_t U_EXPORT2 -ucbuf_size(UCHARBUF* buf); - -U_CAPI const char* U_EXPORT2 -ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); - -#endif -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/ucln_tu.cpp b/deps/node/deps/icu-small/source/tools/toolutil/ucln_tu.cpp deleted file mode 100644 index 5354fe17..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/ucln_tu.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/******************************************************************** - * COPYRIGHT: - * Copyright (c) 2007-2014, International Business Machines Corporation and - * others. All Rights Reserved. - ********************************************************************/ - - -/** Auto-client **/ -#define UCLN_TYPE UCLN_TOOLUTIL -#include "ucln_imp.h" - -int uprv_dummyFunction_TU(void); -int uprv_dummyFunction_TU(void) -{ - /* this is here to prevent the compiler from complaining about an empty file */ - return 0; -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/ucm.cpp b/deps/node/deps/icu-small/source/tools/toolutil/ucm.cpp deleted file mode 100644 index 28c3f3f4..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/ucm.cpp +++ /dev/null @@ -1,1195 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucm.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003jun20 -* created by: Markus W. Scherer -* -* This file reads a .ucm file, stores its mappings and sorts them. -* It implements handling of Unicode conversion mappings from .ucm files -* for makeconv, canonucm, rptp2ucm, etc. -* -* Unicode code point sequences with a length of more than 1, -* as well as byte sequences with more than 4 bytes or more than one complete -* character sequence are handled to support m:n mappings. -*/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "cstring.h" -#include "cmemory.h" -#include "filestrm.h" -#include "uarrsort.h" -#include "ucnvmbcs.h" -#include "ucnv_bld.h" -#include "ucnv_ext.h" -#include "uparse.h" -#include "ucm.h" -#include <stdio.h> - -#if !UCONFIG_NO_CONVERSION - -/* -------------------------------------------------------------------------- */ - -static void -printMapping(UCMapping *m, UChar32 *codePoints, uint8_t *bytes, FILE *f) { - int32_t j; - - for(j=0; j<m->uLen; ++j) { - fprintf(f, "<U%04lX>", (long)codePoints[j]); - } - - fputc(' ', f); - - for(j=0; j<m->bLen; ++j) { - fprintf(f, "\\x%02X", bytes[j]); - } - - if(m->f>=0) { - fprintf(f, " |%u\n", m->f); - } else { - fputs("\n", f); - } -} - -U_CAPI void U_EXPORT2 -ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f) { - printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), f); -} - -U_CAPI void U_EXPORT2 -ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) { - UCMapping *m; - int32_t i, length; - - m=table->mappings; - length=table->mappingsLength; - if(byUnicode) { - for(i=0; i<length; ++m, ++i) { - ucm_printMapping(table, m, f); - } - } else { - const int32_t *map=table->reverseMap; - for(i=0; i<length; ++i) { - ucm_printMapping(table, m+map[i], f); - } - } -} - -/* mapping comparisons ------------------------------------------------------ */ - -static int32_t -compareUnicode(UCMTable *lTable, const UCMapping *l, - UCMTable *rTable, const UCMapping *r) { - const UChar32 *lu, *ru; - int32_t result, i, length; - - if(l->uLen==1 && r->uLen==1) { - /* compare two single code points */ - return l->u-r->u; - } - - /* get pointers to the code point sequences */ - lu=UCM_GET_CODE_POINTS(lTable, l); - ru=UCM_GET_CODE_POINTS(rTable, r); - - /* get the minimum length */ - if(l->uLen<=r->uLen) { - length=l->uLen; - } else { - length=r->uLen; - } - - /* compare the code points */ - for(i=0; i<length; ++i) { - result=lu[i]-ru[i]; - if(result!=0) { - return result; - } - } - - /* compare the lengths */ - return l->uLen-r->uLen; -} - -static int32_t -compareBytes(UCMTable *lTable, const UCMapping *l, - UCMTable *rTable, const UCMapping *r, - UBool lexical) { - const uint8_t *lb, *rb; - int32_t result, i, length; - - /* - * A lexical comparison is used for sorting in the builder, to allow - * an efficient search for a byte sequence that could be a prefix - * of a previously entered byte sequence. - * - * Comparing by lengths first is for compatibility with old .ucm tools - * like canonucm and rptp2ucm. - */ - if(lexical) { - /* get the minimum length and continue */ - if(l->bLen<=r->bLen) { - length=l->bLen; - } else { - length=r->bLen; - } - } else { - /* compare lengths first */ - result=l->bLen-r->bLen; - if(result!=0) { - return result; - } else { - length=l->bLen; - } - } - - /* get pointers to the byte sequences */ - lb=UCM_GET_BYTES(lTable, l); - rb=UCM_GET_BYTES(rTable, r); - - /* compare the bytes */ - for(i=0; i<length; ++i) { - result=lb[i]-rb[i]; - if(result!=0) { - return result; - } - } - - /* compare the lengths */ - return l->bLen-r->bLen; -} - -/* compare UCMappings for sorting */ -static int32_t -compareMappings(UCMTable *lTable, const UCMapping *l, - UCMTable *rTable, const UCMapping *r, - UBool uFirst) { - int32_t result; - - /* choose which side to compare first */ - if(uFirst) { - /* Unicode then bytes */ - result=compareUnicode(lTable, l, rTable, r); - if(result==0) { - result=compareBytes(lTable, l, rTable, r, FALSE); /* not lexically, like canonucm */ - } - } else { - /* bytes then Unicode */ - result=compareBytes(lTable, l, rTable, r, TRUE); /* lexically, for builder */ - if(result==0) { - result=compareUnicode(lTable, l, rTable, r); - } - } - - if(result!=0) { - return result; - } - - /* compare the flags */ - return l->f-r->f; -} -U_CDECL_BEGIN -/* sorting by Unicode first sorts mappings directly */ -static int32_t U_CALLCONV -compareMappingsUnicodeFirst(const void *context, const void *left, const void *right) { - return compareMappings( - (UCMTable *)context, (const UCMapping *)left, - (UCMTable *)context, (const UCMapping *)right, TRUE); -} - -/* sorting by bytes first sorts the reverseMap; use indirection to mappings */ -static int32_t U_CALLCONV -compareMappingsBytesFirst(const void *context, const void *left, const void *right) { - UCMTable *table=(UCMTable *)context; - int32_t l=*(const int32_t *)left, r=*(const int32_t *)right; - return compareMappings( - table, table->mappings+l, - table, table->mappings+r, FALSE); -} -U_CDECL_END - -U_CAPI void U_EXPORT2 -ucm_sortTable(UCMTable *t) { - UErrorCode errorCode; - int32_t i; - - if(t->isSorted) { - return; - } - - errorCode=U_ZERO_ERROR; - - /* 1. sort by Unicode first */ - uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping), - compareMappingsUnicodeFirst, t, - FALSE, &errorCode); - - /* build the reverseMap */ - if(t->reverseMap==NULL) { - /* - * allocate mappingsCapacity instead of mappingsLength so that - * if mappings are added, the reverseMap need not be - * reallocated each time - * (see ucm_moveMappings() and ucm_addMapping()) - */ - t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t)); - if(t->reverseMap==NULL) { - fprintf(stderr, "ucm error: unable to allocate reverseMap\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - for(i=0; i<t->mappingsLength; ++i) { - t->reverseMap[i]=i; - } - - /* 2. sort reverseMap by mappings bytes first */ - uprv_sortArray(t->reverseMap, t->mappingsLength, sizeof(int32_t), - compareMappingsBytesFirst, t, - FALSE, &errorCode); - - if(U_FAILURE(errorCode)) { - fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n", - u_errorName(errorCode)); - exit(errorCode); - } - - t->isSorted=TRUE; -} - -/* - * remove mappings with their move flag set from the base table - * and move some of them (with UCM_MOVE_TO_EXT) to the extension table - */ -U_CAPI void U_EXPORT2 -ucm_moveMappings(UCMTable *base, UCMTable *ext) { - UCMapping *mb, *mbLimit; - int8_t flag; - - mb=base->mappings; - mbLimit=mb+base->mappingsLength; - - while(mb<mbLimit) { - flag=mb->moveFlag; - if(flag!=0) { - /* reset the move flag */ - mb->moveFlag=0; - - if(ext!=NULL && (flag&UCM_MOVE_TO_EXT)) { - /* add the mapping to the extension table */ - ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb), UCM_GET_BYTES(base, mb)); - } - - /* remove this mapping: move the last base mapping down and overwrite the current one */ - if(mb<(mbLimit-1)) { - uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping)); - } - --mbLimit; - --base->mappingsLength; - base->isSorted=FALSE; - } else { - ++mb; - } - } -} - -enum { - NEEDS_MOVE=1, - HAS_ERRORS=2 -}; - -static uint8_t -checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext, - UBool moveToExt, UBool intersectBase) { - (void)baseStates; - - UCMapping *mb, *me, *mbLimit, *meLimit; - int32_t cmp; - uint8_t result; - - mb=base->mappings; - mbLimit=mb+base->mappingsLength; - - me=ext->mappings; - meLimit=me+ext->mappingsLength; - - result=0; - - for(;;) { - /* skip irrelevant mappings on both sides */ - for(;;) { - if(mb==mbLimit) { - return result; - } - - if((0<=mb->f && mb->f<=2) || mb->f==4) { - break; - } - - ++mb; - } - - for(;;) { - if(me==meLimit) { - return result; - } - - if((0<=me->f && me->f<=2) || me->f==4) { - break; - } - - ++me; - } - - /* compare the base and extension mappings */ - cmp=compareUnicode(base, mb, ext, me); - if(cmp<0) { - if(intersectBase && (intersectBase!=2 || mb->bLen>1)) { - /* - * mapping in base but not in ext, move it - * - * if ext is DBCS, move DBCS mappings here - * and check SBCS ones for Unicode prefix below - */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - - /* does mb map from an input sequence that is a prefix of me's? */ - } else if( mb->uLen<me->uLen && - 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) - ) { - if(moveToExt) { - /* mark this mapping to be moved to the extension table */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is a prefix of the input sequence of an extension mapping\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - } - - ++mb; - } else if(cmp==0) { - /* - * same output: remove the extension mapping, - * otherwise treat as an error - */ - if( mb->f==me->f && mb->bLen==me->bLen && - 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) - ) { - me->moveFlag|=UCM_REMOVE_MAPPING; - result|=NEEDS_MOVE; - } else if(intersectBase) { - /* mapping in base but not in ext, move it */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is the same as the input sequence of an extension mapping\n" - " but it maps differently\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - - ++mb; - } else /* cmp>0 */ { - ++me; - } - } -} - -static uint8_t -checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, - UBool moveToExt, UBool intersectBase) { - UCMapping *mb, *me; - int32_t *baseMap, *extMap; - int32_t b, e, bLimit, eLimit, cmp; - uint8_t result; - UBool isSISO; - - baseMap=base->reverseMap; - extMap=ext->reverseMap; - - b=e=0; - bLimit=base->mappingsLength; - eLimit=ext->mappingsLength; - - result=0; - - isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO); - - for(;;) { - /* skip irrelevant mappings on both sides */ - for(;; ++b) { - if(b==bLimit) { - return result; - } - mb=base->mappings+baseMap[b]; - - if(intersectBase==2 && mb->bLen==1) { - /* - * comparing a base against a DBCS extension: - * leave SBCS base mappings alone - */ - continue; - } - - if(mb->f==0 || mb->f==3) { - break; - } - } - - for(;;) { - if(e==eLimit) { - return result; - } - me=ext->mappings+extMap[e]; - - if(me->f==0 || me->f==3) { - break; - } - - ++e; - } - - /* compare the base and extension mappings */ - cmp=compareBytes(base, mb, ext, me, TRUE); - if(cmp<0) { - if(intersectBase) { - /* mapping in base but not in ext, move it */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - - /* - * does mb map from an input sequence that is a prefix of me's? - * for SI/SO tables, a single byte is never a prefix because it - * occurs in a separate single-byte state - */ - } else if( mb->bLen<me->bLen && - (!isSISO || mb->bLen>1) && - 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) - ) { - if(moveToExt) { - /* mark this mapping to be moved to the extension table */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is a prefix of the input sequence of an extension mapping\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - } - - ++b; - } else if(cmp==0) { - /* - * same output: remove the extension mapping, - * otherwise treat as an error - */ - if( mb->f==me->f && mb->uLen==me->uLen && - 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) - ) { - me->moveFlag|=UCM_REMOVE_MAPPING; - result|=NEEDS_MOVE; - } else if(intersectBase) { - /* mapping in base but not in ext, move it */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is the same as the input sequence of an extension mapping\n" - " but it maps differently\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - - ++b; - } else /* cmp>0 */ { - ++e; - } - } -} - -U_CAPI UBool U_EXPORT2 -ucm_checkValidity(UCMTable *table, UCMStates *baseStates) { - UCMapping *m, *mLimit; - int32_t count; - UBool isOK; - - m=table->mappings; - mLimit=m+table->mappingsLength; - isOK=TRUE; - - while(m<mLimit) { - count=ucm_countChars(baseStates, UCM_GET_BYTES(table, m), m->bLen); - if(count<1) { - ucm_printMapping(table, m, stderr); - isOK=FALSE; - } - ++m; - } - - return isOK; -} - -U_CAPI UBool U_EXPORT2 -ucm_checkBaseExt(UCMStates *baseStates, - UCMTable *base, UCMTable *ext, UCMTable *moveTarget, - UBool intersectBase) { - uint8_t result; - - /* if we have an extension table, we must always use precision flags */ - if(base->flagsType&UCM_FLAGS_IMPLICIT) { - fprintf(stderr, "ucm error: the base table contains mappings without precision flags\n"); - return FALSE; - } - if(ext->flagsType&UCM_FLAGS_IMPLICIT) { - fprintf(stderr, "ucm error: extension table contains mappings without precision flags\n"); - return FALSE; - } - - /* checking requires both tables to be sorted */ - ucm_sortTable(base); - ucm_sortTable(ext); - - /* check */ - result= - checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase)| - checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase); - - if(result&HAS_ERRORS) { - return FALSE; - } - - if(result&NEEDS_MOVE) { - ucm_moveMappings(ext, NULL); - ucm_moveMappings(base, moveTarget); - ucm_sortTable(base); - ucm_sortTable(ext); - if(moveTarget!=NULL) { - ucm_sortTable(moveTarget); - } - } - - return TRUE; -} - -/* merge tables for rptp2ucm ------------------------------------------------ */ - -U_CAPI void U_EXPORT2 -ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable, - const uint8_t *subchar, int32_t subcharLength, - uint8_t subchar1) { - UCMapping *fromUMapping, *toUMapping; - int32_t fromUIndex, toUIndex, fromUTop, toUTop, cmp; - - ucm_sortTable(fromUTable); - ucm_sortTable(toUTable); - - fromUMapping=fromUTable->mappings; - toUMapping=toUTable->mappings; - - fromUTop=fromUTable->mappingsLength; - toUTop=toUTable->mappingsLength; - - fromUIndex=toUIndex=0; - - while(fromUIndex<fromUTop && toUIndex<toUTop) { - cmp=compareMappings(fromUTable, fromUMapping, toUTable, toUMapping, TRUE); - if(cmp==0) { - /* equal: roundtrip, nothing to do (flags are initially 0) */ - ++fromUMapping; - ++toUMapping; - - ++fromUIndex; - ++toUIndex; - } else if(cmp<0) { - /* - * the fromU mapping does not have a toU counterpart: - * fallback Unicode->codepage - */ - if( (fromUMapping->bLen==subcharLength && - 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || - (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) - ) { - fromUMapping->f=2; /* SUB mapping */ - } else { - fromUMapping->f=1; /* normal fallback */ - } - - ++fromUMapping; - ++fromUIndex; - } else { - /* - * the toU mapping does not have a fromU counterpart: - * (reverse) fallback codepage->Unicode, copy it to the fromU table - */ - - /* ignore reverse fallbacks to Unicode SUB */ - if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { - toUMapping->f=3; /* reverse fallback */ - ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); - - /* the table may have been reallocated */ - fromUMapping=fromUTable->mappings+fromUIndex; - } - - ++toUMapping; - ++toUIndex; - } - } - - /* either one or both tables are exhausted */ - while(fromUIndex<fromUTop) { - /* leftover fromU mappings are fallbacks */ - if( (fromUMapping->bLen==subcharLength && - 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || - (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) - ) { - fromUMapping->f=2; /* SUB mapping */ - } else { - fromUMapping->f=1; /* normal fallback */ - } - - ++fromUMapping; - ++fromUIndex; - } - - while(toUIndex<toUTop) { - /* leftover toU mappings are reverse fallbacks */ - - /* ignore reverse fallbacks to Unicode SUB */ - if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { - toUMapping->f=3; /* reverse fallback */ - ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); - } - - ++toUMapping; - ++toUIndex; - } - - fromUTable->isSorted=FALSE; -} - -/* separate extension mappings out of base table for rptp2ucm --------------- */ - -U_CAPI UBool U_EXPORT2 -ucm_separateMappings(UCMFile *ucm, UBool isSISO) { - UCMTable *table; - UCMapping *m, *mLimit; - int32_t type; - UBool needsMove, isOK; - - table=ucm->base; - m=table->mappings; - mLimit=m+table->mappingsLength; - - needsMove=FALSE; - isOK=TRUE; - - for(; m<mLimit; ++m) { - if(isSISO && m->bLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { - fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n"); - ucm_printMapping(table, m, stderr); - m->moveFlag|=UCM_REMOVE_MAPPING; - needsMove=TRUE; - continue; - } - - type=ucm_mappingType( - &ucm->states, m, - UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m)); - if(type<0) { - /* illegal byte sequence */ - printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr); - isOK=FALSE; - } else if(type>0) { - m->moveFlag|=UCM_MOVE_TO_EXT; - needsMove=TRUE; - } - } - - if(!isOK) { - return FALSE; - } - if(needsMove) { - ucm_moveMappings(ucm->base, ucm->ext); - return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE); - } else { - ucm_sortTable(ucm->base); - return TRUE; - } -} - -/* ucm parser --------------------------------------------------------------- */ - -U_CAPI int8_t U_EXPORT2 -ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps) { - const char *s=*ps; - char *end; - uint8_t byte; - int8_t bLen; - - bLen=0; - for(;;) { - /* skip an optional plus sign */ - if(bLen>0 && *s=='+') { - ++s; - } - if(*s!='\\') { - break; - } - - if( s[1]!='x' || - (byte=(uint8_t)uprv_strtoul(s+2, &end, 16), end)!=s+4 - ) { - fprintf(stderr, "ucm error: byte must be formatted as \\xXX (2 hex digits) - \"%s\"\n", line); - return -1; - } - - if(bLen==UCNV_EXT_MAX_BYTES) { - fprintf(stderr, "ucm error: too many bytes on \"%s\"\n", line); - return -1; - } - bytes[bLen++]=byte; - s=end; - } - - *ps=s; - return bLen; -} - -/* parse a mapping line; must not be empty */ -U_CAPI UBool U_EXPORT2 -ucm_parseMappingLine(UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES], - const char *line) { - const char *s; - char *end; - UChar32 cp; - int32_t u16Length; - int8_t uLen, bLen, f; - - s=line; - uLen=bLen=0; - - /* parse code points */ - for(;;) { - /* skip an optional plus sign */ - if(uLen>0 && *s=='+') { - ++s; - } - if(*s!='<') { - break; - } - - if( s[1]!='U' || - (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 || - *end!='>' - ) { - fprintf(stderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line); - return FALSE; - } - if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) { - fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line); - return FALSE; - } - - if(uLen==UCNV_EXT_MAX_UCHARS) { - fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line); - return FALSE; - } - codePoints[uLen++]=cp; - s=end+1; - } - - if(uLen==0) { - fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line); - return FALSE; - } else if(uLen==1) { - m->u=codePoints[0]; - } else { - UErrorCode errorCode=U_ZERO_ERROR; - u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode); - if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) || - u16Length>UCNV_EXT_MAX_UCHARS - ) { - fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line); - return FALSE; - } - } - - s=u_skipWhitespace(s); - - /* parse bytes */ - bLen=ucm_parseBytes(bytes, line, &s); - - if(bLen<0) { - return FALSE; - } else if(bLen==0) { - fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line); - return FALSE; - } else if(bLen<=4) { - uprv_memcpy(m->b.bytes, bytes, bLen); - } - - /* skip everything until the fallback indicator, even the start of a comment */ - for(;;) { - if(*s==0) { - f=-1; /* no fallback indicator */ - break; - } else if(*s=='|') { - f=(int8_t)(s[1]-'0'); - if((uint8_t)f>4) { - fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line); - return FALSE; - } - break; - } - ++s; - } - - m->uLen=uLen; - m->bLen=bLen; - m->f=f; - return TRUE; -} - -/* general APIs ------------------------------------------------------------- */ - -U_CAPI UCMTable * U_EXPORT2 -ucm_openTable() { - UCMTable *table=(UCMTable *)uprv_malloc(sizeof(UCMTable)); - if(table==NULL) { - fprintf(stderr, "ucm error: unable to allocate a UCMTable\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - memset(table, 0, sizeof(UCMTable)); - return table; -} - -U_CAPI void U_EXPORT2 -ucm_closeTable(UCMTable *table) { - if(table!=NULL) { - uprv_free(table->mappings); - uprv_free(table->codePoints); - uprv_free(table->bytes); - uprv_free(table->reverseMap); - uprv_free(table); - } -} - -U_CAPI void U_EXPORT2 -ucm_resetTable(UCMTable *table) { - if(table!=NULL) { - table->mappingsLength=0; - table->flagsType=0; - table->unicodeMask=0; - table->bytesLength=table->codePointsLength=0; - table->isSorted=FALSE; - } -} - -U_CAPI void U_EXPORT2 -ucm_addMapping(UCMTable *table, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]) { - UCMapping *tm; - UChar32 c; - int32_t idx; - - if(table->mappingsLength>=table->mappingsCapacity) { - /* make the mappings array larger */ - if(table->mappingsCapacity==0) { - table->mappingsCapacity=1000; - } else { - table->mappingsCapacity*=10; - } - table->mappings=(UCMapping *)uprv_realloc(table->mappings, - table->mappingsCapacity*sizeof(UCMapping)); - if(table->mappings==NULL) { - fprintf(stderr, "ucm error: unable to allocate %d UCMappings\n", - (int)table->mappingsCapacity); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - if(table->reverseMap!=NULL) { - /* the reverseMap must be reallocated in a new sort */ - uprv_free(table->reverseMap); - table->reverseMap=NULL; - } - } - - if(m->uLen>1 && table->codePointsCapacity==0) { - table->codePointsCapacity=10000; - table->codePoints=(UChar32 *)uprv_malloc(table->codePointsCapacity*4); - if(table->codePoints==NULL) { - fprintf(stderr, "ucm error: unable to allocate %d UChar32s\n", - (int)table->codePointsCapacity); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - - if(m->bLen>4 && table->bytesCapacity==0) { - table->bytesCapacity=10000; - table->bytes=(uint8_t *)uprv_malloc(table->bytesCapacity); - if(table->bytes==NULL) { - fprintf(stderr, "ucm error: unable to allocate %d bytes\n", - (int)table->bytesCapacity); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - - if(m->uLen>1) { - idx=table->codePointsLength; - table->codePointsLength+=m->uLen; - if(table->codePointsLength>table->codePointsCapacity) { - fprintf(stderr, "ucm error: too many code points in multiple-code point mappings\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - uprv_memcpy(table->codePoints+idx, codePoints, (size_t)m->uLen*4); - m->u=idx; - } - - if(m->bLen>4) { - idx=table->bytesLength; - table->bytesLength+=m->bLen; - if(table->bytesLength>table->bytesCapacity) { - fprintf(stderr, "ucm error: too many bytes in mappings with >4 charset bytes\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - uprv_memcpy(table->bytes+idx, bytes, m->bLen); - m->b.idx=idx; - } - - /* set unicodeMask */ - for(idx=0; idx<m->uLen; ++idx) { - c=codePoints[idx]; - if(c>=0x10000) { - table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */ - } else if(U_IS_SURROGATE(c)) { - table->unicodeMask|=UCNV_HAS_SURROGATES; /* there are surrogate code points */ - } - } - - /* set flagsType */ - if(m->f<0) { - table->flagsType|=UCM_FLAGS_IMPLICIT; - } else { - table->flagsType|=UCM_FLAGS_EXPLICIT; - } - - tm=table->mappings+table->mappingsLength++; - uprv_memcpy(tm, m, sizeof(UCMapping)); - - table->isSorted=FALSE; -} - -U_CAPI UCMFile * U_EXPORT2 -ucm_open() { - UCMFile *ucm=(UCMFile *)uprv_malloc(sizeof(UCMFile)); - if(ucm==NULL) { - fprintf(stderr, "ucm error: unable to allocate a UCMFile\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - memset(ucm, 0, sizeof(UCMFile)); - - ucm->base=ucm_openTable(); - ucm->ext=ucm_openTable(); - - ucm->states.stateFlags[0]=MBCS_STATE_FLAG_DIRECT; - ucm->states.conversionType=UCNV_UNSUPPORTED_CONVERTER; - ucm->states.outputType=-1; - ucm->states.minCharLength=ucm->states.maxCharLength=1; - - return ucm; -} - -U_CAPI void U_EXPORT2 -ucm_close(UCMFile *ucm) { - if(ucm!=NULL) { - ucm_closeTable(ucm->base); - ucm_closeTable(ucm->ext); - uprv_free(ucm); - } -} - -U_CAPI int32_t U_EXPORT2 -ucm_mappingType(UCMStates *baseStates, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]) { - (void)codePoints; - /* check validity of the bytes and count the characters in them */ - int32_t count=ucm_countChars(baseStates, bytes, m->bLen); - if(count<1) { - /* illegal byte sequence */ - return -1; - } - - /* - * Suitable for an ICU conversion base table means: - * - a 1:1 mapping (1 Unicode code point : 1 byte sequence) - * - precision flag 0..3 - * - SBCS: any 1:1 mapping - * (the table stores additional bits to distinguish mapping types) - * - MBCS: not a |2 SUB mapping for <subchar1> - * - MBCS: not a |1 fallback to 0x00 - * - MBCS: not a multi-byte mapping with leading 0x00 bytes - * - * Further restrictions for fromUnicode tables - * are enforced in makeconv (MBCSOkForBaseFromUnicode()). - * - * All of the MBCS fromUnicode specific tests could be removed from here, - * but the ones above are for unusual mappings, and removing the tests - * from here would change canonucm output which seems gratuitous. - * (Markus Scherer 2006-nov-28) - * - * Exception: All implicit mappings (f<0) that need to be moved - * because of fromUnicode restrictions _must_ be moved here because - * makeconv uses a hack for moving mappings only for the fromUnicode table - * that only works with non-negative values of f. - */ - if( m->uLen==1 && count==1 && m->f<=3 && - (baseStates->maxCharLength==1 || - !((m->f==2 && m->bLen==1) || - (m->f==1 && bytes[0]==0) || - (m->f<=1 && m->bLen>1 && bytes[0]==0))) - ) { - return 0; /* suitable for a base table */ - } else { - return 1; /* needs to go into an extension table */ - } -} - -U_CAPI UBool U_EXPORT2 -ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]) { - int32_t type; - - if(m->f==2 && m->uLen>1) { - fprintf(stderr, "ucm error: illegal <subchar1> |2 mapping from multiple code points\n"); - printMapping(m, codePoints, bytes, stderr); - return FALSE; - } - - if(baseStates!=NULL) { - /* check validity of the bytes and count the characters in them */ - type=ucm_mappingType(baseStates, m, codePoints, bytes); - if(type<0) { - /* illegal byte sequence */ - printMapping(m, codePoints, bytes, stderr); - return FALSE; - } - } else { - /* not used - adding a mapping for an extension-only table before its base table is read */ - type=1; - } - - /* - * Add the mapping to the base table if this is requested and suitable. - * Otherwise, add it to the extension table. - */ - if(forBase && type==0) { - ucm_addMapping(ucm->base, m, codePoints, bytes); - } else { - ucm_addMapping(ucm->ext, m, codePoints, bytes); - } - - return TRUE; -} - -U_CAPI UBool U_EXPORT2 -ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates) { - UCMapping m={ 0, {0}, 0, 0, 0, 0 }; - UChar32 codePoints[UCNV_EXT_MAX_UCHARS]; - uint8_t bytes[UCNV_EXT_MAX_BYTES]; - - const char *s; - - /* ignore empty and comment lines */ - if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') { - return TRUE; - } - - return - ucm_parseMappingLine(&m, codePoints, bytes, line) && - ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes); -} - -U_CAPI void U_EXPORT2 -ucm_readTable(UCMFile *ucm, FileStream* convFile, - UBool forBase, UCMStates *baseStates, - UErrorCode *pErrorCode) { - char line[500]; - char *end; - UBool isOK; - - if(U_FAILURE(*pErrorCode)) { - return; - } - - isOK=TRUE; - - for(;;) { - /* read the next line */ - if(!T_FileStream_readLine(convFile, line, sizeof(line))) { - fprintf(stderr, "incomplete charmap section\n"); - isOK=FALSE; - break; - } - - /* remove CR LF */ - end=uprv_strchr(line, 0); - while(line<end && (*(end-1)=='\r' || *(end-1)=='\n')) { - --end; - } - *end=0; - - /* ignore empty and comment lines */ - if(line[0]==0 || line[0]=='#') { - continue; - } - - /* stop at the end of the mapping table */ - if(0==uprv_strcmp(line, "END CHARMAP")) { - break; - } - - isOK&=ucm_addMappingFromLine(ucm, line, forBase, baseStates); - } - - if(!isOK) { - *pErrorCode=U_INVALID_TABLE_FORMAT; - } -} -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/ucm.h b/deps/node/deps/icu-small/source/tools/toolutil/ucm.h deleted file mode 100644 index 3af93975..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/ucm.h +++ /dev/null @@ -1,301 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************************* - * Copyright (C) 2003-2013, International Business Machines - * Corporation and others. All Rights Reserved. - ******************************************************************************* - * file name: ucm.h - * encoding: UTF-8 - * tab size: 8 (not used) - * indentation:4 - * - * created on: 2003jun20 - * created by: Markus W. Scherer - * - * Definitions for the .ucm file parser and handler module ucm.c. - */ - -#ifndef __UCM_H__ -#define __UCM_H__ - -#include "unicode/utypes.h" -#include "ucnvmbcs.h" -#include "ucnv_ext.h" -#include "filestrm.h" -#include <stdio.h> - -#if !UCONFIG_NO_CONVERSION - -U_CDECL_BEGIN - -/* constants for UCMapping.moveFlag */ -enum { - UCM_MOVE_TO_EXT=1, - UCM_REMOVE_MAPPING=2 -}; - -/* - * Per-mapping data structure - * - * u if uLen==1: Unicode code point - * else index to uLen code points - * b if bLen<=4: up to 4 bytes - * else index to bLen bytes - * uLen number of code points - * bLen number of words containing left-justified bytes - * bIsMultipleChars indicates that the bytes contain more than one sequence - * according to the state table - * f flag for roundtrip (0), fallback (1), sub mapping (2), reverse fallback (3) - * or "good one-way" mapping (4). - * Same values as in the source file after | - */ -typedef struct UCMapping { - UChar32 u; - union { - uint32_t idx; - uint8_t bytes[4]; - } b; - int8_t uLen, bLen, f, moveFlag; -} UCMapping; - -/* constants for UCMTable.flagsType */ -enum { - UCM_FLAGS_INITIAL, /* no mappings parsed yet */ - UCM_FLAGS_EXPLICIT, /* .ucm file has mappings with | fallback indicators */ - UCM_FLAGS_IMPLICIT, /* .ucm file has mappings without | fallback indicators, later wins */ - UCM_FLAGS_MIXED /* both implicit and explicit */ -}; - -typedef struct UCMTable { - UCMapping *mappings; - int32_t mappingsCapacity, mappingsLength; - - UChar32 *codePoints; - int32_t codePointsCapacity, codePointsLength; - - uint8_t *bytes; - int32_t bytesCapacity, bytesLength; - - /* index map for mapping by bytes first */ - int32_t *reverseMap; - - uint8_t unicodeMask; - int8_t flagsType; /* UCM_FLAGS_INITIAL etc. */ - UBool isSorted; -} UCMTable; - -enum { - MBCS_STATE_FLAG_DIRECT=1, - MBCS_STATE_FLAG_SURROGATES, - - MBCS_STATE_FLAG_READY=16 -}; - -typedef struct UCMStates { - int32_t stateTable[MBCS_MAX_STATE_COUNT][256]; - uint32_t stateFlags[MBCS_MAX_STATE_COUNT], - stateOffsetSum[MBCS_MAX_STATE_COUNT]; - - int32_t countStates, minCharLength, maxCharLength, countToUCodeUnits; - int8_t conversionType, outputType; -} UCMStates; - -typedef struct UCMFile { - UCMTable *base, *ext; - UCMStates states; - - char baseName[UCNV_MAX_CONVERTER_NAME_LENGTH]; -} UCMFile; - -/* simple accesses ---------------------------------------------------------- */ - -#define UCM_GET_CODE_POINTS(t, m) \ - (((m)->uLen==1) ? &(m)->u : (t)->codePoints+(m)->u) - -#define UCM_GET_BYTES(t, m) \ - (((m)->bLen<=4) ? (m)->b.bytes : (t)->bytes+(m)->b.idx) - -/* APIs --------------------------------------------------------------------- */ - -U_CAPI UCMFile * U_EXPORT2 -ucm_open(void); - -U_CAPI void U_EXPORT2 -ucm_close(UCMFile *ucm); - -U_CAPI UBool U_EXPORT2 -ucm_parseHeaderLine(UCMFile *ucm, - char *line, char **pKey, char **pValue); - -/* @return -1 illegal bytes 0 suitable for base table 1 needs to go into extension table */ -U_CAPI int32_t U_EXPORT2 -ucm_mappingType(UCMStates *baseStates, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]); - -/* add a mapping to the base or extension table as appropriate */ -U_CAPI UBool U_EXPORT2 -ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]); - -U_CAPI UBool U_EXPORT2 -ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates); - - -U_CAPI UCMTable * U_EXPORT2 -ucm_openTable(void); - -U_CAPI void U_EXPORT2 -ucm_closeTable(UCMTable *table); - -U_CAPI void U_EXPORT2 -ucm_resetTable(UCMTable *table); - -U_CAPI void U_EXPORT2 -ucm_sortTable(UCMTable *t); - -/* - * Remove mappings with their move flag set from the base table - * and move some of them (with UCM_MOVE_TO_EXT) to the extension table. - */ -U_CAPI void U_EXPORT2 -ucm_moveMappings(UCMTable *base, UCMTable *ext); - -/** - * Read a table from a .ucm file, from after the CHARMAP line to - * including the END CHARMAP line. - */ -U_CAPI void U_EXPORT2 -ucm_readTable(UCMFile *ucm, FileStream* convFile, - UBool forBase, UCMStates *baseStates, - UErrorCode *pErrorCode); - -/** - * Check the validity of mappings against a base table's states; - * necessary for extension-only tables that were read before their base tables. - */ -U_CAPI UBool U_EXPORT2 -ucm_checkValidity(UCMTable *ext, UCMStates *baseStates); - -/** - * Check a base table against an extension table. - * Set the moveTarget!=NULL if it is possible to move mappings from the base. - * This is the case where base and extension tables are parsed from a single file - * (moveTarget==ext) - * or when delta file mappings are subtracted from a base table. - * - * When a base table cannot be modified because a delta file is parsed in makeconv, - * then set moveTarget=NULL. - * - * if(intersectBase) then mappings that exist in the base table but not in - * the extension table are moved to moveTarget instead of showing an error. - * - * Special mode: - * If intersectBase==2 for a DBCS extension table, then SBCS mappings are - * not moved out of the base unless their Unicode input requires it. - * This helps ucmkbase generate base tables for DBCS-only extension .cnv files. - * - * For both tables in the same file, the extension table is automatically - * built. - * For separate files, the extension file can use a complete mapping table (.ucm file), - * so that common mappings need not be stripped out manually. - * - * - * Sort both tables, and then for each mapping direction: - * - * If intersectBase is TRUE and the base table contains a mapping - * that does not exist in the extension table, then this mapping is moved - * to moveTarget. - * - * - otherwise - - * - * If the base table contains a mapping for which the input sequence is - * the same as the extension input, then - * - if the output is the same: remove the extension mapping - * - else: error - * - * If the base table contains a mapping for which the input sequence is - * a prefix of the extension input, then - * - if moveTarget!=NULL: move the base mapping to the moveTarget table - * - else: error - * - * @return FALSE in case of an irreparable error - */ -U_CAPI UBool U_EXPORT2 -ucm_checkBaseExt(UCMStates *baseStates, UCMTable *base, UCMTable *ext, - UCMTable *moveTarget, UBool intersectBase); - -U_CAPI void U_EXPORT2 -ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode); - -U_CAPI void U_EXPORT2 -ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f); - - -U_CAPI void U_EXPORT2 -ucm_addState(UCMStates *states, const char *s); - -U_CAPI void U_EXPORT2 -ucm_processStates(UCMStates *states, UBool ignoreSISOCheck); - -U_CAPI int32_t U_EXPORT2 -ucm_countChars(UCMStates *states, - const uint8_t *bytes, int32_t length); - - -U_CAPI int8_t U_EXPORT2 -ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps); - -U_CAPI UBool U_EXPORT2 -ucm_parseMappingLine(UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES], - const char *line); - -U_CAPI void U_EXPORT2 -ucm_addMapping(UCMTable *table, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]); - -/* very makeconv-specific functions ----------------------------------------- */ - -/* finalize and optimize states after the toUnicode mappings are processed */ -U_CAPI void U_EXPORT2 -ucm_optimizeStates(UCMStates *states, - uint16_t **pUnicodeCodeUnits, - _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, - UBool verbose); - -/* moved here because it is used inside ucmstate.c */ -U_CAPI int32_t U_EXPORT2 -ucm_findFallback(_MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, - uint32_t offset); - -/* very rptp2ucm-specific functions ----------------------------------------- */ - -/* - * Input: Separate tables with mappings from/to Unicode, - * subchar and subchar1 (0 if none). - * All mappings must have flag 0. - * - * Output: fromUTable will contain the union of mappings with the correct - * precision flags, and be sorted. - */ -U_CAPI void U_EXPORT2 -ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable, - const uint8_t *subchar, int32_t subcharLength, - uint8_t subchar1); - -U_CAPI UBool U_EXPORT2 -ucm_separateMappings(UCMFile *ucm, UBool isSISO); - -U_CDECL_END - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/ucmstate.cpp b/deps/node/deps/icu-small/source/tools/toolutil/ucmstate.cpp deleted file mode 100644 index 27765752..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/ucmstate.cpp +++ /dev/null @@ -1,1051 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucmstate.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003oct09 -* created by: Markus W. Scherer -* -* This file handles ICU .ucm file state information as part of the ucm module. -* Most of this code used to be in makeconv.c. -*/ - -#include "unicode/utypes.h" -#include "cstring.h" -#include "cmemory.h" -#include "uarrsort.h" -#include "ucnvmbcs.h" -#include "ucnv_ext.h" -#include "uparse.h" -#include "ucm.h" -#include <stdio.h> - -#if !UCONFIG_NO_CONVERSION - -/* MBCS state handling ------------------------------------------------------ */ - -/* - * state table row grammar (ebnf-style): - * (whitespace is allowed between all tokens) - * - * row=[[firstentry ','] entry (',' entry)*] - * firstentry="initial" | "surrogates" - * (initial state (default for state 0), output is all surrogate pairs) - * entry=range [':' nextstate] ['.' action] - * range=number ['-' number] - * nextstate=number - * (0..7f) - * action='u' | 's' | 'p' | 'i' - * (unassigned, state change only, surrogate pair, illegal) - * number=(1- or 2-digit hexadecimal number) - */ -static const char * -parseState(const char *s, int32_t state[256], uint32_t *pFlags) { - const char *t; - uint32_t start, end, i; - int32_t entry; - - /* initialize the state: all illegal with U+ffff */ - for(i=0; i<256; ++i) { - state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0xffff); - } - - /* skip leading white space */ - s=u_skipWhitespace(s); - - /* is there an "initial" or "surrogates" directive? */ - if(uprv_strncmp("initial", s, 7)==0) { - *pFlags=MBCS_STATE_FLAG_DIRECT; - s=u_skipWhitespace(s+7); - if(*s++!=',') { - return s-1; - } - } else if(*pFlags==0 && uprv_strncmp("surrogates", s, 10)==0) { - *pFlags=MBCS_STATE_FLAG_SURROGATES; - s=u_skipWhitespace(s+10); - if(*s++!=',') { - return s-1; - } - } else if(*s==0) { - /* empty state row: all-illegal */ - return NULL; - } - - for(;;) { - /* read an entry, the start of the range first */ - s=u_skipWhitespace(s); - start=uprv_strtoul(s, (char **)&t, 16); - if(s==t || 0xff<start) { - return s; - } - s=u_skipWhitespace(t); - - /* read the end of the range if there is one */ - if(*s=='-') { - s=u_skipWhitespace(s+1); - end=uprv_strtoul(s, (char **)&t, 16); - if(s==t || end<start || 0xff<end) { - return s; - } - s=u_skipWhitespace(t); - } else { - end=start; - } - - /* determine the state entrys for this range */ - if(*s!=':' && *s!='.') { - /* the default is: final state with valid entries */ - entry=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_16, 0); - } else { - entry=MBCS_ENTRY_TRANSITION(0, 0); - if(*s==':') { - /* get the next state, default to 0 */ - s=u_skipWhitespace(s+1); - i=uprv_strtoul(s, (char **)&t, 16); - if(s!=t) { - if(0x7f<i) { - return s; - } - s=u_skipWhitespace(t); - entry=MBCS_ENTRY_SET_STATE(entry, i); - } - } - - /* get the state action, default to valid */ - if(*s=='.') { - /* this is a final state */ - entry=MBCS_ENTRY_SET_FINAL(entry); - - s=u_skipWhitespace(s+1); - if(*s=='u') { - /* unassigned set U+fffe */ - entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_UNASSIGNED, 0xfffe); - s=u_skipWhitespace(s+1); - } else if(*s=='p') { - if(*pFlags!=MBCS_STATE_FLAG_DIRECT) { - entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_VALID_16_PAIR); - } else { - entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_VALID_16); - } - s=u_skipWhitespace(s+1); - } else if(*s=='s') { - entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_CHANGE_ONLY); - s=u_skipWhitespace(s+1); - } else if(*s=='i') { - /* illegal set U+ffff */ - entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_ILLEGAL, 0xffff); - s=u_skipWhitespace(s+1); - } else { - /* default to valid */ - entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_VALID_16); - } - } else { - /* this is an intermediate state, nothing to do */ - } - } - - /* adjust "final valid" states according to the state flags */ - if(MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16) { - switch(*pFlags) { - case 0: - /* no adjustment */ - break; - case MBCS_STATE_FLAG_DIRECT: - /* set the valid-direct code point to "unassigned"==0xfffe */ - entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_VALID_DIRECT_16, 0xfffe); - break; - case MBCS_STATE_FLAG_SURROGATES: - entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_VALID_16_PAIR, 0); - break; - default: - break; - } - } - - /* set this entry for the range */ - for(i=start; i<=end; ++i) { - state[i]=entry; - } - - if(*s==',') { - ++s; - } else { - return *s==0 ? NULL : s; - } - } -} - -U_CAPI void U_EXPORT2 -ucm_addState(UCMStates *states, const char *s) { - const char *error; - - if(states->countStates==MBCS_MAX_STATE_COUNT) { - fprintf(stderr, "ucm error: too many states (maximum %u)\n", MBCS_MAX_STATE_COUNT); - exit(U_INVALID_TABLE_FORMAT); - } - - error=parseState(s, states->stateTable[states->countStates], - &states->stateFlags[states->countStates]); - if(error!=NULL) { - fprintf(stderr, "ucm error: parse error in state definition at '%s'\n", error); - exit(U_INVALID_TABLE_FORMAT); - } - - ++states->countStates; -} - -U_CAPI UBool U_EXPORT2 -ucm_parseHeaderLine(UCMFile *ucm, - char *line, char **pKey, char **pValue) { - UCMStates *states; - char *s, *end; - char c; - - states=&ucm->states; - - /* remove comments and trailing CR and LF and remove whitespace from the end */ - for(end=line; (c=*end)!=0; ++end) { - if(c=='#' || c=='\r' || c=='\n') { - break; - } - } - while(end>line && (*(end-1)==' ' || *(end-1)=='\t')) { - --end; - } - *end=0; - - /* skip leading white space and ignore empty lines */ - s=(char *)u_skipWhitespace(line); - if(*s==0) { - return TRUE; - } - - /* stop at the beginning of the mapping section */ - if(uprv_memcmp(s, "CHARMAP", 7)==0) { - return FALSE; - } - - /* get the key name, bracketed in <> */ - if(*s!='<') { - fprintf(stderr, "ucm error: no header field <key> in line \"%s\"\n", line); - exit(U_INVALID_TABLE_FORMAT); - } - *pKey=++s; - while(*s!='>') { - if(*s==0) { - fprintf(stderr, "ucm error: incomplete header field <key> in line \"%s\"\n", line); - exit(U_INVALID_TABLE_FORMAT); - } - ++s; - } - *s=0; - - /* get the value string, possibly quoted */ - s=(char *)u_skipWhitespace(s+1); - if(*s!='"') { - *pValue=s; - } else { - /* remove the quotes */ - *pValue=s+1; - if(end>*pValue && *(end-1)=='"') { - *--end=0; - } - } - - /* collect the information from the header field, ignore unknown keys */ - if(uprv_strcmp(*pKey, "uconv_class")==0) { - if(uprv_strcmp(*pValue, "DBCS")==0) { - states->conversionType=UCNV_DBCS; - } else if(uprv_strcmp(*pValue, "SBCS")==0) { - states->conversionType = UCNV_SBCS; - } else if(uprv_strcmp(*pValue, "MBCS")==0) { - states->conversionType = UCNV_MBCS; - } else if(uprv_strcmp(*pValue, "EBCDIC_STATEFUL")==0) { - states->conversionType = UCNV_EBCDIC_STATEFUL; - } else { - fprintf(stderr, "ucm error: unknown <uconv_class> %s\n", *pValue); - exit(U_INVALID_TABLE_FORMAT); - } - return TRUE; - } else if(uprv_strcmp(*pKey, "mb_cur_max")==0) { - c=**pValue; - if('1'<=c && c<='4' && (*pValue)[1]==0) { - states->maxCharLength=(int8_t)(c-'0'); - states->outputType=(int8_t)(states->maxCharLength-1); - } else { - fprintf(stderr, "ucm error: illegal <mb_cur_max> %s\n", *pValue); - exit(U_INVALID_TABLE_FORMAT); - } - return TRUE; - } else if(uprv_strcmp(*pKey, "mb_cur_min")==0) { - c=**pValue; - if('1'<=c && c<='4' && (*pValue)[1]==0) { - states->minCharLength=(int8_t)(c-'0'); - } else { - fprintf(stderr, "ucm error: illegal <mb_cur_min> %s\n", *pValue); - exit(U_INVALID_TABLE_FORMAT); - } - return TRUE; - } else if(uprv_strcmp(*pKey, "icu:state")==0) { - /* if an SBCS/DBCS/EBCDIC_STATEFUL converter has icu:state, then turn it into MBCS */ - switch(states->conversionType) { - case UCNV_SBCS: - case UCNV_DBCS: - case UCNV_EBCDIC_STATEFUL: - states->conversionType=UCNV_MBCS; - break; - case UCNV_MBCS: - break; - default: - fprintf(stderr, "ucm error: <icu:state> entry for non-MBCS table or before the <uconv_class> line\n"); - exit(U_INVALID_TABLE_FORMAT); - } - - if(states->maxCharLength==0) { - fprintf(stderr, "ucm error: <icu:state> before the <mb_cur_max> line\n"); - exit(U_INVALID_TABLE_FORMAT); - } - ucm_addState(states, *pValue); - return TRUE; - } else if(uprv_strcmp(*pKey, "icu:base")==0) { - if(**pValue==0) { - fprintf(stderr, "ucm error: <icu:base> without a base table name\n"); - exit(U_INVALID_TABLE_FORMAT); - } - uprv_strcpy(ucm->baseName, *pValue); - return TRUE; - } - - return FALSE; -} - -/* post-processing ---------------------------------------------------------- */ - -static int32_t -sumUpStates(UCMStates *states) { - int32_t entry, sum, state, cell, count; - UBool allStatesReady; - - /* - * Sum up the offsets for all states. - * In each final state (where there are only final entries), - * the offsets add up directly. - * In all other state table rows, for each transition entry to another state, - * the offsets sum of that state needs to be added. - * This is achieved in at most countStates iterations. - */ - allStatesReady=FALSE; - for(count=states->countStates; !allStatesReady && count>=0; --count) { - allStatesReady=TRUE; - for(state=states->countStates-1; state>=0; --state) { - if(!(states->stateFlags[state]&MBCS_STATE_FLAG_READY)) { - allStatesReady=FALSE; - sum=0; - - /* at first, add up only the final delta offsets to keep them <512 */ - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[state][cell]; - if(MBCS_ENTRY_IS_FINAL(entry)) { - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_VALID_16: - states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_VALUE(entry, sum); - sum+=1; - break; - case MBCS_STATE_VALID_16_PAIR: - states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_VALUE(entry, sum); - sum+=2; - break; - default: - /* no addition */ - break; - } - } - } - - /* now, add up the delta offsets for the transitional entries */ - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[state][cell]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - if(states->stateFlags[MBCS_ENTRY_TRANSITION_STATE(entry)]&MBCS_STATE_FLAG_READY) { - states->stateTable[state][cell]=MBCS_ENTRY_TRANSITION_SET_OFFSET(entry, sum); - sum+=states->stateOffsetSum[MBCS_ENTRY_TRANSITION_STATE(entry)]; - } else { - /* that next state does not have a sum yet, we cannot finish the one for this state */ - sum=-1; - break; - } - } - } - - if(sum!=-1) { - states->stateOffsetSum[state]=sum; - states->stateFlags[state]|=MBCS_STATE_FLAG_READY; - } - } - } - } - - if(!allStatesReady) { - fprintf(stderr, "ucm error: the state table contains loops\n"); - exit(U_INVALID_TABLE_FORMAT); - } - - /* - * For all "direct" (i.e., initial) states>0, - * the offsets need to be increased by the sum of - * the previous initial states. - */ - sum=states->stateOffsetSum[0]; - for(state=1; state<states->countStates; ++state) { - if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) { - int32_t sum2=sum; - sum+=states->stateOffsetSum[state]; - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[state][cell]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - states->stateTable[state][cell]=MBCS_ENTRY_TRANSITION_ADD_OFFSET(entry, sum2); - } - } - } - } - - /* round up to the next even number to have the following data 32-bit-aligned */ - return states->countToUCodeUnits=(sum+1)&~1; -} - -U_CAPI void U_EXPORT2 -ucm_processStates(UCMStates *states, UBool ignoreSISOCheck) { - int32_t entry, state, cell, count; - - if(states->conversionType==UCNV_UNSUPPORTED_CONVERTER) { - fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); - exit(U_INVALID_TABLE_FORMAT); - } - - if(states->countStates==0) { - switch(states->conversionType) { - case UCNV_SBCS: - /* SBCS: use MBCS data structure with a default state table */ - if(states->maxCharLength!=1) { - fprintf(stderr, "error: SBCS codepage with max B/char!=1\n"); - exit(U_INVALID_TABLE_FORMAT); - } - states->conversionType=UCNV_MBCS; - ucm_addState(states, "0-ff"); - break; - case UCNV_MBCS: - fprintf(stderr, "ucm error: missing state table information (<icu:state>) for MBCS\n"); - exit(U_INVALID_TABLE_FORMAT); - break; - case UCNV_EBCDIC_STATEFUL: - /* EBCDIC_STATEFUL: use MBCS data structure with a default state table */ - if(states->minCharLength!=1 || states->maxCharLength!=2) { - fprintf(stderr, "error: DBCS codepage with min B/char!=1 or max B/char!=2\n"); - exit(U_INVALID_TABLE_FORMAT); - } - states->conversionType=UCNV_MBCS; - ucm_addState(states, "0-ff, e:1.s, f:0.s"); - ucm_addState(states, "initial, 0-3f:4, e:1.s, f:0.s, 40:3, 41-fe:2, ff:4"); - ucm_addState(states, "0-40:1.i, 41-fe:1., ff:1.i"); - ucm_addState(states, "0-ff:1.i, 40:1."); - ucm_addState(states, "0-ff:1.i"); - break; - case UCNV_DBCS: - /* DBCS: use MBCS data structure with a default state table */ - if(states->minCharLength!=2 || states->maxCharLength!=2) { - fprintf(stderr, "error: DBCS codepage with min or max B/char!=2\n"); - exit(U_INVALID_TABLE_FORMAT); - } - states->conversionType = UCNV_MBCS; - ucm_addState(states, "0-3f:3, 40:2, 41-fe:1, ff:3"); - ucm_addState(states, "41-fe"); - ucm_addState(states, "40"); - ucm_addState(states, ""); - break; - default: - fprintf(stderr, "ucm error: unknown charset structure\n"); - exit(U_INVALID_TABLE_FORMAT); - break; - } - } - - /* - * check that the min/max character lengths are reasonable; - * to do this right, all paths through the state table would have to be - * recursively walked while keeping track of the sequence lengths, - * but these simple checks cover most state tables in practice - */ - if(states->maxCharLength<states->minCharLength) { - fprintf(stderr, "ucm error: max B/char < min B/char\n"); - exit(U_INVALID_TABLE_FORMAT); - } - - /* count non-direct states and compare with max B/char */ - count=0; - for(state=0; state<states->countStates; ++state) { - if((states->stateFlags[state]&0xf)!=MBCS_STATE_FLAG_DIRECT) { - ++count; - } - } - if(states->maxCharLength>count+1) { - fprintf(stderr, "ucm error: max B/char too large\n"); - exit(U_INVALID_TABLE_FORMAT); - } - - if(states->minCharLength==1) { - int32_t action; - - /* - * if there are single-byte characters, - * then the initial state must have direct result states - */ - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[0][cell]; - if( MBCS_ENTRY_IS_FINAL(entry) && - ((action=MBCS_ENTRY_FINAL_ACTION(entry))==MBCS_STATE_VALID_DIRECT_16 || - action==MBCS_STATE_UNASSIGNED) - ) { - break; - } - } - - if(cell==256) { - fprintf(stderr, "ucm warning: min B/char too small\n"); - } - } - - /* - * make sure that all "next state" values are within limits - * and that all next states after final ones have the "direct" - * flag of initial states - */ - for(state=states->countStates-1; state>=0; --state) { - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[state][cell]; - if((uint8_t)MBCS_ENTRY_STATE(entry)>=states->countStates) { - fprintf(stderr, "ucm error: state table entry [%x][%x] has a next state of %x that is too high\n", - (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry)); - exit(U_INVALID_TABLE_FORMAT); - } - if(MBCS_ENTRY_IS_FINAL(entry) && (states->stateFlags[MBCS_ENTRY_STATE(entry)]&0xf)!=MBCS_STATE_FLAG_DIRECT) { - fprintf(stderr, "ucm error: state table entry [%x][%x] is final but has a non-initial next state of %x\n", - (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry)); - exit(U_INVALID_TABLE_FORMAT); - } else if(MBCS_ENTRY_IS_TRANSITION(entry) && (states->stateFlags[MBCS_ENTRY_STATE(entry)]&0xf)==MBCS_STATE_FLAG_DIRECT) { - fprintf(stderr, "ucm error: state table entry [%x][%x] is not final but has an initial next state of %x\n", - (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry)); - exit(U_INVALID_TABLE_FORMAT); - } - } - } - - /* is this an SI/SO (like EBCDIC-stateful) state table? */ - if(states->countStates>=2 && (states->stateFlags[1]&0xf)==MBCS_STATE_FLAG_DIRECT) { - if(states->maxCharLength!=2) { - fprintf(stderr, "ucm error: SI/SO codepages must have max 2 bytes/char (not %x)\n", (int)states->maxCharLength); - exit(U_INVALID_TABLE_FORMAT); - } - if(states->countStates<3) { - fprintf(stderr, "ucm error: SI/SO codepages must have at least 3 states (not %x)\n", (int)states->countStates); - exit(U_INVALID_TABLE_FORMAT); - } - /* are the SI/SO all in the right places? */ - if( ignoreSISOCheck || - (states->stateTable[0][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) && - states->stateTable[0][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0) && - states->stateTable[1][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) && - states->stateTable[1][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0)) - ) { - states->outputType=MBCS_OUTPUT_2_SISO; - } else { - fprintf(stderr, "ucm error: SI/SO codepages must have in states 0 and 1 transitions e:1.s, f:0.s\n"); - exit(U_INVALID_TABLE_FORMAT); - } - state=2; - } else { - state=1; - } - - /* check that no unexpected state is a "direct" one */ - while(state<states->countStates) { - if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) { - fprintf(stderr, "ucm error: state %d is 'initial' - not supported except for SI/SO codepages\n", (int)state); - exit(U_INVALID_TABLE_FORMAT); - } - ++state; - } - - sumUpStates(states); -} - -/* find a fallback for this offset; return the index or -1 if not found */ -U_CAPI int32_t U_EXPORT2 -ucm_findFallback(_MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, - uint32_t offset) { - int32_t i; - - if(countToUFallbacks==0) { - /* shortcut: most codepages do not have fallbacks from codepage to Unicode */ - return -1; - } - - /* do a linear search for the fallback mapping (the table is not yet sorted) */ - for(i=0; i<countToUFallbacks; ++i) { - if(offset==toUFallbacks[i].offset) { - return i; - } - } - return -1; -} - -/* - * This function tries to compact toUnicode tables for 2-byte codepages - * by finding lead bytes with all-unassigned trail bytes and adding another state - * for them. - */ -static void -compactToUnicode2(UCMStates *states, - uint16_t **pUnicodeCodeUnits, - _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, - UBool verbose) { - int32_t (*oldStateTable)[256]; - uint16_t count[256]; - uint16_t *oldUnicodeCodeUnits; - int32_t entry, offset, oldOffset, trailOffset, oldTrailOffset, savings, sum; - int32_t i, j, leadState, trailState, newState, fallback; - uint16_t unit; - - /* find the lead state */ - if(states->outputType==MBCS_OUTPUT_2_SISO) { - /* use the DBCS lead state for SI/SO codepages */ - leadState=1; - } else { - leadState=0; - } - - /* find the main trail state: the most used target state */ - uprv_memset(count, 0, sizeof(count)); - for(i=0; i<256; ++i) { - entry=states->stateTable[leadState][i]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - ++count[MBCS_ENTRY_TRANSITION_STATE(entry)]; - } - } - trailState=0; - for(i=1; i<states->countStates; ++i) { - if(count[i]>count[trailState]) { - trailState=i; - } - } - - /* count possible savings from lead bytes with all-unassigned results in all trail bytes */ - uprv_memset(count, 0, sizeof(count)); - savings=0; - /* for each lead byte */ - for(i=0; i<256; ++i) { - entry=states->stateTable[leadState][i]; - if(MBCS_ENTRY_IS_TRANSITION(entry) && (MBCS_ENTRY_TRANSITION_STATE(entry))==trailState) { - /* the offset is different for each lead byte */ - offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); - /* for each trail byte for this lead byte */ - for(j=0; j<256; ++j) { - entry=states->stateTable[trailState][j]; - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_VALID_16: - entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); - if((*pUnicodeCodeUnits)[entry]==0xfffe && ucm_findFallback(toUFallbacks, countToUFallbacks, entry)<0) { - ++count[i]; - } else { - j=999; /* do not count for this lead byte because there are assignments */ - } - break; - case MBCS_STATE_VALID_16_PAIR: - entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); - if((*pUnicodeCodeUnits)[entry]==0xfffe) { - count[i]+=2; - } else { - j=999; /* do not count for this lead byte because there are assignments */ - } - break; - default: - break; - } - } - if(j==256) { - /* all trail bytes for this lead byte are unassigned */ - savings+=count[i]; - } else { - count[i]=0; - } - } - } - /* subtract from the possible savings the cost of an additional state */ - savings=savings*2-1024; /* count bytes, not 16-bit words */ - if(savings<=0) { - return; - } - if(verbose) { - printf("compacting toUnicode data saves %ld bytes\n", (long)savings); - } - if(states->countStates>=MBCS_MAX_STATE_COUNT) { - fprintf(stderr, "cannot compact toUnicode because the maximum number of states is reached\n"); - return; - } - - /* make a copy of the state table */ - oldStateTable=(int32_t (*)[256])uprv_malloc(states->countStates*1024); - if(oldStateTable==NULL) { - fprintf(stderr, "cannot compact toUnicode: out of memory\n"); - return; - } - uprv_memcpy(oldStateTable, states->stateTable, states->countStates*1024); - - /* add the new state */ - /* - * this function does not catch the degenerate case where all lead bytes - * have all-unassigned trail bytes and the lead state could be removed - */ - newState=states->countStates++; - states->stateFlags[newState]=0; - /* copy the old trail state, turning all assigned states into unassigned ones */ - for(i=0; i<256; ++i) { - entry=states->stateTable[trailState][i]; - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_VALID_16: - case MBCS_STATE_VALID_16_PAIR: - states->stateTable[newState][i]=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_UNASSIGNED, 0xfffe); - break; - default: - states->stateTable[newState][i]=entry; - break; - } - } - - /* in the lead state, redirect all lead bytes with all-unassigned trail bytes to the new state */ - for(i=0; i<256; ++i) { - if(count[i]>0) { - states->stateTable[leadState][i]=MBCS_ENTRY_SET_STATE(states->stateTable[leadState][i], newState); - } - } - - /* sum up the new state table */ - for(i=0; i<states->countStates; ++i) { - states->stateFlags[i]&=~MBCS_STATE_FLAG_READY; - } - sum=sumUpStates(states); - - /* allocate a new, smaller code units array */ - oldUnicodeCodeUnits=*pUnicodeCodeUnits; - if(sum==0) { - *pUnicodeCodeUnits=NULL; - if(oldUnicodeCodeUnits!=NULL) { - uprv_free(oldUnicodeCodeUnits); - } - uprv_free(oldStateTable); - return; - } - *pUnicodeCodeUnits=(uint16_t *)uprv_malloc(sum*sizeof(uint16_t)); - if(*pUnicodeCodeUnits==NULL) { - fprintf(stderr, "cannot compact toUnicode: out of memory allocating %ld 16-bit code units\n", - (long)sum); - /* revert to the old state table */ - *pUnicodeCodeUnits=oldUnicodeCodeUnits; - --states->countStates; - uprv_memcpy(states->stateTable, oldStateTable, states->countStates*1024); - uprv_free(oldStateTable); - return; - } - for(i=0; i<sum; ++i) { - (*pUnicodeCodeUnits)[i]=0xfffe; - } - - /* copy the code units for all assigned characters */ - /* - * The old state table has the same lead _and_ trail states for assigned characters! - * The differences are in the offsets, and in the trail states for some unassigned characters. - * For each character with an assigned state in the new table, it was assigned in the old one. - * Only still-assigned characters are copied. - * Note that fallback mappings need to get their offset values adjusted. - */ - - /* for each initial state */ - for(leadState=0; leadState<states->countStates; ++leadState) { - if((states->stateFlags[leadState]&0xf)==MBCS_STATE_FLAG_DIRECT) { - /* for each lead byte from there */ - for(i=0; i<256; ++i) { - entry=states->stateTable[leadState][i]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - trailState=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); - /* the new state does not have assigned states */ - if(trailState!=newState) { - trailOffset=MBCS_ENTRY_TRANSITION_OFFSET(entry); - oldTrailOffset=MBCS_ENTRY_TRANSITION_OFFSET(oldStateTable[leadState][i]); - /* for each trail byte */ - for(j=0; j<256; ++j) { - entry=states->stateTable[trailState][j]; - /* copy assigned-character code units and adjust fallback offsets */ - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_VALID_16: - offset=trailOffset+MBCS_ENTRY_FINAL_VALUE_16(entry); - /* find the old offset according to the old state table */ - oldOffset=oldTrailOffset+MBCS_ENTRY_FINAL_VALUE_16(oldStateTable[trailState][j]); - unit=(*pUnicodeCodeUnits)[offset]=oldUnicodeCodeUnits[oldOffset]; - if(unit==0xfffe && (fallback=ucm_findFallback(toUFallbacks, countToUFallbacks, oldOffset))>=0) { - toUFallbacks[fallback].offset=0x80000000|offset; - } - break; - case MBCS_STATE_VALID_16_PAIR: - offset=trailOffset+MBCS_ENTRY_FINAL_VALUE_16(entry); - /* find the old offset according to the old state table */ - oldOffset=oldTrailOffset+MBCS_ENTRY_FINAL_VALUE_16(oldStateTable[trailState][j]); - (*pUnicodeCodeUnits)[offset++]=oldUnicodeCodeUnits[oldOffset++]; - (*pUnicodeCodeUnits)[offset]=oldUnicodeCodeUnits[oldOffset]; - break; - default: - break; - } - } - } - } - } - } - } - - /* remove temporary flags from fallback offsets that protected them from being modified twice */ - for(i=0; i<countToUFallbacks; ++i) { - toUFallbacks[i].offset&=0x7fffffff; - } - - /* free temporary memory */ - uprv_free(oldUnicodeCodeUnits); - uprv_free(oldStateTable); -} - -/* - * recursive sub-function of compactToUnicodeHelper() - * returns: - * >0 number of bytes that are used in unicodeCodeUnits[] that could be saved, - * if all sequences from this state are unassigned, returns the - * <0 there are assignments in unicodeCodeUnits[] - * 0 no use of unicodeCodeUnits[] - */ -static int32_t -findUnassigned(UCMStates *states, - uint16_t *unicodeCodeUnits, - _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, - int32_t state, int32_t offset, uint32_t b) { - int32_t i, entry, savings, localSavings, belowSavings; - UBool haveAssigned; - - localSavings=belowSavings=0; - haveAssigned=FALSE; - for(i=0; i<256; ++i) { - entry=states->stateTable[state][i]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - savings=findUnassigned(states, - unicodeCodeUnits, - toUFallbacks, countToUFallbacks, - MBCS_ENTRY_TRANSITION_STATE(entry), - offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), - (b<<8)|(uint32_t)i); - if(savings<0) { - haveAssigned=TRUE; - } else if(savings>0) { - printf(" all-unassigned sequences from prefix 0x%02lx state %ld use %ld bytes\n", - (unsigned long)((b<<8)|i), (long)state, (long)savings); - belowSavings+=savings; - } - } else if(!haveAssigned) { - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_VALID_16: - entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); - if(unicodeCodeUnits[entry]==0xfffe && ucm_findFallback(toUFallbacks, countToUFallbacks, entry)<0) { - localSavings+=2; - } else { - haveAssigned=TRUE; - } - break; - case MBCS_STATE_VALID_16_PAIR: - entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); - if(unicodeCodeUnits[entry]==0xfffe) { - localSavings+=4; - } else { - haveAssigned=TRUE; - } - break; - default: - break; - } - } - } - if(haveAssigned) { - return -1; - } else { - return localSavings+belowSavings; - } -} - -/* helper function for finding compaction opportunities */ -static void -compactToUnicodeHelper(UCMStates *states, - uint16_t *unicodeCodeUnits, - _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks) { - int32_t state, savings; - - /* for each initial state */ - for(state=0; state<states->countStates; ++state) { - if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) { - savings=findUnassigned(states, - unicodeCodeUnits, - toUFallbacks, countToUFallbacks, - state, 0, 0); - if(savings>0) { - printf(" all-unassigned sequences from initial state %ld use %ld bytes\n", - (long)state, (long)savings); - } - } - } -} - -U_CDECL_BEGIN -static int32_t U_CALLCONV -compareFallbacks(const void *context, const void *fb1, const void *fb2) { - (void)context; - return ((const _MBCSToUFallback *)fb1)->offset-((const _MBCSToUFallback *)fb2)->offset; -} -U_CDECL_END - -U_CAPI void U_EXPORT2 -ucm_optimizeStates(UCMStates *states, - uint16_t **pUnicodeCodeUnits, - _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, - UBool verbose) { - UErrorCode errorCode; - int32_t state, cell, entry; - - /* test each state table entry */ - for(state=0; state<states->countStates; ++state) { - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[state][cell]; - /* - * if the entry is a final one with an MBCS_STATE_VALID_DIRECT_16 action code - * and the code point is "unassigned" (0xfffe), then change it to - * the "unassigned" action code with bits 26..23 set to zero and U+fffe. - */ - if(MBCS_ENTRY_SET_STATE(entry, 0)==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, 0xfffe)) { - states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_UNASSIGNED); - } - } - } - - /* try to compact the toUnicode tables */ - if(states->maxCharLength==2) { - compactToUnicode2(states, pUnicodeCodeUnits, toUFallbacks, countToUFallbacks, verbose); - } else if(states->maxCharLength>2) { - if(verbose) { - compactToUnicodeHelper(states, *pUnicodeCodeUnits, toUFallbacks, countToUFallbacks); - } - } - - /* sort toUFallbacks */ - /* - * It should be safe to sort them before compactToUnicode2() is called, - * because it should not change the relative order of the offset values - * that it adjusts, but they need to be sorted at some point, and - * it is safest here. - */ - if(countToUFallbacks>0) { - errorCode=U_ZERO_ERROR; /* nothing bad will happen... */ - uprv_sortArray(toUFallbacks, countToUFallbacks, - sizeof(_MBCSToUFallback), - compareFallbacks, NULL, FALSE, &errorCode); - } -} - -/* use a complete state table ----------------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -ucm_countChars(UCMStates *states, - const uint8_t *bytes, int32_t length) { - uint32_t offset; - int32_t i, entry, count; - uint8_t state; - - offset=0; - count=0; - state=0; - - if(states->countStates==0) { - fprintf(stderr, "ucm error: there is no state information!\n"); - return -1; - } - - /* for SI/SO (like EBCDIC-stateful), double-byte sequences start in state 1 */ - if(length==2 && states->outputType==MBCS_OUTPUT_2_SISO) { - state=1; - } - - /* - * Walk down the state table like in conversion, - * much like getNextUChar(). - * We assume that c<=0x10ffff. - */ - for(i=0; i<length; ++i) { - entry=states->stateTable[state][bytes[i]]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); - offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); - } else { - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_ILLEGAL: - fprintf(stderr, "ucm error: byte sequence ends in illegal state\n"); - return -1; - case MBCS_STATE_CHANGE_ONLY: - fprintf(stderr, "ucm error: byte sequence ends in state-change-only\n"); - return -1; - case MBCS_STATE_UNASSIGNED: - case MBCS_STATE_FALLBACK_DIRECT_16: - case MBCS_STATE_VALID_DIRECT_16: - case MBCS_STATE_FALLBACK_DIRECT_20: - case MBCS_STATE_VALID_DIRECT_20: - case MBCS_STATE_VALID_16: - case MBCS_STATE_VALID_16_PAIR: - /* count a complete character and prepare for a new one */ - ++count; - state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); - offset=0; - break; - default: - /* reserved, must never occur */ - fprintf(stderr, "ucm error: byte sequence reached reserved action code, entry: 0x%02lx\n", (unsigned long)entry); - return -1; - } - } - } - - if(offset!=0) { - fprintf(stderr, "ucm error: byte sequence too short, ends in non-final state %u\n", state); - return -1; - } - - /* - * for SI/SO (like EBCDIC-stateful), multiple-character results - * must consist of only double-byte sequences - */ - if(count>1 && states->outputType==MBCS_OUTPUT_2_SISO && length!=2*count) { - fprintf(stderr, "ucm error: SI/SO (like EBCDIC-stateful) result with %d characters does not contain all DBCS\n", (int)count); - return -1; - } - - return count; -} -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/udbgutil.cpp b/deps/node/deps/icu-small/source/tools/toolutil/udbgutil.cpp deleted file mode 100644 index 285f68a0..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/udbgutil.cpp +++ /dev/null @@ -1,754 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/******************************************************************** - * COPYRIGHT: - * Copyright (c) 2007-2016, International Business Machines Corporation and - * others. All Rights Reserved. - ********************************************************************/ - -#include "udbgutil.h" -#include <string.h> -#include "ustr_imp.h" -#include "cmemory.h" -#include "cstring.h" -#include "putilimp.h" -#include "unicode/ulocdata.h" -#include "unicode/ucnv.h" -#include "unicode/unistr.h" -#include "cstr.h" - -/* -To add a new enum type - (For example: UShoeSize with values USHOE_WIDE=0, USHOE_REGULAR, USHOE_NARROW, USHOE_COUNT) - - 0. Make sure that all lines you add are protected with appropriate uconfig guards, - such as '#if !UCONFIG_NO_SHOES'. - 1. udbgutil.h: add UDBG_UShoeSize to the UDebugEnumType enum before UDBG_ENUM_COUNT - ( The subsequent steps involve this file, udbgutil.cpp ) - 2. Find the marker "Add new enum types above this line" - 3. Before that marker, add a #include of any header file you need. - 4. Each enum type has three things in this section: a #define, a count_, and an array of Fields. - It may help to copy and paste a previous definition. - 5. In the case of the USHOE_... strings above, "USHOE_" is common to all values- six characters - " #define LEN_USHOE 6 " - 6 characters will strip off "USHOE_" leaving enum values of WIDE, REGULAR, and NARROW. - 6. Define the 'count_' variable, with the number of enum values. If the enum has a _MAX or _COUNT value, - that can be helpful for automatically defining the count. Otherwise define it manually. - " static const int32_t count_UShoeSize = USHOE_COUNT; " - 7. Define the field names, in order. - " static const Field names_UShoeSize[] = { - " FIELD_NAME_STR( LEN_USHOE, USHOE_WIDE ), - " FIELD_NAME_STR( LEN_USHOE, USHOE_REGULAR ), - " FIELD_NAME_STR( LEN_USHOE, USHOE_NARROW ), - " }; - ( The following command was usedfor converting ucol.h into partially correct entities ) - grep "^[ ]*UCOL" < unicode/ucol.h | - sed -e 's%^[ ]*\([A-Z]*\)_\([A-Z_]*\).*% FIELD_NAME_STR( LEN_\1, \1_\2 ),%g' - 8. Now, a bit farther down, add the name of the enum itself to the end of names_UDebugEnumType - ( UDebugEnumType is an enum, too!) - names_UDebugEnumType[] { ... - " FIELD_NAME_STR( LEN_UDBG, UDBG_UShoeSize ), " - 9. Find the function _udbg_enumCount and add the count macro: - " COUNT_CASE(UShoeSize) - 10. Find the function _udbg_enumFields and add the field macro: - " FIELD_CASE(UShoeSize) - 11. verify that your test code, and Java data generation, works properly. -*/ - -/** - * Structure representing an enum value - */ -struct Field { - int32_t prefix; /**< how many characters to remove in the prefix - i.e. UCHAR_ = 5 */ - const char *str; /**< The actual string value */ - int32_t num; /**< The numeric value */ -}; - -/** - * Define another field name. Used in an array of Field s - * @param y the common prefix length (i.e. 6 for "USHOE_" ) - * @param x the actual enum value - it will be copied in both string and symbolic form. - * @see Field - */ -#define FIELD_NAME_STR(y,x) { y, #x, x } - - -// TODO: Currently, this whole functionality goes away with UCONFIG_NO_FORMATTING. Should be split up. -#if !UCONFIG_NO_FORMATTING - -// Calendar -#include "unicode/ucal.h" - -// 'UCAL_' = 5 -#define LEN_UCAL 5 /* UCAL_ */ -static const int32_t count_UCalendarDateFields = UCAL_FIELD_COUNT; -static const Field names_UCalendarDateFields[] = -{ - FIELD_NAME_STR( LEN_UCAL, UCAL_ERA ), - FIELD_NAME_STR( LEN_UCAL, UCAL_YEAR ), - FIELD_NAME_STR( LEN_UCAL, UCAL_MONTH ), - FIELD_NAME_STR( LEN_UCAL, UCAL_WEEK_OF_YEAR ), - FIELD_NAME_STR( LEN_UCAL, UCAL_WEEK_OF_MONTH ), - FIELD_NAME_STR( LEN_UCAL, UCAL_DATE ), - FIELD_NAME_STR( LEN_UCAL, UCAL_DAY_OF_YEAR ), - FIELD_NAME_STR( LEN_UCAL, UCAL_DAY_OF_WEEK ), - FIELD_NAME_STR( LEN_UCAL, UCAL_DAY_OF_WEEK_IN_MONTH ), - FIELD_NAME_STR( LEN_UCAL, UCAL_AM_PM ), - FIELD_NAME_STR( LEN_UCAL, UCAL_HOUR ), - FIELD_NAME_STR( LEN_UCAL, UCAL_HOUR_OF_DAY ), - FIELD_NAME_STR( LEN_UCAL, UCAL_MINUTE ), - FIELD_NAME_STR( LEN_UCAL, UCAL_SECOND ), - FIELD_NAME_STR( LEN_UCAL, UCAL_MILLISECOND ), - FIELD_NAME_STR( LEN_UCAL, UCAL_ZONE_OFFSET ), - FIELD_NAME_STR( LEN_UCAL, UCAL_DST_OFFSET ), - FIELD_NAME_STR( LEN_UCAL, UCAL_YEAR_WOY ), - FIELD_NAME_STR( LEN_UCAL, UCAL_DOW_LOCAL ), - FIELD_NAME_STR( LEN_UCAL, UCAL_EXTENDED_YEAR ), - FIELD_NAME_STR( LEN_UCAL, UCAL_JULIAN_DAY ), - FIELD_NAME_STR( LEN_UCAL, UCAL_MILLISECONDS_IN_DAY ), - FIELD_NAME_STR( LEN_UCAL, UCAL_IS_LEAP_MONTH ), -}; - - -static const int32_t count_UCalendarMonths = UCAL_UNDECIMBER+1; -static const Field names_UCalendarMonths[] = -{ - FIELD_NAME_STR( LEN_UCAL, UCAL_JANUARY ), - FIELD_NAME_STR( LEN_UCAL, UCAL_FEBRUARY ), - FIELD_NAME_STR( LEN_UCAL, UCAL_MARCH ), - FIELD_NAME_STR( LEN_UCAL, UCAL_APRIL ), - FIELD_NAME_STR( LEN_UCAL, UCAL_MAY ), - FIELD_NAME_STR( LEN_UCAL, UCAL_JUNE ), - FIELD_NAME_STR( LEN_UCAL, UCAL_JULY ), - FIELD_NAME_STR( LEN_UCAL, UCAL_AUGUST ), - FIELD_NAME_STR( LEN_UCAL, UCAL_SEPTEMBER ), - FIELD_NAME_STR( LEN_UCAL, UCAL_OCTOBER ), - FIELD_NAME_STR( LEN_UCAL, UCAL_NOVEMBER ), - FIELD_NAME_STR( LEN_UCAL, UCAL_DECEMBER ), - FIELD_NAME_STR( LEN_UCAL, UCAL_UNDECIMBER) -}; - -#include "unicode/udat.h" - -#define LEN_UDAT 5 /* "UDAT_" */ -static const int32_t count_UDateFormatStyle = UDAT_SHORT+1; -static const Field names_UDateFormatStyle[] = -{ - FIELD_NAME_STR( LEN_UDAT, UDAT_FULL ), - FIELD_NAME_STR( LEN_UDAT, UDAT_LONG ), - FIELD_NAME_STR( LEN_UDAT, UDAT_MEDIUM ), - FIELD_NAME_STR( LEN_UDAT, UDAT_SHORT ), - /* end regular */ - /* - * negative enums.. leave out for now. - FIELD_NAME_STR( LEN_UDAT, UDAT_NONE ), - FIELD_NAME_STR( LEN_UDAT, UDAT_PATTERN ), - */ -}; - -#endif - -#include "unicode/uloc.h" - -#define LEN_UAR 12 /* "ULOC_ACCEPT_" */ -static const int32_t count_UAcceptResult = 3; -static const Field names_UAcceptResult[] = -{ - FIELD_NAME_STR( LEN_UAR, ULOC_ACCEPT_FAILED ), - FIELD_NAME_STR( LEN_UAR, ULOC_ACCEPT_VALID ), - FIELD_NAME_STR( LEN_UAR, ULOC_ACCEPT_FALLBACK ), -}; - -#if !UCONFIG_NO_COLLATION -#include "unicode/ucol.h" -#define LEN_UCOL 5 /* UCOL_ */ -static const int32_t count_UColAttributeValue = UCOL_ATTRIBUTE_VALUE_COUNT; -static const Field names_UColAttributeValue[] = { - FIELD_NAME_STR( LEN_UCOL, UCOL_PRIMARY ), - FIELD_NAME_STR( LEN_UCOL, UCOL_SECONDARY ), - FIELD_NAME_STR( LEN_UCOL, UCOL_TERTIARY ), -// FIELD_NAME_STR( LEN_UCOL, UCOL_CE_STRENGTH_LIMIT ), - FIELD_NAME_STR( LEN_UCOL, UCOL_QUATERNARY ), - // gap - FIELD_NAME_STR( LEN_UCOL, UCOL_IDENTICAL ), -// FIELD_NAME_STR( LEN_UCOL, UCOL_STRENGTH_LIMIT ), - FIELD_NAME_STR( LEN_UCOL, UCOL_OFF ), - FIELD_NAME_STR( LEN_UCOL, UCOL_ON ), - // gap - FIELD_NAME_STR( LEN_UCOL, UCOL_SHIFTED ), - FIELD_NAME_STR( LEN_UCOL, UCOL_NON_IGNORABLE ), - // gap - FIELD_NAME_STR( LEN_UCOL, UCOL_LOWER_FIRST ), - FIELD_NAME_STR( LEN_UCOL, UCOL_UPPER_FIRST ), -}; - -#endif - - -#if UCONFIG_ENABLE_PLUGINS -#include "unicode/icuplug.h" - -#define LEN_UPLUG_REASON 13 /* UPLUG_REASON_ */ -static const int32_t count_UPlugReason = UPLUG_REASON_COUNT; -static const Field names_UPlugReason[] = { - FIELD_NAME_STR( LEN_UPLUG_REASON, UPLUG_REASON_QUERY ), - FIELD_NAME_STR( LEN_UPLUG_REASON, UPLUG_REASON_LOAD ), - FIELD_NAME_STR( LEN_UPLUG_REASON, UPLUG_REASON_UNLOAD ), -}; - -#define LEN_UPLUG_LEVEL 12 /* UPLUG_LEVEL_ */ -static const int32_t count_UPlugLevel = UPLUG_LEVEL_COUNT; -static const Field names_UPlugLevel[] = { - FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_INVALID ), - FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_UNKNOWN ), - FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_LOW ), - FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_HIGH ), -}; -#endif - -#define LEN_UDBG 5 /* "UDBG_" */ -static const int32_t count_UDebugEnumType = UDBG_ENUM_COUNT; -static const Field names_UDebugEnumType[] = -{ - FIELD_NAME_STR( LEN_UDBG, UDBG_UDebugEnumType ), -#if !UCONFIG_NO_FORMATTING - FIELD_NAME_STR( LEN_UDBG, UDBG_UCalendarDateFields ), - FIELD_NAME_STR( LEN_UDBG, UDBG_UCalendarMonths ), - FIELD_NAME_STR( LEN_UDBG, UDBG_UDateFormatStyle ), -#endif -#if UCONFIG_ENABLE_PLUGINS - FIELD_NAME_STR( LEN_UDBG, UDBG_UPlugReason ), - FIELD_NAME_STR( LEN_UDBG, UDBG_UPlugLevel ), -#endif - FIELD_NAME_STR( LEN_UDBG, UDBG_UAcceptResult ), -#if !UCONFIG_NO_COLLATION - FIELD_NAME_STR( LEN_UDBG, UDBG_UColAttributeValue ), -#endif -}; - - -// --- Add new enum types above this line --- - -#define COUNT_CASE(x) case UDBG_##x: return (actual?count_##x:UPRV_LENGTHOF(names_##x)); -#define COUNT_FAIL_CASE(x) case UDBG_##x: return -1; - -#define FIELD_CASE(x) case UDBG_##x: return names_##x; -#define FIELD_FAIL_CASE(x) case UDBG_##x: return NULL; - -// low level - -/** - * @param type type of item - * @param actual TRUE: for the actual enum's type (UCAL_FIELD_COUNT, etc), or FALSE for the string count - */ -static int32_t _udbg_enumCount(UDebugEnumType type, UBool actual) { - switch(type) { - COUNT_CASE(UDebugEnumType) -#if !UCONFIG_NO_FORMATTING - COUNT_CASE(UCalendarDateFields) - COUNT_CASE(UCalendarMonths) - COUNT_CASE(UDateFormatStyle) -#endif -#if UCONFIG_ENABLE_PLUGINS - COUNT_CASE(UPlugReason) - COUNT_CASE(UPlugLevel) -#endif - COUNT_CASE(UAcceptResult) -#if !UCONFIG_NO_COLLATION - COUNT_CASE(UColAttributeValue) -#endif - // COUNT_FAIL_CASE(UNonExistentEnum) - default: - return -1; - } -} - -static const Field* _udbg_enumFields(UDebugEnumType type) { - switch(type) { - FIELD_CASE(UDebugEnumType) -#if !UCONFIG_NO_FORMATTING - FIELD_CASE(UCalendarDateFields) - FIELD_CASE(UCalendarMonths) - FIELD_CASE(UDateFormatStyle) -#endif -#if UCONFIG_ENABLE_PLUGINS - FIELD_CASE(UPlugReason) - FIELD_CASE(UPlugLevel) -#endif - FIELD_CASE(UAcceptResult) - // FIELD_FAIL_CASE(UNonExistentEnum) -#if !UCONFIG_NO_COLLATION - FIELD_CASE(UColAttributeValue) -#endif - default: - return NULL; - } -} - -// implementation - -int32_t udbg_enumCount(UDebugEnumType type) { - return _udbg_enumCount(type, FALSE); -} - -int32_t udbg_enumExpectedCount(UDebugEnumType type) { - return _udbg_enumCount(type, TRUE); -} - -const char * udbg_enumName(UDebugEnumType type, int32_t field) { - if(field<0 || - field>=_udbg_enumCount(type,FALSE)) { // also will catch unsupported items - return NULL; - } else { - const Field *fields = _udbg_enumFields(type); - if(fields == NULL) { - return NULL; - } else { - return fields[field].str + fields[field].prefix; - } - } -} - -int32_t udbg_enumArrayValue(UDebugEnumType type, int32_t field) { - if(field<0 || - field>=_udbg_enumCount(type,FALSE)) { // also will catch unsupported items - return -1; - } else { - const Field *fields = _udbg_enumFields(type); - if(fields == NULL) { - return -1; - } else { - return fields[field].num; - } - } -} - -int32_t udbg_enumByName(UDebugEnumType type, const char *value) { - if(type<0||type>=_udbg_enumCount(UDBG_UDebugEnumType, TRUE)) { - return -1; // type out of range - } - const Field *fields = _udbg_enumFields(type); - if (fields != NULL) { - for(int32_t field = 0;field<_udbg_enumCount(type, FALSE);field++) { - if(!strcmp(value, fields[field].str + fields[field].prefix)) { - return fields[field].num; - } - } - // try with the prefix - for(int32_t field = 0;field<_udbg_enumCount(type, FALSE);field++) { - if(!strcmp(value, fields[field].str)) { - return fields[field].num; - } - } - } - // fail - return -1; -} - -/* platform info */ -/** - * Print the current platform - */ -U_CAPI const char *udbg_getPlatform(void) -{ -#if U_PLATFORM_USES_ONLY_WIN32_API - return "Windows"; -#elif U_PLATFORM == U_PF_CYGWIN - return "Cygwin"; -#elif U_PLATFORM == U_PF_UNKNOWN - return "unknown"; -#elif U_PLATFORM == U_PF_DARWIN - return "Darwin"; -#elif U_PLATFORM == U_PF_BSD - return "BSD"; -#elif U_PLATFORM == U_PF_QNX - return "QNX"; -#elif U_PLATFORM == U_PF_LINUX - return "Linux"; -#elif U_PLATFORM == U_PF_ANDROID - return "Android"; -#elif U_PLATFORM == U_PF_CLASSIC_MACOS - return "MacOS (Classic)"; -#elif U_PLATFORM == U_PF_OS390 - return "IBM z"; -#elif U_PLATFORM == U_PF_OS400 - return "IBM i"; -#else - return "Other (POSIX-like)"; -#endif -} - -struct USystemParams; - -typedef int32_t U_CALLCONV USystemParameterCallback(const USystemParams *param, char *target, int32_t targetCapacity, UErrorCode *status); - -struct USystemParams { - const char *paramName; - USystemParameterCallback *paramFunction; - const char *paramStr; - int32_t paramInt; -}; - -/* parameter types */ -U_CAPI int32_t -paramEmpty(const USystemParams * /* param */, char *target, int32_t targetCapacity, UErrorCode *status) { - if(U_FAILURE(*status))return 0; - return u_terminateChars(target, targetCapacity, 0, status); -} - -U_CAPI int32_t -paramStatic(const USystemParams *param, char *target, int32_t targetCapacity, UErrorCode *status) { - if(param->paramStr==NULL) return paramEmpty(param,target,targetCapacity,status); - if(U_FAILURE(*status))return 0; - int32_t len = static_cast<int32_t>(uprv_strlen(param->paramStr)); - if(target!=NULL) { - uprv_strncpy(target,param->paramStr,uprv_min(len,targetCapacity)); - } - return u_terminateChars(target, targetCapacity, len, status); -} - -static const char *nullString = "(null)"; - -static int32_t stringToStringBuffer(char *target, int32_t targetCapacity, const char *str, UErrorCode *status) { - if(str==NULL) str=nullString; - - int32_t len = static_cast<int32_t>(uprv_strlen(str)); - if (U_SUCCESS(*status)) { - if(target!=NULL) { - uprv_strncpy(target,str,uprv_min(len,targetCapacity)); - } - } else { - const char *s = u_errorName(*status); - len = static_cast<int32_t>(uprv_strlen(s)); - if(target!=NULL) { - uprv_strncpy(target,s,uprv_min(len,targetCapacity)); - } - } - return u_terminateChars(target, targetCapacity, len, status); -} - -static int32_t integerToStringBuffer(char *target, int32_t targetCapacity, int32_t n, int32_t radix, UErrorCode *status) { - if(U_FAILURE(*status)) return 0; - char str[300]; - T_CString_integerToString(str,n,radix); - return stringToStringBuffer(target,targetCapacity,str,status); -} - -U_CAPI int32_t -paramInteger(const USystemParams *param, char *target, int32_t targetCapacity, UErrorCode *status) { - if(U_FAILURE(*status))return 0; - if(param->paramStr==NULL || param->paramStr[0]=='d') { - return integerToStringBuffer(target,targetCapacity,param->paramInt, 10,status); - } else if(param->paramStr[0]=='x') { - return integerToStringBuffer(target,targetCapacity,param->paramInt, 16,status); - } else if(param->paramStr[0]=='o') { - return integerToStringBuffer(target,targetCapacity,param->paramInt, 8,status); - } else if(param->paramStr[0]=='b') { - return integerToStringBuffer(target,targetCapacity,param->paramInt, 2,status); - } else { - *status = U_INTERNAL_PROGRAM_ERROR; - return 0; - } -} - - -U_CAPI int32_t -paramCldrVersion(const USystemParams * /* param */, char *target, int32_t targetCapacity, UErrorCode *status) { - if(U_FAILURE(*status))return 0; - char str[200]=""; - UVersionInfo icu; - - ulocdata_getCLDRVersion(icu, status); - if(U_SUCCESS(*status)) { - u_versionToString(icu, str); - return stringToStringBuffer(target,targetCapacity,str,status); - } else { - return 0; - } -} - - -#if !UCONFIG_NO_FORMATTING -U_CAPI int32_t -paramTimezoneDefault(const USystemParams * /* param */, char *target, int32_t targetCapacity, UErrorCode *status) { - if(U_FAILURE(*status))return 0; - UChar buf[100]; - char buf2[100]; - int32_t len; - - len = ucal_getDefaultTimeZone(buf, 100, status); - if(U_SUCCESS(*status)&&len>0) { - u_UCharsToChars(buf, buf2, len+1); - return stringToStringBuffer(target,targetCapacity, buf2,status); - } else { - return 0; - } -} -#endif - -U_CAPI int32_t -paramLocaleDefaultBcp47(const USystemParams * /* param */, char *target, int32_t targetCapacity, UErrorCode *status) { - if(U_FAILURE(*status))return 0; - const char *def = uloc_getDefault(); - return uloc_toLanguageTag(def,target,targetCapacity,FALSE,status); -} - - -/* simple 1-liner param functions */ -#define STRING_PARAM(func, str) U_CAPI int32_t \ - func(const USystemParams *, char *target, int32_t targetCapacity, UErrorCode *status) \ - { return stringToStringBuffer(target,targetCapacity,(str),status); } - -STRING_PARAM(paramIcudataPath, u_getDataDirectory()) -STRING_PARAM(paramPlatform, udbg_getPlatform()) -STRING_PARAM(paramLocaleDefault, uloc_getDefault()) -#if !UCONFIG_NO_CONVERSION -STRING_PARAM(paramConverterDefault, ucnv_getDefaultName()) -#endif - -#if !UCONFIG_NO_FORMATTING -STRING_PARAM(paramTimezoneVersion, ucal_getTZDataVersion(status)) -#endif - -static const USystemParams systemParams[] = { - { "copyright", paramStatic, U_COPYRIGHT_STRING,0 }, - { "product", paramStatic, "icu4c",0 }, - { "product.full", paramStatic, "International Components for Unicode for C/C++",0 }, - { "version", paramStatic, U_ICU_VERSION,0 }, - { "version.unicode", paramStatic, U_UNICODE_VERSION,0 }, - { "platform.number", paramInteger, "d",U_PLATFORM}, - { "platform.type", paramPlatform, NULL ,0}, - { "locale.default", paramLocaleDefault, NULL, 0}, - { "locale.default.bcp47", paramLocaleDefaultBcp47, NULL, 0}, -#if !UCONFIG_NO_CONVERSION - { "converter.default", paramConverterDefault, NULL, 0}, -#endif - { "icudata.name", paramStatic, U_ICUDATA_NAME, 0}, - { "icudata.path", paramIcudataPath, NULL, 0}, - - { "cldr.version", paramCldrVersion, NULL, 0}, - -#if !UCONFIG_NO_FORMATTING - { "tz.version", paramTimezoneVersion, NULL, 0}, - { "tz.default", paramTimezoneDefault, NULL, 0}, -#endif - - { "cpu.bits", paramInteger, "d", (sizeof(void*))*8}, - { "cpu.big_endian", paramInteger, "b", U_IS_BIG_ENDIAN}, - { "os.wchar_width", paramInteger, "d", U_SIZEOF_WCHAR_T}, - { "os.charset_family", paramInteger, "d", U_CHARSET_FAMILY}, -#if defined (U_HOST) - { "os.host", paramStatic, U_HOST, 0}, -#endif -#if defined (U_BUILD) - { "build.build", paramStatic, U_BUILD, 0}, -#endif -#if defined (U_CC) - { "build.cc", paramStatic, U_CC, 0}, -#endif -#if defined (U_CXX) - { "build.cxx", paramStatic, U_CXX, 0}, -#endif -#if defined (CYGWINMSVC) - { "build.cygwinmsvc", paramInteger, "b", 1}, -#endif - { "uconfig.internal_digitlist", paramInteger, "b", 1}, /* always 1 */ - { "uconfig.have_parseallinput", paramInteger, "b", UCONFIG_HAVE_PARSEALLINPUT}, - - -}; - -#define U_SYSPARAM_COUNT UPRV_LENGTHOF(systemParams) - -U_CAPI const char *udbg_getSystemParameterNameByIndex(int32_t i) { - if(i>=0 && i < (int32_t)U_SYSPARAM_COUNT) { - return systemParams[i].paramName; - } else { - return NULL; - } -} - - -U_CAPI int32_t udbg_getSystemParameterValueByIndex(int32_t i, char *buffer, int32_t bufferCapacity, UErrorCode *status) { - if(i>=0 && i< (int32_t)U_SYSPARAM_COUNT) { - return systemParams[i].paramFunction(&(systemParams[i]),buffer,bufferCapacity,status); - } else { - return 0; - } -} - -U_CAPI void udbg_writeIcuInfo(FILE *out) { - char str[2000]; - /* todo: API for writing DTD? */ - fprintf(out, " <icuSystemParams type=\"icu4c\">\n"); - const char *paramName; - for(int32_t i=0;(paramName=udbg_getSystemParameterNameByIndex(i))!=NULL;i++) { - UErrorCode status2 = U_ZERO_ERROR; - udbg_getSystemParameterValueByIndex(i, str,2000,&status2); - if(U_SUCCESS(status2)) { - fprintf(out," <param name=\"%s\">%s</param>\n", paramName,str); - } else { - fprintf(out," <!-- n=\"%s\" ERROR: %s -->\n", paramName, u_errorName(status2)); - } - } - fprintf(out, " </icuSystemParams>\n"); -} - -#define ICU_TRAC_URL "http://bugs.icu-project.org/trac/ticket/" -#define CLDR_TRAC_URL "http://unicode.org/cldr/trac/ticket/" -#define CLDR_TICKET_PREFIX "cldrbug:" - -U_CAPI char *udbg_knownIssueURLFrom(const char *ticket, char *buf) { - if( ticket==NULL ) { - return NULL; - } - - if( !strncmp(ticket, CLDR_TICKET_PREFIX, strlen(CLDR_TICKET_PREFIX)) ) { - strcpy( buf, CLDR_TRAC_URL ); - strcat( buf, ticket+strlen(CLDR_TICKET_PREFIX) ); - } else { - strcpy( buf, ICU_TRAC_URL ); - strcat( buf, ticket ); - } - return buf; -} - - -#include <set> -#include <map> -#include <string> -#include <ostream> -#include <iostream> - -class KnownIssues { -public: - KnownIssues(); - ~KnownIssues(); - void add(const char *ticket, const char *where, const UChar *msg, UBool *firstForTicket, UBool *firstForWhere); - void add(const char *ticket, const char *where, const char *msg, UBool *firstForTicket, UBool *firstForWhere); - UBool print(); -private: - std::map< std::string, - std::map < std::string, std::set < std::string > > > fTable; -}; - -KnownIssues::KnownIssues() - : fTable() -{ -} - -KnownIssues::~KnownIssues() -{ -} - -void KnownIssues::add(const char *ticket, const char *where, const UChar *msg, UBool *firstForTicket, UBool *firstForWhere) -{ - if(fTable.find(ticket) == fTable.end()) { - if(firstForTicket!=NULL) *firstForTicket = TRUE; - fTable[ticket] = std::map < std::string, std::set < std::string > >(); - } else { - if(firstForTicket!=NULL) *firstForTicket = FALSE; - } - if(where==NULL) return; - - if(fTable[ticket].find(where) == fTable[ticket].end()) { - if(firstForWhere!=NULL) *firstForWhere = TRUE; - fTable[ticket][where] = std::set < std::string >(); - } else { - if(firstForWhere!=NULL) *firstForWhere = FALSE; - } - if(msg==NULL || !*msg) return; - - const icu::UnicodeString ustr(msg); - - fTable[ticket][where].insert(std::string(icu::CStr(ustr)())); -} - -void KnownIssues::add(const char *ticket, const char *where, const char *msg, UBool *firstForTicket, UBool *firstForWhere) -{ - if(fTable.find(ticket) == fTable.end()) { - if(firstForTicket!=NULL) *firstForTicket = TRUE; - fTable[ticket] = std::map < std::string, std::set < std::string > >(); - } else { - if(firstForTicket!=NULL) *firstForTicket = FALSE; - } - if(where==NULL) return; - - if(fTable[ticket].find(where) == fTable[ticket].end()) { - if(firstForWhere!=NULL) *firstForWhere = TRUE; - fTable[ticket][where] = std::set < std::string >(); - } else { - if(firstForWhere!=NULL) *firstForWhere = FALSE; - } - if(msg==NULL || !*msg) return; - - std::string str(msg); - fTable[ticket][where].insert(str); -} - -UBool KnownIssues::print() -{ - if(fTable.empty()) { - return FALSE; - } - - std::cout << "KNOWN ISSUES" << std::endl; - for( std::map< std::string, - std::map < std::string, std::set < std::string > > >::iterator i = fTable.begin(); - i != fTable.end(); - i++ ) { - char URL[1024]; - std::cout << '#' << (*i).first << " <" << udbg_knownIssueURLFrom( (*i).first.c_str(), URL ) << ">" << std::endl; - - for( std::map< std::string, std::set < std::string > >::iterator ii = (*i).second.begin(); - ii != (*i).second.end(); - ii++ ) { - std::cout << " " << (*ii).first << std::endl; - for ( std::set < std::string >::iterator iii = (*ii).second.begin(); - iii != (*ii).second.end(); - iii++ ) { - std::cout << " " << '"' << (*iii) << '"' << std::endl; - } - } - } - return TRUE; -} - -U_CAPI void *udbg_knownIssue_openU(void *ptr, const char *ticket, char *where, const UChar *msg, UBool *firstForTicket, - UBool *firstForWhere) { - KnownIssues *t = static_cast<KnownIssues*>(ptr); - if(t==NULL) { - t = new KnownIssues(); - } - - t->add(ticket, where, msg, firstForTicket, firstForWhere); - - return static_cast<void*>(t); -} - -U_CAPI void *udbg_knownIssue_open(void *ptr, const char *ticket, char *where, const char *msg, UBool *firstForTicket, - UBool *firstForWhere) { - KnownIssues *t = static_cast<KnownIssues*>(ptr); - if(t==NULL) { - t = new KnownIssues(); - } - - t->add(ticket, where, msg, firstForTicket, firstForWhere); - - return static_cast<void*>(t); -} - -U_CAPI UBool udbg_knownIssue_print(void *ptr) { - KnownIssues *t = static_cast<KnownIssues*>(ptr); - if(t==NULL) { - return FALSE; - } else { - t->print(); - return TRUE; - } -} - -U_CAPI void udbg_knownIssue_close(void *ptr) { - KnownIssues *t = static_cast<KnownIssues*>(ptr); - delete t; -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/udbgutil.h b/deps/node/deps/icu-small/source/tools/toolutil/udbgutil.h deleted file mode 100644 index 2f186e6e..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/udbgutil.h +++ /dev/null @@ -1,155 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -************************************************************************ -* Copyright (c) 2008-2015, International Business Machines -* Corporation and others. All Rights Reserved. -************************************************************************ -*/ - -/** C Utilities to aid in debugging **/ - -#ifndef _UDBGUTIL_H -#define _UDBGUTIL_H - -#include "unicode/utypes.h" -#include <stdio.h> - -enum UDebugEnumType { - UDBG_UDebugEnumType = 0, /* Self-referential, strings for UDebugEnumType. Count=ENUM_COUNT. */ -#if !UCONFIG_NO_FORMATTING - UDBG_UCalendarDateFields, /* UCalendarDateFields. Count=UCAL_FIELD_COUNT. Unsupported if UCONFIG_NO_FORMATTING. */ - UDBG_UCalendarMonths, /* UCalendarMonths. Count= (UCAL_UNDECIMBER+1) */ - UDBG_UDateFormatStyle, /* Count = UDAT_SHORT=1 */ -#endif -#if UCONFIG_ENABLE_PLUGINS - UDBG_UPlugReason, /* Count = UPLUG_REASON_COUNT */ - UDBG_UPlugLevel, /* COUNT = UPLUG_LEVEL_COUNT */ -#endif - UDBG_UAcceptResult, /* Count = ULOC_ACCEPT_FALLBACK+1=3 */ - - /* All following enums may be discontiguous. */ - -#if !UCONFIG_NO_COLLATION - UDBG_UColAttributeValue, /* UCOL_ATTRIBUTE_VALUE_COUNT */ -#endif - UDBG_ENUM_COUNT, - UDBG_HIGHEST_CONTIGUOUS_ENUM = UDBG_UAcceptResult, /**< last enum in this list with contiguous (testable) values. */ - UDBG_INVALID_ENUM = -1 /** Invalid enum value **/ -}; - -typedef enum UDebugEnumType UDebugEnumType; - -/** - * @param type the type of enum - * Print how many enums are contained for this type. - * Should be equal to the appropriate _COUNT constant or there is an error. Return -1 if unsupported. - */ -U_CAPI int32_t U_EXPORT2 udbg_enumCount(UDebugEnumType type); - -/** - * Convert an enum to a string - * @param type type of enum - * @param field field number - * @return string of the format "ERA", "YEAR", etc, or NULL if out of range or unsupported - */ -U_CAPI const char * U_EXPORT2 udbg_enumName(UDebugEnumType type, int32_t field); - -/** - * for consistency checking - * @param type the type of enum - * Print how many enums should be contained for this type. - * This is equal to the appropriate _COUNT constant or there is an error. Returns -1 if unsupported. - */ -U_CAPI int32_t U_EXPORT2 udbg_enumExpectedCount(UDebugEnumType type); - -/** - * For consistency checking, returns the expected enum ordinal value for the given index value. - * @param type which type - * @param field field number - * @return should be equal to 'field' or -1 if out of range. - */ -U_CAPI int32_t U_EXPORT2 udbg_enumArrayValue(UDebugEnumType type, int32_t field); - -/** - * Locate the specified field value by name. - * @param type which type - * @param name name of string (case sensitive) - * @return should be a field value or -1 if not found. - */ -U_CAPI int32_t U_EXPORT2 udbg_enumByName(UDebugEnumType type, const char *name); - - -/** - * Return the Platform (U_PLATFORM) as a string - */ -U_CAPI const char *udbg_getPlatform(void); - -/** - * Get the nth system parameter's name - * @param i index of name, starting from zero - * @return name, or NULL if off the end - * @see udbg_getSystemParameterValue - */ -U_CAPI const char *udbg_getSystemParameterNameByIndex(int32_t i); - -/** - * Get the nth system parameter's value, in a user supplied buffer - * @parameter i index of value, starting from zero - * @param status error status - * @return length written (standard termination rules) - * @see udbg_getSystemParameterName - */ -U_CAPI int32_t udbg_getSystemParameterValueByIndex(int32_t i, char *buffer, int32_t bufferCapacity, UErrorCode *status); - -/** - * Write ICU info as XML - */ -U_CAPI void udbg_writeIcuInfo(FILE *f); - -/** - * \def UDBG_KNOWNISSUE_LEN - * Length of output buffer for udbg_knownIssueURLFrom - */ -#define UDBG_KNOWNISSUE_LEN 255 - -/** - * Convert a "known issue" string into a URL - * @param ticket ticket string such as "10245" or "cldrbug:5013" - * @param buf output buffer - must be UDBG_KNOWNISSUE_LEN in size - * @return pointer to output buffer, or NULL on err - */ -U_CAPI char *udbg_knownIssueURLFrom(const char *ticket, char *buf); - -/** - * Open (or reopen) a 'known issue' table. - * @param ptr pointer to 'table'. Opaque. - * @return new or existing ptr - */ -U_CAPI void *udbg_knownIssue_openU(void *ptr, const char *ticket, char *where, const UChar *msg, UBool *firstForTicket, - UBool *firstForWhere); - - -/** - * Open (or reopen) a 'known issue' table. - * @param ptr pointer to 'table'. Opaque. - * @return new or existing ptr - */ -U_CAPI void *udbg_knownIssue_open(void *ptr, const char *ticket, char *where, const char *msg, UBool *firstForTicket, - UBool *firstForWhere); - -/** - * Print 'known issue' table, to std::cout. - * @param ptr pointer from udbg_knownIssue - * @return TRUE if there were any issues. - */ -U_CAPI UBool udbg_knownIssue_print(void *ptr); - -/** - * Close 'known issue' table. - * @param ptr - */ -U_CAPI void udbg_knownIssue_close(void *ptr); - - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/unewdata.cpp b/deps/node/deps/icu-small/source/tools/toolutil/unewdata.cpp deleted file mode 100644 index 22d85408..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/unewdata.cpp +++ /dev/null @@ -1,275 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2010, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: unewdata.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999oct25 -* created by: Markus W. Scherer -*/ - -#include <stdio.h> -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/ustring.h" -#include "cmemory.h" -#include "cstring.h" -#include "filestrm.h" -#include "unicode/udata.h" -#include "unewdata.h" - -struct UNewDataMemory { - FileStream *file; - uint16_t headerSize; - uint8_t magic1, magic2; -}; - -U_CAPI UNewDataMemory * U_EXPORT2 -udata_create(const char *dir, const char *type, const char *name, - const UDataInfo *pInfo, - const char *comment, - UErrorCode *pErrorCode) { - UNewDataMemory *pData; - uint16_t headerSize, commentLength; - char filename[512]; - uint8_t bytes[16]; - int32_t length; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } else if(name==NULL || *name==0 || pInfo==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - /* allocate the data structure */ - pData=(UNewDataMemory *)uprv_malloc(sizeof(UNewDataMemory)); - if(pData==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - /* Check that the full path won't be too long */ - length = 0; /* Start with nothing */ - if(dir != NULL && *dir !=0) /* Add directory length if one was given */ - { - length += static_cast<int32_t>(strlen(dir)); - - /* Add 1 if dir doesn't end with path sep */ - if (dir[strlen(dir) - 1]!= U_FILE_SEP_CHAR) { - length++; - } - } - length += static_cast<int32_t>(strlen(name)); /* Add the filename length */ - - if(type != NULL && *type !=0) { /* Add directory length if given */ - length += static_cast<int32_t>(strlen(type)); - } - - - /* LDH buffer Length error check */ - if(length > ((int32_t)sizeof(filename) - 1)) - { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - uprv_free(pData); - return NULL; - } - - /* open the output file */ - if(dir!=NULL && *dir!=0) { /* if dir has a value, we prepend it to the filename */ - char *p=filename+strlen(dir); - uprv_strcpy(filename, dir); - if (*(p-1)!=U_FILE_SEP_CHAR) { - *p++=U_FILE_SEP_CHAR; - *p=0; - } - } else { /* otherwise, we'll output to the current dir */ - filename[0]=0; - } - uprv_strcat(filename, name); - if(type!=NULL && *type!=0) { - uprv_strcat(filename, "."); - uprv_strcat(filename, type); - } - pData->file=T_FileStream_open(filename, "wb"); - if(pData->file==NULL) { - uprv_free(pData); - *pErrorCode=U_FILE_ACCESS_ERROR; - return NULL; - } - - /* write the header information */ - headerSize=(uint16_t)(pInfo->size+4); - if(comment!=NULL && *comment!=0) { - commentLength=(uint16_t)(uprv_strlen(comment)+1); - headerSize+=commentLength; - } else { - commentLength=0; - } - - /* write the size of the header, take padding into account */ - pData->headerSize=(uint16_t)((headerSize+15)&~0xf); - pData->magic1=0xda; - pData->magic2=0x27; - T_FileStream_write(pData->file, &pData->headerSize, 4); - - /* write the information data */ - T_FileStream_write(pData->file, pInfo, pInfo->size); - - /* write the comment */ - if(commentLength>0) { - T_FileStream_write(pData->file, comment, commentLength); - } - - /* write padding bytes to align the data section to 16 bytes */ - headerSize&=0xf; - if(headerSize!=0) { - headerSize=(uint16_t)(16-headerSize); - uprv_memset(bytes, 0, headerSize); - T_FileStream_write(pData->file, bytes, headerSize); - } - - return pData; -} - -U_CAPI uint32_t U_EXPORT2 -udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode) { - uint32_t fileLength=0; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if(pData!=NULL) { - if(pData->file!=NULL) { - /* fflush(pData->file);*/ - fileLength=T_FileStream_size(pData->file); - if(T_FileStream_error(pData->file)) { - *pErrorCode=U_FILE_ACCESS_ERROR; - } else { - fileLength-=pData->headerSize; - } - T_FileStream_close(pData->file); - } - uprv_free(pData); - } - - return fileLength; -} - -/* dummy UDataInfo cf. udata.h */ -static const UDataInfo dummyDataInfo = { - sizeof(UDataInfo), - 0, - - U_IS_BIG_ENDIAN, - U_CHARSET_FAMILY, - U_SIZEOF_UCHAR, - 0, - - { 0, 0, 0, 0 }, /* dummy dataFormat */ - { 0, 0, 0, 0 }, /* dummy formatVersion */ - { 0, 0, 0, 0 } /* dummy dataVersion */ -}; - -U_CAPI void U_EXPORT2 -udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode) { - if(U_SUCCESS(*pErrorCode)) { - udata_finish(udata_create(dir, type, name, &dummyDataInfo, NULL, pErrorCode), pErrorCode); - if(U_FAILURE(*pErrorCode)) { - fprintf(stderr, "error %s writing dummy data file %s" U_FILE_SEP_STRING "%s.%s\n", - u_errorName(*pErrorCode), dir, name, type); - exit(*pErrorCode); - } - } -} - -U_CAPI void U_EXPORT2 -udata_write8(UNewDataMemory *pData, uint8_t byte) { - if(pData!=NULL && pData->file!=NULL) { - T_FileStream_write(pData->file, &byte, 1); - } -} - -U_CAPI void U_EXPORT2 -udata_write16(UNewDataMemory *pData, uint16_t word) { - if(pData!=NULL && pData->file!=NULL) { - T_FileStream_write(pData->file, &word, 2); - } -} - -U_CAPI void U_EXPORT2 -udata_write32(UNewDataMemory *pData, uint32_t wyde) { - if(pData!=NULL && pData->file!=NULL) { - T_FileStream_write(pData->file, &wyde, 4); - } -} - -U_CAPI void U_EXPORT2 -udata_writeBlock(UNewDataMemory *pData, const void *s, int32_t length) { - if(pData!=NULL && pData->file!=NULL) { - if(length>0) { - T_FileStream_write(pData->file, s, length); - } - } -} - -U_CAPI void U_EXPORT2 -udata_writePadding(UNewDataMemory *pData, int32_t length) { - static const uint8_t padding[16]={ - 0xaa, 0xaa, 0xaa, 0xaa, - 0xaa, 0xaa, 0xaa, 0xaa, - 0xaa, 0xaa, 0xaa, 0xaa, - 0xaa, 0xaa, 0xaa, 0xaa - }; - if(pData!=NULL && pData->file!=NULL) { - while(length>=16) { - T_FileStream_write(pData->file, padding, 16); - length-=16; - } - if(length>0) { - T_FileStream_write(pData->file, padding, length); - } - } -} - -U_CAPI void U_EXPORT2 -udata_writeString(UNewDataMemory *pData, const char *s, int32_t length) { - if(pData!=NULL && pData->file!=NULL) { - if(length==-1) { - length=(int32_t)uprv_strlen(s); - } - if(length>0) { - T_FileStream_write(pData->file, s, length); - } - } -} - -U_CAPI void U_EXPORT2 -udata_writeUString(UNewDataMemory *pData, const UChar *s, int32_t length) { - if(pData!=NULL && pData->file!=NULL) { - if(length==-1) { - length=u_strlen(s); - } - if(length>0) { - T_FileStream_write(pData->file, s, length*sizeof(UChar)); - } - } -} - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/node/deps/icu-small/source/tools/toolutil/unewdata.h b/deps/node/deps/icu-small/source/tools/toolutil/unewdata.h deleted file mode 100644 index 137fb495..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/unewdata.h +++ /dev/null @@ -1,113 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2010, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: unewdata.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999oct25 -* created by: Markus W. Scherer -*/ - -#ifndef __UNEWDATA_H__ -#define __UNEWDATA_H__ - -#include "unicode/utypes.h" -#include "unicode/udata.h" - -/* API for writing data -----------------------------------------------------*/ - -/** @memo Forward declaration of the data memory creation type. */ -typedef struct UNewDataMemory UNewDataMemory; - -/** - * Create a new binary data file. - * The file-writing <code>udata_</code> functions facilitate writing - * binary data files that can be read by ICU's <code>udata</code> API. - * This function opens a new file with a filename determined from its - * parameters - of the form "name.type". - * It then writes a short header, followed by the <code>UDataInfo</code> - * structure and, optionally, by the comment string. - * It then writes padding bytes to round up to a multiple of 16 bytes. - * Subsequent write operations will thus start at an offset in the file - * that is a multiple of 16. <code>udata_getMemory()</code> will return - * a pointer to this same starting offset. - * - * See udata.h . - * - * @param dir A string that specifies the directory where the data will be - * written. If <code>NULL</code>, then - * <code>u_getDataDirectory</code> is used. - * @param type A string that specifies the type of data to be written. - * For example, resource bundles are written with type "res", - * conversion tables with type "cnv". - * This may be <code>NULL</code> or empty. - * @param name A string that specifies the name of the data. - * @param pInfo A pointer to a correctly filled <code>UDataInfo</code> - * structure that will be copied into the file. - * @param comment A string (e.g., a copyright statement) that will be - * copied into the file if it is not <code>NULL</code> - * or empty. This string serves only as a comment in the binary - * file. It will not be accessible by any API. - * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>. - */ -U_CAPI UNewDataMemory * U_EXPORT2 -udata_create(const char *dir, const char *type, const char *name, - const UDataInfo *pInfo, - const char *comment, - UErrorCode *pErrorCode); - -/** @memo Close a newly written binary file. */ -U_CAPI uint32_t U_EXPORT2 -udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode); - -/** @memo Write a dummy data file. */ -U_CAPI void U_EXPORT2 -udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode); - -/** @memo Write an 8-bit byte to the file. */ -U_CAPI void U_EXPORT2 -udata_write8(UNewDataMemory *pData, uint8_t byte); - -/** @memo Write a 16-bit word to the file. */ -U_CAPI void U_EXPORT2 -udata_write16(UNewDataMemory *pData, uint16_t word); - -/** @memo Write a 32-bit word to the file. */ -U_CAPI void U_EXPORT2 -udata_write32(UNewDataMemory *pData, uint32_t wyde); - -/** @memo Write a block of bytes to the file. */ -U_CAPI void U_EXPORT2 -udata_writeBlock(UNewDataMemory *pData, const void *s, int32_t length); - -/** @memo Write a block of arbitrary padding bytes to the file. */ -U_CAPI void U_EXPORT2 -udata_writePadding(UNewDataMemory *pData, int32_t length); - -/** @memo Write a <code>char*</code> string of platform "invariant characters" to the file. */ -U_CAPI void U_EXPORT2 -udata_writeString(UNewDataMemory *pData, const char *s, int32_t length); - -/** @memo Write a <code>UChar*</code> string of Unicode character code units to the file. */ -U_CAPI void U_EXPORT2 -udata_writeUString(UNewDataMemory *pData, const UChar *s, int32_t length); - - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/uoptions.cpp b/deps/node/deps/icu-small/source/tools/toolutil/uoptions.cpp deleted file mode 100644 index 53a77bcc..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/uoptions.cpp +++ /dev/null @@ -1,133 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2000-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uoptions.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000apr17 -* created by: Markus W. Scherer -* -* This file provides a command line argument parser. -*/ - -#include "unicode/utypes.h" -#include "cstring.h" -#include "uoptions.h" - -U_CAPI int U_EXPORT2 -u_parseArgs(int argc, char* argv[], - int optionCount, UOption options[]) { - char *arg; - int i=1, remaining=1; - char c, stopOptions=0; - - while(i<argc) { - arg=argv[i]; - if(!stopOptions && *arg=='-' && (c=arg[1])!=0) { - /* process an option */ - UOption *option=NULL; - arg+=2; - if(c=='-') { - /* process a long option */ - if(*arg==0) { - /* stop processing options after "--" */ - stopOptions=1; - } else { - /* search for the option string */ - int j; - for(j=0; j<optionCount; ++j) { - if(options[j].longName && uprv_strcmp(arg, options[j].longName)==0) { - option=options+j; - break; - } - } - if(option==NULL) { - /* no option matches */ - return -i; - } - option->doesOccur=1; - - if(option->hasArg!=UOPT_NO_ARG) { - /* parse the argument for the option, if any */ - if(i+1<argc && !(argv[i+1][0]=='-' && argv[i+1][1]!=0)) { - /* argument in the next argv[], and there is not an option in there */ - option->value=argv[++i]; - } else if(option->hasArg==UOPT_REQUIRES_ARG) { - /* there is no argument, but one is required: return with error */ - option->doesOccur=0; - return -i; - } - } - - if(option->optionFn!=NULL && option->optionFn(option->context, option)<0) { - /* the option function was called and returned an error */ - option->doesOccur=0; - return -i; - } - } - } else { - /* process one or more short options */ - do { - /* search for the option letter */ - int j; - for(j=0; j<optionCount; ++j) { - if(c==options[j].shortName) { - option=options+j; - break; - } - } - if(option==NULL) { - /* no option matches */ - return -i; - } - option->doesOccur=1; - - if(option->hasArg!=UOPT_NO_ARG) { - /* parse the argument for the option, if any */ - if(*arg!=0) { - /* argument following in the same argv[] */ - option->value=arg; - /* do not process the rest of this arg as option letters */ - break; - } else if(i+1<argc && !(argv[i+1][0]=='-' && argv[i+1][1]!=0)) { - /* argument in the next argv[], and there is not an option in there */ - option->value=argv[++i]; - /* this break is redundant because we know that *arg==0 */ - break; - } else if(option->hasArg==UOPT_REQUIRES_ARG) { - /* there is no argument, but one is required: return with error */ - option->doesOccur=0; - return -i; - } - } - - if(option->optionFn!=NULL && option->optionFn(option->context, option)<0) { - /* the option function was called and returned an error */ - option->doesOccur=0; - return -i; - } - - /* get the next option letter */ - option=NULL; - c=*arg++; - } while(c!=0); - } - - /* go to next argv[] */ - ++i; - } else { - /* move a non-option up in argv[] */ - argv[remaining++]=arg; - ++i; - } - } - return remaining; -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/uoptions.h b/deps/node/deps/icu-small/source/tools/toolutil/uoptions.h deleted file mode 100644 index a7a2e96c..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/uoptions.h +++ /dev/null @@ -1,143 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2000-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uoptions.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000apr17 -* created by: Markus W. Scherer -* -* This file provides a command line argument parser. -*/ - -#ifndef __UOPTIONS_H__ -#define __UOPTIONS_H__ - -#include "unicode/utypes.h" - -/* This should usually be called before calling u_parseArgs */ -/*#if U_PLATFORM == U_PF_OS390 && (U_CHARSET_FAMILY == U_ASCII_FAMILY)*/ - /* translate args from EBCDIC to ASCII */ -/*# define U_MAIN_INIT_ARGS(argc, argv) __argvtoascii_a(argc, argv)*/ -/*#elif defined(XP_MAC_CONSOLE)*/ -#if defined(XP_MAC_CONSOLE) -# include <console.h> - /* Get the arguments from the GUI, since old Macs don't have a console Window. */ -# define U_MAIN_INIT_ARGS(argc, argv) argc = ccommand((char***)&argv) -#else - /* Normally we do nothing. */ -# define U_MAIN_INIT_ARGS(argc, argv) -#endif - - - -/* forward declarations for the function declaration */ -struct UOption; -typedef struct UOption UOption; - -/* function to be called for a command line option */ -typedef int UOptionFn(void *context, UOption *option); - -/* values of UOption.hasArg */ -enum { UOPT_NO_ARG, UOPT_REQUIRES_ARG, UOPT_OPTIONAL_ARG }; - -/* structure describing a command line option */ -struct UOption { - const char *longName; /* "foo" for --foo */ - const char *value; /* output placeholder, will point to the argument string, if any */ - UOptionFn *optionFn; /* function to be called when this option occurs */ - void *context; /* parameter for the function */ - char shortName; /* 'f' for -f */ - char hasArg; /* enum value: option takes no/requires/may have argument */ - char doesOccur; /* boolean for "this one occured" */ -}; - -/* macro for an entry in a declaration of UOption[] */ -#define UOPTION_DEF(longName, shortName, hasArg) \ - { longName, NULL, NULL, NULL, shortName, hasArg, 0 } - -/* ICU Tools option definitions */ -#define UOPTION_HELP_H UOPTION_DEF("help", 'h', UOPT_NO_ARG) -#define UOPTION_HELP_QUESTION_MARK UOPTION_DEF("help", '?', UOPT_NO_ARG) -#define UOPTION_VERBOSE UOPTION_DEF("verbose", 'v', UOPT_NO_ARG) -#define UOPTION_QUIET UOPTION_DEF("quiet", 'q', UOPT_NO_ARG) -#define UOPTION_VERSION UOPTION_DEF("version", 'V', UOPT_NO_ARG) -#define UOPTION_COPYRIGHT UOPTION_DEF("copyright", 'c', UOPT_NO_ARG) - -#define UOPTION_DESTDIR UOPTION_DEF("destdir", 'd', UOPT_REQUIRES_ARG) -#define UOPTION_SOURCEDIR UOPTION_DEF("sourcedir", 's', UOPT_REQUIRES_ARG) -#define UOPTION_ENCODING UOPTION_DEF("encoding", 'e', UOPT_REQUIRES_ARG) -#define UOPTION_ICUDATADIR UOPTION_DEF("icudatadir", 'i', UOPT_REQUIRES_ARG) -#define UOPTION_WRITE_JAVA UOPTION_DEF("write-java", 'j', UOPT_OPTIONAL_ARG) -#define UOPTION_PACKAGE_NAME UOPTION_DEF("package-name", 'p', UOPT_REQUIRES_ARG) -#define UOPTION_BUNDLE_NAME UOPTION_DEF("bundle-name", 'b', UOPT_REQUIRES_ARG) - -/** - * C Command line argument parser. - * - * This function takes the argv[argc] command line and a description of - * the program's options in form of an array of UOption structures. - * Each UOption defines a long and a short name (a string and a character) - * for options like "--foo" and "-f". - * - * Each option is marked with whether it does not take an argument, - * requires one, or optionally takes one. The argument may follow in - * the same argv[] entry for short options, or it may always follow - * in the next argv[] entry. - * - * An argument is in the next argv[] entry for both long and short name - * options, except it is taken from directly behind the short name in - * its own argv[] entry if there are characters following the option letter. - * An argument in its own argv[] entry must not begin with a '-' - * unless it is only the '-' itself. There is no restriction of the - * argument format if it is part of the short name options's argv[] entry. - * - * The argument is stored in the value field of the corresponding - * UOption entry, and the doesOccur field is set to 1 if the option - * is found at all. - * - * Short name options without arguments can be collapsed into a single - * argv[] entry. After an option letter takes an argument, following - * letters will be taken as its argument. - * - * If the same option is found several times, then the last - * argument value will be stored in the value field. - * - * For each option, a function can be called. This could be used - * for options that occur multiple times and all arguments are to - * be collected. - * - * All options are removed from the argv[] array itself. If the parser - * is successful, then it returns the number of remaining non-option - * strings (including argv[0]). - * argv[0], the program name, is never read or modified. - * - * An option "--" ends option processing; everything after this - * remains in the argv[] array. - * - * An option string "-" alone is treated as a non-option. - * - * If an option is not recognized or an argument missing, then - * the parser returns with the negative index of the argv[] entry - * where the error was detected. - * - * The OS/400 compiler requires that argv either be "char* argv[]", - * or "const char* const argv[]", and it will not accept, - * "const char* argv[]" as a definition for main(). - * - * @param argv This parameter is modified - * @param options This parameter is modified - */ -U_CAPI int U_EXPORT2 -u_parseArgs(int argc, char* argv[], - int optionCount, UOption options[]); - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/uparse.cpp b/deps/node/deps/icu-small/source/tools/toolutil/uparse.cpp deleted file mode 100644 index a932c171..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/uparse.cpp +++ /dev/null @@ -1,383 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2000-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uparse.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000apr18 -* created by: Markus W. Scherer -* -* This file provides a parser for files that are delimited by one single -* character like ';' or TAB. Example: the Unicode Character Properties files -* like UnicodeData.txt are semicolon-delimited. -*/ - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "unicode/ustring.h" -#include "unicode/utf16.h" -#include "cstring.h" -#include "filestrm.h" -#include "uparse.h" -#include "ustr_imp.h" - -#include <stdio.h> - -U_CAPI const char * U_EXPORT2 -u_skipWhitespace(const char *s) { - while(U_IS_INV_WHITESPACE(*s)) { - ++s; - } - return s; -} - -U_CAPI char * U_EXPORT2 -u_rtrim(char *s) { - char *end=uprv_strchr(s, 0); - while(s<end && U_IS_INV_WHITESPACE(*(end-1))) { - *--end = 0; - } - return end; -} - -/* - * If the string starts with # @missing: then return the pointer to the - * following non-whitespace character. - * Otherwise return the original pointer. - * Unicode 5.0 adds such lines in some data files to document - * default property values. - * Poor man's regex for variable amounts of white space. - */ -static const char * -getMissingLimit(const char *s) { - const char *s0=s; - if( - *(s=u_skipWhitespace(s))=='#' && - *(s=u_skipWhitespace(s+1))=='@' && - 0==strncmp((s=u_skipWhitespace(s+1)), "missing", 7) && - *(s=u_skipWhitespace(s+7))==':' - ) { - return u_skipWhitespace(s+1); - } else { - return s0; - } -} - -U_CAPI void U_EXPORT2 -u_parseDelimitedFile(const char *filename, char delimiter, - char *fields[][2], int32_t fieldCount, - UParseLineFn *lineFn, void *context, - UErrorCode *pErrorCode) { - FileStream *file; - char line[10000]; - char *start, *limit; - int32_t i, length; - - if(U_FAILURE(*pErrorCode)) { - return; - } - - if(fields==NULL || lineFn==NULL || fieldCount<=0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) { - filename=NULL; - file=T_FileStream_stdin(); - } else { - file=T_FileStream_open(filename, "r"); - } - if(file==NULL) { - *pErrorCode=U_FILE_ACCESS_ERROR; - return; - } - - while(T_FileStream_readLine(file, line, sizeof(line))!=NULL) { - /* remove trailing newline characters */ - length=(int32_t)(u_rtrim(line)-line); - - /* - * detect a line with # @missing: - * start parsing after that, or else from the beginning of the line - * set the default warning for @missing lines - */ - start=(char *)getMissingLimit(line); - if(start==line) { - *pErrorCode=U_ZERO_ERROR; - } else { - *pErrorCode=U_USING_DEFAULT_WARNING; - } - - /* skip this line if it is empty or a comment */ - if(*start==0 || *start=='#') { - continue; - } - - /* remove in-line comments */ - limit=uprv_strchr(start, '#'); - if(limit!=NULL) { - /* get white space before the pound sign */ - while(limit>start && U_IS_INV_WHITESPACE(*(limit-1))) { - --limit; - } - - /* truncate the line */ - *limit=0; - } - - /* skip lines with only whitespace */ - if(u_skipWhitespace(start)[0]==0) { - continue; - } - - /* for each field, call the corresponding field function */ - for(i=0; i<fieldCount; ++i) { - /* set the limit pointer of this field */ - limit=start; - while(*limit!=delimiter && *limit!=0) { - ++limit; - } - - /* set the field start and limit in the fields array */ - fields[i][0]=start; - fields[i][1]=limit; - - /* set start to the beginning of the next field, if any */ - start=limit; - if(*start!=0) { - ++start; - } else if(i+1<fieldCount) { - *pErrorCode=U_PARSE_ERROR; - limit=line+length; - i=fieldCount; - break; - } - } - - /* too few fields? */ - if(U_FAILURE(*pErrorCode)) { - break; - } - - /* call the field function */ - lineFn(context, fields, fieldCount, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - break; - } - } - - if(filename!=NULL) { - T_FileStream_close(file); - } -} - -/* - * parse a list of code points - * store them as a UTF-32 string in dest[destCapacity] - * return the number of code points - */ -U_CAPI int32_t U_EXPORT2 -u_parseCodePoints(const char *s, - uint32_t *dest, int32_t destCapacity, - UErrorCode *pErrorCode) { - char *end; - uint32_t value; - int32_t count; - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - count=0; - for(;;) { - s=u_skipWhitespace(s); - if(*s==';' || *s==0) { - return count; - } - - /* read one code point */ - value=(uint32_t)uprv_strtoul(s, &end, 16); - if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - - /* append it to the destination array */ - if(count<destCapacity) { - dest[count++]=value; - } else { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* go to the following characters */ - s=end; - } -} - -/* - * parse a list of code points - * store them as a string in dest[destCapacity] - * set the first code point in *pFirst - * @return The length of the string in numbers of UChars. - */ -U_CAPI int32_t U_EXPORT2 -u_parseString(const char *s, - UChar *dest, int32_t destCapacity, - uint32_t *pFirst, - UErrorCode *pErrorCode) { - char *end; - uint32_t value; - int32_t destLength; - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(pFirst!=NULL) { - *pFirst=0xffffffff; - } - - destLength=0; - for(;;) { - s=u_skipWhitespace(s); - if(*s==';' || *s==0) { - if(destLength<destCapacity) { - dest[destLength]=0; - } else if(destLength==destCapacity) { - *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; - } else { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destLength; - } - - /* read one code point */ - value=(uint32_t)uprv_strtoul(s, &end, 16); - if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - - /* store the first code point */ - if(pFirst!=NULL) { - *pFirst=value; - pFirst=NULL; - } - - /* append it to the destination array */ - if((destLength+U16_LENGTH(value))<=destCapacity) { - U16_APPEND_UNSAFE(dest, destLength, value); - } else { - destLength+=U16_LENGTH(value); - } - - /* go to the following characters */ - s=end; - } -} - -/* read a range like start or start..end */ -U_CAPI int32_t U_EXPORT2 -u_parseCodePointRangeAnyTerminator(const char *s, - uint32_t *pStart, uint32_t *pEnd, - const char **terminator, - UErrorCode *pErrorCode) { - char *end; - uint32_t value; - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(s==NULL || pStart==NULL || pEnd==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* read the start code point */ - s=u_skipWhitespace(s); - value=(uint32_t)uprv_strtoul(s, &end, 16); - if(end<=s || value>=0x110000) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - *pStart=*pEnd=value; - - /* is there a "..end"? */ - s=u_skipWhitespace(end); - if(*s!='.' || s[1]!='.') { - *terminator=end; - return 1; - } - s=u_skipWhitespace(s+2); - - /* read the end code point */ - value=(uint32_t)uprv_strtoul(s, &end, 16); - if(end<=s || value>=0x110000) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - *pEnd=value; - - /* is this a valid range? */ - if(value<*pStart) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - - *terminator=end; - return value-*pStart+1; -} - -U_CAPI int32_t U_EXPORT2 -u_parseCodePointRange(const char *s, - uint32_t *pStart, uint32_t *pEnd, - UErrorCode *pErrorCode) { - const char *terminator; - int32_t rangeLength= - u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorCode); - if(U_SUCCESS(*pErrorCode)) { - terminator=u_skipWhitespace(terminator); - if(*terminator!=';' && *terminator!=0) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - } - return rangeLength; -} - -U_CAPI int32_t U_EXPORT2 -u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) { - const char *read = source; - int32_t i = 0; - unsigned int value = 0; - if(sLen == -1) { - sLen = (int32_t)strlen(source); - } - - while(read < source+sLen) { - sscanf(read, "%2x", &value); - if(i < destCapacity) { - dest[i] = (char)value; - } - i++; - read += 2; - } - return u_terminateChars(dest, destCapacity, i, status); -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/uparse.h b/deps/node/deps/icu-small/source/tools/toolutil/uparse.h deleted file mode 100644 index df0e79a2..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/uparse.h +++ /dev/null @@ -1,153 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2000-2010, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uparse.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000apr18 -* created by: Markus W. Scherer -* -* This file provides a parser for files that are delimited by one single -* character like ';' or TAB. Example: the Unicode Character Properties files -* like UnicodeData.txt are semicolon-delimited. -*/ - -#ifndef __UPARSE_H__ -#define __UPARSE_H__ - -#include "unicode/utypes.h" - -/** - * Is c an invariant-character whitespace? - * @param c invariant character - */ -#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n') - -U_CDECL_BEGIN - -/** - * Skip space ' ' and TAB '\t' characters. - * - * @param s Pointer to characters. - * @return Pointer to first character at or after s that is not a space or TAB. - */ -U_CAPI const char * U_EXPORT2 -u_skipWhitespace(const char *s); - -/** - * Trim whitespace (including line endings) from the end of the string. - * - * @param s Pointer to the string. - * @return Pointer to the new end of the string. - */ -U_CAPI char * U_EXPORT2 -u_rtrim(char *s); - -/** Function type for u_parseDelimitedFile(). */ -typedef void U_CALLCONV -UParseLineFn(void *context, - char *fields[][2], - int32_t fieldCount, - UErrorCode *pErrorCode); - -/** - * Parser for files that are similar to UnicodeData.txt: - * This function opens the file and reads it line by line. It skips empty lines - * and comment lines that start with a '#'. - * All other lines are separated into fields with one delimiter character - * (semicolon for Unicode Properties files) between two fields. The last field in - * a line does not need to be terminated with a delimiter. - * - * For each line, after segmenting it, a line function is called. - * It gets passed the array of field start and limit pointers that is - * passed into this parser and filled by it for each line. - * For each field i of the line, the start pointer in fields[i][0] - * points to the beginning of the field, while the limit pointer in fields[i][1] - * points behind the field, i.e., to the delimiter or the line end. - * - * The context parameter of the line function is - * the same as the one for the parse function. - * - * The line function may modify the contents of the fields including the - * limit characters. - * - * If the file cannot be opened, or there is a parsing error or a field function - * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code. - */ -U_CAPI void U_EXPORT2 -u_parseDelimitedFile(const char *filename, char delimiter, - char *fields[][2], int32_t fieldCount, - UParseLineFn *lineFn, void *context, - UErrorCode *pErrorCode); - -/** - * Parse a string of code points like 0061 0308 0300. - * s must end with either ';' or NUL. - * - * @return Number of code points. - */ -U_CAPI int32_t U_EXPORT2 -u_parseCodePoints(const char *s, - uint32_t *dest, int32_t destCapacity, - UErrorCode *pErrorCode); - -/** - * Parse a list of code points like 0061 0308 0300 - * into a UChar * string. - * s must end with either ';' or NUL. - * - * Set the first code point in *pFirst. - * - * @param s Input char * string. - * @param dest Output string buffer. - * @param destCapacity Capacity of dest in numbers of UChars. - * @param pFirst If pFirst!=NULL the *pFirst will be set to the first - * code point in the string. - * @param pErrorCode ICU error code. - * @return The length of the string in numbers of UChars. - */ -U_CAPI int32_t U_EXPORT2 -u_parseString(const char *s, - UChar *dest, int32_t destCapacity, - uint32_t *pFirst, - UErrorCode *pErrorCode); - -/** - * Parse a code point range like - * 0085 or - * 4E00..9FA5. - * - * s must contain such a range and end with either ';' or NUL. - * - * @return Length of code point range, end-start+1 - */ -U_CAPI int32_t U_EXPORT2 -u_parseCodePointRange(const char *s, - uint32_t *pStart, uint32_t *pEnd, - UErrorCode *pErrorCode); - -/** - * Same as u_parseCodePointRange() but the range may be terminated by - * any character. The position of the terminating character is returned via - * the *terminator output parameter. - */ -U_CAPI int32_t U_EXPORT2 -u_parseCodePointRangeAnyTerminator(const char *s, - uint32_t *pStart, uint32_t *pEnd, - const char **terminator, - UErrorCode *pErrorCode); - -U_CAPI int32_t U_EXPORT2 -u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status); - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/writesrc.cpp b/deps/node/deps/icu-small/source/tools/toolutil/writesrc.cpp deleted file mode 100644 index 1a1dd396..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/writesrc.cpp +++ /dev/null @@ -1,345 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2005-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: writesrc.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2005apr23 -* created by: Markus W. Scherer -* -* Helper functions for writing source code for data. -*/ - -#include <stdio.h> -#include <time.h> -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/ucptrie.h" -#include "utrie2.h" -#include "cstring.h" -#include "writesrc.h" - -static FILE * -usrc_createWithHeader(const char *path, const char *filename, - const char *header, const char *generator) { - char buffer[1024]; - const char *p; - char *q; - FILE *f; - char c; - - if(path==NULL) { - p=filename; - } else { - /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */ - uprv_strcpy(buffer, path); - q=buffer+uprv_strlen(buffer); - if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) { - *q++=U_FILE_SEP_CHAR; - } - uprv_strcpy(q, filename); - p=buffer; - } - - f=fopen(p, "w"); - if(f!=NULL) { - const struct tm *lt; - time_t t; - - time(&t); - lt=localtime(&t); - if(generator==NULL) { - strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt); - fprintf(f, header, filename, buffer); - } else { - fprintf(f, header, filename, generator); - } - } else { - fprintf( - stderr, - "usrc_create(%s, %s): unable to create file\n", - path!=NULL ? path : "", filename); - } - return f; -} - -U_CAPI FILE * U_EXPORT2 -usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) { - const char *header; - char buffer[200]; - if(copyrightYear<=2016) { - header= - "// © 2016 and later: Unicode, Inc. and others.\n" - "// License & terms of use: http://www.unicode.org/copyright.html\n" - "//\n" - "// Copyright (C) 1999-2016, International Business Machines\n" - "// Corporation and others. All Rights Reserved.\n" - "//\n" - "// file name: %s\n" - "//\n" - "// machine-generated by: %s\n" - "\n\n"; - } else { - sprintf(buffer, - "// © %d and later: Unicode, Inc. and others.\n" - "// License & terms of use: http://www.unicode.org/copyright.html\n" - "//\n" - "// file name: %%s\n" - "//\n" - "// machine-generated by: %%s\n" - "\n\n", - (int)copyrightYear); - header=buffer; - } - return usrc_createWithHeader(path, filename, header, generator); -} - -U_CAPI FILE * U_EXPORT2 -usrc_createTextData(const char *path, const char *filename, const char *generator) { - // TODO: Add parameter for the first year this file was generated, not before 2016. - static const char *header= - "# Copyright (C) 2016 and later: Unicode, Inc. and others.\n" - "# License & terms of use: http://www.unicode.org/copyright.html\n" - "# Copyright (C) 1999-2016, International Business Machines\n" - "# Corporation and others. All Rights Reserved.\n" - "#\n" - "# file name: %s\n" - "#\n" - "# machine-generated by: %s\n" - "\n\n"; - return usrc_createWithHeader(path, filename, header, generator); -} - -U_CAPI void U_EXPORT2 -usrc_writeArray(FILE *f, - const char *prefix, - const void *p, int32_t width, int32_t length, - const char *postfix) { - const uint8_t *p8; - const uint16_t *p16; - const uint32_t *p32; - uint32_t value; - int32_t i, col; - - p8=NULL; - p16=NULL; - p32=NULL; - switch(width) { - case 8: - p8=(const uint8_t *)p; - break; - case 16: - p16=(const uint16_t *)p; - break; - case 32: - p32=(const uint32_t *)p; - break; - default: - fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width); - return; - } - if(prefix!=NULL) { - fprintf(f, prefix, (long)length); - } - for(i=col=0; i<length; ++i, ++col) { - if(i>0) { - if(col<16) { - fputc(',', f); - } else { - fputs(",\n", f); - col=0; - } - } - switch(width) { - case 8: - value=p8[i]; - break; - case 16: - value=p16[i]; - break; - case 32: - value=p32[i]; - break; - default: - value=0; /* unreachable */ - break; - } - fprintf(f, value<=9 ? "%lu" : "0x%lx", (unsigned long)value); - } - if(postfix!=NULL) { - fputs(postfix, f); - } -} - -U_CAPI void U_EXPORT2 -usrc_writeUTrie2Arrays(FILE *f, - const char *indexPrefix, const char *data32Prefix, - const UTrie2 *pTrie, - const char *postfix) { - if(pTrie->data32==NULL) { - /* 16-bit trie */ - usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, postfix); - } else { - /* 32-bit trie */ - usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, postfix); - usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, postfix); - } -} - -U_CAPI void U_EXPORT2 -usrc_writeUTrie2Struct(FILE *f, - const char *prefix, - const UTrie2 *pTrie, - const char *indexName, const char *data32Name, - const char *postfix) { - if(prefix!=NULL) { - fputs(prefix, f); - } - if(pTrie->data32==NULL) { - /* 16-bit trie */ - fprintf( - f, - " %s,\n" /* index */ - " %s+%ld,\n" /* data16 */ - " NULL,\n", /* data32 */ - indexName, - indexName, - (long)pTrie->indexLength); - } else { - /* 32-bit trie */ - fprintf( - f, - " %s,\n" /* index */ - " NULL,\n" /* data16 */ - " %s,\n", /* data32 */ - indexName, - data32Name); - } - fprintf( - f, - " %ld,\n" /* indexLength */ - " %ld,\n" /* dataLength */ - " 0x%hx,\n" /* index2NullOffset */ - " 0x%hx,\n" /* dataNullOffset */ - " 0x%lx,\n" /* initialValue */ - " 0x%lx,\n" /* errorValue */ - " 0x%lx,\n" /* highStart */ - " 0x%lx,\n" /* highValueIndex */ - " NULL, 0, FALSE, FALSE, 0, NULL\n", - (long)pTrie->indexLength, (long)pTrie->dataLength, - (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset, - (long)pTrie->initialValue, (long)pTrie->errorValue, - (long)pTrie->highStart, (long)pTrie->highValueIndex); - if(postfix!=NULL) { - fputs(postfix, f); - } -} - -U_CAPI void U_EXPORT2 -usrc_writeUCPTrieArrays(FILE *f, - const char *indexPrefix, const char *dataPrefix, - const UCPTrie *pTrie, - const char *postfix) { - usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, postfix); - int32_t width= - pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : - pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : - pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0; - usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, postfix); -} - -U_CAPI void U_EXPORT2 -usrc_writeUCPTrieStruct(FILE *f, - const char *prefix, - const UCPTrie *pTrie, - const char *indexName, const char *dataName, - const char *postfix) { - if(prefix!=NULL) { - fputs(prefix, f); - } - fprintf( - f, - " %s,\n" // index - " { %s },\n", // data (union) - indexName, - dataName); - fprintf( - f, - " %ld, %ld,\n" // indexLength, dataLength - " 0x%lx, 0x%x,\n" // highStart, shifted12HighStart - " %d, %d,\n" // type, valueWidth - " 0, 0,\n" // reserved32, reserved16 - " 0x%x, 0x%lx,\n" // index3NullOffset, dataNullOffset - " 0x%lx,\n", // nullValue - (long)pTrie->indexLength, (long)pTrie->dataLength, - (long)pTrie->highStart, pTrie->shifted12HighStart, - pTrie->type, pTrie->valueWidth, - pTrie->index3NullOffset, (long)pTrie->dataNullOffset, - (long)pTrie->nullValue); - if(postfix!=NULL) { - fputs(postfix, f); - } -} - -U_CAPI void U_EXPORT2 -usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie) { - int32_t width= - pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : - pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : - pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0; - char line[100], line2[100], line3[100]; - sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", name); - sprintf(line2, "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name); - usrc_writeUCPTrieArrays(f, line, line2, pTrie, "\n};\n\n"); - sprintf(line, "static const UCPTrie %s_trie={\n", name); - sprintf(line2, "%s_trieIndex", name); - sprintf(line3, "%s_trieData", name); - usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, "};\n\n"); -} - -U_CAPI void U_EXPORT2 -usrc_writeArrayOfMostlyInvChars(FILE *f, - const char *prefix, - const char *p, int32_t length, - const char *postfix) { - int32_t i, col; - int prev2, prev, c; - - if(prefix!=NULL) { - fprintf(f, prefix, (long)length); - } - prev2=prev=-1; - for(i=col=0; i<length; ++i, ++col) { - c=(uint8_t)p[i]; - if(i>0) { - /* Break long lines. Try to break at interesting places, to minimize revision diffs. */ - if( - /* Very long line. */ - col>=32 || - /* Long line, break after terminating NUL. */ - (col>=24 && prev2>=0x20 && prev==0) || - /* Medium-long line, break before non-NUL, non-character byte. */ - (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20) - ) { - fputs(",\n", f); - col=0; - } else { - fputc(',', f); - } - } - fprintf(f, c<0x20 ? "%u" : "'%c'", c); - prev2=prev; - prev=c; - } - if(postfix!=NULL) { - fputs(postfix, f); - } -} diff --git a/deps/node/deps/icu-small/source/tools/toolutil/writesrc.h b/deps/node/deps/icu-small/source/tools/toolutil/writesrc.h deleted file mode 100644 index 35ba2567..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/writesrc.h +++ /dev/null @@ -1,122 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2005-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: writesrc.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2005apr23 -* created by: Markus W. Scherer -* -* Helper functions for writing source code for data. -*/ - -#ifndef __WRITESRC_H__ -#define __WRITESRC_H__ - -#include <stdio.h> -#include "unicode/utypes.h" -#include "unicode/ucptrie.h" -#include "utrie2.h" - -/** - * Creates a source text file and writes a header comment with the ICU copyright. - * Writes a C/Java-style comment with the generator name. - */ -U_CAPI FILE * U_EXPORT2 -usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator); - -/** - * Creates a source text file and writes a header comment with the ICU copyright. - * Writes the comment with # lines, as used in scripts and text data. - */ -U_CAPI FILE * U_EXPORT2 -usrc_createTextData(const char *path, const char *filename, const char *generator); - -/** - * Writes the contents of an array of 8/16/32-bit words. - * The prefix and postfix are optional (can be NULL) and are written first/last. - * The prefix may contain a %ld or similar field for the array length. - * The {} and declaration etc. need to be included in prefix/postfix or - * printed before and after the array contents. - */ -U_CAPI void U_EXPORT2 -usrc_writeArray(FILE *f, - const char *prefix, - const void *p, int32_t width, int32_t length, - const char *postfix); - -/** - * Calls usrc_writeArray() for the index and data arrays of a frozen UTrie2. - * Only the index array is written for a 16-bit UTrie2. In this case, dataPrefix - * is ignored and can be NULL. - */ -U_CAPI void U_EXPORT2 -usrc_writeUTrie2Arrays(FILE *f, - const char *indexPrefix, const char *dataPrefix, - const UTrie2 *pTrie, - const char *postfix); - -/** - * Writes the UTrie2 struct values. - * The {} and declaration etc. need to be included in prefix/postfix or - * printed before and after the array contents. - */ -U_CAPI void U_EXPORT2 -usrc_writeUTrie2Struct(FILE *f, - const char *prefix, - const UTrie2 *pTrie, - const char *indexName, const char *dataName, - const char *postfix); - -/** - * Calls usrc_writeArray() for the index and data arrays of a UCPTrie. - */ -U_CAPI void U_EXPORT2 -usrc_writeUCPTrieArrays(FILE *f, - const char *indexPrefix, const char *dataPrefix, - const UCPTrie *pTrie, - const char *postfix); - -/** - * Writes the UCPTrie struct values. - * The {} and declaration etc. need to be included in prefix/postfix or - * printed before and after the array contents. - */ -U_CAPI void U_EXPORT2 -usrc_writeUCPTrieStruct(FILE *f, - const char *prefix, - const UCPTrie *pTrie, - const char *indexName, const char *dataName, - const char *postfix); - -/** - * Writes the UCPTrie arrays and struct values. - */ -U_CAPI void U_EXPORT2 -usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie); - -/** - * Writes the contents of an array of mostly invariant characters. - * Characters 0..0x1f are printed as numbers, - * others as characters with single quotes: '%c'. - * - * The prefix and postfix are optional (can be NULL) and are written first/last. - * The prefix may contain a %ld or similar field for the array length. - * The {} and declaration etc. need to be included in prefix/postfix or - * printed before and after the array contents. - */ -U_CAPI void U_EXPORT2 -usrc_writeArrayOfMostlyInvChars(FILE *f, - const char *prefix, - const char *p, int32_t length, - const char *postfix); - -#endif diff --git a/deps/node/deps/icu-small/source/tools/toolutil/xmlparser.cpp b/deps/node/deps/icu-small/source/tools/toolutil/xmlparser.cpp deleted file mode 100644 index ae7ef170..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/xmlparser.cpp +++ /dev/null @@ -1,826 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2004-2010, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: xmlparser.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004jul21 -* created by: Andy Heninger -*/ - -#include <stdio.h> -#include "unicode/uchar.h" -#include "unicode/ucnv.h" -#include "unicode/regex.h" -#include "filestrm.h" -#include "xmlparser.h" - -#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION - -// character constants -enum { - x_QUOT=0x22, - x_AMP=0x26, - x_APOS=0x27, - x_LT=0x3c, - x_GT=0x3e, - x_l=0x6c -}; - -#define XML_SPACES "[ \\u0009\\u000d\\u000a]" - -// XML #4 -#define XML_NAMESTARTCHAR "[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" \ - "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" \ - "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" \ - "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" - -// XML #5 -#define XML_NAMECHAR "[" XML_NAMESTARTCHAR "\\-.[0-9]\\u00b7[\\u0300-\\u036f][\\u203f-\\u2040]]" - -// XML #6 -#define XML_NAME XML_NAMESTARTCHAR "(?:" XML_NAMECHAR ")*" - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UXMLParser) -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UXMLElement) - -// -// UXMLParser constructor. Mostly just initializes the ICU regexes that are -// used for parsing. -// -UXMLParser::UXMLParser(UErrorCode &status) : - // XML Declaration. XML Production #23. - // example: "<?xml version=1.0 encoding="utf-16" ?> - // This is a sloppy implementation - just look for the leading <?xml and the closing ?> - // allow for a possible leading BOM. - mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>", -1, US_INV), 0, status), - - // XML Comment production #15 - // example: "<!-- whatever --> - // note, does not detect an illegal "--" within comments - mXMLComment(UnicodeString("(?s)<!--.+?-->", -1, US_INV), 0, status), - - // XML Spaces - // production [3] - mXMLSP(UnicodeString(XML_SPACES "+", -1, US_INV), 0, status), - - // XML Doctype decl production #28 - // example "<!DOCTYPE foo SYSTEM "somewhere" > - // or "<!DOCTYPE foo [internal dtd]> - // TODO: we don't actually parse the DOCTYPE or internal subsets. - // Some internal dtd subsets could confuse this simple-minded - // attempt at skipping over them, specifically, occcurences - // of closeing square brackets. These could appear in comments, - // or in parameter entity declarations, for example. - mXMLDoctype(UnicodeString( - "(?s)<!DOCTYPE.*?(>|\\[.*?\\].*?>)", -1, US_INV - ), 0, status), - - // XML PI production #16 - // example "<?target stuff?> - mXMLPI(UnicodeString("(?s)<\\?.+?\\?>", -1, US_INV), 0, status), - - // XML Element Start Productions #40, #41 - // example <foo att1='abc' att2="d e f" > - // capture #1: the tag name - // - mXMLElemStart (UnicodeString("(?s)<(" XML_NAME ")" // match "<tag_name" - "(?:" - XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*" // match "ATTR_NAME = " - "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"' - ")*" // * for zero or more attributes. - XML_SPACES "*?>", -1, US_INV), 0, status), // match " >" - - // XML Element End production #42 - // example </foo> - mXMLElemEnd (UnicodeString("</(" XML_NAME ")" XML_SPACES "*>", -1, US_INV), 0, status), - - // XML Element Empty production #44 - // example <foo att1="abc" att2="d e f" /> - mXMLElemEmpty (UnicodeString("(?s)<(" XML_NAME ")" // match "<tag_name" - "(?:" - XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*" // match "ATTR_NAME = " - "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"' - ")*" // * for zero or more attributes. - XML_SPACES "*?/>", -1, US_INV), 0, status), // match " />" - - - // XMLCharData. Everything but '<'. Note that & will be dealt with later. - mXMLCharData(UnicodeString("(?s)[^<]*", -1, US_INV), 0, status), - - // Attribute name = "value". XML Productions 10, 40/41 - // Capture group 1 is name, - // 2 is the attribute value, including the quotes. - // - // Note that attributes are scanned twice. The first time is with - // the regex for an entire element start. There, the attributes - // are checked syntactically, but not separted out one by one. - // Here, we match a single attribute, and make its name and - // attribute value available to the parser code. - mAttrValue(UnicodeString(XML_SPACES "+(" XML_NAME ")" XML_SPACES "*=" XML_SPACES "*" - "((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))", -1, US_INV), 0, status), - - - mAttrNormalizer(UnicodeString(XML_SPACES, -1, US_INV), 0, status), - - // Match any of the new-line sequences in content. - // All are changed to \u000a. - mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028", -1, US_INV), 0, status), - - // & char references - // We will figure out what we've got based on which capture group has content. - // The last one is a catchall for unrecognized entity references.. - // 1 2 3 4 5 6 7 8 - mAmps(UnicodeString("&(?:(amp;)|(lt;)|(gt;)|(apos;)|(quot;)|#x([0-9A-Fa-f]{1,8});|#([0-9]{1,8});|(.))"), - 0, status), - - fNames(status), - fElementStack(status), - fOneLF((UChar)0x0a) // Plain new-line string, used in new line normalization. - { - } - -UXMLParser * -UXMLParser::createParser(UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { - return NULL; - } else { - return new UXMLParser(errorCode); - } -} - -UXMLParser::~UXMLParser() {} - -UXMLElement * -UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { - char bytes[4096], charsetBuffer[100]; - FileStream *f; - const char *charset, *pb; - UnicodeString src; - UConverter *cnv; - UChar *buffer, *pu; - int32_t fileLength, bytesLength, length, capacity; - UBool flush; - - if(U_FAILURE(errorCode)) { - return NULL; - } - - f=T_FileStream_open(filename, "rb"); - if(f==NULL) { - errorCode=U_FILE_ACCESS_ERROR; - return NULL; - } - - bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes)); - if(bytesLength<(int32_t)sizeof(bytes)) { - // we have already read the entire file - fileLength=bytesLength; - } else { - // get the file length - fileLength=T_FileStream_size(f); - } - - /* - * get the charset: - * 1. Unicode signature - * 2. treat as ISO-8859-1 and read XML encoding="charser" - * 3. default to UTF-8 - */ - charset=ucnv_detectUnicodeSignature(bytes, bytesLength, NULL, &errorCode); - if(U_SUCCESS(errorCode) && charset!=NULL) { - // open converter according to Unicode signature - cnv=ucnv_open(charset, &errorCode); - } else { - // read as Latin-1 and parse the XML declaration and encoding - cnv=ucnv_open("ISO-8859-1", &errorCode); - if(U_FAILURE(errorCode)) { - // unexpected error opening Latin-1 converter - goto exit; - } - - buffer=toUCharPtr(src.getBuffer(bytesLength)); - if(buffer==NULL) { - // unexpected failure to reserve some string capacity - errorCode=U_MEMORY_ALLOCATION_ERROR; - goto exit; - } - pb=bytes; - pu=buffer; - ucnv_toUnicode( - cnv, - &pu, buffer+src.getCapacity(), - &pb, bytes+bytesLength, - NULL, TRUE, &errorCode); - src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0); - ucnv_close(cnv); - cnv=NULL; - if(U_FAILURE(errorCode)) { - // unexpected error in conversion from Latin-1 - src.remove(); - goto exit; - } - - // parse XML declaration - if(mXMLDecl.reset(src).lookingAt(0, errorCode)) { - int32_t declEnd=mXMLDecl.end(errorCode); - // go beyond <?xml - int32_t pos=src.indexOf((UChar)x_l)+1; - - mAttrValue.reset(src); - while(pos<declEnd && mAttrValue.lookingAt(pos, errorCode)) { // loop runs once per attribute on this element. - UnicodeString attName = mAttrValue.group(1, errorCode); - UnicodeString attValue = mAttrValue.group(2, errorCode); - - // Trim the quotes from the att value. These are left over from the original regex - // that parsed the attribue, which couldn't conveniently strip them. - attValue.remove(0,1); // one char from the beginning - attValue.truncate(attValue.length()-1); // and one from the end. - - if(attName==UNICODE_STRING("encoding", 8)) { - length=attValue.extract(0, 0x7fffffff, charsetBuffer, (int32_t)sizeof(charsetBuffer)); - charset=charsetBuffer; - break; - } - pos = mAttrValue.end(2, errorCode); - } - - if(charset==NULL) { - // default to UTF-8 - charset="UTF-8"; - } - cnv=ucnv_open(charset, &errorCode); - } - } - - if(U_FAILURE(errorCode)) { - // unable to open the converter - goto exit; - } - - // convert the file contents - capacity=fileLength; // estimated capacity - src.getBuffer(capacity); - src.releaseBuffer(0); // zero length - flush=FALSE; - for(;;) { - // convert contents of bytes[bytesLength] - pb=bytes; - for(;;) { - length=src.length(); - buffer=toUCharPtr(src.getBuffer(capacity)); - if(buffer==NULL) { - // unexpected failure to reserve some string capacity - errorCode=U_MEMORY_ALLOCATION_ERROR; - goto exit; - } - - pu=buffer+length; - ucnv_toUnicode( - cnv, &pu, buffer+src.getCapacity(), - &pb, bytes+bytesLength, - NULL, FALSE, &errorCode); - src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0); - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - errorCode=U_ZERO_ERROR; - capacity=(3*src.getCapacity())/2; // increase capacity by 50% - } else { - break; - } - } - - if(U_FAILURE(errorCode)) { - break; // conversion error - } - - if(flush) { - break; // completely converted the file - } - - // read next block - bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes)); - if(bytesLength==0) { - // reached end of file, convert once more to flush the converter - flush=TRUE; - } - }; - -exit: - ucnv_close(cnv); - T_FileStream_close(f); - - if(U_SUCCESS(errorCode)) { - return parse(src, errorCode); - } else { - return NULL; - } -} - -UXMLElement * -UXMLParser::parse(const UnicodeString &src, UErrorCode &status) { - if(U_FAILURE(status)) { - return NULL; - } - - UXMLElement *root = NULL; - fPos = 0; // TODO use just a local pos variable and pass it into functions - // where necessary? - - // set all matchers to work on the input string - mXMLDecl.reset(src); - mXMLComment.reset(src); - mXMLSP.reset(src); - mXMLDoctype.reset(src); - mXMLPI.reset(src); - mXMLElemStart.reset(src); - mXMLElemEnd.reset(src); - mXMLElemEmpty.reset(src); - mXMLCharData.reset(src); - mAttrValue.reset(src); - mAttrNormalizer.reset(src); - mNewLineNormalizer.reset(src); - mAmps.reset(src); - - // Consume the XML Declaration, if present. - if (mXMLDecl.lookingAt(fPos, status)) { - fPos = mXMLDecl.end(status); - } - - // Consume "misc" [XML production 27] appearing before DocType - parseMisc(status); - - // Consume a DocType declaration, if present. - if (mXMLDoctype.lookingAt(fPos, status)) { - fPos = mXMLDoctype.end(status); - } - - // Consume additional "misc" [XML production 27] appearing after the DocType - parseMisc(status); - - // Get the root element - if (mXMLElemEmpty.lookingAt(fPos, status)) { - // Root is an empty element (no nested elements or content) - root = createElement(mXMLElemEmpty, status); - fPos = mXMLElemEmpty.end(status); - } else { - if (mXMLElemStart.lookingAt(fPos, status) == FALSE) { - error("Root Element expected", status); - goto errorExit; - } - root = createElement(mXMLElemStart, status); - UXMLElement *el = root; - - // - // This is the loop that consumes the root element of the document, - // including all nested content. Nested elements are handled by - // explicit pushes/pops of the element stack; there is no recursion - // in the control flow of this code. - // "el" always refers to the current element, the one to which content - // is being added. It is above the top of the element stack. - for (;;) { - // Nested Element Start - if (mXMLElemStart.lookingAt(fPos, status)) { - UXMLElement *t = createElement(mXMLElemStart, status); - el->fChildren.addElement(t, status); - t->fParent = el; - fElementStack.push(el, status); - el = t; - continue; - } - - // Text Content. String is concatenated onto the current node's content, - // but only if it contains something other than spaces. - UnicodeString s = scanContent(status); - if (s.length() > 0) { - mXMLSP.reset(s); - if (mXMLSP.matches(status) == FALSE) { - // This chunk of text contains something other than just - // white space. Make a child node for it. - replaceCharRefs(s, status); - el->fChildren.addElement(s.clone(), status); - } - mXMLSP.reset(src); // The matchers need to stay set to the main input string. - continue; - } - - // Comments. Discard. - if (mXMLComment.lookingAt(fPos, status)) { - fPos = mXMLComment.end(status); - continue; - } - - // PIs. Discard. - if (mXMLPI.lookingAt(fPos, status)) { - fPos = mXMLPI.end(status); - continue; - } - - // Element End - if (mXMLElemEnd.lookingAt(fPos, status)) { - fPos = mXMLElemEnd.end(0, status); - const UnicodeString name = mXMLElemEnd.group(1, status); - if (name != *el->fName) { - error("Element start / end tag mismatch", status); - goto errorExit; - } - if (fElementStack.empty()) { - // Close of the root element. We're done with the doc. - el = NULL; - break; - } - el = (UXMLElement *)fElementStack.pop(); - continue; - } - - // Empty Element. Stored as a child of the current element, but not stacked. - if (mXMLElemEmpty.lookingAt(fPos, status)) { - UXMLElement *t = createElement(mXMLElemEmpty, status); - el->fChildren.addElement(t, status); - continue; - } - - // Hit something within the document that doesn't match anything. - // It's an error. - error("Unrecognized markup", status); - break; - } - - if (el != NULL || !fElementStack.empty()) { - // We bailed out early, for some reason. - error("Root element not closed.", status); - goto errorExit; - } - } - - // Root Element parse is complete. - // Consume the annoying xml "Misc" that can appear at the end of the doc. - parseMisc(status); - - // We should have reached the end of the input - if (fPos != src.length()) { - error("Extra content at the end of the document", status); - goto errorExit; - } - - // Success! - return root; - -errorExit: - delete root; - return NULL; -} - -// -// createElement -// We've just matched an element start tag. Create and fill in a UXMLElement object -// for it. -// -UXMLElement * -UXMLParser::createElement(RegexMatcher &mEl, UErrorCode &status) { - // First capture group is the element's name. - UXMLElement *el = new UXMLElement(this, intern(mEl.group(1, status), status), status); - - // Scan for attributes. - int32_t pos = mEl.end(1, status); // The position after the end of the tag name - - while (mAttrValue.lookingAt(pos, status)) { // loop runs once per attribute on this element. - UnicodeString attName = mAttrValue.group(1, status); - UnicodeString attValue = mAttrValue.group(2, status); - - // Trim the quotes from the att value. These are left over from the original regex - // that parsed the attribue, which couldn't conveniently strip them. - attValue.remove(0,1); // one char from the beginning - attValue.truncate(attValue.length()-1); // and one from the end. - - // XML Attribue value normalization. - // This is one of the really screwy parts of the XML spec. - // See http://www.w3.org/TR/2004/REC-xml11-20040204/#AVNormalize - // Note that non-validating parsers must treat all entities as type CDATA - // which simplifies things some. - - // Att normalization step 1: normalize any newlines in the attribute value - mNewLineNormalizer.reset(attValue); - attValue = mNewLineNormalizer.replaceAll(fOneLF, status); - - // Next change all xml white space chars to plain \u0020 spaces. - mAttrNormalizer.reset(attValue); - UnicodeString oneSpace((UChar)0x0020); - attValue = mAttrNormalizer.replaceAll(oneSpace, status); - - // Replace character entities. - replaceCharRefs(attValue, status); - - // Save the attribute name and value in our document structure. - el->fAttNames.addElement((void *)intern(attName, status), status); - el->fAttValues.addElement(attValue.clone(), status); - pos = mAttrValue.end(2, status); - } - fPos = mEl.end(0, status); - return el; -} - -// -// parseMisc -// Consume XML "Misc" [production #27] -// which is any combination of space, PI and comments -// Need to watch end-of-input because xml MISC stuff is allowed after -// the document element, so we WILL scan off the end in this function -// -void -UXMLParser::parseMisc(UErrorCode &status) { - for (;;) { - if (fPos >= mXMLPI.input().length()) { - break; - } - if (mXMLPI.lookingAt(fPos, status)) { - fPos = mXMLPI.end(status); - continue; - } - if (mXMLSP.lookingAt(fPos, status)) { - fPos = mXMLSP.end(status); - continue; - } - if (mXMLComment.lookingAt(fPos, status)) { - fPos = mXMLComment.end(status); - continue; - } - break; - } -} - -// -// Scan for document content. -// -UnicodeString -UXMLParser::scanContent(UErrorCode &status) { - UnicodeString result; - if (mXMLCharData.lookingAt(fPos, status)) { - result = mXMLCharData.group((int32_t)0, status); - // Normalize the new-lines. (Before char ref substitution) - mNewLineNormalizer.reset(result); - result = mNewLineNormalizer.replaceAll(fOneLF, status); - - // TODO: handle CDATA - fPos = mXMLCharData.end(0, status); - } - - return result; -} - -// -// replaceCharRefs -// -// replace the char entities < & { ካ etc. in a string -// with the corresponding actual character. -// -void -UXMLParser::replaceCharRefs(UnicodeString &s, UErrorCode &status) { - UnicodeString result; - UnicodeString replacement; - int i; - - mAmps.reset(s); - // See the initialization for the regex matcher mAmps. - // Which entity we've matched is determined by which capture group has content, - // which is flaged by start() of that group not being -1. - while (mAmps.find()) { - if (mAmps.start(1, status) != -1) { - replacement.setTo((UChar)x_AMP); - } else if (mAmps.start(2, status) != -1) { - replacement.setTo((UChar)x_LT); - } else if (mAmps.start(3, status) != -1) { - replacement.setTo((UChar)x_GT); - } else if (mAmps.start(4, status) != -1) { - replacement.setTo((UChar)x_APOS); - } else if (mAmps.start(5, status) != -1) { - replacement.setTo((UChar)x_QUOT); - } else if (mAmps.start(6, status) != -1) { - UnicodeString hexString = mAmps.group(6, status); - UChar32 val = 0; - for (i=0; i<hexString.length(); i++) { - val = (val << 4) + u_digit(hexString.charAt(i), 16); - } - // TODO: some verification that the character is valid - replacement.setTo(val); - } else if (mAmps.start(7, status) != -1) { - UnicodeString decimalString = mAmps.group(7, status); - UChar32 val = 0; - for (i=0; i<decimalString.length(); i++) { - val = val*10 + u_digit(decimalString.charAt(i), 10); - } - // TODO: some verification that the character is valid - replacement.setTo(val); - } else { - // An unrecognized &entity; Leave it alone. - // TODO: check that it really looks like an entity, and is not some - // random & in the text. - replacement = mAmps.group((int32_t)0, status); - } - mAmps.appendReplacement(result, replacement, status); - } - mAmps.appendTail(result); - s = result; -} - -void -UXMLParser::error(const char *message, UErrorCode &status) { - // TODO: something better here... - const UnicodeString &src=mXMLDecl.input(); - int line = 0; - int ci = 0; - while (ci < fPos && ci>=0) { - ci = src.indexOf((UChar)0x0a, ci+1); - line++; - } - fprintf(stderr, "Error: %s at line %d\n", message, line); - if (U_SUCCESS(status)) { - status = U_PARSE_ERROR; - } -} - -// intern strings like in Java - -const UnicodeString * -UXMLParser::intern(const UnicodeString &s, UErrorCode &errorCode) { - const UHashElement *he=fNames.find(s); - if(he!=NULL) { - // already a known name, return its hashed key pointer - return (const UnicodeString *)he->key.pointer; - } else { - // add this new name and return its hashed key pointer - fNames.puti(s, 0, errorCode); - he=fNames.find(s); - return (const UnicodeString *)he->key.pointer; - } -} - -const UnicodeString * -UXMLParser::findName(const UnicodeString &s) const { - const UHashElement *he=fNames.find(s); - if(he!=NULL) { - // a known name, return its hashed key pointer - return (const UnicodeString *)he->key.pointer; - } else { - // unknown name - return NULL; - } -} - -// UXMLElement ------------------------------------------------------------- *** - -UXMLElement::UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode) : - fParser(parser), - fName(name), - fAttNames(errorCode), - fAttValues(errorCode), - fChildren(errorCode), - fParent(NULL) -{ -} - -UXMLElement::~UXMLElement() { - int i; - // attribute names are owned by the UXMLParser, don't delete them here - for (i=fAttValues.size()-1; i>=0; i--) { - delete (UObject *)fAttValues.elementAt(i); - } - for (i=fChildren.size()-1; i>=0; i--) { - delete (UObject *)fChildren.elementAt(i); - } -} - -const UnicodeString & -UXMLElement::getTagName() const { - return *fName; -} - -UnicodeString -UXMLElement::getText(UBool recurse) const { - UnicodeString text; - appendText(text, recurse); - return text; -} - -void -UXMLElement::appendText(UnicodeString &text, UBool recurse) const { - const UObject *node; - int32_t i, count=fChildren.size(); - for(i=0; i<count; ++i) { - node=(const UObject *)fChildren.elementAt(i); - const UnicodeString *s=dynamic_cast<const UnicodeString *>(node); - if(s!=NULL) { - text.append(*s); - } else if(recurse) /* must be a UXMLElement */ { - ((const UXMLElement *)node)->appendText(text, recurse); - } - } -} - -int32_t -UXMLElement::countAttributes() const { - return fAttNames.size(); -} - -const UnicodeString * -UXMLElement::getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const { - if(0<=i && i<fAttNames.size()) { - name.setTo(*(const UnicodeString *)fAttNames.elementAt(i)); - value.setTo(*(const UnicodeString *)fAttValues.elementAt(i)); - return &value; // or return (UnicodeString *)fAttValues.elementAt(i); - } else { - return NULL; - } -} - -const UnicodeString * -UXMLElement::getAttribute(const UnicodeString &name) const { - // search for the attribute name by comparing the interned pointer, - // not the string contents - const UnicodeString *p=fParser->findName(name); - if(p==NULL) { - return NULL; // no such attribute seen by the parser at all - } - - int32_t i, count=fAttNames.size(); - for(i=0; i<count; ++i) { - if(p==(const UnicodeString *)fAttNames.elementAt(i)) { - return (const UnicodeString *)fAttValues.elementAt(i); - } - } - return NULL; -} - -int32_t -UXMLElement::countChildren() const { - return fChildren.size(); -} - -const UObject * -UXMLElement::getChild(int32_t i, UXMLNodeType &type) const { - if(0<=i && i<fChildren.size()) { - const UObject *node=(const UObject *)fChildren.elementAt(i); - if(dynamic_cast<const UXMLElement *>(node)!=NULL) { - type=UXML_NODE_TYPE_ELEMENT; - } else { - type=UXML_NODE_TYPE_STRING; - } - return node; - } else { - return NULL; - } -} - -const UXMLElement * -UXMLElement::nextChildElement(int32_t &i) const { - if(i<0) { - return NULL; - } - - const UObject *node; - int32_t count=fChildren.size(); - while(i<count) { - node=(const UObject *)fChildren.elementAt(i++); - const UXMLElement *elem=dynamic_cast<const UXMLElement *>(node); - if(elem!=NULL) { - return elem; - } - } - return NULL; -} - -const UXMLElement * -UXMLElement::getChildElement(const UnicodeString &name) const { - // search for the element name by comparing the interned pointer, - // not the string contents - const UnicodeString *p=fParser->findName(name); - if(p==NULL) { - return NULL; // no such element seen by the parser at all - } - - const UObject *node; - int32_t i, count=fChildren.size(); - for(i=0; i<count; ++i) { - node=(const UObject *)fChildren.elementAt(i); - const UXMLElement *elem=dynamic_cast<const UXMLElement *>(node); - if(elem!=NULL) { - if(p==elem->fName) { - return elem; - } - } - } - return NULL; -} - -U_NAMESPACE_END - -#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ diff --git a/deps/node/deps/icu-small/source/tools/toolutil/xmlparser.h b/deps/node/deps/icu-small/source/tools/toolutil/xmlparser.h deleted file mode 100644 index 72f7ec8f..00000000 --- a/deps/node/deps/icu-small/source/tools/toolutil/xmlparser.h +++ /dev/null @@ -1,247 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2004-2005, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: xmlparser.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004jul21 -* created by: Andy Heninger -* -* Tiny XML parser using ICU and intended for use in ICU tests and in build tools. -* Not suitable for production use. Not supported. -* Not conformant. Not efficient. -* But very small. -*/ - -#ifndef __XMLPARSER_H__ -#define __XMLPARSER_H__ - -#include "unicode/uobject.h" -#include "unicode/unistr.h" -#include "unicode/regex.h" -#include "uvector.h" -#include "hash.h" - -#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION - -enum UXMLNodeType { - /** Node type string (text contents), stored as a UnicodeString. */ - UXML_NODE_TYPE_STRING, - /** Node type element, stored as a UXMLElement. */ - UXML_NODE_TYPE_ELEMENT, - UXML_NODE_TYPE_COUNT -}; - -U_NAMESPACE_BEGIN - -class UXMLParser; - -/** - * This class represents an element node in a parsed XML tree. - */ -class U_TOOLUTIL_API UXMLElement : public UObject { -public: - /** - * Destructor. - */ - virtual ~UXMLElement(); - - /** - * Get the tag name of this element. - */ - const UnicodeString &getTagName() const; - /** - * Get the text contents of the element. - * Append the contents of all text child nodes. - * @param recurse If TRUE, also recursively appends the contents of all - * text child nodes of element children. - * @return The text contents. - */ - UnicodeString getText(UBool recurse) const; - /** - * Get the number of attributes. - */ - int32_t countAttributes() const; - /** - * Get the i-th attribute. - * @param i Index of the attribute. - * @param name Output parameter, receives the attribute name. - * @param value Output parameter, receives the attribute value. - * @return A pointer to the attribute value (may be &value or a pointer to an - * internal string object), or NULL if i is out of bounds. - */ - const UnicodeString *getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const; - /** - * Get the value of the attribute with the given name. - * @param name Attribute name to be looked up. - * @return A pointer to the attribute value, or NULL if this element - * does not have this attribute. - */ - const UnicodeString *getAttribute(const UnicodeString &name) const; - /** - * Get the number of child nodes. - */ - int32_t countChildren() const; - /** - * Get the i-th child node. - * @param i Index of the child node. - * @param type The child node type. - * @return A pointer to the child node object, or NULL if i is out of bounds. - */ - const UObject *getChild(int32_t i, UXMLNodeType &type) const; - /** - * Get the next child element node, skipping non-element child nodes. - * @param i Enumeration index; initialize to 0 before getting the first child element. - * @return A pointer to the next child element, or NULL if there is none. - */ - const UXMLElement *nextChildElement(int32_t &i) const; - /** - * Get the immediate child element with the given name. - * If there are multiple child elements with this name, then return - * the first one. - * @param name Element name to be looked up. - * @return A pointer to the element node, or NULL if this element - * does not have this immediate child element. - */ - const UXMLElement *getChildElement(const UnicodeString &name) const; - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - */ - virtual UClassID getDynamicClassID() const; - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - */ - static UClassID U_EXPORT2 getStaticClassID(); - -private: - // prevent default construction etc. - UXMLElement(); - UXMLElement(const UXMLElement &other); - UXMLElement &operator=(const UXMLElement &other); - - void appendText(UnicodeString &text, UBool recurse) const; - - friend class UXMLParser; - - UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode); - - const UXMLParser *fParser; - const UnicodeString *fName; // The tag name of this element (owned by the UXMLParser) - UnicodeString fContent; // The text content of this node. All element content is - // concatenated even when there are intervening nested elements - // (which doesn't happen with most xml files we care about) - // Sections of content containing only white space are dropped, - // which gets rid the bogus white space content from - // elements which are primarily containers for nested elements. - UVector fAttNames; // A vector containing the names of this element's attributes - // The names are UnicodeString objects, owned by the UXMLParser. - UVector fAttValues; // A vector containing the attribute values for - // this element's attributes. The order is the same - // as that of the attribute name vector. - - UVector fChildren; // The child nodes of this element (a Vector) - - UXMLElement *fParent; // A pointer to the parent element of this element. -}; - -/** - * A simple XML parser; it is neither efficient nor conformant and only useful for - * restricted types of XML documents. - * - * The parse methods parse whole documents and return the parse trees via their - * root elements. - */ -class U_TOOLUTIL_API UXMLParser : public UObject { -public: - /** - * Create an XML parser. - */ - static UXMLParser *createParser(UErrorCode &errorCode); - /** - * Destructor. - */ - virtual ~UXMLParser(); - - /** - * Parse an XML document, create the entire document tree, and - * return a pointer to the root element of the parsed tree. - * The caller must delete the element. - */ - UXMLElement *parse(const UnicodeString &src, UErrorCode &errorCode); - /** - * Parse an XML file, create the entire document tree, and - * return a pointer to the root element of the parsed tree. - * The caller must delete the element. - */ - UXMLElement *parseFile(const char *filename, UErrorCode &errorCode); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - */ - virtual UClassID getDynamicClassID() const; - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - */ - static UClassID U_EXPORT2 getStaticClassID(); - -private: - // prevent default construction etc. - UXMLParser(); - UXMLParser(const UXMLParser &other); - UXMLParser &operator=(const UXMLParser &other); - - // constructor - UXMLParser(UErrorCode &status); - - void parseMisc(UErrorCode &status); - UXMLElement *createElement(RegexMatcher &mEl, UErrorCode &status); - void error(const char *message, UErrorCode &status); - UnicodeString scanContent(UErrorCode &status); - void replaceCharRefs(UnicodeString &s, UErrorCode &status); - - const UnicodeString *intern(const UnicodeString &s, UErrorCode &errorCode); -public: - // public for UXMLElement only - const UnicodeString *findName(const UnicodeString &s) const; -private: - - // There is one ICU regex matcher for each of the major XML syntax items - // that are recognized. - RegexMatcher mXMLDecl; - RegexMatcher mXMLComment; - RegexMatcher mXMLSP; - RegexMatcher mXMLDoctype; - RegexMatcher mXMLPI; - RegexMatcher mXMLElemStart; - RegexMatcher mXMLElemEnd; - RegexMatcher mXMLElemEmpty; - RegexMatcher mXMLCharData; - RegexMatcher mAttrValue; - RegexMatcher mAttrNormalizer; - RegexMatcher mNewLineNormalizer; - RegexMatcher mAmps; - - Hashtable fNames; // interned element/attribute name strings - UStack fElementStack; // Stack holds the parent elements when nested - // elements are being parsed. All items on this - // stack are of type UXMLElement. - int32_t fPos; // String index of the current scan position in - // xml source (in fSrc). - UnicodeString fOneLF; -}; - -U_NAMESPACE_END -#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ - -#endif |