diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/common/punycode.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/common/punycode.cpp | 589 |
1 files changed, 0 insertions, 589 deletions
diff --git a/deps/node/deps/icu-small/source/common/punycode.cpp b/deps/node/deps/icu-small/source/common/punycode.cpp deleted file mode 100644 index 4f0b9ea9..00000000 --- a/deps/node/deps/icu-small/source/common/punycode.cpp +++ /dev/null @@ -1,589 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: punycode.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jan31 -* created by: Markus W. Scherer -*/ - - -/* This ICU code derived from: */ -/* -punycode.c 0.4.0 (2001-Nov-17-Sat) -http://www.cs.berkeley.edu/~amc/idn/ -Adam M. Costello -http://www.nicemice.net/amc/ - -Disclaimer and license - - Regarding this entire document or any portion of it (including - the pseudocode and C code), the author makes no guarantees and - is not responsible for any damage resulting from its use. The - author grants irrevocable permission to anyone to use, modify, - and distribute it in any way that does not diminish the rights - of anyone else to use, modify, and distribute it, provided that - redistributed derivative works do not contain misleading author or - version information. Derivative works need not be licensed under - similar terms. -*/ -/* - * ICU modifications: - * - ICU data types and coding conventions - * - ICU string buffer handling with implicit source lengths - * and destination preflighting - * - UTF-16 handling - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_IDNA - -#include "unicode/ustring.h" -#include "unicode/utf.h" -#include "unicode/utf16.h" -#include "ustr_imp.h" -#include "cstring.h" -#include "cmemory.h" -#include "punycode.h" -#include "uassert.h" - - -/* Punycode ----------------------------------------------------------------- */ - -/* Punycode parameters for Bootstring */ -#define BASE 36 -#define TMIN 1 -#define TMAX 26 -#define SKEW 38 -#define DAMP 700 -#define INITIAL_BIAS 72 -#define INITIAL_N 0x80 - -/* "Basic" Unicode/ASCII code points */ -#define _HYPHEN 0X2d -#define DELIMITER _HYPHEN - -#define _ZERO_ 0X30 -#define _NINE 0x39 - -#define _SMALL_A 0X61 -#define _SMALL_Z 0X7a - -#define _CAPITAL_A 0X41 -#define _CAPITAL_Z 0X5a - -#define IS_BASIC(c) ((c)<0x80) -#define IS_BASIC_UPPERCASE(c) (_CAPITAL_A<=(c) && (c)<=_CAPITAL_Z) - -/** - * digitToBasic() returns the basic code point whose value - * (when used for representing integers) is d, which must be in the - * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is - * nonzero, in which case the uppercase form is used. - */ -static inline char -digitToBasic(int32_t digit, UBool uppercase) { - /* 0..25 map to ASCII a..z or A..Z */ - /* 26..35 map to ASCII 0..9 */ - if(digit<26) { - if(uppercase) { - return (char)(_CAPITAL_A+digit); - } else { - return (char)(_SMALL_A+digit); - } - } else { - return (char)((_ZERO_-26)+digit); - } -} - -/** - * basicToDigit[] contains the numeric value of a basic code - * point (for use in representing integers) in the range 0 to - * BASE-1, or -1 if b is does not represent a value. - */ -static const int8_t -basicToDigit[256]={ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, - - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, - - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 -}; - -static inline char -asciiCaseMap(char b, UBool uppercase) { - if(uppercase) { - if(_SMALL_A<=b && b<=_SMALL_Z) { - b-=(_SMALL_A-_CAPITAL_A); - } - } else { - if(_CAPITAL_A<=b && b<=_CAPITAL_Z) { - b+=(_SMALL_A-_CAPITAL_A); - } - } - return b; -} - -/* Punycode-specific Bootstring code ---------------------------------------- */ - -/* - * The following code omits the {parts} of the pseudo-algorithm in the spec - * that are not used with the Punycode parameter set. - */ - -/* Bias adaptation function. */ -static int32_t -adaptBias(int32_t delta, int32_t length, UBool firstTime) { - int32_t count; - - if(firstTime) { - delta/=DAMP; - } else { - delta/=2; - } - - delta+=delta/length; - for(count=0; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) { - delta/=(BASE-TMIN); - } - - return count+(((BASE-TMIN+1)*delta)/(delta+SKEW)); -} - -#define MAX_CP_COUNT 200 - -U_CFUNC int32_t -u_strToPunycode(const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, - const UBool *caseFlags, - UErrorCode *pErrorCode) { - - int32_t cpBuffer[MAX_CP_COUNT]; - int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount; - UChar c, c2; - - /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* - * Handle the basic code points and - * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit): - */ - srcCPCount=destLength=0; - if(srcLength==-1) { - /* NUL-terminated input */ - for(j=0; /* no condition */; ++j) { - if((c=src[j])==0) { - break; - } - if(srcCPCount==MAX_CP_COUNT) { - /* too many input code points */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - if(IS_BASIC(c)) { - cpBuffer[srcCPCount++]=0; - if(destLength<destCapacity) { - dest[destLength]= - caseFlags!=NULL ? - asciiCaseMap((char)c, caseFlags[j]) : - (char)c; - } - ++destLength; - } else { - n=(caseFlags!=NULL && caseFlags[j])<<31L; - if(U16_IS_SINGLE(c)) { - n|=c; - } else if(U16_IS_LEAD(c) && U16_IS_TRAIL(c2=src[j+1])) { - ++j; - n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2); - } else { - /* error: unmatched surrogate */ - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - cpBuffer[srcCPCount++]=n; - } - } - } else { - /* length-specified input */ - for(j=0; j<srcLength; ++j) { - if(srcCPCount==MAX_CP_COUNT) { - /* too many input code points */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - c=src[j]; - if(IS_BASIC(c)) { - cpBuffer[srcCPCount++]=0; - if(destLength<destCapacity) { - dest[destLength]= - caseFlags!=NULL ? - asciiCaseMap((char)c, caseFlags[j]) : - (char)c; - } - ++destLength; - } else { - n=(caseFlags!=NULL && caseFlags[j])<<31L; - if(U16_IS_SINGLE(c)) { - n|=c; - } else if(U16_IS_LEAD(c) && (j+1)<srcLength && U16_IS_TRAIL(c2=src[j+1])) { - ++j; - n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2); - } else { - /* error: unmatched surrogate */ - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - cpBuffer[srcCPCount++]=n; - } - } - } - - /* Finish the basic string - if it is not empty - with a delimiter. */ - basicLength=destLength; - if(basicLength>0) { - if(destLength<destCapacity) { - dest[destLength]=DELIMITER; - } - ++destLength; - } - - /* - * handledCPCount is the number of code points that have been handled - * basicLength is the number of basic code points - * destLength is the number of chars that have been output - */ - - /* Initialize the state: */ - n=INITIAL_N; - delta=0; - bias=INITIAL_BIAS; - - /* Main encoding loop: */ - for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) { - /* - * All non-basic code points < n have been handled already. - * Find the next larger one: - */ - for(m=0x7fffffff, j=0; j<srcCPCount; ++j) { - q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */ - if(n<=q && q<m) { - m=q; - } - } - - /* - * Increase delta enough to advance the decoder's - * <n,i> state to <m,0>, but guard against overflow: - */ - if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) { - *pErrorCode=U_INTERNAL_PROGRAM_ERROR; - return 0; - } - delta+=(m-n)*(handledCPCount+1); - n=m; - - /* Encode a sequence of same code points n */ - for(j=0; j<srcCPCount; ++j) { - q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */ - if(q<n) { - ++delta; - } else if(q==n) { - /* Represent delta as a generalized variable-length integer: */ - for(q=delta, k=BASE; /* no condition */; k+=BASE) { - - /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt - - t=k-bias; - if(t<TMIN) { - t=TMIN; - } else if(t>TMAX) { - t=TMAX; - } - */ - - t=k-bias; - if(t<TMIN) { - t=TMIN; - } else if(k>=(bias+TMAX)) { - t=TMAX; - } - - if(q<t) { - break; - } - - if(destLength<destCapacity) { - dest[destLength]=digitToBasic(t+(q-t)%(BASE-t), 0); - } - ++destLength; - q=(q-t)/(BASE-t); - } - - if(destLength<destCapacity) { - dest[destLength]=digitToBasic(q, (UBool)(cpBuffer[j]<0)); - } - ++destLength; - bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength)); - delta=0; - ++handledCPCount; - } - } - - ++delta; - ++n; - } - - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); -} - -U_CFUNC int32_t -u_strFromPunycode(const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, - UBool *caseFlags, - UErrorCode *pErrorCode) { - int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t, - destCPCount, firstSupplementaryIndex, cpLength; - UChar b; - - /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(srcLength==-1) { - srcLength=u_strlen(src); - } - - /* - * Handle the basic code points: - * Let basicLength be the number of input code points - * before the last delimiter, or 0 if there is none, - * then copy the first basicLength code points to the output. - * - * The two following loops iterate backward. - */ - for(j=srcLength; j>0;) { - if(src[--j]==DELIMITER) { - break; - } - } - destLength=basicLength=destCPCount=j; - U_ASSERT(destLength>=0); - - while(j>0) { - b=src[--j]; - if(!IS_BASIC(b)) { - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - - if(j<destCapacity) { - dest[j]=(UChar)b; - - if(caseFlags!=NULL) { - caseFlags[j]=IS_BASIC_UPPERCASE(b); - } - } - } - - /* Initialize the state: */ - n=INITIAL_N; - i=0; - bias=INITIAL_BIAS; - firstSupplementaryIndex=1000000000; - - /* - * Main decoding loop: - * Start just after the last delimiter if any - * basic code points were copied; start at the beginning otherwise. - */ - for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) { - /* - * in is the index of the next character to be consumed, and - * destCPCount is the number of code points in the output array. - * - * Decode a generalized variable-length integer into delta, - * which gets added to i. The overflow checking is easier - * if we increase i as we go, then subtract off its starting - * value at the end to obtain delta. - */ - for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) { - if(in>=srcLength) { - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0; - } - - digit=basicToDigit[(uint8_t)src[in++]]; - if(digit<0) { - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - if(digit>(0x7fffffff-i)/w) { - /* integer overflow */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0; - } - - i+=digit*w; - /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt - t=k-bias; - if(t<TMIN) { - t=TMIN; - } else if(t>TMAX) { - t=TMAX; - } - */ - t=k-bias; - if(t<TMIN) { - t=TMIN; - } else if(k>=(bias+TMAX)) { - t=TMAX; - } - if(digit<t) { - break; - } - - if(w>0x7fffffff/(BASE-t)) { - /* integer overflow */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0; - } - w*=BASE-t; - } - - /* - * Modification from sample code: - * Increments destCPCount here, - * where needed instead of in for() loop tail. - */ - ++destCPCount; - bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0)); - - /* - * i was supposed to wrap around from (incremented) destCPCount to 0, - * incrementing n each time, so we'll fix that now: - */ - if(i/destCPCount>(0x7fffffff-n)) { - /* integer overflow */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0; - } - - n+=i/destCPCount; - i%=destCPCount; - /* not needed for Punycode: */ - /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */ - - if(n>0x10ffff || U_IS_SURROGATE(n)) { - /* Unicode code point overflow */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0; - } - - /* Insert n at position i of the output: */ - cpLength=U16_LENGTH(n); - if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) { - int32_t codeUnitIndex; - - /* - * Handle indexes when supplementary code points are present. - * - * In almost all cases, there will be only BMP code points before i - * and even in the entire string. - * This is handled with the same efficiency as with UTF-32. - * - * Only the rare cases with supplementary code points are handled - * more slowly - but not too bad since this is an insertion anyway. - */ - if(i<=firstSupplementaryIndex) { - codeUnitIndex=i; - if(cpLength>1) { - firstSupplementaryIndex=codeUnitIndex; - } else { - ++firstSupplementaryIndex; - } - } else { - codeUnitIndex=firstSupplementaryIndex; - U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex); - } - - /* use the UChar index codeUnitIndex instead of the code point index i */ - if(codeUnitIndex<destLength) { - uprv_memmove(dest+codeUnitIndex+cpLength, - dest+codeUnitIndex, - (destLength-codeUnitIndex)*U_SIZEOF_UCHAR); - if(caseFlags!=NULL) { - uprv_memmove(caseFlags+codeUnitIndex+cpLength, - caseFlags+codeUnitIndex, - destLength-codeUnitIndex); - } - } - if(cpLength==1) { - /* BMP, insert one code unit */ - dest[codeUnitIndex]=(UChar)n; - } else { - /* supplementary character, insert two code units */ - dest[codeUnitIndex]=U16_LEAD(n); - dest[codeUnitIndex+1]=U16_TRAIL(n); - } - if(caseFlags!=NULL) { - /* Case of last character determines uppercase flag: */ - caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]); - if(cpLength==2) { - caseFlags[codeUnitIndex+1]=FALSE; - } - } - } - destLength+=cpLength; - U_ASSERT(destLength>=0); - ++i; - } - - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); -} - -/* ### check notes on overflow handling - only necessary if not IDNA? are these Punycode functions to be public? */ - -#endif /* #if !UCONFIG_NO_IDNA */ |