// Copyright Joyent, Inc. and other Node contributors. // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the // "Software"), to deal in the Software without restriction, including // without limitation the rights to use, copy, modify, merge, publish, // distribute, sublicense, and/or sell copies of the Software, and to permit // persons to whom the Software is furnished to do so, subject to the // following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE // USE OR OTHER DEALINGS IN THE SOFTWARE. /* * notes: by srl295 * - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data * ( stubdata/libicudata.a ) containing nothing, no data, and it's also * linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT * macro names. That's the "english+root" data. * * If icu_data_path is non-null, the user has provided a path and we assume * it goes somewhere useful. We set that path in ICU, and exit. * If icu_data_path is null, they haven't set a path and we want the * "english+root" data. We call * udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...) * to load up the english+root data. * * - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full * data. All of the variables and command line options for changing data at * runtime are disabled, as they wouldn't fully override the internal data. * See: http://bugs.icu-project.org/trac/ticket/10924 */ #include "node_i18n.h" #if defined(NODE_HAVE_I18N_SUPPORT) #include "base_object-inl.h" #include "env-inl.h" #include "node.h" #include "node_buffer.h" #include "node_errors.h" #include "node_internals.h" #include "util-inl.h" #include "v8.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NODE_HAVE_SMALL_ICU /* if this is defined, we have a 'secondary' entry point. compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */ #define SMALL_ICUDATA_ENTRY_POINT \ SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME) #define SMALL_DEF2(major, suff) SMALL_DEF(major, suff) #ifndef U_LIB_SUFFIX_C_NAME #define SMALL_DEF(major, suff) icusmdt##major##_dat #else #define SMALL_DEF(major, suff) icusmdt##suff##major##_dat #endif extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[]; #endif namespace node { using v8::Context; using v8::FunctionCallbackInfo; using v8::HandleScope; using v8::Int32; using v8::Isolate; using v8::Local; using v8::MaybeLocal; using v8::NewStringType; using v8::Object; using v8::ObjectTemplate; using v8::String; using v8::Value; namespace i18n { namespace { template MaybeLocal ToBufferEndian(Environment* env, MaybeStackBuffer* buf) { MaybeLocal ret = Buffer::New(env, buf); if (ret.IsEmpty()) return ret; static_assert(sizeof(T) == 1 || sizeof(T) == 2, "Currently only one- or two-byte buffers are supported"); if (sizeof(T) > 1 && IsBigEndian()) { SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf); SwapBytes16(retbuf_data, retbuf_length); } return ret; } struct Converter { explicit Converter(const char* name, const char* sub = nullptr) : conv(nullptr) { UErrorCode status = U_ZERO_ERROR; conv = ucnv_open(name, &status); CHECK(U_SUCCESS(status)); if (sub != nullptr) { ucnv_setSubstChars(conv, sub, strlen(sub), &status); } } explicit Converter(UConverter* converter, const char* sub = nullptr) : conv(converter) { CHECK_NOT_NULL(conv); UErrorCode status = U_ZERO_ERROR; if (sub != nullptr) { ucnv_setSubstChars(conv, sub, strlen(sub), &status); } } ~Converter() { ucnv_close(conv); } UConverter* conv; }; class ConverterObject : public BaseObject, Converter { public: enum ConverterFlags { CONVERTER_FLAGS_FLUSH = 0x1, CONVERTER_FLAGS_FATAL = 0x2, CONVERTER_FLAGS_IGNORE_BOM = 0x4 }; ~ConverterObject() override {} static void Has(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); HandleScope scope(env->isolate()); CHECK_GE(args.Length(), 1); Utf8Value label(env->isolate(), args[0]); UErrorCode status = U_ZERO_ERROR; UConverter* conv = ucnv_open(*label, &status); args.GetReturnValue().Set(!!U_SUCCESS(status)); ucnv_close(conv); } static void Create(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); HandleScope scope(env->isolate()); Local t = ObjectTemplate::New(env->isolate()); t->SetInternalFieldCount(1); Local obj; if (!t->NewInstance(env->context()).ToLocal(&obj)) return; CHECK_GE(args.Length(), 2); Utf8Value label(env->isolate(), args[0]); int flags = args[1]->Uint32Value(env->context()).ToChecked(); bool fatal = (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL; bool ignoreBOM = (flags & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM; UErrorCode status = U_ZERO_ERROR; UConverter* conv = ucnv_open(*label, &status); if (U_FAILURE(status)) return; if (fatal) { status = U_ZERO_ERROR; ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, &status); } new ConverterObject(env, obj, conv, ignoreBOM); args.GetReturnValue().Set(obj); } static void Decode(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 3); // Converter, Buffer, Flags Converter utf8("utf8"); ConverterObject* converter; ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As()); SPREAD_BUFFER_ARG(args[1], input_obj); int flags = args[2]->Uint32Value(env->context()).ToChecked(); UErrorCode status = U_ZERO_ERROR; MaybeStackBuffer result; MaybeLocal ret; size_t limit = ucnv_getMinCharSize(converter->conv) * input_obj_length; if (limit > 0) result.AllocateSufficientStorage(limit); UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH; OnScopeLeave cleanup([&]() { if (flush) { // Reset the converter state. converter->bomSeen_ = false; ucnv_reset(converter->conv); } }); const char* source = input_obj_data; size_t source_length = input_obj_length; if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) { int32_t bomOffset = 0; ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status); source += bomOffset; source_length -= bomOffset; converter->bomSeen_ = true; } UChar* target = *result; ucnv_toUnicode(converter->conv, &target, target + (limit * sizeof(UChar)), &source, source + source_length, nullptr, flush, &status); if (U_SUCCESS(status)) { if (limit > 0) result.SetLength(target - &result[0]); ret = ToBufferEndian(env, &result); args.GetReturnValue().Set(ret.ToLocalChecked()); return; } args.GetReturnValue().Set(status); } SET_NO_MEMORY_INFO() SET_MEMORY_INFO_NAME(ConverterObject) SET_SELF_SIZE(ConverterObject) protected: ConverterObject(Environment* env, Local wrap, UConverter* converter, bool ignoreBOM, const char* sub = nullptr) : BaseObject(env, wrap), Converter(converter, sub), ignoreBOM_(ignoreBOM) { MakeWeak(); switch (ucnv_getType(converter)) { case UCNV_UTF8: case UCNV_UTF16_BigEndian: case UCNV_UTF16_LittleEndian: unicode_ = true; break; default: unicode_ = false; } } private: bool unicode_ = false; // True if this is a Unicode converter bool ignoreBOM_ = false; // True if the BOM should be ignored on Unicode bool bomSeen_ = false; // True if the BOM has been seen }; // One-Shot Converters void CopySourceBuffer(MaybeStackBuffer* dest, const char* data, const size_t length, const size_t length_in_chars) { dest->AllocateSufficientStorage(length_in_chars); char* dst = reinterpret_cast(**dest); memcpy(dst, data, length); if (IsBigEndian()) { SwapBytes16(dst, length); } } typedef MaybeLocal (*TranscodeFunc)(Environment* env, const char* fromEncoding, const char* toEncoding, const char* source, const size_t source_length, UErrorCode* status); MaybeLocal Transcode(Environment* env, const char* fromEncoding, const char* toEncoding, const char* source, const size_t source_length, UErrorCode* status) { *status = U_ZERO_ERROR; MaybeLocal ret; MaybeStackBuffer result; Converter to(toEncoding, "?"); Converter from(fromEncoding); const uint32_t limit = source_length * ucnv_getMaxCharSize(to.conv); result.AllocateSufficientStorage(limit); char* target = *result; ucnv_convertEx(to.conv, from.conv, &target, target + limit, &source, source + source_length, nullptr, nullptr, nullptr, nullptr, true, true, status); if (U_SUCCESS(*status)) { result.SetLength(target - &result[0]); ret = ToBufferEndian(env, &result); } return ret; } MaybeLocal TranscodeToUcs2(Environment* env, const char* fromEncoding, const char* toEncoding, const char* source, const size_t source_length, UErrorCode* status) { *status = U_ZERO_ERROR; MaybeLocal ret; MaybeStackBuffer destbuf(source_length); Converter from(fromEncoding); const size_t length_in_chars = source_length * sizeof(UChar); ucnv_toUChars(from.conv, *destbuf, length_in_chars, source, source_length, status); if (U_SUCCESS(*status)) ret = ToBufferEndian(env, &destbuf); return ret; } MaybeLocal TranscodeFromUcs2(Environment* env, const char* fromEncoding, const char* toEncoding, const char* source, const size_t source_length, UErrorCode* status) { *status = U_ZERO_ERROR; MaybeStackBuffer sourcebuf; MaybeLocal ret; Converter to(toEncoding, "?"); const size_t length_in_chars = source_length / sizeof(UChar); CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars); MaybeStackBuffer destbuf(length_in_chars); const uint32_t len = ucnv_fromUChars(to.conv, *destbuf, length_in_chars, *sourcebuf, length_in_chars, status); if (U_SUCCESS(*status)) { destbuf.SetLength(len); ret = ToBufferEndian(env, &destbuf); } return ret; } MaybeLocal TranscodeUcs2FromUtf8(Environment* env, const char* fromEncoding, const char* toEncoding, const char* source, const size_t source_length, UErrorCode* status) { *status = U_ZERO_ERROR; MaybeStackBuffer destbuf; int32_t result_length; u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length, source, source_length, status); MaybeLocal ret; if (U_SUCCESS(*status)) { destbuf.SetLength(result_length); ret = ToBufferEndian(env, &destbuf); } else if (*status == U_BUFFER_OVERFLOW_ERROR) { *status = U_ZERO_ERROR; destbuf.AllocateSufficientStorage(result_length); u_strFromUTF8(*destbuf, result_length, &result_length, source, source_length, status); if (U_SUCCESS(*status)) { destbuf.SetLength(result_length); ret = ToBufferEndian(env, &destbuf); } } return ret; } MaybeLocal TranscodeUtf8FromUcs2(Environment* env, const char* fromEncoding, const char* toEncoding, const char* source, const size_t source_length, UErrorCode* status) { *status = U_ZERO_ERROR; MaybeLocal ret; const size_t length_in_chars = source_length / sizeof(UChar); int32_t result_length; MaybeStackBuffer sourcebuf; MaybeStackBuffer destbuf; CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars); u_strToUTF8(*destbuf, destbuf.capacity(), &result_length, *sourcebuf, length_in_chars, status); if (U_SUCCESS(*status)) { destbuf.SetLength(result_length); ret = ToBufferEndian(env, &destbuf); } else if (*status == U_BUFFER_OVERFLOW_ERROR) { *status = U_ZERO_ERROR; destbuf.AllocateSufficientStorage(result_length); u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf, length_in_chars, status); if (U_SUCCESS(*status)) { destbuf.SetLength(result_length); ret = ToBufferEndian(env, &destbuf); } } return ret; } const char* EncodingName(const enum encoding encoding) { switch (encoding) { case ASCII: return "us-ascii"; case LATIN1: return "iso8859-1"; case UCS2: return "utf16le"; case UTF8: return "utf-8"; default: return nullptr; } } bool SupportedEncoding(const enum encoding encoding) { switch (encoding) { case ASCII: case LATIN1: case UCS2: case UTF8: return true; default: return false; } } void Transcode(const FunctionCallbackInfo&args) { Environment* env = Environment::GetCurrent(args); Isolate* isolate = env->isolate(); UErrorCode status = U_ZERO_ERROR; MaybeLocal result; CHECK(Buffer::HasInstance(args[0])); SPREAD_BUFFER_ARG(args[0], ts_obj); const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER); const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER); if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) { TranscodeFunc tfn = &Transcode; switch (fromEncoding) { case ASCII: case LATIN1: if (toEncoding == UCS2) tfn = &TranscodeToUcs2; break; case UTF8: if (toEncoding == UCS2) tfn = &TranscodeUcs2FromUtf8; break; case UCS2: switch (toEncoding) { case UCS2: tfn = &Transcode; break; case UTF8: tfn = &TranscodeUtf8FromUcs2; break; default: tfn = &TranscodeFromUcs2; } break; default: // This should not happen because of the SupportedEncoding checks ABORT(); } result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding), ts_obj_data, ts_obj_length, &status); } else { status = U_ILLEGAL_ARGUMENT_ERROR; } if (result.IsEmpty()) return args.GetReturnValue().Set(status); return args.GetReturnValue().Set(result.ToLocalChecked()); } void ICUErrorName(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK(args[0]->IsInt32()); UErrorCode status = static_cast(args[0].As()->Value()); args.GetReturnValue().Set( String::NewFromUtf8(env->isolate(), u_errorName(status), NewStringType::kNormal).ToLocalChecked()); } } // anonymous namespace bool InitializeICUDirectory(const std::string& path) { UErrorCode status = U_ZERO_ERROR; if (path.empty()) { #ifdef NODE_HAVE_SMALL_ICU // install the 'small' data. udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status); #else // !NODE_HAVE_SMALL_ICU // no small data, so nothing to do. #endif // !NODE_HAVE_SMALL_ICU } else { u_setDataDirectory(path.c_str()); u_init(&status); } return status == U_ZERO_ERROR; } int32_t ToUnicode(MaybeStackBuffer* buf, const char* input, size_t length) { UErrorCode status = U_ZERO_ERROR; uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE; UIDNA* uidna = uidna_openUTS46(options, &status); if (U_FAILURE(status)) return -1; UIDNAInfo info = UIDNA_INFO_INITIALIZER; int32_t len = uidna_nameToUnicodeUTF8(uidna, input, length, **buf, buf->capacity(), &info, &status); // Do not check info.errors like we do with ToASCII since ToUnicode always // returns a string, despite any possible errors that may have occurred. if (status == U_BUFFER_OVERFLOW_ERROR) { status = U_ZERO_ERROR; buf->AllocateSufficientStorage(len); len = uidna_nameToUnicodeUTF8(uidna, input, length, **buf, buf->capacity(), &info, &status); } // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode // string, regardless of whether an error occurred. if (U_FAILURE(status)) { len = -1; buf->SetLength(0); } else { buf->SetLength(len); } uidna_close(uidna); return len; } int32_t ToASCII(MaybeStackBuffer* buf, const char* input, size_t length, enum idna_mode mode) { UErrorCode status = U_ZERO_ERROR; uint32_t options = // CheckHyphens = false; handled later UIDNA_CHECK_BIDI | // CheckBidi = true UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing if (mode == IDNA_STRICT) { options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict // VerifyDnsLength = beStrict; // handled later } UIDNA* uidna = uidna_openUTS46(options, &status); if (U_FAILURE(status)) return -1; UIDNAInfo info = UIDNA_INFO_INITIALIZER; int32_t len = uidna_nameToASCII_UTF8(uidna, input, length, **buf, buf->capacity(), &info, &status); if (status == U_BUFFER_OVERFLOW_ERROR) { status = U_ZERO_ERROR; buf->AllocateSufficientStorage(len); len = uidna_nameToASCII_UTF8(uidna, input, length, **buf, buf->capacity(), &info, &status); } // In UTS #46 which specifies ToASCII, certain error conditions are // configurable through options, and the WHATWG URL Standard promptly elects // to disable some of them to accommodate for real-world use cases. // Unfortunately, ICU4C's IDNA module does not support disabling some of // these options through `options` above, and thus continues throwing // unnecessary errors. To counter this situation, we just filter out the // errors that may have happened afterwards, before deciding whether to // return an error from this function. // CheckHyphens = false // (Specified in the current UTS #46 draft rev. 18.) // Refs: // - https://github.com/whatwg/url/issues/53 // - https://github.com/whatwg/url/pull/309 // - http://www.unicode.org/review/pri317/ // - http://www.unicode.org/reports/tr46/tr46-18.html // - https://www.icann.org/news/announcement-2000-01-07-en info.errors &= ~UIDNA_ERROR_HYPHEN_3_4; info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN; info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN; if (mode != IDNA_STRICT) { // VerifyDnsLength = beStrict info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; } if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) { len = -1; buf->SetLength(0); } else { buf->SetLength(len); } uidna_close(uidna); return len; } static void ToUnicode(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); Utf8Value val(env->isolate(), args[0]); MaybeStackBuffer buf; int32_t len = ToUnicode(&buf, *val, val.length()); if (len < 0) { return env->ThrowError("Cannot convert name to Unicode"); } args.GetReturnValue().Set( String::NewFromUtf8(env->isolate(), *buf, NewStringType::kNormal, len).ToLocalChecked()); } static void ToASCII(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); Utf8Value val(env->isolate(), args[0]); // optional arg bool lenient = args[1]->BooleanValue(env->isolate()); enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT; MaybeStackBuffer buf; int32_t len = ToASCII(&buf, *val, val.length(), mode); if (len < 0) { return env->ThrowError("Cannot convert name to ASCII"); } args.GetReturnValue().Set( String::NewFromUtf8(env->isolate(), *buf, NewStringType::kNormal, len).ToLocalChecked()); } // This is similar to wcwidth except that it takes the current unicode // character properties database into consideration, allowing it to // correctly calculate the column widths of things like emoji's and // newer wide characters. wcwidth, on the other hand, uses a fixed // algorithm that does not take things like emoji into proper // consideration. // // TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by // GNOME Terminal) and Konsole don't consider them to be zero-width (see refs // below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't // allow it to be input. Linux's PTY terminal prints control characters as // Narrow rhombi. // // TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final // consonants are 0-width when combined with initial consonants; otherwise they // are technically Wide. But many terminals (including Konsole and // VTE/GLib-based) implement all medials and finals as 0-width. // // Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width // Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420 // Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223 static int GetColumnWidth(UChar32 codepoint, bool ambiguous_as_full_width = false) { const auto zero_width_mask = U_GC_CC_MASK | // C0/C1 control code U_GC_CF_MASK | // Format control character U_GC_ME_MASK | // Enclosing mark U_GC_MN_MASK; // Nonspacing mark if (codepoint != 0x00AD && // SOFT HYPHEN is Cf but not zero-width ((U_MASK(u_charType(codepoint)) & zero_width_mask) || u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) { return 0; } // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a // codepoint as being full width, wide, ambiguous, neutral, narrow, // or halfwidth. const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH); switch (eaw) { case U_EA_FULLWIDTH: case U_EA_WIDE: return 2; case U_EA_AMBIGUOUS: // See: http://www.unicode.org/reports/tr11/#Ambiguous for details if (ambiguous_as_full_width) { return 2; } // If ambiguous_as_full_width is false: // Fall through case U_EA_NEUTRAL: if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) { return 2; } // Fall through case U_EA_HALFWIDTH: case U_EA_NARROW: default: return 1; } } // Returns the column width for the given String. static void GetStringWidth(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); if (args.Length() < 1) return; bool ambiguous_as_full_width = args[1]->IsTrue(); bool expand_emoji_sequence = args[2]->IsTrue(); if (args[0]->IsNumber()) { uint32_t val; if (!args[0]->Uint32Value(env->context()).To(&val)) return; args.GetReturnValue().Set(GetColumnWidth(val, ambiguous_as_full_width)); return; } TwoByteValue value(env->isolate(), args[0]); // reinterpret_cast is required by windows to compile UChar* str = reinterpret_cast(*value); static_assert(sizeof(*str) == sizeof(**value), "sizeof(*str) == sizeof(**value)"); UChar32 c = 0; UChar32 p; size_t n = 0; uint32_t width = 0; while (n < value.length()) { p = c; U16_NEXT(str, n, value.length(), c); // Don't count individual emoji codepoints that occur within an // emoji sequence. This is not necessarily foolproof. Some // environments display emoji sequences in the appropriate // condensed form (as a single emoji glyph), other environments // may not understand an emoji sequence and will display each // individual emoji separately. When this happens, the width // calculated will be off, and there's no reliable way of knowing // in advance if a particular sequence is going to be supported. // The expand_emoji_sequence option allows the caller to skip this // check and count each code within an emoji sequence separately. if (!expand_emoji_sequence && n > 0 && p == 0x200d && // 0x200d == ZWJ (zero width joiner) (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) || u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) { continue; } width += GetColumnWidth(c, ambiguous_as_full_width); } args.GetReturnValue().Set(width); } void Initialize(Local target, Local unused, Local context, void* priv) { Environment* env = Environment::GetCurrent(context); env->SetMethod(target, "toUnicode", ToUnicode); env->SetMethod(target, "toASCII", ToASCII); env->SetMethod(target, "getStringWidth", GetStringWidth); // One-shot converters env->SetMethod(target, "icuErrName", ICUErrorName); env->SetMethod(target, "transcode", Transcode); // ConverterObject env->SetMethod(target, "getConverter", ConverterObject::Create); env->SetMethod(target, "decode", ConverterObject::Decode); env->SetMethod(target, "hasConverter", ConverterObject::Has); } } // namespace i18n } // namespace node NODE_MODULE_CONTEXT_AWARE_INTERNAL(icu, node::i18n::Initialize) #endif // NODE_HAVE_I18N_SUPPORT