diff options
author | James M Snell <jasnell@gmail.com> | 2017-06-12 08:25:53 -0700 |
---|---|---|
committer | James M Snell <jasnell@gmail.com> | 2017-07-24 14:04:13 -0700 |
commit | ed21cb1774d3e146f84a94400db0008a940656c3 (patch) | |
tree | 666b947e272b665045994da4476d7aa01bbe418f /src | |
parent | 7f5a745e35bec2bde067c2922f6a9146beed99e6 (diff) | |
download | android-node-v8-ed21cb1774d3e146f84a94400db0008a940656c3.tar.gz android-node-v8-ed21cb1774d3e146f84a94400db0008a940656c3.tar.bz2 android-node-v8-ed21cb1774d3e146f84a94400db0008a940656c3.zip |
util: implement WHATWG Encoding Standard API
Provide an (initially experimental) implementation of the WHATWG Encoding
Standard API (`TextDecoder` and `TextEncoder`). This is the same API
implemented on the browser side.
By default, with small-icu, only the UTF-8, UTF-16le and UTF-16be decoders
are supported. With full-icu enabled, every encoding other than iso-8859-16
is supported.
This provides a basic test, but does not include the full web platform
tests. Note: many of the web platform tests for this would fail by default
because we ship with small-icu by default.
A process warning will be emitted on first use to indicate that the
API is still experimental. No runtime flag is required to use the
feature.
Refs: https://encoding.spec.whatwg.org/
PR-URL: https://github.com/nodejs/node/pull/13644
Reviewed-By: Timothy Gu <timothygu99@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/node_buffer.cc | 23 | ||||
-rw-r--r-- | src/node_i18n.cc | 155 | ||||
-rw-r--r-- | src/node_i18n.h | 1 | ||||
-rw-r--r-- | src/node_util.cc | 1 |
4 files changed, 180 insertions, 0 deletions
diff --git a/src/node_buffer.cc b/src/node_buffer.cc index d05858ecbd..b3f5793f89 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -1200,6 +1200,27 @@ void Swap64(const FunctionCallbackInfo<Value>& args) { } +// Encode a single string to a UTF-8 Uint8Array (not Buffer). +// Used in TextEncoder.prototype.encode. +static void EncodeUtf8String(const FunctionCallbackInfo<Value>& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + + Local<String> str = args[0].As<String>(); + size_t length = str->Utf8Length(); + char* data = node::UncheckedMalloc(length); + str->WriteUtf8(data, + -1, // We are certain that `data` is sufficiently large + NULL, + String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8); + auto array_buf = ArrayBuffer::New(env->isolate(), data, length, + ArrayBufferCreationMode::kInternalized); + auto array = Uint8Array::New(array_buf, 0, length); + args.GetReturnValue().Set(array); +} + + // pass Buffer object to load prototype methods void SetupBufferJS(const FunctionCallbackInfo<Value>& args) { Environment* env = Environment::GetCurrent(args); @@ -1266,6 +1287,8 @@ void Initialize(Local<Object> target, env->SetMethod(target, "swap32", Swap32); env->SetMethod(target, "swap64", Swap64); + env->SetMethod(target, "encodeUtf8String", EncodeUtf8String); + target->Set(env->context(), FIXED_ONE_BYTE_STRING(env->isolate(), "kMaxLength"), Integer::NewFromUnsigned(env->isolate(), kMaxLength)).FromJust(); diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 3b33744949..2e1aeaa4cb 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -50,6 +50,8 @@ #include "env-inl.h" #include "util.h" #include "util-inl.h" +#include "base-object.h" +#include "base-object-inl.h" #include "v8.h" #include <unicode/utypes.h> @@ -86,10 +88,12 @@ namespace node { using v8::Context; using v8::FunctionCallbackInfo; +using v8::HandleScope; using v8::Isolate; using v8::Local; using v8::MaybeLocal; using 
v8::Object; +using v8::ObjectTemplate; using v8::String; using v8::Value; @@ -123,6 +127,15 @@ struct Converter { } } + explicit Converter(UConverter* converter, + const char* sub = NULL) : conv(converter) { + CHECK_NE(conv, nullptr); + UErrorCode status = U_ZERO_ERROR; + if (sub != NULL) { + ucnv_setSubstChars(conv, sub, strlen(sub), &status); + } + } + ~Converter() { ucnv_close(conv); } @@ -130,6 +143,143 @@ struct Converter { UConverter* conv; }; +class ConverterObject : public BaseObject, Converter { + public: + enum ConverterFlags { + CONVERTER_FLAGS_FLUSH = 0x1, + CONVERTER_FLAGS_FATAL = 0x2, + CONVERTER_FLAGS_IGNORE_BOM = 0x4 + }; + + ~ConverterObject() override {} + + static void Has(const FunctionCallbackInfo<Value>& args) { + Environment* env = Environment::GetCurrent(args); + HandleScope scope(env->isolate()); + + CHECK_GE(args.Length(), 1); + Utf8Value label(env->isolate(), args[0]); + + UErrorCode status = U_ZERO_ERROR; + UConverter* conv = ucnv_open(*label, &status); + args.GetReturnValue().Set(!!U_SUCCESS(status)); + ucnv_close(conv); + } + + static void Create(const FunctionCallbackInfo<Value>& args) { + Environment* env = Environment::GetCurrent(args); + HandleScope scope(env->isolate()); + + CHECK_GE(args.Length(), 2); + Utf8Value label(env->isolate(), args[0]); + int flags = args[1]->Uint32Value(env->context()).ToChecked(); + bool fatal = + (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL; + bool ignoreBOM = + (flags & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM; + + UErrorCode status = U_ZERO_ERROR; + UConverter* conv = ucnv_open(*label, &status); + if (U_FAILURE(status)) + return; + + if (fatal) { + status = U_ZERO_ERROR; + ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, + nullptr, nullptr, nullptr, &status); + } + + Local<ObjectTemplate> t = ObjectTemplate::New(env->isolate()); + t->SetInternalFieldCount(1); + Local<Object> obj = t->NewInstance(env->context()).ToLocalChecked(); + new ConverterObject(env, obj, conv, 
ignoreBOM); + args.GetReturnValue().Set(obj); + } + + static void Decode(const FunctionCallbackInfo<Value>& args) { + Environment* env = Environment::GetCurrent(args); + + CHECK_GE(args.Length(), 3); // Converter, Buffer, Flags + + Converter utf8("utf8"); + ConverterObject* converter; + ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>()); + SPREAD_BUFFER_ARG(args[1], input_obj); + int flags = args[2]->Uint32Value(env->context()).ToChecked(); + + UErrorCode status = U_ZERO_ERROR; + MaybeStackBuffer<UChar> result; + MaybeLocal<Object> ret; + size_t limit = ucnv_getMinCharSize(converter->conv) * + input_obj_length; + if (limit > 0) + result.AllocateSufficientStorage(limit); + + UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH; + + const char* source = input_obj_data; + size_t source_length = input_obj_length; + + if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) { + int32_t bomOffset = 0; + ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status); + source += bomOffset; + source_length -= bomOffset; + converter->bomSeen_ = true; + } + + UChar* target = *result; + ucnv_toUnicode(converter->conv, + &target, target + (limit * sizeof(UChar)), + &source, source + source_length, + NULL, flush, &status); + + if (U_SUCCESS(status)) { + if (limit > 0) + result.SetLength(target - &result[0]); + ret = ToBufferEndian(env, &result); + args.GetReturnValue().Set(ret.ToLocalChecked()); + goto reset; + } + + args.GetReturnValue().Set(status); + + reset: + if (flush) { + // Reset the converter state + converter->bomSeen_ = false; + ucnv_reset(converter->conv); + } + } + + protected: + ConverterObject(Environment* env, + v8::Local<v8::Object> wrap, + UConverter* converter, + bool ignoreBOM, + const char* sub = NULL) : + BaseObject(env, wrap), + Converter(converter, sub), + ignoreBOM_(ignoreBOM) { + MakeWeak<ConverterObject>(this); + + switch (ucnv_getType(converter)) { + case UCNV_UTF8: + case UCNV_UTF16_BigEndian: + 
case UCNV_UTF16_LittleEndian: + unicode_ = true; + break; + default: + unicode_ = false; + } + } + + private: + bool unicode_ = false; // True if this is a Unicode converter + bool ignoreBOM_ = false; // True if the BOM should be ignored on Unicode + bool bomSeen_ = false; // True if the BOM has been seen +}; + // One-Shot Converters void CopySourceBuffer(MaybeStackBuffer<UChar>* dest, @@ -717,6 +867,11 @@ void Init(Local<Object> target, // One-shot converters env->SetMethod(target, "icuErrName", ICUErrorName); env->SetMethod(target, "transcode", Transcode); + + // ConverterObject + env->SetMethod(target, "getConverter", ConverterObject::Create); + env->SetMethod(target, "decode", ConverterObject::Decode); + env->SetMethod(target, "hasConverter", ConverterObject::Has); } } // namespace i18n diff --git a/src/node_i18n.h b/src/node_i18n.h index adf9feb414..f7801ce666 100644 --- a/src/node_i18n.h +++ b/src/node_i18n.h @@ -25,6 +25,7 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS #include "node.h" +#include <unicode/ucnv.h> #include <string> #if defined(NODE_HAVE_I18N_SUPPORT) diff --git a/src/node_util.cc b/src/node_util.cc index 50de94bfb2..c1dff77386 100644 --- a/src/node_util.cc +++ b/src/node_util.cc @@ -21,6 +21,7 @@ using v8::Value; #define VALUE_METHOD_MAP(V) \ + V(isArrayBuffer, IsArrayBuffer) \ V(isAsyncFunction, IsAsyncFunction) \ V(isDataView, IsDataView) \ V(isDate, IsDate) \ |