From 237be2ed9e25e4d56deaf3935be0c217913e5a75 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Sat, 26 Oct 2019 16:27:51 +0200 Subject: encoding: make TextDecoder handle BOM correctly Do not accept the BOM if it comes from a different encoding, and only discard the BOM after it has actually been read (including when it is spread over multiple chunks in streaming mode). Fixes: https://github.com/nodejs/node/issues/25315 PR-URL: https://github.com/nodejs/node/pull/30132 Reviewed-By: Gus Caplan --- src/node_buffer.cc | 8 ++++---- src/node_i18n.cc | 37 +++++++++++++++++++++++++++---------- src/node_internals.h | 6 +++++- 3 files changed, 36 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 74684110a9..3aa1ea2535 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -219,10 +219,10 @@ size_t Length(Local obj) { } -inline MaybeLocal New(Environment* env, - Local ab, - size_t byte_offset, - size_t length) { +MaybeLocal New(Environment* env, + Local ab, + size_t byte_offset, + size_t length) { CHECK(!env->buffer_prototype_object().IsEmpty()); Local ui = Uint8Array::New(ab, byte_offset, length); Maybe mb = diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 162f5fda5d..ecc0528e76 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -95,6 +95,7 @@ using v8::NewStringType; using v8::Object; using v8::ObjectTemplate; using v8::String; +using v8::Uint8Array; using v8::Value; namespace i18n { @@ -227,14 +228,6 @@ class ConverterObject : public BaseObject, Converter { const char* source = input.data(); size_t source_length = input.length(); - if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) { - int32_t bomOffset = 0; - ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status); - source += bomOffset; - source_length -= bomOffset; - converter->bomSeen_ = true; - } - UChar* target = *result; ucnv_toUnicode(converter->conv, &target, target + (limit * sizeof(UChar)), @@ 
-242,10 +235,34 @@ class ConverterObject : public BaseObject, Converter { nullptr, flush, &status); if (U_SUCCESS(status)) { - if (limit > 0) + bool omit_initial_bom = false; + if (limit > 0) { result.SetLength(target - &result[0]); + if (result.length() > 0 && + converter->unicode_ && + !converter->ignoreBOM_ && + !converter->bomSeen_) { + // If the very first result in the stream is a BOM, and we are not + // explicitly told to ignore it, then we mark it for discarding. + if (result[0] == 0xFEFF) { + omit_initial_bom = true; + } + converter->bomSeen_ = true; + } + } ret = ToBufferEndian(env, &result); - args.GetReturnValue().Set(ret.ToLocalChecked()); + if (omit_initial_bom && !ret.IsEmpty()) { + // Perform `ret = ret.slice(2)`. + CHECK(ret.ToLocalChecked()->IsUint8Array()); + Local orig_ret = ret.ToLocalChecked().As(); + ret = Buffer::New(env, + orig_ret->Buffer(), + orig_ret->ByteOffset() + 2, + orig_ret->ByteLength() - 2) + .FromMaybe(Local()); + } + if (!ret.IsEmpty()) + args.GetReturnValue().Set(ret.ToLocalChecked()); return; } diff --git a/src/node_internals.h b/src/node_internals.h index 4ec883c891..2ec230d8b5 100644 --- a/src/node_internals.h +++ b/src/node_internals.h @@ -158,7 +158,11 @@ v8::MaybeLocal New(Environment* env, char* data, size_t length, bool uses_malloc); - +// Creates a Buffer instance over an existing ArrayBuffer. +v8::MaybeLocal New(Environment* env, + v8::Local ab, + size_t byte_offset, + size_t length); // Construct a Buffer from a MaybeStackBuffer (and also its subclasses like // Utf8Value and TwoByteValue). // If |buf| is invalidated, an empty MaybeLocal is returned, and nothing is -- cgit v1.2.3