summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAnna Henningsen <anna@addaleax.net>2019-10-26 16:27:51 +0200
committerAnna Henningsen <anna@addaleax.net>2019-11-05 20:19:09 +0100
commit237be2ed9e25e4d56deaf3935be0c217913e5a75 (patch)
treef3389797e763be54b82a159a9cd780ea0f2dbfd3 /src
parentafd29c9502449121aacba37b253dc39e159aae03 (diff)
downloadandroid-node-v8-237be2ed9e25e4d56deaf3935be0c217913e5a75.tar.gz
android-node-v8-237be2ed9e25e4d56deaf3935be0c217913e5a75.tar.bz2
android-node-v8-237be2ed9e25e4d56deaf3935be0c217913e5a75.zip
encoding: make TextDecoder handle BOM correctly
Do not accept the BOM if it comes from a different encoding, and only discard the BOM after it has actually been read (including when it is spread over multiple chunks in streaming mode).

Fixes: https://github.com/nodejs/node/issues/25315
PR-URL: https://github.com/nodejs/node/pull/30132
Reviewed-By: Gus Caplan <me@gus.host>
Diffstat (limited to 'src')
-rw-r--r--src/node_buffer.cc8
-rw-r--r--src/node_i18n.cc37
-rw-r--r--src/node_internals.h6
3 files changed, 36 insertions, 15 deletions
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index 74684110a9..3aa1ea2535 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -219,10 +219,10 @@ size_t Length(Local<Object> obj) {
}
-inline MaybeLocal<Uint8Array> New(Environment* env,
- Local<ArrayBuffer> ab,
- size_t byte_offset,
- size_t length) {
+MaybeLocal<Uint8Array> New(Environment* env,
+ Local<ArrayBuffer> ab,
+ size_t byte_offset,
+ size_t length) {
CHECK(!env->buffer_prototype_object().IsEmpty());
Local<Uint8Array> ui = Uint8Array::New(ab, byte_offset, length);
Maybe<bool> mb =
diff --git a/src/node_i18n.cc b/src/node_i18n.cc
index 162f5fda5d..ecc0528e76 100644
--- a/src/node_i18n.cc
+++ b/src/node_i18n.cc
@@ -95,6 +95,7 @@ using v8::NewStringType;
using v8::Object;
using v8::ObjectTemplate;
using v8::String;
+using v8::Uint8Array;
using v8::Value;
namespace i18n {
@@ -227,14 +228,6 @@ class ConverterObject : public BaseObject, Converter {
const char* source = input.data();
size_t source_length = input.length();
- if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) {
- int32_t bomOffset = 0;
- ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status);
- source += bomOffset;
- source_length -= bomOffset;
- converter->bomSeen_ = true;
- }
-
UChar* target = *result;
ucnv_toUnicode(converter->conv,
&target, target + (limit * sizeof(UChar)),
@@ -242,10 +235,34 @@ class ConverterObject : public BaseObject, Converter {
nullptr, flush, &status);
if (U_SUCCESS(status)) {
- if (limit > 0)
+ bool omit_initial_bom = false;
+ if (limit > 0) {
result.SetLength(target - &result[0]);
+ if (result.length() > 0 &&
+ converter->unicode_ &&
+ !converter->ignoreBOM_ &&
+ !converter->bomSeen_) {
+ // If the very first result in the stream is a BOM, and we are not
+ // explicitly told to ignore it, then we mark it for discarding.
+ if (result[0] == 0xFEFF) {
+ omit_initial_bom = true;
+ }
+ converter->bomSeen_ = true;
+ }
+ }
ret = ToBufferEndian(env, &result);
- args.GetReturnValue().Set(ret.ToLocalChecked());
+ if (omit_initial_bom && !ret.IsEmpty()) {
+ // Perform `ret = ret.slice(2)`.
+ CHECK(ret.ToLocalChecked()->IsUint8Array());
+ Local<Uint8Array> orig_ret = ret.ToLocalChecked().As<Uint8Array>();
+ ret = Buffer::New(env,
+ orig_ret->Buffer(),
+ orig_ret->ByteOffset() + 2,
+ orig_ret->ByteLength() - 2)
+ .FromMaybe(Local<Uint8Array>());
+ }
+ if (!ret.IsEmpty())
+ args.GetReturnValue().Set(ret.ToLocalChecked());
return;
}
diff --git a/src/node_internals.h b/src/node_internals.h
index 4ec883c891..2ec230d8b5 100644
--- a/src/node_internals.h
+++ b/src/node_internals.h
@@ -158,7 +158,11 @@ v8::MaybeLocal<v8::Object> New(Environment* env,
char* data,
size_t length,
bool uses_malloc);
-
+// Creates a Buffer instance over an existing ArrayBuffer.
+v8::MaybeLocal<v8::Uint8Array> New(Environment* env,
+ v8::Local<v8::ArrayBuffer> ab,
+ size_t byte_offset,
+ size_t length);
// Construct a Buffer from a MaybeStackBuffer (and also its subclasses like
// Utf8Value and TwoByteValue).
// If |buf| is invalidated, an empty MaybeLocal is returned, and nothing is