summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/internal/encoding.js27
-rw-r--r--src/node_buffer.cc8
-rw-r--r--src/node_i18n.cc37
-rw-r--r--src/node_internals.h6
-rw-r--r--test/wpt/status/encoding.json5
5 files changed, 49 insertions, 34 deletions
diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
index dabcd5eacc..16de0c986e 100644
--- a/lib/internal/encoding.js
+++ b/lib/internal/encoding.js
@@ -484,25 +484,22 @@ function makeTextDecoderJS() {
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
}
- if (!this[kBOMSeen] && !(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM)) {
- if (this[kEncoding] === 'utf-8') {
- if (input.length >= 3 &&
- input[0] === 0xEF && input[1] === 0xBB && input[2] === 0xBF) {
- input = input.slice(3);
- }
- } else if (this[kEncoding] === 'utf-16le') {
- if (input.length >= 2 && input[0] === 0xFF && input[1] === 0xFE) {
- input = input.slice(2);
- }
+ let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
+ this[kHandle].end(input) :
+ this[kHandle].write(input);
+
+ if (result.length > 0 &&
+ !this[kBOMSeen] &&
+ !(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM)) {
+ // If the very first result in the stream is a BOM, and we are not
+ // explicitly told to ignore it, then we discard it.
+ if (result[0] === '\ufeff') {
+ result = result.slice(1);
}
this[kBOMSeen] = true;
}
- if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
- return this[kHandle].end(input);
- }
-
- return this[kHandle].write(input);
+ return result;
}
}
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index 74684110a9..3aa1ea2535 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -219,10 +219,10 @@ size_t Length(Local<Object> obj) {
}
-inline MaybeLocal<Uint8Array> New(Environment* env,
- Local<ArrayBuffer> ab,
- size_t byte_offset,
- size_t length) {
+MaybeLocal<Uint8Array> New(Environment* env,
+ Local<ArrayBuffer> ab,
+ size_t byte_offset,
+ size_t length) {
CHECK(!env->buffer_prototype_object().IsEmpty());
Local<Uint8Array> ui = Uint8Array::New(ab, byte_offset, length);
Maybe<bool> mb =
diff --git a/src/node_i18n.cc b/src/node_i18n.cc
index 162f5fda5d..ecc0528e76 100644
--- a/src/node_i18n.cc
+++ b/src/node_i18n.cc
@@ -95,6 +95,7 @@ using v8::NewStringType;
using v8::Object;
using v8::ObjectTemplate;
using v8::String;
+using v8::Uint8Array;
using v8::Value;
namespace i18n {
@@ -227,14 +228,6 @@ class ConverterObject : public BaseObject, Converter {
const char* source = input.data();
size_t source_length = input.length();
- if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) {
- int32_t bomOffset = 0;
- ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status);
- source += bomOffset;
- source_length -= bomOffset;
- converter->bomSeen_ = true;
- }
-
UChar* target = *result;
ucnv_toUnicode(converter->conv,
&target, target + (limit * sizeof(UChar)),
@@ -242,10 +235,34 @@ class ConverterObject : public BaseObject, Converter {
nullptr, flush, &status);
if (U_SUCCESS(status)) {
- if (limit > 0)
+ bool omit_initial_bom = false;
+ if (limit > 0) {
result.SetLength(target - &result[0]);
+ if (result.length() > 0 &&
+ converter->unicode_ &&
+ !converter->ignoreBOM_ &&
+ !converter->bomSeen_) {
+ // If the very first result in the stream is a BOM, and we are not
+ // explicitly told to ignore it, then we mark it for discarding.
+ if (result[0] == 0xFEFF) {
+ omit_initial_bom = true;
+ }
+ converter->bomSeen_ = true;
+ }
+ }
ret = ToBufferEndian(env, &result);
- args.GetReturnValue().Set(ret.ToLocalChecked());
+ if (omit_initial_bom && !ret.IsEmpty()) {
+ // Perform `ret = ret.slice(2)`.
+ CHECK(ret.ToLocalChecked()->IsUint8Array());
+ Local<Uint8Array> orig_ret = ret.ToLocalChecked().As<Uint8Array>();
+ ret = Buffer::New(env,
+ orig_ret->Buffer(),
+ orig_ret->ByteOffset() + 2,
+ orig_ret->ByteLength() - 2)
+ .FromMaybe(Local<Uint8Array>());
+ }
+ if (!ret.IsEmpty())
+ args.GetReturnValue().Set(ret.ToLocalChecked());
return;
}
diff --git a/src/node_internals.h b/src/node_internals.h
index 4ec883c891..2ec230d8b5 100644
--- a/src/node_internals.h
+++ b/src/node_internals.h
@@ -158,7 +158,11 @@ v8::MaybeLocal<v8::Object> New(Environment* env,
char* data,
size_t length,
bool uses_malloc);
-
+// Creates a Buffer instance over an existing ArrayBuffer.
+v8::MaybeLocal<v8::Uint8Array> New(Environment* env,
+ v8::Local<v8::ArrayBuffer> ab,
+ size_t byte_offset,
+ size_t length);
// Construct a Buffer from a MaybeStackBuffer (and also its subclasses like
// Utf8Value and TwoByteValue).
// If |buf| is invalidated, an empty MaybeLocal is returned, and nothing is
diff --git a/test/wpt/status/encoding.json b/test/wpt/status/encoding.json
index 088eed802f..b51dde2aae 100644
--- a/test/wpt/status/encoding.json
+++ b/test/wpt/status/encoding.json
@@ -22,10 +22,7 @@
"fail": "iso-2022-jp decoder state handling bug: https://encoding.spec.whatwg.org/#iso-2022-jp-decoder"
},
"textdecoder-byte-order-marks.any.js": {
- "fail": "Mismatching BOM should not be ignored"
- },
- "textdecoder-copy.any.js": {
- "fail": "Should not have output BOM: https://encoding.spec.whatwg.org/#concept-td-serialize"
+ "requires": ["small-icu"]
},
"textdecoder-fatal-single-byte.any.js": {
"requires": ["full-icu"],