summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnna Henningsen <anna@addaleax.net>2018-08-31 16:57:03 +0200
committerAnna Henningsen <anna@addaleax.net>2018-09-03 22:33:42 +0200
commit71f633a32f1f5617c1c7b60ad936342a579bff74 (patch)
treebcfc550c80f66a630cc149fbc80f204d0ca6cc62
parent6455bea52ba262cea8d1c706ec8a2eec192ec643 (diff)
downloadandroid-node-v8-71f633a32f1f5617c1c7b60ad936342a579bff74.tar.gz
android-node-v8-71f633a32f1f5617c1c7b60ad936342a579bff74.tar.bz2
android-node-v8-71f633a32f1f5617c1c7b60ad936342a579bff74.zip
src: allow UTF-16 in generic StringBytes decode call
This allows removing a number of special cases in other parts of the code, at least one of which was incorrect in expecting aligned input when that was not guaranteed. Fixes: https://github.com/nodejs/node/issues/22358 PR-URL: https://github.com/nodejs/node/pull/22622 Reviewed-By: Tobias Nießen <tniessen@tnie.de>
-rw-r--r--src/node.h1
-rw-r--r--src/node_buffer.cc59
-rw-r--r--src/string_bytes.cc57
-rw-r--r--src/string_bytes.h1
-rw-r--r--src/string_decoder.cc10
-rw-r--r--test/parallel/test-string-decoder.js8
6 files changed, 40 insertions, 96 deletions
diff --git a/src/node.h b/src/node.h
index 1154c39765..90e764f386 100644
--- a/src/node.h
+++ b/src/node.h
@@ -410,7 +410,6 @@ NODE_DEPRECATED("Use FatalException(isolate, ...)",
return FatalException(v8::Isolate::GetCurrent(), try_catch);
})
-// Don't call with encoding=UCS2.
NODE_EXTERN v8::Local<v8::Value> Encode(v8::Isolate* isolate,
const char* buf,
size_t len,
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index 4a29873c34..7c4c0d9f65 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -467,65 +467,6 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
}
-template <>
-void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
- Isolate* isolate = args.GetIsolate();
- Environment* env = Environment::GetCurrent(isolate);
-
- THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
- SPREAD_BUFFER_ARG(args.This(), ts_obj);
-
- if (ts_obj_length == 0)
- return args.GetReturnValue().SetEmptyString();
-
- SLICE_START_END(args[0], args[1], ts_obj_length)
- length /= 2;
-
- const char* data = ts_obj_data + start;
- const uint16_t* buf;
- bool release = false;
-
- // Node's "ucs2" encoding expects LE character data inside a Buffer, so we
- // need to reorder on BE platforms. See https://nodejs.org/api/buffer.html
- // regarding Node's "ucs2" encoding specification.
- const bool aligned = (reinterpret_cast<uintptr_t>(data) % sizeof(*buf) == 0);
- if (IsLittleEndian() && !aligned) {
- // Make a copy to avoid unaligned accesses in v8::String::NewFromTwoByte().
- // This applies ONLY to little endian platforms, as misalignment will be
- // handled by a byte-swapping operation in StringBytes::Encode on
- // big endian platforms.
- uint16_t* copy = new uint16_t[length];
- for (size_t i = 0, k = 0; i < length; i += 1, k += 2) {
- // Assumes that the input is little endian.
- const uint8_t lo = static_cast<uint8_t>(data[k + 0]);
- const uint8_t hi = static_cast<uint8_t>(data[k + 1]);
- copy[i] = lo | hi << 8;
- }
- buf = copy;
- release = true;
- } else {
- buf = reinterpret_cast<const uint16_t*>(data);
- }
-
- Local<Value> error;
- MaybeLocal<Value> ret =
- StringBytes::Encode(isolate,
- buf,
- length,
- &error);
-
- if (release)
- delete[] buf;
-
- if (ret.IsEmpty()) {
- CHECK(!error.IsEmpty());
- isolate->ThrowException(error);
- return;
- }
- args.GetReturnValue().Set(ret.ToLocalChecked());
-}
-
-
// bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd])
void Copy(const FunctionCallbackInfo<Value> &args) {
Environment* env = Environment::GetCurrent(args);
diff --git a/src/string_bytes.cc b/src/string_bytes.cc
index c38f368d41..e4c4c0a7db 100644
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -625,7 +625,6 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
size_t buflen,
enum encoding encoding,
Local<Value>* error) {
- CHECK_NE(encoding, UCS2);
CHECK_BUFLEN_IN_RANGE(buflen);
if (!buflen && encoding != BUFFER) {
@@ -703,6 +702,37 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
return ExternOneByteString::New(isolate, dst, dlen, error);
}
+ case UCS2: {
+ if (IsBigEndian()) {
+ uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen / 2);
+ if (dst == nullptr) {
+ *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
+ return MaybeLocal<Value>();
+ }
+ for (size_t i = 0, k = 0; k < buflen / 2; i += 2, k += 1) {
+ // The input is in *little endian*, because that's what Node.js
+ // expects, so the high byte comes after the low byte.
+ const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
+ const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
+ dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
+ }
+ return ExternTwoByteString::New(isolate, dst, buflen / 2, error);
+ }
+ if (reinterpret_cast<uintptr_t>(buf) % 2 != 0) {
+ // Unaligned data still means we can't directly pass it to V8.
+ char* dst = node::UncheckedMalloc(buflen);
+ if (dst == nullptr) {
+ *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
+ return MaybeLocal<Value>();
+ }
+ memcpy(dst, buf, buflen);
+ return ExternTwoByteString::New(
+ isolate, reinterpret_cast<uint16_t*>(dst), buflen / 2, error);
+ }
+ return ExternTwoByteString::NewFromCopy(
+ isolate, reinterpret_cast<const uint16_t*>(buf), buflen / 2, error);
+ }
+
default:
CHECK(0 && "unknown encoding");
break;
@@ -742,30 +772,7 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
enum encoding encoding,
Local<Value>* error) {
const size_t len = strlen(buf);
- MaybeLocal<Value> ret;
- if (encoding == UCS2) {
- // In Node, UCS2 means utf16le. The data must be in little-endian
- // order and must be aligned on 2-bytes. This returns an empty
- // value if it's not aligned and ensures the appropriate byte order
- // on big endian architectures.
- const bool be = IsBigEndian();
- if (len % 2 != 0)
- return ret;
- std::vector<uint16_t> vec(len / 2);
- for (size_t i = 0, k = 0; i < len; i += 2, k += 1) {
- const uint8_t hi = static_cast<uint8_t>(buf[i + 0]);
- const uint8_t lo = static_cast<uint8_t>(buf[i + 1]);
- vec[k] = be ?
- static_cast<uint16_t>(hi) << 8 | lo
- : static_cast<uint16_t>(lo) << 8 | hi;
- }
- ret = vec.empty() ?
- static_cast< Local<Value> >(String::Empty(isolate))
- : StringBytes::Encode(isolate, &vec[0], vec.size(), error);
- } else {
- ret = StringBytes::Encode(isolate, buf, len, encoding, error);
- }
- return ret;
+ return Encode(isolate, buf, len, encoding, error);
}
} // namespace node
diff --git a/src/string_bytes.h b/src/string_bytes.h
index 9e64fa0ee5..87a296663e 100644
--- a/src/string_bytes.h
+++ b/src/string_bytes.h
@@ -90,7 +90,6 @@ class StringBytes {
int* chars_written = nullptr);
// Take the bytes in the src, and turn it into a Buffer or String.
- // Don't call with encoding=UCS2.
static v8::MaybeLocal<v8::Value> Encode(v8::Isolate* isolate,
const char* buf,
size_t buflen,
diff --git a/src/string_decoder.cc b/src/string_decoder.cc
index b75169ff00..fa8201faff 100644
--- a/src/string_decoder.cc
+++ b/src/string_decoder.cc
@@ -30,16 +30,6 @@ MaybeLocal<String> MakeString(Isolate* isolate,
data,
v8::NewStringType::kNormal,
length);
- } else if (encoding == UCS2) {
-#ifdef DEBUG
- CHECK_EQ(reinterpret_cast<uintptr_t>(data) % 2, 0);
- CHECK_EQ(length % 2, 0);
-#endif
- ret = StringBytes::Encode(
- isolate,
- reinterpret_cast<const uint16_t*>(data),
- length / 2,
- &error);
} else {
ret = StringBytes::Encode(
isolate,
diff --git a/test/parallel/test-string-decoder.js b/test/parallel/test-string-decoder.js
index 5571aaeeb7..6e4f4b121d 100644
--- a/test/parallel/test-string-decoder.js
+++ b/test/parallel/test-string-decoder.js
@@ -143,6 +143,14 @@ decoder = new StringDecoder('utf16le');
assert.strictEqual(decoder.write(Buffer.from('3DD84D', 'hex')), '\ud83d');
assert.strictEqual(decoder.end(), '');
+// Regression test for https://github.com/nodejs/node/issues/22358
+// (unaligned UTF-16 access).
+decoder = new StringDecoder('utf16le');
+assert.strictEqual(decoder.write(Buffer.alloc(1)), '');
+assert.strictEqual(decoder.write(Buffer.alloc(20)), '\0'.repeat(10));
+assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24));
+assert.strictEqual(decoder.end(), '');
+
common.expectsError(
() => new StringDecoder(1),
{