diff options
author | Anna Henningsen <anna@addaleax.net> | 2018-08-31 16:57:03 +0200 |
---|---|---|
committer | Anna Henningsen <anna@addaleax.net> | 2018-09-03 22:33:42 +0200 |
commit | 71f633a32f1f5617c1c7b60ad936342a579bff74 (patch) | |
tree | bcfc550c80f66a630cc149fbc80f204d0ca6cc62 | |
parent | 6455bea52ba262cea8d1c706ec8a2eec192ec643 (diff) | |
download | android-node-v8-71f633a32f1f5617c1c7b60ad936342a579bff74.tar.gz android-node-v8-71f633a32f1f5617c1c7b60ad936342a579bff74.tar.bz2 android-node-v8-71f633a32f1f5617c1c7b60ad936342a579bff74.zip |
src: allow UTF-16 in generic StringBytes decode call
This allows removing a number of special cases in other
parts of the code, at least one of which was incorrect
in expecting aligned input when that was not guaranteed.
Fixes: https://github.com/nodejs/node/issues/22358
PR-URL: https://github.com/nodejs/node/pull/22622
Reviewed-By: Tobias Nießen <tniessen@tnie.de>
-rw-r--r-- | src/node.h | 1 | ||||
-rw-r--r-- | src/node_buffer.cc | 59 | ||||
-rw-r--r-- | src/string_bytes.cc | 57 | ||||
-rw-r--r-- | src/string_bytes.h | 1 | ||||
-rw-r--r-- | src/string_decoder.cc | 10 | ||||
-rw-r--r-- | test/parallel/test-string-decoder.js | 8 |
6 files changed, 40 insertions, 96 deletions
diff --git a/src/node.h b/src/node.h index 1154c39765..90e764f386 100644 --- a/src/node.h +++ b/src/node.h @@ -410,7 +410,6 @@ NODE_DEPRECATED("Use FatalException(isolate, ...)", return FatalException(v8::Isolate::GetCurrent(), try_catch); }) -// Don't call with encoding=UCS2. NODE_EXTERN v8::Local<v8::Value> Encode(v8::Isolate* isolate, const char* buf, size_t len, diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 4a29873c34..7c4c0d9f65 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -467,65 +467,6 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) { } -template <> -void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) { - Isolate* isolate = args.GetIsolate(); - Environment* env = Environment::GetCurrent(isolate); - - THROW_AND_RETURN_UNLESS_BUFFER(env, args.This()); - SPREAD_BUFFER_ARG(args.This(), ts_obj); - - if (ts_obj_length == 0) - return args.GetReturnValue().SetEmptyString(); - - SLICE_START_END(args[0], args[1], ts_obj_length) - length /= 2; - - const char* data = ts_obj_data + start; - const uint16_t* buf; - bool release = false; - - // Node's "ucs2" encoding expects LE character data inside a Buffer, so we - // need to reorder on BE platforms. See https://nodejs.org/api/buffer.html - // regarding Node's "ucs2" encoding specification. - const bool aligned = (reinterpret_cast<uintptr_t>(data) % sizeof(*buf) == 0); - if (IsLittleEndian() && !aligned) { - // Make a copy to avoid unaligned accesses in v8::String::NewFromTwoByte(). - // This applies ONLY to little endian platforms, as misalignment will be - // handled by a byte-swapping operation in StringBytes::Encode on - // big endian platforms. - uint16_t* copy = new uint16_t[length]; - for (size_t i = 0, k = 0; i < length; i += 1, k += 2) { - // Assumes that the input is little endian. - const uint8_t lo = static_cast<uint8_t>(data[k + 0]); - const uint8_t hi = static_cast<uint8_t>(data[k + 1]); - copy[i] = lo | hi << 8; - } - buf = copy; - release = true; - } else { - buf = reinterpret_cast<const uint16_t*>(data); - } - - Local<Value> error; - MaybeLocal<Value> ret = - StringBytes::Encode(isolate, - buf, - length, - &error); - - if (release) - delete[] buf; - - if (ret.IsEmpty()) { - CHECK(!error.IsEmpty()); - isolate->ThrowException(error); - return; - } - args.GetReturnValue().Set(ret.ToLocalChecked()); -} - - // bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd]) void Copy(const FunctionCallbackInfo<Value> &args) { Environment* env = Environment::GetCurrent(args); diff --git a/src/string_bytes.cc b/src/string_bytes.cc index c38f368d41..e4c4c0a7db 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -625,7 +625,6 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate, size_t buflen, enum encoding encoding, Local<Value>* error) { - CHECK_NE(encoding, UCS2); CHECK_BUFLEN_IN_RANGE(buflen); if (!buflen && encoding != BUFFER) { @@ -703,6 +702,37 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate, return ExternOneByteString::New(isolate, dst, dlen, error); } + case UCS2: { + if (IsBigEndian()) { + uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen / 2); + if (dst == nullptr) { + *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate); + return MaybeLocal<Value>(); + } + for (size_t i = 0, k = 0; k < buflen / 2; i += 2, k += 1) { + // The input is in *little endian*, because that's what Node.js + // expects, so the high byte comes after the low byte. + const uint8_t hi = static_cast<uint8_t>(buf[i + 1]); + const uint8_t lo = static_cast<uint8_t>(buf[i + 0]); + dst[k] = static_cast<uint16_t>(hi) << 8 | lo; + } + return ExternTwoByteString::New(isolate, dst, buflen / 2, error); + } + if (reinterpret_cast<uintptr_t>(buf) % 2 != 0) { + // Unaligned data still means we can't directly pass it to V8. + char* dst = node::UncheckedMalloc(buflen); + if (dst == nullptr) { + *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate); + return MaybeLocal<Value>(); + } + memcpy(dst, buf, buflen); + return ExternTwoByteString::New( + isolate, reinterpret_cast<uint16_t*>(dst), buflen / 2, error); + } + return ExternTwoByteString::NewFromCopy( + isolate, reinterpret_cast<const uint16_t*>(buf), buflen / 2, error); + } + default: CHECK(0 && "unknown encoding"); break; @@ -742,30 +772,7 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate, enum encoding encoding, Local<Value>* error) { const size_t len = strlen(buf); - MaybeLocal<Value> ret; - if (encoding == UCS2) { - // In Node, UCS2 means utf16le. The data must be in little-endian - // order and must be aligned on 2-bytes. This returns an empty - // value if it's not aligned and ensures the appropriate byte order - // on big endian architectures. - const bool be = IsBigEndian(); - if (len % 2 != 0) - return ret; - std::vector<uint16_t> vec(len / 2); - for (size_t i = 0, k = 0; i < len; i += 2, k += 1) { - const uint8_t hi = static_cast<uint8_t>(buf[i + 0]); - const uint8_t lo = static_cast<uint8_t>(buf[i + 1]); - vec[k] = be ? - static_cast<uint16_t>(hi) << 8 | lo - : static_cast<uint16_t>(lo) << 8 | hi; - } - ret = vec.empty() ? - static_cast< Local<Value> >(String::Empty(isolate)) - : StringBytes::Encode(isolate, &vec[0], vec.size(), error); - } else { - ret = StringBytes::Encode(isolate, buf, len, encoding, error); - } - return ret; + return Encode(isolate, buf, len, encoding, error); } } // namespace node diff --git a/src/string_bytes.h b/src/string_bytes.h index 9e64fa0ee5..87a296663e 100644 --- a/src/string_bytes.h +++ b/src/string_bytes.h @@ -90,7 +90,6 @@ class StringBytes { int* chars_written = nullptr); // Take the bytes in the src, and turn it into a Buffer or String. - // Don't call with encoding=UCS2. static v8::MaybeLocal<v8::Value> Encode(v8::Isolate* isolate, const char* buf, size_t buflen, diff --git a/src/string_decoder.cc b/src/string_decoder.cc index b75169ff00..fa8201faff 100644 --- a/src/string_decoder.cc +++ b/src/string_decoder.cc @@ -30,16 +30,6 @@ MaybeLocal<String> MakeString(Isolate* isolate, data, v8::NewStringType::kNormal, length); - } else if (encoding == UCS2) { -#ifdef DEBUG - CHECK_EQ(reinterpret_cast<uintptr_t>(data) % 2, 0); - CHECK_EQ(length % 2, 0); -#endif - ret = StringBytes::Encode( - isolate, - reinterpret_cast<const uint16_t*>(data), - length / 2, - &error); } else { ret = StringBytes::Encode( isolate, diff --git a/test/parallel/test-string-decoder.js b/test/parallel/test-string-decoder.js index 5571aaeeb7..6e4f4b121d 100644 --- a/test/parallel/test-string-decoder.js +++ b/test/parallel/test-string-decoder.js @@ -143,6 +143,14 @@ decoder = new StringDecoder('utf16le'); assert.strictEqual(decoder.write(Buffer.from('3DD84D', 'hex')), '\ud83d'); assert.strictEqual(decoder.end(), ''); +// Regression test for https://github.com/nodejs/node/issues/22358 +// (unaligned UTF-16 access). +decoder = new StringDecoder('utf16le'); +assert.strictEqual(decoder.write(Buffer.alloc(1)), ''); +assert.strictEqual(decoder.write(Buffer.alloc(20)), '\0'.repeat(10)); +assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24)); +assert.strictEqual(decoder.end(), ''); + common.expectsError( () => new StringDecoder(1), { |