diff options
author | Ben Noordhuis <info@bnoordhuis.nl> | 2015-03-03 15:44:54 +0100 |
---|---|---|
committer | Ben Noordhuis <info@bnoordhuis.nl> | 2015-03-05 20:44:19 +0100 |
commit | 1640dedb3b2a8d6e54ba7b22290d86d5984768be (patch) | |
tree | 22c1954f8abfab32dcebb508efcdcaa282baed25 /src/string_bytes.cc | |
parent | 2eda2d609658826c559fca1944b0e6aafb9d1344 (diff) | |
download | android-node-v8-1640dedb3b2a8d6e54ba7b22290d86d5984768be.tar.gz android-node-v8-1640dedb3b2a8d6e54ba7b22290d86d5984768be.tar.bz2 android-node-v8-1640dedb3b2a8d6e54ba7b22290d86d5984768be.zip |
src: fix ucs-2 buffer encoding regression
StringBytes::Write() did a plain memcpy() whenever is_extern was true, but
that is wrong when the source is a two-byte string and the destination
is a one-byte or UTF-8 string.
The impact is limited to strings larger than 1,031,913 bytes, because those
are normally the only strings that are externalized; however, use of
the 'externalize strings' extension (--expose_externalize_string) can
also trigger it.
This commit also cleans up the bytes versus characters confusion in
StringBytes::Write() because that was closely intertwined with the
UCS-2 encoding regression. One wasn't fixable without the other.
Fixes: https://github.com/iojs/io.js/issues/1024
Fixes: https://github.com/joyent/node/issues/8683
PR-URL: https://github.com/iojs/io.js/pull/1042
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
Diffstat (limited to 'src/string_bytes.cc')
-rw-r--r-- | src/string_bytes.cc | 72 |
1 files changed, 36 insertions, 36 deletions
diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 1f5e592a32..4f896ace3f 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -279,13 +279,15 @@ size_t StringBytes::Write(Isolate* isolate, int* chars_written) { HandleScope scope(isolate); const char* data = nullptr; - size_t len = 0; - bool is_extern = GetExternalParts(isolate, val, &data, &len); - size_t extlen = len; + size_t nbytes = 0; + const bool is_extern = GetExternalParts(isolate, val, &data, &nbytes); + const size_t external_nbytes = nbytes; CHECK(val->IsString() == true); Local<String> str = val.As<String>(); - len = len < buflen ? len : buflen; + + if (nbytes > buflen) + nbytes = buflen; int flags = String::HINT_MANY_WRITES_EXPECTED | String::NO_NULL_TERMINATION | @@ -295,67 +297,65 @@ size_t StringBytes::Write(Isolate* isolate, case ASCII: case BINARY: case BUFFER: - if (is_extern) - memcpy(buf, data, len); - else - len = str->WriteOneByte(reinterpret_cast<uint8_t*>(buf), - 0, - buflen, - flags); + if (is_extern && str->IsOneByte()) { + memcpy(buf, data, nbytes); + } else { + uint8_t* const dst = reinterpret_cast<uint8_t*>(buf); + nbytes = str->WriteOneByte(dst, 0, buflen, flags); + } if (chars_written != nullptr) - *chars_written = len; + *chars_written = nbytes; break; case UTF8: - if (is_extern) - // TODO(tjfontaine) should this validate invalid surrogate pairs as - // well? 
- memcpy(buf, data, len); - else - len = str->WriteUtf8(buf, buflen, chars_written, flags); + nbytes = str->WriteUtf8(buf, buflen, chars_written, flags); break; - case UCS2: - if (is_extern) - memcpy(buf, data, len); - else - len = str->Write(reinterpret_cast<uint16_t*>(buf), 0, buflen, flags); + case UCS2: { + uint16_t* const dst = reinterpret_cast<uint16_t*>(buf); + size_t nchars; + if (is_extern && !str->IsOneByte()) { + memcpy(buf, data, nbytes); + nchars = nbytes / sizeof(*dst); + } else { + nchars = buflen / sizeof(*dst); + nchars = str->Write(dst, 0, nchars, flags); + nbytes = nchars * sizeof(*dst); + } if (IsBigEndian()) { // Node's "ucs2" encoding wants LE character data stored in // the Buffer, so we need to reorder on BE platforms. See // http://nodejs.org/api/buffer.html regarding Node's "ucs2" // encoding specification - uint16_t* buf16 = reinterpret_cast<uint16_t*>(buf); - for (size_t i = 0; i < len; i++) { - buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8); - } + for (size_t i = 0; i < nchars; i++) + dst[i] = dst[i] << 8 | dst[i] >> 8; } if (chars_written != nullptr) - *chars_written = len; - len = len * sizeof(uint16_t); + *chars_written = nchars; break; + } case BASE64: if (is_extern) { - len = base64_decode(buf, buflen, data, extlen); + nbytes = base64_decode(buf, buflen, data, external_nbytes); } else { String::Value value(str); - len = base64_decode(buf, buflen, *value, value.length()); + nbytes = base64_decode(buf, buflen, *value, value.length()); } if (chars_written != nullptr) { - *chars_written = len; + *chars_written = nbytes; } break; case HEX: if (is_extern) { - len = hex_decode(buf, buflen, data, extlen); + nbytes = hex_decode(buf, buflen, data, external_nbytes); } else { String::Value value(str); - len = hex_decode(buf, buflen, *value, value.length()); + nbytes = hex_decode(buf, buflen, *value, value.length()); } if (chars_written != nullptr) { - *chars_written = len * 2; + *chars_written = nbytes; } break; @@ -364,7 +364,7 @@ size_t 
StringBytes::Write(Isolate* isolate, break; } - return len; + return nbytes; } |