From 1640dedb3b2a8d6e54ba7b22290d86d5984768be Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Tue, 3 Mar 2015 15:44:54 +0100 Subject: src: fix ucs-2 buffer encoding regression StringBytes::Write() did a plain memcpy() when is_extern is true but that's wrong when the source is a two-byte string and the destination a one-byte or UTF-8 string. The impact is limited to strings > 1,031,913 bytes because those are normally the only strings that are externalized, although the use of the 'externalize strings' extension (--expose_externalize_string) can also trigger it. This commit also cleans up the bytes versus characters confusion in StringBytes::Write() because that was closely intertwined with the UCS-2 encoding regression. One wasn't fixable without the other. Fixes: https://github.com/iojs/io.js/issues/1024 Fixes: https://github.com/joyent/node/issues/8683 PR-URL: https://github.com/iojs/io.js/pull/1042 Reviewed-By: Trevor Norris --- src/string_bytes.cc | 72 ++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'src/string_bytes.cc') diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 1f5e592a32..4f896ace3f 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -279,13 +279,15 @@ size_t StringBytes::Write(Isolate* isolate, int* chars_written) { HandleScope scope(isolate); const char* data = nullptr; - size_t len = 0; - bool is_extern = GetExternalParts(isolate, val, &data, &len); - size_t extlen = len; + size_t nbytes = 0; + const bool is_extern = GetExternalParts(isolate, val, &data, &nbytes); + const size_t external_nbytes = nbytes; CHECK(val->IsString() == true); Local str = val.As(); - len = len < buflen ? len : buflen; + + if (nbytes > buflen) + nbytes = buflen; int flags = String::HINT_MANY_WRITES_EXPECTED | String::NO_NULL_TERMINATION | @@ -295,67 +297,65 @@ size_t StringBytes::Write(Isolate* isolate, case ASCII: case BINARY: case BUFFER: - if (is_extern) - memcpy(buf, data, len); - else - len = str->WriteOneByte(reinterpret_cast(buf), - 0, - buflen, - flags); + if (is_extern && str->IsOneByte()) { + memcpy(buf, data, nbytes); + } else { + uint8_t* const dst = reinterpret_cast(buf); + nbytes = str->WriteOneByte(dst, 0, buflen, flags); + } if (chars_written != nullptr) - *chars_written = len; + *chars_written = nbytes; break; case UTF8: - if (is_extern) - // TODO(tjfontaine) should this validate invalid surrogate pairs as - // well? - memcpy(buf, data, len); - else - len = str->WriteUtf8(buf, buflen, chars_written, flags); + nbytes = str->WriteUtf8(buf, buflen, chars_written, flags); break; - case UCS2: - if (is_extern) - memcpy(buf, data, len); - else - len = str->Write(reinterpret_cast(buf), 0, buflen, flags); + case UCS2: { + uint16_t* const dst = reinterpret_cast(buf); + size_t nchars; + if (is_extern && !str->IsOneByte()) { + memcpy(buf, data, nbytes); + nchars = nbytes / sizeof(*dst); + } else { + nchars = buflen / sizeof(*dst); + nchars = str->Write(dst, 0, nchars, flags); + nbytes = nchars * sizeof(*dst); + } if (IsBigEndian()) { // Node's "ucs2" encoding wants LE character data stored in // the Buffer, so we need to reorder on BE platforms. See // http://nodejs.org/api/buffer.html regarding Node's "ucs2" // encoding specification - uint16_t* buf16 = reinterpret_cast(buf); - for (size_t i = 0; i < len; i++) { - buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8); - } + for (size_t i = 0; i < nchars; i++) + dst[i] = dst[i] << 8 | dst[i] >> 8; } if (chars_written != nullptr) - *chars_written = len; - len = len * sizeof(uint16_t); + *chars_written = nchars; break; + } case BASE64: if (is_extern) { - len = base64_decode(buf, buflen, data, extlen); + nbytes = base64_decode(buf, buflen, data, external_nbytes); } else { String::Value value(str); - len = base64_decode(buf, buflen, *value, value.length()); + nbytes = base64_decode(buf, buflen, *value, value.length()); } if (chars_written != nullptr) { - *chars_written = len; + *chars_written = nbytes; } break; case HEX: if (is_extern) { - len = hex_decode(buf, buflen, data, extlen); + nbytes = hex_decode(buf, buflen, data, external_nbytes); } else { String::Value value(str); - len = hex_decode(buf, buflen, *value, value.length()); + nbytes = hex_decode(buf, buflen, *value, value.length()); } if (chars_written != nullptr) { - *chars_written = len * 2; + *chars_written = nbytes; } break; @@ -364,7 +364,7 @@ size_t StringBytes::Write(Isolate* isolate, break; } - return len; + return nbytes; } -- cgit v1.2.3