src: fix ucs-2 buffer encoding regression

StringBytes::Write() did a plain memcpy() when is_extern is true but that's wrong when the source is a two-byte string and the destination a one-byte or UTF-8 string.

The impact is limited to strings > 1,031,913 bytes because those are normally the only strings that are externalized, although the use of the 'externalize strings' extension (--expose_externalize_string) can also trigger it.

This commit also cleans up the bytes versus characters confusion in StringBytes::Write() because that was closely intertwined with the UCS-2 encoding regression. One wasn't fixable without the other.

Fixes: https://github.com/iojs/io.js/issues/1024
Fixes: https://github.com/joyent/node/issues/8683
PR-URL: https://github.com/iojs/io.js/pull/1042
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
author: Ben Noordhuis <info@bnoordhuis.nl> 2015-03-03 15:44:54 +0100
committer: Ben Noordhuis <info@bnoordhuis.nl> 2015-03-05 20:44:19 +0100
commit: 1640dedb3b2a8d6e54ba7b22290d86d5984768be (patch)
tree: 22c1954f8abfab32dcebb508efcdcaa282baed25 /src/string_bytes.cc
parent: 2eda2d609658826c559fca1944b0e6aafb9d1344 (diff)
download: android-node-v8-1640dedb3b2a8d6e54ba7b22290d86d5984768be.tar.gz
android-node-v8-1640dedb3b2a8d6e54ba7b22290d86d5984768be.tar.bz2
android-node-v8-1640dedb3b2a8d6e54ba7b22290d86d5984768be.zip
1 files changed, 36 insertions, 36 deletions
diff --git a/src/string_bytes.cc b/src/string_bytes.cc
index 1f5e592a32..4f896ace3f 100644
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -279,13 +279,15 @@ size_t StringBytes::Write(Isolate* isolate,
                           int* chars_written) {
   HandleScope scope(isolate);
   const char* data = nullptr;
-  size_t len = 0;
-  bool is_extern = GetExternalParts(isolate, val, &data, &len);
-  size_t extlen = len;
+  size_t nbytes = 0;
+  const bool is_extern = GetExternalParts(isolate, val, &data, &nbytes);
+  const size_t external_nbytes = nbytes;
 
   CHECK(val->IsString() == true);
   Local<String> str = val.As<String>();
-  len = len < buflen ? len : buflen;
+
+  if (nbytes > buflen)
+    nbytes = buflen;
 
   int flags = String::HINT_MANY_WRITES_EXPECTED |
               String::NO_NULL_TERMINATION |
@@ -295,67 +297,65 @@ size_t StringBytes::Write(Isolate* isolate,
     case ASCII:
     case BINARY:
     case BUFFER:
-      if (is_extern)
-        memcpy(buf, data, len);
-      else
-        len = str->WriteOneByte(reinterpret_cast<uint8_t*>(buf),
-                                0,
-                                buflen,
-                                flags);
+      if (is_extern && str->IsOneByte()) {
+        memcpy(buf, data, nbytes);
+      } else {
+        uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
+        nbytes = str->WriteOneByte(dst, 0, buflen, flags);
+      }
       if (chars_written != nullptr)
-        *chars_written = len;
+        *chars_written = nbytes;
       break;
 
     case UTF8:
-      if (is_extern)
-        // TODO(tjfontaine) should this validate invalid surrogate pairs as
-        // well?
-        memcpy(buf, data, len);
-      else
-        len = str->WriteUtf8(buf, buflen, chars_written, flags);
+      nbytes = str->WriteUtf8(buf, buflen, chars_written, flags);
       break;
 
-    case UCS2:
-      if (is_extern)
-        memcpy(buf, data, len);
-      else
-        len = str->Write(reinterpret_cast<uint16_t*>(buf), 0, buflen, flags);
+    case UCS2: {
+      uint16_t* const dst = reinterpret_cast<uint16_t*>(buf);
+      size_t nchars;
+      if (is_extern && !str->IsOneByte()) {
+        memcpy(buf, data, nbytes);
+        nchars = nbytes / sizeof(*dst);
+      } else {
+        nchars = buflen / sizeof(*dst);
+        nchars = str->Write(dst, 0, nchars, flags);
+        nbytes = nchars * sizeof(*dst);
+      }
       if (IsBigEndian()) {
         // Node's "ucs2" encoding wants LE character data stored in
         // the Buffer, so we need to reorder on BE platforms.  See
         // http://nodejs.org/api/buffer.html regarding Node's "ucs2"
         // encoding specification
-        uint16_t* buf16 = reinterpret_cast<uint16_t*>(buf);
-        for (size_t i = 0; i < len; i++) {
-          buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8);
-        }
+        for (size_t i = 0; i < nchars; i++)
+          dst[i] = dst[i] << 8 | dst[i] >> 8;
       }
       if (chars_written != nullptr)
-        *chars_written = len;
-      len = len * sizeof(uint16_t);
+        *chars_written = nchars;
       break;
+    }
 
     case BASE64:
       if (is_extern) {
-        len = base64_decode(buf, buflen, data, extlen);
+        nbytes = base64_decode(buf, buflen, data, external_nbytes);
       } else {
         String::Value value(str);
-        len = base64_decode(buf, buflen, *value, value.length());
+        nbytes = base64_decode(buf, buflen, *value, value.length());
       }
       if (chars_written != nullptr) {
-        *chars_written = len;
+        *chars_written = nbytes;
       }
       break;
 
     case HEX:
       if (is_extern) {
-        len = hex_decode(buf, buflen, data, extlen);
+        nbytes = hex_decode(buf, buflen, data, external_nbytes);
       } else {
         String::Value value(str);
-        len = hex_decode(buf, buflen, *value, value.length());
+        nbytes = hex_decode(buf, buflen, *value, value.length());
       }
       if (chars_written != nullptr) {
-        *chars_written = len * 2;
+        *chars_written = nbytes;
       }
       break;
 
@@ -364,7 +364,7 @@ size_t StringBytes::Write(Isolate* isolate,
       break;
   }
 
-  return len;
+  return nbytes;
 }
author	Ben Noordhuis <info@bnoordhuis.nl>	2015-03-03 15:44:54 +0100
committer	Ben Noordhuis <info@bnoordhuis.nl>	2015-03-05 20:44:19 +0100
commit	1640dedb3b2a8d6e54ba7b22290d86d5984768be (patch)
tree	22c1954f8abfab32dcebb508efcdcaa282baed25 /src/string_bytes.cc
parent	2eda2d609658826c559fca1944b0e6aafb9d1344 (diff)
download	android-node-v8-1640dedb3b2a8d6e54ba7b22290d86d5984768be.tar.gz android-node-v8-1640dedb3b2a8d6e54ba7b22290d86d5984768be.tar.bz2 android-node-v8-1640dedb3b2a8d6e54ba7b22290d86d5984768be.zip