deps: update v8 to 4.4.63.9

Upgrade the bundled V8 and update code in src/ and lib/ to the new API.

Notable backwards incompatible changes are the removal of the smalloc module and dropped support for CESU-8 decoding. CESU-8 support can be brought back if necessary by doing UTF-8 decoding ourselves.

This commit includes https://codereview.chromium.org/1192973004 to fix a build error on python 2.6 systems. The original commit log follows:

    Use optparse in js2c.py for python compatibility

    Without this change, V8 won't build on RHEL/CentOS 6 because the distro python is too old to know about the argparse module.

PR-URL: https://github.com/nodejs/io.js/pull/2022
Reviewed-By: Rod Vagg <rod@vagg.org>
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
author: Ben Noordhuis <info@bnoordhuis.nl> 2015-06-19 13:23:56 +0200
committer: Rod Vagg <rod@vagg.org> 2015-08-04 11:56:14 -0700
commit: 70d1f32f5605465a1a630a64f6f0d35f96c7709d (patch)
tree: 0a349040a686eafcb0a09943ebc733477dce2781 /deps/v8/src/unicode.cc
parent: 4643b8b6671607a7aff60cbbd0b384dcf2f6959e (diff)
download: android-node-v8-70d1f32f5605465a1a630a64f6f0d35f96c7709d.tar.gz
android-node-v8-70d1f32f5605465a1a630a64f6f0d35f96c7709d.tar.bz2
android-node-v8-70d1f32f5605465a1a630a64f6f0d35f96c7709d.zip
1 files changed, 93 insertions, 46 deletions
diff --git a/deps/v8/src/unicode.cc b/deps/v8/src/unicode.cc
index 0d0d63d177..df45697bde 100644
--- a/deps/v8/src/unicode.cc
+++ b/deps/v8/src/unicode.cc
@@ -190,71 +190,118 @@ static int LookupMapping(const int32_t* table,
 }
 
 
-uchar Utf8::CalculateValue(const byte* str, size_t length, size_t* cursor) {
-  // We only get called for non-ASCII characters.
-  if (length == 1) {
-    *cursor += 1;
-    return kBadChar;
-  }
-  byte first = str[0];
-  byte second = str[1] ^ 0x80;
-  if (second & 0xC0) {
+static inline size_t NonASCIISequenceLength(byte first) {
+  // clang-format off
+  static const uint8_t lengths[256] = {
+      // The first 128 entries correspond to ASCII characters.
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      // The following 64 entries correspond to continuation bytes.
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      // The next are two invalid overlong encodings and 30 two-byte sequences.
+      0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      // 16 three-byte sequences.
+      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+      // 5 four-byte sequences, followed by sequences that could only encode
+      // code points outside of the unicode range.
+      4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  // clang-format on
+  return lengths[first];
+}
+
+
+static inline bool IsContinuationCharacter(byte chr) {
+  return chr >= 0x80 && chr <= 0xBF;
+}
+
+
+// This method decodes an UTF-8 value according to RFC 3629.
+uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
+  size_t length = NonASCIISequenceLength(str[0]);
+  if (length == 0 || max_length < length) {
     *cursor += 1;
     return kBadChar;
   }
-  if (first < 0xE0) {
-    if (first < 0xC0) {
-      *cursor += 1;
-      return kBadChar;
-    }
-    uchar code_point = ((first << 6) | second) & kMaxTwoByteChar;
-    if (code_point <= kMaxOneByteChar) {
+  if (length == 2) {
+    if (!IsContinuationCharacter(str[1])) {
       *cursor += 1;
       return kBadChar;
     }
     *cursor += 2;
-    return code_point;
+    return ((str[0] << 6) + str[1]) - 0x00003080;
   }
-  if (length == 2) {
-    *cursor += 1;
-    return kBadChar;
-  }
-  byte third = str[2] ^ 0x80;
-  if (third & 0xC0) {
-    *cursor += 1;
-    return kBadChar;
-  }
-  if (first < 0xF0) {
-    uchar code_point = ((((first << 6) | second) << 6) | third)
-        & kMaxThreeByteChar;
-    if (code_point <= kMaxTwoByteChar) {
+  if (length == 3) {
+    switch (str[0]) {
+      case 0xE0:
+        // Overlong three-byte sequence.
+        if (str[1] < 0xA0 || str[1] > 0xBF) {
+          *cursor += 1;
+          return kBadChar;
+        }
+        break;
+      case 0xED:
+        // High and low surrogate halves.
+        if (str[1] < 0x80 || str[1] > 0x9F) {
+          *cursor += 1;
+          return kBadChar;
+        }
+        break;
+      default:
+        if (!IsContinuationCharacter(str[1])) {
+          *cursor += 1;
+          return kBadChar;
+        }
+    }
+    if (!IsContinuationCharacter(str[2])) {
       *cursor += 1;
       return kBadChar;
     }
     *cursor += 3;
-    return code_point;
+    return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080;
   }
-  if (length == 3) {
+  DCHECK(length == 4);
+  switch (str[0]) {
+    case 0xF0:
+      // Overlong four-byte sequence.
+      if (str[1] < 0x90 || str[1] > 0xBF) {
+        *cursor += 1;
+        return kBadChar;
+      }
+      break;
+    case 0xF4:
+      // Code points outside of the unicode range.
+      if (str[1] < 0x80 || str[1] > 0x8F) {
+        *cursor += 1;
+        return kBadChar;
+      }
+      break;
+    default:
+      if (!IsContinuationCharacter(str[1])) {
+        *cursor += 1;
+        return kBadChar;
+      }
+  }
+  if (!IsContinuationCharacter(str[2])) {
     *cursor += 1;
     return kBadChar;
   }
-  byte fourth = str[3] ^ 0x80;
-  if (fourth & 0xC0) {
+  if (!IsContinuationCharacter(str[3])) {
     *cursor += 1;
     return kBadChar;
   }
-  if (first < 0xF8) {
-    uchar code_point = (((((first << 6 | second) << 6) | third) << 6) | fourth)
-        & kMaxFourByteChar;
-    if (code_point <= kMaxThreeByteChar) {
-      *cursor += 1;
-      return kBadChar;
-    }
-    *cursor += 4;
-    return code_point;
-  }
-  *cursor += 1;
-  return kBadChar;
+  *cursor += 4;
+  return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) -
+         0x03C82080;
 }
author	Ben Noordhuis <info@bnoordhuis.nl>	2015-06-19 13:23:56 +0200
committer	Rod Vagg <rod@vagg.org>	2015-08-04 11:56:14 -0700
commit	70d1f32f5605465a1a630a64f6f0d35f96c7709d (patch)
tree	0a349040a686eafcb0a09943ebc733477dce2781 /deps/v8/src/unicode.cc
parent	4643b8b6671607a7aff60cbbd0b384dcf2f6959e (diff)
download	android-node-v8-70d1f32f5605465a1a630a64f6f0d35f96c7709d.tar.gz android-node-v8-70d1f32f5605465a1a630a64f6f0d35f96c7709d.tar.bz2 android-node-v8-70d1f32f5605465a1a630a64f6f0d35f96c7709d.zip