diff options
author | Michaël Zasso <targos@protonmail.com> | 2019-08-01 08:38:30 +0200 |
---|---|---|
committer | Michaël Zasso <targos@protonmail.com> | 2019-08-01 12:53:56 +0200 |
commit | 2dcc3665abf57c3607cebffdeeca062f5894885d (patch) | |
tree | 4f560748132edcfb4c22d6f967a7e80d23d7ea2c /deps/v8/src/inspector | |
parent | 1ee47d550c6de132f06110aa13eceb7551d643b3 (diff) | |
download | android-node-v8-2dcc3665abf57c3607cebffdeeca062f5894885d.tar.gz android-node-v8-2dcc3665abf57c3607cebffdeeca062f5894885d.tar.bz2 android-node-v8-2dcc3665abf57c3607cebffdeeca062f5894885d.zip |
deps: update V8 to 7.6.303.28
PR-URL: https://github.com/nodejs/node/pull/28016
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Refael Ackermann (רפאל פלחי) <refack@gmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com>
Reviewed-By: Jiawen Geng <technicalcute@gmail.com>
Diffstat (limited to 'deps/v8/src/inspector')
20 files changed, 699 insertions, 419 deletions
diff --git a/deps/v8/src/inspector/BUILD.gn b/deps/v8/src/inspector/BUILD.gn index b3e328a69d..863940ef4b 100644 --- a/deps/v8/src/inspector/BUILD.gn +++ b/deps/v8/src/inspector/BUILD.gn @@ -80,9 +80,22 @@ v8_header_set("inspector_test_headers") { ] } +v8_source_set("inspector_string_conversions") { + sources = [ + "v8-string-conversions.cc", + "v8-string-conversions.h", + ] + configs = [ "../..:internal_config_base" ] + deps = [ + "../..:v8_libbase", + ] +} + v8_source_set("inspector") { deps = [ + ":inspector_string_conversions", "../..:v8_version", + "../../third_party/inspector_protocol:encoding", ] public_deps = [ @@ -130,6 +143,8 @@ v8_source_set("inspector") { "v8-heap-profiler-agent-impl.h", "v8-inspector-impl.cc", "v8-inspector-impl.h", + "v8-inspector-protocol-encoding.cc", + "v8-inspector-protocol-encoding.h", "v8-inspector-session-impl.cc", "v8-inspector-session-impl.h", "v8-profiler-agent-impl.cc", diff --git a/deps/v8/src/inspector/DEPS b/deps/v8/src/inspector/DEPS index 330371a82c..5122d5d997 100644 --- a/deps/v8/src/inspector/DEPS +++ b/deps/v8/src/inspector/DEPS @@ -9,10 +9,15 @@ include_rules = [ "+src/base/platform/mutex.h", "+src/base/safe_conversions.h", "+src/base/v8-fallthrough.h", - "+src/conversions.h", - "+src/v8memory.h", + "+src/common/v8memory.h", + "+src/numbers/conversions.h", "+src/inspector", "+src/tracing", "+src/debug/debug-interface.h", "+src/debug/interface-types.h", + "+src/utils/vector.h", + "+third_party/inspector_protocol/encoding/encoding.h", + "+third_party/inspector_protocol/encoding/encoding.cc", + "+../../third_party/inspector_protocol/encoding/encoding.h", + "+../../third_party/inspector_protocol/encoding/encoding.cc", ] diff --git a/deps/v8/src/inspector/OWNERS b/deps/v8/src/inspector/OWNERS index a42adce782..55f8ac7875 100644 --- a/deps/v8/src/inspector/OWNERS +++ b/deps/v8/src/inspector/OWNERS @@ -13,7 +13,6 @@ per-file js_protocol.pdl=set noparent per-file js_protocol.pdl=dgozman@chromium.org per-file 
js_protocol.pdl=pfeldman@chromium.org -per-file PRESUBMIT.py=machenbach@chromium.org -per-file PRESUBMIT.py=sergiyb@chromium.org +per-file PRESUBMIT.py=file://INFRA_OWNERS # COMPONENT: Platform>DevTools>JavaScript diff --git a/deps/v8/src/inspector/js_protocol.pdl b/deps/v8/src/inspector/js_protocol.pdl index e4715f47ef..c4ff51b060 100644 --- a/deps/v8/src/inspector/js_protocol.pdl +++ b/deps/v8/src/inspector/js_protocol.pdl @@ -317,6 +317,17 @@ domain Debugger # Location this breakpoint resolved into. Location actualLocation + # Sets instrumentation breakpoint. + command setInstrumentationBreakpoint + parameters + # Instrumentation name. + enum instrumentation + beforeScriptExecution + beforeScriptWithSourceMapExecution + returns + # Id of the created breakpoint for further reference. + BreakpointId breakpointId + # Sets JavaScript breakpoint at given location specified either by URL or URL regex. Once this # command is issued, all existing parsed scripts will have breakpoints resolved and returned in # `locations` property. Further matching script parsing will result in subsequent @@ -449,16 +460,17 @@ domain Debugger array of CallFrame callFrames # Pause reason. enum reason - XHR + ambiguous + assert + debugCommand DOM EventListener exception - assert - debugCommand - promiseRejection + instrumentation OOM other - ambiguous + promiseRejection + XHR # Object containing break-specific auxiliary properties. 
optional object data # Hit breakpoints IDs diff --git a/deps/v8/src/inspector/string-16.cc b/deps/v8/src/inspector/string-16.cc index 303987dede..acf0159f27 100644 --- a/deps/v8/src/inspector/string-16.cc +++ b/deps/v8/src/inspector/string-16.cc @@ -6,14 +6,15 @@ #include <algorithm> #include <cctype> +#include <cinttypes> #include <cstdlib> #include <cstring> #include <limits> #include <string> #include "src/base/platform/platform.h" -#include "src/base/v8-fallthrough.h" -#include "src/conversions.h" +#include "src/inspector/v8-string-conversions.h" +#include "src/numbers/conversions.h" namespace v8_inspector { @@ -44,331 +45,6 @@ int64_t charactersToInteger(const UChar* characters, size_t length, if (ok) *ok = !(*endptr); return result; } - -const UChar replacementCharacter = 0xFFFD; -using UChar32 = uint32_t; - -inline int inlineUTF8SequenceLengthNonASCII(char b0) { - if ((b0 & 0xC0) != 0xC0) return 0; - if ((b0 & 0xE0) == 0xC0) return 2; - if ((b0 & 0xF0) == 0xE0) return 3; - if ((b0 & 0xF8) == 0xF0) return 4; - return 0; -} - -inline int inlineUTF8SequenceLength(char b0) { - return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0); -} - -// Once the bits are split out into bytes of UTF-8, this is a mask OR-ed -// into the first byte, depending on how many bytes follow. There are -// as many entries in this table as there are UTF-8 sequence types. -// (I.e., one byte sequence, two byte... etc.). Remember that sequences -// for *legal* UTF-8 will be 4 or fewer bytes total. -static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, - 0xF0, 0xF8, 0xFC}; - -typedef enum { - conversionOK, // conversion successful - sourceExhausted, // partial character in source, but hit end - targetExhausted, // insuff. 
room in target for conversion - sourceIllegal // source sequence is illegal/malformed -} ConversionResult; - -ConversionResult convertUTF16ToUTF8(const UChar** sourceStart, - const UChar* sourceEnd, char** targetStart, - char* targetEnd, bool strict) { - ConversionResult result = conversionOK; - const UChar* source = *sourceStart; - char* target = *targetStart; - while (source < sourceEnd) { - UChar32 ch; - uint32_t bytesToWrite = 0; - const UChar32 byteMask = 0xBF; - const UChar32 byteMark = 0x80; - const UChar* oldSource = - source; // In case we have to back up because of target overflow. - ch = static_cast<uint16_t>(*source++); - // If we have a surrogate pair, convert to UChar32 first. - if (ch >= 0xD800 && ch <= 0xDBFF) { - // If the 16 bits following the high surrogate are in the source buffer... - if (source < sourceEnd) { - UChar32 ch2 = static_cast<uint16_t>(*source); - // If it's a low surrogate, convert to UChar32. - if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { - ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x0010000; - ++source; - } else if (strict) { // it's an unpaired high surrogate - --source; // return to the illegal value itself - result = sourceIllegal; - break; - } - } else { // We don't have the 16 bits following the high surrogate. 
- --source; // return to the high surrogate - result = sourceExhausted; - break; - } - } else if (strict) { - // UTF-16 surrogate values are illegal in UTF-32 - if (ch >= 0xDC00 && ch <= 0xDFFF) { - --source; // return to the illegal value itself - result = sourceIllegal; - break; - } - } - // Figure out how many bytes the result will require - if (ch < static_cast<UChar32>(0x80)) { - bytesToWrite = 1; - } else if (ch < static_cast<UChar32>(0x800)) { - bytesToWrite = 2; - } else if (ch < static_cast<UChar32>(0x10000)) { - bytesToWrite = 3; - } else if (ch < static_cast<UChar32>(0x110000)) { - bytesToWrite = 4; - } else { - bytesToWrite = 3; - ch = replacementCharacter; - } - - target += bytesToWrite; - if (target > targetEnd) { - source = oldSource; // Back up source pointer! - target -= bytesToWrite; - result = targetExhausted; - break; - } - switch (bytesToWrite) { - case 4: - *--target = static_cast<char>((ch | byteMark) & byteMask); - ch >>= 6; - V8_FALLTHROUGH; - case 3: - *--target = static_cast<char>((ch | byteMark) & byteMask); - ch >>= 6; - V8_FALLTHROUGH; - case 2: - *--target = static_cast<char>((ch | byteMark) & byteMask); - ch >>= 6; - V8_FALLTHROUGH; - case 1: - *--target = static_cast<char>(ch | firstByteMark[bytesToWrite]); - } - target += bytesToWrite; - } - *sourceStart = source; - *targetStart = target; - return result; -} - -/** - * Is this code point a BMP code point (U+0000..U+ffff)? - * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.8 - */ -#define U_IS_BMP(c) ((uint32_t)(c) <= 0xFFFF) - -/** - * Is this code point a supplementary code point (U+010000..U+10FFFF)? - * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.8 - */ -#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x010000) <= 0xFFFFF) - -/** - * Is this code point a surrogate (U+d800..U+dfff)? 
- * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U_IS_SURROGATE(c) (((c)&0xFFFFF800) == 0xD800) - -/** - * Get the lead surrogate (0xD800..0xDBFF) for a - * supplementary code point (0x010000..0x10FFFF). - * @param supplementary 32-bit code point (U+010000..U+10FFFF) - * @return lead surrogate (U+D800..U+DBFF) for supplementary - * @stable ICU 2.4 - */ -#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xD7C0) - -/** - * Get the trail surrogate (0xDC00..0xDFFF) for a - * supplementary code point (0x010000..0x10FFFF). - * @param supplementary 32-bit code point (U+010000..U+10FFFF) - * @return trail surrogate (U+DC00..U+DFFF) for supplementary - * @stable ICU 2.4 - */ -#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3FF) | 0xDC00) - -// This must be called with the length pre-determined by the first byte. -// If presented with a length > 4, this returns false. The Unicode -// definition of UTF-8 goes up to 4-byte sequences. -static bool isLegalUTF8(const unsigned char* source, int length) { - unsigned char a; - const unsigned char* srcptr = source + length; - switch (length) { - default: - return false; - // Everything else falls through when "true"... 
- case 4: - if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - V8_FALLTHROUGH; - case 3: - if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - V8_FALLTHROUGH; - case 2: - if ((a = (*--srcptr)) > 0xBF) return false; - - // no fall-through in this inner switch - switch (*source) { - case 0xE0: - if (a < 0xA0) return false; - break; - case 0xED: - if (a > 0x9F) return false; - break; - case 0xF0: - if (a < 0x90) return false; - break; - case 0xF4: - if (a > 0x8F) return false; - break; - default: - if (a < 0x80) return false; - } - V8_FALLTHROUGH; - - case 1: - if (*source >= 0x80 && *source < 0xC2) return false; - } - if (*source > 0xF4) return false; - return true; -} - -// Magic values subtracted from a buffer value during UTF8 conversion. -// This table contains as many values as there might be trailing bytes -// in a UTF-8 sequence. -static const UChar32 offsetsFromUTF8[6] = {0x00000000UL, - 0x00003080UL, - 0x000E2080UL, - 0x03C82080UL, - static_cast<UChar32>(0xFA082080UL), - static_cast<UChar32>(0x82082080UL)}; - -static inline UChar32 readUTF8Sequence(const char*& sequence, size_t length) { - UChar32 character = 0; - - // The cases all fall through. 
- switch (length) { - case 6: - character += static_cast<unsigned char>(*sequence++); - character <<= 6; - V8_FALLTHROUGH; - case 5: - character += static_cast<unsigned char>(*sequence++); - character <<= 6; - V8_FALLTHROUGH; - case 4: - character += static_cast<unsigned char>(*sequence++); - character <<= 6; - V8_FALLTHROUGH; - case 3: - character += static_cast<unsigned char>(*sequence++); - character <<= 6; - V8_FALLTHROUGH; - case 2: - character += static_cast<unsigned char>(*sequence++); - character <<= 6; - V8_FALLTHROUGH; - case 1: - character += static_cast<unsigned char>(*sequence++); - } - - return character - offsetsFromUTF8[length - 1]; -} - -ConversionResult convertUTF8ToUTF16(const char** sourceStart, - const char* sourceEnd, UChar** targetStart, - UChar* targetEnd, bool* sourceAllASCII, - bool strict) { - ConversionResult result = conversionOK; - const char* source = *sourceStart; - UChar* target = *targetStart; - UChar orAllData = 0; - while (source < sourceEnd) { - int utf8SequenceLength = inlineUTF8SequenceLength(*source); - if (sourceEnd - source < utf8SequenceLength) { - result = sourceExhausted; - break; - } - // Do this check whether lenient or strict - if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), - utf8SequenceLength)) { - result = sourceIllegal; - break; - } - - UChar32 character = readUTF8Sequence(source, utf8SequenceLength); - - if (target >= targetEnd) { - source -= utf8SequenceLength; // Back up source pointer! 
- result = targetExhausted; - break; - } - - if (U_IS_BMP(character)) { - // UTF-16 surrogate values are illegal in UTF-32 - if (U_IS_SURROGATE(character)) { - if (strict) { - source -= utf8SequenceLength; // return to the illegal value itself - result = sourceIllegal; - break; - } - *target++ = replacementCharacter; - orAllData |= replacementCharacter; - } else { - *target++ = static_cast<UChar>(character); // normal case - orAllData |= character; - } - } else if (U_IS_SUPPLEMENTARY(character)) { - // target is a character in range 0xFFFF - 0x10FFFF - if (target + 1 >= targetEnd) { - source -= utf8SequenceLength; // Back up source pointer! - result = targetExhausted; - break; - } - *target++ = U16_LEAD(character); - *target++ = U16_TRAIL(character); - orAllData = 0xFFFF; - } else { - if (strict) { - source -= utf8SequenceLength; // return to the start - result = sourceIllegal; - break; // Bail out; shouldn't continue - } else { - *target++ = replacementCharacter; - orAllData |= replacementCharacter; - } - } - } - *sourceStart = source; - *targetStart = target; - - if (sourceAllASCII) *sourceAllASCII = !(orAllData & ~0x7F); - - return result; -} - -// Helper to write a three-byte UTF-8 code point to the buffer, caller must -// check room is available. 
-static inline void putUTF8Triple(char*& buffer, UChar ch) { - *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); - *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); - *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); -} - } // namespace String16::String16(const UChar* characters, size_t size) @@ -386,6 +62,8 @@ String16::String16(const char* characters, size_t size) { String16::String16(const std::basic_string<UChar>& impl) : m_impl(impl) {} +String16::String16(std::basic_string<UChar>&& impl) : m_impl(impl) {} + // static String16 String16::fromInteger(int number) { char arr[50]; @@ -519,65 +197,11 @@ void String16Builder::reserveCapacity(size_t capacity) { } String16 String16::fromUTF8(const char* stringStart, size_t length) { - if (!stringStart || !length) return String16(); - - std::vector<UChar> buffer(length); - UChar* bufferStart = buffer.data(); - - UChar* bufferCurrent = bufferStart; - const char* stringCurrent = stringStart; - if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent, - bufferCurrent + buffer.size(), nullptr, - true) != conversionOK) - return String16(); - - size_t utf16Length = bufferCurrent - bufferStart; - return String16(bufferStart, utf16Length); + return String16(UTF8ToUTF16(stringStart, length)); } std::string String16::utf8() const { - size_t length = this->length(); - - if (!length) return std::string(""); - - // Allocate a buffer big enough to hold all the characters - // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). - // Optimization ideas, if we find this function is hot: - // * We could speculatively create a CStringBuffer to contain 'length' - // characters, and resize if necessary (i.e. if the buffer contains - // non-ascii characters). (Alternatively, scan the buffer first for - // ascii characters, so we know this will be sufficient). 
- // * We could allocate a CStringBuffer with an appropriate size to - // have a good chance of being able to write the string into the - // buffer without reallocing (say, 1.5 x length). - if (length > std::numeric_limits<unsigned>::max() / 3) return std::string(); - - std::string output(length * 3, '\0'); - const UChar* characters = m_impl.data(); - const UChar* characters_end = characters + length; - char* buffer = &*output.begin(); - char* buffer_end = &*output.end(); - while (characters < characters_end) { - // Use strict conversion to detect unpaired surrogates. - ConversionResult result = convertUTF16ToUTF8( - &characters, characters_end, &buffer, buffer_end, /* strict= */ true); - DCHECK_NE(result, targetExhausted); - // Conversion fails when there is an unpaired surrogate. Put - // replacement character (U+FFFD) instead of the unpaired - // surrogate. - if (result != conversionOK) { - DCHECK_LE(0xD800, *characters); - DCHECK_LE(*characters, 0xDFFF); - // There should be room left, since one UChar hasn't been - // converted. 
- DCHECK_LE(buffer + 3, buffer_end); - putUTF8Triple(buffer, replacementCharacter); - ++characters; - } - } - - output.resize(buffer - output.data()); - return output; + return UTF16ToUTF8(m_impl.data(), m_impl.size()); } } // namespace v8_inspector diff --git a/deps/v8/src/inspector/string-16.h b/deps/v8/src/inspector/string-16.h index 56df993332..1b475a10a6 100644 --- a/deps/v8/src/inspector/string-16.h +++ b/deps/v8/src/inspector/string-16.h @@ -30,6 +30,7 @@ class String16 { String16(const char* characters); // NOLINT(runtime/explicit) String16(const char* characters, size_t size); explicit String16(const std::basic_string<UChar>& impl); + explicit String16(std::basic_string<UChar>&& impl); String16& operator=(const String16&) V8_NOEXCEPT = default; String16& operator=(String16&&) V8_NOEXCEPT = default; diff --git a/deps/v8/src/inspector/string-util.cc b/deps/v8/src/inspector/string-util.cc index 4dfe8ad352..e81c04d66f 100644 --- a/deps/v8/src/inspector/string-util.cc +++ b/deps/v8/src/inspector/string-util.cc @@ -4,11 +4,12 @@ #include "src/inspector/string-util.h" +#include <cinttypes> #include <cmath> #include "src/base/platform/platform.h" -#include "src/conversions.h" #include "src/inspector/protocol/Protocol.h" +#include "src/numbers/conversions.h" namespace v8_inspector { diff --git a/deps/v8/src/inspector/v8-console.cc b/deps/v8/src/inspector/v8-console.cc index 8aa2102dd7..37b1d5c7a9 100644 --- a/deps/v8/src/inspector/v8-console.cc +++ b/deps/v8/src/inspector/v8-console.cc @@ -124,7 +124,6 @@ class ConsoleHelper { return defaultValue; } v8::Local<v8::String> titleValue; - v8::TryCatch tryCatch(m_context->GetIsolate()); if (!m_info[0]->ToString(m_context).ToLocal(&titleValue)) return defaultValue; return toProtocolString(m_context->GetIsolate(), titleValue); diff --git a/deps/v8/src/inspector/v8-debugger-agent-impl.cc b/deps/v8/src/inspector/v8-debugger-agent-impl.cc index 9fd9e47086..3301838587 100644 --- 
a/deps/v8/src/inspector/v8-debugger-agent-impl.cc +++ b/deps/v8/src/inspector/v8-debugger-agent-impl.cc @@ -31,10 +31,13 @@ using protocol::Array; using protocol::Maybe; using protocol::Debugger::BreakpointId; using protocol::Debugger::CallFrame; +using protocol::Debugger::Scope; using protocol::Runtime::ExceptionDetails; -using protocol::Runtime::ScriptId; using protocol::Runtime::RemoteObject; -using protocol::Debugger::Scope; +using protocol::Runtime::ScriptId; + +namespace InstrumentationEnum = + protocol::Debugger::SetInstrumentationBreakpoint::InstrumentationEnum; namespace DebuggerAgentState { static const char pauseOnExceptionsState[] = "pauseOnExceptionsState"; @@ -47,6 +50,7 @@ static const char breakpointsByRegex[] = "breakpointsByRegex"; static const char breakpointsByUrl[] = "breakpointsByUrl"; static const char breakpointsByScriptHash[] = "breakpointsByScriptHash"; static const char breakpointHints[] = "breakpointHints"; +static const char instrumentationBreakpoints[] = "instrumentationBreakpoints"; } // namespace DebuggerAgentState @@ -80,7 +84,8 @@ enum class BreakpointType { kByScriptId, kDebugCommand, kMonitorCommand, - kBreakpointAtEntry + kBreakpointAtEntry, + kInstrumentationBreakpoint }; String16 generateBreakpointId(BreakpointType type, @@ -106,6 +111,15 @@ String16 generateBreakpointId(BreakpointType type, return builder.toString(); } +String16 generateInstrumentationBreakpointId(const String16& instrumentation) { + String16Builder builder; + builder.appendNumber( + static_cast<int>(BreakpointType::kInstrumentationBreakpoint)); + builder.append(':'); + builder.append(instrumentation); + return builder.toString(); +} + bool parseBreakpointId(const String16& breakpointId, BreakpointType* type, String16* scriptSelector = nullptr, int* lineNumber = nullptr, int* columnNumber = nullptr) { @@ -114,14 +128,15 @@ bool parseBreakpointId(const String16& breakpointId, BreakpointType* type, int rawType = breakpointId.substring(0, 
typeLineSeparator).toInteger(); if (rawType < static_cast<int>(BreakpointType::kByUrl) || - rawType > static_cast<int>(BreakpointType::kBreakpointAtEntry)) { + rawType > static_cast<int>(BreakpointType::kInstrumentationBreakpoint)) { return false; } if (type) *type = static_cast<BreakpointType>(rawType); if (rawType == static_cast<int>(BreakpointType::kDebugCommand) || rawType == static_cast<int>(BreakpointType::kMonitorCommand) || - rawType == static_cast<int>(BreakpointType::kBreakpointAtEntry)) { - // The script and source position is not encoded in this case. + rawType == static_cast<int>(BreakpointType::kBreakpointAtEntry) || + rawType == static_cast<int>(BreakpointType::kInstrumentationBreakpoint)) { + // The script and source position are not encoded in this case. return true; } @@ -356,6 +371,7 @@ Response V8DebuggerAgentImpl::disable() { m_state->remove(DebuggerAgentState::breakpointsByUrl); m_state->remove(DebuggerAgentState::breakpointsByScriptHash); m_state->remove(DebuggerAgentState::breakpointHints); + m_state->remove(DebuggerAgentState::instrumentationBreakpoints); m_state->setInteger(DebuggerAgentState::pauseOnExceptionsState, v8::debug::NoBreakOnException); @@ -506,7 +522,6 @@ Response V8DebuggerAgentImpl::setBreakpointByUrl( break; default: UNREACHABLE(); - break; } if (breakpoints->get(breakpointId)) { return Response::Error("Breakpoint at specified location already exists."); @@ -580,6 +595,20 @@ Response V8DebuggerAgentImpl::setBreakpointOnFunctionCall( return Response::OK(); } +Response V8DebuggerAgentImpl::setInstrumentationBreakpoint( + const String16& instrumentation, String16* outBreakpointId) { + if (!enabled()) return Response::Error(kDebuggerNotEnabled); + String16 breakpointId = generateInstrumentationBreakpointId(instrumentation); + protocol::DictionaryValue* breakpoints = getOrCreateObject( + m_state, DebuggerAgentState::instrumentationBreakpoints); + if (breakpoints->get(breakpointId)) { + return Response::Error("Instrumentation 
breakpoint is already enabled."); + } + breakpoints->setBoolean(breakpointId, true); + *outBreakpointId = breakpointId; + return Response::OK(); +} + Response V8DebuggerAgentImpl::removeBreakpoint(const String16& breakpointId) { if (!enabled()) return Response::Error(kDebuggerNotEnabled); BreakpointType type; @@ -606,6 +635,10 @@ Response V8DebuggerAgentImpl::removeBreakpoint(const String16& breakpointId) { case BreakpointType::kByUrlRegex: breakpoints = m_state->getObject(DebuggerAgentState::breakpointsByRegex); break; + case BreakpointType::kInstrumentationBreakpoint: + breakpoints = + m_state->getObject(DebuggerAgentState::instrumentationBreakpoints); + break; default: break; } @@ -1496,6 +1529,40 @@ void V8DebuggerAgentImpl::didParseSource( m_frontend.breakpointResolved(breakpointId, std::move(location)); } } + setScriptInstrumentationBreakpointIfNeeded(scriptRef); +} + +void V8DebuggerAgentImpl::setScriptInstrumentationBreakpointIfNeeded( + V8DebuggerScript* scriptRef) { + protocol::DictionaryValue* breakpoints = + m_state->getObject(DebuggerAgentState::instrumentationBreakpoints); + if (!breakpoints) return; + bool isBlackboxed = isFunctionBlackboxed( + scriptRef->scriptId(), v8::debug::Location(0, 0), + v8::debug::Location(scriptRef->endLine(), scriptRef->endColumn())); + if (isBlackboxed) return; + + String16 sourceMapURL = scriptRef->sourceMappingURL(); + String16 breakpointId = generateInstrumentationBreakpointId( + InstrumentationEnum::BeforeScriptExecution); + if (!breakpoints->get(breakpointId)) { + if (sourceMapURL.isEmpty()) return; + breakpointId = generateInstrumentationBreakpointId( + InstrumentationEnum::BeforeScriptWithSourceMapExecution); + if (!breakpoints->get(breakpointId)) return; + } + v8::debug::BreakpointId debuggerBreakpointId; + if (!scriptRef->setBreakpointOnRun(&debuggerBreakpointId)) return; + std::unique_ptr<protocol::DictionaryValue> data = + protocol::DictionaryValue::create(); + data->setString("url", scriptRef->sourceURL()); + 
data->setString("scriptId", scriptRef->scriptId()); + if (!sourceMapURL.isEmpty()) data->setString("sourceMapURL", sourceMapURL); + + m_breakpointsOnScriptRun[debuggerBreakpointId] = std::move(data); + m_debuggerBreakpointIdToBreakpointId[debuggerBreakpointId] = breakpointId; + m_breakpointIdToDebuggerBreakpointIds[breakpointId].push_back( + debuggerBreakpointId); } void V8DebuggerAgentImpl::didPause( @@ -1539,6 +1606,14 @@ void V8DebuggerAgentImpl::didPause( std::unique_ptr<Array<String16>> hitBreakpointIds = Array<String16>::create(); for (const auto& id : hitBreakpoints) { + auto it = m_breakpointsOnScriptRun.find(id); + if (it != m_breakpointsOnScriptRun.end()) { + hitReasons.push_back(std::make_pair( + protocol::Debugger::Paused::ReasonEnum::Instrumentation, + std::move(it->second))); + m_breakpointsOnScriptRun.erase(it); + continue; + } auto breakpointIterator = m_debuggerBreakpointIdToBreakpointId.find(id); if (breakpointIterator == m_debuggerBreakpointIdToBreakpointId.end()) { continue; diff --git a/deps/v8/src/inspector/v8-debugger-agent-impl.h b/deps/v8/src/inspector/v8-debugger-agent-impl.h index bd781c7017..0a5a169907 100644 --- a/deps/v8/src/inspector/v8-debugger-agent-impl.h +++ b/deps/v8/src/inspector/v8-debugger-agent-impl.h @@ -60,6 +60,8 @@ class V8DebuggerAgentImpl : public protocol::Debugger::Backend { Response setBreakpointOnFunctionCall(const String16& functionObjectId, Maybe<String16> optionalCondition, String16* outBreakpointId) override; + Response setInstrumentationBreakpoint(const String16& instrumentation, + String16* outBreakpointId) override; Response removeBreakpoint(const String16& breakpointId) override; Response continueToLocation(std::unique_ptr<protocol::Debugger::Location>, Maybe<String16> targetCallFrames) override; @@ -184,6 +186,8 @@ class V8DebuggerAgentImpl : public protocol::Debugger::Backend { bool isPaused() const; + void setScriptInstrumentationBreakpointIfNeeded(V8DebuggerScript* script); + using ScriptsMap = 
std::unordered_map<String16, std::unique_ptr<V8DebuggerScript>>; using BreakpointIdToDebuggerBreakpointIdsMap = @@ -201,6 +205,9 @@ class V8DebuggerAgentImpl : public protocol::Debugger::Backend { ScriptsMap m_scripts; BreakpointIdToDebuggerBreakpointIdsMap m_breakpointIdToDebuggerBreakpointIds; DebuggerBreakpointIdToBreakpointIdMap m_debuggerBreakpointIdToBreakpointId; + std::unordered_map<v8::debug::BreakpointId, + std::unique_ptr<protocol::DictionaryValue>> + m_breakpointsOnScriptRun; size_t m_maxScriptCacheSize = 0; size_t m_cachedScriptSize = 0; diff --git a/deps/v8/src/inspector/v8-debugger-script.cc b/deps/v8/src/inspector/v8-debugger-script.cc index 6eaee6e8bc..fe7d570942 100644 --- a/deps/v8/src/inspector/v8-debugger-script.cc +++ b/deps/v8/src/inspector/v8-debugger-script.cc @@ -4,12 +4,12 @@ #include "src/inspector/v8-debugger-script.h" +#include "src/common/v8memory.h" #include "src/inspector/inspected-context.h" #include "src/inspector/string-util.h" #include "src/inspector/v8-debugger-agent-impl.h" #include "src/inspector/v8-inspector-impl.h" #include "src/inspector/wasm-translation.h" -#include "src/v8memory.h" namespace v8_inspector { @@ -235,6 +235,11 @@ class ActualScript : public V8DebuggerScript { id); } + bool setBreakpointOnRun(int* id) const override { + v8::HandleScope scope(m_isolate); + return script()->SetBreakpointOnScriptEntry(id); + } + const String16& hash() const override { if (!m_hash.isEmpty()) return m_hash; v8::HandleScope scope(m_isolate); @@ -424,6 +429,8 @@ class WasmVirtualScript : public V8DebuggerScript { return true; } + bool setBreakpointOnRun(int*) const override { return false; } + const String16& hash() const override { if (m_hash.isEmpty()) { m_hash = m_wasmTranslation->GetHash(m_id, m_functionIndex); diff --git a/deps/v8/src/inspector/v8-debugger-script.h b/deps/v8/src/inspector/v8-debugger-script.h index a6e77b6699..547bb0a2cc 100644 --- a/deps/v8/src/inspector/v8-debugger-script.h +++ 
b/deps/v8/src/inspector/v8-debugger-script.h @@ -90,6 +90,7 @@ class V8DebuggerScript { virtual bool setBreakpoint(const String16& condition, v8::debug::Location* location, int* id) const = 0; virtual void MakeWeak() = 0; + virtual bool setBreakpointOnRun(int* id) const = 0; protected: V8DebuggerScript(v8::Isolate*, String16 id, String16 url); diff --git a/deps/v8/src/inspector/v8-debugger.cc b/deps/v8/src/inspector/v8-debugger.cc index 7c8eb21299..bc0c9d8cf6 100644 --- a/deps/v8/src/inspector/v8-debugger.cc +++ b/deps/v8/src/inspector/v8-debugger.cc @@ -42,7 +42,8 @@ class MatchPrototypePredicate : public v8::debug::QueryObjectPredicate { : m_inspector(inspector), m_context(context), m_prototype(prototype) {} bool Filter(v8::Local<v8::Object> object) override { - v8::Local<v8::Context> objectContext = object->CreationContext(); + v8::Local<v8::Context> objectContext = + v8::debug::GetCreationContext(object); if (objectContext != m_context) return false; if (!m_inspector->client()->isInspectableHeapObject(object)) return false; // Get prototype chain for current object until first visited prototype. diff --git a/deps/v8/src/inspector/v8-inspector-protocol-encoding.cc b/deps/v8/src/inspector/v8-inspector-protocol-encoding.cc new file mode 100644 index 0000000000..45702e4b33 --- /dev/null +++ b/deps/v8/src/inspector/v8-inspector-protocol-encoding.cc @@ -0,0 +1,51 @@ +// Copyright 2019 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "src/inspector/v8-inspector-protocol-encoding.h" + +#include <cmath> +#include "../../third_party/inspector_protocol/encoding/encoding.h" +#include "src/numbers/conversions.h" +#include "src/utils/vector.h" + +namespace v8_inspector { +namespace { +using IPEStatus = ::v8_inspector_protocol_encoding::Status; +using ::v8_inspector_protocol_encoding::span; + +class Platform : public ::v8_inspector_protocol_encoding::json::Platform { + public: + bool StrToD(const char* str, double* result) const override { + *result = v8::internal::StringToDouble(str, v8::internal::NO_FLAGS); + return !std::isnan(*result); + } + std::unique_ptr<char[]> DToStr(double value) const override { + v8::internal::ScopedVector<char> buffer( + v8::internal::kDoubleToCStringMinBufferSize); + const char* str = v8::internal::DoubleToCString(value, buffer); + if (str == nullptr) return nullptr; + std::unique_ptr<char[]> result(new char[strlen(str) + 1]); + memcpy(result.get(), str, strlen(str) + 1); + DCHECK_EQ(0, result[strlen(str)]); + return result; + } +}; +} // namespace + +IPEStatus ConvertCBORToJSON(span<uint8_t> cbor, std::vector<uint8_t>* json) { + Platform platform; + return ConvertCBORToJSON(platform, cbor, json); +} + +IPEStatus ConvertJSONToCBOR(span<uint8_t> json, std::vector<uint8_t>* cbor) { + Platform platform; + return ConvertJSONToCBOR(platform, json, cbor); +} + +IPEStatus ConvertJSONToCBOR(span<uint16_t> json, std::vector<uint8_t>* cbor) { + Platform platform; + return ConvertJSONToCBOR(platform, json, cbor); +} + +} // namespace v8_inspector diff --git a/deps/v8/src/inspector/v8-inspector-protocol-encoding.h b/deps/v8/src/inspector/v8-inspector-protocol-encoding.h new file mode 100644 index 0000000000..6dcc7e8401 --- /dev/null +++ b/deps/v8/src/inspector/v8-inspector-protocol-encoding.h @@ -0,0 +1,26 @@ +// Copyright 2019 the V8 project authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_INSPECTOR_V8_INSPECTOR_PROTOCOL_ENCODING_H_ +#define V8_INSPECTOR_V8_INSPECTOR_PROTOCOL_ENCODING_H_ + +#include "../../third_party/inspector_protocol/encoding/encoding.h" + +namespace v8_inspector { + +::v8_inspector_protocol_encoding::Status ConvertCBORToJSON( + ::v8_inspector_protocol_encoding::span<uint8_t> cbor, + std::vector<uint8_t>* json); + +::v8_inspector_protocol_encoding::Status ConvertJSONToCBOR( + ::v8_inspector_protocol_encoding::span<uint8_t> json, + std::vector<uint8_t>* cbor); + +::v8_inspector_protocol_encoding::Status ConvertJSONToCBOR( + ::v8_inspector_protocol_encoding::span<uint16_t> json, + std::vector<uint8_t>* cbor); + +} // namespace v8_inspector + +#endif // V8_INSPECTOR_V8_INSPECTOR_PROTOCOL_ENCODING_H_ diff --git a/deps/v8/src/inspector/v8-inspector-session-impl.cc b/deps/v8/src/inspector/v8-inspector-session-impl.cc index c4c4cc14a1..4242abb64a 100644 --- a/deps/v8/src/inspector/v8-inspector-session-impl.cc +++ b/deps/v8/src/inspector/v8-inspector-session-impl.cc @@ -4,6 +4,8 @@ #include "src/inspector/v8-inspector-session-impl.h" +#include "src/base/logging.h" +#include "src/base/macros.h" #include "src/inspector/injected-script.h" #include "src/inspector/inspected-context.h" #include "src/inspector/protocol/Protocol.h" @@ -15,11 +17,45 @@ #include "src/inspector/v8-debugger.h" #include "src/inspector/v8-heap-profiler-agent-impl.h" #include "src/inspector/v8-inspector-impl.h" +#include "src/inspector/v8-inspector-protocol-encoding.h" #include "src/inspector/v8-profiler-agent-impl.h" #include "src/inspector/v8-runtime-agent-impl.h" #include "src/inspector/v8-schema-agent-impl.h" namespace v8_inspector { +namespace { +using ::v8_inspector_protocol_encoding::span; +using ::v8_inspector_protocol_encoding::SpanFrom; +using IPEStatus = ::v8_inspector_protocol_encoding::Status; + +bool IsCBORMessage(const StringView& msg) { 
+ return msg.is8Bit() && msg.length() >= 2 && msg.characters8()[0] == 0xd8 && + msg.characters8()[1] == 0x5a; +} + +IPEStatus ConvertToCBOR(const StringView& state, std::vector<uint8_t>* cbor) { + return state.is8Bit() + ? ConvertJSONToCBOR( + span<uint8_t>(state.characters8(), state.length()), cbor) + : ConvertJSONToCBOR( + span<uint16_t>(state.characters16(), state.length()), cbor); +} + +std::unique_ptr<protocol::DictionaryValue> ParseState(const StringView& state) { + std::vector<uint8_t> converted; + span<uint8_t> cbor; + if (IsCBORMessage(state)) + cbor = span<uint8_t>(state.characters8(), state.length()); + else if (ConvertToCBOR(state, &converted).ok()) + cbor = SpanFrom(converted); + if (!cbor.empty()) { + auto dict = protocol::DictionaryValue::cast( + protocol::Value::parseBinary(cbor.data(), cbor.size())); + if (dict) return dict; // Unparsable or non-dictionary state: fall back below. + } + return protocol::DictionaryValue::create(); +} +} // namespace // static bool V8InspectorSession::canDispatchMethod(const StringView& method) { @@ -60,22 +96,13 @@ V8InspectorSessionImpl::V8InspectorSessionImpl(V8InspectorImpl* inspector, m_channel(channel), m_customObjectFormatterEnabled(false), m_dispatcher(this), - m_state(nullptr), + m_state(ParseState(savedState)), m_runtimeAgent(nullptr), m_debuggerAgent(nullptr), m_heapProfilerAgent(nullptr), m_profilerAgent(nullptr), m_consoleAgent(nullptr), m_schemaAgent(nullptr) { - if (savedState.length()) { - std::unique_ptr<protocol::Value> state = - protocol::StringUtil::parseJSON(toString16(savedState)); - if (state) m_state = protocol::DictionaryValue::cast(std::move(state)); - if (!m_state) m_state = protocol::DictionaryValue::create(); - } else { - m_state = protocol::DictionaryValue::create(); - } - m_state->getBoolean("use_binary_protocol", &use_binary_protocol_); m_runtimeAgent.reset(new V8RuntimeAgentImpl( @@ -330,8 +357,7 @@ void V8InspectorSessionImpl::reportAllContexts(V8RuntimeAgentImpl* agent) { void 
V8InspectorSessionImpl::dispatchProtocolMessage( const StringView& message) { - bool binary_protocol = - message.is8Bit() && message.length() && message.characters8()[0] == 0xD8; + bool binary_protocol = IsCBORMessage(message); if (binary_protocol) { use_binary_protocol_ = true; m_state->setBoolean("use_binary_protocol", true); @@ -355,8 +381,17 @@ void V8InspectorSessionImpl::dispatchProtocolMessage( } std::unique_ptr<StringBuffer> V8InspectorSessionImpl::stateJSON() { - String16 json = m_state->toJSONString(); - return StringBufferImpl::adopt(json); + std::vector<uint8_t> json; + IPEStatus status = ConvertCBORToJSON(SpanFrom(state()), &json); + DCHECK(status.ok()); + USE(status); + return v8::base::make_unique<BinaryStringBuffer>(std::move(json)); +} + +std::vector<uint8_t> V8InspectorSessionImpl::state() { + std::vector<uint8_t> out; + m_state->writeBinary(&out); + return out; } std::vector<std::unique_ptr<protocol::Schema::API::Domain>> diff --git a/deps/v8/src/inspector/v8-inspector-session-impl.h b/deps/v8/src/inspector/v8-inspector-session-impl.h index 8834b56f5d..ea1d29773c 100644 --- a/deps/v8/src/inspector/v8-inspector-session-impl.h +++ b/deps/v8/src/inspector/v8-inspector-session-impl.h @@ -65,6 +65,7 @@ class V8InspectorSessionImpl : public V8InspectorSession, // V8InspectorSession implementation. 
void dispatchProtocolMessage(const StringView& message) override; std::unique_ptr<StringBuffer> stateJSON() override; + std::vector<uint8_t> state() override; std::vector<std::unique_ptr<protocol::Schema::API::Domain>> supportedDomains() override; void addInspectedObject( diff --git a/deps/v8/src/inspector/v8-profiler-agent-impl.cc b/deps/v8/src/inspector/v8-profiler-agent-impl.cc index b825397b4d..15f93e39d7 100644 --- a/deps/v8/src/inspector/v8-profiler-agent-impl.cc +++ b/deps/v8/src/inspector/v8-profiler-agent-impl.cc @@ -303,8 +303,8 @@ Response V8ProfilerAgentImpl::startPreciseCoverage(Maybe<bool> callCount, // coverage data if it exists (at the time of writing, that's the case for // each function recompiled after the BlockCount mode has been set); and // function-granularity coverage data otherwise. - typedef v8::debug::Coverage C; - typedef v8::debug::CoverageMode Mode; + using C = v8::debug::Coverage; + using Mode = v8::debug::CoverageMode; Mode mode = callCountValue ? (detailedValue ? Mode::kBlockCount : Mode::kPreciseCount) : (detailedValue ? Mode::kBlockBinary : Mode::kPreciseBinary); diff --git a/deps/v8/src/inspector/v8-string-conversions.cc b/deps/v8/src/inspector/v8-string-conversions.cc new file mode 100644 index 0000000000..0c75e66b97 --- /dev/null +++ b/deps/v8/src/inspector/v8-string-conversions.cc @@ -0,0 +1,403 @@ +// Copyright 2019 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "src/inspector/v8-string-conversions.h" + +#include <limits> +#include <vector> + +#include "src/base/logging.h" +#include "src/base/v8-fallthrough.h" + +namespace v8_inspector { +namespace { +using UChar = uint16_t; +using UChar32 = uint32_t; + +bool isASCII(UChar c) { return !(c & ~0x7F); } + +const UChar replacementCharacter = 0xFFFD; + +inline int inlineUTF8SequenceLengthNonASCII(char b0) { + if ((b0 & 0xC0) != 0xC0) return 0; + if ((b0 & 0xE0) == 0xC0) return 2; + if ((b0 & 0xF0) == 0xE0) return 3; + if ((b0 & 0xF8) == 0xF0) return 4; + return 0; +} + +inline int inlineUTF8SequenceLength(char b0) { + return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0); +} + +// Once the bits are split out into bytes of UTF-8, this is a mask OR-ed +// into the first byte, depending on how many bytes follow. There are +// as many entries in this table as there are UTF-8 sequence types. +// (I.e., one byte sequence, two byte... etc.). Remember that sequences +// for *legal* UTF-8 will be 4 or fewer bytes total. +static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, + 0xF0, 0xF8, 0xFC}; + +enum ConversionResult { + conversionOK, // conversion successful + sourceExhausted, // partial character in source, but hit end + targetExhausted, // insuff. room in target for conversion + sourceIllegal // source sequence is illegal/malformed +}; + +ConversionResult convertUTF16ToUTF8(const UChar** sourceStart, + const UChar* sourceEnd, char** targetStart, + char* targetEnd, bool strict) { + ConversionResult result = conversionOK; + const UChar* source = *sourceStart; + char* target = *targetStart; + while (source < sourceEnd) { + UChar32 ch; + uint32_t bytesToWrite = 0; + const UChar32 byteMask = 0xBF; + const UChar32 byteMark = 0x80; + const UChar* oldSource = + source; // In case we have to back up because of target overflow. + ch = static_cast<uint16_t>(*source++); + // If we have a surrogate pair, convert to UChar32 first. 
+ if (ch >= 0xD800 && ch <= 0xDBFF) { + // If the 16 bits following the high surrogate are in the source buffer... + if (source < sourceEnd) { + UChar32 ch2 = static_cast<uint16_t>(*source); + // If it's a low surrogate, convert to UChar32. + if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { + ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x0010000; + ++source; + } else if (strict) { // it's an unpaired high surrogate + --source; // return to the illegal value itself + result = sourceIllegal; + break; + } + } else { // We don't have the 16 bits following the high surrogate. + --source; // return to the high surrogate + result = sourceExhausted; + break; + } + } else if (strict) { + // UTF-16 surrogate values are illegal in UTF-32 + if (ch >= 0xDC00 && ch <= 0xDFFF) { + --source; // return to the illegal value itself + result = sourceIllegal; + break; + } + } + // Figure out how many bytes the result will require + if (ch < static_cast<UChar32>(0x80)) { + bytesToWrite = 1; + } else if (ch < static_cast<UChar32>(0x800)) { + bytesToWrite = 2; + } else if (ch < static_cast<UChar32>(0x10000)) { + bytesToWrite = 3; + } else if (ch < static_cast<UChar32>(0x110000)) { + bytesToWrite = 4; + } else { + bytesToWrite = 3; + ch = replacementCharacter; + } + + target += bytesToWrite; + if (target > targetEnd) { + source = oldSource; // Back up source pointer! + target -= bytesToWrite; + result = targetExhausted; + break; + } + switch (bytesToWrite) { + case 4: + *--target = static_cast<char>((ch | byteMark) & byteMask); + ch >>= 6; + V8_FALLTHROUGH; + case 3: + *--target = static_cast<char>((ch | byteMark) & byteMask); + ch >>= 6; + V8_FALLTHROUGH; + case 2: + *--target = static_cast<char>((ch | byteMark) & byteMask); + ch >>= 6; + V8_FALLTHROUGH; + case 1: + *--target = static_cast<char>(ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/** + * Is this code point a BMP code point (U+0000..U+ffff)? 
+ * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.8 + */ +#define U_IS_BMP(c) ((uint32_t)(c) <= 0xFFFF) + +/** + * Is this code point a supplementary code point (U+010000..U+10FFFF)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.8 + */ +#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x010000) <= 0xFFFFF) + +/** + * Is this code point a surrogate (U+d800..U+dfff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE(c) (((c)&0xFFFFF800) == 0xD800) + +/** + * Get the lead surrogate (0xD800..0xDBFF) for a + * supplementary code point (0x010000..0x10FFFF). + * @param supplementary 32-bit code point (U+010000..U+10FFFF) + * @return lead surrogate (U+D800..U+DBFF) for supplementary + * @stable ICU 2.4 + */ +#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xD7C0) + +/** + * Get the trail surrogate (0xDC00..0xDFFF) for a + * supplementary code point (0x010000..0x10FFFF). + * @param supplementary 32-bit code point (U+010000..U+10FFFF) + * @return trail surrogate (U+DC00..U+DFFF) for supplementary + * @stable ICU 2.4 + */ +#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3FF) | 0xDC00) + +// This must be called with the length pre-determined by the first byte. +// If presented with a length > 4, this returns false. The Unicode +// definition of UTF-8 goes up to 4-byte sequences. +static bool isLegalUTF8(const unsigned char* source, int length) { + unsigned char a; + const unsigned char* srcptr = source + length; + switch (length) { + default: + return false; + // Everything else falls through when "true"... 
+ case 4: + if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + V8_FALLTHROUGH; + case 3: + if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + V8_FALLTHROUGH; + case 2: + if ((a = (*--srcptr)) > 0xBF) return false; + + // no fall-through in this inner switch + switch (*source) { + case 0xE0: + if (a < 0xA0) return false; + break; + case 0xED: + if (a > 0x9F) return false; + break; + case 0xF0: + if (a < 0x90) return false; + break; + case 0xF4: + if (a > 0x8F) return false; + break; + default: + if (a < 0x80) return false; + } + V8_FALLTHROUGH; + + case 1: + if (*source >= 0x80 && *source < 0xC2) return false; + } + if (*source > 0xF4) return false; + return true; +} + +// Magic values subtracted from a buffer value during UTF8 conversion. +// This table contains as many values as there might be trailing bytes +// in a UTF-8 sequence. +static const UChar32 offsetsFromUTF8[6] = {0x00000000UL, + 0x00003080UL, + 0x000E2080UL, + 0x03C82080UL, + static_cast<UChar32>(0xFA082080UL), + static_cast<UChar32>(0x82082080UL)}; + +static inline UChar32 readUTF8Sequence(const char*& sequence, size_t length) { + UChar32 character = 0; + + // The cases all fall through. 
+ switch (length) { + case 6: + character += static_cast<unsigned char>(*sequence++); + character <<= 6; + V8_FALLTHROUGH; + case 5: + character += static_cast<unsigned char>(*sequence++); + character <<= 6; + V8_FALLTHROUGH; + case 4: + character += static_cast<unsigned char>(*sequence++); + character <<= 6; + V8_FALLTHROUGH; + case 3: + character += static_cast<unsigned char>(*sequence++); + character <<= 6; + V8_FALLTHROUGH; + case 2: + character += static_cast<unsigned char>(*sequence++); + character <<= 6; + V8_FALLTHROUGH; + case 1: + character += static_cast<unsigned char>(*sequence++); + } + + return character - offsetsFromUTF8[length - 1]; +} + +ConversionResult convertUTF8ToUTF16(const char** sourceStart, + const char* sourceEnd, UChar** targetStart, + UChar* targetEnd, bool* sourceAllASCII, + bool strict) { + ConversionResult result = conversionOK; + const char* source = *sourceStart; + UChar* target = *targetStart; + UChar orAllData = 0; + while (source < sourceEnd) { + int utf8SequenceLength = inlineUTF8SequenceLength(*source); + if (sourceEnd - source < utf8SequenceLength) { + result = sourceExhausted; + break; + } + // Do this check whether lenient or strict + if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), + utf8SequenceLength)) { + result = sourceIllegal; + break; + } + + UChar32 character = readUTF8Sequence(source, utf8SequenceLength); + + if (target >= targetEnd) { + source -= utf8SequenceLength; // Back up source pointer! 
+ result = targetExhausted; + break; + } + + if (U_IS_BMP(character)) { + // UTF-16 surrogate values are illegal in UTF-32 + if (U_IS_SURROGATE(character)) { + if (strict) { + source -= utf8SequenceLength; // return to the illegal value itself + result = sourceIllegal; + break; + } + *target++ = replacementCharacter; + orAllData |= replacementCharacter; + } else { + *target++ = static_cast<UChar>(character); // normal case + orAllData |= character; + } + } else if (U_IS_SUPPLEMENTARY(character)) { + // target is a character in range 0xFFFF - 0x10FFFF + if (target + 1 >= targetEnd) { + source -= utf8SequenceLength; // Back up source pointer! + result = targetExhausted; + break; + } + *target++ = U16_LEAD(character); + *target++ = U16_TRAIL(character); + orAllData = 0xFFFF; + } else { + if (strict) { + source -= utf8SequenceLength; // return to the start + result = sourceIllegal; + break; // Bail out; shouldn't continue + } else { + *target++ = replacementCharacter; + orAllData |= replacementCharacter; + } + } + } + *sourceStart = source; + *targetStart = target; + + if (sourceAllASCII) *sourceAllASCII = !(orAllData & ~0x7F); + + return result; +} + +// Helper to write a three-byte UTF-8 code point to the buffer, caller must +// check room is available. +static inline void putUTF8Triple(char*& buffer, UChar ch) { + *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); + *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); + *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); +} +} // namespace + +std::string UTF16ToUTF8(const UChar* stringStart, size_t length) { + if (!stringStart || !length) return std::string(); + + // Allocate a buffer big enough to hold all the characters + // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). + // Optimization ideas, if we find this function is hot: + // * We could speculatively create a CStringBuffer to contain 'length' + // characters, and resize if necessary (i.e. 
if the buffer contains + // non-ascii characters). (Alternatively, scan the buffer first for + // ascii characters, so we know this will be sufficient). + // * We could allocate a CStringBuffer with an appropriate size to + // have a good chance of being able to write the string into the + // buffer without reallocing (say, 1.5 x length). + if (length > std::numeric_limits<unsigned>::max() / 3) return std::string(); + + std::string output(length * 3, '\0'); + const UChar* characters = stringStart; + const UChar* characters_end = characters + length; + char* buffer = &*output.begin(); + char* buffer_end = buffer + output.size(); // Never dereference end(): UB. + while (characters < characters_end) { + // Use strict conversion to detect unpaired surrogates. + ConversionResult result = convertUTF16ToUTF8( + &characters, characters_end, &buffer, buffer_end, /* strict= */ true); + DCHECK_NE(result, targetExhausted); + // Conversion fails when there is an unpaired surrogate. Put + // replacement character (U+FFFD) instead of the unpaired + // surrogate. + if (result != conversionOK) { + DCHECK_LE(0xD800, *characters); + DCHECK_LE(*characters, 0xDFFF); + // There should be room left, since one UChar hasn't been + // converted. 
+ DCHECK_LE(buffer + 3, buffer_end); + putUTF8Triple(buffer, replacementCharacter); + ++characters; + } + } + + output.resize(buffer - output.data()); + return output; +} + +std::basic_string<UChar> UTF8ToUTF16(const char* stringStart, size_t length) { + if (!stringStart || !length) return std::basic_string<UChar>(); + std::vector<uint16_t> buffer(length); + UChar* bufferStart = buffer.data(); + + UChar* bufferCurrent = bufferStart; + const char* stringCurrent = reinterpret_cast<const char*>(stringStart); + if (convertUTF8ToUTF16(&stringCurrent, + reinterpret_cast<const char*>(stringStart + length), + &bufferCurrent, bufferCurrent + buffer.size(), nullptr, + true) != conversionOK) + return std::basic_string<uint16_t>(); + size_t utf16Length = bufferCurrent - bufferStart; + return std::basic_string<UChar>(bufferStart, bufferStart + utf16Length); +} + +} // namespace v8_inspector diff --git a/deps/v8/src/inspector/v8-string-conversions.h b/deps/v8/src/inspector/v8-string-conversions.h new file mode 100644 index 0000000000..c1d69c18f0 --- /dev/null +++ b/deps/v8/src/inspector/v8-string-conversions.h @@ -0,0 +1,17 @@ +// Copyright 2019 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_INSPECTOR_V8_STRING_CONVERSIONS_H_ +#define V8_INSPECTOR_V8_STRING_CONVERSIONS_H_ + +#include <string> + +// Conversion routines between UTF8 and UTF16, used by string-16.{h,cc}. You may +// want to use string-16.h directly rather than these. +namespace v8_inspector { +std::basic_string<uint16_t> UTF8ToUTF16(const char* stringStart, size_t length); +std::string UTF16ToUTF8(const uint16_t* stringStart, size_t length); +} // namespace v8_inspector + +#endif // V8_INSPECTOR_V8_STRING_CONVERSIONS_H_ |