From 5aaa7fee2e4a075d9123b885f9e8cda3de2a780a Mon Sep 17 00:00:00 2001 From: MichaĆ«l Zasso Date: Sun, 19 May 2019 09:54:52 +0200 Subject: tools: update inspector_protocol to 0aafd2 Co-authored-by: Ben Noordhuis PR-URL: https://github.com/nodejs/node/pull/27770 Reviewed-By: Ben Noordhuis Reviewed-By: Eugene Ostroukhov Reviewed-By: Rich Trott --- tools/inspector_protocol/.clang-format | 36 + tools/inspector_protocol/BUILD.gn | 34 + tools/inspector_protocol/README.md | 33 + tools/inspector_protocol/README.node | 2 +- tools/inspector_protocol/code_generator.py | 36 +- tools/inspector_protocol/codereview.settings | 6 + .../inspector_protocol/convert_protocol_to_json.py | 2 - tools/inspector_protocol/encoding/encoding.cc | 2189 +++++++++++++++++++ tools/inspector_protocol/encoding/encoding.h | 510 +++++ tools/inspector_protocol/encoding/encoding_test.cc | 1838 ++++++++++++++++ .../encoding/encoding_test_helper.h | 33 + tools/inspector_protocol/inspector_protocol.gni | 7 +- tools/inspector_protocol/inspector_protocol.gypi | 5 +- tools/inspector_protocol/lib/CBOR_cpp.template | 803 ------- tools/inspector_protocol/lib/CBOR_h.template | 416 ---- .../inspector_protocol/lib/Collections_h.template | 43 - tools/inspector_protocol/lib/Values_cpp.template | 143 +- .../lib/base_string_adapter_cc.template | 78 +- .../lib/base_string_adapter_h.template | 25 +- tools/inspector_protocol/lib/encoding_cpp.template | 2199 ++++++++++++++++++++ tools/inspector_protocol/lib/encoding_h.template | 520 +++++ tools/inspector_protocol/pdl.py | 24 +- tools/inspector_protocol/roll.py | 162 ++ .../templates/TypeBuilder_cpp.template | 4 +- .../templates/TypeBuilder_h.template | 4 +- 25 files changed, 7705 insertions(+), 1447 deletions(-) create mode 100644 tools/inspector_protocol/.clang-format create mode 100644 tools/inspector_protocol/BUILD.gn create mode 100644 tools/inspector_protocol/README.md create mode 100644 tools/inspector_protocol/codereview.settings create mode 100644 tools/inspector_protocol/encoding/encoding.cc create mode 100644 tools/inspector_protocol/encoding/encoding.h create mode 100644 tools/inspector_protocol/encoding/encoding_test.cc create mode 100644 tools/inspector_protocol/encoding/encoding_test_helper.h delete mode 100644 tools/inspector_protocol/lib/CBOR_cpp.template delete mode 100644 tools/inspector_protocol/lib/CBOR_h.template delete mode 100644 tools/inspector_protocol/lib/Collections_h.template create mode 100644 tools/inspector_protocol/lib/encoding_cpp.template create mode 100644 tools/inspector_protocol/lib/encoding_h.template create mode 100644 tools/inspector_protocol/roll.py (limited to 'tools/inspector_protocol') diff --git a/tools/inspector_protocol/.clang-format b/tools/inspector_protocol/.clang-format new file mode 100644 index 0000000000..fcbc9c321a --- /dev/null +++ b/tools/inspector_protocol/.clang-format @@ -0,0 +1,36 @@ +# Defines the Chromium style for automatic reformatting. +# http://clang.llvm.org/docs/ClangFormatStyleOptions.html +BasedOnStyle: Chromium +# This defaults to 'Auto'. Explicitly set it for a while, so that +# 'vector >' in existing files gets formatted to +# 'vector>'. ('Auto' means that clang-format will only use +# 'int>>' if the file already contains at least one such instance.) +Standard: Cpp11 + +# Make sure code like: +# IPC_BEGIN_MESSAGE_MAP() +# IPC_MESSAGE_HANDLER(WidgetHostViewHost_Update, OnUpdate) +# IPC_END_MESSAGE_MAP() +# gets correctly indented. +MacroBlockBegin: "^\ +BEGIN_MSG_MAP|\ +BEGIN_MSG_MAP_EX|\ +BEGIN_SAFE_MSG_MAP_EX|\ +CR_BEGIN_MSG_MAP_EX|\ +IPC_BEGIN_MESSAGE_MAP|\ +IPC_BEGIN_MESSAGE_MAP_WITH_PARAM|\ +IPC_PROTOBUF_MESSAGE_TRAITS_BEGIN|\ +IPC_STRUCT_BEGIN|\ +IPC_STRUCT_BEGIN_WITH_PARENT|\ +IPC_STRUCT_TRAITS_BEGIN|\ +POLPARAMS_BEGIN|\ +PPAPI_BEGIN_MESSAGE_MAP$" +MacroBlockEnd: "^\ +CR_END_MSG_MAP|\ +END_MSG_MAP|\ +IPC_END_MESSAGE_MAP|\ +IPC_PROTOBUF_MESSAGE_TRAITS_END|\ +IPC_STRUCT_END|\ +IPC_STRUCT_TRAITS_END|\ +POLPARAMS_END|\ +PPAPI_END_MESSAGE_MAP$" diff --git a/tools/inspector_protocol/BUILD.gn b/tools/inspector_protocol/BUILD.gn new file mode 100644 index 0000000000..974471bf27 --- /dev/null +++ b/tools/inspector_protocol/BUILD.gn @@ -0,0 +1,34 @@ +# Copyright 2019 the V8 project authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +static_library("encoding") { + sources = [ + "encoding/encoding.cc", + "encoding/encoding.h", + ] +} + +# encoding_test is part of the unittests, defined in +# test/unittests/BUILD.gn. + +import("../../gni/v8.gni") + +v8_source_set("encoding_test") { + sources = [ + "encoding/encoding_test.cc", + "encoding/encoding_test_helper.h", + ] + configs = [ + "../..:external_config", + "../..:internal_config_base", + ] + deps = [ + ":encoding", + "../..:v8_libbase", + "../../src/inspector:inspector_string_conversions", + "//testing/gmock", + "//testing/gtest", + ] + testonly = true +} diff --git a/tools/inspector_protocol/README.md b/tools/inspector_protocol/README.md new file mode 100644 index 0000000000..da3f93f3f3 --- /dev/null +++ b/tools/inspector_protocol/README.md @@ -0,0 +1,33 @@ +# Chromium inspector (devtools) protocol + +This package contains code generators and templates for the Chromium +inspector protocol. + +The canonical location of this package is at +https://chromium.googlesource.com/deps/inspector_protocol/ + +In the Chromium tree, it's rolled into +https://cs.chromium.org/chromium/src/third_party/inspector_protocol/ + +In the V8 tree, it's rolled into +https://cs.chromium.org/chromium/src/v8/third_party/inspector_protocol/ + +See also [Contributing to Chrome Devtools Protocol](https://docs.google.com/document/d/1c-COD2kaK__5iMM5SEx-PzNA7HFmgttcYfOHHX0HaOM/edit). + +We're working on enabling standalone builds for parts of this package for +testing and development, please feel free to ignore this for now. +But, if you're familiar with +[Chromium's development process](https://www.chromium.org/developers/contributing-code) +and have the depot_tools installed, you may use these commands +to fetch the package (and dependencies) and build and run the tests: + + fetch inspector_protocol + cd src + gn gen out/Release + ninja -C out/Release json_parser_test + out/Release/json_parser_test + +You'll probably also need to install g++, since Clang uses this to find the +standard C++ headers. E.g., + + sudo apt-get install g++-8 diff --git a/tools/inspector_protocol/README.node b/tools/inspector_protocol/README.node index 6f22020a4d..a838019857 100644 --- a/tools/inspector_protocol/README.node +++ b/tools/inspector_protocol/README.node @@ -2,7 +2,7 @@ Name: inspector protocol Short Name: inspector_protocol URL: https://chromium.googlesource.com/deps/inspector_protocol/ Version: 0 -Revision: f67ec5180f476830e839226b5ca948e43070fdab +Revision: 0aafd2876f7485db7b07c513c0457b7cbbbe3304 License: BSD License File: LICENSE Security Critical: no diff --git a/tools/inspector_protocol/code_generator.py b/tools/inspector_protocol/code_generator.py index 9200022413..7b555d7478 100755 --- a/tools/inspector_protocol/code_generator.py +++ b/tools/inspector_protocol/code_generator.py @@ -5,7 +5,7 @@ import os.path import sys -import optparse +import argparse import collections import functools import re @@ -17,6 +17,13 @@ except ImportError: import pdl +try: + unicode +except NameError: + # Define unicode for Py3 + def unicode(s, *_): + return s + # Path handling for libraries and templates # Paths have to be normalized because Jinja uses the exact template path to # determine the hash used in the cache filename, and we need a pre-caching step @@ -53,27 +60,16 @@ def read_config(): return collections.namedtuple('X', keys)(*values) try: - cmdline_parser = optparse.OptionParser() - cmdline_parser.add_option("--output_base") - cmdline_parser.add_option("--jinja_dir") - cmdline_parser.add_option("--config") - cmdline_parser.add_option("--config_value", action="append", type="string") - arg_options, _ = cmdline_parser.parse_args() + cmdline_parser = argparse.ArgumentParser() + cmdline_parser.add_argument("--output_base", type=unicode, required=True) + cmdline_parser.add_argument("--jinja_dir", type=unicode, required=True) + cmdline_parser.add_argument("--config", type=unicode, required=True) + cmdline_parser.add_argument("--config_value", default=[], action="append") + arg_options = cmdline_parser.parse_args() jinja_dir = arg_options.jinja_dir - if not jinja_dir: - raise Exception("jinja directory must be specified") - jinja_dir = jinja_dir.decode('utf8') output_base = arg_options.output_base - if not output_base: - raise Exception("Base output directory must be specified") - output_base = output_base.decode('utf8') config_file = arg_options.config - if not config_file: - raise Exception("Config file name must be specified") - config_file = config_file.decode('utf8') config_values = arg_options.config_value - if not config_values: - config_values = [] except Exception: # Work with python 2 and 3 http://docs.python.org/py3k/howto/pyporting.html exc = sys.exc_info()[1] @@ -631,7 +627,7 @@ def main(): "Array_h.template", "DispatcherBase_h.template", "Parser_h.template", - "CBOR_h.template", + "encoding_h.template", ] protocol_cpp_templates = [ @@ -641,7 +637,7 @@ def main(): "Object_cpp.template", "DispatcherBase_cpp.template", "Parser_cpp.template", - "CBOR_cpp.template", + "encoding_cpp.template", ] forward_h_templates = [ diff --git a/tools/inspector_protocol/codereview.settings b/tools/inspector_protocol/codereview.settings new file mode 100644 index 0000000000..6ac8580b4c --- /dev/null +++ b/tools/inspector_protocol/codereview.settings @@ -0,0 +1,6 @@ +# This file is used by git-cl to get repository specific information. +CC_LIST: chromium-reviews@chromium.org +CODE_REVIEW_SERVER: codereview.chromium.org +GERRIT_HOST: True +PROJECT: inspector_protocol +VIEW_VC: https://chromium.googlesource.com/deps/inspector_protocol/+/ diff --git a/tools/inspector_protocol/convert_protocol_to_json.py b/tools/inspector_protocol/convert_protocol_to_json.py index 96048f793d..f98bebcd5e 100755 --- a/tools/inspector_protocol/convert_protocol_to_json.py +++ b/tools/inspector_protocol/convert_protocol_to_json.py @@ -4,10 +4,8 @@ # found in the LICENSE file. import argparse -import collections import json import os.path -import re import sys import pdl diff --git a/tools/inspector_protocol/encoding/encoding.cc b/tools/inspector_protocol/encoding/encoding.cc new file mode 100644 index 0000000000..f7858c9a22 --- /dev/null +++ b/tools/inspector_protocol/encoding/encoding.cc @@ -0,0 +1,2189 @@ +// Copyright 2019 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "encoding.h" + +#include +#include +#include +#include +#include +#include + +namespace v8_inspector_protocol_encoding { +// ============================================================================= +// Status and Error codes +// ============================================================================= + +std::string Status::ToASCIIString() const { + switch (error) { + case Error::OK: + return "OK"; + case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS: + return ToASCIIString("JSON: unprocessed input remains"); + case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED: + return ToASCIIString("JSON: stack limit exceeded"); + case Error::JSON_PARSER_NO_INPUT: + return ToASCIIString("JSON: no input"); + case Error::JSON_PARSER_INVALID_TOKEN: + return ToASCIIString("JSON: invalid token"); + case Error::JSON_PARSER_INVALID_NUMBER: + return ToASCIIString("JSON: invalid number"); + case Error::JSON_PARSER_INVALID_STRING: + return ToASCIIString("JSON: invalid string"); + case Error::JSON_PARSER_UNEXPECTED_ARRAY_END: + return ToASCIIString("JSON: unexpected array end"); + case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED: + return ToASCIIString("JSON: comma or array end expected"); + case Error::JSON_PARSER_STRING_LITERAL_EXPECTED: + return ToASCIIString("JSON: string literal expected"); + case Error::JSON_PARSER_COLON_EXPECTED: + return ToASCIIString("JSON: colon expected"); + case Error::JSON_PARSER_UNEXPECTED_MAP_END: + return ToASCIIString("JSON: unexpected map end"); + case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED: + return ToASCIIString("JSON: comma or map end expected"); + case Error::JSON_PARSER_VALUE_EXPECTED: + return ToASCIIString("JSON: value expected"); + + case Error::CBOR_INVALID_INT32: + return ToASCIIString("CBOR: invalid int32"); + case Error::CBOR_INVALID_DOUBLE: + return ToASCIIString("CBOR: invalid double"); + case Error::CBOR_INVALID_ENVELOPE: + return ToASCIIString("CBOR: invalid envelope"); + case Error::CBOR_INVALID_STRING8: + return ToASCIIString("CBOR: invalid string8"); + case Error::CBOR_INVALID_STRING16: + return ToASCIIString("CBOR: invalid string16"); + case Error::CBOR_INVALID_BINARY: + return ToASCIIString("CBOR: invalid binary"); + case Error::CBOR_UNSUPPORTED_VALUE: + return ToASCIIString("CBOR: unsupported value"); + case Error::CBOR_NO_INPUT: + return ToASCIIString("CBOR: no input"); + case Error::CBOR_INVALID_START_BYTE: + return ToASCIIString("CBOR: invalid start byte"); + case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE: + return ToASCIIString("CBOR: unexpected eof expected value"); + case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY: + return ToASCIIString("CBOR: unexpected eof in array"); + case Error::CBOR_UNEXPECTED_EOF_IN_MAP: + return ToASCIIString("CBOR: unexpected eof in map"); + case Error::CBOR_INVALID_MAP_KEY: + return ToASCIIString("CBOR: invalid map key"); + case Error::CBOR_STACK_LIMIT_EXCEEDED: + return ToASCIIString("CBOR: stack limit exceeded"); + case Error::CBOR_TRAILING_JUNK: + return ToASCIIString("CBOR: trailing junk"); + case Error::CBOR_MAP_START_EXPECTED: + return ToASCIIString("CBOR: map start expected"); + case Error::CBOR_MAP_STOP_EXPECTED: + return ToASCIIString("CBOR: map stop expected"); + case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED: + return ToASCIIString("CBOR: envelope size limit exceeded"); + } + // Some compilers can't figure out that we can't get here. + return "INVALID ERROR CODE"; +} + +std::string Status::ToASCIIString(const char* msg) const { + return std::string(msg) + " at position " + std::to_string(pos); +} + +namespace cbor { +namespace { +// Indicates the number of bits the "initial byte" needs to be shifted to the +// right after applying |kMajorTypeMask| to produce the major type in the +// lowermost bits. +static constexpr uint8_t kMajorTypeBitShift = 5u; +// Mask selecting the low-order 5 bits of the "initial byte", which is where +// the additional information is encoded. +static constexpr uint8_t kAdditionalInformationMask = 0x1f; +// Mask selecting the high-order 3 bits of the "initial byte", which indicates +// the major type of the encoded value. +static constexpr uint8_t kMajorTypeMask = 0xe0; +// Indicates the integer is in the following byte. +static constexpr uint8_t kAdditionalInformation1Byte = 24u; +// Indicates the integer is in the next 2 bytes. +static constexpr uint8_t kAdditionalInformation2Bytes = 25u; +// Indicates the integer is in the next 4 bytes. +static constexpr uint8_t kAdditionalInformation4Bytes = 26u; +// Indicates the integer is in the next 8 bytes. +static constexpr uint8_t kAdditionalInformation8Bytes = 27u; + +// Encodes the initial byte, consisting of the |type| in the first 3 bits +// followed by 5 bits of |additional_info|. +constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) { + return (static_cast(type) << kMajorTypeBitShift) | + (additional_info & kAdditionalInformationMask); +} + +// TAG 24 indicates that what follows is a byte string which is +// encoded in CBOR format. We use this as a wrapper for +// maps and arrays, allowing us to skip them, because the +// byte string carries its size (byte length). +// https://tools.ietf.org/html/rfc7049#section-2.4.4.1 +static constexpr uint8_t kInitialByteForEnvelope = + EncodeInitialByte(MajorType::TAG, 24); +// The initial byte for a byte string with at most 2^32 bytes +// of payload. This is used for envelope encoding, even if +// the byte string is shorter. +static constexpr uint8_t kInitialByteFor32BitLengthByteString = + EncodeInitialByte(MajorType::BYTE_STRING, 26); + +// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional +// info = 31. +static constexpr uint8_t kInitialByteIndefiniteLengthArray = + EncodeInitialByte(MajorType::ARRAY, 31); +static constexpr uint8_t kInitialByteIndefiniteLengthMap = + EncodeInitialByte(MajorType::MAP, 31); +// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite +// length maps / arrays. +static constexpr uint8_t kStopByte = + EncodeInitialByte(MajorType::SIMPLE_VALUE, 31); + +// See RFC 7049 Section 2.3, Table 2. +static constexpr uint8_t kEncodedTrue = + EncodeInitialByte(MajorType::SIMPLE_VALUE, 21); +static constexpr uint8_t kEncodedFalse = + EncodeInitialByte(MajorType::SIMPLE_VALUE, 20); +static constexpr uint8_t kEncodedNull = + EncodeInitialByte(MajorType::SIMPLE_VALUE, 22); +static constexpr uint8_t kInitialByteForDouble = + EncodeInitialByte(MajorType::SIMPLE_VALUE, 27); + +// See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for +// arbitrary binary data encoded as BYTE_STRING. +static constexpr uint8_t kExpectedConversionToBase64Tag = + EncodeInitialByte(MajorType::TAG, 22); + +// Writes the bytes for |v| to |out|, starting with the most significant byte. +// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html +template +void WriteBytesMostSignificantByteFirst(T v, C* out) { + for (int shift_bytes = sizeof(T) - 1; shift_bytes >= 0; --shift_bytes) + out->push_back(0xff & (v >> (shift_bytes * 8))); +} + +// Extracts sizeof(T) bytes from |in| to extract a value of type T +// (e.g. uint64_t, uint32_t, ...), most significant byte first. +// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html +template +T ReadBytesMostSignificantByteFirst(span in) { + assert(in.size() >= sizeof(T)); + T result = 0; + for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes) + result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8); + return result; +} +} // namespace + +namespace internals { +// Reads the start of a token with definitive size from |bytes|. +// |type| is the major type as specified in RFC 7049 Section 2.1. +// |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size +// (e.g. for BYTE_STRING). +// If successful, returns the number of bytes read. Otherwise returns -1. +// TODO(johannes): change return type to size_t and use 0 for error. +int8_t ReadTokenStart(span bytes, MajorType* type, uint64_t* value) { + if (bytes.empty()) + return -1; + uint8_t initial_byte = bytes[0]; + *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift); + + uint8_t additional_information = initial_byte & kAdditionalInformationMask; + if (additional_information < 24) { + // Values 0-23 are encoded directly into the additional info of the + // initial byte. + *value = additional_information; + return 1; + } + if (additional_information == kAdditionalInformation1Byte) { + // Values 24-255 are encoded with one initial byte, followed by the value. + if (bytes.size() < 2) + return -1; + *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); + return 2; + } + if (additional_information == kAdditionalInformation2Bytes) { + // Values 256-65535: 1 initial byte + 2 bytes payload. + if (bytes.size() < 1 + sizeof(uint16_t)) + return -1; + *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); + return 3; + } + if (additional_information == kAdditionalInformation4Bytes) { + // 32 bit uint: 1 initial byte + 4 bytes payload. + if (bytes.size() < 1 + sizeof(uint32_t)) + return -1; + *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); + return 5; + } + if (additional_information == kAdditionalInformation8Bytes) { + // 64 bit uint: 1 initial byte + 8 bytes payload. + if (bytes.size() < 1 + sizeof(uint64_t)) + return -1; + *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); + return 9; + } + return -1; +} + +// Writes the start of a token with |type|. The |value| may indicate the size, +// or it may be the payload if the value is an unsigned integer. +template +void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) { + if (value < 24) { + // Values 0-23 are encoded directly into the additional info of the + // initial byte. + encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value)); + return; + } + if (value <= std::numeric_limits::max()) { + // Values 24-255 are encoded with one initial byte, followed by the value. + encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte)); + encoded->push_back(value); + return; + } + if (value <= std::numeric_limits::max()) { + // Values 256-65535: 1 initial byte + 2 bytes payload. + encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes)); + WriteBytesMostSignificantByteFirst(value, encoded); + return; + } + if (value <= std::numeric_limits::max()) { + // 32 bit uint: 1 initial byte + 4 bytes payload. + encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes)); + WriteBytesMostSignificantByteFirst(static_cast(value), + encoded); + return; + } + // 64 bit uint: 1 initial byte + 8 bytes payload. + encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes)); + WriteBytesMostSignificantByteFirst(value, encoded); +} +void WriteTokenStart(MajorType type, + uint64_t value, + std::vector* encoded) { + WriteTokenStartTmpl(type, value, encoded); +} +void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) { + WriteTokenStartTmpl(type, value, encoded); +} +} // namespace internals + +// ============================================================================= +// Detecting CBOR content +// ============================================================================= + +uint8_t InitialByteForEnvelope() { + return kInitialByteForEnvelope; +} +uint8_t InitialByteFor32BitLengthByteString() { + return kInitialByteFor32BitLengthByteString; +} +bool IsCBORMessage(span msg) { + return msg.size() >= 6 && msg[0] == InitialByteForEnvelope() && + msg[1] == InitialByteFor32BitLengthByteString(); +} + +// ============================================================================= +// Encoding invidiual CBOR items +// ============================================================================= + +uint8_t EncodeTrue() { + return kEncodedTrue; +} +uint8_t EncodeFalse() { + return kEncodedFalse; +} +uint8_t EncodeNull() { + return kEncodedNull; +} + +uint8_t EncodeIndefiniteLengthArrayStart() { + return kInitialByteIndefiniteLengthArray; +} + +uint8_t EncodeIndefiniteLengthMapStart() { + return kInitialByteIndefiniteLengthMap; +} + +uint8_t EncodeStop() { + return kStopByte; +} + +template +void EncodeInt32Tmpl(int32_t value, C* out) { + if (value >= 0) { + internals::WriteTokenStart(MajorType::UNSIGNED, value, out); + } else { + uint64_t representation = static_cast(-(value + 1)); + internals::WriteTokenStart(MajorType::NEGATIVE, representation, out); + } +} +void EncodeInt32(int32_t value, std::vector* out) { + EncodeInt32Tmpl(value, out); +} +void EncodeInt32(int32_t value, std::string* out) { + EncodeInt32Tmpl(value, out); +} + +template +void EncodeString16Tmpl(span in, C* out) { + uint64_t byte_length = static_cast(in.size_bytes()); + internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); + // When emitting UTF16 characters, we always write the least significant byte + // first; this is because it's the native representation for X86. + // TODO(johannes): Implement a more efficient thing here later, e.g. + // casting *iff* the machine has this byte order. + // The wire format for UTF16 chars will probably remain the same + // (least significant byte first) since this way we can have + // golden files, unittests, etc. that port easily and universally. + // See also: + // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html + for (const uint16_t two_bytes : in) { + out->push_back(two_bytes); + out->push_back(two_bytes >> 8); + } +} +void EncodeString16(span in, std::vector* out) { + EncodeString16Tmpl(in, out); +} +void EncodeString16(span in, std::string* out) { + EncodeString16Tmpl(in, out); +} + +template +void EncodeString8Tmpl(span in, C* out) { + internals::WriteTokenStart(MajorType::STRING, + static_cast(in.size_bytes()), out); + out->insert(out->end(), in.begin(), in.end()); +} +void EncodeString8(span in, std::vector* out) { + EncodeString8Tmpl(in, out); +} +void EncodeString8(span in, std::string* out) { + EncodeString8Tmpl(in, out); +} + +template +void EncodeFromLatin1Tmpl(span latin1, C* out) { + for (size_t ii = 0; ii < latin1.size(); ++ii) { + if (latin1[ii] <= 127) + continue; + // If there's at least one non-ASCII char, convert to UTF8. + std::vector utf8(latin1.begin(), latin1.begin() + ii); + for (; ii < latin1.size(); ++ii) { + if (latin1[ii] <= 127) { + utf8.push_back(latin1[ii]); + } else { + // 0xC0 means it's a UTF8 sequence with 2 bytes. + utf8.push_back((latin1[ii] >> 6) | 0xc0); + utf8.push_back((latin1[ii] | 0x80) & 0xbf); + } + } + EncodeString8(SpanFrom(utf8), out); + return; + } + EncodeString8(latin1, out); +} +void EncodeFromLatin1(span latin1, std::vector* out) { + EncodeFromLatin1Tmpl(latin1, out); +} +void EncodeFromLatin1(span latin1, std::string* out) { + EncodeFromLatin1Tmpl(latin1, out); +} + +template +void EncodeFromUTF16Tmpl(span utf16, C* out) { + // If there's at least one non-ASCII char, encode as STRING16 (UTF16). + for (uint16_t ch : utf16) { + if (ch <= 127) + continue; + EncodeString16(utf16, out); + return; + } + // It's all US-ASCII, strip out every second byte and encode as UTF8. + internals::WriteTokenStart(MajorType::STRING, + static_cast(utf16.size()), out); + out->insert(out->end(), utf16.begin(), utf16.end()); +} +void EncodeFromUTF16(span utf16, std::vector* out) { + EncodeFromUTF16Tmpl(utf16, out); +} +void EncodeFromUTF16(span utf16, std::string* out) { + EncodeFromUTF16Tmpl(utf16, out); +} + +template +void EncodeBinaryTmpl(span in, C* out) { + out->push_back(kExpectedConversionToBase64Tag); + uint64_t byte_length = static_cast(in.size_bytes()); + internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); + out->insert(out->end(), in.begin(), in.end()); +} +void EncodeBinary(span in, std::vector* out) { + EncodeBinaryTmpl(in, out); +} +void EncodeBinary(span in, std::string* out) { + EncodeBinaryTmpl(in, out); +} + +// A double is encoded with a specific initial byte +// (kInitialByteForDouble) plus the 64 bits of payload for its value. +constexpr size_t kEncodedDoubleSize = 1 + sizeof(uint64_t); + +// An envelope is encoded with a specific initial byte +// (kInitialByteForEnvelope), plus the start byte for a BYTE_STRING with a 32 +// bit wide length, plus a 32 bit length for that string. +constexpr size_t kEncodedEnvelopeHeaderSize = 1 + 1 + sizeof(uint32_t); + +template +void EncodeDoubleTmpl(double value, C* out) { + // The additional_info=27 indicates 64 bits for the double follow. + // See RFC 7049 Section 2.3, Table 1. + out->push_back(kInitialByteForDouble); + union { + double from_double; + uint64_t to_uint64; + } reinterpret; + reinterpret.from_double = value; + WriteBytesMostSignificantByteFirst(reinterpret.to_uint64, out); +} +void EncodeDouble(double value, std::vector* out) { + EncodeDoubleTmpl(value, out); +} +void EncodeDouble(double value, std::string* out) { + EncodeDoubleTmpl(value, out); +} + +// ============================================================================= +// cbor::EnvelopeEncoder - for wrapping submessages +// ============================================================================= + +template +void EncodeStartTmpl(C* out, size_t* byte_size_pos) { + assert(*byte_size_pos == 0); + out->push_back(kInitialByteForEnvelope); + out->push_back(kInitialByteFor32BitLengthByteString); + *byte_size_pos = out->size(); + out->resize(out->size() + sizeof(uint32_t)); +} + +void EnvelopeEncoder::EncodeStart(std::vector* out) { + EncodeStartTmpl>(out, &byte_size_pos_); +} + +void EnvelopeEncoder::EncodeStart(std::string* out) { + EncodeStartTmpl(out, &byte_size_pos_); +} + +template +bool EncodeStopTmpl(C* out, size_t* byte_size_pos) { + assert(*byte_size_pos != 0); + // The byte size is the size of the payload, that is, all the + // bytes that were written past the byte size position itself. + uint64_t byte_size = out->size() - (*byte_size_pos + sizeof(uint32_t)); + // We store exactly 4 bytes, so at most INT32MAX, with most significant + // byte first. + if (byte_size > std::numeric_limits::max()) + return false; + for (int shift_bytes = sizeof(uint32_t) - 1; shift_bytes >= 0; + --shift_bytes) { + (*out)[(*byte_size_pos)++] = 0xff & (byte_size >> (shift_bytes * 8)); + } + return true; +} + +bool EnvelopeEncoder::EncodeStop(std::vector* out) { + return EncodeStopTmpl(out, &byte_size_pos_); +} + +bool EnvelopeEncoder::EncodeStop(std::string* out) { + return EncodeStopTmpl(out, &byte_size_pos_); +} + +// ============================================================================= +// cbor::NewCBOREncoder - for encoding from a streaming parser +// ============================================================================= + +namespace { +template +class CBOREncoder : public StreamingParserHandler { + public: + CBOREncoder(C* out, Status* status) : out_(out), status_(status) { + *status_ = Status(); + } + + void HandleMapBegin() override { + if (!status_->ok()) + return; + envelopes_.emplace_back(); + envelopes_.back().EncodeStart(out_); + out_->push_back(kInitialByteIndefiniteLengthMap); + } + + void HandleMapEnd() override { + if (!status_->ok()) + return; + out_->push_back(kStopByte); + assert(!envelopes_.empty()); + if (!envelopes_.back().EncodeStop(out_)) { + HandleError( + Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size())); + return; + } + envelopes_.pop_back(); + } + + void HandleArrayBegin() override { + if (!status_->ok()) + return; + envelopes_.emplace_back(); + envelopes_.back().EncodeStart(out_); + out_->push_back(kInitialByteIndefiniteLengthArray); + } + + void HandleArrayEnd() override { + if (!status_->ok()) + return; + out_->push_back(kStopByte); + assert(!envelopes_.empty()); + if (!envelopes_.back().EncodeStop(out_)) { + HandleError( + Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size())); + return; + } + envelopes_.pop_back(); + } + + void HandleString8(span chars) override { + if (!status_->ok()) + return; + EncodeString8(chars, out_); + } + + void HandleString16(span chars) override { + if (!status_->ok()) + return; + EncodeFromUTF16(chars, out_); + } + + void HandleBinary(span bytes) override { + if (!status_->ok()) + return; + EncodeBinary(bytes, out_); + } + + void HandleDouble(double value) override { + if (!status_->ok()) + return; + EncodeDouble(value, out_); + } + + void HandleInt32(int32_t value) override { + if (!status_->ok()) + return; + EncodeInt32(value, out_); + } + + void HandleBool(bool value) override { + if (!status_->ok()) + return; + // See RFC 7049 Section 2.3, Table 2. + out_->push_back(value ? kEncodedTrue : kEncodedFalse); + } + + void HandleNull() override { + if (!status_->ok()) + return; + // See RFC 7049 Section 2.3, Table 2. + out_->push_back(kEncodedNull); + } + + void HandleError(Status error) override { + if (!status_->ok()) + return; + *status_ = error; + out_->clear(); + } + + private: + C* out_; + std::vector envelopes_; + Status* status_; +}; +} // namespace + +std::unique_ptr NewCBOREncoder( + std::vector* out, + Status* status) { + return std::unique_ptr( + new CBOREncoder>(out, status)); +} +std::unique_ptr NewCBOREncoder(std::string* out, + Status* status) { + return std::unique_ptr( + new CBOREncoder(out, status)); +} + +// ============================================================================= +// cbor::CBORTokenizer - for parsing individual CBOR items +// ============================================================================= + +CBORTokenizer::CBORTokenizer(span bytes) : bytes_(bytes) { + ReadNextToken(/*enter_envelope=*/false); +} +CBORTokenizer::~CBORTokenizer() {} + +CBORTokenTag CBORTokenizer::TokenTag() const { + return token_tag_; +} + +void CBORTokenizer::Next() { + if (token_tag_ == CBORTokenTag::ERROR_VALUE || + token_tag_ == CBORTokenTag::DONE) + return; + ReadNextToken(/*enter_envelope=*/false); +} + +void CBORTokenizer::EnterEnvelope() { + assert(token_tag_ == CBORTokenTag::ENVELOPE); + ReadNextToken(/*enter_envelope=*/true); +} + +Status CBORTokenizer::Status() const { + return status_; +} + +// The following accessor functions ::GetInt32, ::GetDouble, +// ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents +// assume that a particular token was recognized in ::ReadNextToken. +// That's where all the error checking is done. By design, +// the accessors (assuming the token was recognized) never produce +// an error. + +int32_t CBORTokenizer::GetInt32() const { + assert(token_tag_ == CBORTokenTag::INT32); + // The range checks happen in ::ReadNextToken(). + return static_cast( + token_start_type_ == MajorType::UNSIGNED + ? token_start_internal_value_ + : -static_cast(token_start_internal_value_) - 1); +} + +double CBORTokenizer::GetDouble() const { + assert(token_tag_ == CBORTokenTag::DOUBLE); + union { + uint64_t from_uint64; + double to_double; + } reinterpret; + reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst( + bytes_.subspan(status_.pos + 1)); + return reinterpret.to_double; +} + +span CBORTokenizer::GetString8() const { + assert(token_tag_ == CBORTokenTag::STRING8); + auto length = static_cast(token_start_internal_value_); + return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); +} + +span CBORTokenizer::GetString16WireRep() const { + assert(token_tag_ == CBORTokenTag::STRING16); + auto length = static_cast(token_start_internal_value_); + return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); +} + +span CBORTokenizer::GetBinary() const { + assert(token_tag_ == CBORTokenTag::BINARY); + auto length = static_cast(token_start_internal_value_); + return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); +} + +span CBORTokenizer::GetEnvelopeContents() const { + assert(token_tag_ == CBORTokenTag::ENVELOPE); + auto length = static_cast(token_start_internal_value_); + return bytes_.subspan(status_.pos + kEncodedEnvelopeHeaderSize, length); +} + +// All error checking happens in ::ReadNextToken, so that the accessors +// can avoid having to carry an error return value. +// +// With respect to checking the encoded lengths of strings, arrays, etc: +// On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so +// we initially read them as uint64_t, usually into token_start_internal_value_. +// +// However, since these containers have a representation on the machine, +// we need to do corresponding size computations on the input byte array, +// output span (e.g. the payload for a string), etc., and size_t is +// machine specific (in practice either 32 bit or 64 bit). +// +// Further, we must avoid overflowing size_t. Therefore, we use this +// kMaxValidLength constant to: +// - Reject values that are larger than the architecture specific +// max size_t (differs between 32 bit and 64 bit arch). +// - Reserve at least one bit so that we can check against overflows +// when adding lengths (array / string length / etc.); we do this by +// ensuring that the inputs to an addition are <= kMaxValidLength, +// and then checking whether the sum went past it. +// +// See also +// https://chromium.googlesource.com/chromium/src/+/master/docs/security/integer-semantics.md +static const uint64_t kMaxValidLength = + std::min(std::numeric_limits::max() >> 2, + std::numeric_limits::max()); + +void CBORTokenizer::ReadNextToken(bool enter_envelope) { + if (enter_envelope) { + status_.pos += kEncodedEnvelopeHeaderSize; + } else { + status_.pos = + status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_; + } + status_.error = Error::OK; + if (status_.pos >= bytes_.size()) { + token_tag_ = CBORTokenTag::DONE; + return; + } + const size_t remaining_bytes = bytes_.size() - status_.pos; + switch (bytes_[status_.pos]) { + case kStopByte: + SetToken(CBORTokenTag::STOP, 1); + return; + case kInitialByteIndefiniteLengthMap: + SetToken(CBORTokenTag::MAP_START, 1); + return; + case kInitialByteIndefiniteLengthArray: + SetToken(CBORTokenTag::ARRAY_START, 1); + return; + case kEncodedTrue: + SetToken(CBORTokenTag::TRUE_VALUE, 1); + return; + case kEncodedFalse: + SetToken(CBORTokenTag::FALSE_VALUE, 1); + return; + case kEncodedNull: + SetToken(CBORTokenTag::NULL_VALUE, 1); + return; + case kExpectedConversionToBase64Tag: { // BINARY + const int8_t bytes_read = internals::ReadTokenStart( + bytes_.subspan(status_.pos + 1), &token_start_type_, + &token_start_internal_value_); + if (bytes_read < 0 || token_start_type_ != MajorType::BYTE_STRING || + token_start_internal_value_ > kMaxValidLength) { + SetError(Error::CBOR_INVALID_BINARY); + return; + } + const uint64_t token_byte_length = token_start_internal_value_ + + /* tag before token start: */ 1 + + /* token start: */ bytes_read; + if (token_byte_length > remaining_bytes) { + SetError(Error::CBOR_INVALID_BINARY); + return; + } + SetToken(CBORTokenTag::BINARY, static_cast(token_byte_length)); + return; + } + case kInitialByteForDouble: { // DOUBLE + if (kEncodedDoubleSize > remaining_bytes) { + SetError(Error::CBOR_INVALID_DOUBLE); + return; + } + SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize); + return; + } + case kInitialByteForEnvelope: { // ENVELOPE + if (kEncodedEnvelopeHeaderSize > remaining_bytes) { + SetError(Error::CBOR_INVALID_ENVELOPE); + return; + } + // The envelope must be a byte string with 32 bit length. + if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) { + SetError(Error::CBOR_INVALID_ENVELOPE); + return; + } + // Read the length of the byte string. + token_start_internal_value_ = ReadBytesMostSignificantByteFirst( + bytes_.subspan(status_.pos + 2)); + if (token_start_internal_value_ > kMaxValidLength) { + SetError(Error::CBOR_INVALID_ENVELOPE); + return; + } + uint64_t token_byte_length = + token_start_internal_value_ + kEncodedEnvelopeHeaderSize; + if (token_byte_length > remaining_bytes) { + SetError(Error::CBOR_INVALID_ENVELOPE); + return; + } + SetToken(CBORTokenTag::ENVELOPE, static_cast(token_byte_length)); + return; + } + default: { + const int8_t token_start_length = internals::ReadTokenStart( + bytes_.subspan(status_.pos), &token_start_type_, + &token_start_internal_value_); + const bool success = token_start_length >= 0; + switch (token_start_type_) { + case MajorType::UNSIGNED: // INT32. + // INT32 is a signed int32 (int32 makes sense for the + // inspector_protocol, it's not a CBOR limitation), so we check + // against the signed max, so that the allowable values are + // 0, 1, 2, ... 2^31 - 1. + if (!success || std::numeric_limits::max() < + token_start_internal_value_) { + SetError(Error::CBOR_INVALID_INT32); + return; + } + SetToken(CBORTokenTag::INT32, token_start_length); + return; + case MajorType::NEGATIVE: { // INT32. + // INT32 is a signed int32 (int32 makes sense for the + // inspector_protocol, it's not a CBOR limitation); in CBOR, + // the negative values for INT32 are represented as NEGATIVE, + // that is, -1 INT32 is represented as 1 << 5 | 0 (major type 1, + // additional info value 0). So here, we compute the INT32 value + // and then check it against the INT32 min. + int64_t actual_value = + -static_cast(token_start_internal_value_) - 1; + if (!success || actual_value < std::numeric_limits::min()) { + SetError(Error::CBOR_INVALID_INT32); + return; + } + SetToken(CBORTokenTag::INT32, token_start_length); + return; + } + case MajorType::STRING: { // STRING8. + if (!success || token_start_internal_value_ > kMaxValidLength) { + SetError(Error::CBOR_INVALID_STRING8); + return; + } + uint64_t token_byte_length = + token_start_internal_value_ + token_start_length; + if (token_byte_length > remaining_bytes) { + SetError(Error::CBOR_INVALID_STRING8); + return; + } + SetToken(CBORTokenTag::STRING8, + static_cast(token_byte_length)); + return; + } + case MajorType::BYTE_STRING: { // STRING16. + // Length must be divisible by 2 since UTF16 is 2 bytes per + // character, hence the &1 check. + if (!success || token_start_internal_value_ > kMaxValidLength || + token_start_internal_value_ & 1) { + SetError(Error::CBOR_INVALID_STRING16); + return; + } + uint64_t token_byte_length = + token_start_internal_value_ + token_start_length; + if (token_byte_length > remaining_bytes) { + SetError(Error::CBOR_INVALID_STRING16); + return; + } + SetToken(CBORTokenTag::STRING16, + static_cast(token_byte_length)); + return; + } + case MajorType::ARRAY: + case MajorType::MAP: + case MajorType::TAG: + case MajorType::SIMPLE_VALUE: + SetError(Error::CBOR_UNSUPPORTED_VALUE); + return; + } + } + } +} + +void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) { + token_tag_ = token_tag; + token_byte_length_ = token_byte_length; +} + +void CBORTokenizer::SetError(Error error) { + token_tag_ = CBORTokenTag::ERROR_VALUE; + status_.error = error; +} + +// ============================================================================= +// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages +// ============================================================================= + +namespace { +// When parsing CBOR, we limit recursion depth for objects and arrays +// to this constant. +static constexpr int kStackLimit = 300; + +// Below are three parsing routines for CBOR, which cover enough +// to roundtrip JSON messages. +bool ParseMap(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out); +bool ParseArray(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out); +bool ParseValue(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out); + +void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) { + std::vector value; + span rep = tokenizer->GetString16WireRep(); + for (size_t ii = 0; ii < rep.size(); ii += 2) + value.push_back((rep[ii + 1] << 8) | rep[ii]); + out->HandleString16(span(value.data(), value.size())); + tokenizer->Next(); +} + +bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) { + assert(tokenizer->TokenTag() == CBORTokenTag::STRING8); + out->HandleString8(tokenizer->GetString8()); + tokenizer->Next(); + return true; +} + +bool ParseValue(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out) { + if (stack_depth > kStackLimit) { + out->HandleError( + Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos}); + return false; + } + // Skip past the envelope to get to what's inside. + if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE) + tokenizer->EnterEnvelope(); + switch (tokenizer->TokenTag()) { + case CBORTokenTag::ERROR_VALUE: + out->HandleError(tokenizer->Status()); + return false; + case CBORTokenTag::DONE: + out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE, + tokenizer->Status().pos}); + return false; + case CBORTokenTag::TRUE_VALUE: + out->HandleBool(true); + tokenizer->Next(); + return true; + case CBORTokenTag::FALSE_VALUE: + out->HandleBool(false); + tokenizer->Next(); + return true; + case CBORTokenTag::NULL_VALUE: + out->HandleNull(); + tokenizer->Next(); + return true; + case CBORTokenTag::INT32: + out->HandleInt32(tokenizer->GetInt32()); + tokenizer->Next(); + return true; + case CBORTokenTag::DOUBLE: + out->HandleDouble(tokenizer->GetDouble()); + tokenizer->Next(); + return true; + case CBORTokenTag::STRING8: + return ParseUTF8String(tokenizer, out); + case CBORTokenTag::STRING16: + ParseUTF16String(tokenizer, out); + return true; + case CBORTokenTag::BINARY: { + out->HandleBinary(tokenizer->GetBinary()); + tokenizer->Next(); + return true; + } + case CBORTokenTag::MAP_START: + return ParseMap(stack_depth + 1, tokenizer, out); + case CBORTokenTag::ARRAY_START: + return ParseArray(stack_depth + 1, tokenizer, out); + default: + out->HandleError( + Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos}); + return false; + } +} + +// |bytes| must start with the indefinite length array byte, so basically, +// ParseArray may only be called after an indefinite length array has been +// detected. +bool ParseArray(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out) { + assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START); + tokenizer->Next(); + out->HandleArrayBegin(); + while (tokenizer->TokenTag() != CBORTokenTag::STOP) { + if (tokenizer->TokenTag() == CBORTokenTag::DONE) { + out->HandleError( + Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos}); + return false; + } + if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { + out->HandleError(tokenizer->Status()); + return false; + } + // Parse value. + if (!ParseValue(stack_depth, tokenizer, out)) + return false; + } + out->HandleArrayEnd(); + tokenizer->Next(); + return true; +} + +// |bytes| must start with the indefinite length array byte, so basically, +// ParseArray may only be called after an indefinite length array has been +// detected. +bool ParseMap(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out) { + assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START); + out->HandleMapBegin(); + tokenizer->Next(); + while (tokenizer->TokenTag() != CBORTokenTag::STOP) { + if (tokenizer->TokenTag() == CBORTokenTag::DONE) { + out->HandleError( + Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos}); + return false; + } + if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { + out->HandleError(tokenizer->Status()); + return false; + } + // Parse key. + if (tokenizer->TokenTag() == CBORTokenTag::STRING8) { + if (!ParseUTF8String(tokenizer, out)) + return false; + } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) { + ParseUTF16String(tokenizer, out); + } else { + out->HandleError( + Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos}); + return false; + } + // Parse value. + if (!ParseValue(stack_depth, tokenizer, out)) + return false; + } + out->HandleMapEnd(); + tokenizer->Next(); + return true; +} +} // namespace + +void ParseCBOR(span bytes, StreamingParserHandler* out) { + if (bytes.empty()) { + out->HandleError(Status{Error::CBOR_NO_INPUT, 0}); + return; + } + if (bytes[0] != kInitialByteForEnvelope) { + out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0}); + return; + } + CBORTokenizer tokenizer(bytes); + if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { + out->HandleError(tokenizer.Status()); + return; + } + // We checked for the envelope start byte above, so the tokenizer + // must agree here, since it's not an error. + assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE); + tokenizer.EnterEnvelope(); + if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) { + out->HandleError( + Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos}); + return; + } + if (!ParseMap(/*stack_depth=*/1, &tokenizer, out)) + return; + if (tokenizer.TokenTag() == CBORTokenTag::DONE) + return; + if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { + out->HandleError(tokenizer.Status()); + return; + } + out->HandleError(Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos}); +} + +// ============================================================================= +// cbor::AppendString8EntryToMap - for limited in-place editing of messages +// ============================================================================= + +template +Status AppendString8EntryToCBORMapTmpl(span string8_key, + span string8_value, + C* cbor) { + // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since + // it could be a char (signed!). Instead, use bytes. + span bytes(reinterpret_cast(cbor->data()), + cbor->size()); + CBORTokenizer tokenizer(bytes); + if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) + return tokenizer.Status(); + if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE) + return Status(Error::CBOR_INVALID_ENVELOPE, 0); + size_t envelope_size = tokenizer.GetEnvelopeContents().size(); + size_t old_size = cbor->size(); + if (old_size != envelope_size + kEncodedEnvelopeHeaderSize) + return Status(Error::CBOR_INVALID_ENVELOPE, 0); + if (envelope_size == 0 || + (tokenizer.GetEnvelopeContents()[0] != EncodeIndefiniteLengthMapStart())) + return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize); + if (bytes[bytes.size() - 1] != EncodeStop()) + return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1); + cbor->pop_back(); + EncodeString8(string8_key, cbor); + EncodeString8(string8_value, cbor); + cbor->push_back(EncodeStop()); + size_t new_envelope_size = envelope_size + (cbor->size() - old_size); + if (new_envelope_size > std::numeric_limits::max()) + return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0); + size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t); + uint8_t* out = reinterpret_cast(&cbor->at(size_pos)); + *(out++) = (new_envelope_size >> 24) & 0xff; + *(out++) = (new_envelope_size >> 16) & 0xff; + *(out++) = (new_envelope_size >> 8) & 0xff; + *(out) = new_envelope_size & 0xff; + return Status(); +} +Status AppendString8EntryToCBORMap(span string8_key, + span string8_value, + std::vector* cbor) { + return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor); +} +Status AppendString8EntryToCBORMap(span string8_key, + span string8_value, + std::string* cbor) { + return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor); +} +} // namespace cbor + +namespace json { + +// ============================================================================= +// json::NewJSONEncoder - for encoding streaming parser events as JSON +// ============================================================================= + +namespace { +// Prints |value| to |out| with 4 hex digits, most significant chunk first. +template +void PrintHex(uint16_t value, C* out) { + for (int ii = 3; ii >= 0; --ii) { + int four_bits = 0xf & (value >> (4 * ii)); + out->push_back(four_bits + ((four_bits <= 9) ? '0' : ('a' - 10))); + } +} + +// In the writer below, we maintain a stack of State instances. +// It is just enough to emit the appropriate delimiters and brackets +// in JSON. +enum class Container { + // Used for the top-level, initial state. + NONE, + // Inside a JSON object. + MAP, + // Inside a JSON array. + ARRAY +}; +class State { + public: + explicit State(Container container) : container_(container) {} + void StartElement(std::vector* out) { StartElementTmpl(out); } + void StartElement(std::string* out) { StartElementTmpl(out); } + Container container() const { return container_; } + + private: + template + void StartElementTmpl(C* out) { + assert(container_ != Container::NONE || size_ == 0); + if (size_ != 0) { + char delim = (!(size_ & 1) || container_ == Container::ARRAY) ? ',' : ':'; + out->push_back(delim); + } + ++size_; + } + + Container container_ = Container::NONE; + int size_ = 0; +}; + +constexpr char kBase64Table[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz0123456789+/"; + +template +void Base64Encode(const span& in, C* out) { + // The following three cases are based on the tables in the example + // section in https://en.wikipedia.org/wiki/Base64. We process three + // input bytes at a time, emitting 4 output bytes at a time. + size_t ii = 0; + + // While possible, process three input bytes. + for (; ii + 3 <= in.size(); ii += 3) { + uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2]; + out->push_back(kBase64Table[(twentyfour_bits >> 18)]); + out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); + out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]); + out->push_back(kBase64Table[twentyfour_bits & 0x3f]); + } + if (ii + 2 <= in.size()) { // Process two input bytes. + uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8); + out->push_back(kBase64Table[(twentyfour_bits >> 18)]); + out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); + out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]); + out->push_back('='); // Emit padding. + return; + } + if (ii + 1 <= in.size()) { // Process a single input byte. + uint32_t twentyfour_bits = (in[ii] << 16); + out->push_back(kBase64Table[(twentyfour_bits >> 18)]); + out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); + out->push_back('='); // Emit padding. + out->push_back('='); // Emit padding. + } +} + +// Implements a handler for JSON parser events to emit a JSON string. +template +class JSONEncoder : public StreamingParserHandler { + public: + JSONEncoder(const Platform* platform, C* out, Status* status) + : platform_(platform), out_(out), status_(status) { + *status_ = Status(); + state_.emplace(Container::NONE); + } + + void HandleMapBegin() override { + if (!status_->ok()) + return; + assert(!state_.empty()); + state_.top().StartElement(out_); + state_.emplace(Container::MAP); + Emit('{'); + } + + void HandleMapEnd() override { + if (!status_->ok()) + return; + assert(state_.size() >= 2 && state_.top().container() == Container::MAP); + state_.pop(); + Emit('}'); + } + + void HandleArrayBegin() override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + state_.emplace(Container::ARRAY); + Emit('['); + } + + void HandleArrayEnd() override { + if (!status_->ok()) + return; + assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY); + state_.pop(); + Emit(']'); + } + + void HandleString16(span chars) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit('"'); + for (const uint16_t ch : chars) { + if (ch == '"') { + Emit("\\\""); + } else if (ch == '\\') { + Emit("\\\\"); + } else if (ch == '\b') { + Emit("\\b"); + } else if (ch == '\f') { + Emit("\\f"); + } else if (ch == '\n') { + Emit("\\n"); + } else if (ch == '\r') { + Emit("\\r"); + } else if (ch == '\t') { + Emit("\\t"); + } else if (ch >= 32 && ch <= 126) { + Emit(ch); + } else { + Emit("\\u"); + PrintHex(ch, out_); + } + } + Emit('"'); + } + + void HandleString8(span chars) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit('"'); + for (size_t ii = 0; ii < chars.size(); ++ii) { + uint8_t c = chars[ii]; + if (c == '"') { + Emit("\\\""); + } else if (c == '\\') { + Emit("\\\\"); + } else if (c == '\b') { + Emit("\\b"); + } else if (c == '\f') { + Emit("\\f"); + } else if (c == '\n') { + Emit("\\n"); + } else if (c == '\r') { + Emit("\\r"); + } else if (c == '\t') { + Emit("\\t"); + } else if (c >= 32 && c <= 126) { + Emit(c); + } else if (c < 32) { + Emit("\\u"); + PrintHex(static_cast(c), out_); + } else { + // Inspect the leading byte to figure out how long the utf8 + // byte sequence is; while doing this initialize |codepoint| + // with the first few bits. + // See table in: https://en.wikipedia.org/wiki/UTF-8 + // byte one is 110x xxxx -> 2 byte utf8 sequence + // byte one is 1110 xxxx -> 3 byte utf8 sequence + // byte one is 1111 0xxx -> 4 byte utf8 sequence + uint32_t codepoint; + int num_bytes_left; + if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence + num_bytes_left = 1; + codepoint = c & 0x1f; + } else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence + num_bytes_left = 2; + codepoint = c & 0x0f; + } else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence + codepoint = c & 0x07; + num_bytes_left = 3; + } else { + continue; // invalid leading byte + } + + // If we have enough bytes in our input, decode the remaining ones + // belonging to this Unicode character into |codepoint|. + if (ii + num_bytes_left > chars.size()) + continue; + while (num_bytes_left > 0) { + c = chars[++ii]; + --num_bytes_left; + // Check the next byte is a continuation byte, that is 10xx xxxx. + if ((c & 0xc0) != 0x80) + continue; + codepoint = (codepoint << 6) | (c & 0x3f); + } + + // Disallow overlong encodings for ascii characters, as these + // would include " and other characters significant to JSON + // string termination / control. + if (codepoint < 0x7f) + continue; + // Invalid in UTF8, and can't be represented in UTF16 anyway. + if (codepoint > 0x10ffff) + continue; + + // So, now we transcode to UTF16, + // using the math described at https://en.wikipedia.org/wiki/UTF-16, + // for either one or two 16 bit characters. + if (codepoint < 0xffff) { + Emit("\\u"); + PrintHex(static_cast(codepoint), out_); + continue; + } + codepoint -= 0x10000; + // high surrogate + Emit("\\u"); + PrintHex(static_cast((codepoint >> 10) + 0xd800), out_); + // low surrogate + Emit("\\u"); + PrintHex(static_cast((codepoint & 0x3ff) + 0xdc00), out_); + } + } + Emit('"'); + } + + void HandleBinary(span bytes) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit('"'); + Base64Encode(bytes, out_); + Emit('"'); + } + + void HandleDouble(double value) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + // JSON cannot represent NaN or Infinity. So, for compatibility, + // we behave like the JSON object in web browsers: emit 'null'. + if (!std::isfinite(value)) { + Emit("null"); + return; + } + std::unique_ptr str_value = platform_->DToStr(value); + + // DToStr may fail to emit a 0 before the decimal dot. E.g. this is + // the case in base::NumberToString in Chromium (which is based on + // dmg_fp). So, much like + // https://cs.chromium.org/chromium/src/base/json/json_writer.cc + // we probe for this and emit the leading 0 anyway if necessary. + const char* chars = str_value.get(); + if (chars[0] == '.') { + Emit('0'); + } else if (chars[0] == '-' && chars[1] == '.') { + Emit("-0"); + ++chars; + } + Emit(chars); + } + + void HandleInt32(int32_t value) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit(std::to_string(value)); + } + + void HandleBool(bool value) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit(value ? "true" : "false"); + } + + void HandleNull() override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit("null"); + } + + void HandleError(Status error) override { + assert(!error.ok()); + *status_ = error; + out_->clear(); + } + + private: + void Emit(char c) { out_->push_back(c); } + void Emit(const char* str) { + out_->insert(out_->end(), str, str + strlen(str)); + } + void Emit(const std::string& str) { + out_->insert(out_->end(), str.begin(), str.end()); + } + + const Platform* platform_; + C* out_; + Status* status_; + std::stack state_; +}; +} // namespace + +std::unique_ptr NewJSONEncoder( + const Platform* platform, + std::vector* out, + Status* status) { + return std::unique_ptr( + new JSONEncoder>(platform, out, status)); +} +std::unique_ptr NewJSONEncoder(const Platform* platform, + std::string* out, + Status* status) { + return std::unique_ptr( + new JSONEncoder(platform, out, status)); +} + +// ============================================================================= +// json::ParseJSON - for receiving streaming parser events for JSON. +// ============================================================================= + +namespace { +const int kStackLimit = 300; + +enum Token { + ObjectBegin, + ObjectEnd, + ArrayBegin, + ArrayEnd, + StringLiteral, + Number, + BoolTrue, + BoolFalse, + NullToken, + ListSeparator, + ObjectPairSeparator, + InvalidToken, + NoInput +}; + +const char* const kNullString = "null"; +const char* const kTrueString = "true"; +const char* const kFalseString = "false"; + +template +class JsonParser { + public: + JsonParser(const Platform* platform, StreamingParserHandler* handler) + : platform_(platform), handler_(handler) {} + + void Parse(const Char* start, size_t length) { + start_pos_ = start; + const Char* end = start + length; + const Char* tokenEnd = nullptr; + ParseValue(start, end, &tokenEnd, 0); + if (error_) + return; + if (tokenEnd != end) { + HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd); + } + } + + private: + bool CharsToDouble(const uint16_t* chars, size_t length, double* result) { + std::string buffer; + buffer.reserve(length + 1); + for (size_t ii = 0; ii < length; ++ii) { + bool is_ascii = !(chars[ii] & ~0x7F); + if (!is_ascii) + return false; + buffer.push_back(static_cast(chars[ii])); + } + return platform_->StrToD(buffer.c_str(), result); + } + + bool CharsToDouble(const uint8_t* chars, size_t length, double* result) { + std::string buffer(reinterpret_cast(chars), length); + return platform_->StrToD(buffer.c_str(), result); + } + + static bool ParseConstToken(const Char* start, + const Char* end, + const Char** token_end, + const char* token) { + // |token| is \0 terminated, it's one of the constants at top of the file. + while (start < end && *token != '\0' && *start++ == *token++) { + } + if (*token != '\0') + return false; + *token_end = start; + return true; + } + + static bool ReadInt(const Char* start, + const Char* end, + const Char** token_end, + bool allow_leading_zeros) { + if (start == end) + return false; + bool has_leading_zero = '0' == *start; + int length = 0; + while (start < end && '0' <= *start && *start <= '9') { + ++start; + ++length; + } + if (!length) + return false; + if (!allow_leading_zeros && length > 1 && has_leading_zero) + return false; + *token_end = start; + return true; + } + + static bool ParseNumberToken(const Char* start, + const Char* end, + const Char** token_end) { + // We just grab the number here. We validate the size in DecodeNumber. + // According to RFC4627, a valid number is: [minus] int [frac] [exp] + if (start == end) + return false; + Char c = *start; + if ('-' == c) + ++start; + + if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false)) + return false; + if (start == end) { + *token_end = start; + return true; + } + + // Optional fraction part + c = *start; + if ('.' == c) { + ++start; + if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true)) + return false; + if (start == end) { + *token_end = start; + return true; + } + c = *start; + } + + // Optional exponent part + if ('e' == c || 'E' == c) { + ++start; + if (start == end) + return false; + c = *start; + if ('-' == c || '+' == c) { + ++start; + if (start == end) + return false; + } + if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true)) + return false; + } + + *token_end = start; + return true; + } + + static bool ReadHexDigits(const Char* start, + const Char* end, + const Char** token_end, + int digits) { + if (end - start < digits) + return false; + for (int i = 0; i < digits; ++i) { + Char c = *start++; + if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F'))) + return false; + } + *token_end = start; + return true; + } + + static bool ParseStringToken(const Char* start, + const Char* end, + const Char** token_end) { + while (start < end) { + Char c = *start++; + if ('\\' == c) { + if (start == end) + return false; + c = *start++; + // Make sure the escaped char is valid. + switch (c) { + case 'x': + if (!ReadHexDigits(start, end, &start, 2)) + return false; + break; + case 'u': + if (!ReadHexDigits(start, end, &start, 4)) + return false; + break; + case '\\': + case '/': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + case '"': + break; + default: + return false; + } + } else if ('"' == c) { + *token_end = start; + return true; + } + } + return false; + } + + static bool SkipComment(const Char* start, + const Char* end, + const Char** comment_end) { + if (start == end) + return false; + + if (*start != '/' || start + 1 >= end) + return false; + ++start; + + if (*start == '/') { + // Single line comment, read to newline. + for (++start; start < end; ++start) { + if (*start == '\n' || *start == '\r') { + *comment_end = start + 1; + return true; + } + } + *comment_end = end; + // Comment reaches end-of-input, which is fine. + return true; + } + + if (*start == '*') { + Char previous = '\0'; + // Block comment, read until end marker. + for (++start; start < end; previous = *start++) { + if (previous == '*' && *start == '/') { + *comment_end = start + 1; + return true; + } + } + // Block comment must close before end-of-input. + return false; + } + + return false; + } + + static bool IsSpaceOrNewLine(Char c) { + // \v = vertial tab; \f = form feed page break. + return c == ' ' || c == '\n' || c == '\v' || c == '\f' || c == '\r' || + c == '\t'; + } + + static void SkipWhitespaceAndComments(const Char* start, + const Char* end, + const Char** whitespace_end) { + while (start < end) { + if (IsSpaceOrNewLine(*start)) { + ++start; + } else if (*start == '/') { + const Char* comment_end = nullptr; + if (!SkipComment(start, end, &comment_end)) + break; + start = comment_end; + } else { + break; + } + } + *whitespace_end = start; + } + + static Token ParseToken(const Char* start, + const Char* end, + const Char** tokenStart, + const Char** token_end) { + SkipWhitespaceAndComments(start, end, tokenStart); + start = *tokenStart; + + if (start == end) + return NoInput; + + switch (*start) { + case 'n': + if (ParseConstToken(start, end, token_end, kNullString)) + return NullToken; + break; + case 't': + if (ParseConstToken(start, end, token_end, kTrueString)) + return BoolTrue; + break; + case 'f': + if (ParseConstToken(start, end, token_end, kFalseString)) + return BoolFalse; + break; + case '[': + *token_end = start + 1; + return ArrayBegin; + case ']': + *token_end = start + 1; + return ArrayEnd; + case ',': + *token_end = start + 1; + return ListSeparator; + case '{': + *token_end = start + 1; + return ObjectBegin; + case '}': + *token_end = start + 1; + return ObjectEnd; + case ':': + *token_end = start + 1; + return ObjectPairSeparator; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + if (ParseNumberToken(start, end, token_end)) + return Number; + break; + case '"': + if (ParseStringToken(start + 1, end, token_end)) + return StringLiteral; + break; + } + return InvalidToken; + } + + static int HexToInt(Char c) { + if ('0' <= c && c <= '9') + return c - '0'; + if ('A' <= c && c <= 'F') + return c - 'A' + 10; + if ('a' <= c && c <= 'f') + return c - 'a' + 10; + assert(false); // Unreachable. + return 0; + } + + static bool DecodeString(const Char* start, + const Char* end, + std::vector* output) { + if (start == end) + return true; + if (start > end) + return false; + output->reserve(end - start); + while (start < end) { + uint16_t c = *start++; + // If the |Char| we're dealing with is really a byte, then + // we have utf8 here, and we need to check for multibyte characters + // and transcode them to utf16 (either one or two utf16 chars). + if (sizeof(Char) == sizeof(uint8_t) && c >= 0x7f) { + // Inspect the leading byte to figure out how long the utf8 + // byte sequence is; while doing this initialize |codepoint| + // with the first few bits. + // See table in: https://en.wikipedia.org/wiki/UTF-8 + // byte one is 110x xxxx -> 2 byte utf8 sequence + // byte one is 1110 xxxx -> 3 byte utf8 sequence + // byte one is 1111 0xxx -> 4 byte utf8 sequence + uint32_t codepoint; + int num_bytes_left; + if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence + num_bytes_left = 1; + codepoint = c & 0x1f; + } else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence + num_bytes_left = 2; + codepoint = c & 0x0f; + } else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence + codepoint = c & 0x07; + num_bytes_left = 3; + } else { + return false; // invalid leading byte + } + + // If we have enough bytes in our inpput, decode the remaining ones + // belonging to this Unicode character into |codepoint|. + if (start + num_bytes_left > end) + return false; + while (num_bytes_left > 0) { + c = *start++; + --num_bytes_left; + // Check the next byte is a continuation byte, that is 10xx xxxx. + if ((c & 0xc0) != 0x80) + return false; + codepoint = (codepoint << 6) | (c & 0x3f); + } + + // Disallow overlong encodings for ascii characters, as these + // would include " and other characters significant to JSON + // string termination / control. + if (codepoint < 0x7f) + return false; + // Invalid in UTF8, and can't be represented in UTF16 anyway. + if (codepoint > 0x10ffff) + return false; + + // So, now we transcode to UTF16, + // using the math described at https://en.wikipedia.org/wiki/UTF-16, + // for either one or two 16 bit characters. + if (codepoint < 0xffff) { + output->push_back(codepoint); + continue; + } + codepoint -= 0x10000; + output->push_back((codepoint >> 10) + 0xd800); // high surrogate + output->push_back((codepoint & 0x3ff) + 0xdc00); // low surrogate + continue; + } + if ('\\' != c) { + output->push_back(c); + continue; + } + if (start == end) + return false; + c = *start++; + + if (c == 'x') { + // \x is not supported. + return false; + } + + switch (c) { + case '"': + case '/': + case '\\': + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case 'u': + c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) + + (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3)); + start += 4; + break; + default: + return false; + } + output->push_back(c); + } + return true; + } + + void ParseValue(const Char* start, + const Char* end, + const Char** value_token_end, + int depth) { + if (depth > kStackLimit) { + HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start); + return; + } + const Char* token_start = nullptr; + const Char* token_end = nullptr; + Token token = ParseToken(start, end, &token_start, &token_end); + switch (token) { + case NoInput: + HandleError(Error::JSON_PARSER_NO_INPUT, token_start); + return; + case InvalidToken: + HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start); + return; + case NullToken: + handler_->HandleNull(); + break; + case BoolTrue: + handler_->HandleBool(true); + break; + case BoolFalse: + handler_->HandleBool(false); + break; + case Number: { + double value; + if (!CharsToDouble(token_start, token_end - token_start, &value)) { + HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start); + return; + } + if (value >= std::numeric_limits::min() && + value <= std::numeric_limits::max() && + static_cast(value) == value) + handler_->HandleInt32(static_cast(value)); + else + handler_->HandleDouble(value); + break; + } + case StringLiteral: { + std::vector value; + bool ok = DecodeString(token_start + 1, token_end - 1, &value); + if (!ok) { + HandleError(Error::JSON_PARSER_INVALID_STRING, token_start); + return; + } + handler_->HandleString16(span(value.data(), value.size())); + break; + } + case ArrayBegin: { + handler_->HandleArrayBegin(); + start = token_end; + token = ParseToken(start, end, &token_start, &token_end); + while (token != ArrayEnd) { + ParseValue(start, end, &token_end, depth + 1); + if (error_) + return; + + // After a list value, we expect a comma or the end of the list. + start = token_end; + token = ParseToken(start, end, &token_start, &token_end); + if (token == ListSeparator) { + start = token_end; + token = ParseToken(start, end, &token_start, &token_end); + if (token == ArrayEnd) { + HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start); + return; + } + } else if (token != ArrayEnd) { + // Unexpected value after list value. Bail out. + HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED, + token_start); + return; + } + } + handler_->HandleArrayEnd(); + break; + } + case ObjectBegin: { + handler_->HandleMapBegin(); + start = token_end; + token = ParseToken(start, end, &token_start, &token_end); + while (token != ObjectEnd) { + if (token != StringLiteral) { + HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED, + token_start); + return; + } + std::vector key; + if (!DecodeString(token_start + 1, token_end - 1, &key)) { + HandleError(Error::JSON_PARSER_INVALID_STRING, token_start); + return; + } + handler_->HandleString16(span(key.data(), key.size())); + start = token_end; + + token = ParseToken(start, end, &token_start, &token_end); + if (token != ObjectPairSeparator) { + HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start); + return; + } + start = token_end; + + ParseValue(start, end, &token_end, depth + 1); + if (error_) + return; + start = token_end; + + // After a key/value pair, we expect a comma or the end of the + // object. + token = ParseToken(start, end, &token_start, &token_end); + if (token == ListSeparator) { + start = token_end; + token = ParseToken(start, end, &token_start, &token_end); + if (token == ObjectEnd) { + HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start); + return; + } + } else if (token != ObjectEnd) { + // Unexpected value after last object value. Bail out. + HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED, + token_start); + return; + } + } + handler_->HandleMapEnd(); + break; + } + + default: + // We got a token that's not a value. + HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start); + return; + } + + SkipWhitespaceAndComments(token_end, end, value_token_end); + } + + void HandleError(Error error, const Char* pos) { + assert(error != Error::OK); + if (!error_) { + handler_->HandleError( + Status{error, static_cast(pos - start_pos_)}); + error_ = true; + } + } + + const Char* start_pos_ = nullptr; + bool error_ = false; + const Platform* platform_; + StreamingParserHandler* handler_; +}; +} // namespace + +void ParseJSON(const Platform& platform, + span chars, + StreamingParserHandler* handler) { + JsonParser parser(&platform, handler); + parser.Parse(chars.data(), chars.size()); +} + +void ParseJSON(const Platform& platform, + span chars, + StreamingParserHandler* handler) { + JsonParser parser(&platform, handler); + parser.Parse(chars.data(), chars.size()); +} + +// ============================================================================= +// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding +// ============================================================================= +template +Status ConvertCBORToJSONTmpl(const Platform& platform, + span cbor, + C* json) { + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&platform, json, &status); + cbor::ParseCBOR(cbor, json_writer.get()); + return status; +} + +Status ConvertCBORToJSON(const Platform& platform, + span cbor, + std::vector* json) { + return ConvertCBORToJSONTmpl(platform, cbor, json); +} +Status ConvertCBORToJSON(const Platform& platform, + span cbor, + std::string* json) { + return ConvertCBORToJSONTmpl(platform, cbor, json); +} + +template +Status ConvertJSONToCBORTmpl(const Platform& platform, span json, C* cbor) { + Status status; + std::unique_ptr encoder = + cbor::NewCBOREncoder(cbor, &status); + ParseJSON(platform, json, encoder.get()); + return status; +} +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::string* cbor) { + return ConvertJSONToCBORTmpl(platform, json, cbor); +} +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::string* cbor) { + return ConvertJSONToCBORTmpl(platform, json, cbor); +} +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::vector* cbor) { + return ConvertJSONToCBORTmpl(platform, json, cbor); +} +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::vector* cbor) { + return ConvertJSONToCBORTmpl(platform, json, cbor); +} +} // namespace json +} // namespace v8_inspector_protocol_encoding diff --git a/tools/inspector_protocol/encoding/encoding.h b/tools/inspector_protocol/encoding/encoding.h new file mode 100644 index 0000000000..90916d42b3 --- /dev/null +++ b/tools/inspector_protocol/encoding/encoding.h @@ -0,0 +1,510 @@ +// Copyright 2019 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_ +#define V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_ + +#include +#include +#include +#include +#include +#include +#include + +namespace v8_inspector_protocol_encoding { + +// ============================================================================= +// span - sequence of bytes +// ============================================================================= + +// This template is similar to std::span, which will be included in C++20. +template +class span { + public: + using index_type = size_t; + + span() : data_(nullptr), size_(0) {} + span(const T* data, index_type size) : data_(data), size_(size) {} + + const T* data() const { return data_; } + + const T* begin() const { return data_; } + const T* end() const { return data_ + size_; } + + const T& operator[](index_type idx) const { return data_[idx]; } + + span subspan(index_type offset, index_type count) const { + return span(data_ + offset, count); + } + + span subspan(index_type offset) const { + return span(data_ + offset, size_ - offset); + } + + bool empty() const { return size_ == 0; } + + index_type size() const { return size_; } + index_type size_bytes() const { return size_ * sizeof(T); } + + private: + const T* data_; + index_type size_; +}; + +template +span SpanFrom(const std::vector& v) { + return span(v.data(), v.size()); +} + +template +span SpanFrom(const char (&str)[N]) { + return span(reinterpret_cast(str), N - 1); +} + +inline span SpanFrom(const char* str) { + return str ? span(reinterpret_cast(str), strlen(str)) + : span(); +} + +inline span SpanFrom(const std::string& v) { + return span(reinterpret_cast(v.data()), v.size()); +} + +// ============================================================================= +// Status and Error codes +// ============================================================================= +enum class Error { + OK = 0, + // JSON parsing errors - json_parser.{h,cc}. + JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01, + JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02, + JSON_PARSER_NO_INPUT = 0x03, + JSON_PARSER_INVALID_TOKEN = 0x04, + JSON_PARSER_INVALID_NUMBER = 0x05, + JSON_PARSER_INVALID_STRING = 0x06, + JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07, + JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08, + JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09, + JSON_PARSER_COLON_EXPECTED = 0x0a, + JSON_PARSER_UNEXPECTED_MAP_END = 0x0b, + JSON_PARSER_COMMA_OR_MAP_END_EXPECTED = 0x0c, + JSON_PARSER_VALUE_EXPECTED = 0x0d, + + CBOR_INVALID_INT32 = 0x0e, + CBOR_INVALID_DOUBLE = 0x0f, + CBOR_INVALID_ENVELOPE = 0x10, + CBOR_INVALID_STRING8 = 0x11, + CBOR_INVALID_STRING16 = 0x12, + CBOR_INVALID_BINARY = 0x13, + CBOR_UNSUPPORTED_VALUE = 0x14, + CBOR_NO_INPUT = 0x15, + CBOR_INVALID_START_BYTE = 0x16, + CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x17, + CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x18, + CBOR_UNEXPECTED_EOF_IN_MAP = 0x19, + CBOR_INVALID_MAP_KEY = 0x1a, + CBOR_STACK_LIMIT_EXCEEDED = 0x1b, + CBOR_TRAILING_JUNK = 0x1c, + CBOR_MAP_START_EXPECTED = 0x1d, + CBOR_MAP_STOP_EXPECTED = 0x1e, + CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED = 0x1f, +}; + +// A status value with position that can be copied. The default status +// is OK. Usually, error status values should come with a valid position. +struct Status { + static constexpr size_t npos() { return std::numeric_limits::max(); } + + bool ok() const { return error == Error::OK; } + + Error error = Error::OK; + size_t pos = npos(); + Status(Error error, size_t pos) : error(error), pos(pos) {} + Status() = default; + + // Returns a 7 bit US-ASCII string, either "OK" or an error message + // that includes the position. + std::string ToASCIIString() const; + + private: + std::string ToASCIIString(const char* msg) const; +}; + +// Handler interface for parser events emitted by a streaming parser. +// See cbor::NewCBOREncoder, cbor::ParseCBOR, json::NewJSONEncoder, +// json::ParseJSON. +class StreamingParserHandler { + public: + virtual ~StreamingParserHandler() = default; + virtual void HandleMapBegin() = 0; + virtual void HandleMapEnd() = 0; + virtual void HandleArrayBegin() = 0; + virtual void HandleArrayEnd() = 0; + virtual void HandleString8(span chars) = 0; + virtual void HandleString16(span chars) = 0; + virtual void HandleBinary(span bytes) = 0; + virtual void HandleDouble(double value) = 0; + virtual void HandleInt32(int32_t value) = 0; + virtual void HandleBool(bool value) = 0; + virtual void HandleNull() = 0; + + // The parser may send one error even after other events have already + // been received. Client code is reponsible to then discard the + // already processed events. + // |error| must be an eror, as in, |error.is_ok()| can't be true. + virtual void HandleError(Status error) = 0; +}; + +namespace cbor { +// The binary encoding for the inspector protocol follows the CBOR specification +// (RFC 7049). Additional constraints: +// - Only indefinite length maps and arrays are supported. +// - Maps and arrays are wrapped with an envelope, that is, a +// CBOR tag with value 24 followed by a byte string specifying +// the byte length of the enclosed map / array. The byte string +// must use a 32 bit wide length. +// - At the top level, a message must be an indefinite length map +// wrapped by an envelope. +// - Maximal size for messages is 2^32 (4 GB). +// - For scalars, we support only the int32_t range, encoded as +// UNSIGNED/NEGATIVE (major types 0 / 1). +// - UTF16 strings, including with unbalanced surrogate pairs, are encoded +// as CBOR BYTE_STRING (major type 2). For such strings, the number of +// bytes encoded must be even. +// - UTF8 strings (major type 3) are supported. +// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never +// as UTF16 strings. +// - Arbitrary byte arrays, in the inspector protocol called 'binary', +// are encoded as BYTE_STRING (major type 2), prefixed with a byte +// indicating base64 when rendered as JSON. + +// ============================================================================= +// Detecting CBOR content +// ============================================================================= + +// The first byte for an envelope, which we use for wrapping dictionaries +// and arrays; and the byte that indicates a byte string with 32 bit length. +// These two bytes start an envelope, and thereby also any CBOR message +// produced or consumed by this protocol. See also |EnvelopeEncoder| below. +uint8_t InitialByteForEnvelope(); +uint8_t InitialByteFor32BitLengthByteString(); + +// Checks whether |msg| is a cbor message. +bool IsCBORMessage(span msg); + +// ============================================================================= +// Encoding individual CBOR items +// ============================================================================= + +// Some constants for CBOR tokens that only take a single byte on the wire. +uint8_t EncodeTrue(); +uint8_t EncodeFalse(); +uint8_t EncodeNull(); +uint8_t EncodeIndefiniteLengthArrayStart(); +uint8_t EncodeIndefiniteLengthMapStart(); +uint8_t EncodeStop(); + +// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE| +// (major type 1) iff < 0. +void EncodeInt32(int32_t value, std::vector* out); +void EncodeInt32(int32_t value, std::string* out); + +// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16 +// character in |in| is emitted with most significant byte first, +// appending to |out|. +void EncodeString16(span in, std::vector* out); +void EncodeString16(span in, std::string* out); + +// Encodes a UTF8 string |in| as STRING (major type 3). +void EncodeString8(span in, std::vector* out); +void EncodeString8(span in, std::string* out); + +// Encodes the given |latin1| string as STRING8. +// If any non-ASCII character is present, it will be represented +// as a 2 byte UTF8 sequence. +void EncodeFromLatin1(span latin1, std::vector* out); +void EncodeFromLatin1(span latin1, std::string* out); + +// Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII. +// Otherwise, encodes as STRING16. +void EncodeFromUTF16(span utf16, std::vector* out); +void EncodeFromUTF16(span utf16, std::string* out); + +// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with +// definitive length, prefixed with tag 22 indicating expected conversion to +// base64 (see RFC 7049, Table 3 and Section 2.4.4.2). +void EncodeBinary(span in, std::vector* out); +void EncodeBinary(span in, std::string* out); + +// Encodes / decodes a double as Major type 7 (SIMPLE_VALUE), +// with additional info = 27, followed by 8 bytes in big endian. +void EncodeDouble(double value, std::vector* out); +void EncodeDouble(double value, std::string* out); + +// ============================================================================= +// cbor::EnvelopeEncoder - for wrapping submessages +// ============================================================================= + +// An envelope indicates the byte length of a wrapped item. +// We use this for maps and array, which allows the decoder +// to skip such (nested) values whole sale. +// It's implemented as a CBOR tag (major type 6) with additional +// info = 24, followed by a byte string with a 32 bit length value; +// so the maximal structure that we can wrap is 2^32 bits long. +// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1 +class EnvelopeEncoder { + public: + // Emits the envelope start bytes and records the position for the + // byte size in |byte_size_pos_|. Also emits empty bytes for the + // byte sisze so that encoding can continue. + void EncodeStart(std::vector* out); + void EncodeStart(std::string* out); + // This records the current size in |out| at position byte_size_pos_. + // Returns true iff successful. + bool EncodeStop(std::vector* out); + bool EncodeStop(std::string* out); + + private: + size_t byte_size_pos_ = 0; +}; + +// ============================================================================= +// cbor::NewCBOREncoder - for encoding from a streaming parser +// ============================================================================= + +// This can be used to convert to CBOR, by passing the return value to a parser +// that drives it. The handler will encode into |out|, and iff an error occurs +// it will set |status| to an error and clear |out|. Otherwise, |status.ok()| +// will be |true|. +std::unique_ptr NewCBOREncoder( + std::vector* out, + Status* status); +std::unique_ptr NewCBOREncoder(std::string* out, + Status* status); + +// ============================================================================= +// cbor::CBORTokenizer - for parsing individual CBOR items +// ============================================================================= + +// Tags for the tokens within a CBOR message that CBORTokenizer understands. +// Note that this is not the same terminology as the CBOR spec (RFC 7049), +// but rather, our adaptation. For instance, we lump unsigned and signed +// major type into INT32 here (and disallow values outside the int32_t range). +enum class CBORTokenTag { + // Encountered an error in the structure of the message. Consult + // status() for details. + ERROR_VALUE, + // Booleans and NULL. + TRUE_VALUE, + FALSE_VALUE, + NULL_VALUE, + // An int32_t (signed 32 bit integer). + INT32, + // A double (64 bit floating point). + DOUBLE, + // A UTF8 string. + STRING8, + // A UTF16 string. + STRING16, + // A binary string. + BINARY, + // Starts an indefinite length map; after the map start we expect + // alternating keys and values, followed by STOP. + MAP_START, + // Starts an indefinite length array; after the array start we + // expect values, followed by STOP. + ARRAY_START, + // Ends a map or an array. + STOP, + // An envelope indicator, wrapping a map or array. + // Internally this carries the byte length of the wrapped + // map or array. While CBORTokenizer::Next() will read / skip the entire + // envelope, CBORTokenizer::EnterEnvelope() reads the tokens + // inside of it. + ENVELOPE, + // We've reached the end there is nothing else to read. + DONE, +}; + +// The major types from RFC 7049 Section 2.1. +enum class MajorType { + UNSIGNED = 0, + NEGATIVE = 1, + BYTE_STRING = 2, + STRING = 3, + ARRAY = 4, + MAP = 5, + TAG = 6, + SIMPLE_VALUE = 7 +}; + +// CBORTokenizer segments a CBOR message, presenting the tokens therein as +// numbers, strings, etc. This is not a complete CBOR parser, but makes it much +// easier to implement one (e.g. ParseCBOR, above). It can also be used to parse +// messages partially. +class CBORTokenizer { + public: + explicit CBORTokenizer(span bytes); + ~CBORTokenizer(); + + // Identifies the current token that we're looking at, + // or ERROR_VALUE (in which ase ::Status() has details) + // or DONE (if we're past the last token). + CBORTokenTag TokenTag() const; + + // Advances to the next token. + void Next(); + // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE. + // While Next() would skip past the entire envelope / what it's + // wrapping, EnterEnvelope positions the cursor inside of the envelope, + // letting the client explore the nested structure. + void EnterEnvelope(); + + // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes + // the error more precisely; otherwise it'll be set to Error::OK. + // In either case, Status().pos is the current position. + struct Status Status() const; + + // The following methods retrieve the token values. They can only + // be called if TokenTag() matches. + + // To be called only if ::TokenTag() == CBORTokenTag::INT32. + int32_t GetInt32() const; + + // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE. + double GetDouble() const; + + // To be called only if ::TokenTag() == CBORTokenTag::STRING8. + span GetString8() const; + + // Wire representation for STRING16 is low byte first (little endian). + // To be called only if ::TokenTag() == CBORTokenTag::STRING16. + span GetString16WireRep() const; + + // To be called only if ::TokenTag() == CBORTokenTag::BINARY. + span GetBinary() const; + + // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE. + span GetEnvelopeContents() const; + + private: + void ReadNextToken(bool enter_envelope); + void SetToken(CBORTokenTag token, size_t token_byte_length); + void SetError(Error error); + + span bytes_; + CBORTokenTag token_tag_; + struct Status status_; + size_t token_byte_length_; + MajorType token_start_type_; + uint64_t token_start_internal_value_; +}; + +// ============================================================================= +// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages +// ============================================================================= + +// Parses a CBOR encoded message from |bytes|, sending events to +// |out|. If an error occurs, sends |out->HandleError|, and parsing stops. +// The client is responsible for discarding the already received information in +// that case. +void ParseCBOR(span bytes, StreamingParserHandler* out); + +// ============================================================================= +// cbor::AppendString8EntryToMap - for limited in-place editing of messages +// ============================================================================= + +// Modifies the |cbor| message by appending a new key/value entry at the end +// of the map. Patches up the envelope size; Status.ok() iff successful. +// If not successful, |cbor| may be corrupted after this call. +Status AppendString8EntryToCBORMap(span string8_key, + span string8_value, + std::vector* cbor); +Status AppendString8EntryToCBORMap(span string8_key, + span string8_value, + std::string* cbor); + +namespace internals { // Exposed only for writing tests. +int8_t ReadTokenStart(span bytes, + cbor::MajorType* type, + uint64_t* value); + +void WriteTokenStart(cbor::MajorType type, + uint64_t value, + std::vector* encoded); +void WriteTokenStart(cbor::MajorType type, + uint64_t value, + std::string* encoded); +} // namespace internals +} // namespace cbor + +namespace json { +// Client code must provide an instance. Implementation should delegate +// to whatever is appropriate. +class Platform { + public: + virtual ~Platform() = default; + // Parses |str| into |result|. Returns false iff there are + // leftover characters or parsing errors. + virtual bool StrToD(const char* str, double* result) const = 0; + + // Prints |value| in a format suitable for JSON. + virtual std::unique_ptr DToStr(double value) const = 0; +}; + +// ============================================================================= +// json::NewJSONEncoder - for encoding streaming parser events as JSON +// ============================================================================= + +// Returns a handler object which will write ascii characters to |out|. +// |status->ok()| will be false iff the handler routine HandleError() is called. +// In that case, we'll stop emitting output. +// Except for calling the HandleError routine at any time, the client +// code must call the Handle* methods in an order in which they'd occur +// in valid JSON; otherwise we may crash (the code uses assert). +std::unique_ptr NewJSONEncoder( + const Platform* platform, + std::vector* out, + Status* status); +std::unique_ptr NewJSONEncoder(const Platform* platform, + std::string* out, + Status* status); + +// ============================================================================= +// json::ParseJSON - for receiving streaming parser events for JSON +// ============================================================================= + +void ParseJSON(const Platform& platform, + span chars, + StreamingParserHandler* handler); +void ParseJSON(const Platform& platform, + span chars, + StreamingParserHandler* handler); + +// ============================================================================= +// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding +// ============================================================================= +Status ConvertCBORToJSON(const Platform& platform, + span cbor, + std::string* json); +Status ConvertCBORToJSON(const Platform& platform, + span cbor, + std::vector* json); +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::vector* cbor); +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::vector* cbor); +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::string* cbor); +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::string* cbor); +} // namespace json +} // namespace v8_inspector_protocol_encoding + +#endif // V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_ diff --git a/tools/inspector_protocol/encoding/encoding_test.cc b/tools/inspector_protocol/encoding/encoding_test.cc new file mode 100644 index 0000000000..b8d75e09ba --- /dev/null +++ b/tools/inspector_protocol/encoding/encoding_test.cc @@ -0,0 +1,1838 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "encoding.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "encoding_test_helper.h" + +using testing::ElementsAreArray; + +namespace v8_inspector_protocol_encoding { + +class TestPlatform : public json::Platform { + bool StrToD(const char* str, double* result) const override { + // This is not thread-safe + // (see https://en.cppreference.com/w/cpp/locale/setlocale) + // but good enough for a unittest. + const char* saved_locale = std::setlocale(LC_NUMERIC, nullptr); + char* end; + *result = std::strtod(str, &end); + std::setlocale(LC_NUMERIC, saved_locale); + if (errno == ERANGE) { + // errno must be reset, e.g. see the example here: + // https://en.cppreference.com/w/cpp/string/byte/strtof + errno = 0; + return false; + } + return end == str + strlen(str); + } + + std::unique_ptr DToStr(double value) const override { + std::stringstream ss; + ss.imbue(std::locale("C")); + ss << value; + std::string str = ss.str(); + std::unique_ptr result(new char[str.size() + 1]); + memcpy(result.get(), str.c_str(), str.size() + 1); + return result; + } +}; + +const json::Platform& GetTestPlatform() { + static TestPlatform* platform = new TestPlatform; + return *platform; +} + +// ============================================================================= +// span - sequence of bytes +// ============================================================================= + +template +class SpanTest : public ::testing::Test {}; + +using TestTypes = ::testing::Types; +TYPED_TEST_SUITE(SpanTest, TestTypes); + +TYPED_TEST(SpanTest, Empty) { + span empty; + EXPECT_TRUE(empty.empty()); + EXPECT_EQ(0u, empty.size()); + EXPECT_EQ(0u, empty.size_bytes()); + EXPECT_EQ(empty.begin(), empty.end()); +} + +TYPED_TEST(SpanTest, SingleItem) { + TypeParam single_item = 42; + span singular(&single_item, 1); + EXPECT_FALSE(singular.empty()); + EXPECT_EQ(1u, singular.size()); + EXPECT_EQ(sizeof(TypeParam), singular.size_bytes()); + EXPECT_EQ(singular.begin() + 1, singular.end()); + EXPECT_EQ(42, singular[0]); +} + +TYPED_TEST(SpanTest, FiveItems) { + std::vector test_input = {31, 32, 33, 34, 35}; + span five_items(test_input.data(), 5); + EXPECT_FALSE(five_items.empty()); + EXPECT_EQ(5u, five_items.size()); + EXPECT_EQ(sizeof(TypeParam) * 5, five_items.size_bytes()); + EXPECT_EQ(five_items.begin() + 5, five_items.end()); + EXPECT_EQ(31, five_items[0]); + EXPECT_EQ(32, five_items[1]); + EXPECT_EQ(33, five_items[2]); + EXPECT_EQ(34, five_items[3]); + EXPECT_EQ(35, five_items[4]); + span three_items = five_items.subspan(2); + EXPECT_EQ(3u, three_items.size()); + EXPECT_EQ(33, three_items[0]); + EXPECT_EQ(34, three_items[1]); + EXPECT_EQ(35, three_items[2]); + span two_items = five_items.subspan(2, 2); + EXPECT_EQ(2u, two_items.size()); + EXPECT_EQ(33, two_items[0]); + EXPECT_EQ(34, two_items[1]); +} + +TEST(SpanFromTest, FromConstCharAndLiteral) { + // Testing this is useful because strlen(nullptr) is undefined. + EXPECT_EQ(nullptr, SpanFrom(nullptr).data()); + EXPECT_EQ(0u, SpanFrom(nullptr).size()); + + const char* kEmpty = ""; + EXPECT_EQ(kEmpty, reinterpret_cast(SpanFrom(kEmpty).data())); + EXPECT_EQ(0u, SpanFrom(kEmpty).size()); + + const char* kFoo = "foo"; + EXPECT_EQ(kFoo, reinterpret_cast(SpanFrom(kFoo).data())); + EXPECT_EQ(3u, SpanFrom(kFoo).size()); + + EXPECT_EQ(3u, SpanFrom("foo").size()); +} + +// ============================================================================= +// Status and Error codes +// ============================================================================= + +TEST(StatusTest, StatusToASCIIString) { + Status ok_status; + EXPECT_EQ("OK", ok_status.ToASCIIString()); + Status json_error(Error::JSON_PARSER_COLON_EXPECTED, 42); + EXPECT_EQ("JSON: colon expected at position 42", json_error.ToASCIIString()); + Status cbor_error(Error::CBOR_TRAILING_JUNK, 21); + EXPECT_EQ("CBOR: trailing junk at position 21", cbor_error.ToASCIIString()); +} + +namespace cbor { + +// ============================================================================= +// Detecting CBOR content +// ============================================================================= + +TEST(IsCBORMessage, SomeSmokeTests) { + std::vector empty; + EXPECT_FALSE(IsCBORMessage(SpanFrom(empty))); + std::vector hello = {'H', 'e', 'l', 'o', ' ', 't', + 'h', 'e', 'r', 'e', '!'}; + EXPECT_FALSE(IsCBORMessage(SpanFrom(hello))); + std::vector example = {0xd8, 0x5a, 0, 0, 0, 0}; + EXPECT_TRUE(IsCBORMessage(SpanFrom(example))); + std::vector one = {0xd8, 0x5a, 0, 0, 0, 1, 1}; + EXPECT_TRUE(IsCBORMessage(SpanFrom(one))); +} + +// ============================================================================= +// Encoding individual CBOR items +// cbor::CBORTokenizer - for parsing individual CBOR items +// ============================================================================= + +// +// EncodeInt32 / CBORTokenTag::INT32 +// +TEST(EncodeDecodeInt32Test, Roundtrips23) { + // This roundtrips the int32_t value 23 through the pair of EncodeInt32 / + // CBORTokenizer; this is interesting since 23 is encoded as a single byte. + std::vector encoded; + EncodeInt32(23, &encoded); + // first three bits: major type = 0; remaining five bits: additional info = + // value 23. + EXPECT_THAT(encoded, ElementsAreArray(std::array{{23}})); + + // Reverse direction: decode with CBORTokenizer. + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::INT32, tokenizer.TokenTag()); + EXPECT_EQ(23, tokenizer.GetInt32()); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); +} + +TEST(EncodeDecodeInt32Test, RoundtripsUint8) { + // This roundtrips the int32_t value 42 through the pair of EncodeInt32 / + // CBORTokenizer. This is different from Roundtrip23 because 42 is encoded + // in an extra byte after the initial one. + std::vector encoded; + EncodeInt32(42, &encoded); + // first three bits: major type = 0; + // remaining five bits: additional info = 24, indicating payload is uint8. + EXPECT_THAT(encoded, ElementsAreArray(std::array{{24, 42}})); + + // Reverse direction: decode with CBORTokenizer. + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::INT32, tokenizer.TokenTag()); + EXPECT_EQ(42, tokenizer.GetInt32()); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); +} + +TEST(EncodeDecodeInt32Test, RoundtripsUint16) { + // 500 is encoded as a uint16 after the initial byte. + std::vector encoded; + EncodeInt32(500, &encoded); + // 1 for initial byte, 2 for uint16. + EXPECT_EQ(3u, encoded.size()); + // first three bits: major type = 0; + // remaining five bits: additional info = 25, indicating payload is uint16. + EXPECT_EQ(25, encoded[0]); + EXPECT_EQ(0x01, encoded[1]); + EXPECT_EQ(0xf4, encoded[2]); + + // Reverse direction: decode with CBORTokenizer. + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::INT32, tokenizer.TokenTag()); + EXPECT_EQ(500, tokenizer.GetInt32()); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); +} + +TEST(EncodeDecodeInt32Test, RoundtripsInt32Max) { + // std::numeric_limits is encoded as a uint32 after the initial byte. + std::vector encoded; + EncodeInt32(std::numeric_limits::max(), &encoded); + // 1 for initial byte, 4 for the uint32. + // first three bits: major type = 0; + // remaining five bits: additional info = 26, indicating payload is uint32. + EXPECT_THAT( + encoded, + ElementsAreArray(std::array{{26, 0x7f, 0xff, 0xff, 0xff}})); + + // Reverse direction: decode with CBORTokenizer. + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::INT32, tokenizer.TokenTag()); + EXPECT_EQ(std::numeric_limits::max(), tokenizer.GetInt32()); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); +} + +TEST(EncodeDecodeInt32Test, CantRoundtripUint32) { + // 0xdeadbeef is a value which does not fit below + // std::numerical_limits::max(), so we can't encode + // it with EncodeInt32. However, CBOR does support this, so we + // encode it here manually with the internal routine, just to observe + // that it's considered an invalid int32 by CBORTokenizer. + std::vector encoded; + internals::WriteTokenStart(MajorType::UNSIGNED, 0xdeadbeef, &encoded); + // 1 for initial byte, 4 for the uint32. + // first three bits: major type = 0; + // remaining five bits: additional info = 26, indicating payload is uint32. + EXPECT_THAT( + encoded, + ElementsAreArray(std::array{{26, 0xde, 0xad, 0xbe, 0xef}})); + + // Now try to decode; we treat this as an invalid INT32. + CBORTokenizer tokenizer(SpanFrom(encoded)); + // 0xdeadbeef is > std::numerical_limits::max(). + EXPECT_EQ(CBORTokenTag::ERROR_VALUE, tokenizer.TokenTag()); + EXPECT_EQ(Error::CBOR_INVALID_INT32, tokenizer.Status().error); +} + +TEST(EncodeDecodeInt32Test, DecodeErrorCases) { + struct TestCase { + std::vector data; + std::string msg; + }; + std::vector tests{ + {TestCase{ + {24}, + "additional info = 24 would require 1 byte of payload (but it's 0)"}, + TestCase{{27, 0xaa, 0xbb, 0xcc}, + "additional info = 27 would require 8 bytes of payload (but " + "it's 3)"}, + TestCase{{29}, "additional info = 29 isn't recognized"}}}; + + for (const TestCase& test : tests) { + SCOPED_TRACE(test.msg); + CBORTokenizer tokenizer(SpanFrom(test.data)); + EXPECT_EQ(CBORTokenTag::ERROR_VALUE, tokenizer.TokenTag()); + EXPECT_EQ(Error::CBOR_INVALID_INT32, tokenizer.Status().error); + } +} + +TEST(EncodeDecodeInt32Test, RoundtripsMinus24) { + // This roundtrips the int32_t value -24 through the pair of EncodeInt32 / + // CBORTokenizer; this is interesting since -24 is encoded as + // a single byte as NEGATIVE, and it tests the specific encoding + // (note how for unsigned the single byte covers values up to 23). + // Additional examples are covered in RoundtripsAdditionalExamples. + std::vector encoded; + EncodeInt32(-24, &encoded); + // first three bits: major type = 1; remaining five bits: additional info = + // value 23. + EXPECT_THAT(encoded, ElementsAreArray(std::array{{1 << 5 | 23}})); + + // Reverse direction: decode with CBORTokenizer. + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::INT32, tokenizer.TokenTag()); + EXPECT_EQ(-24, tokenizer.GetInt32()); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); +} + +TEST(EncodeDecodeInt32Test, RoundtripsAdditionalNegativeExamples) { + std::vector examples = {-1, + -10, + -24, + -25, + -300, + -30000, + -300 * 1000, + -1000 * 1000, + -1000 * 1000 * 1000, + std::numeric_limits::min()}; + for (int32_t example : examples) { + SCOPED_TRACE(std::string("example ") + std::to_string(example)); + std::vector encoded; + EncodeInt32(example, &encoded); + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::INT32, tokenizer.TokenTag()); + EXPECT_EQ(example, tokenizer.GetInt32()); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); + } +} + +// +// EncodeString16 / CBORTokenTag::STRING16 +// +TEST(EncodeDecodeString16Test, RoundtripsEmpty) { + // This roundtrips the empty utf16 string through the pair of EncodeString16 / + // CBORTokenizer. + std::vector encoded; + EncodeString16(span(), &encoded); + EXPECT_EQ(1u, encoded.size()); + // first three bits: major type = 2; remaining five bits: additional info = + // size 0. + EXPECT_EQ(2 << 5, encoded[0]); + + // Reverse direction: decode with CBORTokenizer. + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::STRING16, tokenizer.TokenTag()); + span decoded_string16_wirerep = tokenizer.GetString16WireRep(); + EXPECT_TRUE(decoded_string16_wirerep.empty()); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); +} + +// On the wire, we STRING16 is encoded as little endian (least +// significant byte first). The host may or may not be little endian, +// so this routine follows the advice in +// https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html. +std::vector String16WireRepToHost(span in) { + // must be even number of bytes. + CHECK_EQ(in.size() & 1, 0u); + std::vector host_out; + for (size_t ii = 0; ii < in.size(); ii += 2) + host_out.push_back(in[ii + 1] << 8 | in[ii]); + return host_out; +} + +TEST(EncodeDecodeString16Test, RoundtripsHelloWorld) { + // This roundtrips the hello world message which is given here in utf16 + // characters. 0xd83c, 0xdf0e: UTF16 encoding for the "Earth Globe Americas" + // character, šŸŒŽ. + std::array msg{ + {'H', 'e', 'l', 'l', 'o', ',', ' ', 0xd83c, 0xdf0e, '.'}}; + std::vector encoded; + EncodeString16(span(msg.data(), msg.size()), &encoded); + // This will be encoded as BYTE_STRING of length 20, so the 20 is encoded in + // the additional info part of the initial byte. Payload is two bytes for each + // UTF16 character. + uint8_t initial_byte = /*major type=*/2 << 5 | /*additional info=*/20; + std::array encoded_expected = { + {initial_byte, 'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, + ',', 0, ' ', 0, 0x3c, 0xd8, 0x0e, 0xdf, '.', 0}}; + EXPECT_THAT(encoded, ElementsAreArray(encoded_expected)); + + // Now decode to complete the roundtrip. + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::STRING16, tokenizer.TokenTag()); + std::vector decoded = + String16WireRepToHost(tokenizer.GetString16WireRep()); + EXPECT_THAT(decoded, ElementsAreArray(msg)); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); + + // For bonus points, we look at the decoded message in UTF8 as well so we can + // easily see it on the terminal screen. + std::string utf8_decoded = UTF16ToUTF8(SpanFrom(decoded)); + EXPECT_EQ("Hello, šŸŒŽ.", utf8_decoded); +} + +TEST(EncodeDecodeString16Test, Roundtrips500) { + // We roundtrip a message that has 250 16 bit values. Each of these are just + // set to their index. 250 is interesting because the cbor spec uses a + // BYTE_STRING of length 500 for one of their examples of how to encode the + // start of it (section 2.1) so it's easy for us to look at the first three + // bytes closely. + std::vector two_fifty; + for (uint16_t ii = 0; ii < 250; ++ii) + two_fifty.push_back(ii); + std::vector encoded; + EncodeString16(span(two_fifty.data(), two_fifty.size()), &encoded); + EXPECT_EQ(3u + 250u * 2, encoded.size()); + // Now check the first three bytes: + // Major type: 2 (BYTE_STRING) + // Additional information: 25, indicating size is represented by 2 bytes. + // Bytes 1 and 2 encode 500 (0x01f4). + EXPECT_EQ(2 << 5 | 25, encoded[0]); + EXPECT_EQ(0x01, encoded[1]); + EXPECT_EQ(0xf4, encoded[2]); + + // Now decode to complete the roundtrip. + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::STRING16, tokenizer.TokenTag()); + std::vector decoded = + String16WireRepToHost(tokenizer.GetString16WireRep()); + EXPECT_THAT(decoded, ElementsAreArray(two_fifty)); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); +} + +TEST(EncodeDecodeString16Test, ErrorCases) { + struct TestCase { + std::vector data; + std::string msg; + }; + std::vector tests{ + {TestCase{{2 << 5 | 1, 'a'}, + "length must be divisible by 2 (but it's 1)"}, + TestCase{{2 << 5 | 29}, "additional info = 29 isn't recognized"}, + TestCase{{2 << 5 | 9, 1, 2, 3, 4, 5, 6, 7, 8}, + "length (9) points just past the end of the test case"}, + TestCase{{2 << 5 | 27, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 'a', 'b', 'c'}, + "large length pointing past the end of the test case"}}}; + for (const TestCase& test : tests) { + SCOPED_TRACE(test.msg); + CBORTokenizer tokenizer(SpanFrom(test.data)); + EXPECT_EQ(CBORTokenTag::ERROR_VALUE, tokenizer.TokenTag()); + EXPECT_EQ(Error::CBOR_INVALID_STRING16, tokenizer.Status().error); + } +} + +// +// EncodeString8 / CBORTokenTag::STRING8 +// +TEST(EncodeDecodeString8Test, RoundtripsHelloWorld) { + // This roundtrips the hello world message which is given here in utf8 + // characters. šŸŒŽ is a four byte utf8 character. + std::string utf8_msg = "Hello, šŸŒŽ."; + std::vector msg(utf8_msg.begin(), utf8_msg.end()); + std::vector encoded; + EncodeString8(SpanFrom(utf8_msg), &encoded); + // This will be encoded as STRING of length 12, so the 12 is encoded in + // the additional info part of the initial byte. Payload is one byte per + // utf8 byte. + uint8_t initial_byte = /*major type=*/3 << 5 | /*additional info=*/12; + std::array encoded_expected = {{initial_byte, 'H', 'e', 'l', 'l', + 'o', ',', ' ', 0xF0, 0x9f, 0x8c, + 0x8e, '.'}}; + EXPECT_THAT(encoded, ElementsAreArray(encoded_expected)); + + // Now decode to complete the roundtrip. + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::STRING8, tokenizer.TokenTag()); + std::vector decoded(tokenizer.GetString8().begin(), + tokenizer.GetString8().end()); + EXPECT_THAT(decoded, ElementsAreArray(msg)); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); +} + +TEST(EncodeDecodeString8Test, ErrorCases) { + struct TestCase { + std::vector data; + std::string msg; + }; + std::vector tests{ + {TestCase{{3 << 5 | 29}, "additional info = 29 isn't recognized"}, + TestCase{{3 << 5 | 9, 1, 2, 3, 4, 5, 6, 7, 8}, + "length (9) points just past the end of the test case"}, + TestCase{{3 << 5 | 27, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 'a', 'b', 'c'}, + "large length pointing past the end of the test case"}}}; + for (const TestCase& test : tests) { + SCOPED_TRACE(test.msg); + CBORTokenizer tokenizer(SpanFrom(test.data)); + EXPECT_EQ(CBORTokenTag::ERROR_VALUE, tokenizer.TokenTag()); + EXPECT_EQ(Error::CBOR_INVALID_STRING8, tokenizer.Status().error); + } +} + +TEST(EncodeFromLatin1Test, ConvertsToUTF8IfNeeded) { + std::vector> examples = { + {"Hello, world.", "Hello, world."}, + {"Above: \xDC" + "ber", + "Above: Ɯber"}, + {"\xA5 500 are about \xA3 3.50; a y with umlaut is \xFF", + "Ā„ 500 are about Ā£ 3.50; a y with umlaut is Ćæ"}}; + + for (const auto& example : examples) { + const std::string& latin1 = example.first; + const std::string& expected_utf8 = example.second; + std::vector encoded; + EncodeFromLatin1(SpanFrom(latin1), &encoded); + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::STRING8, tokenizer.TokenTag()); + std::vector decoded(tokenizer.GetString8().begin(), + tokenizer.GetString8().end()); + std::string decoded_str(decoded.begin(), decoded.end()); + EXPECT_THAT(decoded_str, testing::Eq(expected_utf8)); + } +} + +TEST(EncodeFromUTF16Test, ConvertsToUTF8IfEasy) { + std::vector ascii = {'e', 'a', 's', 'y'}; + std::vector encoded; + EncodeFromUTF16(span(ascii.data(), ascii.size()), &encoded); + + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::STRING8, tokenizer.TokenTag()); + std::vector decoded(tokenizer.GetString8().begin(), + tokenizer.GetString8().end()); + std::string decoded_str(decoded.begin(), decoded.end()); + EXPECT_THAT(decoded_str, testing::Eq("easy")); +} + +TEST(EncodeFromUTF16Test, EncodesAsString16IfNeeded) { + // Since this message contains non-ASCII characters, the routine is + // forced to encode as UTF16. We see this below by checking that the + // token tag is STRING16. + std::vector msg = {'H', 'e', 'l', 'l', 'o', + ',', ' ', 0xd83c, 0xdf0e, '.'}; + std::vector encoded; + EncodeFromUTF16(span(msg.data(), msg.size()), &encoded); + + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::STRING16, tokenizer.TokenTag()); + std::vector decoded = + String16WireRepToHost(tokenizer.GetString16WireRep()); + std::string utf8_decoded = UTF16ToUTF8(SpanFrom(decoded)); + EXPECT_EQ("Hello, šŸŒŽ.", utf8_decoded); +} + +// +// EncodeBinary / CBORTokenTag::BINARY +// +TEST(EncodeDecodeBinaryTest, RoundtripsHelloWorld) { + std::vector binary = {'H', 'e', 'l', 'l', 'o', ',', ' ', + 'w', 'o', 'r', 'l', 'd', '.'}; + std::vector encoded; + EncodeBinary(span(binary.data(), binary.size()), &encoded); + // So, on the wire we see that the binary blob travels unmodified. + EXPECT_THAT( + encoded, + ElementsAreArray(std::array{ + {(6 << 5 | 22), // tag 22 indicating base64 interpretation in JSON + (2 << 5 | 13), // BYTE_STRING (type 2) of length 13 + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '.'}})); + std::vector decoded; + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::BINARY, tokenizer.TokenTag()); + EXPECT_EQ(0, static_cast(tokenizer.Status().error)); + decoded = std::vector(tokenizer.GetBinary().begin(), + tokenizer.GetBinary().end()); + EXPECT_THAT(decoded, ElementsAreArray(binary)); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); +} + +TEST(EncodeDecodeBinaryTest, ErrorCases) { + struct TestCase { + std::vector data; + std::string msg; + }; + std::vector tests{{TestCase{ + {6 << 5 | 22, // tag 22 indicating base64 interpretation in JSON + 2 << 5 | 27, // BYTE_STRING (type 2), followed by 8 bytes length + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + "large length pointing past the end of the test case"}}}; + for (const TestCase& test : tests) { + SCOPED_TRACE(test.msg); + CBORTokenizer tokenizer(SpanFrom(test.data)); + EXPECT_EQ(CBORTokenTag::ERROR_VALUE, tokenizer.TokenTag()); + EXPECT_EQ(Error::CBOR_INVALID_BINARY, tokenizer.Status().error); + } +} + +// +// EncodeDouble / CBORTokenTag::DOUBLE +// +TEST(EncodeDecodeDoubleTest, RoundtripsWikipediaExample) { + // https://en.wikipedia.org/wiki/Double-precision_floating-point_format + // provides the example of a hex representation 3FD5 5555 5555 5555, which + // approximates 1/3. + + const double kOriginalValue = 1.0 / 3; + std::vector encoded; + EncodeDouble(kOriginalValue, &encoded); + // first three bits: major type = 7; remaining five bits: additional info = + // value 27. This is followed by 8 bytes of payload (which match Wikipedia). + EXPECT_THAT( + encoded, + ElementsAreArray(std::array{ + {7 << 5 | 27, 0x3f, 0xd5, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55}})); + + // Reverse direction: decode and compare with original value. + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::DOUBLE, tokenizer.TokenTag()); + EXPECT_THAT(tokenizer.GetDouble(), testing::DoubleEq(kOriginalValue)); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); +} + +TEST(EncodeDecodeDoubleTest, RoundtripsAdditionalExamples) { + std::vector examples = {0.0, + 1.0, + -1.0, + 3.1415, + std::numeric_limits::min(), + std::numeric_limits::max(), + std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN()}; + for (double example : examples) { + SCOPED_TRACE(std::string("example ") + std::to_string(example)); + std::vector encoded; + EncodeDouble(example, &encoded); + CBORTokenizer tokenizer(SpanFrom(encoded)); + EXPECT_EQ(CBORTokenTag::DOUBLE, tokenizer.TokenTag()); + if (std::isnan(example)) + EXPECT_TRUE(std::isnan(tokenizer.GetDouble())); + else + EXPECT_THAT(tokenizer.GetDouble(), testing::DoubleEq(example)); + tokenizer.Next(); + EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); + } +} + +// ============================================================================= +// cbor::NewCBOREncoder - for encoding from a streaming parser +// ============================================================================= + +void EncodeUTF8ForTest(const std::string& key, std::vector* out) { + EncodeString8(SpanFrom(key), out); +} +TEST(JSONToCBOREncoderTest, SevenBitStrings) { + // When a string can be represented as 7 bit ASCII, the encoder will use the + // STRING (major Type 3) type, so the actual characters end up as bytes on the + // wire. + std::vector encoded; + Status status; + std::unique_ptr encoder = + NewCBOREncoder(&encoded, &status); + std::vector utf16 = {'f', 'o', 'o'}; + encoder->HandleString16(span(utf16.data(), utf16.size())); + EXPECT_EQ(Error::OK, status.error); + // Here we assert that indeed, seven bit strings are represented as + // bytes on the wire, "foo" is just "foo". + EXPECT_THAT(encoded, + ElementsAreArray(std::array{ + {/*major type 3*/ 3 << 5 | /*length*/ 3, 'f', 'o', 'o'}})); +} + +TEST(JsonCborRoundtrip, EncodingDecoding) { + // Hits all the cases except binary and error in StreamingParserHandler, first + // parsing a JSON message into CBOR, then parsing it back from CBOR into JSON. + std::string json = + "{" + "\"string\":\"Hello, \\ud83c\\udf0e.\"," + "\"double\":3.1415," + "\"int\":1," + "\"negative int\":-1," + "\"bool\":true," + "\"null\":null," + "\"array\":[1,2,3]" + "}"; + std::vector encoded; + Status status; + std::unique_ptr encoder = + NewCBOREncoder(&encoded, &status); + span ascii_in = SpanFrom(json); + json::ParseJSON(GetTestPlatform(), ascii_in, encoder.get()); + std::vector expected = { + 0xd8, // envelope + 0x5a, // byte string with 32 bit length + 0, 0, 0, 94, // length is 94 bytes + }; + expected.push_back(0xbf); // indef length map start + EncodeString8(SpanFrom("string"), &expected); + // This is followed by the encoded string for "Hello, šŸŒŽ." + // So, it's the same bytes that we tested above in + // EncodeDecodeString16Test.RoundtripsHelloWorld. + expected.push_back(/*major type=*/2 << 5 | /*additional info=*/20); + for (uint8_t ch : std::array{ + {'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, + ',', 0, ' ', 0, 0x3c, 0xd8, 0x0e, 0xdf, '.', 0}}) + expected.push_back(ch); + EncodeString8(SpanFrom("double"), &expected); + EncodeDouble(3.1415, &expected); + EncodeString8(SpanFrom("int"), &expected); + EncodeInt32(1, &expected); + EncodeString8(SpanFrom("negative int"), &expected); + EncodeInt32(-1, &expected); + EncodeString8(SpanFrom("bool"), &expected); + expected.push_back(7 << 5 | 21); // RFC 7049 Section 2.3, Table 2: true + EncodeString8(SpanFrom("null"), &expected); + expected.push_back(7 << 5 | 22); // RFC 7049 Section 2.3, Table 2: null + EncodeString8(SpanFrom("array"), &expected); + expected.push_back(0xd8); // envelope + expected.push_back(0x5a); // byte string with 32 bit length + // the length is 5 bytes (that's up to end indef length array below). + for (uint8_t ch : std::array{{0, 0, 0, 5}}) + expected.push_back(ch); + expected.push_back(0x9f); // RFC 7049 Section 2.2.1, indef length array start + expected.push_back(1); // Three UNSIGNED values (easy since Major Type 0) + expected.push_back(2); + expected.push_back(3); + expected.push_back(0xff); // End indef length array + expected.push_back(0xff); // End indef length map + EXPECT_TRUE(status.ok()); + EXPECT_THAT(encoded, ElementsAreArray(expected)); + + // And now we roundtrip, decoding the message we just encoded. + std::string decoded; + std::unique_ptr json_encoder = + NewJSONEncoder(&GetTestPlatform(), &decoded, &status); + ParseCBOR(span(encoded.data(), encoded.size()), json_encoder.get()); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(json, decoded); +} + +TEST(JsonCborRoundtrip, MoreRoundtripExamples) { + std::vector examples = { + // Tests that after closing a nested objects, additional key/value pairs + // are considered. + "{\"foo\":{\"bar\":1},\"baz\":2}", "{\"foo\":[1,2,3],\"baz\":2}"}; + for (const std::string& json : examples) { + SCOPED_TRACE(std::string("example: ") + json); + std::vector encoded; + Status status; + std::unique_ptr encoder = + NewCBOREncoder(&encoded, &status); + span ascii_in = SpanFrom(json); + ParseJSON(GetTestPlatform(), ascii_in, encoder.get()); + std::string decoded; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &decoded, &status); + ParseCBOR(span(encoded.data(), encoded.size()), json_writer.get()); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(json, decoded); + } +} + +TEST(JSONToCBOREncoderTest, HelloWorldBinary_WithTripToJson) { + // The StreamingParserHandler::HandleBinary is a special case: The JSON parser + // will never call this method, because JSON does not natively support the + // binary type. So, we can't fully roundtrip. However, the other direction + // works: binary will be rendered in JSON, as a base64 string. So, we make + // calls to the encoder directly here, to construct a message, and one of + // these calls is ::HandleBinary, to which we pass a "binary" string + // containing "Hello, world.". + std::vector encoded; + Status status; + std::unique_ptr encoder = + NewCBOREncoder(&encoded, &status); + encoder->HandleMapBegin(); + // Emit a key. + std::vector key = {'f', 'o', 'o'}; + encoder->HandleString16(SpanFrom(key)); + // Emit the binary payload, an arbitrary array of bytes that happens to + // be the ascii message "Hello, world.". + encoder->HandleBinary(SpanFrom(std::vector{ + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '.'})); + encoder->HandleMapEnd(); + EXPECT_EQ(Error::OK, status.error); + + // Now drive the json writer via the CBOR decoder. + std::string decoded; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &decoded, &status); + ParseCBOR(SpanFrom(encoded), json_writer.get()); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(Status::npos(), status.pos); + // "Hello, world." in base64 is "SGVsbG8sIHdvcmxkLg==". + EXPECT_EQ("{\"foo\":\"SGVsbG8sIHdvcmxkLg==\"}", decoded); +} + +// ============================================================================= +// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages +// ============================================================================= + +TEST(ParseCBORTest, ParseEmptyCBORMessage) { + // An envelope starting with 0xd8, 0x5a, with the byte length + // of 2, containing a map that's empty (0xbf for map + // start, and 0xff for map end). + std::vector in = {0xd8, 0x5a, 0, 0, 0, 2, 0xbf, 0xff}; + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(in.data(), in.size()), json_writer.get()); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ("{}", out); +} + +TEST(ParseCBORTest, ParseCBORHelloWorld) { + const uint8_t kPayloadLen = 27; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen}; + bytes.push_back(0xbf); // start indef length map. + EncodeString8(SpanFrom("msg"), &bytes); // key: msg + // Now write the value, the familiar "Hello, šŸŒŽ." where the globe is expressed + // as two utf16 chars. + bytes.push_back(/*major type=*/2 << 5 | /*additional info=*/20); + for (uint8_t ch : std::array{ + {'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, + ',', 0, ' ', 0, 0x3c, 0xd8, 0x0e, 0xdf, '.', 0}}) + bytes.push_back(ch); + bytes.push_back(0xff); // stop byte + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ("{\"msg\":\"Hello, \\ud83c\\udf0e.\"}", out); +} + +TEST(ParseCBORTest, UTF8IsSupportedInKeys) { + const uint8_t kPayloadLen = 11; + std::vector bytes = {cbor::InitialByteForEnvelope(), + cbor::InitialByteFor32BitLengthByteString(), + 0, + 0, + 0, + kPayloadLen}; + bytes.push_back(cbor::EncodeIndefiniteLengthMapStart()); + // Two UTF16 chars. + EncodeString8(SpanFrom("šŸŒŽ"), &bytes); + // Can be encoded as a single UTF16 char. + EncodeString8(SpanFrom("ā˜¾"), &bytes); + bytes.push_back(cbor::EncodeStop()); + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ("{\"\\ud83c\\udf0e\":\"\\u263e\"}", out); +} + +TEST(ParseCBORTest, NoInputError) { + std::vector in = {}; + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(in.data(), in.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_NO_INPUT, status.error); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, InvalidStartByteError) { + // Here we test that some actual json, which usually starts with {, + // is not considered CBOR. CBOR messages must start with 0x5a, the + // envelope start byte. + std::string json = "{\"msg\": \"Hello, world.\"}"; + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(SpanFrom(json), json_writer.get()); + EXPECT_EQ(Error::CBOR_INVALID_START_BYTE, status.error); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, UnexpectedEofExpectedValueError) { + constexpr uint8_t kPayloadLen = 5; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // map start + // A key; so value would be next. + EncodeString8(SpanFrom("key"), &bytes); + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE, status.error); + EXPECT_EQ(bytes.size(), status.pos); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, UnexpectedEofInArrayError) { + constexpr uint8_t kPayloadLen = 8; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // The byte for starting a map. + // A key; so value would be next. + EncodeString8(SpanFrom("array"), &bytes); + bytes.push_back(0x9f); // byte for indefinite length array start. + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, status.error); + EXPECT_EQ(bytes.size(), status.pos); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, UnexpectedEofInMapError) { + constexpr uint8_t kPayloadLen = 1; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // The byte for starting a map. + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_UNEXPECTED_EOF_IN_MAP, status.error); + EXPECT_EQ(7u, status.pos); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, InvalidMapKeyError) { + constexpr uint8_t kPayloadLen = 2; + std::vector bytes = {0xd8, 0x5a, 0, + 0, 0, kPayloadLen, // envelope + 0xbf, // map start + 7 << 5 | 22}; // null (not a valid map key) + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_INVALID_MAP_KEY, status.error); + EXPECT_EQ(7u, status.pos); + EXPECT_EQ("", out); +} + +std::vector MakeNestedCBOR(int depth) { + std::vector bytes; + std::vector envelopes; + for (int ii = 0; ii < depth; ++ii) { + envelopes.emplace_back(); + envelopes.back().EncodeStart(&bytes); + bytes.push_back(0xbf); // indef length map start + EncodeString8(SpanFrom("key"), &bytes); + } + EncodeString8(SpanFrom("innermost_value"), &bytes); + for (int ii = 0; ii < depth; ++ii) { + bytes.push_back(0xff); // stop byte, finishes map. + envelopes.back().EncodeStop(&bytes); + envelopes.pop_back(); + } + return bytes; +} + +TEST(ParseCBORTest, StackLimitExceededError) { + { // Depth 3: no stack limit exceeded error and is easy to inspect. + std::vector bytes = MakeNestedCBOR(3); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(Status::npos(), status.pos); + EXPECT_EQ("{\"key\":{\"key\":{\"key\":\"innermost_value\"}}}", out); + } + { // Depth 300: no stack limit exceeded. + std::vector bytes = MakeNestedCBOR(300); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(Status::npos(), status.pos); + } + + // We just want to know the length of one opening map so we can compute + // where the error is encountered. So we look at a small example and find + // the second envelope start. + std::vector small_example = MakeNestedCBOR(3); + size_t opening_segment_size = 1; // Start after the first envelope start. + while (opening_segment_size < small_example.size() && + small_example[opening_segment_size] != 0xd8) + opening_segment_size++; + + { // Depth 301: limit exceeded. + std::vector bytes = MakeNestedCBOR(301); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_STACK_LIMIT_EXCEEDED, status.error); + EXPECT_EQ(opening_segment_size * 301, status.pos); + } + { // Depth 320: still limit exceeded, and at the same pos as for 1001 + std::vector bytes = MakeNestedCBOR(320); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_STACK_LIMIT_EXCEEDED, status.error); + EXPECT_EQ(opening_segment_size * 301, status.pos); + } +} + +TEST(ParseCBORTest, UnsupportedValueError) { + constexpr uint8_t kPayloadLen = 6; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // map start + EncodeString8(SpanFrom("key"), &bytes); + size_t error_pos = bytes.size(); + bytes.push_back(6 << 5 | 5); // tags aren't supported yet. + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_UNSUPPORTED_VALUE, status.error); + EXPECT_EQ(error_pos, status.pos); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, InvalidString16Error) { + constexpr uint8_t kPayloadLen = 11; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // map start + EncodeString8(SpanFrom("key"), &bytes); + size_t error_pos = bytes.size(); + // a BYTE_STRING of length 5 as value; since we interpret these as string16, + // it's going to be invalid as each character would need two bytes, but + // 5 isn't divisible by 2. + bytes.push_back(2 << 5 | 5); + for (int ii = 0; ii < 5; ++ii) + bytes.push_back(' '); + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_INVALID_STRING16, status.error); + EXPECT_EQ(error_pos, status.pos); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, InvalidString8Error) { + constexpr uint8_t kPayloadLen = 6; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // map start + EncodeString8(SpanFrom("key"), &bytes); + size_t error_pos = bytes.size(); + // a STRING of length 5 as value, but we're at the end of the bytes array + // so it can't be decoded successfully. + bytes.push_back(3 << 5 | 5); + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_INVALID_STRING8, status.error); + EXPECT_EQ(error_pos, status.pos); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, InvalidBinaryError) { + constexpr uint8_t kPayloadLen = 9; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // map start + EncodeString8(SpanFrom("key"), &bytes); + size_t error_pos = bytes.size(); + bytes.push_back(6 << 5 | 22); // base64 hint for JSON; indicates binary + bytes.push_back(2 << 5 | 10); // BYTE_STRING (major type 2) of length 10 + // Just two garbage bytes, not enough for the binary. + bytes.push_back(0x31); + bytes.push_back(0x23); + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_INVALID_BINARY, status.error); + EXPECT_EQ(error_pos, status.pos); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, InvalidDoubleError) { + constexpr uint8_t kPayloadLen = 8; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // map start + EncodeString8(SpanFrom("key"), &bytes); + size_t error_pos = bytes.size(); + bytes.push_back(7 << 5 | 27); // initial byte for double + // Just two garbage bytes, not enough to represent an actual double. + bytes.push_back(0x31); + bytes.push_back(0x23); + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_INVALID_DOUBLE, status.error); + EXPECT_EQ(error_pos, status.pos); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, InvalidSignedError) { + constexpr uint8_t kPayloadLen = 14; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // map start + EncodeString8(SpanFrom("key"), &bytes); + size_t error_pos = bytes.size(); + // uint64_t max is a perfectly fine value to encode as CBOR unsigned, + // but we don't support this since we only cover the int32_t range. + internals::WriteTokenStart(MajorType::UNSIGNED, + std::numeric_limits::max(), &bytes); + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_INVALID_INT32, status.error); + EXPECT_EQ(error_pos, status.pos); + EXPECT_EQ("", out); +} + +TEST(ParseCBORTest, TrailingJunk) { + constexpr uint8_t kPayloadLen = 35; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // map start + EncodeString8(SpanFrom("key"), &bytes); + EncodeString8(SpanFrom("value"), &bytes); + bytes.push_back(0xff); // Up to here, it's a perfectly fine msg. + size_t error_pos = bytes.size(); + EncodeString8(SpanFrom("trailing junk"), &bytes); + + internals::WriteTokenStart(MajorType::UNSIGNED, + std::numeric_limits::max(), &bytes); + EXPECT_EQ(kPayloadLen, bytes.size() - 6); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(span(bytes.data(), bytes.size()), json_writer.get()); + EXPECT_EQ(Error::CBOR_TRAILING_JUNK, status.error); + EXPECT_EQ(error_pos, status.pos); + EXPECT_EQ("", out); +} + +// ============================================================================= +// cbor::AppendString8EntryToMap - for limited in-place editing of messages +// ============================================================================= + +template +class AppendString8EntryToMapTest : public ::testing::Test {}; + +using ContainerTestTypes = ::testing::Types, std::string>; +TYPED_TEST_SUITE(AppendString8EntryToMapTest, ContainerTestTypes); + +TYPED_TEST(AppendString8EntryToMapTest, AppendsEntrySuccessfully) { + constexpr uint8_t kPayloadLen = 12; + std::vector bytes = {0xd8, 0x5a, 0, 0, 0, kPayloadLen, // envelope + 0xbf}; // map start + size_t pos_before_payload = bytes.size() - 1; + EncodeString8(SpanFrom("key"), &bytes); + EncodeString8(SpanFrom("value"), &bytes); + bytes.push_back(0xff); // A perfectly fine cbor message. + EXPECT_EQ(kPayloadLen, bytes.size() - pos_before_payload); + + TypeParam msg(bytes.begin(), bytes.end()); + + Status status = + AppendString8EntryToCBORMap(SpanFrom("foo"), SpanFrom("bar"), &msg); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(Status::npos(), status.pos); + std::string out; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(SpanFrom(msg), json_writer.get()); + EXPECT_EQ("{\"key\":\"value\",\"foo\":\"bar\"}", out); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(Status::npos(), status.pos); +} + +TYPED_TEST(AppendString8EntryToMapTest, AppendThreeEntries) { + std::vector encoded = { + 0xd8, 0x5a, 0, 0, 0, 2, EncodeIndefiniteLengthMapStart(), EncodeStop()}; + EXPECT_EQ(Error::OK, AppendString8EntryToCBORMap(SpanFrom("key"), + SpanFrom("value"), &encoded) + .error); + EXPECT_EQ(Error::OK, AppendString8EntryToCBORMap(SpanFrom("key1"), + SpanFrom("value1"), &encoded) + .error); + EXPECT_EQ(Error::OK, AppendString8EntryToCBORMap(SpanFrom("key2"), + SpanFrom("value2"), &encoded) + .error); + TypeParam msg(encoded.begin(), encoded.end()); + std::string out; + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + ParseCBOR(SpanFrom(msg), json_writer.get()); + EXPECT_EQ("{\"key\":\"value\",\"key1\":\"value1\",\"key2\":\"value2\"}", out); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(Status::npos(), status.pos); +} + +TYPED_TEST(AppendString8EntryToMapTest, MapStartExpected_Error) { + std::vector bytes = { + 0xd8, 0x5a, 0, 0, 0, 1, EncodeIndefiniteLengthArrayStart()}; + TypeParam msg(bytes.begin(), bytes.end()); + Status status = + AppendString8EntryToCBORMap(SpanFrom("key"), SpanFrom("value"), &msg); + EXPECT_EQ(Error::CBOR_MAP_START_EXPECTED, status.error); + EXPECT_EQ(6u, status.pos); +} + +TYPED_TEST(AppendString8EntryToMapTest, MapStopExpected_Error) { + std::vector bytes = { + 0xd8, 0x5a, 0, 0, 0, 2, EncodeIndefiniteLengthMapStart(), 42}; + TypeParam msg(bytes.begin(), bytes.end()); + Status status = + AppendString8EntryToCBORMap(SpanFrom("key"), SpanFrom("value"), &msg); + EXPECT_EQ(Error::CBOR_MAP_STOP_EXPECTED, status.error); + EXPECT_EQ(7u, status.pos); +} + +TYPED_TEST(AppendString8EntryToMapTest, InvalidEnvelope_Error) { + { // Second byte is wrong. + std::vector bytes = { + 0x5a, 0, 0, 0, 2, EncodeIndefiniteLengthMapStart(), EncodeStop(), 0}; + TypeParam msg(bytes.begin(), bytes.end()); + Status status = + AppendString8EntryToCBORMap(SpanFrom("key"), SpanFrom("value"), &msg); + EXPECT_EQ(Error::CBOR_INVALID_ENVELOPE, status.error); + EXPECT_EQ(0u, status.pos); + } + { // Second byte is wrong. + std::vector bytes = { + 0xd8, 0x7a, 0, 0, 0, 2, EncodeIndefiniteLengthMapStart(), EncodeStop()}; + TypeParam msg(bytes.begin(), bytes.end()); + Status status = + AppendString8EntryToCBORMap(SpanFrom("key"), SpanFrom("value"), &msg); + EXPECT_EQ(Error::CBOR_INVALID_ENVELOPE, status.error); + EXPECT_EQ(0u, status.pos); + } + { // Invalid envelope size example. + std::vector bytes = { + 0xd8, 0x5a, 0, 0, 0, 3, EncodeIndefiniteLengthMapStart(), EncodeStop(), + }; + TypeParam msg(bytes.begin(), bytes.end()); + Status status = + AppendString8EntryToCBORMap(SpanFrom("key"), SpanFrom("value"), &msg); + EXPECT_EQ(Error::CBOR_INVALID_ENVELOPE, status.error); + EXPECT_EQ(0u, status.pos); + } + { // Invalid envelope size example. + std::vector bytes = { + 0xd8, 0x5a, 0, 0, 0, 1, EncodeIndefiniteLengthMapStart(), EncodeStop(), + }; + TypeParam msg(bytes.begin(), bytes.end()); + Status status = + AppendString8EntryToCBORMap(SpanFrom("key"), SpanFrom("value"), &msg); + EXPECT_EQ(Error::CBOR_INVALID_ENVELOPE, status.error); + EXPECT_EQ(0u, status.pos); + } +} +} // namespace cbor + +namespace json { + +// ============================================================================= +// json::NewJSONEncoder - for encoding streaming parser events as JSON +// ============================================================================= + +void WriteUTF8AsUTF16(StreamingParserHandler* writer, const std::string& utf8) { + writer->HandleString16(SpanFrom(UTF8ToUTF16(SpanFrom(utf8)))); +} + +TEST(JsonStdStringWriterTest, HelloWorld) { + std::string out; + Status status; + std::unique_ptr writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + writer->HandleMapBegin(); + WriteUTF8AsUTF16(writer.get(), "msg1"); + WriteUTF8AsUTF16(writer.get(), "Hello, šŸŒŽ."); + std::string key = "msg1-as-utf8"; + std::string value = "Hello, šŸŒŽ."; + writer->HandleString8(SpanFrom(key)); + writer->HandleString8(SpanFrom(value)); + WriteUTF8AsUTF16(writer.get(), "msg2"); + WriteUTF8AsUTF16(writer.get(), "\\\b\r\n\t\f\""); + WriteUTF8AsUTF16(writer.get(), "nested"); + writer->HandleMapBegin(); + WriteUTF8AsUTF16(writer.get(), "double"); + writer->HandleDouble(3.1415); + WriteUTF8AsUTF16(writer.get(), "int"); + writer->HandleInt32(-42); + WriteUTF8AsUTF16(writer.get(), "bool"); + writer->HandleBool(false); + WriteUTF8AsUTF16(writer.get(), "null"); + writer->HandleNull(); + writer->HandleMapEnd(); + WriteUTF8AsUTF16(writer.get(), "array"); + writer->HandleArrayBegin(); + writer->HandleInt32(1); + writer->HandleInt32(2); + writer->HandleInt32(3); + writer->HandleArrayEnd(); + writer->HandleMapEnd(); + EXPECT_TRUE(status.ok()); + EXPECT_EQ( + "{\"msg1\":\"Hello, \\ud83c\\udf0e.\"," + "\"msg1-as-utf8\":\"Hello, \\ud83c\\udf0e.\"," + "\"msg2\":\"\\\\\\b\\r\\n\\t\\f\\\"\"," + "\"nested\":{\"double\":3.1415,\"int\":-42," + "\"bool\":false,\"null\":null},\"array\":[1,2,3]}", + out); +} + +TEST(JsonStdStringWriterTest, RepresentingNonFiniteValuesAsNull) { + // JSON can't represent +Infinity, -Infinity, or NaN. + // So in practice it's mapped to null. + std::string out; + Status status; + std::unique_ptr writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + writer->HandleMapBegin(); + writer->HandleString8(SpanFrom("Infinity")); + writer->HandleDouble(std::numeric_limits::infinity()); + writer->HandleString8(SpanFrom("-Infinity")); + writer->HandleDouble(-std::numeric_limits::infinity()); + writer->HandleString8(SpanFrom("NaN")); + writer->HandleDouble(std::numeric_limits::quiet_NaN()); + writer->HandleMapEnd(); + EXPECT_TRUE(status.ok()); + EXPECT_EQ("{\"Infinity\":null,\"-Infinity\":null,\"NaN\":null}", out); +} + +TEST(JsonStdStringWriterTest, BinaryEncodedAsJsonString) { + // The encoder emits binary submitted to StreamingParserHandler::HandleBinary + // as base64. The following three examples are taken from + // https://en.wikipedia.org/wiki/Base64. + { + std::string out; + Status status; + std::unique_ptr writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + writer->HandleBinary(SpanFrom(std::vector({'M', 'a', 'n'}))); + EXPECT_TRUE(status.ok()); + EXPECT_EQ("\"TWFu\"", out); + } + { + std::string out; + Status status; + std::unique_ptr writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + writer->HandleBinary(SpanFrom(std::vector({'M', 'a'}))); + EXPECT_TRUE(status.ok()); + EXPECT_EQ("\"TWE=\"", out); + } + { + std::string out; + Status status; + std::unique_ptr writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + writer->HandleBinary(SpanFrom(std::vector({'M'}))); + EXPECT_TRUE(status.ok()); + EXPECT_EQ("\"TQ==\"", out); + } + { // "Hello, world.", verified with base64decode.org. + std::string out; + Status status; + std::unique_ptr writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + writer->HandleBinary(SpanFrom(std::vector( + {'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '.'}))); + EXPECT_TRUE(status.ok()); + EXPECT_EQ("\"SGVsbG8sIHdvcmxkLg==\"", out); + } +} + +TEST(JsonStdStringWriterTest, HandlesErrors) { + // When an error is sent via HandleError, it saves it in the provided + // status and clears the output. + std::string out; + Status status; + std::unique_ptr writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + writer->HandleMapBegin(); + WriteUTF8AsUTF16(writer.get(), "msg1"); + writer->HandleError(Status{Error::JSON_PARSER_VALUE_EXPECTED, 42}); + EXPECT_EQ(Error::JSON_PARSER_VALUE_EXPECTED, status.error); + EXPECT_EQ(42u, status.pos); + EXPECT_EQ("", out); +} + +// We'd use Gmock but unfortunately it only handles copyable return types. +class MockPlatform : public Platform { + public: + // Not implemented. + bool StrToD(const char* str, double* result) const override { return false; } + + // A map with pre-registered responses for DToSTr. + std::map dtostr_responses_; + + std::unique_ptr DToStr(double value) const override { + auto it = dtostr_responses_.find(value); + CHECK(it != dtostr_responses_.end()); + const std::string& str = it->second; + std::unique_ptr response(new char[str.size() + 1]); + memcpy(response.get(), str.c_str(), str.size() + 1); + return response; + } +}; + +TEST(JsonStdStringWriterTest, DoubleToString) { + // This "broken" platform responds without the leading 0 before the + // decimal dot, so it'd be invalid JSON. + MockPlatform platform; + platform.dtostr_responses_[.1] = ".1"; + platform.dtostr_responses_[-.7] = "-.7"; + + std::string out; + Status status; + std::unique_ptr writer = + NewJSONEncoder(&platform, &out, &status); + writer->HandleArrayBegin(); + writer->HandleDouble(.1); + writer->HandleDouble(-.7); + writer->HandleArrayEnd(); + EXPECT_EQ("[0.1,-0.7]", out); +} + +// ============================================================================= +// json::ParseJSON - for receiving streaming parser events for JSON +// ============================================================================= + +class Log : public StreamingParserHandler { + public: + void HandleMapBegin() override { log_ << "map begin\n"; } + + void HandleMapEnd() override { log_ << "map end\n"; } + + void HandleArrayBegin() override { log_ << "array begin\n"; } + + void HandleArrayEnd() override { log_ << "array end\n"; } + + void HandleString8(span chars) override { + log_ << "string8: " << std::string(chars.begin(), chars.end()) << "\n"; + } + + void HandleString16(span chars) override { + log_ << "string16: " << UTF16ToUTF8(chars) << "\n"; + } + + void HandleBinary(span bytes) override { + // JSON doesn't have native support for arbitrary bytes, so our parser will + // never call this. + CHECK(false); + } + + void HandleDouble(double value) override { + log_ << "double: " << value << "\n"; + } + + void HandleInt32(int32_t value) override { log_ << "int: " << value << "\n"; } + + void HandleBool(bool value) override { log_ << "bool: " << value << "\n"; } + + void HandleNull() override { log_ << "null\n"; } + + void HandleError(Status status) override { status_ = status; } + + std::string str() const { return status_.ok() ? log_.str() : ""; } + + Status status() const { return status_; } + + private: + std::ostringstream log_; + Status status_; +}; + +class JsonParserTest : public ::testing::Test { + protected: + Log log_; +}; + +TEST_F(JsonParserTest, SimpleDictionary) { + std::string json = "{\"foo\": 42}"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_TRUE(log_.status().ok()); + EXPECT_EQ( + "map begin\n" + "string16: foo\n" + "int: 42\n" + "map end\n", + log_.str()); +} + +TEST_F(JsonParserTest, Whitespace) { + std::string json = "\n {\n\"msg\"\n: \v\"Hello, world.\"\t\r}\t"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_TRUE(log_.status().ok()); + EXPECT_EQ( + "map begin\n" + "string16: msg\n" + "string16: Hello, world.\n" + "map end\n", + log_.str()); +} + +TEST_F(JsonParserTest, NestedDictionary) { + std::string json = "{\"foo\": {\"bar\": {\"baz\": 1}, \"bar2\": 2}}"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_TRUE(log_.status().ok()); + EXPECT_EQ( + "map begin\n" + "string16: foo\n" + "map begin\n" + "string16: bar\n" + "map begin\n" + "string16: baz\n" + "int: 1\n" + "map end\n" + "string16: bar2\n" + "int: 2\n" + "map end\n" + "map end\n", + log_.str()); +} + +TEST_F(JsonParserTest, Doubles) { + std::string json = "{\"foo\": 3.1415, \"bar\": 31415e-4}"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_TRUE(log_.status().ok()); + EXPECT_EQ( + "map begin\n" + "string16: foo\n" + "double: 3.1415\n" + "string16: bar\n" + "double: 3.1415\n" + "map end\n", + log_.str()); +} + +TEST_F(JsonParserTest, Unicode) { + // Globe character. 0xF0 0x9F 0x8C 0x8E in utf8, 0xD83C 0xDF0E in utf16. + std::string json = "{\"msg\": \"Hello, \\uD83C\\uDF0E.\"}"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_TRUE(log_.status().ok()); + EXPECT_EQ( + "map begin\n" + "string16: msg\n" + "string16: Hello, šŸŒŽ.\n" + "map end\n", + log_.str()); +} + +TEST_F(JsonParserTest, Unicode_ParseUtf16) { + // Globe character. utf8: 0xF0 0x9F 0x8C 0x8E; utf16: 0xD83C 0xDF0E. + // Crescent moon character. utf8: 0xF0 0x9F 0x8C 0x99; utf16: 0xD83C 0xDF19. + + // We provide the moon with json escape, but the earth as utf16 input. + // Either way they arrive as utf8 (after decoding in log_.str()). + std::vector json = + UTF8ToUTF16(SpanFrom("{\"space\": \"šŸŒŽ \\uD83C\\uDF19.\"}")); + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_TRUE(log_.status().ok()); + EXPECT_EQ( + "map begin\n" + "string16: space\n" + "string16: šŸŒŽ šŸŒ™.\n" + "map end\n", + log_.str()); +} + +TEST_F(JsonParserTest, Unicode_ParseUtf8) { + // Used below: + // Š³Š»Š°ŃŠ½Š¾ŃŃ‚ŃŒ - example for 2 byte utf8, Russian word "glasnost" + // 屋 - example for 3 byte utf8, Chinese word for "house" + // šŸŒŽ - example for 4 byte utf8: 0xF0 0x9F 0x8C 0x8E; utf16: 0xD83C 0xDF0E. + // šŸŒ™ - example for escapes: utf8: 0xF0 0x9F 0x8C 0x99; utf16: 0xD83C 0xDF19. + + // We provide the moon with json escape, but the earth as utf8 input. + // Either way they arrive as utf8 (after decoding in log_.str()). + std::string json = + "{" + "\"escapes\": \"\\uD83C\\uDF19\"," + "\"2 byte\":\"Š³Š»Š°ŃŠ½Š¾ŃŃ‚ŃŒ\"," + "\"3 byte\":\"屋\"," + "\"4 byte\":\"šŸŒŽ\"" + "}"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_TRUE(log_.status().ok()); + EXPECT_EQ( + "map begin\n" + "string16: escapes\n" + "string16: šŸŒ™\n" + "string16: 2 byte\n" + "string16: Š³Š»Š°ŃŠ½Š¾ŃŃ‚ŃŒ\n" + "string16: 3 byte\n" + "string16: 屋\n" + "string16: 4 byte\n" + "string16: šŸŒŽ\n" + "map end\n", + log_.str()); +} + +TEST_F(JsonParserTest, UnprocessedInputRemainsError) { + // Trailing junk after the valid JSON. + std::string json = "{\"foo\": 3.1415} junk"; + size_t junk_idx = json.find("junk"); + EXPECT_NE(junk_idx, std::string::npos); + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, log_.status().error); + EXPECT_EQ(junk_idx, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +std::string MakeNestedJson(int depth) { + std::string json; + for (int ii = 0; ii < depth; ++ii) + json += "{\"foo\":"; + json += "42"; + for (int ii = 0; ii < depth; ++ii) + json += "}"; + return json; +} + +TEST_F(JsonParserTest, StackLimitExceededError_BelowLimit) { + // kStackLimit is 300 (see json_parser.cc). First let's + // try with a small nested example. + std::string json_3 = MakeNestedJson(3); + ParseJSON(GetTestPlatform(), SpanFrom(json_3), &log_); + EXPECT_TRUE(log_.status().ok()); + EXPECT_EQ( + "map begin\n" + "string16: foo\n" + "map begin\n" + "string16: foo\n" + "map begin\n" + "string16: foo\n" + "int: 42\n" + "map end\n" + "map end\n" + "map end\n", + log_.str()); +} + +TEST_F(JsonParserTest, StackLimitExceededError_AtLimit) { + // Now with kStackLimit (300). + std::string json_limit = MakeNestedJson(300); + ParseJSON(GetTestPlatform(), + span(reinterpret_cast(json_limit.data()), + json_limit.size()), + &log_); + EXPECT_TRUE(log_.status().ok()); +} + +TEST_F(JsonParserTest, StackLimitExceededError_AboveLimit) { + // Now with kStackLimit + 1 (301) - it exceeds in the innermost instance. + std::string exceeded = MakeNestedJson(301); + ParseJSON(GetTestPlatform(), SpanFrom(exceeded), &log_); + EXPECT_EQ(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, log_.status().error); + EXPECT_EQ(strlen("{\"foo\":") * 301, log_.status().pos); +} + +TEST_F(JsonParserTest, StackLimitExceededError_WayAboveLimit) { + // Now way past the limit. Still, the point of exceeding is 301. + std::string far_out = MakeNestedJson(320); + ParseJSON(GetTestPlatform(), SpanFrom(far_out), &log_); + EXPECT_EQ(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, log_.status().error); + EXPECT_EQ(strlen("{\"foo\":") * 301, log_.status().pos); +} + +TEST_F(JsonParserTest, NoInputError) { + std::string json = ""; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_NO_INPUT, log_.status().error); + EXPECT_EQ(0u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +TEST_F(JsonParserTest, InvalidTokenError) { + std::string json = "|"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_INVALID_TOKEN, log_.status().error); + EXPECT_EQ(0u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +TEST_F(JsonParserTest, InvalidNumberError) { + // Mantissa exceeds max (the constant used here is int64_t max). + std::string json = "1E9223372036854775807"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_INVALID_NUMBER, log_.status().error); + EXPECT_EQ(0u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +TEST_F(JsonParserTest, InvalidStringError) { + // \x22 is an unsupported escape sequence + std::string json = "\"foo\\x22\""; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_INVALID_STRING, log_.status().error); + EXPECT_EQ(0u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +TEST_F(JsonParserTest, UnexpectedArrayEndError) { + std::string json = "[1,2,]"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, log_.status().error); + EXPECT_EQ(5u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +TEST_F(JsonParserTest, CommaOrArrayEndExpectedError) { + std::string json = "[1,2 2"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED, + log_.status().error); + EXPECT_EQ(5u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +TEST_F(JsonParserTest, StringLiteralExpectedError) { + // There's an error because the key bar, a string, is not terminated. + std::string json = "{\"foo\": 3.1415, \"bar: 31415e-4}"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_STRING_LITERAL_EXPECTED, log_.status().error); + EXPECT_EQ(16u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +TEST_F(JsonParserTest, ColonExpectedError) { + std::string json = "{\"foo\", 42}"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_COLON_EXPECTED, log_.status().error); + EXPECT_EQ(6u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +TEST_F(JsonParserTest, UnexpectedMapEndError) { + std::string json = "{\"foo\": 42, }"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_UNEXPECTED_MAP_END, log_.status().error); + EXPECT_EQ(12u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +TEST_F(JsonParserTest, CommaOrMapEndExpectedError) { + // The second separator should be a comma. + std::string json = "{\"foo\": 3.1415: \"bar\": 0}"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED, log_.status().error); + EXPECT_EQ(14u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +TEST_F(JsonParserTest, ValueExpectedError) { + std::string json = "}"; + ParseJSON(GetTestPlatform(), SpanFrom(json), &log_); + EXPECT_EQ(Error::JSON_PARSER_VALUE_EXPECTED, log_.status().error); + EXPECT_EQ(0u, log_.status().pos); + EXPECT_EQ("", log_.str()); +} + +template +class ConvertJSONToCBORTest : public ::testing::Test {}; + +using ContainerTestTypes = ::testing::Types, std::string>; +TYPED_TEST_SUITE(ConvertJSONToCBORTest, ContainerTestTypes); + +TYPED_TEST(ConvertJSONToCBORTest, RoundTripValidJson) { + std::string json_in = "{\"msg\":\"Hello, world.\",\"lst\":[1,2,3]}"; + TypeParam json(json_in.begin(), json_in.end()); + TypeParam cbor; + { + Status status = ConvertJSONToCBOR(GetTestPlatform(), SpanFrom(json), &cbor); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(Status::npos(), status.pos); + } + TypeParam roundtrip_json; + { + Status status = + ConvertCBORToJSON(GetTestPlatform(), SpanFrom(cbor), &roundtrip_json); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(Status::npos(), status.pos); + } + EXPECT_EQ(json, roundtrip_json); +} + +TYPED_TEST(ConvertJSONToCBORTest, RoundTripValidJson16) { + std::vector json16 = { + '{', '"', 'm', 's', 'g', '"', ':', '"', 'H', 'e', 'l', 'l', + 'o', ',', ' ', 0xd83c, 0xdf0e, '.', '"', ',', '"', 'l', 's', 't', + '"', ':', '[', '1', ',', '2', ',', '3', ']', '}'}; + TypeParam cbor; + { + Status status = ConvertJSONToCBOR( + GetTestPlatform(), span(json16.data(), json16.size()), &cbor); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(Status::npos(), status.pos); + } + TypeParam roundtrip_json; + { + Status status = + ConvertCBORToJSON(GetTestPlatform(), SpanFrom(cbor), &roundtrip_json); + EXPECT_EQ(Error::OK, status.error); + EXPECT_EQ(Status::npos(), status.pos); + } + std::string json = "{\"msg\":\"Hello, \\ud83c\\udf0e.\",\"lst\":[1,2,3]}"; + TypeParam expected_json(json.begin(), json.end()); + EXPECT_EQ(expected_json, roundtrip_json); +} +} // namespace json +} // namespace v8_inspector_protocol_encoding diff --git a/tools/inspector_protocol/encoding/encoding_test_helper.h b/tools/inspector_protocol/encoding/encoding_test_helper.h new file mode 100644 index 0000000000..84da2e72e8 --- /dev/null +++ b/tools/inspector_protocol/encoding/encoding_test_helper.h @@ -0,0 +1,33 @@ +// Copyright 2019 The V8 Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// This file is V8 specific, to make encoding_test.cc work. +// It is not rolled from the upstream project. + +#ifndef V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_TEST_HELPER_H_ +#define V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_TEST_HELPER_H_ + +#include +#include + +#include "src/base/logging.h" +#include "src/inspector/v8-string-conversions.h" +#include "testing/gmock/include/gmock/gmock.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace v8_inspector_protocol_encoding { + +std::string UTF16ToUTF8(span in) { + return v8_inspector::UTF16ToUTF8(in.data(), in.size()); +} + +std::vector UTF8ToUTF16(span in) { + std::basic_string utf16 = v8_inspector::UTF8ToUTF16( + reinterpret_cast(in.data()), in.size()); + return std::vector(utf16.begin(), utf16.end()); +} + +} // namespace v8_inspector_protocol_encoding + +#endif // V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_TEST_HELPER_H_ diff --git a/tools/inspector_protocol/inspector_protocol.gni b/tools/inspector_protocol/inspector_protocol.gni index 5dcc1f522d..d612fb6aeb 100644 --- a/tools/inspector_protocol/inspector_protocol.gni +++ b/tools/inspector_protocol/inspector_protocol.gni @@ -27,13 +27,16 @@ template("inspector_protocol_generate") { inspector_protocol_dir = invoker.inspector_protocol_dir action(target_name) { - script = "$inspector_protocol_dir/CodeGenerator.py" + script = "$inspector_protocol_dir/code_generator.py" inputs = [ invoker.config_file, + "$inspector_protocol_dir/lib/base_string_adapter_cc.template", + "$inspector_protocol_dir/lib/base_string_adapter_h.template", + "$inspector_protocol_dir/lib/encoding_h.template", + "$inspector_protocol_dir/lib/encoding_cpp.template", "$inspector_protocol_dir/lib/Allocator_h.template", "$inspector_protocol_dir/lib/Array_h.template", - "$inspector_protocol_dir/lib/Collections_h.template", "$inspector_protocol_dir/lib/DispatcherBase_cpp.template", "$inspector_protocol_dir/lib/DispatcherBase_h.template", "$inspector_protocol_dir/lib/ErrorSupport_cpp.template", diff --git a/tools/inspector_protocol/inspector_protocol.gypi b/tools/inspector_protocol/inspector_protocol.gypi index 1fb7119b5f..d614474e69 100644 --- a/tools/inspector_protocol/inspector_protocol.gypi +++ b/tools/inspector_protocol/inspector_protocol.gypi @@ -5,9 +5,10 @@ { 'variables': { 'inspector_protocol_files': [ + 'lib/encoding_h.template', + 'lib/encoding_cpp.template', 'lib/Allocator_h.template', 'lib/Array_h.template', - 'lib/Collections_h.template', 'lib/DispatcherBase_cpp.template', 'lib/DispatcherBase_h.template', 'lib/ErrorSupport_cpp.template', @@ -27,7 +28,7 @@ 'templates/Imported_h.template', 'templates/TypeBuilder_cpp.template', 'templates/TypeBuilder_h.template', - 'CodeGenerator.py', + 'code_generator.py', ] } } diff --git a/tools/inspector_protocol/lib/CBOR_cpp.template b/tools/inspector_protocol/lib/CBOR_cpp.template deleted file mode 100644 index 36750b19a3..0000000000 --- a/tools/inspector_protocol/lib/CBOR_cpp.template +++ /dev/null @@ -1,803 +0,0 @@ -{# This template is generated by gen_cbor_templates.py. #} -// Generated by lib/CBOR_cpp.template. - -// Copyright 2019 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - - -#include -#include - -{% for namespace in config.protocol.namespace %} -namespace {{namespace}} { -{% endfor %} - -// ===== encoding/cbor.cc ===== - -using namespace cbor; - -namespace { - -// See RFC 7049 Section 2.3, Table 2. -static constexpr uint8_t kEncodedTrue = - EncodeInitialByte(MajorType::SIMPLE_VALUE, 21); -static constexpr uint8_t kEncodedFalse = - EncodeInitialByte(MajorType::SIMPLE_VALUE, 20); -static constexpr uint8_t kEncodedNull = - EncodeInitialByte(MajorType::SIMPLE_VALUE, 22); -static constexpr uint8_t kInitialByteForDouble = - EncodeInitialByte(MajorType::SIMPLE_VALUE, 27); - -} // namespace - -uint8_t EncodeTrue() { return kEncodedTrue; } -uint8_t EncodeFalse() { return kEncodedFalse; } -uint8_t EncodeNull() { return kEncodedNull; } - -uint8_t EncodeIndefiniteLengthArrayStart() { - return kInitialByteIndefiniteLengthArray; -} - -uint8_t EncodeIndefiniteLengthMapStart() { - return kInitialByteIndefiniteLengthMap; -} - -uint8_t EncodeStop() { return kStopByte; } - -namespace { -// See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for -// arbitrary binary data encoded as BYTE_STRING. -static constexpr uint8_t kExpectedConversionToBase64Tag = - EncodeInitialByte(MajorType::TAG, 22); - -// When parsing CBOR, we limit recursion depth for objects and arrays -// to this constant. -static constexpr int kStackLimit = 1000; - -// Writes the bytes for |v| to |out|, starting with the most significant byte. -// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html -template -void WriteBytesMostSignificantByteFirst(T v, std::vector* out) { - for (int shift_bytes = sizeof(T) - 1; shift_bytes >= 0; --shift_bytes) - out->push_back(0xff & (v >> (shift_bytes * 8))); -} -} // namespace - -namespace cbor_internals { -// Writes the start of a token with |type|. The |value| may indicate the size, -// or it may be the payload if the value is an unsigned integer. -void WriteTokenStart(MajorType type, uint64_t value, - std::vector* encoded) { - if (value < 24) { - // Values 0-23 are encoded directly into the additional info of the - // initial byte. - encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value)); - return; - } - if (value <= std::numeric_limits::max()) { - // Values 24-255 are encoded with one initial byte, followed by the value. - encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte)); - encoded->push_back(value); - return; - } - if (value <= std::numeric_limits::max()) { - // Values 256-65535: 1 initial byte + 2 bytes payload. - encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes)); - WriteBytesMostSignificantByteFirst(value, encoded); - return; - } - if (value <= std::numeric_limits::max()) { - // 32 bit uint: 1 initial byte + 4 bytes payload. - encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes)); - WriteBytesMostSignificantByteFirst(static_cast(value), - encoded); - return; - } - // 64 bit uint: 1 initial byte + 8 bytes payload. - encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes)); - WriteBytesMostSignificantByteFirst(value, encoded); -} -} // namespace cbor_internals - -namespace { -// Extracts sizeof(T) bytes from |in| to extract a value of type T -// (e.g. uint64_t, uint32_t, ...), most significant byte first. -// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html -template -T ReadBytesMostSignificantByteFirst(span in) { - assert(static_cast(in.size()) >= sizeof(T)); - T result = 0; - for (std::size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes) - result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8); - return result; -} -} // namespace - -namespace cbor_internals { -int8_t ReadTokenStart(span bytes, MajorType* type, uint64_t* value) { - if (bytes.empty()) return -1; - uint8_t initial_byte = bytes[0]; - *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift); - - uint8_t additional_information = initial_byte & kAdditionalInformationMask; - if (additional_information < 24) { - // Values 0-23 are encoded directly into the additional info of the - // initial byte. - *value = additional_information; - return 1; - } - if (additional_information == kAdditionalInformation1Byte) { - // Values 24-255 are encoded with one initial byte, followed by the value. - if (bytes.size() < 2) return -1; - *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); - return 2; - } - if (additional_information == kAdditionalInformation2Bytes) { - // Values 256-65535: 1 initial byte + 2 bytes payload. - if (static_cast(bytes.size()) < 1 + sizeof(uint16_t)) - return -1; - *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); - return 3; - } - if (additional_information == kAdditionalInformation4Bytes) { - // 32 bit uint: 1 initial byte + 4 bytes payload. - if (static_cast(bytes.size()) < 1 + sizeof(uint32_t)) - return -1; - *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); - return 5; - } - if (additional_information == kAdditionalInformation8Bytes) { - // 64 bit uint: 1 initial byte + 8 bytes payload. - if (static_cast(bytes.size()) < 1 + sizeof(uint64_t)) - return -1; - *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); - return 9; - } - return -1; -} -} // namespace cbor_internals - -using cbor_internals::WriteTokenStart; -using cbor_internals::ReadTokenStart; - -void EncodeInt32(int32_t value, std::vector* out) { - if (value >= 0) { - WriteTokenStart(MajorType::UNSIGNED, value, out); - } else { - uint64_t representation = static_cast(-(value + 1)); - WriteTokenStart(MajorType::NEGATIVE, representation, out); - } -} - -void EncodeString16(span in, std::vector* out) { - uint64_t byte_length = static_cast(in.size_bytes()); - WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); - // When emitting UTF16 characters, we always write the least significant byte - // first; this is because it's the native representation for X86. - // TODO(johannes): Implement a more efficient thing here later, e.g. - // casting *iff* the machine has this byte order. - // The wire format for UTF16 chars will probably remain the same - // (least significant byte first) since this way we can have - // golden files, unittests, etc. that port easily and universally. - // See also: - // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html - for (const uint16_t two_bytes : in) { - out->push_back(two_bytes); - out->push_back(two_bytes >> 8); - } -} - -void EncodeString8(span in, std::vector* out) { - WriteTokenStart(MajorType::STRING, static_cast(in.size_bytes()), - out); - out->insert(out->end(), in.begin(), in.end()); -} - -void EncodeBinary(span in, std::vector* out) { - out->push_back(kExpectedConversionToBase64Tag); - uint64_t byte_length = static_cast(in.size_bytes()); - WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); - out->insert(out->end(), in.begin(), in.end()); -} - -// A double is encoded with a specific initial byte -// (kInitialByteForDouble) plus the 64 bits of payload for its value. -constexpr std::ptrdiff_t kEncodedDoubleSize = 1 + sizeof(uint64_t); - -// An envelope is encoded with a specific initial byte -// (kInitialByteForEnvelope), plus the start byte for a BYTE_STRING with a 32 -// bit wide length, plus a 32 bit length for that string. -constexpr std::ptrdiff_t kEncodedEnvelopeHeaderSize = 1 + 1 + sizeof(uint32_t); - -void EncodeDouble(double value, std::vector* out) { - // The additional_info=27 indicates 64 bits for the double follow. - // See RFC 7049 Section 2.3, Table 1. - out->push_back(kInitialByteForDouble); - union { - double from_double; - uint64_t to_uint64; - } reinterpret; - reinterpret.from_double = value; - WriteBytesMostSignificantByteFirst(reinterpret.to_uint64, out); -} - -void EnvelopeEncoder::EncodeStart(std::vector* out) { - assert(byte_size_pos_ == 0); - out->push_back(kInitialByteForEnvelope); - out->push_back(kInitialByteFor32BitLengthByteString); - byte_size_pos_ = out->size(); - out->resize(out->size() + sizeof(uint32_t)); -} - -bool EnvelopeEncoder::EncodeStop(std::vector* out) { - assert(byte_size_pos_ != 0); - // The byte size is the size of the payload, that is, all the - // bytes that were written past the byte size position itself. - uint64_t byte_size = out->size() - (byte_size_pos_ + sizeof(uint32_t)); - // We store exactly 4 bytes, so at most INT32MAX, with most significant - // byte first. - if (byte_size > std::numeric_limits::max()) return false; - for (int shift_bytes = sizeof(uint32_t) - 1; shift_bytes >= 0; - --shift_bytes) { - (*out)[byte_size_pos_++] = 0xff & (byte_size >> (shift_bytes * 8)); - } - return true; -} - -namespace { -class JSONToCBOREncoder : public JSONParserHandler { - public: - JSONToCBOREncoder(std::vector* out, Status* status) - : out_(out), status_(status) { - *status_ = Status(); - } - - void HandleObjectBegin() override { - envelopes_.emplace_back(); - envelopes_.back().EncodeStart(out_); - out_->push_back(kInitialByteIndefiniteLengthMap); - } - - void HandleObjectEnd() override { - out_->push_back(kStopByte); - assert(!envelopes_.empty()); - envelopes_.back().EncodeStop(out_); - envelopes_.pop_back(); - } - - void HandleArrayBegin() override { - envelopes_.emplace_back(); - envelopes_.back().EncodeStart(out_); - out_->push_back(kInitialByteIndefiniteLengthArray); - } - - void HandleArrayEnd() override { - out_->push_back(kStopByte); - assert(!envelopes_.empty()); - envelopes_.back().EncodeStop(out_); - envelopes_.pop_back(); - } - - void HandleString16(std::vector chars) override { - for (uint16_t ch : chars) { - if (ch >= 0x7f) { - // If there's at least one non-7bit character, we encode as UTF16. - EncodeString16(span(chars.data(), chars.size()), out_); - return; - } - } - std::vector sevenbit_chars(chars.begin(), chars.end()); - EncodeString8(span(sevenbit_chars.data(), sevenbit_chars.size()), - out_); - } - - void HandleBinary(std::vector bytes) override { - EncodeBinary(span(bytes.data(), bytes.size()), out_); - } - - void HandleDouble(double value) override { EncodeDouble(value, out_); } - - void HandleInt32(int32_t value) override { EncodeInt32(value, out_); } - - void HandleBool(bool value) override { - // See RFC 7049 Section 2.3, Table 2. - out_->push_back(value ? kEncodedTrue : kEncodedFalse); - } - - void HandleNull() override { - // See RFC 7049 Section 2.3, Table 2. - out_->push_back(kEncodedNull); - } - - void HandleError(Status error) override { - assert(!error.ok()); - *status_ = error; - out_->clear(); - } - - private: - std::vector* out_; - std::vector envelopes_; - Status* status_; -}; -} // namespace - -std::unique_ptr NewJSONToCBOREncoder( - std::vector* out, Status* status) { - return std::unique_ptr(new JSONToCBOREncoder(out, status)); -} - -namespace { -// Below are three parsing routines for CBOR, which cover enough -// to roundtrip JSON messages. -bool ParseMap(int32_t stack_depth, CBORTokenizer* tokenizer, - JSONParserHandler* out); -bool ParseArray(int32_t stack_depth, CBORTokenizer* tokenizer, - JSONParserHandler* out); -bool ParseValue(int32_t stack_depth, CBORTokenizer* tokenizer, - JSONParserHandler* out); - -void ParseUTF16String(CBORTokenizer* tokenizer, JSONParserHandler* out) { - std::vector value; - span rep = tokenizer->GetString16WireRep(); - for (std::ptrdiff_t ii = 0; ii < rep.size(); ii += 2) - value.push_back((rep[ii + 1] << 8) | rep[ii]); - out->HandleString16(std::move(value)); - tokenizer->Next(); -} - -// For now this method only covers US-ASCII. Later, we may allow UTF8. -bool ParseASCIIString(CBORTokenizer* tokenizer, JSONParserHandler* out) { - assert(tokenizer->TokenTag() == CBORTokenTag::STRING8); - std::vector value16; - for (uint8_t ch : tokenizer->GetString8()) { - // We only accept us-ascii (7 bit) strings here. Other strings must - // be encoded with 16 bit (the BYTE_STRING case). - if (ch >= 0x7f) { - out->HandleError( - Status{Error::CBOR_STRING8_MUST_BE_7BIT, tokenizer->Status().pos}); - return false; - } - value16.push_back(ch); - } - out->HandleString16(std::move(value16)); - tokenizer->Next(); - return true; -} - -bool ParseValue(int32_t stack_depth, CBORTokenizer* tokenizer, - JSONParserHandler* out) { - if (stack_depth > kStackLimit) { - out->HandleError( - Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos}); - return false; - } - // Skip past the envelope to get to what's inside. - if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE) - tokenizer->EnterEnvelope(); - switch (tokenizer->TokenTag()) { - case CBORTokenTag::ERROR_VALUE: - out->HandleError(tokenizer->Status()); - return false; - case CBORTokenTag::DONE: - out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE, - tokenizer->Status().pos}); - return false; - case CBORTokenTag::TRUE_VALUE: - out->HandleBool(true); - tokenizer->Next(); - return true; - case CBORTokenTag::FALSE_VALUE: - out->HandleBool(false); - tokenizer->Next(); - return true; - case CBORTokenTag::NULL_VALUE: - out->HandleNull(); - tokenizer->Next(); - return true; - case CBORTokenTag::INT32: - out->HandleInt32(tokenizer->GetInt32()); - tokenizer->Next(); - return true; - case CBORTokenTag::DOUBLE: - out->HandleDouble(tokenizer->GetDouble()); - tokenizer->Next(); - return true; - case CBORTokenTag::STRING8: - return ParseASCIIString(tokenizer, out); - case CBORTokenTag::STRING16: - ParseUTF16String(tokenizer, out); - return true; - case CBORTokenTag::BINARY: { - span binary = tokenizer->GetBinary(); - out->HandleBinary(std::vector(binary.begin(), binary.end())); - tokenizer->Next(); - return true; - } - case CBORTokenTag::MAP_START: - return ParseMap(stack_depth + 1, tokenizer, out); - case CBORTokenTag::ARRAY_START: - return ParseArray(stack_depth + 1, tokenizer, out); - default: - out->HandleError( - Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos}); - return false; - } -} - -// |bytes| must start with the indefinite length array byte, so basically, -// ParseArray may only be called after an indefinite length array has been -// detected. -bool ParseArray(int32_t stack_depth, CBORTokenizer* tokenizer, - JSONParserHandler* out) { - assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START); - tokenizer->Next(); - out->HandleArrayBegin(); - while (tokenizer->TokenTag() != CBORTokenTag::STOP) { - if (tokenizer->TokenTag() == CBORTokenTag::DONE) { - out->HandleError( - Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos}); - return false; - } - if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { - out->HandleError(tokenizer->Status()); - return false; - } - // Parse value. - if (!ParseValue(stack_depth, tokenizer, out)) return false; - } - out->HandleArrayEnd(); - tokenizer->Next(); - return true; -} - -// |bytes| must start with the indefinite length array byte, so basically, -// ParseArray may only be called after an indefinite length array has been -// detected. -bool ParseMap(int32_t stack_depth, CBORTokenizer* tokenizer, - JSONParserHandler* out) { - assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START); - out->HandleObjectBegin(); - tokenizer->Next(); - while (tokenizer->TokenTag() != CBORTokenTag::STOP) { - if (tokenizer->TokenTag() == CBORTokenTag::DONE) { - out->HandleError( - Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos}); - return false; - } - if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { - out->HandleError(tokenizer->Status()); - return false; - } - // Parse key. - if (tokenizer->TokenTag() == CBORTokenTag::STRING8) { - if (!ParseASCIIString(tokenizer, out)) return false; - } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) { - ParseUTF16String(tokenizer, out); - } else { - out->HandleError( - Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos}); - return false; - } - // Parse value. - if (!ParseValue(stack_depth, tokenizer, out)) return false; - } - out->HandleObjectEnd(); - tokenizer->Next(); - return true; -} -} // namespace - -void ParseCBOR(span bytes, JSONParserHandler* json_out) { - if (bytes.empty()) { - json_out->HandleError(Status{Error::CBOR_NO_INPUT, 0}); - return; - } - if (bytes[0] != kInitialByteForEnvelope) { - json_out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0}); - return; - } - CBORTokenizer tokenizer(bytes); - if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { - json_out->HandleError(tokenizer.Status()); - return; - } - // We checked for the envelope start byte above, so the tokenizer - // must agree here, since it's not an error. - assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE); - tokenizer.EnterEnvelope(); - if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) { - json_out->HandleError( - Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos}); - return; - } - if (!ParseMap(/*stack_depth=*/1, &tokenizer, json_out)) return; - if (tokenizer.TokenTag() == CBORTokenTag::DONE) return; - if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { - json_out->HandleError(tokenizer.Status()); - return; - } - json_out->HandleError( - Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos}); -} - -CBORTokenizer::CBORTokenizer(span bytes) : bytes_(bytes) { - ReadNextToken(/*enter_envelope=*/false); -} -CBORTokenizer::~CBORTokenizer() {} - -CBORTokenTag CBORTokenizer::TokenTag() const { return token_tag_; } - -void CBORTokenizer::Next() { - if (token_tag_ == CBORTokenTag::ERROR_VALUE || token_tag_ == CBORTokenTag::DONE) - return; - ReadNextToken(/*enter_envelope=*/false); -} - -void CBORTokenizer::EnterEnvelope() { - assert(token_tag_ == CBORTokenTag::ENVELOPE); - ReadNextToken(/*enter_envelope=*/true); -} - -Status CBORTokenizer::Status() const { return status_; } - -int32_t CBORTokenizer::GetInt32() const { - assert(token_tag_ == CBORTokenTag::INT32); - // The range checks happen in ::ReadNextToken(). - return static_cast( - token_start_type_ == MajorType::UNSIGNED - ? token_start_internal_value_ - : -static_cast(token_start_internal_value_) - 1); -} - -double CBORTokenizer::GetDouble() const { - assert(token_tag_ == CBORTokenTag::DOUBLE); - union { - uint64_t from_uint64; - double to_double; - } reinterpret; - reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst( - bytes_.subspan(status_.pos + 1)); - return reinterpret.to_double; -} - -span CBORTokenizer::GetString8() const { - assert(token_tag_ == CBORTokenTag::STRING8); - auto length = static_cast(token_start_internal_value_); - return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); -} - -span CBORTokenizer::GetString16WireRep() const { - assert(token_tag_ == CBORTokenTag::STRING16); - auto length = static_cast(token_start_internal_value_); - return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); -} - -span CBORTokenizer::GetBinary() const { - assert(token_tag_ == CBORTokenTag::BINARY); - auto length = static_cast(token_start_internal_value_); - return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); -} - -void CBORTokenizer::ReadNextToken(bool enter_envelope) { - if (enter_envelope) { - status_.pos += kEncodedEnvelopeHeaderSize; - } else { - status_.pos = - status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_; - } - status_.error = Error::OK; - if (status_.pos >= bytes_.size()) { - token_tag_ = CBORTokenTag::DONE; - return; - } - switch (bytes_[status_.pos]) { - case kStopByte: - SetToken(CBORTokenTag::STOP, 1); - return; - case kInitialByteIndefiniteLengthMap: - SetToken(CBORTokenTag::MAP_START, 1); - return; - case kInitialByteIndefiniteLengthArray: - SetToken(CBORTokenTag::ARRAY_START, 1); - return; - case kEncodedTrue: - SetToken(CBORTokenTag::TRUE_VALUE, 1); - return; - case kEncodedFalse: - SetToken(CBORTokenTag::FALSE_VALUE, 1); - return; - case kEncodedNull: - SetToken(CBORTokenTag::NULL_VALUE, 1); - return; - case kExpectedConversionToBase64Tag: { // BINARY - int8_t bytes_read = - ReadTokenStart(bytes_.subspan(status_.pos + 1), &token_start_type_, - &token_start_internal_value_); - int64_t token_byte_length = 1 + bytes_read + token_start_internal_value_; - if (-1 == bytes_read || token_start_type_ != MajorType::BYTE_STRING || - status_.pos + token_byte_length > bytes_.size()) { - SetError(Error::CBOR_INVALID_BINARY); - return; - } - SetToken(CBORTokenTag::BINARY, - static_cast(token_byte_length)); - return; - } - case kInitialByteForDouble: { // DOUBLE - if (status_.pos + kEncodedDoubleSize > bytes_.size()) { - SetError(Error::CBOR_INVALID_DOUBLE); - return; - } - SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize); - return; - } - case kInitialByteForEnvelope: { // ENVELOPE - if (status_.pos + kEncodedEnvelopeHeaderSize > bytes_.size()) { - SetError(Error::CBOR_INVALID_ENVELOPE); - return; - } - // The envelope must be a byte string with 32 bit length. - if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) { - SetError(Error::CBOR_INVALID_ENVELOPE); - return; - } - // Read the length of the byte string. - token_start_internal_value_ = ReadBytesMostSignificantByteFirst( - bytes_.subspan(status_.pos + 2)); - // Make sure the payload is contained within the message. - if (token_start_internal_value_ + kEncodedEnvelopeHeaderSize + - status_.pos > - static_cast(bytes_.size())) { - SetError(Error::CBOR_INVALID_ENVELOPE); - return; - } - auto length = static_cast(token_start_internal_value_); - SetToken(CBORTokenTag::ENVELOPE, - kEncodedEnvelopeHeaderSize + length); - return; - } - default: { - span remainder = - bytes_.subspan(status_.pos, bytes_.size() - status_.pos); - assert(!remainder.empty()); - int8_t token_start_length = ReadTokenStart(remainder, &token_start_type_, - &token_start_internal_value_); - bool success = token_start_length != -1; - switch (token_start_type_) { - case MajorType::UNSIGNED: // INT32. - if (!success || std::numeric_limits::max() < - token_start_internal_value_) { - SetError(Error::CBOR_INVALID_INT32); - return; - } - SetToken(CBORTokenTag::INT32, token_start_length); - return; - case MajorType::NEGATIVE: // INT32. - if (!success || - std::numeric_limits::min() > - -static_cast(token_start_internal_value_) - 1) { - SetError(Error::CBOR_INVALID_INT32); - return; - } - SetToken(CBORTokenTag::INT32, token_start_length); - return; - case MajorType::STRING: { // STRING8. - if (!success || remainder.size() < static_cast( - token_start_internal_value_)) { - SetError(Error::CBOR_INVALID_STRING8); - return; - } - auto length = static_cast(token_start_internal_value_); - SetToken(CBORTokenTag::STRING8, token_start_length + length); - return; - } - case MajorType::BYTE_STRING: { // STRING16. - if (!success || - remainder.size() < - static_cast(token_start_internal_value_) || - // Must be divisible by 2 since UTF16 is 2 bytes per character. - token_start_internal_value_ & 1) { - SetError(Error::CBOR_INVALID_STRING16); - return; - } - auto length = static_cast(token_start_internal_value_); - SetToken(CBORTokenTag::STRING16, token_start_length + length); - return; - } - case MajorType::ARRAY: - case MajorType::MAP: - case MajorType::TAG: - case MajorType::SIMPLE_VALUE: - SetError(Error::CBOR_UNSUPPORTED_VALUE); - return; - } - } - } -} - -void CBORTokenizer::SetToken(CBORTokenTag token_tag, - std::ptrdiff_t token_byte_length) { - token_tag_ = token_tag; - token_byte_length_ = token_byte_length; -} - -void CBORTokenizer::SetError(Error error) { - token_tag_ = CBORTokenTag::ERROR_VALUE; - status_.error = error; -} - -#if 0 -void DumpCBOR(span cbor) { - std::string indent; - CBORTokenizer tokenizer(cbor); - while (true) { - fprintf(stderr, "%s", indent.c_str()); - switch (tokenizer.TokenTag()) { - case CBORTokenTag::ERROR_VALUE: - fprintf(stderr, "ERROR {status.error=%d, status.pos=%ld}\n", - tokenizer.Status().error, tokenizer.Status().pos); - return; - case CBORTokenTag::DONE: - fprintf(stderr, "DONE\n"); - return; - case CBORTokenTag::TRUE_VALUE: - fprintf(stderr, "TRUE_VALUE\n"); - break; - case CBORTokenTag::FALSE_VALUE: - fprintf(stderr, "FALSE_VALUE\n"); - break; - case CBORTokenTag::NULL_VALUE: - fprintf(stderr, "NULL_VALUE\n"); - break; - case CBORTokenTag::INT32: - fprintf(stderr, "INT32 [%d]\n", tokenizer.GetInt32()); - break; - case CBORTokenTag::DOUBLE: - fprintf(stderr, "DOUBLE [%lf]\n", tokenizer.GetDouble()); - break; - case CBORTokenTag::STRING8: { - span v = tokenizer.GetString8(); - std::string t(v.begin(), v.end()); - fprintf(stderr, "STRING8 [%s]\n", t.c_str()); - break; - } - case CBORTokenTag::STRING16: { - span v = tokenizer.GetString16WireRep(); - std::string t(v.begin(), v.end()); - fprintf(stderr, "STRING16 [%s]\n", t.c_str()); - break; - } - case CBORTokenTag::BINARY: { - span v = tokenizer.GetBinary(); - std::string t(v.begin(), v.end()); - fprintf(stderr, "BINARY [%s]\n", t.c_str()); - break; - } - case CBORTokenTag::MAP_START: - fprintf(stderr, "MAP_START\n"); - indent += " "; - break; - case CBORTokenTag::ARRAY_START: - fprintf(stderr, "ARRAY_START\n"); - indent += " "; - break; - case CBORTokenTag::STOP: - fprintf(stderr, "STOP\n"); - indent.erase(0, 2); - break; - case CBORTokenTag::ENVELOPE: - fprintf(stderr, "ENVELOPE\n"); - tokenizer.EnterEnvelope(); - continue; - } - tokenizer.Next(); - } -} -#endif - - -{% for namespace in config.protocol.namespace %} -} // namespace {{namespace}} -{% endfor %} diff --git a/tools/inspector_protocol/lib/CBOR_h.template b/tools/inspector_protocol/lib/CBOR_h.template deleted file mode 100644 index dd637f19e7..0000000000 --- a/tools/inspector_protocol/lib/CBOR_h.template +++ /dev/null @@ -1,416 +0,0 @@ -{# This template is generated by gen_cbor_templates.py. #} -// Generated by lib/CBOR_h.template. - -// Copyright 2019 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef {{"_".join(config.protocol.namespace)}}_CBOR_h -#define {{"_".join(config.protocol.namespace)}}_CBOR_h - -#include -#include -#include -#include - -{% for namespace in config.protocol.namespace %} -namespace {{namespace}} { -{% endfor %} - -// ===== encoding/status.h ===== - -// Error codes. -enum class Error { - OK = 0, - // JSON parsing errors - json_parser.{h,cc}. - JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01, - JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02, - JSON_PARSER_NO_INPUT = 0x03, - JSON_PARSER_INVALID_TOKEN = 0x04, - JSON_PARSER_INVALID_NUMBER = 0x05, - JSON_PARSER_INVALID_STRING = 0x06, - JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07, - JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08, - JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09, - JSON_PARSER_COLON_EXPECTED = 0x0a, - JSON_PARSER_UNEXPECTED_OBJECT_END = 0x0b, - JSON_PARSER_COMMA_OR_OBJECT_END_EXPECTED = 0x0c, - JSON_PARSER_VALUE_EXPECTED = 0x0d, - - CBOR_INVALID_INT32 = 0x0e, - CBOR_INVALID_DOUBLE = 0x0f, - CBOR_INVALID_ENVELOPE = 0x10, - CBOR_INVALID_STRING8 = 0x11, - CBOR_INVALID_STRING16 = 0x12, - CBOR_INVALID_BINARY = 0x13, - CBOR_UNSUPPORTED_VALUE = 0x14, - CBOR_NO_INPUT = 0x15, - CBOR_INVALID_START_BYTE = 0x16, - CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x17, - CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x18, - CBOR_UNEXPECTED_EOF_IN_MAP = 0x19, - CBOR_INVALID_MAP_KEY = 0x1a, - CBOR_STACK_LIMIT_EXCEEDED = 0x1b, - CBOR_STRING8_MUST_BE_7BIT = 0x1c, - CBOR_TRAILING_JUNK = 0x1d, - CBOR_MAP_START_EXPECTED = 0x1e, -}; - -// A status value with position that can be copied. The default status -// is OK. Usually, error status values should come with a valid position. -struct Status { - static constexpr std::ptrdiff_t npos() { return -1; } - - bool ok() const { return error == Error::OK; } - - Error error = Error::OK; - std::ptrdiff_t pos = npos(); - Status(Error error, std::ptrdiff_t pos) : error(error), pos(pos) {} - Status() = default; -}; - -// ===== encoding/span.h ===== - -// This template is similar to std::span, which will be included in C++20. Like -// std::span it uses ptrdiff_t, which is signed (and thus a bit annoying -// sometimes when comparing with size_t), but other than this it's much simpler. -template -class span { - public: - using index_type = std::ptrdiff_t; - - span() : data_(nullptr), size_(0) {} - span(const T* data, index_type size) : data_(data), size_(size) {} - - const T* data() const { return data_; } - - const T* begin() const { return data_; } - const T* end() const { return data_ + size_; } - - const T& operator[](index_type idx) const { return data_[idx]; } - - span subspan(index_type offset, index_type count) const { - return span(data_ + offset, count); - } - - span subspan(index_type offset) const { - return span(data_ + offset, size_ - offset); - } - - bool empty() const { return size_ == 0; } - - index_type size() const { return size_; } - index_type size_bytes() const { return size_ * sizeof(T); } - - private: - const T* data_; - index_type size_; -}; - -// ===== encoding/json_parser_handler.h ===== - -// Handler interface for JSON parser events. See also json_parser.h. -class JSONParserHandler { - public: - virtual ~JSONParserHandler() = default; - virtual void HandleObjectBegin() = 0; - virtual void HandleObjectEnd() = 0; - virtual void HandleArrayBegin() = 0; - virtual void HandleArrayEnd() = 0; - // TODO(johannes): Support utf8 (requires utf16->utf8 conversion - // internally, including handling mismatched surrogate pairs). - virtual void HandleString16(std::vector chars) = 0; - virtual void HandleBinary(std::vector bytes) = 0; - virtual void HandleDouble(double value) = 0; - virtual void HandleInt32(int32_t value) = 0; - virtual void HandleBool(bool value) = 0; - virtual void HandleNull() = 0; - - // The parser may send one error even after other events have already - // been received. Client code is reponsible to then discard the - // already processed events. - // |error| must be an eror, as in, |error.is_ok()| can't be true. - virtual void HandleError(Status error) = 0; -}; - -// ===== encoding/cbor_internals.h ===== - -namespace cbor { -enum class MajorType; -} - -namespace cbor_internals { - -// Reads the start of a token with definitive size from |bytes|. -// |type| is the major type as specified in RFC 7049 Section 2.1. -// |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size -// (e.g. for BYTE_STRING). -// If successful, returns the number of bytes read. Otherwise returns -1. -int8_t ReadTokenStart(span bytes, cbor::MajorType* type, - uint64_t* value); - -// Writes the start of a token with |type|. The |value| may indicate the size, -// or it may be the payload if the value is an unsigned integer. -void WriteTokenStart(cbor::MajorType type, uint64_t value, - std::vector* encoded); -} // namespace cbor_internals - -// ===== encoding/cbor.h ===== - - -namespace cbor { - -// The major types from RFC 7049 Section 2.1. -enum class MajorType { - UNSIGNED = 0, - NEGATIVE = 1, - BYTE_STRING = 2, - STRING = 3, - ARRAY = 4, - MAP = 5, - TAG = 6, - SIMPLE_VALUE = 7 -}; - -// Indicates the number of bits the "initial byte" needs to be shifted to the -// right after applying |kMajorTypeMask| to produce the major type in the -// lowermost bits. -static constexpr uint8_t kMajorTypeBitShift = 5u; -// Mask selecting the low-order 5 bits of the "initial byte", which is where -// the additional information is encoded. -static constexpr uint8_t kAdditionalInformationMask = 0x1f; -// Mask selecting the high-order 3 bits of the "initial byte", which indicates -// the major type of the encoded value. -static constexpr uint8_t kMajorTypeMask = 0xe0; -// Indicates the integer is in the following byte. -static constexpr uint8_t kAdditionalInformation1Byte = 24u; -// Indicates the integer is in the next 2 bytes. -static constexpr uint8_t kAdditionalInformation2Bytes = 25u; -// Indicates the integer is in the next 4 bytes. -static constexpr uint8_t kAdditionalInformation4Bytes = 26u; -// Indicates the integer is in the next 8 bytes. -static constexpr uint8_t kAdditionalInformation8Bytes = 27u; - -// Encodes the initial byte, consisting of the |type| in the first 3 bits -// followed by 5 bits of |additional_info|. -constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) { - return (static_cast(type) << kMajorTypeBitShift) | - (additional_info & kAdditionalInformationMask); -} - -// TAG 24 indicates that what follows is a byte string which is -// encoded in CBOR format. We use this as a wrapper for -// maps and arrays, allowing us to skip them, because the -// byte string carries its size (byte length). -// https://tools.ietf.org/html/rfc7049#section-2.4.4.1 -static constexpr uint8_t kInitialByteForEnvelope = - EncodeInitialByte(MajorType::TAG, 24); -// The initial byte for a byte string with at most 2^32 bytes -// of payload. This is used for envelope encoding, even if -// the byte string is shorter. -static constexpr uint8_t kInitialByteFor32BitLengthByteString = - EncodeInitialByte(MajorType::BYTE_STRING, 26); - -// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional -// info = 31. -static constexpr uint8_t kInitialByteIndefiniteLengthArray = - EncodeInitialByte(MajorType::ARRAY, 31); -static constexpr uint8_t kInitialByteIndefiniteLengthMap = - EncodeInitialByte(MajorType::MAP, 31); -// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite -// length maps / arrays. -static constexpr uint8_t kStopByte = - EncodeInitialByte(MajorType::SIMPLE_VALUE, 31); - -} // namespace cbor - -// The binary encoding for the inspector protocol follows the CBOR specification -// (RFC 7049). Additional constraints: -// - Only indefinite length maps and arrays are supported. -// - Maps and arrays are wrapped with an envelope, that is, a -// CBOR tag with value 24 followed by a byte string specifying -// the byte length of the enclosed map / array. The byte string -// must use a 32 bit wide length. -// - At the top level, a message must be an indefinite length map -// wrapped by an envelope. -// - Maximal size for messages is 2^32 (4 GB). -// - For scalars, we support only the int32_t range, encoded as -// UNSIGNED/NEGATIVE (major types 0 / 1). -// - UTF16 strings, including with unbalanced surrogate pairs, are encoded -// as CBOR BYTE_STRING (major type 2). For such strings, the number of -// bytes encoded must be even. -// - UTF8 strings (major type 3) may only have ASCII characters -// (7 bit US-ASCII). -// - Arbitrary byte arrays, in the inspector protocol called 'binary', -// are encoded as BYTE_STRING (major type 2), prefixed with a byte -// indicating base64 when rendered as JSON. - -// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE| -// (major type 1) iff < 0. -void EncodeInt32(int32_t value, std::vector* out); - -// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16 -// character in |in| is emitted with most significant byte first, -// appending to |out|. -void EncodeString16(span in, std::vector* out); - -// Encodes a UTF8 string |in| as STRING (major type 3). -void EncodeString8(span in, std::vector* out); - -// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with -// definitive length, prefixed with tag 22 indicating expected conversion to -// base64 (see RFC 7049, Table 3 and Section 2.4.4.2). -void EncodeBinary(span in, std::vector* out); - -// Encodes / decodes a double as Major type 7 (SIMPLE_VALUE), -// with additional info = 27, followed by 8 bytes in big endian. -void EncodeDouble(double value, std::vector* out); - -// Some constants for CBOR tokens that only take a single byte on the wire. -uint8_t EncodeTrue(); -uint8_t EncodeFalse(); -uint8_t EncodeNull(); -uint8_t EncodeIndefiniteLengthArrayStart(); -uint8_t EncodeIndefiniteLengthMapStart(); -uint8_t EncodeStop(); - -// An envelope indicates the byte length of a wrapped item. -// We use this for maps and array, which allows the decoder -// to skip such (nested) values whole sale. -// It's implemented as a CBOR tag (major type 6) with additional -// info = 24, followed by a byte string with a 32 bit length value; -// so the maximal structure that we can wrap is 2^32 bits long. -// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1 -class EnvelopeEncoder { - public: - // Emits the envelope start bytes and records the position for the - // byte size in |byte_size_pos_|. Also emits empty bytes for the - // byte sisze so that encoding can continue. - void EncodeStart(std::vector* out); - // This records the current size in |out| at position byte_size_pos_. - // Returns true iff successful. - bool EncodeStop(std::vector* out); - - private: - std::size_t byte_size_pos_ = 0; -}; - -// This can be used to convert from JSON to CBOR, by passing the -// return value to the routines in json_parser.h. The handler will encode into -// |out|, and iff an error occurs it will set |status| to an error and clear -// |out|. Otherwise, |status.ok()| will be |true|. -std::unique_ptr NewJSONToCBOREncoder( - std::vector* out, Status* status); - -// Parses a CBOR encoded message from |bytes|, sending JSON events to -// |json_out|. If an error occurs, sends |out->HandleError|, and parsing stops. -// The client is responsible for discarding the already received information in -// that case. -void ParseCBOR(span bytes, JSONParserHandler* json_out); - -// Tags for the tokens within a CBOR message that CBORStream understands. -// Note that this is not the same terminology as the CBOR spec (RFC 7049), -// but rather, our adaptation. For instance, we lump unsigned and signed -// major type into INT32 here (and disallow values outside the int32_t range). -enum class CBORTokenTag { - // Encountered an error in the structure of the message. Consult - // status() for details. - ERROR_VALUE, - // Booleans and NULL. - TRUE_VALUE, - FALSE_VALUE, - NULL_VALUE, - // An int32_t (signed 32 bit integer). - INT32, - // A double (64 bit floating point). - DOUBLE, - // A UTF8 string. - STRING8, - // A UTF16 string. - STRING16, - // A binary string. - BINARY, - // Starts an indefinite length map; after the map start we expect - // alternating keys and values, followed by STOP. - MAP_START, - // Starts an indefinite length array; after the array start we - // expect values, followed by STOP. - ARRAY_START, - // Ends a map or an array. - STOP, - // An envelope indicator, wrapping a map or array. - // Internally this carries the byte length of the wrapped - // map or array. While CBORTokenizer::Next() will read / skip the entire - // envelope, CBORTokenizer::EnterEnvelope() reads the tokens - // inside of it. - ENVELOPE, - // We've reached the end there is nothing else to read. - DONE, -}; - -// CBORTokenizer segments a CBOR message, presenting the tokens therein as -// numbers, strings, etc. This is not a complete CBOR parser, but makes it much -// easier to implement one (e.g. ParseCBOR, above). It can also be used to parse -// messages partially. -class CBORTokenizer { - public: - explicit CBORTokenizer(span bytes); - ~CBORTokenizer(); - - // Identifies the current token that we're looking at, - // or ERROR_VALUE (in which ase ::Status() has details) - // or DONE (if we're past the last token). - CBORTokenTag TokenTag() const; - - // Advances to the next token. - void Next(); - // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE. - // While Next() would skip past the entire envelope / what it's - // wrapping, EnterEnvelope positions the cursor inside of the envelope, - // letting the client explore the nested structure. - void EnterEnvelope(); - - // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes - // the error more precisely; otherwise it'll be set to Error::OK. - // In either case, Status().pos is the current position. - struct Status Status() const; - - // The following methods retrieve the token values. They can only - // be called if TokenTag() matches. - - // To be called only if ::TokenTag() == CBORTokenTag::INT32. - int32_t GetInt32() const; - - // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE. - double GetDouble() const; - - // To be called only if ::TokenTag() == CBORTokenTag::STRING8. - span GetString8() const; - - // Wire representation for STRING16 is low byte first (little endian). - // To be called only if ::TokenTag() == CBORTokenTag::STRING16. - span GetString16WireRep() const; - - // To be called only if ::TokenTag() == CBORTokenTag::BINARY. - span GetBinary() const; - - private: - void ReadNextToken(bool enter_envelope); - void SetToken(CBORTokenTag token, std::ptrdiff_t token_byte_length); - void SetError(Error error); - - span bytes_; - CBORTokenTag token_tag_; - struct Status status_; - std::ptrdiff_t token_byte_length_; - cbor::MajorType token_start_type_; - uint64_t token_start_internal_value_; -}; - -void DumpCBOR(span cbor); - - -{% for namespace in config.protocol.namespace %} -} // namespace {{namespace}} -{% endfor %} -#endif // !defined({{"_".join(config.protocol.namespace)}}_CBOR_h) diff --git a/tools/inspector_protocol/lib/Collections_h.template b/tools/inspector_protocol/lib/Collections_h.template deleted file mode 100644 index 7505a17bfa..0000000000 --- a/tools/inspector_protocol/lib/Collections_h.template +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2016 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef {{"_".join(config.protocol.namespace)}}_Collections_h -#define {{"_".join(config.protocol.namespace)}}_Collections_h - -#include {{format_include(config.protocol.package, "Forward")}} -#include - -#if defined(__APPLE__) && !defined(_LIBCPP_VERSION) -#include -#include - -{% for namespace in config.protocol.namespace %} -namespace {{namespace}} { -{% endfor %} - -template using HashMap = std::map; -template using HashSet = std::set; - -{% for namespace in config.protocol.namespace %} -} // namespace {{namespace}} -{% endfor %} - -#else -#include -#include - -{% for namespace in config.protocol.namespace %} -namespace {{namespace}} { -{% endfor %} - -template using HashMap = std::unordered_map; -template using HashSet = std::unordered_set; - -{% for namespace in config.protocol.namespace %} -} // namespace {{namespace}} -{% endfor %} - -#endif // defined(__APPLE__) && !defined(_LIBCPP_VERSION) - -#endif // !defined({{"_".join(config.protocol.namespace)}}_Collections_h) diff --git a/tools/inspector_protocol/lib/Values_cpp.template b/tools/inspector_protocol/lib/Values_cpp.template index 4b4ba99415..764b4d37d9 100644 --- a/tools/inspector_protocol/lib/Values_cpp.template +++ b/tools/inspector_protocol/lib/Values_cpp.template @@ -66,21 +66,21 @@ static constexpr int kStackLimitValues = 1000; // Below are three parsing routines for CBOR, which cover enough // to roundtrip JSON messages. -std::unique_ptr parseMap(int32_t stack_depth, CBORTokenizer* tokenizer); -std::unique_ptr parseArray(int32_t stack_depth, CBORTokenizer* tokenizer); -std::unique_ptr parseValue(int32_t stack_depth, CBORTokenizer* tokenizer); +std::unique_ptr parseMap(int32_t stack_depth, cbor::CBORTokenizer* tokenizer); +std::unique_ptr parseArray(int32_t stack_depth, cbor::CBORTokenizer* tokenizer); +std::unique_ptr parseValue(int32_t stack_depth, cbor::CBORTokenizer* tokenizer); // |bytes| must start with the indefinite length array byte, so basically, // ParseArray may only be called after an indefinite length array has been // detected. -std::unique_ptr parseArray(int32_t stack_depth, CBORTokenizer* tokenizer) { - DCHECK(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START); +std::unique_ptr parseArray(int32_t stack_depth, cbor::CBORTokenizer* tokenizer) { + DCHECK(tokenizer->TokenTag() == cbor::CBORTokenTag::ARRAY_START); tokenizer->Next(); auto list = ListValue::create(); - while (tokenizer->TokenTag() != CBORTokenTag::STOP) { + while (tokenizer->TokenTag() != cbor::CBORTokenTag::STOP) { // Error::CBOR_UNEXPECTED_EOF_IN_ARRAY - if (tokenizer->TokenTag() == CBORTokenTag::DONE) return nullptr; - if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) return nullptr; + if (tokenizer->TokenTag() == cbor::CBORTokenTag::DONE) return nullptr; + if (tokenizer->TokenTag() == cbor::CBORTokenTag::ERROR_VALUE) return nullptr; // Parse value. auto value = parseValue(stack_depth, tokenizer); if (!value) return nullptr; @@ -91,60 +91,66 @@ std::unique_ptr parseArray(int32_t stack_depth, CBORTokenizer* tokeni } std::unique_ptr parseValue( - int32_t stack_depth, CBORTokenizer* tokenizer) { + int32_t stack_depth, cbor::CBORTokenizer* tokenizer) { // Error::CBOR_STACK_LIMIT_EXCEEDED if (stack_depth > kStackLimitValues) return nullptr; // Skip past the envelope to get to what's inside. - if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE) + if (tokenizer->TokenTag() == cbor::CBORTokenTag::ENVELOPE) tokenizer->EnterEnvelope(); switch (tokenizer->TokenTag()) { - case CBORTokenTag::ERROR_VALUE: + case cbor::CBORTokenTag::ERROR_VALUE: return nullptr; - case CBORTokenTag::DONE: + case cbor::CBORTokenTag::DONE: // Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE return nullptr; - case CBORTokenTag::TRUE_VALUE: { + case cbor::CBORTokenTag::TRUE_VALUE: { std::unique_ptr value = FundamentalValue::create(true); tokenizer->Next(); return value; } - case CBORTokenTag::FALSE_VALUE: { + case cbor::CBORTokenTag::FALSE_VALUE: { std::unique_ptr value = FundamentalValue::create(false); tokenizer->Next(); return value; } - case CBORTokenTag::NULL_VALUE: { + case cbor::CBORTokenTag::NULL_VALUE: { std::unique_ptr value = FundamentalValue::null(); tokenizer->Next(); return value; } - case CBORTokenTag::INT32: { + case cbor::CBORTokenTag::INT32: { std::unique_ptr value = FundamentalValue::create(tokenizer->GetInt32()); tokenizer->Next(); return value; } - case CBORTokenTag::DOUBLE: { + case cbor::CBORTokenTag::DOUBLE: { std::unique_ptr value = FundamentalValue::create(tokenizer->GetDouble()); tokenizer->Next(); return value; } - case CBORTokenTag::STRING8: { + case cbor::CBORTokenTag::STRING8: { span str = tokenizer->GetString8(); - std::unique_ptr value = StringValue::create(StringUtil::fromUTF8(str.data(), str.size())); + std::unique_ptr value = + StringValue::create(StringUtil::fromUTF8(str.data(), str.size())); tokenizer->Next(); return value; } - case CBORTokenTag::STRING16: - // NOT SUPPORTED YET. - return nullptr; - case CBORTokenTag::BINARY: { + case cbor::CBORTokenTag::STRING16: { + span wire = tokenizer->GetString16WireRep(); + DCHECK_EQ(wire.size() & 1, 0u); + std::unique_ptr value = StringValue::create(StringUtil::fromUTF16( + reinterpret_cast(wire.data()), wire.size() / 2)); + tokenizer->Next(); + return value; + } + case cbor::CBORTokenTag::BINARY: { span payload = tokenizer->GetBinary(); tokenizer->Next(); return BinaryValue::create(Binary::fromSpan(payload.data(), payload.size())); } - case CBORTokenTag::MAP_START: + case cbor::CBORTokenTag::MAP_START: return parseMap(stack_depth + 1, tokenizer); - case CBORTokenTag::ARRAY_START: + case cbor::CBORTokenTag::ARRAY_START: return parseArray(stack_depth + 1, tokenizer); default: // Error::CBOR_UNSUPPORTED_VALUE @@ -156,22 +162,22 @@ std::unique_ptr parseValue( // ParseArray may only be called after an indefinite length array has been // detected. std::unique_ptr parseMap( - int32_t stack_depth, CBORTokenizer* tokenizer) { + int32_t stack_depth, cbor::CBORTokenizer* tokenizer) { auto dict = DictionaryValue::create(); tokenizer->Next(); - while (tokenizer->TokenTag() != CBORTokenTag::STOP) { - if (tokenizer->TokenTag() == CBORTokenTag::DONE) { + while (tokenizer->TokenTag() != cbor::CBORTokenTag::STOP) { + if (tokenizer->TokenTag() == cbor::CBORTokenTag::DONE) { // Error::CBOR_UNEXPECTED_EOF_IN_MAP return nullptr; } - if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) return nullptr; + if (tokenizer->TokenTag() == cbor::CBORTokenTag::ERROR_VALUE) return nullptr; // Parse key. String key; - if (tokenizer->TokenTag() == CBORTokenTag::STRING8) { + if (tokenizer->TokenTag() == cbor::CBORTokenTag::STRING8) { span key_span = tokenizer->GetString8(); key = StringUtil::fromUTF8(key_span.data(), key_span.size()); tokenizer->Next(); - } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) { + } else if (tokenizer->TokenTag() == cbor::CBORTokenTag::STRING16) { return nullptr; // STRING16 not supported yet. } else { // Error::CBOR_INVALID_MAP_KEY @@ -196,22 +202,21 @@ std::unique_ptr Value::parseBinary(const uint8_t* data, size_t size) { if (bytes.empty()) return nullptr; // Error::CBOR_INVALID_START_BYTE - // TODO(johannes): EncodeInitialByteForEnvelope() method. - if (bytes[0] != 0xd8) return nullptr; + if (bytes[0] != cbor::InitialByteForEnvelope()) return nullptr; - CBORTokenizer tokenizer(bytes); - if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) return nullptr; + cbor::CBORTokenizer tokenizer(bytes); + if (tokenizer.TokenTag() == cbor::CBORTokenTag::ERROR_VALUE) return nullptr; // We checked for the envelope start byte above, so the tokenizer // must agree here, since it's not an error. - DCHECK(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE); + DCHECK(tokenizer.TokenTag() == cbor::CBORTokenTag::ENVELOPE); tokenizer.EnterEnvelope(); // Error::MAP_START_EXPECTED - if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) return nullptr; + if (tokenizer.TokenTag() != cbor::CBORTokenTag::MAP_START) return nullptr; std::unique_ptr result = parseMap(/*stack_depth=*/1, &tokenizer); if (!result) return nullptr; - if (tokenizer.TokenTag() == CBORTokenTag::DONE) return result; - if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) return nullptr; + if (tokenizer.TokenTag() == cbor::CBORTokenTag::DONE) return result; + if (tokenizer.TokenTag() == cbor::CBORTokenTag::ERROR_VALUE) return nullptr; // Error::CBOR_TRAILING_JUNK return nullptr; } @@ -249,7 +254,7 @@ void Value::writeJSON(StringBuilder* output) const void Value::writeBinary(std::vector* bytes) const { DCHECK(m_type == TypeNull); - bytes->push_back(EncodeNull()); + bytes->push_back(cbor::EncodeNull()); } std::unique_ptr Value::clone() const @@ -326,13 +331,13 @@ void FundamentalValue::writeJSON(StringBuilder* output) const void FundamentalValue::writeBinary(std::vector* bytes) const { switch (type()) { case TypeDouble: - EncodeDouble(m_doubleValue, bytes); + cbor::EncodeDouble(m_doubleValue, bytes); return; case TypeInteger: - EncodeInt32(m_integerValue, bytes); + cbor::EncodeInt32(m_integerValue, bytes); return; case TypeBoolean: - bytes->push_back(m_boolValue ? EncodeTrue() : EncodeFalse()); + bytes->push_back(m_boolValue ? cbor::EncodeTrue() : cbor::EncodeFalse()); return; default: DCHECK(false); @@ -363,10 +368,37 @@ void StringValue::writeJSON(StringBuilder* output) const StringUtil::builderAppendQuotedString(*output, m_stringValue); } +namespace { +// This routine distinguishes between the current encoding for a given +// string |s|, and calls encoding routines that will +// - Ensure that all ASCII strings end up being encoded as UTF8 in +// the wire format - e.g., EncodeFromUTF16 will detect ASCII and +// do the (trivial) transcode to STRING8 on the wire, but if it's +// not ASCII it'll do STRING16. +// - Select a format that's cheap to convert to. E.g., we don't +// have LATIN1 on the wire, so we call EncodeFromLatin1 which +// transcodes to UTF8 if needed. +void EncodeString(const String& s, std::vector* out) { + if (StringUtil::CharacterCount(s) == 0) { + cbor::EncodeString8(span(nullptr, 0), out); // Empty string. + } else if (StringUtil::CharactersLatin1(s)) { + cbor::EncodeFromLatin1(span(StringUtil::CharactersLatin1(s), + StringUtil::CharacterCount(s)), + out); + } else if (StringUtil::CharactersUTF16(s)) { + cbor::EncodeFromUTF16(span(StringUtil::CharactersUTF16(s), + StringUtil::CharacterCount(s)), + out); + } else if (StringUtil::CharactersUTF8(s)) { + cbor::EncodeString8(span(StringUtil::CharactersUTF8(s), + StringUtil::CharacterCount(s)), + out); + } +} +} // namespace + void StringValue::writeBinary(std::vector* bytes) const { - StringUTF8Adapter utf8(m_stringValue); - EncodeString8(span(reinterpret_cast(utf8.Data()), - utf8.length()), bytes); + EncodeString(m_stringValue, bytes); } std::unique_ptr StringValue::clone() const @@ -387,7 +419,8 @@ void BinaryValue::writeJSON(StringBuilder* output) const } void BinaryValue::writeBinary(std::vector* bytes) const { - EncodeBinary(span(m_binaryValue.data(), m_binaryValue.size()), bytes); + cbor::EncodeBinary(span(m_binaryValue.data(), + m_binaryValue.size()), bytes); } std::unique_ptr BinaryValue::clone() const @@ -550,19 +583,17 @@ void DictionaryValue::writeJSON(StringBuilder* output) const } void DictionaryValue::writeBinary(std::vector* bytes) const { - EnvelopeEncoder encoder; + cbor::EnvelopeEncoder encoder; encoder.EncodeStart(bytes); - bytes->push_back(EncodeIndefiniteLengthMapStart()); + bytes->push_back(cbor::EncodeIndefiniteLengthMapStart()); for (size_t i = 0; i < m_order.size(); ++i) { const String& key = m_order[i]; Dictionary::const_iterator value = m_data.find(key); DCHECK(value != m_data.cend() && value->second); - StringUTF8Adapter utf8(key); - EncodeString8(span(reinterpret_cast(utf8.Data()), - utf8.length()), bytes); + EncodeString(key, bytes); value->second->writeBinary(bytes); } - bytes->push_back(EncodeStop()); + bytes->push_back(cbor::EncodeStop()); encoder.EncodeStop(bytes); } @@ -601,13 +632,13 @@ void ListValue::writeJSON(StringBuilder* output) const } void ListValue::writeBinary(std::vector* bytes) const { - EnvelopeEncoder encoder; + cbor::EnvelopeEncoder encoder; encoder.EncodeStart(bytes); - bytes->push_back(EncodeIndefiniteLengthArrayStart()); + bytes->push_back(cbor::EncodeIndefiniteLengthArrayStart()); for (size_t i = 0; i < m_data.size(); ++i) { m_data[i]->writeBinary(bytes); } - bytes->push_back(EncodeStop()); + bytes->push_back(cbor::EncodeStop()); encoder.EncodeStop(bytes); } diff --git a/tools/inspector_protocol/lib/base_string_adapter_cc.template b/tools/inspector_protocol/lib/base_string_adapter_cc.template index ed3316446f..639b39bb52 100644 --- a/tools/inspector_protocol/lib/base_string_adapter_cc.template +++ b/tools/inspector_protocol/lib/base_string_adapter_cc.template @@ -136,7 +136,7 @@ std::unique_ptr StringUtil::parseMessage( reinterpret_cast(message.data()), message.length()); } - std::unique_ptr value = base::JSONReader::Read(message); + std::unique_ptr value = base::JSONReader::ReadDeprecated(message); return toProtocolValue(value.get(), 1000); } @@ -185,6 +185,13 @@ void StringBuilder::reserveCapacity(size_t capacity) { string_.reserve(capacity); } +// static +String StringUtil::fromUTF16(const uint16_t* data, size_t length) { + std::string utf8; + base::UTF16ToUTF8(reinterpret_cast(data), length, &utf8); + return utf8; +} + Binary::Binary() : bytes_(new base::RefCountedBytes) {} Binary::Binary(const Binary& binary) : bytes_(binary.bytes_) {} Binary::Binary(scoped_refptr bytes) : bytes_(bytes) {} @@ -230,75 +237,6 @@ Binary Binary::fromSpan(const uint8_t* data, size_t size) { new base::RefCountedBytes(data, size))); } -namespace { -int32_t ReadEnvelopeSize(const uint8_t* in) { - return (in[0] << 24) + (in[1] << 16) + (in[2] << 8) + in[3]; -} - -void WriteEnvelopeSize(uint32_t value, uint8_t* out) { - *(out++) = (value >> 24) & 0xFF; - *(out++) = (value >> 16) & 0xFF; - *(out++) = (value >> 8) & 0xFF; - *(out++) = (value) & 0xFF; -} - -} - -bool AppendStringValueToMapBinary(base::StringPiece in, - base::StringPiece key, base::StringPiece value, std::string* out) { - if (in.size() < 1 + 1 + 4 + 1 + 1) - return false; - const uint8_t* envelope = reinterpret_cast(in.data()); - if (cbor::kInitialByteForEnvelope != envelope[0]) - return false; - if (cbor::kInitialByteFor32BitLengthByteString != envelope[1]) - return false; - if (cbor::kInitialByteIndefiniteLengthMap != envelope[6]) - return false; - - uint32_t envelope_size = ReadEnvelopeSize(envelope + 2); - if (envelope_size + 2 + 4 != in.size()) - return false; - if (cbor::kStopByte != static_cast(*in.rbegin())) - return false; - - std::vector encoded_entry; - encoded_entry.reserve(1 + 4 + key.size() + 1 + 4 + value.size()); - span key_span( - reinterpret_cast(key.data()), key.size()); - EncodeString8(key_span, &encoded_entry); - span value_span( - reinterpret_cast(value.data()), value.size()); - EncodeString8(value_span, &encoded_entry); - - out->clear(); - out->reserve(in.size() + encoded_entry.size()); - out->append(in.begin(), in.end() - 1); - out->append(reinterpret_cast(encoded_entry.data()), - encoded_entry.size()); - out->append(1, static_cast(cbor::kStopByte)); - std::size_t new_size = envelope_size + out->size() - in.size(); - if (new_size > static_cast( - std::numeric_limits::max())) { - return false; - } - WriteEnvelopeSize(new_size, reinterpret_cast(&*out->begin() + 2)); - return true; -} - -bool AppendStringValueToMapJSON(base::StringPiece in, - base::StringPiece key, base::StringPiece value, std::string* out) { - if (!in.length() || *in.rbegin() != '}') - return false; - std::string suffix = - base::StringPrintf(", \"%s\": \"%s\"}", key.begin(), value.begin()); - out->clear(); - out->reserve(in.length() + suffix.length() - 1); - out->append(in.data(), in.length() - 1); - out->append(suffix); - return true; -} - {% for namespace in config.protocol.namespace %} } // namespace {{namespace}} {% endfor %} diff --git a/tools/inspector_protocol/lib/base_string_adapter_h.template b/tools/inspector_protocol/lib/base_string_adapter_h.template index b0215e0745..8bf3c355c0 100644 --- a/tools/inspector_protocol/lib/base_string_adapter_h.template +++ b/tools/inspector_protocol/lib/base_string_adapter_h.template @@ -32,16 +32,6 @@ class Value; using String = std::string; using ProtocolMessage = std::string; -class {{config.lib.export_macro}} StringUTF8Adapter { - public: - StringUTF8Adapter(const std::string& string) : string_(string) { } - const char* Data() const { return string_.data(); } - size_t length() const { return string_.length(); } - - private: - const std::string& string_; -}; - class {{config.lib.export_macro}} StringBuilder { public: StringBuilder(); @@ -109,6 +99,15 @@ class {{config.lib.export_macro}} StringUtil { static String fromUTF8(const uint8_t* data, size_t length) { return std::string(reinterpret_cast(data), length); } + + static String fromUTF16(const uint16_t* data, size_t length); + + static const uint8_t* CharactersLatin1(const String& s) { return nullptr; } + static const uint8_t* CharactersUTF8(const String& s) { + return reinterpret_cast(s.data()); + } + static const uint16_t* CharactersUTF16(const String& s) { return nullptr; } + static size_t CharacterCount(const String& s) { return s.size(); } }; // A read-only sequence of uninterpreted bytes with reference-counted storage. @@ -137,12 +136,6 @@ class {{config.lib.export_macro}} Binary { std::unique_ptr toProtocolValue(const base::Value* value, int depth); std::unique_ptr toBaseValue(Value* value, int depth); - -bool AppendStringValueToMapBinary(base::StringPiece in, - base::StringPiece key, base::StringPiece value, std::string* out); -bool AppendStringValueToMapJSON(base::StringPiece in, - base::StringPiece key, base::StringPiece value, std::string* out); - {% for namespace in config.protocol.namespace %} } // namespace {{namespace}} {% endfor %} diff --git a/tools/inspector_protocol/lib/encoding_cpp.template b/tools/inspector_protocol/lib/encoding_cpp.template new file mode 100644 index 0000000000..54a46ecd20 --- /dev/null +++ b/tools/inspector_protocol/lib/encoding_cpp.template @@ -0,0 +1,2199 @@ +{# This template is generated by gen_cbor_templates.py. #} +// Generated by lib/encoding_cpp.template. + +// Copyright 2019 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + + +#include +#include +#include +#include +#include +#include + +{% for namespace in config.protocol.namespace %} +namespace {{namespace}} { +{% endfor %} + +// ===== encoding/encoding.cc ===== + +// ============================================================================= +// Status and Error codes +// ============================================================================= + +std::string Status::ToASCIIString() const { + switch (error) { + case Error::OK: + return "OK"; + case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS: + return ToASCIIString("JSON: unprocessed input remains"); + case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED: + return ToASCIIString("JSON: stack limit exceeded"); + case Error::JSON_PARSER_NO_INPUT: + return ToASCIIString("JSON: no input"); + case Error::JSON_PARSER_INVALID_TOKEN: + return ToASCIIString("JSON: invalid token"); + case Error::JSON_PARSER_INVALID_NUMBER: + return ToASCIIString("JSON: invalid number"); + case Error::JSON_PARSER_INVALID_STRING: + return ToASCIIString("JSON: invalid string"); + case Error::JSON_PARSER_UNEXPECTED_ARRAY_END: + return ToASCIIString("JSON: unexpected array end"); + case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED: + return ToASCIIString("JSON: comma or array end expected"); + case Error::JSON_PARSER_STRING_LITERAL_EXPECTED: + return ToASCIIString("JSON: string literal expected"); + case Error::JSON_PARSER_COLON_EXPECTED: + return ToASCIIString("JSON: colon expected"); + case Error::JSON_PARSER_UNEXPECTED_MAP_END: + return ToASCIIString("JSON: unexpected map end"); + case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED: + return ToASCIIString("JSON: comma or map end expected"); + case Error::JSON_PARSER_VALUE_EXPECTED: + return ToASCIIString("JSON: value expected"); + + case Error::CBOR_INVALID_INT32: + return ToASCIIString("CBOR: invalid int32"); + case Error::CBOR_INVALID_DOUBLE: + return ToASCIIString("CBOR: invalid double"); + case Error::CBOR_INVALID_ENVELOPE: + return ToASCIIString("CBOR: invalid envelope"); + case Error::CBOR_INVALID_STRING8: + return ToASCIIString("CBOR: invalid string8"); + case Error::CBOR_INVALID_STRING16: + return ToASCIIString("CBOR: invalid string16"); + case Error::CBOR_INVALID_BINARY: + return ToASCIIString("CBOR: invalid binary"); + case Error::CBOR_UNSUPPORTED_VALUE: + return ToASCIIString("CBOR: unsupported value"); + case Error::CBOR_NO_INPUT: + return ToASCIIString("CBOR: no input"); + case Error::CBOR_INVALID_START_BYTE: + return ToASCIIString("CBOR: invalid start byte"); + case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE: + return ToASCIIString("CBOR: unexpected eof expected value"); + case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY: + return ToASCIIString("CBOR: unexpected eof in array"); + case Error::CBOR_UNEXPECTED_EOF_IN_MAP: + return ToASCIIString("CBOR: unexpected eof in map"); + case Error::CBOR_INVALID_MAP_KEY: + return ToASCIIString("CBOR: invalid map key"); + case Error::CBOR_STACK_LIMIT_EXCEEDED: + return ToASCIIString("CBOR: stack limit exceeded"); + case Error::CBOR_TRAILING_JUNK: + return ToASCIIString("CBOR: trailing junk"); + case Error::CBOR_MAP_START_EXPECTED: + return ToASCIIString("CBOR: map start expected"); + case Error::CBOR_MAP_STOP_EXPECTED: + return ToASCIIString("CBOR: map stop expected"); + case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED: + return ToASCIIString("CBOR: envelope size limit exceeded"); + } + // Some compilers can't figure out that we can't get here. + return "INVALID ERROR CODE"; +} + +std::string Status::ToASCIIString(const char* msg) const { + return std::string(msg) + " at position " + std::to_string(pos); +} + +namespace cbor { +namespace { +// Indicates the number of bits the "initial byte" needs to be shifted to the +// right after applying |kMajorTypeMask| to produce the major type in the +// lowermost bits. +static constexpr uint8_t kMajorTypeBitShift = 5u; +// Mask selecting the low-order 5 bits of the "initial byte", which is where +// the additional information is encoded. +static constexpr uint8_t kAdditionalInformationMask = 0x1f; +// Mask selecting the high-order 3 bits of the "initial byte", which indicates +// the major type of the encoded value. +static constexpr uint8_t kMajorTypeMask = 0xe0; +// Indicates the integer is in the following byte. +static constexpr uint8_t kAdditionalInformation1Byte = 24u; +// Indicates the integer is in the next 2 bytes. +static constexpr uint8_t kAdditionalInformation2Bytes = 25u; +// Indicates the integer is in the next 4 bytes. +static constexpr uint8_t kAdditionalInformation4Bytes = 26u; +// Indicates the integer is in the next 8 bytes. +static constexpr uint8_t kAdditionalInformation8Bytes = 27u; + +// Encodes the initial byte, consisting of the |type| in the first 3 bits +// followed by 5 bits of |additional_info|. +constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) { + return (static_cast(type) << kMajorTypeBitShift) | + (additional_info & kAdditionalInformationMask); +} + +// TAG 24 indicates that what follows is a byte string which is +// encoded in CBOR format. We use this as a wrapper for +// maps and arrays, allowing us to skip them, because the +// byte string carries its size (byte length). +// https://tools.ietf.org/html/rfc7049#section-2.4.4.1 +static constexpr uint8_t kInitialByteForEnvelope = + EncodeInitialByte(MajorType::TAG, 24); +// The initial byte for a byte string with at most 2^32 bytes +// of payload. This is used for envelope encoding, even if +// the byte string is shorter. +static constexpr uint8_t kInitialByteFor32BitLengthByteString = + EncodeInitialByte(MajorType::BYTE_STRING, 26); + +// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional +// info = 31. +static constexpr uint8_t kInitialByteIndefiniteLengthArray = + EncodeInitialByte(MajorType::ARRAY, 31); +static constexpr uint8_t kInitialByteIndefiniteLengthMap = + EncodeInitialByte(MajorType::MAP, 31); +// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite +// length maps / arrays. +static constexpr uint8_t kStopByte = + EncodeInitialByte(MajorType::SIMPLE_VALUE, 31); + +// See RFC 7049 Section 2.3, Table 2. +static constexpr uint8_t kEncodedTrue = + EncodeInitialByte(MajorType::SIMPLE_VALUE, 21); +static constexpr uint8_t kEncodedFalse = + EncodeInitialByte(MajorType::SIMPLE_VALUE, 20); +static constexpr uint8_t kEncodedNull = + EncodeInitialByte(MajorType::SIMPLE_VALUE, 22); +static constexpr uint8_t kInitialByteForDouble = + EncodeInitialByte(MajorType::SIMPLE_VALUE, 27); + +// See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for +// arbitrary binary data encoded as BYTE_STRING. +static constexpr uint8_t kExpectedConversionToBase64Tag = + EncodeInitialByte(MajorType::TAG, 22); + +// Writes the bytes for |v| to |out|, starting with the most significant byte. +// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html +template +void WriteBytesMostSignificantByteFirst(T v, C* out) { + for (int shift_bytes = sizeof(T) - 1; shift_bytes >= 0; --shift_bytes) + out->push_back(0xff & (v >> (shift_bytes * 8))); +} + +// Extracts sizeof(T) bytes from |in| to extract a value of type T +// (e.g. uint64_t, uint32_t, ...), most significant byte first. +// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html +template +T ReadBytesMostSignificantByteFirst(span in) { + assert(in.size() >= sizeof(T)); + T result = 0; + for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes) + result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8); + return result; +} +} // namespace + +namespace internals { +// Reads the start of a token with definitive size from |bytes|. +// |type| is the major type as specified in RFC 7049 Section 2.1. +// |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size +// (e.g. for BYTE_STRING). +// If successful, returns the number of bytes read. Otherwise returns -1. +// TODO(johannes): change return type to size_t and use 0 for error. +int8_t ReadTokenStart(span bytes, MajorType* type, uint64_t* value) { + if (bytes.empty()) + return -1; + uint8_t initial_byte = bytes[0]; + *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift); + + uint8_t additional_information = initial_byte & kAdditionalInformationMask; + if (additional_information < 24) { + // Values 0-23 are encoded directly into the additional info of the + // initial byte. + *value = additional_information; + return 1; + } + if (additional_information == kAdditionalInformation1Byte) { + // Values 24-255 are encoded with one initial byte, followed by the value. + if (bytes.size() < 2) + return -1; + *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); + return 2; + } + if (additional_information == kAdditionalInformation2Bytes) { + // Values 256-65535: 1 initial byte + 2 bytes payload. + if (bytes.size() < 1 + sizeof(uint16_t)) + return -1; + *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); + return 3; + } + if (additional_information == kAdditionalInformation4Bytes) { + // 32 bit uint: 1 initial byte + 4 bytes payload. + if (bytes.size() < 1 + sizeof(uint32_t)) + return -1; + *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); + return 5; + } + if (additional_information == kAdditionalInformation8Bytes) { + // 64 bit uint: 1 initial byte + 8 bytes payload. + if (bytes.size() < 1 + sizeof(uint64_t)) + return -1; + *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); + return 9; + } + return -1; +} + +// Writes the start of a token with |type|. The |value| may indicate the size, +// or it may be the payload if the value is an unsigned integer. +template +void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) { + if (value < 24) { + // Values 0-23 are encoded directly into the additional info of the + // initial byte. + encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value)); + return; + } + if (value <= std::numeric_limits::max()) { + // Values 24-255 are encoded with one initial byte, followed by the value. + encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte)); + encoded->push_back(value); + return; + } + if (value <= std::numeric_limits::max()) { + // Values 256-65535: 1 initial byte + 2 bytes payload. + encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes)); + WriteBytesMostSignificantByteFirst(value, encoded); + return; + } + if (value <= std::numeric_limits::max()) { + // 32 bit uint: 1 initial byte + 4 bytes payload. + encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes)); + WriteBytesMostSignificantByteFirst(static_cast(value), + encoded); + return; + } + // 64 bit uint: 1 initial byte + 8 bytes payload. + encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes)); + WriteBytesMostSignificantByteFirst(value, encoded); +} +void WriteTokenStart(MajorType type, + uint64_t value, + std::vector* encoded) { + WriteTokenStartTmpl(type, value, encoded); +} +void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) { + WriteTokenStartTmpl(type, value, encoded); +} +} // namespace internals + +// ============================================================================= +// Detecting CBOR content +// ============================================================================= + +uint8_t InitialByteForEnvelope() { + return kInitialByteForEnvelope; +} +uint8_t InitialByteFor32BitLengthByteString() { + return kInitialByteFor32BitLengthByteString; +} +bool IsCBORMessage(span msg) { + return msg.size() >= 6 && msg[0] == InitialByteForEnvelope() && + msg[1] == InitialByteFor32BitLengthByteString(); +} + +// ============================================================================= +// Encoding invidiual CBOR items +// ============================================================================= + +uint8_t EncodeTrue() { + return kEncodedTrue; +} +uint8_t EncodeFalse() { + return kEncodedFalse; +} +uint8_t EncodeNull() { + return kEncodedNull; +} + +uint8_t EncodeIndefiniteLengthArrayStart() { + return kInitialByteIndefiniteLengthArray; +} + +uint8_t EncodeIndefiniteLengthMapStart() { + return kInitialByteIndefiniteLengthMap; +} + +uint8_t EncodeStop() { + return kStopByte; +} + +template +void EncodeInt32Tmpl(int32_t value, C* out) { + if (value >= 0) { + internals::WriteTokenStart(MajorType::UNSIGNED, value, out); + } else { + uint64_t representation = static_cast(-(value + 1)); + internals::WriteTokenStart(MajorType::NEGATIVE, representation, out); + } +} +void EncodeInt32(int32_t value, std::vector* out) { + EncodeInt32Tmpl(value, out); +} +void EncodeInt32(int32_t value, std::string* out) { + EncodeInt32Tmpl(value, out); +} + +template +void EncodeString16Tmpl(span in, C* out) { + uint64_t byte_length = static_cast(in.size_bytes()); + internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); + // When emitting UTF16 characters, we always write the least significant byte + // first; this is because it's the native representation for X86. + // TODO(johannes): Implement a more efficient thing here later, e.g. + // casting *iff* the machine has this byte order. + // The wire format for UTF16 chars will probably remain the same + // (least significant byte first) since this way we can have + // golden files, unittests, etc. that port easily and universally. + // See also: + // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html + for (const uint16_t two_bytes : in) { + out->push_back(two_bytes); + out->push_back(two_bytes >> 8); + } +} +void EncodeString16(span in, std::vector* out) { + EncodeString16Tmpl(in, out); +} +void EncodeString16(span in, std::string* out) { + EncodeString16Tmpl(in, out); +} + +template +void EncodeString8Tmpl(span in, C* out) { + internals::WriteTokenStart(MajorType::STRING, + static_cast(in.size_bytes()), out); + out->insert(out->end(), in.begin(), in.end()); +} +void EncodeString8(span in, std::vector* out) { + EncodeString8Tmpl(in, out); +} +void EncodeString8(span in, std::string* out) { + EncodeString8Tmpl(in, out); +} + +template +void EncodeFromLatin1Tmpl(span latin1, C* out) { + for (size_t ii = 0; ii < latin1.size(); ++ii) { + if (latin1[ii] <= 127) + continue; + // If there's at least one non-ASCII char, convert to UTF8. + std::vector utf8(latin1.begin(), latin1.begin() + ii); + for (; ii < latin1.size(); ++ii) { + if (latin1[ii] <= 127) { + utf8.push_back(latin1[ii]); + } else { + // 0xC0 means it's a UTF8 sequence with 2 bytes. + utf8.push_back((latin1[ii] >> 6) | 0xc0); + utf8.push_back((latin1[ii] | 0x80) & 0xbf); + } + } + EncodeString8(SpanFrom(utf8), out); + return; + } + EncodeString8(latin1, out); +} +void EncodeFromLatin1(span latin1, std::vector* out) { + EncodeFromLatin1Tmpl(latin1, out); +} +void EncodeFromLatin1(span latin1, std::string* out) { + EncodeFromLatin1Tmpl(latin1, out); +} + +template +void EncodeFromUTF16Tmpl(span utf16, C* out) { + // If there's at least one non-ASCII char, encode as STRING16 (UTF16). + for (uint16_t ch : utf16) { + if (ch <= 127) + continue; + EncodeString16(utf16, out); + return; + } + // It's all US-ASCII, strip out every second byte and encode as UTF8. + internals::WriteTokenStart(MajorType::STRING, + static_cast(utf16.size()), out); + out->insert(out->end(), utf16.begin(), utf16.end()); +} +void EncodeFromUTF16(span utf16, std::vector* out) { + EncodeFromUTF16Tmpl(utf16, out); +} +void EncodeFromUTF16(span utf16, std::string* out) { + EncodeFromUTF16Tmpl(utf16, out); +} + +template +void EncodeBinaryTmpl(span in, C* out) { + out->push_back(kExpectedConversionToBase64Tag); + uint64_t byte_length = static_cast(in.size_bytes()); + internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); + out->insert(out->end(), in.begin(), in.end()); +} +void EncodeBinary(span in, std::vector* out) { + EncodeBinaryTmpl(in, out); +} +void EncodeBinary(span in, std::string* out) { + EncodeBinaryTmpl(in, out); +} + +// A double is encoded with a specific initial byte +// (kInitialByteForDouble) plus the 64 bits of payload for its value. +constexpr size_t kEncodedDoubleSize = 1 + sizeof(uint64_t); + +// An envelope is encoded with a specific initial byte +// (kInitialByteForEnvelope), plus the start byte for a BYTE_STRING with a 32 +// bit wide length, plus a 32 bit length for that string. +constexpr size_t kEncodedEnvelopeHeaderSize = 1 + 1 + sizeof(uint32_t); + +template +void EncodeDoubleTmpl(double value, C* out) { + // The additional_info=27 indicates 64 bits for the double follow. + // See RFC 7049 Section 2.3, Table 1. + out->push_back(kInitialByteForDouble); + union { + double from_double; + uint64_t to_uint64; + } reinterpret; + reinterpret.from_double = value; + WriteBytesMostSignificantByteFirst(reinterpret.to_uint64, out); +} +void EncodeDouble(double value, std::vector* out) { + EncodeDoubleTmpl(value, out); +} +void EncodeDouble(double value, std::string* out) { + EncodeDoubleTmpl(value, out); +} + +// ============================================================================= +// cbor::EnvelopeEncoder - for wrapping submessages +// ============================================================================= + +template +void EncodeStartTmpl(C* out, size_t* byte_size_pos) { + assert(*byte_size_pos == 0); + out->push_back(kInitialByteForEnvelope); + out->push_back(kInitialByteFor32BitLengthByteString); + *byte_size_pos = out->size(); + out->resize(out->size() + sizeof(uint32_t)); +} + +void EnvelopeEncoder::EncodeStart(std::vector* out) { + EncodeStartTmpl>(out, &byte_size_pos_); +} + +void EnvelopeEncoder::EncodeStart(std::string* out) { + EncodeStartTmpl(out, &byte_size_pos_); +} + +template +bool EncodeStopTmpl(C* out, size_t* byte_size_pos) { + assert(*byte_size_pos != 0); + // The byte size is the size of the payload, that is, all the + // bytes that were written past the byte size position itself. + uint64_t byte_size = out->size() - (*byte_size_pos + sizeof(uint32_t)); + // We store exactly 4 bytes, so at most INT32MAX, with most significant + // byte first. + if (byte_size > std::numeric_limits::max()) + return false; + for (int shift_bytes = sizeof(uint32_t) - 1; shift_bytes >= 0; + --shift_bytes) { + (*out)[(*byte_size_pos)++] = 0xff & (byte_size >> (shift_bytes * 8)); + } + return true; +} + +bool EnvelopeEncoder::EncodeStop(std::vector* out) { + return EncodeStopTmpl(out, &byte_size_pos_); +} + +bool EnvelopeEncoder::EncodeStop(std::string* out) { + return EncodeStopTmpl(out, &byte_size_pos_); +} + +// ============================================================================= +// cbor::NewCBOREncoder - for encoding from a streaming parser +// ============================================================================= + +namespace { +template +class CBOREncoder : public StreamingParserHandler { + public: + CBOREncoder(C* out, Status* status) : out_(out), status_(status) { + *status_ = Status(); + } + + void HandleMapBegin() override { + if (!status_->ok()) + return; + envelopes_.emplace_back(); + envelopes_.back().EncodeStart(out_); + out_->push_back(kInitialByteIndefiniteLengthMap); + } + + void HandleMapEnd() override { + if (!status_->ok()) + return; + out_->push_back(kStopByte); + assert(!envelopes_.empty()); + if (!envelopes_.back().EncodeStop(out_)) { + HandleError( + Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size())); + return; + } + envelopes_.pop_back(); + } + + void HandleArrayBegin() override { + if (!status_->ok()) + return; + envelopes_.emplace_back(); + envelopes_.back().EncodeStart(out_); + out_->push_back(kInitialByteIndefiniteLengthArray); + } + + void HandleArrayEnd() override { + if (!status_->ok()) + return; + out_->push_back(kStopByte); + assert(!envelopes_.empty()); + if (!envelopes_.back().EncodeStop(out_)) { + HandleError( + Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size())); + return; + } + envelopes_.pop_back(); + } + + void HandleString8(span chars) override { + if (!status_->ok()) + return; + EncodeString8(chars, out_); + } + + void HandleString16(span chars) override { + if (!status_->ok()) + return; + EncodeFromUTF16(chars, out_); + } + + void HandleBinary(span bytes) override { + if (!status_->ok()) + return; + EncodeBinary(bytes, out_); + } + + void HandleDouble(double value) override { + if (!status_->ok()) + return; + EncodeDouble(value, out_); + } + + void HandleInt32(int32_t value) override { + if (!status_->ok()) + return; + EncodeInt32(value, out_); + } + + void HandleBool(bool value) override { + if (!status_->ok()) + return; + // See RFC 7049 Section 2.3, Table 2. + out_->push_back(value ? kEncodedTrue : kEncodedFalse); + } + + void HandleNull() override { + if (!status_->ok()) + return; + // See RFC 7049 Section 2.3, Table 2. + out_->push_back(kEncodedNull); + } + + void HandleError(Status error) override { + if (!status_->ok()) + return; + *status_ = error; + out_->clear(); + } + + private: + C* out_; + std::vector envelopes_; + Status* status_; +}; +} // namespace + +std::unique_ptr NewCBOREncoder( + std::vector* out, + Status* status) { + return std::unique_ptr( + new CBOREncoder>(out, status)); +} +std::unique_ptr NewCBOREncoder(std::string* out, + Status* status) { + return std::unique_ptr( + new CBOREncoder(out, status)); +} + +// ============================================================================= +// cbor::CBORTokenizer - for parsing individual CBOR items +// ============================================================================= + +CBORTokenizer::CBORTokenizer(span bytes) : bytes_(bytes) { + ReadNextToken(/*enter_envelope=*/false); +} +CBORTokenizer::~CBORTokenizer() {} + +CBORTokenTag CBORTokenizer::TokenTag() const { + return token_tag_; +} + +void CBORTokenizer::Next() { + if (token_tag_ == CBORTokenTag::ERROR_VALUE || + token_tag_ == CBORTokenTag::DONE) + return; + ReadNextToken(/*enter_envelope=*/false); +} + +void CBORTokenizer::EnterEnvelope() { + assert(token_tag_ == CBORTokenTag::ENVELOPE); + ReadNextToken(/*enter_envelope=*/true); +} + +Status CBORTokenizer::Status() const { + return status_; +} + +// The following accessor functions ::GetInt32, ::GetDouble, +// ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents +// assume that a particular token was recognized in ::ReadNextToken. +// That's where all the error checking is done. By design, +// the accessors (assuming the token was recognized) never produce +// an error. + +int32_t CBORTokenizer::GetInt32() const { + assert(token_tag_ == CBORTokenTag::INT32); + // The range checks happen in ::ReadNextToken(). + return static_cast( + token_start_type_ == MajorType::UNSIGNED + ? token_start_internal_value_ + : -static_cast(token_start_internal_value_) - 1); +} + +double CBORTokenizer::GetDouble() const { + assert(token_tag_ == CBORTokenTag::DOUBLE); + union { + uint64_t from_uint64; + double to_double; + } reinterpret; + reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst( + bytes_.subspan(status_.pos + 1)); + return reinterpret.to_double; +} + +span CBORTokenizer::GetString8() const { + assert(token_tag_ == CBORTokenTag::STRING8); + auto length = static_cast(token_start_internal_value_); + return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); +} + +span CBORTokenizer::GetString16WireRep() const { + assert(token_tag_ == CBORTokenTag::STRING16); + auto length = static_cast(token_start_internal_value_); + return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); +} + +span CBORTokenizer::GetBinary() const { + assert(token_tag_ == CBORTokenTag::BINARY); + auto length = static_cast(token_start_internal_value_); + return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); +} + +span CBORTokenizer::GetEnvelopeContents() const { + assert(token_tag_ == CBORTokenTag::ENVELOPE); + auto length = static_cast(token_start_internal_value_); + return bytes_.subspan(status_.pos + kEncodedEnvelopeHeaderSize, length); +} + +// All error checking happens in ::ReadNextToken, so that the accessors +// can avoid having to carry an error return value. +// +// With respect to checking the encoded lengths of strings, arrays, etc: +// On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so +// we initially read them as uint64_t, usually into token_start_internal_value_. +// +// However, since these containers have a representation on the machine, +// we need to do corresponding size computations on the input byte array, +// output span (e.g. the payload for a string), etc., and size_t is +// machine specific (in practice either 32 bit or 64 bit). +// +// Further, we must avoid overflowing size_t. Therefore, we use this +// kMaxValidLength constant to: +// - Reject values that are larger than the architecture specific +// max size_t (differs between 32 bit and 64 bit arch). +// - Reserve at least one bit so that we can check against overflows +// when adding lengths (array / string length / etc.); we do this by +// ensuring that the inputs to an addition are <= kMaxValidLength, +// and then checking whether the sum went past it. +// +// See also +// https://chromium.googlesource.com/chromium/src/+/master/docs/security/integer-semantics.md +static const uint64_t kMaxValidLength = + std::min(std::numeric_limits::max() >> 2, + std::numeric_limits::max()); + +void CBORTokenizer::ReadNextToken(bool enter_envelope) { + if (enter_envelope) { + status_.pos += kEncodedEnvelopeHeaderSize; + } else { + status_.pos = + status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_; + } + status_.error = Error::OK; + if (status_.pos >= bytes_.size()) { + token_tag_ = CBORTokenTag::DONE; + return; + } + const size_t remaining_bytes = bytes_.size() - status_.pos; + switch (bytes_[status_.pos]) { + case kStopByte: + SetToken(CBORTokenTag::STOP, 1); + return; + case kInitialByteIndefiniteLengthMap: + SetToken(CBORTokenTag::MAP_START, 1); + return; + case kInitialByteIndefiniteLengthArray: + SetToken(CBORTokenTag::ARRAY_START, 1); + return; + case kEncodedTrue: + SetToken(CBORTokenTag::TRUE_VALUE, 1); + return; + case kEncodedFalse: + SetToken(CBORTokenTag::FALSE_VALUE, 1); + return; + case kEncodedNull: + SetToken(CBORTokenTag::NULL_VALUE, 1); + return; + case kExpectedConversionToBase64Tag: { // BINARY + const int8_t bytes_read = internals::ReadTokenStart( + bytes_.subspan(status_.pos + 1), &token_start_type_, + &token_start_internal_value_); + if (bytes_read < 0 || token_start_type_ != MajorType::BYTE_STRING || + token_start_internal_value_ > kMaxValidLength) { + SetError(Error::CBOR_INVALID_BINARY); + return; + } + const uint64_t token_byte_length = token_start_internal_value_ + + /* tag before token start: */ 1 + + /* token start: */ bytes_read; + if (token_byte_length > remaining_bytes) { + SetError(Error::CBOR_INVALID_BINARY); + return; + } + SetToken(CBORTokenTag::BINARY, static_cast(token_byte_length)); + return; + } + case kInitialByteForDouble: { // DOUBLE + if (kEncodedDoubleSize > remaining_bytes) { + SetError(Error::CBOR_INVALID_DOUBLE); + return; + } + SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize); + return; + } + case kInitialByteForEnvelope: { // ENVELOPE + if (kEncodedEnvelopeHeaderSize > remaining_bytes) { + SetError(Error::CBOR_INVALID_ENVELOPE); + return; + } + // The envelope must be a byte string with 32 bit length. + if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) { + SetError(Error::CBOR_INVALID_ENVELOPE); + return; + } + // Read the length of the byte string. + token_start_internal_value_ = ReadBytesMostSignificantByteFirst( + bytes_.subspan(status_.pos + 2)); + if (token_start_internal_value_ > kMaxValidLength) { + SetError(Error::CBOR_INVALID_ENVELOPE); + return; + } + uint64_t token_byte_length = + token_start_internal_value_ + kEncodedEnvelopeHeaderSize; + if (token_byte_length > remaining_bytes) { + SetError(Error::CBOR_INVALID_ENVELOPE); + return; + } + SetToken(CBORTokenTag::ENVELOPE, static_cast(token_byte_length)); + return; + } + default: { + const int8_t token_start_length = internals::ReadTokenStart( + bytes_.subspan(status_.pos), &token_start_type_, + &token_start_internal_value_); + const bool success = token_start_length >= 0; + switch (token_start_type_) { + case MajorType::UNSIGNED: // INT32. + // INT32 is a signed int32 (int32 makes sense for the + // inspector_protocol, it's not a CBOR limitation), so we check + // against the signed max, so that the allowable values are + // 0, 1, 2, ... 2^31 - 1. + if (!success || std::numeric_limits::max() < + token_start_internal_value_) { + SetError(Error::CBOR_INVALID_INT32); + return; + } + SetToken(CBORTokenTag::INT32, token_start_length); + return; + case MajorType::NEGATIVE: { // INT32. + // INT32 is a signed int32 (int32 makes sense for the + // inspector_protocol, it's not a CBOR limitation); in CBOR, + // the negative values for INT32 are represented as NEGATIVE, + // that is, -1 INT32 is represented as 1 << 5 | 0 (major type 1, + // additional info value 0). So here, we compute the INT32 value + // and then check it against the INT32 min. + int64_t actual_value = + -static_cast(token_start_internal_value_) - 1; + if (!success || actual_value < std::numeric_limits::min()) { + SetError(Error::CBOR_INVALID_INT32); + return; + } + SetToken(CBORTokenTag::INT32, token_start_length); + return; + } + case MajorType::STRING: { // STRING8. + if (!success || token_start_internal_value_ > kMaxValidLength) { + SetError(Error::CBOR_INVALID_STRING8); + return; + } + uint64_t token_byte_length = + token_start_internal_value_ + token_start_length; + if (token_byte_length > remaining_bytes) { + SetError(Error::CBOR_INVALID_STRING8); + return; + } + SetToken(CBORTokenTag::STRING8, + static_cast(token_byte_length)); + return; + } + case MajorType::BYTE_STRING: { // STRING16. + // Length must be divisible by 2 since UTF16 is 2 bytes per + // character, hence the &1 check. + if (!success || token_start_internal_value_ > kMaxValidLength || + token_start_internal_value_ & 1) { + SetError(Error::CBOR_INVALID_STRING16); + return; + } + uint64_t token_byte_length = + token_start_internal_value_ + token_start_length; + if (token_byte_length > remaining_bytes) { + SetError(Error::CBOR_INVALID_STRING16); + return; + } + SetToken(CBORTokenTag::STRING16, + static_cast(token_byte_length)); + return; + } + case MajorType::ARRAY: + case MajorType::MAP: + case MajorType::TAG: + case MajorType::SIMPLE_VALUE: + SetError(Error::CBOR_UNSUPPORTED_VALUE); + return; + } + } + } +} + +void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) { + token_tag_ = token_tag; + token_byte_length_ = token_byte_length; +} + +void CBORTokenizer::SetError(Error error) { + token_tag_ = CBORTokenTag::ERROR_VALUE; + status_.error = error; +} + +// ============================================================================= +// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages +// ============================================================================= + +namespace { +// When parsing CBOR, we limit recursion depth for objects and arrays +// to this constant. +static constexpr int kStackLimit = 300; + +// Below are three parsing routines for CBOR, which cover enough +// to roundtrip JSON messages. +bool ParseMap(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out); +bool ParseArray(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out); +bool ParseValue(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out); + +void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) { + std::vector value; + span rep = tokenizer->GetString16WireRep(); + for (size_t ii = 0; ii < rep.size(); ii += 2) + value.push_back((rep[ii + 1] << 8) | rep[ii]); + out->HandleString16(span(value.data(), value.size())); + tokenizer->Next(); +} + +bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) { + assert(tokenizer->TokenTag() == CBORTokenTag::STRING8); + out->HandleString8(tokenizer->GetString8()); + tokenizer->Next(); + return true; +} + +bool ParseValue(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out) { + if (stack_depth > kStackLimit) { + out->HandleError( + Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos}); + return false; + } + // Skip past the envelope to get to what's inside. + if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE) + tokenizer->EnterEnvelope(); + switch (tokenizer->TokenTag()) { + case CBORTokenTag::ERROR_VALUE: + out->HandleError(tokenizer->Status()); + return false; + case CBORTokenTag::DONE: + out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE, + tokenizer->Status().pos}); + return false; + case CBORTokenTag::TRUE_VALUE: + out->HandleBool(true); + tokenizer->Next(); + return true; + case CBORTokenTag::FALSE_VALUE: + out->HandleBool(false); + tokenizer->Next(); + return true; + case CBORTokenTag::NULL_VALUE: + out->HandleNull(); + tokenizer->Next(); + return true; + case CBORTokenTag::INT32: + out->HandleInt32(tokenizer->GetInt32()); + tokenizer->Next(); + return true; + case CBORTokenTag::DOUBLE: + out->HandleDouble(tokenizer->GetDouble()); + tokenizer->Next(); + return true; + case CBORTokenTag::STRING8: + return ParseUTF8String(tokenizer, out); + case CBORTokenTag::STRING16: + ParseUTF16String(tokenizer, out); + return true; + case CBORTokenTag::BINARY: { + out->HandleBinary(tokenizer->GetBinary()); + tokenizer->Next(); + return true; + } + case CBORTokenTag::MAP_START: + return ParseMap(stack_depth + 1, tokenizer, out); + case CBORTokenTag::ARRAY_START: + return ParseArray(stack_depth + 1, tokenizer, out); + default: + out->HandleError( + Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos}); + return false; + } +} + +// |bytes| must start with the indefinite length array byte, so basically, +// ParseArray may only be called after an indefinite length array has been +// detected. +bool ParseArray(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out) { + assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START); + tokenizer->Next(); + out->HandleArrayBegin(); + while (tokenizer->TokenTag() != CBORTokenTag::STOP) { + if (tokenizer->TokenTag() == CBORTokenTag::DONE) { + out->HandleError( + Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos}); + return false; + } + if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { + out->HandleError(tokenizer->Status()); + return false; + } + // Parse value. + if (!ParseValue(stack_depth, tokenizer, out)) + return false; + } + out->HandleArrayEnd(); + tokenizer->Next(); + return true; +} + +// |bytes| must start with the indefinite length array byte, so basically, +// ParseArray may only be called after an indefinite length array has been +// detected. +bool ParseMap(int32_t stack_depth, + CBORTokenizer* tokenizer, + StreamingParserHandler* out) { + assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START); + out->HandleMapBegin(); + tokenizer->Next(); + while (tokenizer->TokenTag() != CBORTokenTag::STOP) { + if (tokenizer->TokenTag() == CBORTokenTag::DONE) { + out->HandleError( + Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos}); + return false; + } + if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { + out->HandleError(tokenizer->Status()); + return false; + } + // Parse key. + if (tokenizer->TokenTag() == CBORTokenTag::STRING8) { + if (!ParseUTF8String(tokenizer, out)) + return false; + } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) { + ParseUTF16String(tokenizer, out); + } else { + out->HandleError( + Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos}); + return false; + } + // Parse value. + if (!ParseValue(stack_depth, tokenizer, out)) + return false; + } + out->HandleMapEnd(); + tokenizer->Next(); + return true; +} +} // namespace + +void ParseCBOR(span bytes, StreamingParserHandler* out) { + if (bytes.empty()) { + out->HandleError(Status{Error::CBOR_NO_INPUT, 0}); + return; + } + if (bytes[0] != kInitialByteForEnvelope) { + out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0}); + return; + } + CBORTokenizer tokenizer(bytes); + if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { + out->HandleError(tokenizer.Status()); + return; + } + // We checked for the envelope start byte above, so the tokenizer + // must agree here, since it's not an error. + assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE); + tokenizer.EnterEnvelope(); + if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) { + out->HandleError( + Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos}); + return; + } + if (!ParseMap(/*stack_depth=*/1, &tokenizer, out)) + return; + if (tokenizer.TokenTag() == CBORTokenTag::DONE) + return; + if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { + out->HandleError(tokenizer.Status()); + return; + } + out->HandleError(Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos}); +} + +// ============================================================================= +// cbor::AppendString8EntryToMap - for limited in-place editing of messages +// ============================================================================= + +template +Status AppendString8EntryToCBORMapTmpl(span string8_key, + span string8_value, + C* cbor) { + // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since + // it could be a char (signed!). Instead, use bytes. + span bytes(reinterpret_cast(cbor->data()), + cbor->size()); + CBORTokenizer tokenizer(bytes); + if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) + return tokenizer.Status(); + if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE) + return Status(Error::CBOR_INVALID_ENVELOPE, 0); + size_t envelope_size = tokenizer.GetEnvelopeContents().size(); + size_t old_size = cbor->size(); + if (old_size != envelope_size + kEncodedEnvelopeHeaderSize) + return Status(Error::CBOR_INVALID_ENVELOPE, 0); + if (envelope_size == 0 || + (tokenizer.GetEnvelopeContents()[0] != EncodeIndefiniteLengthMapStart())) + return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize); + if (bytes[bytes.size() - 1] != EncodeStop()) + return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1); + cbor->pop_back(); + EncodeString8(string8_key, cbor); + EncodeString8(string8_value, cbor); + cbor->push_back(EncodeStop()); + size_t new_envelope_size = envelope_size + (cbor->size() - old_size); + if (new_envelope_size > std::numeric_limits::max()) + return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0); + size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t); + uint8_t* out = reinterpret_cast(&cbor->at(size_pos)); + *(out++) = (new_envelope_size >> 24) & 0xff; + *(out++) = (new_envelope_size >> 16) & 0xff; + *(out++) = (new_envelope_size >> 8) & 0xff; + *(out) = new_envelope_size & 0xff; + return Status(); +} +Status AppendString8EntryToCBORMap(span string8_key, + span string8_value, + std::vector* cbor) { + return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor); +} +Status AppendString8EntryToCBORMap(span string8_key, + span string8_value, + std::string* cbor) { + return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor); +} +} // namespace cbor + +namespace json { + +// ============================================================================= +// json::NewJSONEncoder - for encoding streaming parser events as JSON +// ============================================================================= + +namespace { +// Prints |value| to |out| with 4 hex digits, most significant chunk first. +template +void PrintHex(uint16_t value, C* out) { + for (int ii = 3; ii >= 0; --ii) { + int four_bits = 0xf & (value >> (4 * ii)); + out->push_back(four_bits + ((four_bits <= 9) ? '0' : ('a' - 10))); + } +} + +// In the writer below, we maintain a stack of State instances. +// It is just enough to emit the appropriate delimiters and brackets +// in JSON. +enum class Container { + // Used for the top-level, initial state. + NONE, + // Inside a JSON object. + MAP, + // Inside a JSON array. + ARRAY +}; +class State { + public: + explicit State(Container container) : container_(container) {} + void StartElement(std::vector* out) { StartElementTmpl(out); } + void StartElement(std::string* out) { StartElementTmpl(out); } + Container container() const { return container_; } + + private: + template + void StartElementTmpl(C* out) { + assert(container_ != Container::NONE || size_ == 0); + if (size_ != 0) { + char delim = (!(size_ & 1) || container_ == Container::ARRAY) ? ',' : ':'; + out->push_back(delim); + } + ++size_; + } + + Container container_ = Container::NONE; + int size_ = 0; +}; + +constexpr char kBase64Table[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz0123456789+/"; + +template +void Base64Encode(const span& in, C* out) { + // The following three cases are based on the tables in the example + // section in https://en.wikipedia.org/wiki/Base64. We process three + // input bytes at a time, emitting 4 output bytes at a time. + size_t ii = 0; + + // While possible, process three input bytes. + for (; ii + 3 <= in.size(); ii += 3) { + uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2]; + out->push_back(kBase64Table[(twentyfour_bits >> 18)]); + out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); + out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]); + out->push_back(kBase64Table[twentyfour_bits & 0x3f]); + } + if (ii + 2 <= in.size()) { // Process two input bytes. + uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8); + out->push_back(kBase64Table[(twentyfour_bits >> 18)]); + out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); + out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]); + out->push_back('='); // Emit padding. + return; + } + if (ii + 1 <= in.size()) { // Process a single input byte. + uint32_t twentyfour_bits = (in[ii] << 16); + out->push_back(kBase64Table[(twentyfour_bits >> 18)]); + out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); + out->push_back('='); // Emit padding. + out->push_back('='); // Emit padding. + } +} + +// Implements a handler for JSON parser events to emit a JSON string. +template +class JSONEncoder : public StreamingParserHandler { + public: + JSONEncoder(const Platform* platform, C* out, Status* status) + : platform_(platform), out_(out), status_(status) { + *status_ = Status(); + state_.emplace(Container::NONE); + } + + void HandleMapBegin() override { + if (!status_->ok()) + return; + assert(!state_.empty()); + state_.top().StartElement(out_); + state_.emplace(Container::MAP); + Emit('{'); + } + + void HandleMapEnd() override { + if (!status_->ok()) + return; + assert(state_.size() >= 2 && state_.top().container() == Container::MAP); + state_.pop(); + Emit('}'); + } + + void HandleArrayBegin() override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + state_.emplace(Container::ARRAY); + Emit('['); + } + + void HandleArrayEnd() override { + if (!status_->ok()) + return; + assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY); + state_.pop(); + Emit(']'); + } + + void HandleString16(span chars) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit('"'); + for (const uint16_t ch : chars) { + if (ch == '"') { + Emit("\\\""); + } else if (ch == '\\') { + Emit("\\\\"); + } else if (ch == '\b') { + Emit("\\b"); + } else if (ch == '\f') { + Emit("\\f"); + } else if (ch == '\n') { + Emit("\\n"); + } else if (ch == '\r') { + Emit("\\r"); + } else if (ch == '\t') { + Emit("\\t"); + } else if (ch >= 32 && ch <= 126) { + Emit(ch); + } else { + Emit("\\u"); + PrintHex(ch, out_); + } + } + Emit('"'); + } + + void HandleString8(span chars) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit('"'); + for (size_t ii = 0; ii < chars.size(); ++ii) { + uint8_t c = chars[ii]; + if (c == '"') { + Emit("\\\""); + } else if (c == '\\') { + Emit("\\\\"); + } else if (c == '\b') { + Emit("\\b"); + } else if (c == '\f') { + Emit("\\f"); + } else if (c == '\n') { + Emit("\\n"); + } else if (c == '\r') { + Emit("\\r"); + } else if (c == '\t') { + Emit("\\t"); + } else if (c >= 32 && c <= 126) { + Emit(c); + } else if (c < 32) { + Emit("\\u"); + PrintHex(static_cast(c), out_); + } else { + // Inspect the leading byte to figure out how long the utf8 + // byte sequence is; while doing this initialize |codepoint| + // with the first few bits. + // See table in: https://en.wikipedia.org/wiki/UTF-8 + // byte one is 110x xxxx -> 2 byte utf8 sequence + // byte one is 1110 xxxx -> 3 byte utf8 sequence + // byte one is 1111 0xxx -> 4 byte utf8 sequence + uint32_t codepoint; + int num_bytes_left; + if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence + num_bytes_left = 1; + codepoint = c & 0x1f; + } else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence + num_bytes_left = 2; + codepoint = c & 0x0f; + } else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence + codepoint = c & 0x07; + num_bytes_left = 3; + } else { + continue; // invalid leading byte + } + + // If we have enough bytes in our input, decode the remaining ones + // belonging to this Unicode character into |codepoint|. + if (ii + num_bytes_left > chars.size()) + continue; + while (num_bytes_left > 0) { + c = chars[++ii]; + --num_bytes_left; + // Check the next byte is a continuation byte, that is 10xx xxxx. + if ((c & 0xc0) != 0x80) + continue; + codepoint = (codepoint << 6) | (c & 0x3f); + } + + // Disallow overlong encodings for ascii characters, as these + // would include " and other characters significant to JSON + // string termination / control. + if (codepoint < 0x7f) + continue; + // Invalid in UTF8, and can't be represented in UTF16 anyway. + if (codepoint > 0x10ffff) + continue; + + // So, now we transcode to UTF16, + // using the math described at https://en.wikipedia.org/wiki/UTF-16, + // for either one or two 16 bit characters. + if (codepoint < 0xffff) { + Emit("\\u"); + PrintHex(static_cast(codepoint), out_); + continue; + } + codepoint -= 0x10000; + // high surrogate + Emit("\\u"); + PrintHex(static_cast((codepoint >> 10) + 0xd800), out_); + // low surrogate + Emit("\\u"); + PrintHex(static_cast((codepoint & 0x3ff) + 0xdc00), out_); + } + } + Emit('"'); + } + + void HandleBinary(span bytes) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit('"'); + Base64Encode(bytes, out_); + Emit('"'); + } + + void HandleDouble(double value) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + // JSON cannot represent NaN or Infinity. So, for compatibility, + // we behave like the JSON object in web browsers: emit 'null'. + if (!std::isfinite(value)) { + Emit("null"); + return; + } + std::unique_ptr str_value = platform_->DToStr(value); + + // DToStr may fail to emit a 0 before the decimal dot. E.g. this is + // the case in base::NumberToString in Chromium (which is based on + // dmg_fp). So, much like + // https://cs.chromium.org/chromium/src/base/json/json_writer.cc + // we probe for this and emit the leading 0 anyway if necessary. + const char* chars = str_value.get(); + if (chars[0] == '.') { + Emit('0'); + } else if (chars[0] == '-' && chars[1] == '.') { + Emit("-0"); + ++chars; + } + Emit(chars); + } + + void HandleInt32(int32_t value) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit(std::to_string(value)); + } + + void HandleBool(bool value) override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit(value ? "true" : "false"); + } + + void HandleNull() override { + if (!status_->ok()) + return; + state_.top().StartElement(out_); + Emit("null"); + } + + void HandleError(Status error) override { + assert(!error.ok()); + *status_ = error; + out_->clear(); + } + + private: + void Emit(char c) { out_->push_back(c); } + void Emit(const char* str) { + out_->insert(out_->end(), str, str + strlen(str)); + } + void Emit(const std::string& str) { + out_->insert(out_->end(), str.begin(), str.end()); + } + + const Platform* platform_; + C* out_; + Status* status_; + std::stack state_; +}; +} // namespace + +std::unique_ptr NewJSONEncoder( + const Platform* platform, + std::vector* out, + Status* status) { + return std::unique_ptr( + new JSONEncoder>(platform, out, status)); +} +std::unique_ptr NewJSONEncoder(const Platform* platform, + std::string* out, + Status* status) { + return std::unique_ptr( + new JSONEncoder(platform, out, status)); +} + +// ============================================================================= +// json::ParseJSON - for receiving streaming parser events for JSON. +// ============================================================================= + +namespace { +const int kStackLimit = 300; + +enum Token { + ObjectBegin, + ObjectEnd, + ArrayBegin, + ArrayEnd, + StringLiteral, + Number, + BoolTrue, + BoolFalse, + NullToken, + ListSeparator, + ObjectPairSeparator, + InvalidToken, + NoInput +}; + +const char* const kNullString = "null"; +const char* const kTrueString = "true"; +const char* const kFalseString = "false"; + +template +class JsonParser { + public: + JsonParser(const Platform* platform, StreamingParserHandler* handler) + : platform_(platform), handler_(handler) {} + + void Parse(const Char* start, size_t length) { + start_pos_ = start; + const Char* end = start + length; + const Char* tokenEnd = nullptr; + ParseValue(start, end, &tokenEnd, 0); + if (error_) + return; + if (tokenEnd != end) { + HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd); + } + } + + private: + bool CharsToDouble(const uint16_t* chars, size_t length, double* result) { + std::string buffer; + buffer.reserve(length + 1); + for (size_t ii = 0; ii < length; ++ii) { + bool is_ascii = !(chars[ii] & ~0x7F); + if (!is_ascii) + return false; + buffer.push_back(static_cast(chars[ii])); + } + return platform_->StrToD(buffer.c_str(), result); + } + + bool CharsToDouble(const uint8_t* chars, size_t length, double* result) { + std::string buffer(reinterpret_cast(chars), length); + return platform_->StrToD(buffer.c_str(), result); + } + + static bool ParseConstToken(const Char* start, + const Char* end, + const Char** token_end, + const char* token) { + // |token| is \0 terminated, it's one of the constants at top of the file. + while (start < end && *token != '\0' && *start++ == *token++) { + } + if (*token != '\0') + return false; + *token_end = start; + return true; + } + + static bool ReadInt(const Char* start, + const Char* end, + const Char** token_end, + bool allow_leading_zeros) { + if (start == end) + return false; + bool has_leading_zero = '0' == *start; + int length = 0; + while (start < end && '0' <= *start && *start <= '9') { + ++start; + ++length; + } + if (!length) + return false; + if (!allow_leading_zeros && length > 1 && has_leading_zero) + return false; + *token_end = start; + return true; + } + + static bool ParseNumberToken(const Char* start, + const Char* end, + const Char** token_end) { + // We just grab the number here. We validate the size in DecodeNumber. + // According to RFC4627, a valid number is: [minus] int [frac] [exp] + if (start == end) + return false; + Char c = *start; + if ('-' == c) + ++start; + + if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false)) + return false; + if (start == end) { + *token_end = start; + return true; + } + + // Optional fraction part + c = *start; + if ('.' == c) { + ++start; + if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true)) + return false; + if (start == end) { + *token_end = start; + return true; + } + c = *start; + } + + // Optional exponent part + if ('e' == c || 'E' == c) { + ++start; + if (start == end) + return false; + c = *start; + if ('-' == c || '+' == c) { + ++start; + if (start == end) + return false; + } + if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true)) + return false; + } + + *token_end = start; + return true; + } + + static bool ReadHexDigits(const Char* start, + const Char* end, + const Char** token_end, + int digits) { + if (end - start < digits) + return false; + for (int i = 0; i < digits; ++i) { + Char c = *start++; + if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F'))) + return false; + } + *token_end = start; + return true; + } + + static bool ParseStringToken(const Char* start, + const Char* end, + const Char** token_end) { + while (start < end) { + Char c = *start++; + if ('\\' == c) { + if (start == end) + return false; + c = *start++; + // Make sure the escaped char is valid. + switch (c) { + case 'x': + if (!ReadHexDigits(start, end, &start, 2)) + return false; + break; + case 'u': + if (!ReadHexDigits(start, end, &start, 4)) + return false; + break; + case '\\': + case '/': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + case '"': + break; + default: + return false; + } + } else if ('"' == c) { + *token_end = start; + return true; + } + } + return false; + } + + static bool SkipComment(const Char* start, + const Char* end, + const Char** comment_end) { + if (start == end) + return false; + + if (*start != '/' || start + 1 >= end) + return false; + ++start; + + if (*start == '/') { + // Single line comment, read to newline. + for (++start; start < end; ++start) { + if (*start == '\n' || *start == '\r') { + *comment_end = start + 1; + return true; + } + } + *comment_end = end; + // Comment reaches end-of-input, which is fine. + return true; + } + + if (*start == '*') { + Char previous = '\0'; + // Block comment, read until end marker. + for (++start; start < end; previous = *start++) { + if (previous == '*' && *start == '/') { + *comment_end = start + 1; + return true; + } + } + // Block comment must close before end-of-input. + return false; + } + + return false; + } + + static bool IsSpaceOrNewLine(Char c) { + // \v = vertial tab; \f = form feed page break. + return c == ' ' || c == '\n' || c == '\v' || c == '\f' || c == '\r' || + c == '\t'; + } + + static void SkipWhitespaceAndComments(const Char* start, + const Char* end, + const Char** whitespace_end) { + while (start < end) { + if (IsSpaceOrNewLine(*start)) { + ++start; + } else if (*start == '/') { + const Char* comment_end = nullptr; + if (!SkipComment(start, end, &comment_end)) + break; + start = comment_end; + } else { + break; + } + } + *whitespace_end = start; + } + + static Token ParseToken(const Char* start, + const Char* end, + const Char** tokenStart, + const Char** token_end) { + SkipWhitespaceAndComments(start, end, tokenStart); + start = *tokenStart; + + if (start == end) + return NoInput; + + switch (*start) { + case 'n': + if (ParseConstToken(start, end, token_end, kNullString)) + return NullToken; + break; + case 't': + if (ParseConstToken(start, end, token_end, kTrueString)) + return BoolTrue; + break; + case 'f': + if (ParseConstToken(start, end, token_end, kFalseString)) + return BoolFalse; + break; + case '[': + *token_end = start + 1; + return ArrayBegin; + case ']': + *token_end = start + 1; + return ArrayEnd; + case ',': + *token_end = start + 1; + return ListSeparator; + case '{': + *token_end = start + 1; + return ObjectBegin; + case '}': + *token_end = start + 1; + return ObjectEnd; + case ':': + *token_end = start + 1; + return ObjectPairSeparator; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + if (ParseNumberToken(start, end, token_end)) + return Number; + break; + case '"': + if (ParseStringToken(start + 1, end, token_end)) + return StringLiteral; + break; + } + return InvalidToken; + } + + static int HexToInt(Char c) { + if ('0' <= c && c <= '9') + return c - '0'; + if ('A' <= c && c <= 'F') + return c - 'A' + 10; + if ('a' <= c && c <= 'f') + return c - 'a' + 10; + assert(false); // Unreachable. + return 0; + } + + static bool DecodeString(const Char* start, + const Char* end, + std::vector* output) { + if (start == end) + return true; + if (start > end) + return false; + output->reserve(end - start); + while (start < end) { + uint16_t c = *start++; + // If the |Char| we're dealing with is really a byte, then + // we have utf8 here, and we need to check for multibyte characters + // and transcode them to utf16 (either one or two utf16 chars). + if (sizeof(Char) == sizeof(uint8_t) && c >= 0x7f) { + // Inspect the leading byte to figure out how long the utf8 + // byte sequence is; while doing this initialize |codepoint| + // with the first few bits. + // See table in: https://en.wikipedia.org/wiki/UTF-8 + // byte one is 110x xxxx -> 2 byte utf8 sequence + // byte one is 1110 xxxx -> 3 byte utf8 sequence + // byte one is 1111 0xxx -> 4 byte utf8 sequence + uint32_t codepoint; + int num_bytes_left; + if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence + num_bytes_left = 1; + codepoint = c & 0x1f; + } else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence + num_bytes_left = 2; + codepoint = c & 0x0f; + } else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence + codepoint = c & 0x07; + num_bytes_left = 3; + } else { + return false; // invalid leading byte + } + + // If we have enough bytes in our inpput, decode the remaining ones + // belonging to this Unicode character into |codepoint|. + if (start + num_bytes_left > end) + return false; + while (num_bytes_left > 0) { + c = *start++; + --num_bytes_left; + // Check the next byte is a continuation byte, that is 10xx xxxx. + if ((c & 0xc0) != 0x80) + return false; + codepoint = (codepoint << 6) | (c & 0x3f); + } + + // Disallow overlong encodings for ascii characters, as these + // would include " and other characters significant to JSON + // string termination / control. + if (codepoint < 0x7f) + return false; + // Invalid in UTF8, and can't be represented in UTF16 anyway. + if (codepoint > 0x10ffff) + return false; + + // So, now we transcode to UTF16, + // using the math described at https://en.wikipedia.org/wiki/UTF-16, + // for either one or two 16 bit characters. + if (codepoint < 0xffff) { + output->push_back(codepoint); + continue; + } + codepoint -= 0x10000; + output->push_back((codepoint >> 10) + 0xd800); // high surrogate + output->push_back((codepoint & 0x3ff) + 0xdc00); // low surrogate + continue; + } + if ('\\' != c) { + output->push_back(c); + continue; + } + if (start == end) + return false; + c = *start++; + + if (c == 'x') { + // \x is not supported. + return false; + } + + switch (c) { + case '"': + case '/': + case '\\': + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case 'u': + c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) + + (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3)); + start += 4; + break; + default: + return false; + } + output->push_back(c); + } + return true; + } + + void ParseValue(const Char* start, + const Char* end, + const Char** value_token_end, + int depth) { + if (depth > kStackLimit) { + HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start); + return; + } + const Char* token_start = nullptr; + const Char* token_end = nullptr; + Token token = ParseToken(start, end, &token_start, &token_end); + switch (token) { + case NoInput: + HandleError(Error::JSON_PARSER_NO_INPUT, token_start); + return; + case InvalidToken: + HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start); + return; + case NullToken: + handler_->HandleNull(); + break; + case BoolTrue: + handler_->HandleBool(true); + break; + case BoolFalse: + handler_->HandleBool(false); + break; + case Number: { + double value; + if (!CharsToDouble(token_start, token_end - token_start, &value)) { + HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start); + return; + } + if (value >= std::numeric_limits::min() && + value <= std::numeric_limits::max() && + static_cast(value) == value) + handler_->HandleInt32(static_cast(value)); + else + handler_->HandleDouble(value); + break; + } + case StringLiteral: { + std::vector value; + bool ok = DecodeString(token_start + 1, token_end - 1, &value); + if (!ok) { + HandleError(Error::JSON_PARSER_INVALID_STRING, token_start); + return; + } + handler_->HandleString16(span(value.data(), value.size())); + break; + } + case ArrayBegin: { + handler_->HandleArrayBegin(); + start = token_end; + token = ParseToken(start, end, &token_start, &token_end); + while (token != ArrayEnd) { + ParseValue(start, end, &token_end, depth + 1); + if (error_) + return; + + // After a list value, we expect a comma or the end of the list. + start = token_end; + token = ParseToken(start, end, &token_start, &token_end); + if (token == ListSeparator) { + start = token_end; + token = ParseToken(start, end, &token_start, &token_end); + if (token == ArrayEnd) { + HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start); + return; + } + } else if (token != ArrayEnd) { + // Unexpected value after list value. Bail out. + HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED, + token_start); + return; + } + } + handler_->HandleArrayEnd(); + break; + } + case ObjectBegin: { + handler_->HandleMapBegin(); + start = token_end; + token = ParseToken(start, end, &token_start, &token_end); + while (token != ObjectEnd) { + if (token != StringLiteral) { + HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED, + token_start); + return; + } + std::vector key; + if (!DecodeString(token_start + 1, token_end - 1, &key)) { + HandleError(Error::JSON_PARSER_INVALID_STRING, token_start); + return; + } + handler_->HandleString16(span(key.data(), key.size())); + start = token_end; + + token = ParseToken(start, end, &token_start, &token_end); + if (token != ObjectPairSeparator) { + HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start); + return; + } + start = token_end; + + ParseValue(start, end, &token_end, depth + 1); + if (error_) + return; + start = token_end; + + // After a key/value pair, we expect a comma or the end of the + // object. + token = ParseToken(start, end, &token_start, &token_end); + if (token == ListSeparator) { + start = token_end; + token = ParseToken(start, end, &token_start, &token_end); + if (token == ObjectEnd) { + HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start); + return; + } + } else if (token != ObjectEnd) { + // Unexpected value after last object value. Bail out. + HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED, + token_start); + return; + } + } + handler_->HandleMapEnd(); + break; + } + + default: + // We got a token that's not a value. + HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start); + return; + } + + SkipWhitespaceAndComments(token_end, end, value_token_end); + } + + void HandleError(Error error, const Char* pos) { + assert(error != Error::OK); + if (!error_) { + handler_->HandleError( + Status{error, static_cast(pos - start_pos_)}); + error_ = true; + } + } + + const Char* start_pos_ = nullptr; + bool error_ = false; + const Platform* platform_; + StreamingParserHandler* handler_; +}; +} // namespace + +void ParseJSON(const Platform& platform, + span chars, + StreamingParserHandler* handler) { + JsonParser parser(&platform, handler); + parser.Parse(chars.data(), chars.size()); +} + +void ParseJSON(const Platform& platform, + span chars, + StreamingParserHandler* handler) { + JsonParser parser(&platform, handler); + parser.Parse(chars.data(), chars.size()); +} + +// ============================================================================= +// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding +// ============================================================================= +template +Status ConvertCBORToJSONTmpl(const Platform& platform, + span cbor, + C* json) { + Status status; + std::unique_ptr json_writer = + NewJSONEncoder(&platform, json, &status); + cbor::ParseCBOR(cbor, json_writer.get()); + return status; +} + +Status ConvertCBORToJSON(const Platform& platform, + span cbor, + std::vector* json) { + return ConvertCBORToJSONTmpl(platform, cbor, json); +} +Status ConvertCBORToJSON(const Platform& platform, + span cbor, + std::string* json) { + return ConvertCBORToJSONTmpl(platform, cbor, json); +} + +template +Status ConvertJSONToCBORTmpl(const Platform& platform, span json, C* cbor) { + Status status; + std::unique_ptr encoder = + cbor::NewCBOREncoder(cbor, &status); + ParseJSON(platform, json, encoder.get()); + return status; +} +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::string* cbor) { + return ConvertJSONToCBORTmpl(platform, json, cbor); +} +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::string* cbor) { + return ConvertJSONToCBORTmpl(platform, json, cbor); +} +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::vector* cbor) { + return ConvertJSONToCBORTmpl(platform, json, cbor); +} +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::vector* cbor) { + return ConvertJSONToCBORTmpl(platform, json, cbor); +} +} // namespace json + +{% for namespace in config.protocol.namespace %} +} // namespace {{namespace}} +{% endfor %} diff --git a/tools/inspector_protocol/lib/encoding_h.template b/tools/inspector_protocol/lib/encoding_h.template new file mode 100644 index 0000000000..f1a52a1958 --- /dev/null +++ b/tools/inspector_protocol/lib/encoding_h.template @@ -0,0 +1,520 @@ +{# This template is generated by gen_cbor_templates.py. #} +// Generated by lib/encoding_h.template. + +// Copyright 2019 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef {{"_".join(config.protocol.namespace)}}_encoding_h +#define {{"_".join(config.protocol.namespace)}}_encoding_h + +#include +#include +#include +#include +#include +#include +#include + +{% for namespace in config.protocol.namespace %} +namespace {{namespace}} { +{% endfor %} + +// ===== encoding/encoding.h ===== + + +// ============================================================================= +// span - sequence of bytes +// ============================================================================= + +// This template is similar to std::span, which will be included in C++20. +template +class span { + public: + using index_type = size_t; + + span() : data_(nullptr), size_(0) {} + span(const T* data, index_type size) : data_(data), size_(size) {} + + const T* data() const { return data_; } + + const T* begin() const { return data_; } + const T* end() const { return data_ + size_; } + + const T& operator[](index_type idx) const { return data_[idx]; } + + span subspan(index_type offset, index_type count) const { + return span(data_ + offset, count); + } + + span subspan(index_type offset) const { + return span(data_ + offset, size_ - offset); + } + + bool empty() const { return size_ == 0; } + + index_type size() const { return size_; } + index_type size_bytes() const { return size_ * sizeof(T); } + + private: + const T* data_; + index_type size_; +}; + +template +span SpanFrom(const std::vector& v) { + return span(v.data(), v.size()); +} + +template +span SpanFrom(const char (&str)[N]) { + return span(reinterpret_cast(str), N - 1); +} + +inline span SpanFrom(const char* str) { + return str ? span(reinterpret_cast(str), strlen(str)) + : span(); +} + +inline span SpanFrom(const std::string& v) { + return span(reinterpret_cast(v.data()), v.size()); +} + +// ============================================================================= +// Status and Error codes +// ============================================================================= +enum class Error { + OK = 0, + // JSON parsing errors - json_parser.{h,cc}. + JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01, + JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02, + JSON_PARSER_NO_INPUT = 0x03, + JSON_PARSER_INVALID_TOKEN = 0x04, + JSON_PARSER_INVALID_NUMBER = 0x05, + JSON_PARSER_INVALID_STRING = 0x06, + JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07, + JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08, + JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09, + JSON_PARSER_COLON_EXPECTED = 0x0a, + JSON_PARSER_UNEXPECTED_MAP_END = 0x0b, + JSON_PARSER_COMMA_OR_MAP_END_EXPECTED = 0x0c, + JSON_PARSER_VALUE_EXPECTED = 0x0d, + + CBOR_INVALID_INT32 = 0x0e, + CBOR_INVALID_DOUBLE = 0x0f, + CBOR_INVALID_ENVELOPE = 0x10, + CBOR_INVALID_STRING8 = 0x11, + CBOR_INVALID_STRING16 = 0x12, + CBOR_INVALID_BINARY = 0x13, + CBOR_UNSUPPORTED_VALUE = 0x14, + CBOR_NO_INPUT = 0x15, + CBOR_INVALID_START_BYTE = 0x16, + CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x17, + CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x18, + CBOR_UNEXPECTED_EOF_IN_MAP = 0x19, + CBOR_INVALID_MAP_KEY = 0x1a, + CBOR_STACK_LIMIT_EXCEEDED = 0x1b, + CBOR_TRAILING_JUNK = 0x1c, + CBOR_MAP_START_EXPECTED = 0x1d, + CBOR_MAP_STOP_EXPECTED = 0x1e, + CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED = 0x1f, +}; + +// A status value with position that can be copied. The default status +// is OK. Usually, error status values should come with a valid position. +struct Status { + static constexpr size_t npos() { return std::numeric_limits::max(); } + + bool ok() const { return error == Error::OK; } + + Error error = Error::OK; + size_t pos = npos(); + Status(Error error, size_t pos) : error(error), pos(pos) {} + Status() = default; + + // Returns a 7 bit US-ASCII string, either "OK" or an error message + // that includes the position. + std::string ToASCIIString() const; + + private: + std::string ToASCIIString(const char* msg) const; +}; + +// Handler interface for parser events emitted by a streaming parser. +// See cbor::NewCBOREncoder, cbor::ParseCBOR, json::NewJSONEncoder, +// json::ParseJSON. +class StreamingParserHandler { + public: + virtual ~StreamingParserHandler() = default; + virtual void HandleMapBegin() = 0; + virtual void HandleMapEnd() = 0; + virtual void HandleArrayBegin() = 0; + virtual void HandleArrayEnd() = 0; + virtual void HandleString8(span chars) = 0; + virtual void HandleString16(span chars) = 0; + virtual void HandleBinary(span bytes) = 0; + virtual void HandleDouble(double value) = 0; + virtual void HandleInt32(int32_t value) = 0; + virtual void HandleBool(bool value) = 0; + virtual void HandleNull() = 0; + + // The parser may send one error even after other events have already + // been received. Client code is reponsible to then discard the + // already processed events. + // |error| must be an eror, as in, |error.is_ok()| can't be true. + virtual void HandleError(Status error) = 0; +}; + +namespace cbor { +// The binary encoding for the inspector protocol follows the CBOR specification +// (RFC 7049). Additional constraints: +// - Only indefinite length maps and arrays are supported. +// - Maps and arrays are wrapped with an envelope, that is, a +// CBOR tag with value 24 followed by a byte string specifying +// the byte length of the enclosed map / array. The byte string +// must use a 32 bit wide length. +// - At the top level, a message must be an indefinite length map +// wrapped by an envelope. +// - Maximal size for messages is 2^32 (4 GB). +// - For scalars, we support only the int32_t range, encoded as +// UNSIGNED/NEGATIVE (major types 0 / 1). +// - UTF16 strings, including with unbalanced surrogate pairs, are encoded +// as CBOR BYTE_STRING (major type 2). For such strings, the number of +// bytes encoded must be even. +// - UTF8 strings (major type 3) are supported. +// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never +// as UTF16 strings. +// - Arbitrary byte arrays, in the inspector protocol called 'binary', +// are encoded as BYTE_STRING (major type 2), prefixed with a byte +// indicating base64 when rendered as JSON. + +// ============================================================================= +// Detecting CBOR content +// ============================================================================= + +// The first byte for an envelope, which we use for wrapping dictionaries +// and arrays; and the byte that indicates a byte string with 32 bit length. +// These two bytes start an envelope, and thereby also any CBOR message +// produced or consumed by this protocol. See also |EnvelopeEncoder| below. +uint8_t InitialByteForEnvelope(); +uint8_t InitialByteFor32BitLengthByteString(); + +// Checks whether |msg| is a cbor message. +bool IsCBORMessage(span msg); + +// ============================================================================= +// Encoding individual CBOR items +// ============================================================================= + +// Some constants for CBOR tokens that only take a single byte on the wire. +uint8_t EncodeTrue(); +uint8_t EncodeFalse(); +uint8_t EncodeNull(); +uint8_t EncodeIndefiniteLengthArrayStart(); +uint8_t EncodeIndefiniteLengthMapStart(); +uint8_t EncodeStop(); + +// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE| +// (major type 1) iff < 0. +void EncodeInt32(int32_t value, std::vector* out); +void EncodeInt32(int32_t value, std::string* out); + +// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16 +// character in |in| is emitted with most significant byte first, +// appending to |out|. +void EncodeString16(span in, std::vector* out); +void EncodeString16(span in, std::string* out); + +// Encodes a UTF8 string |in| as STRING (major type 3). +void EncodeString8(span in, std::vector* out); +void EncodeString8(span in, std::string* out); + +// Encodes the given |latin1| string as STRING8. +// If any non-ASCII character is present, it will be represented +// as a 2 byte UTF8 sequence. +void EncodeFromLatin1(span latin1, std::vector* out); +void EncodeFromLatin1(span latin1, std::string* out); + +// Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII. +// Otherwise, encodes as STRING16. +void EncodeFromUTF16(span utf16, std::vector* out); +void EncodeFromUTF16(span utf16, std::string* out); + +// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with +// definitive length, prefixed with tag 22 indicating expected conversion to +// base64 (see RFC 7049, Table 3 and Section 2.4.4.2). +void EncodeBinary(span in, std::vector* out); +void EncodeBinary(span in, std::string* out); + +// Encodes / decodes a double as Major type 7 (SIMPLE_VALUE), +// with additional info = 27, followed by 8 bytes in big endian. +void EncodeDouble(double value, std::vector* out); +void EncodeDouble(double value, std::string* out); + +// ============================================================================= +// cbor::EnvelopeEncoder - for wrapping submessages +// ============================================================================= + +// An envelope indicates the byte length of a wrapped item. +// We use this for maps and array, which allows the decoder +// to skip such (nested) values whole sale. +// It's implemented as a CBOR tag (major type 6) with additional +// info = 24, followed by a byte string with a 32 bit length value; +// so the maximal structure that we can wrap is 2^32 bits long. +// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1 +class EnvelopeEncoder { + public: + // Emits the envelope start bytes and records the position for the + // byte size in |byte_size_pos_|. Also emits empty bytes for the + // byte sisze so that encoding can continue. + void EncodeStart(std::vector* out); + void EncodeStart(std::string* out); + // This records the current size in |out| at position byte_size_pos_. + // Returns true iff successful. + bool EncodeStop(std::vector* out); + bool EncodeStop(std::string* out); + + private: + size_t byte_size_pos_ = 0; +}; + +// ============================================================================= +// cbor::NewCBOREncoder - for encoding from a streaming parser +// ============================================================================= + +// This can be used to convert to CBOR, by passing the return value to a parser +// that drives it. The handler will encode into |out|, and iff an error occurs +// it will set |status| to an error and clear |out|. Otherwise, |status.ok()| +// will be |true|. +std::unique_ptr NewCBOREncoder( + std::vector* out, + Status* status); +std::unique_ptr NewCBOREncoder(std::string* out, + Status* status); + +// ============================================================================= +// cbor::CBORTokenizer - for parsing individual CBOR items +// ============================================================================= + +// Tags for the tokens within a CBOR message that CBORTokenizer understands. +// Note that this is not the same terminology as the CBOR spec (RFC 7049), +// but rather, our adaptation. For instance, we lump unsigned and signed +// major type into INT32 here (and disallow values outside the int32_t range). +enum class CBORTokenTag { + // Encountered an error in the structure of the message. Consult + // status() for details. + ERROR_VALUE, + // Booleans and NULL. + TRUE_VALUE, + FALSE_VALUE, + NULL_VALUE, + // An int32_t (signed 32 bit integer). + INT32, + // A double (64 bit floating point). + DOUBLE, + // A UTF8 string. + STRING8, + // A UTF16 string. + STRING16, + // A binary string. + BINARY, + // Starts an indefinite length map; after the map start we expect + // alternating keys and values, followed by STOP. + MAP_START, + // Starts an indefinite length array; after the array start we + // expect values, followed by STOP. + ARRAY_START, + // Ends a map or an array. + STOP, + // An envelope indicator, wrapping a map or array. + // Internally this carries the byte length of the wrapped + // map or array. While CBORTokenizer::Next() will read / skip the entire + // envelope, CBORTokenizer::EnterEnvelope() reads the tokens + // inside of it. + ENVELOPE, + // We've reached the end there is nothing else to read. + DONE, +}; + +// The major types from RFC 7049 Section 2.1. +enum class MajorType { + UNSIGNED = 0, + NEGATIVE = 1, + BYTE_STRING = 2, + STRING = 3, + ARRAY = 4, + MAP = 5, + TAG = 6, + SIMPLE_VALUE = 7 +}; + +// CBORTokenizer segments a CBOR message, presenting the tokens therein as +// numbers, strings, etc. This is not a complete CBOR parser, but makes it much +// easier to implement one (e.g. ParseCBOR, above). It can also be used to parse +// messages partially. +class CBORTokenizer { + public: + explicit CBORTokenizer(span bytes); + ~CBORTokenizer(); + + // Identifies the current token that we're looking at, + // or ERROR_VALUE (in which ase ::Status() has details) + // or DONE (if we're past the last token). + CBORTokenTag TokenTag() const; + + // Advances to the next token. + void Next(); + // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE. + // While Next() would skip past the entire envelope / what it's + // wrapping, EnterEnvelope positions the cursor inside of the envelope, + // letting the client explore the nested structure. + void EnterEnvelope(); + + // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes + // the error more precisely; otherwise it'll be set to Error::OK. + // In either case, Status().pos is the current position. + struct Status Status() const; + + // The following methods retrieve the token values. They can only + // be called if TokenTag() matches. + + // To be called only if ::TokenTag() == CBORTokenTag::INT32. + int32_t GetInt32() const; + + // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE. + double GetDouble() const; + + // To be called only if ::TokenTag() == CBORTokenTag::STRING8. + span GetString8() const; + + // Wire representation for STRING16 is low byte first (little endian). + // To be called only if ::TokenTag() == CBORTokenTag::STRING16. + span GetString16WireRep() const; + + // To be called only if ::TokenTag() == CBORTokenTag::BINARY. + span GetBinary() const; + + // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE. + span GetEnvelopeContents() const; + + private: + void ReadNextToken(bool enter_envelope); + void SetToken(CBORTokenTag token, size_t token_byte_length); + void SetError(Error error); + + span bytes_; + CBORTokenTag token_tag_; + struct Status status_; + size_t token_byte_length_; + MajorType token_start_type_; + uint64_t token_start_internal_value_; +}; + +// ============================================================================= +// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages +// ============================================================================= + +// Parses a CBOR encoded message from |bytes|, sending events to +// |out|. If an error occurs, sends |out->HandleError|, and parsing stops. +// The client is responsible for discarding the already received information in +// that case. +void ParseCBOR(span bytes, StreamingParserHandler* out); + +// ============================================================================= +// cbor::AppendString8EntryToMap - for limited in-place editing of messages +// ============================================================================= + +// Modifies the |cbor| message by appending a new key/value entry at the end +// of the map. Patches up the envelope size; Status.ok() iff successful. +// If not successful, |cbor| may be corrupted after this call. +Status AppendString8EntryToCBORMap(span string8_key, + span string8_value, + std::vector* cbor); +Status AppendString8EntryToCBORMap(span string8_key, + span string8_value, + std::string* cbor); + +namespace internals { // Exposed only for writing tests. +int8_t ReadTokenStart(span bytes, + cbor::MajorType* type, + uint64_t* value); + +void WriteTokenStart(cbor::MajorType type, + uint64_t value, + std::vector* encoded); +void WriteTokenStart(cbor::MajorType type, + uint64_t value, + std::string* encoded); +} // namespace internals +} // namespace cbor + +namespace json { +// Client code must provide an instance. Implementation should delegate +// to whatever is appropriate. +class Platform { + public: + virtual ~Platform() = default; + // Parses |str| into |result|. Returns false iff there are + // leftover characters or parsing errors. + virtual bool StrToD(const char* str, double* result) const = 0; + + // Prints |value| in a format suitable for JSON. + virtual std::unique_ptr DToStr(double value) const = 0; +}; + +// ============================================================================= +// json::NewJSONEncoder - for encoding streaming parser events as JSON +// ============================================================================= + +// Returns a handler object which will write ascii characters to |out|. +// |status->ok()| will be false iff the handler routine HandleError() is called. +// In that case, we'll stop emitting output. +// Except for calling the HandleError routine at any time, the client +// code must call the Handle* methods in an order in which they'd occur +// in valid JSON; otherwise we may crash (the code uses assert). +std::unique_ptr NewJSONEncoder( + const Platform* platform, + std::vector* out, + Status* status); +std::unique_ptr NewJSONEncoder(const Platform* platform, + std::string* out, + Status* status); + +// ============================================================================= +// json::ParseJSON - for receiving streaming parser events for JSON +// ============================================================================= + +void ParseJSON(const Platform& platform, + span chars, + StreamingParserHandler* handler); +void ParseJSON(const Platform& platform, + span chars, + StreamingParserHandler* handler); + +// ============================================================================= +// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding +// ============================================================================= +Status ConvertCBORToJSON(const Platform& platform, + span cbor, + std::string* json); +Status ConvertCBORToJSON(const Platform& platform, + span cbor, + std::vector* json); +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::vector* cbor); +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::vector* cbor); +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::string* cbor); +Status ConvertJSONToCBOR(const Platform& platform, + span json, + std::string* cbor); +} // namespace json + +{% for namespace in config.protocol.namespace %} +} // namespace {{namespace}} +{% endfor %} +#endif // !defined({{"_".join(config.protocol.namespace)}}_encoding_h) diff --git a/tools/inspector_protocol/pdl.py b/tools/inspector_protocol/pdl.py index 43111e944b..03d11b39d6 100644 --- a/tools/inspector_protocol/pdl.py +++ b/tools/inspector_protocol/pdl.py @@ -74,20 +74,20 @@ def parse(data, file_name, map_binary_to_string=False): if len(trimLine) == 0: continue - match = re.compile('^(experimental )?(deprecated )?domain (.*)').match(line) + match = re.compile(r'^(experimental )?(deprecated )?domain (.*)').match(line) if match: domain = createItem({'domain' : match.group(3)}, match.group(1), match.group(2)) protocol['domains'].append(domain) continue - match = re.compile('^ depends on ([^\s]+)').match(line) + match = re.compile(r'^ depends on ([^\s]+)').match(line) if match: if 'dependencies' not in domain: domain['dependencies'] = [] domain['dependencies'].append(match.group(1)) continue - match = re.compile('^ (experimental )?(deprecated )?type (.*) extends (array of )?([^\s]+)').match(line) + match = re.compile(r'^ (experimental )?(deprecated )?type (.*) extends (array of )?([^\s]+)').match(line) if match: if 'types' not in domain: domain['types'] = [] @@ -96,7 +96,7 @@ def parse(data, file_name, map_binary_to_string=False): domain['types'].append(item) continue - match = re.compile('^ (experimental )?(deprecated )?(command|event) (.*)').match(line) + match = re.compile(r'^ (experimental )?(deprecated )?(command|event) (.*)').match(line) if match: list = [] if match.group(3) == 'command': @@ -114,7 +114,7 @@ def parse(data, file_name, map_binary_to_string=False): list.append(item) continue - match = re.compile('^ (experimental )?(deprecated )?(optional )?(array of )?([^\s]+) ([^\s]+)').match(line) + match = re.compile(r'^ (experimental )?(deprecated )?(optional )?(array of )?([^\s]+) ([^\s]+)').match(line) if match: param = createItem({}, match.group(1), match.group(2), match.group(6)) if match.group(3): @@ -125,36 +125,36 @@ def parse(data, file_name, map_binary_to_string=False): subitems.append(param) continue - match = re.compile('^ (parameters|returns|properties)').match(line) + match = re.compile(r'^ (parameters|returns|properties)').match(line) if match: subitems = item[match.group(1)] = [] continue - match = re.compile('^ enum').match(line) + match = re.compile(r'^ enum').match(line) if match: enumliterals = item['enum'] = [] continue - match = re.compile('^version').match(line) + match = re.compile(r'^version').match(line) if match: continue - match = re.compile('^ major (\d+)').match(line) + match = re.compile(r'^ major (\d+)').match(line) if match: protocol['version']['major'] = match.group(1) continue - match = re.compile('^ minor (\d+)').match(line) + match = re.compile(r'^ minor (\d+)').match(line) if match: protocol['version']['minor'] = match.group(1) continue - match = re.compile('^ redirect ([^\s]+)').match(line) + match = re.compile(r'^ redirect ([^\s]+)').match(line) if match: item['redirect'] = match.group(1) continue - match = re.compile('^ ( )?[^\s]+$').match(line) + match = re.compile(r'^ ( )?[^\s]+$').match(line) if match: # enum literal enumliterals.append(trimLine) diff --git a/tools/inspector_protocol/roll.py b/tools/inspector_protocol/roll.py new file mode 100644 index 0000000000..abe636e270 --- /dev/null +++ b/tools/inspector_protocol/roll.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# Copyright 2019 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +from __future__ import print_function +import argparse +import sys +import os +import subprocess +import glob +import shutil + + +FILES_TO_SYNC = [ + 'README.md', + 'check_protocol_compatibility.py', + 'code_generator.py', + 'concatenate_protocols.py', + 'convert_protocol_to_json.py', + 'encoding/encoding.h', + 'encoding/encoding.cc', + 'encoding/encoding_test.cc', + 'inspector_protocol.gni', + 'inspector_protocol.gypi', + 'lib/*', + 'pdl.py', + 'templates/*', +] + + +def RunCmd(cmd): + p = subprocess.Popen(cmd, stdout=subprocess.PIPE) + (stdoutdata, stderrdata) = p.communicate() + if p.returncode != 0: + raise Exception('%s: exit status %d', str(cmd), p.returncode) + return stdoutdata + + +def CheckRepoIsClean(path, suffix): + os.chdir(path) # As a side effect this also checks for existence of the dir. + # If path isn't a git repo, this will throw and exception. + # And if it is a git repo and 'git status' has anything interesting to say, + # then it's not clean (uncommitted files etc.) + if len(RunCmd(['git', 'status', '--porcelain'])) != 0: + raise Exception('%s is not a clean git repo (run git status)' % path) + if not path.endswith(suffix): + raise Exception('%s does not end with /%s' % (path, suffix)) + + +def CheckRepoIsNotAtMasterBranch(path): + os.chdir(path) + stdout = RunCmd(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip() + if stdout == 'master': + raise Exception('%s is at master branch - refusing to copy there.' % path) + + +def CheckRepoIsV8Checkout(path): + os.chdir(path) + if (RunCmd(['git', 'config', '--get', 'remote.origin.url']).strip() != + 'https://chromium.googlesource.com/v8/v8.git'): + raise Exception('%s is not a proper V8 checkout.' % path) + + +def CheckRepoIsInspectorProtocolCheckout(path): + os.chdir(path) + if (RunCmd(['git', 'config', '--get', 'remote.origin.url']).strip() != + 'https://chromium.googlesource.com/deps/inspector_protocol.git'): + raise Exception('%s is not a proper inspector_protocol checkout.' % path) + + +def FindFilesToSyncIn(path): + files = [] + for f in FILES_TO_SYNC: + files += glob.glob(os.path.join(path, f)) + files = [os.path.relpath(f, path) for f in files] + return files + + +def FilesAreEqual(path1, path2): + # We check for permissions (useful for executable scripts) and contents. + return (os.stat(path1).st_mode == os.stat(path2).st_mode and + open(path1).read() == open(path2).read()) + + +def GetHeadRevision(path): + os.chdir(path) + return RunCmd(['git', 'rev-parse', 'HEAD']) + + +def main(argv): + parser = argparse.ArgumentParser(description=( + "Rolls the inspector_protocol project (upstream) into V8's " + "third_party (downstream).")) + parser.add_argument("--ip_src_upstream", + help="The inspector_protocol (upstream) tree.", + default="~/ip/src") + parser.add_argument("--v8_src_downstream", + help="The V8 src tree.", + default="~/v8/v8") + parser.add_argument('--force', dest='force', action='store_true', + help=("Whether to carry out the modifications " + "in the destination tree.")) + parser.set_defaults(force=False) + + args = parser.parse_args(argv) + upstream = os.path.normpath(os.path.expanduser(args.ip_src_upstream)) + downstream = os.path.normpath(os.path.expanduser( + args.v8_src_downstream)) + CheckRepoIsClean(upstream, '/src') + CheckRepoIsClean(downstream, '/v8') + CheckRepoIsInspectorProtocolCheckout(upstream) + CheckRepoIsV8Checkout(downstream) + # Check that the destination Git repo isn't at the master branch - it's + # generally a bad idea to check into the master branch, so we catch this + # common pilot error here early. + CheckRepoIsNotAtMasterBranch(downstream) + src_dir = upstream + dest_dir = os.path.join(downstream, 'third_party/inspector_protocol') + print('Rolling %s into %s ...' % (src_dir, dest_dir)) + src_files = set(FindFilesToSyncIn(src_dir)) + dest_files = set(FindFilesToSyncIn(dest_dir)) + to_add = [f for f in src_files if f not in dest_files] + to_delete = [f for f in dest_files if f not in src_files] + to_copy = [f for f in src_files + if (f in dest_files and not FilesAreEqual( + os.path.join(src_dir, f), os.path.join(dest_dir, f)))] + print('To add: %s' % to_add) + print('To delete: %s' % to_delete) + print('To copy: %s' % to_copy) + if not to_add and not to_delete and not to_copy: + print('Nothing to do. You\'re good.') + sys.exit(0) + if not args.force: + print('Rerun with --force if you wish the modifications to be done.') + sys.exit(1) + print('You said --force ... as you wish, modifying the destination.') + for f in to_add + to_copy: + contents = open(os.path.join(src_dir, f)).read() + contents = contents.replace( + 'INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_', + 'V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_') + contents = contents.replace( + 'namespace inspector_protocol_encoding', + 'namespace v8_inspector_protocol_encoding') + open(os.path.join(dest_dir, f), 'w').write(contents) + shutil.copymode(os.path.join(src_dir, f), os.path.join(dest_dir, f)) + for f in to_delete: + os.unlink(os.path.join(dest_dir, f)) + head_revision = GetHeadRevision(upstream) + lines = open(os.path.join(dest_dir, 'README.v8')).readlines() + f = open(os.path.join(dest_dir, 'README.v8'), 'w') + for line in lines: + if line.startswith('Revision: '): + f.write('Revision: %s' % head_revision) + else: + f.write(line) + f.close() + + +if __name__ == '__main__': + sys.exit(main(sys.argv[1:])) diff --git a/tools/inspector_protocol/templates/TypeBuilder_cpp.template b/tools/inspector_protocol/templates/TypeBuilder_cpp.template index 4ef60a6ea2..982e2c61b8 100644 --- a/tools/inspector_protocol/templates/TypeBuilder_cpp.template +++ b/tools/inspector_protocol/templates/TypeBuilder_cpp.template @@ -203,12 +203,12 @@ void Frontend::flush() m_frontendChannel->flushProtocolNotifications(); } -void Frontend::sendRawNotification(String notification) +void Frontend::sendRawJSONNotification(String notification) { m_frontendChannel->sendProtocolNotification(InternalRawNotification::fromJSON(std::move(notification))); } -void Frontend::sendRawNotification(std::vector notification) +void Frontend::sendRawCBORNotification(std::vector notification) { m_frontendChannel->sendProtocolNotification(InternalRawNotification::fromBinary(std::move(notification))); } diff --git a/tools/inspector_protocol/templates/TypeBuilder_h.template b/tools/inspector_protocol/templates/TypeBuilder_h.template index c670d65c46..9d86d7a4ac 100644 --- a/tools/inspector_protocol/templates/TypeBuilder_h.template +++ b/tools/inspector_protocol/templates/TypeBuilder_h.template @@ -269,8 +269,8 @@ public: {% endfor %} void flush(); - void sendRawNotification(String); - void sendRawNotification(std::vector); + void sendRawJSONNotification(String); + void sendRawCBORNotification(std::vector); private: FrontendChannel* m_frontendChannel; }; -- cgit v1.2.3