diff options
author | James M Snell <jasnell@gmail.com> | 2016-05-31 11:52:19 -0700 |
---|---|---|
committer | James M Snell <jasnell@gmail.com> | 2016-10-11 12:41:42 -0700 |
commit | 4b312387ead4ba11146b28b8ac05ed385919c4af (patch) | |
tree | fd73b23a01d77c7024dc90402f1ec9cd9d2d479f /src | |
parent | 88323e874473d18cce22d6ae134a056919c457e4 (diff) | |
download | android-node-v8-4b312387ead4ba11146b28b8ac05ed385919c4af.tar.gz android-node-v8-4b312387ead4ba11146b28b8ac05ed385919c4af.tar.bz2 android-node-v8-4b312387ead4ba11146b28b8ac05ed385919c4af.zip |
url: adding WHATWG URL support
Implements WHATWG URL support. Example:
```
var u = new url.URL('http://example.org');
```
Currently passing all WHATWG url parsing tests and all but two of the
setter tests. The two setter tests are intentionally skipped for now
but will be revisited.
PR-URL: https://github.com/nodejs/node/pull/7448
Reviewed-By: Ilkka Myller <ilkka.myller@nodefield.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/node_i18n.cc | 12 | ||||
-rw-r--r-- | src/node_i18n.h | 7 | ||||
-rw-r--r-- | src/node_url.cc | 1406 | ||||
-rw-r--r-- | src/node_url.h | 538 |
4 files changed, 1957 insertions, 6 deletions
diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 0f3b9b76e6..f89ae40a55 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -79,9 +79,9 @@ bool InitializeICUDirectory(const char* icu_data_path) { } } -static int32_t ToUnicode(MaybeStackBuffer<char>* buf, - const char* input, - size_t length) { +int32_t ToUnicode(MaybeStackBuffer<char>* buf, + const char* input, + size_t length) { UErrorCode status = U_ZERO_ERROR; uint32_t options = UIDNA_DEFAULT; options |= UIDNA_NONTRANSITIONAL_TO_UNICODE; @@ -113,9 +113,9 @@ static int32_t ToUnicode(MaybeStackBuffer<char>* buf, return len; } -static int32_t ToASCII(MaybeStackBuffer<char>* buf, - const char* input, - size_t length) { +int32_t ToASCII(MaybeStackBuffer<char>* buf, + const char* input, + size_t length) { UErrorCode status = U_ZERO_ERROR; uint32_t options = UIDNA_DEFAULT; options |= UIDNA_NONTRANSITIONAL_TO_ASCII; diff --git a/src/node_i18n.h b/src/node_i18n.h index 31ad18fa47..21a579526d 100644 --- a/src/node_i18n.h +++ b/src/node_i18n.h @@ -15,6 +15,13 @@ namespace i18n { bool InitializeICUDirectory(const char* icu_data_path); +int32_t ToASCII(MaybeStackBuffer<char>* buf, + const char* input, + size_t length); +int32_t ToUnicode(MaybeStackBuffer<char>* buf, + const char* input, + size_t length); + } // namespace i18n } // namespace node diff --git a/src/node_url.cc b/src/node_url.cc new file mode 100644 index 0000000000..f5b1a143f1 --- /dev/null +++ b/src/node_url.cc @@ -0,0 +1,1406 @@ +#include "node_url.h" +#include "node.h" +#include "node_internals.h" +#include "env.h" +#include "env-inl.h" +#include "util.h" +#include "util-inl.h" +#include "v8.h" +#include "base-object.h" +#include "base-object-inl.h" +#include "node_i18n.h" + +#include <string> +#include <vector> +#include <stdio.h> +#include <cmath> + +#if defined(NODE_HAVE_I18N_SUPPORT) +#include <unicode/utf8.h> +#include <unicode/utf.h> +#endif + +namespace node { + +using v8::Array; +using v8::Context; +using v8::Function; +using v8::FunctionCallbackInfo; +using v8::HandleScope; +using v8::Integer; +using v8::Isolate; +using v8::Local; +using v8::Null; +using v8::Object; +using v8::String; +using v8::Undefined; +using v8::Value; + +#define GET(env, obj, name) \ + obj->Get(env->context(), \ + OneByteString(env->isolate(), name)).ToLocalChecked() + +#define GET_AND_SET(env, obj, name, data, flag) \ + { \ + Local<Value> val = GET(env, obj, #name); \ + if (val->IsString()) { \ + Utf8Value value(env->isolate(), val.As<String>()); \ + data->name = *value; \ + data->flags |= flag; \ + } \ + } + +#define CANNOT_BE_BASE() url.flags |= URL_FLAGS_CANNOT_BE_BASE; +#define INVALID_PARSE_STATE() url.flags |= URL_FLAGS_INVALID_PARSE_STATE; +#define SPECIAL() \ + { \ + url.flags |= URL_FLAGS_SPECIAL; \ + special = true; \ + } +#define TERMINATE() \ + { \ + url.flags |= URL_FLAGS_TERMINATED; \ + goto done; \ + } +#define FAILED() \ + { \ + url.flags |= URL_FLAGS_FAILED; \ + goto done; \ + } + +#define CHECK_FLAG(flags, name) (flags & URL_FLAGS_##name) /* NOLINT */ + +#define IS_CANNOT_BE_BASE(flags) CHECK_FLAG(flags, CANNOT_BE_BASE) +#define IS_FAILED(flags) CHECK_FLAG(flags, FAILED) + +#define DOES_HAVE_SCHEME(url) CHECK_FLAG(url.flags, HAS_SCHEME) +#define DOES_HAVE_USERNAME(url) CHECK_FLAG(url.flags, HAS_USERNAME) +#define DOES_HAVE_PASSWORD(url) CHECK_FLAG(url.flags, HAS_PASSWORD) +#define DOES_HAVE_HOST(url) CHECK_FLAG(url.flags, HAS_HOST) +#define DOES_HAVE_PATH(url) CHECK_FLAG(url.flags, HAS_PATH) +#define DOES_HAVE_QUERY(url) CHECK_FLAG(url.flags, HAS_QUERY) +#define DOES_HAVE_FRAGMENT(url) CHECK_FLAG(url.flags, HAS_FRAGMENT) + +#define SET_HAVE_SCHEME() url.flags |= URL_FLAGS_HAS_SCHEME; +#define SET_HAVE_USERNAME() url.flags |= URL_FLAGS_HAS_USERNAME; +#define SET_HAVE_PASSWORD() url.flags |= URL_FLAGS_HAS_PASSWORD; +#define SET_HAVE_HOST() url.flags |= URL_FLAGS_HAS_HOST; +#define SET_HAVE_PATH() url.flags |= URL_FLAGS_HAS_PATH; +#define SET_HAVE_QUERY() url.flags |= URL_FLAGS_HAS_QUERY; +#define SET_HAVE_FRAGMENT() url.flags |= URL_FLAGS_HAS_FRAGMENT; + +#define UTF8STRING(isolate, str) \ + String::NewFromUtf8(isolate, str.c_str(), v8::NewStringType::kNormal) \ + .ToLocalChecked() + +namespace url { + +#if defined(NODE_HAVE_I18N_SUPPORT) + static int ToUnicode(std::string* input, std::string* output) { + MaybeStackBuffer<char> buf; + if (i18n::ToUnicode(&buf, input->c_str(), input->length()) < 0) + return -1; + output->assign(*buf, buf.length()); + return 0; + } + + static int ToASCII(std::string* input, std::string* output) { + MaybeStackBuffer<char> buf; + if (i18n::ToASCII(&buf, input->c_str(), input->length()) < 0) + return -1; + output->assign(*buf, buf.length()); + return 0; + } + + // Unfortunately there's not really a better way to do this. + // Iterate through each encoded codepoint and verify that + // it is a valid unicode codepoint. + static int IsValidUTF8(std::string* input) { + const char* p = input->c_str(); + int32_t len = input->length(); + for (int32_t i = 0; i < len;) { + UChar32 c; + U8_NEXT_UNSAFE(p, i, c); + if (!U_IS_UNICODE_CHAR(c)) + return -1; + } + return 0; + } +#else + // Intentional non-ops if ICU is not present. + static int ToUnicode(std::string* input, std::string* output) { + output->reserve(input.length()); + *output = input->c_str(); + } + + static int ToASCII(std::string* input, std::string* output) { + output->reserve(input.length()); + *output = input->c_str(); + } + + static int IsValidUTF8(std::string* input) { + return 0; + } +#endif + + static url_host_type ParseIPv6Host(url_host* host, + const char* input, + size_t length) { + url_host_type type = HOST_TYPE_FAILED; + for (unsigned n = 0; n < 8; n++) + host->value.ipv6[n] = 0; + uint16_t* piece_pointer = &host->value.ipv6[0]; + uint16_t* last_piece = piece_pointer + 8; + uint16_t* compress_pointer = nullptr; + const char* pointer = input; + const char* end = pointer + length; + unsigned value, len, swaps, dots; + char ch = pointer < end ? pointer[0] : kEOL; + if (ch == ':') { + if (length < 2 || pointer[1] != ':') + goto end; + pointer += 2; + ch = pointer < end ? pointer[0] : kEOL; + piece_pointer++; + compress_pointer = piece_pointer; + } + while (ch != kEOL) { + if (piece_pointer > last_piece) + goto end; + if (ch == ':') { + if (compress_pointer != nullptr) + goto end; + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + piece_pointer++; + compress_pointer = piece_pointer; + continue; + } + value = 0; + len = 0; + while (len < 4 && ASCII_HEX_DIGIT(ch)) { + value = value * 0x10 + hex2bin(ch); + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + len++; + } + switch (ch) { + case '.': + if (len == 0) + goto end; + pointer -= len; + ch = pointer < end ? pointer[0] : kEOL; + if (piece_pointer > last_piece - 2) + goto end; + dots = 0; + while (ch != kEOL) { + value = 0xffffffff; + if (!ASCII_DIGIT(ch)) + goto end; + while (ASCII_DIGIT(ch)) { + unsigned number = ch - '0'; + if (value == 0xffffffff) { + value = number; + } else if (value == 0) { + goto end; + } else { + value = value * 10 + number; + } + if (value > 255) + goto end; + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + } + if (dots < 3 && ch != '.') + goto end; + *piece_pointer = *piece_pointer * 0x100 + value; + if (dots & 0x1) + piece_pointer++; + if (ch != kEOL) { + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + } + if (dots == 3 && ch != kEOL) + goto end; + dots++; + } + continue; + case ':': + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + if (ch == kEOL) + goto end; + break; + case kEOL: + break; + default: + goto end; + } + *piece_pointer = value; + piece_pointer++; + } + + if (compress_pointer != nullptr) { + swaps = piece_pointer - compress_pointer; + piece_pointer = last_piece - 1; + while (piece_pointer != &host->value.ipv6[0] && swaps > 0) { + uint16_t temp = *piece_pointer; + uint16_t* swap_piece = compress_pointer + swaps - 1; + *piece_pointer = *swap_piece; + *swap_piece = temp; + piece_pointer--; + swaps--; + } + } else if (compress_pointer == nullptr && + piece_pointer != last_piece) { + goto end; + } + type = HOST_TYPE_IPV6; + end: + host->type = type; + return type; + } + + static inline int ParseNumber(const char* start, const char* end) { + unsigned R = 10; + if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { + start += 2; + R = 16; + } + if (end - start == 0) { + return 0; + } else if (R == 10 && end - start > 1 && start[0] == '0') { + start++; + R = 8; + } + const char* p = start; + + while (p < end) { + const char ch = p[0]; + switch (R) { + case 8: + if (ch < '0' || ch > '7') + return -1; + break; + case 10: + if (!ASCII_DIGIT(ch)) + return -1; + break; + case 16: + if (!ASCII_HEX_DIGIT(ch)) + return -1; + break; + } + p++; + } + return strtol(start, NULL, R); + } + + static url_host_type ParseIPv4Host(url_host* host, + const char* input, + size_t length) { + url_host_type type = HOST_TYPE_DOMAIN; + const char* pointer = input; + const char* mark = input; + const char* end = pointer + length; + int parts = 0; + uint32_t val = 0; + unsigned numbers[4]; + if (length == 0) + goto end; + + while (pointer <= end) { + const char ch = pointer < end ? pointer[0] : kEOL; + const int remaining = end - pointer - 1; + if (ch == '.' || ch == kEOL) { + if (++parts > 4 || pointer - mark == 0) + break; + int n = ParseNumber(mark, pointer); + if (n < 0) { + type = HOST_TYPE_DOMAIN; + goto end; + } + if (pointer - mark == 10) { + numbers[parts - 1] = n; + break; + } + if (n > 255) { + type = HOST_TYPE_FAILED; + goto end; + } + numbers[parts - 1] = n; + mark = pointer + 1; + if (ch == '.' && remaining == 0) + break; + } + pointer++; + } + + type = HOST_TYPE_IPV4; + if (parts > 0) { + val = numbers[parts - 1]; + for (int n = 0; n < parts - 1; n++) { + double b = 3-n; + val += numbers[n] * pow(256, b); + } + } + + host->value.ipv4 = val; + end: + host->type = type; + return type; + } + + static url_host_type ParseHost(url_host* host, + const char* input, + size_t length, + bool unicode = false) { + url_host_type type = HOST_TYPE_FAILED; + const char* pointer = input; + std::string decoded; + + if (length == 0) + goto end; + + if (pointer[0] == '[') { + if (pointer[length - 1] != ']') + goto end; + return ParseIPv6Host(host, ++pointer, length - 2); + } + + // First, we have to percent decode + if (PercentDecode(input, length, &decoded) < 0) + goto end; + + // If there are any invalid UTF8 byte sequences, we have to fail. + // Unfortunately this means iterating through the string and checking + // each decoded codepoint. + if (IsValidUTF8(&decoded) < 0) + goto end; + + // Then we have to punycode toASCII + if (ToASCII(&decoded, &decoded) < 0) + goto end; + + // If any of the following characters are still present, we have to fail + for (size_t n = 0; n < decoded.size(); n++) { + const char ch = decoded[n]; + if (ch == 0x00 || ch == 0x09 || ch == 0x0a || ch == 0x0d || + ch == 0x20 || ch == '#' || ch == '%' || ch == '/' || + ch == '?' || ch == '@' || ch == '[' || ch == '\\' || + ch == ']') { + goto end; + } + } + + // Check to see if it's an IPv4 IP address + type = ParseIPv4Host(host, decoded.c_str(), decoded.length()); + if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED) + goto end; + + // If the unicode flag is set, run the result through punycode ToUnicode + if (unicode && ToUnicode(&decoded, &decoded) < 0) + goto end; + + // It's not an IPv4 or IPv6 address, it must be a domain + type = HOST_TYPE_DOMAIN; + host->value.domain = decoded; + + end: + host->type = type; + return type; + } + + // Locates the longest sequence of 0 segments in an IPv6 address + // in order to use the :: compression when serializing + static inline uint16_t* FindLongestZeroSequence(uint16_t* values, + size_t len) { + uint16_t* start = values; + uint16_t* end = start + len; + uint16_t* result = nullptr; + + uint16_t* current = nullptr; + unsigned counter = 0, longest = 1; + + while (start < end) { + if (*start == 0) { + if (current == nullptr) + current = start; + counter++; + } else { + if (counter > longest) { + longest = counter; + result = current; + } + counter = 0; + current = nullptr; + } + start++; + } + if (counter > longest) + result = current; + return result; + } + + static url_host_type WriteHost(url_host* host, std::string* dest) { + dest->clear(); + switch (host->type) { + case HOST_TYPE_DOMAIN: + *dest = host->value.domain; + break; + case HOST_TYPE_IPV4: { + dest->reserve(15); + uint32_t value = host->value.ipv4; + for (int n = 0; n < 4; n++) { + char buf[4]; + char* buffer = buf; + snprintf(buffer, sizeof(buf), "%d", value % 256); + dest->insert(0, buf); + if (n < 3) + dest->insert(0, 1, '.'); + value /= 256; + } + break; + } + case HOST_TYPE_IPV6: { + dest->reserve(41); + *dest+= '['; + uint16_t* start = &host->value.ipv6[0]; + uint16_t* compress_pointer = + FindLongestZeroSequence(start, 8); + for (int n = 0; n <= 7; n++) { + uint16_t* piece = &host->value.ipv6[n]; + if (compress_pointer == piece) { + *dest += n == 0 ? "::" : ":"; + while (*piece == 0 && n < 8) { + n++; + piece = &host->value.ipv6[n]; + } + if (n == 8) + break; + } + char buf[5]; + char* buffer = buf; + snprintf(buffer, sizeof(buf), "%x", *piece); + *dest += buf; + if (n < 7) + *dest += ':'; + } + *dest += ']'; + break; + } + case HOST_TYPE_FAILED: + break; + } + return host->type; + } + + static int ParseHost(std::string* input, + std::string* output, + bool unicode = false) { + if (input->length() == 0) + return 0; + url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, input->c_str(), input->length(), unicode); + if (host.type == HOST_TYPE_FAILED) + return -1; + WriteHost(&host, output); + return 0; + } + + static inline void Copy(Isolate* isolate, + Local<Array> ary, + std::vector<std::string>* vec) { + const int32_t len = ary->Length(); + if (len == 0) + return; // nothing to copy + vec->reserve(len); + for (int32_t n = 0; n < len; n++) { + Local<Value> val = ary->Get(n); + if (val->IsString()) { + Utf8Value value(isolate, val.As<String>()); + vec->push_back(std::string(*value, value.length())); + } + } + } + + static inline Local<Array> Copy(Isolate* isolate, + std::vector<std::string> vec) { + Local<Array> ary = Array::New(isolate, vec.size()); + for (size_t n = 0; n < vec.size(); n++) + ary->Set(n, UTF8STRING(isolate, vec[n])); + return ary; + } + + static inline void HarvestBase(Environment* env, + struct url_data* base, + Local<Object> base_obj) { + Local<Value> flags = GET(env, base_obj, "flags"); + if (flags->IsInt32()) + base->flags = flags->Int32Value(); + + GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME); + GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); + GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); + GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); + GET_AND_SET(env, base_obj, query, base, URL_FLAGS_HAS_QUERY); + GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT); + Local<Value> port = GET(env, base_obj, "port"); + if (port->IsInt32()) + base->port = port->Int32Value(); + Local<Value> path = GET(env, base_obj, "path"); + if (path->IsArray()) { + base->flags |= URL_FLAGS_HAS_PATH; + Copy(env->isolate(), path.As<Array>(), &(base->path)); + } + } + + static inline void HarvestContext(Environment* env, + struct url_data* context, + Local<Object> context_obj) { + Local<Value> flags = GET(env, context_obj, "flags"); + if (flags->IsInt32()) { + int32_t _flags = flags->Int32Value(); + if (_flags & URL_FLAGS_SPECIAL) + context->flags |= URL_FLAGS_SPECIAL; + if (_flags & URL_FLAGS_CANNOT_BE_BASE) + context->flags |= URL_FLAGS_CANNOT_BE_BASE; + } + Local<Value> scheme = GET(env, context_obj, "scheme"); + if (scheme->IsString()) { + Utf8Value value(env->isolate(), scheme); + context->scheme.assign(*value, value.length()); + } + Local<Value> port = GET(env, context_obj, "port"); + if (port->IsInt32()) + context->port = port->Int32Value(); + } + + // Single dot segment can be ".", "%2e", or "%2E" + static inline bool IsSingleDotSegment(std::string str) { + switch (str.size()) { + case 1: + return str == "."; + case 3: + return str[0] == '%' && + str[1] == '2' && + TO_LOWER(str[2]) == 'e'; + default: + return false; + } + } + + // Double dot segment can be: + // "..", ".%2e", ".%2E", "%2e.", "%2E.", + // "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" + static inline bool IsDoubleDotSegment(std::string str) { + switch (str.size()) { + case 2: + return str == ".."; + case 4: + if (str[0] != '.' && str[0] != '%') + return false; + return ((str[0] == '.' && + str[1] == '%' && + str[2] == '2' && + TO_LOWER(str[3]) == 'e') || + (str[0] == '%' && + str[1] == '2' && + TO_LOWER(str[2]) == 'e' && + str[3] == '.')); + case 6: + return (str[0] == '%' && + str[1] == '2' && + TO_LOWER(str[2]) == 'e' && + str[3] == '%' && + str[4] == '2' && + TO_LOWER(str[5]) == 'e'); + default: + return false; + } + } + + static void Parse(Environment* env, + Local<Value> recv, + const char* input, + const size_t len, + enum url_parse_state override, + Local<Object> base_obj, + Local<Object> context_obj, + Local<Function> cb) { + Isolate* isolate = env->isolate(); + Local<Context> context = env->context(); + HandleScope handle_scope(isolate); + Context::Scope context_scope(context); + + const bool has_base = base_obj->IsObject(); + bool atflag = false; + bool sbflag = false; + bool uflag = false; + bool base_is_file = false; + int wskip = 0; + + struct url_data base; + struct url_data url; + if (context_obj->IsObject()) + HarvestContext(env, &url, context_obj); + if (has_base) + HarvestBase(env, &base, base_obj); + + std::string buffer; + url.scheme.reserve(len); + url.username.reserve(len); + url.password.reserve(len); + url.host.reserve(len); + url.path.reserve(len); + url.query.reserve(len); + url.fragment.reserve(len); + buffer.reserve(len); + + // Set the initial parse state. + const bool state_override = override != kUnknownState; + enum url_parse_state state = state_override ? override : kSchemeStart; + + const char* p = input; + const char* end = input + len; + + if (state < kSchemeStart || state > kFragment) { + INVALID_PARSE_STATE(); + goto done; + } + + while (p <= end) { + const char ch = p < end ? p[0] : kEOL; + + if (TAB_AND_NEWLINE(ch)) { + if (state == kAuthority) { + // It's necessary to keep track of how much whitespace + // is being ignored when in kAuthority state because of + // how the buffer is managed. TODO: See if there's a better + // way + wskip++; + } + p++; + continue; + } + + bool special = url.flags & URL_FLAGS_SPECIAL; + const bool special_back_slash = (special && ch == '\\'); + switch (state) { + case kSchemeStart: + if (ASCII_ALPHA(ch)) { + buffer += TO_LOWER(ch); + state = kScheme; + } else if (!state_override) { + state = kNoScheme; + continue; + } else { + TERMINATE() + } + break; + case kScheme: + if (SCHEME_CHAR(ch)) { + buffer += TO_LOWER(ch); + p++; + continue; + } else if (ch == ':' || (state_override && ch == kEOL)) { + buffer += ':'; + if (buffer.size() > 0) { + SET_HAVE_SCHEME() + url.scheme = buffer; + } + if (IsSpecial(url.scheme)) { + SPECIAL() + } else { + url.flags &= ~URL_FLAGS_SPECIAL; + } + if (state_override) + goto done; + buffer.clear(); + if (url.scheme == "file:") { + state = kFile; + } else if (special && + has_base && + DOES_HAVE_SCHEME(base) && + url.scheme == base.scheme) { + state = kSpecialRelativeOrAuthority; + } else if (special) { + state = kSpecialAuthoritySlashes; + } else if (p[1] == '/') { + state = kPathOrAuthority; + p++; + } else { + CANNOT_BE_BASE() + SET_HAVE_PATH() + url.path.push_back(""); + state = kCannotBeBase; + } + } else if (!state_override) { + buffer.clear(); + state = kNoScheme; + p = input; + continue; + } else { + TERMINATE() + } + break; + case kNoScheme: + if (!has_base || (IS_CANNOT_BE_BASE(base.flags) && ch != '#')) { + FAILED() + } else if (IS_CANNOT_BE_BASE(base.flags) && ch == '#') { + SET_HAVE_SCHEME() + url.scheme = base.scheme; + if (IsSpecial(url.scheme)) { + SPECIAL() + } else { + url.flags &= ~URL_FLAGS_SPECIAL; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + if (DOES_HAVE_QUERY(base)) { + SET_HAVE_QUERY() + url.query = base.query; + } + if (DOES_HAVE_FRAGMENT(base)) { + SET_HAVE_FRAGMENT() + url.fragment = base.fragment; + } + CANNOT_BE_BASE() + state = kFragment; + } else if (has_base && + DOES_HAVE_SCHEME(base) && + base.scheme != "file:") { + state = kRelative; + continue; + } else { + SET_HAVE_SCHEME() + url.scheme = "file:"; + SPECIAL() + state = kFile; + continue; + } + break; + case kSpecialRelativeOrAuthority: + if (ch == '/' && p[1] == '/') { + state = kSpecialAuthorityIgnoreSlashes; + p++; + } else { + state = kRelative; + continue; + } + break; + case kPathOrAuthority: + if (ch == '/') { + state = kAuthority; + } else { + state = kPath; + continue; + } + break; + case kRelative: + SET_HAVE_SCHEME() + url.scheme = base.scheme; + if (IsSpecial(url.scheme)) { + SPECIAL() + } else { + url.flags &= ~URL_FLAGS_SPECIAL; + } + switch (ch) { + case kEOL: + if (DOES_HAVE_USERNAME(base)) { + SET_HAVE_USERNAME() + url.username = base.username; + } + if (DOES_HAVE_PASSWORD(base)) { + SET_HAVE_PASSWORD() + url.password = base.password; + } + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_QUERY(base)) { + SET_HAVE_QUERY() + url.query = base.query; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + url.port = base.port; + break; + case '/': + state = kRelativeSlash; + break; + case '?': + if (DOES_HAVE_USERNAME(base)) { + SET_HAVE_USERNAME() + url.username = base.username; + } + if (DOES_HAVE_PASSWORD(base)) { + SET_HAVE_PASSWORD() + url.password = base.password; + } + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + url.port = base.port; + state = kQuery; + break; + case '#': + if (DOES_HAVE_USERNAME(base)) { + SET_HAVE_USERNAME() + url.username = base.username; + } + if (DOES_HAVE_PASSWORD(base)) { + SET_HAVE_PASSWORD() + url.password = base.password; + } + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_QUERY(base)) { + SET_HAVE_QUERY() + url.query = base.query; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + url.port = base.port; + state = kFragment; + break; + default: + if (special_back_slash) { + state = kRelativeSlash; + } else { + if (DOES_HAVE_USERNAME(base)) { + SET_HAVE_USERNAME() + url.username = base.username; + } + if (DOES_HAVE_PASSWORD(base)) { + SET_HAVE_PASSWORD() + url.password = base.password; + } + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + if (!url.path.empty()) + url.path.pop_back(); + } + url.port = base.port; + state = kPath; + continue; + } + } + break; + case kRelativeSlash: + if (ch == '/' || special_back_slash) { + state = kSpecialAuthorityIgnoreSlashes; + } else { + if (DOES_HAVE_USERNAME(base)) { + SET_HAVE_USERNAME() + url.username = base.username; + } + if (DOES_HAVE_PASSWORD(base)) { + SET_HAVE_PASSWORD() + url.password = base.password; + } + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + url.port = base.port; + state = kPath; + continue; + } + break; + case kSpecialAuthoritySlashes: + state = kSpecialAuthorityIgnoreSlashes; + if (ch == '/' && p[1] == '/') { + p++; + } else { + continue; + } + break; + case kSpecialAuthorityIgnoreSlashes: + if (ch != '/' && ch != '\\') { + state = kAuthority; + continue; + } + break; + case kAuthority: + if (ch == '@') { + if (atflag) { + buffer.reserve(buffer.size() + 3); + buffer.insert(0, "%40"); + } + atflag = true; + const size_t blen = buffer.size(); + if (blen > 0 && buffer[0] != ':') { + SET_HAVE_USERNAME() + } + for (size_t n = 0; n < blen; n++) { + const char bch = buffer[n]; + if (bch == ':') { + SET_HAVE_PASSWORD() + if (!uflag) { + uflag = true; + continue; + } + } + if (uflag) { + AppendOrEscape(&url.password, bch, UserinfoEncodeSet); + } else { + AppendOrEscape(&url.username, bch, UserinfoEncodeSet); + } + } + buffer.clear(); + } else if (ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || + special_back_slash) { + p -= buffer.size() + 1 + wskip; + buffer.clear(); + state = kHost; + } else { + buffer += ch; + } + break; + case kHost: + case kHostname: + if (ch == ':' && !sbflag) { + if (special && buffer.size() == 0) + FAILED() + SET_HAVE_HOST() + if (ParseHost(&buffer, &url.host) < 0) + FAILED() + buffer.clear(); + state = kPort; + if (override == kHostname) + TERMINATE() + } else if (ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || + special_back_slash) { + p--; + if (special && buffer.size() == 0) + FAILED() + SET_HAVE_HOST() + if (ParseHost(&buffer, &url.host) < 0) + FAILED() + buffer.clear(); + state = kPathStart; + if (state_override) + TERMINATE() + } else { + if (ch == '[') + sbflag = true; + if (ch == ']') + sbflag = false; + buffer += TO_LOWER(ch); + } + break; + case kPort: + if (ASCII_DIGIT(ch)) { + buffer += ch; + } else if (state_override || + ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || + special_back_slash) { + if (buffer.size() > 0) { + int port = 0; + for (size_t i = 0; i < buffer.size(); i++) + port = port * 10 + buffer[i] - '0'; + if (port >= 0 && port <= 0xffff) { + url.port = NormalizePort(url.scheme, port); + } else if (!state_override) { + FAILED() + } + buffer.clear(); + } + state = kPathStart; + continue; + } else { + FAILED(); + } + break; + case kFile: + base_is_file = ( + has_base && + DOES_HAVE_SCHEME(base) && + base.scheme == "file:"); + switch (ch) { + case kEOL: + if (base_is_file) { + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + if (DOES_HAVE_QUERY(base)) { + SET_HAVE_QUERY() + url.query = base.query; + } + } + break; + case '\\': + case '/': + state = kFileSlash; + break; + case '?': + if (base_is_file) { + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + SET_HAVE_QUERY() + state = kQuery; + } + break; + case '#': + if (base_is_file) { + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + if (DOES_HAVE_QUERY(base)) { + SET_HAVE_QUERY() + url.query = base.query; + } + state = kFragment; + } + break; + default: + if (base_is_file && + (!WINDOWS_DRIVE_LETTER(ch, p[1]) || + end - p == 1 || + (p[2] != '/' && + p[2] != '\\' && + p[2] != '?' && + p[2] != '#'))) { + if (DOES_HAVE_HOST(base)) { + SET_HAVE_HOST() + url.host = base.host; + } + if (DOES_HAVE_PATH(base)) { + SET_HAVE_PATH() + url.path = base.path; + } + if (!url.path.empty()) + url.path.pop_back(); + } + state = kPath; + continue; + } + break; + case kFileSlash: + if (ch == '/' || ch == '\\') { + state = kFileHost; + } else { + if (has_base && + DOES_HAVE_SCHEME(base) && + base.scheme == "file:" && + DOES_HAVE_PATH(base) && + base.path.size() > 0 && + NORMALIZED_WINDOWS_DRIVE_LETTER(base.path[0])) { + SET_HAVE_PATH() + url.path.push_back(base.path[0]); + } + state = kPath; + continue; + } + break; + case kFileHost: + if (ch == kEOL || + ch == '/' || + ch == '\\' || + ch == '?' || + ch == '#') { + if (buffer.size() == 2 && + WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { + state = kPath; + } else if (buffer.size() == 0) { + state = kPathStart; + } else { + if (buffer != "localhost") { + SET_HAVE_HOST() + if (ParseHost(&buffer, &url.host) < 0) + FAILED() + } + buffer.clear(); + state = kPathStart; + } + continue; + } else { + buffer += ch; + } + break; + case kPathStart: + state = kPath; + if (ch != '/' && !special_back_slash) + continue; + break; + case kPath: + if (ch == kEOL || + ch == '/' || + special_back_slash || + (!state_override && (ch == '?' || ch == '#'))) { + if (IsDoubleDotSegment(buffer)) { + if (!url.path.empty()) + url.path.pop_back(); + if (ch != '/' && !special_back_slash) { + SET_HAVE_PATH() + url.path.push_back(""); + } + } else if (IsSingleDotSegment(buffer)) { + if (ch != '/' && !special_back_slash) { + SET_HAVE_PATH(); + url.path.push_back(""); + } + } else { + if (DOES_HAVE_SCHEME(url) && + url.scheme == "file:" && + url.path.empty() && + buffer.size() == 2 && + WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { + url.flags &= ~URL_FLAGS_HAS_HOST; + buffer[1] = ':'; + } + SET_HAVE_PATH() + std::string segment(buffer.c_str(), buffer.size()); + url.path.push_back(segment); + } + buffer.clear(); + if (ch == '?') { + SET_HAVE_QUERY() + state = kQuery; + } else if (ch == '#') { + state = kFragment; + } + } else { + if (ch == '%' && p[1] == '2' && TO_LOWER(p[2]) == 'e') { + buffer += '.'; + p += 2; + } else { + AppendOrEscape(&buffer, ch, DefaultEncodeSet); + } + } + break; + case kCannotBeBase: + switch (ch) { + case '?': + state = kQuery; + break; + case '#': + state = kFragment; + break; + default: + if (url.path.size() == 0) + url.path.push_back(""); + if (url.path.size() > 0 && ch != kEOL) + AppendOrEscape(&url.path[0], ch, SimpleEncodeSet); + } + break; + case kQuery: + if (ch == kEOL || (!state_override && ch == '#')) { + SET_HAVE_QUERY() + url.query = buffer; + buffer.clear(); + if (ch == '#') + state = kFragment; + } else { + AppendOrEscape(&buffer, ch, QueryEncodeSet); + } + break; + case kFragment: + switch (ch) { + case kEOL: + SET_HAVE_FRAGMENT() + url.fragment = buffer; + break; + case 0: + break; + default: + buffer += ch; + } + break; + default: + INVALID_PARSE_STATE() + goto done; + } + + p++; + } + + done: + + // Define the return value placeholders + const Local<Value> undef = Undefined(isolate); + Local<Value> argv[9] = { + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + }; + + argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); + if (!IS_FAILED(url.flags)) { + if (DOES_HAVE_SCHEME(url)) + argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str()); + if (DOES_HAVE_USERNAME(url)) + argv[ARG_USERNAME] = UTF8STRING(isolate, url.username); + if (DOES_HAVE_PASSWORD(url)) + argv[ARG_PASSWORD] = UTF8STRING(isolate, url.password); + if (DOES_HAVE_HOST(url)) + argv[ARG_HOST] = UTF8STRING(isolate, url.host); + if (DOES_HAVE_QUERY(url)) + argv[ARG_QUERY] = UTF8STRING(isolate, url.query); + if (DOES_HAVE_FRAGMENT(url)) + argv[ARG_FRAGMENT] = UTF8STRING(isolate, url.fragment); + if (url.port > -1) + argv[ARG_PORT] = Integer::New(isolate, url.port); + if (DOES_HAVE_PATH(url)) + argv[ARG_PATH] = Copy(isolate, url.path); + } + + cb->Call(context, recv, 9, argv); + } + + static void Parse(const FunctionCallbackInfo<Value>& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 5); + CHECK(args[0]->IsString()); + CHECK(args[2]->IsUndefined() || + args[2]->IsNull() || + args[2]->IsObject()); + CHECK(args[3]->IsUndefined() || + args[3]->IsNull() || + args[3]->IsObject()); + CHECK(args[4]->IsFunction()); + Utf8Value input(env->isolate(), args[0]); + enum url_parse_state override = kUnknownState; + if (args[1]->IsNumber()) + override = (enum url_parse_state)(args[1]->Uint32Value()); + + Parse(env, args.This(), + *input, input.length(), + override, + args[2].As<Object>(), + args[3].As<Object>(), + args[4].As<Function>()); + } + + static void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + std::string output; + const size_t len = value.length(); + output.reserve(len); + for (size_t n = 0; n < len; n++) { + const char ch = (*value)[n]; + AppendOrEscape(&output, ch, UserinfoEncodeSet); + } + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + output.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); + } + + static void DomainToASCII(const FunctionCallbackInfo<Value>& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + + url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, *value, value.length()); + if (host.type == HOST_TYPE_FAILED) { + args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); + return; + } + std::string out; + WriteHost(&host, &out); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + out.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); + } + + static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + + url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, *value, value.length(), true); + if (host.type == HOST_TYPE_FAILED) { + args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); + return; + } + std::string out; + WriteHost(&host, &out); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + out.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); + } + + static void Init(Local<Object> target, + Local<Value> unused, + Local<Context> context, + void* priv) { + Environment* env = Environment::GetCurrent(context); + env->SetMethod(target, "parse", Parse); + env->SetMethod(target, "encodeAuth", EncodeAuthSet); + env->SetMethod(target, "domainToASCII", DomainToASCII); + env->SetMethod(target, "domainToUnicode", DomainToUnicode); + +#define XX(name, _) NODE_DEFINE_CONSTANT(target, name); + FLAGS(XX) +#undef XX + +#define XX(name) NODE_DEFINE_CONSTANT(target, name); + ARGS(XX) + PARSESTATES(XX) +#undef XX + } +} // namespace url +} // namespace node + +NODE_MODULE_CONTEXT_AWARE_BUILTIN(url, node::url::Init) diff --git a/src/node_url.h b/src/node_url.h new file mode 100644 index 0000000000..198c29938b --- /dev/null +++ b/src/node_url.h @@ -0,0 +1,538 @@ +#ifndef SRC_NODE_URL_H_ +#define SRC_NODE_URL_H_ + +#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#include "node.h" +#include <string> + +namespace node { +namespace url { + +#define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#define TAB_AND_NEWLINE(ch) \ + (ch == 0x09 || ch == 0x0a || ch == 0x0d) +#define ASCII_DIGIT(ch) \ + (ch >= 0x30 && ch <= 0x39) +#define ASCII_HEX_DIGIT(ch) \ + (ASCII_DIGIT(ch) || (ch >= 0x41 && ch <= 0x46) || (ch >= 0x61 && ch <= 0x66)) +#define ASCII_ALPHA(ch) \ + ((ch >= 0x41 && ch <= 0x5a) || (ch >= 0x61 && ch <= 0x7a)) +#define ASCII_ALPHANUMERIC(ch) \ + (ASCII_DIGIT(ch) || ASCII_ALPHA(ch)) +#define TO_LOWER(ch) \ + (ASCII_ALPHA(ch) ? (ch | 0x20) : ch) +#define SCHEME_CHAR(ch) \ + (ASCII_ALPHANUMERIC(ch) || ch == '+' || ch == '-' || ch == '.') +#define WINDOWS_DRIVE_LETTER(ch, next) \ + (ASCII_ALPHA(ch) && (next == ':' || next == '|')) +#define NORMALIZED_WINDOWS_DRIVE_LETTER(str) \ + (str.length() == 2 && \ + ASCII_ALPHA(str[0]) && \ + str[1] == ':') + +static const char* hex[256] = { + "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", + "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", + "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", + "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", + "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", + "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F", + "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37", + "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", + "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47", + "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F", + "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57", + "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F", + "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67", + "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F", + "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77", + "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F", + "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", + "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", + "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", + "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", + "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", + "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", + "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", + "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", + "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", + "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", + "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", + "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", + "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", + "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", + "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", + "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" +}; + +static const uint8_t SIMPLE_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t DEFAULT_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t USERINFO_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t QUERY_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +// Must return true if the character is to be percent-encoded +typedef bool (*must_escape_cb)(const unsigned char ch); + +// Appends ch to str. If test(ch) returns true, the ch will +// be percent-encoded then appended. +static inline void AppendOrEscape(std::string* str, + const unsigned char ch, + must_escape_cb test) { + if (test(ch)) + *str += hex[ch]; + else + *str += ch; +} + +static inline bool SimpleEncodeSet(const unsigned char ch) { + return BIT_AT(SIMPLE_ENCODE_SET, ch); +} + +static inline bool DefaultEncodeSet(const unsigned char ch) { + return BIT_AT(DEFAULT_ENCODE_SET, ch); +} + +static inline bool UserinfoEncodeSet(const unsigned char ch) { + return BIT_AT(USERINFO_ENCODE_SET, ch); +} + +static inline bool QueryEncodeSet(const unsigned char ch) { + return BIT_AT(QUERY_ENCODE_SET, ch); +} + +static inline unsigned hex2bin(const char ch) { + if (ch >= '0' && ch <= '9') + return ch - '0'; + if (ch >= 'A' && ch <= 'F') + return 10 + (ch - 'A'); + if (ch >= 'a' && ch <= 'f') + return 10 + (ch - 'a'); + return static_cast<unsigned>(-1); +} + +static inline int PercentDecode(const char* input, + size_t len, + std::string* dest) { + if (len == 0) + return 0; + dest->reserve(len); + const char* pointer = input; + const char* end = input + len; + size_t remaining = pointer - end - 1; + while (pointer < end) { + const char ch = pointer[0]; + remaining = (end - pointer) + 1; + if (ch != '%' || remaining < 2 || + (ch == '%' && + (!ASCII_HEX_DIGIT(pointer[1]) || + !ASCII_HEX_DIGIT(pointer[2])))) { + *dest += ch; + pointer++; + continue; + } else { + unsigned a = hex2bin(pointer[1]); + unsigned b = hex2bin(pointer[2]); + char c = static_cast<char>(a * 16 + b); + *dest += static_cast<char>(c); + pointer += 3; + } + } + return 0; +} + +#define SPECIALS(XX) \ + XX("ftp:", 21) \ + XX("file:", -1) \ + XX("gopher:", 70) \ + XX("http:", 80) \ + XX("https:", 443) \ + XX("ws:", 80) \ + XX("wss:", 443) + +#define PARSESTATES(XX) \ + XX(kSchemeStart) \ + XX(kScheme) \ + XX(kNoScheme) \ + XX(kSpecialRelativeOrAuthority) \ + XX(kPathOrAuthority) \ + XX(kRelative) \ + XX(kRelativeSlash) \ + XX(kSpecialAuthoritySlashes) \ + XX(kSpecialAuthorityIgnoreSlashes) \ + XX(kAuthority) \ + XX(kHost) \ + XX(kHostname) \ + XX(kPort) \ + XX(kFile) \ + XX(kFileSlash) \ + XX(kFileHost) \ + XX(kPathStart) \ + XX(kPath) \ + XX(kCannotBeBase) \ + XX(kQuery) \ + XX(kFragment) + +#define FLAGS(XX) \ + XX(URL_FLAGS_NONE, 0) \ + XX(URL_FLAGS_FAILED, 0x01) \ + XX(URL_FLAGS_CANNOT_BE_BASE, 0x02) \ + XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \ + XX(URL_FLAGS_TERMINATED, 0x08) \ + XX(URL_FLAGS_SPECIAL, 0x10) \ + XX(URL_FLAGS_HAS_SCHEME, 0x20) \ + XX(URL_FLAGS_HAS_USERNAME, 0x40) \ + XX(URL_FLAGS_HAS_PASSWORD, 0x80) \ + XX(URL_FLAGS_HAS_HOST, 0x100) \ + XX(URL_FLAGS_HAS_PATH, 0x200) \ + XX(URL_FLAGS_HAS_QUERY, 0x400) \ + XX(URL_FLAGS_HAS_FRAGMENT, 0x800) + +#define ARGS(XX) \ + XX(ARG_FLAGS) \ + XX(ARG_PROTOCOL) \ + XX(ARG_USERNAME) \ + XX(ARG_PASSWORD) \ + XX(ARG_HOST) \ + XX(ARG_PORT) \ + XX(ARG_PATH) \ + XX(ARG_QUERY) \ + XX(ARG_FRAGMENT) + +static const char kEOL = -1; + +enum url_parse_state { + kUnknownState = -1, +#define XX(name) name, + PARSESTATES(XX) +#undef XX +} url_parse_state; + +enum url_flags { +#define XX(name, val) name = val, + FLAGS(XX) +#undef XX +} url_flags; + +enum url_cb_args { +#define XX(name) name, + ARGS(XX) +#undef XX +} url_cb_args; + +static inline bool IsSpecial(std::string scheme) { +#define XX(name, _) if (scheme == name) return true; + SPECIALS(XX); +#undef XX + return false; +} + +static inline int NormalizePort(std::string scheme, int p) { +#define XX(name, port) if (scheme == name && p == port) return -1; + SPECIALS(XX); +#undef XX + return p; +} + +struct url_data { + int32_t flags = URL_FLAGS_NONE; + int port = -1; + std::string scheme; + std::string username; + std::string password; + std::string host; + std::string query; + std::string fragment; + std::vector<std::string> path; +}; + +union url_host_value { + std::string domain; + uint32_t ipv4; + uint16_t ipv6[8]; + ~url_host_value() {} +}; + +enum url_host_type { + HOST_TYPE_FAILED = -1, + HOST_TYPE_DOMAIN = 0, + HOST_TYPE_IPV4 = 1, + HOST_TYPE_IPV6 = 2 +}; + +struct url_host { + url_host_value value; + enum url_host_type type; +}; +} // namespace url + +} // namespace node + +#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#endif // SRC_NODE_URL_H_ |