summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJames M Snell <jasnell@gmail.com>2016-05-31 11:52:19 -0700
committerJames M Snell <jasnell@gmail.com>2016-10-11 12:41:42 -0700
commit4b312387ead4ba11146b28b8ac05ed385919c4af (patch)
treefd73b23a01d77c7024dc90402f1ec9cd9d2d479f /src
parent88323e874473d18cce22d6ae134a056919c457e4 (diff)
downloadandroid-node-v8-4b312387ead4ba11146b28b8ac05ed385919c4af.tar.gz
android-node-v8-4b312387ead4ba11146b28b8ac05ed385919c4af.tar.bz2
android-node-v8-4b312387ead4ba11146b28b8ac05ed385919c4af.zip
url: adding WHATWG URL support
Implements WHATWG URL support. Example: ``` var u = new url.URL('http://example.org'); ``` Currently passing all WHATWG url parsing tests and all but two of the setter tests. The two setter tests are intentionally skipped for now but will be revisited. PR-URL: https://github.com/nodejs/node/pull/7448 Reviewed-By: Ilkka Myller <ilkka.myller@nodefield.com>
Diffstat (limited to 'src')
-rw-r--r--src/node_i18n.cc12
-rw-r--r--src/node_i18n.h7
-rw-r--r--src/node_url.cc1406
-rw-r--r--src/node_url.h538
4 files changed, 1957 insertions, 6 deletions
diff --git a/src/node_i18n.cc b/src/node_i18n.cc
index 0f3b9b76e6..f89ae40a55 100644
--- a/src/node_i18n.cc
+++ b/src/node_i18n.cc
@@ -79,9 +79,9 @@ bool InitializeICUDirectory(const char* icu_data_path) {
}
}
-static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
- const char* input,
- size_t length) {
+int32_t ToUnicode(MaybeStackBuffer<char>* buf,
+ const char* input,
+ size_t length) {
UErrorCode status = U_ZERO_ERROR;
uint32_t options = UIDNA_DEFAULT;
options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
@@ -113,9 +113,9 @@ static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
return len;
}
-static int32_t ToASCII(MaybeStackBuffer<char>* buf,
- const char* input,
- size_t length) {
+int32_t ToASCII(MaybeStackBuffer<char>* buf,
+ const char* input,
+ size_t length) {
UErrorCode status = U_ZERO_ERROR;
uint32_t options = UIDNA_DEFAULT;
options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
diff --git a/src/node_i18n.h b/src/node_i18n.h
index 31ad18fa47..21a579526d 100644
--- a/src/node_i18n.h
+++ b/src/node_i18n.h
@@ -15,6 +15,13 @@ namespace i18n {
bool InitializeICUDirectory(const char* icu_data_path);
+int32_t ToASCII(MaybeStackBuffer<char>* buf,
+ const char* input,
+ size_t length);
+int32_t ToUnicode(MaybeStackBuffer<char>* buf,
+ const char* input,
+ size_t length);
+
} // namespace i18n
} // namespace node
diff --git a/src/node_url.cc b/src/node_url.cc
new file mode 100644
index 0000000000..f5b1a143f1
--- /dev/null
+++ b/src/node_url.cc
@@ -0,0 +1,1406 @@
+#include "node_url.h"
+#include "node.h"
+#include "node_internals.h"
+#include "env.h"
+#include "env-inl.h"
+#include "util.h"
+#include "util-inl.h"
+#include "v8.h"
+#include "base-object.h"
+#include "base-object-inl.h"
+#include "node_i18n.h"
+
+#include <string>
+#include <vector>
+#include <stdio.h>
+#include <cmath>
+
+#if defined(NODE_HAVE_I18N_SUPPORT)
+#include <unicode/utf8.h>
+#include <unicode/utf.h>
+#endif
+
+namespace node {
+
+using v8::Array;
+using v8::Context;
+using v8::Function;
+using v8::FunctionCallbackInfo;
+using v8::HandleScope;
+using v8::Integer;
+using v8::Isolate;
+using v8::Local;
+using v8::Null;
+using v8::Object;
+using v8::String;
+using v8::Undefined;
+using v8::Value;
+
+// GET(env, obj, name): looks up the property called `name` (a C string) on
+// the JS object `obj` in env's context and unwraps the result with
+// ToLocalChecked().
+#define GET(env, obj, name) \
+ obj->Get(env->context(), \
+ OneByteString(env->isolate(), name)).ToLocalChecked()
+
+// GET_AND_SET(env, obj, name, data, flag): reads the property whose key is
+// the stringified token `name` from `obj`. When the value is a string, its
+// UTF-8 form is assigned to data->name and `flag` is OR-ed into data->flags;
+// any other value is ignored. Expands to a braced block, so it introduces
+// its own scope for `val` and `value`.
+#define GET_AND_SET(env, obj, name, data, flag) \
+ { \
+ Local<Value> val = GET(env, obj, #name); \
+ if (val->IsString()) { \
+ Utf8Value value(env->isolate(), val.As<String>()); \
+ data->name = *value; \
+ data->flags |= flag; \
+ } \
+ }
+
+// The macros below are only usable where a variable named `url` (with a
+// `flags` member) is in scope. Each one ORs a single URL_FLAGS_* bit into
+// url.flags; note that the one-line forms already end in a semicolon.
+#define CANNOT_BE_BASE() url.flags |= URL_FLAGS_CANNOT_BE_BASE;
+#define INVALID_PARSE_STATE() url.flags |= URL_FLAGS_INVALID_PARSE_STATE;
+// SPECIAL() additionally assigns true to a local named `special`.
+#define SPECIAL() \
+ { \
+ url.flags |= URL_FLAGS_SPECIAL; \
+ special = true; \
+ }
+// TERMINATE() and FAILED() set their flag and then jump to a label named
+// `done`, which must exist in the enclosing function.
+#define TERMINATE() \
+ { \
+ url.flags |= URL_FLAGS_TERMINATED; \
+ goto done; \
+ }
+#define FAILED() \
+ { \
+ url.flags |= URL_FLAGS_FAILED; \
+ goto done; \
+ }
+
+#define CHECK_FLAG(flags, name) (flags & URL_FLAGS_##name) /* NOLINT */
+
+#define IS_CANNOT_BE_BASE(flags) CHECK_FLAG(flags, CANNOT_BE_BASE)
+#define IS_FAILED(flags) CHECK_FLAG(flags, FAILED)
+
+#define DOES_HAVE_SCHEME(url) CHECK_FLAG(url.flags, HAS_SCHEME)
+#define DOES_HAVE_USERNAME(url) CHECK_FLAG(url.flags, HAS_USERNAME)
+#define DOES_HAVE_PASSWORD(url) CHECK_FLAG(url.flags, HAS_PASSWORD)
+#define DOES_HAVE_HOST(url) CHECK_FLAG(url.flags, HAS_HOST)
+#define DOES_HAVE_PATH(url) CHECK_FLAG(url.flags, HAS_PATH)
+#define DOES_HAVE_QUERY(url) CHECK_FLAG(url.flags, HAS_QUERY)
+#define DOES_HAVE_FRAGMENT(url) CHECK_FLAG(url.flags, HAS_FRAGMENT)
+
+#define SET_HAVE_SCHEME() url.flags |= URL_FLAGS_HAS_SCHEME;
+#define SET_HAVE_USERNAME() url.flags |= URL_FLAGS_HAS_USERNAME;
+#define SET_HAVE_PASSWORD() url.flags |= URL_FLAGS_HAS_PASSWORD;
+#define SET_HAVE_HOST() url.flags |= URL_FLAGS_HAS_HOST;
+#define SET_HAVE_PATH() url.flags |= URL_FLAGS_HAS_PATH;
+#define SET_HAVE_QUERY() url.flags |= URL_FLAGS_HAS_QUERY;
+#define SET_HAVE_FRAGMENT() url.flags |= URL_FLAGS_HAS_FRAGMENT;
+
+#define UTF8STRING(isolate, str) \
+ String::NewFromUtf8(isolate, str.c_str(), v8::NewStringType::kNormal) \
+ .ToLocalChecked()
+
+namespace url {
+
+#if defined(NODE_HAVE_I18N_SUPPORT)
+ // Runs *input through i18n::ToUnicode and stores the converted bytes in
+ // *output. Returns 0 on success, or -1 (leaving *output untouched) when the
+ // conversion reports a negative result.
+ static int ToUnicode(std::string* input, std::string* output) {
+   MaybeStackBuffer<char> converted;
+   const int32_t rc =
+       i18n::ToUnicode(&converted, input->c_str(), input->length());
+   if (rc < 0)
+     return -1;
+   output->assign(*converted, converted.length());
+   return 0;
+ }
+
+ // Runs *input through i18n::ToASCII and stores the converted bytes in
+ // *output. Returns 0 on success, or -1 (leaving *output untouched) when the
+ // conversion reports a negative result.
+ static int ToASCII(std::string* input, std::string* output) {
+   MaybeStackBuffer<char> converted;
+   const int32_t rc =
+       i18n::ToASCII(&converted, input->c_str(), input->length());
+   if (rc < 0)
+     return -1;
+   output->assign(*converted, converted.length());
+   return 0;
+ }
+
+ // Unfortunately there's not really a better way to do this.
+ // Iterate through each encoded codepoint and verify that
+ // it is a valid unicode codepoint.
+ //
+ // Returns 0 when every code point in *input is a Unicode character and -1
+ // as soon as one is not (including ill-formed byte sequences).
+ static int IsValidUTF8(std::string* input) {
+   const char* p = input->c_str();
+   int32_t len = input->length();
+   for (int32_t i = 0; i < len;) {
+     UChar32 c;
+     // The input is untrusted, so it may be ill-formed. U8_NEXT_UNSAFE
+     // assumes well-formed UTF-8 and may read past the buffer on a truncated
+     // sequence. U8_NEXT is bounded by len and yields a negative code point
+     // for ill-formed input, which then fails U_IS_UNICODE_CHAR below.
+     U8_NEXT(p, i, len, c);
+     if (!U_IS_UNICODE_CHAR(c))
+       return -1;
+   }
+   return 0;
+ }
+#else
+ // Intentional non-ops if ICU is not present.
+ // Copies *input to *output unchanged and returns 0 (success).
+ static int ToUnicode(std::string* input, std::string* output) {
+   // ParseHost passes the same string as input and output, so only copy
+   // when they are distinct objects. Copying the std::string (rather than
+   // its c_str()) keeps any embedded NUL bytes so that later validation
+   // still sees them.
+   if (output != input)
+     *output = *input;
+   return 0;
+ }
+
+ // No-op stand-in used when ICU is not present: copies *input to *output
+ // unchanged and returns 0 (success).
+ static int ToASCII(std::string* input, std::string* output) {
+   // ParseHost passes the same string as input and output, so only copy
+   // when they are distinct objects. Copying the std::string (rather than
+   // its c_str()) keeps any embedded NUL bytes so that later validation
+   // still sees them.
+   if (output != input)
+     *output = *input;
+   return 0;
+ }
+
+ // Stand-in used when ICU is not present: performs no validation and
+ // always returns 0.
+ static int IsValidUTF8(std::string* input) {
+ return 0;
+ }
+#endif
+
+ // Parses an IPv6 literal into the eight 16-bit pieces of host->value.ipv6.
+ // `input`/`length` describe the text to parse; the ParseHost caller passes
+ // the characters between '[' and ']'. A single "::" compression and a
+ // trailing dotted-decimal IPv4 suffix are accepted.
+ //
+ // host->type is set to HOST_TYPE_IPV6 on success and HOST_TYPE_FAILED on
+ // any syntax error; the same value is returned.
+ static url_host_type ParseIPv6Host(url_host* host,
+                                    const char* input,
+                                    size_t length) {
+   url_host_type type = HOST_TYPE_FAILED;
+   for (unsigned n = 0; n < 8; n++)
+     host->value.ipv6[n] = 0;
+   uint16_t* piece_pointer = &host->value.ipv6[0];
+   // One past the last valid piece; never dereference this pointer.
+   uint16_t* last_piece = piece_pointer + 8;
+   uint16_t* compress_pointer = nullptr;
+   const char* pointer = input;
+   const char* end = pointer + length;
+   unsigned value, len, swaps, dots;
+   char ch = pointer < end ? pointer[0] : kEOL;
+   // A leading ':' is only valid as the start of "::".
+   if (ch == ':') {
+     if (length < 2 || pointer[1] != ':')
+       goto end;
+     pointer += 2;
+     ch = pointer < end ? pointer[0] : kEOL;
+     piece_pointer++;
+     compress_pointer = piece_pointer;
+   }
+   while (ch != kEOL) {
+     // All eight pieces are already filled but input remains. This must be
+     // `>=`: last_piece is one past the end, so letting piece_pointer equal
+     // it would make the `*piece_pointer = value` below write out of bounds.
+     if (piece_pointer >= last_piece)
+       goto end;
+     if (ch == ':') {
+       // A second "::" is not allowed.
+       if (compress_pointer != nullptr)
+         goto end;
+       pointer++;
+       ch = pointer < end ? pointer[0] : kEOL;
+       piece_pointer++;
+       compress_pointer = piece_pointer;
+       continue;
+     }
+     // Read up to four hex digits for this piece.
+     value = 0;
+     len = 0;
+     while (len < 4 && ASCII_HEX_DIGIT(ch)) {
+       value = value * 0x10 + hex2bin(ch);
+       pointer++;
+       ch = pointer < end ? pointer[0] : kEOL;
+       len++;
+     }
+     switch (ch) {
+       case '.':
+         // The digits just read belong to an embedded IPv4 address: rewind
+         // and re-read them as decimal.
+         if (len == 0)
+           goto end;
+         pointer -= len;
+         ch = pointer < end ? pointer[0] : kEOL;
+         // The IPv4 suffix needs two free pieces.
+         if (piece_pointer > last_piece - 2)
+           goto end;
+         dots = 0;
+         while (ch != kEOL) {
+           // 0xffffffff marks "no digit seen yet" for this octet.
+           value = 0xffffffff;
+           if (!ASCII_DIGIT(ch))
+             goto end;
+           while (ASCII_DIGIT(ch)) {
+             unsigned number = ch - '0';
+             if (value == 0xffffffff) {
+               value = number;
+             } else if (value == 0) {
+               // Leading zeros are rejected.
+               goto end;
+             } else {
+               value = value * 10 + number;
+             }
+             if (value > 255)
+               goto end;
+             pointer++;
+             ch = pointer < end ? pointer[0] : kEOL;
+           }
+           if (dots < 3 && ch != '.')
+             goto end;
+           // Two octets are packed into each 16-bit piece.
+           *piece_pointer = *piece_pointer * 0x100 + value;
+           if (dots & 0x1)
+             piece_pointer++;
+           if (ch != kEOL) {
+             pointer++;
+             ch = pointer < end ? pointer[0] : kEOL;
+           }
+           if (dots == 3 && ch != kEOL)
+             goto end;
+           dots++;
+         }
+         continue;
+       case ':':
+         pointer++;
+         ch = pointer < end ? pointer[0] : kEOL;
+         // A single trailing ':' is invalid.
+         if (ch == kEOL)
+           goto end;
+         break;
+       case kEOL:
+         break;
+       default:
+         goto end;
+     }
+     *piece_pointer = value;
+     piece_pointer++;
+   }
+
+   if (compress_pointer != nullptr) {
+     // Move the pieces that followed "::" to the end of the address, leaving
+     // the zero-filled gap where the compression was.
+     swaps = piece_pointer - compress_pointer;
+     piece_pointer = last_piece - 1;
+     while (piece_pointer != &host->value.ipv6[0] && swaps > 0) {
+       uint16_t temp = *piece_pointer;
+       uint16_t* swap_piece = compress_pointer + swaps - 1;
+       *piece_pointer = *swap_piece;
+       *swap_piece = temp;
+       piece_pointer--;
+       swaps--;
+     }
+   } else if (compress_pointer == nullptr &&
+              piece_pointer != last_piece) {
+     // Without compression exactly eight pieces are required.
+     goto end;
+   }
+   type = HOST_TYPE_IPV6;
+  end:
+   host->type = type;
+   return type;
+ }
+
+ // Parses the characters in [start, end) as one IPv4 number.
+ //   - "0x"/"0X" prefix: hexadecimal
+ //   - otherwise a leading '0' followed by more characters: octal
+ //   - otherwise: decimal
+ // Returns 0 for an empty digit string, -1 if any character is not a digit
+ // of the selected radix, and the strtol() result otherwise.
+ // NOTE(review): the strtol() result is narrowed from long to int, so very
+ // large inputs may not be represented faithfully - confirm with callers.
+ static inline int ParseNumber(const char* start, const char* end) {
+   unsigned radix = 10;
+   const bool has_hex_prefix =
+       end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x';
+   if (has_hex_prefix) {
+     start += 2;
+     radix = 16;
+   }
+   if (end - start == 0)
+     return 0;
+   if (radix == 10 && end - start > 1 && start[0] == '0') {
+     start++;
+     radix = 8;
+   }
+
+   // Validate every character before handing the text to strtol().
+   for (const char* cursor = start; cursor < end; cursor++) {
+     const char ch = cursor[0];
+     bool is_valid_digit;
+     if (radix == 8)
+       is_valid_digit = !(ch < '0' || ch > '7');
+     else if (radix == 10)
+       is_valid_digit = ASCII_DIGIT(ch);
+     else
+       is_valid_digit = ASCII_HEX_DIGIT(ch);
+     if (!is_valid_digit)
+       return -1;
+   }
+   return strtol(start, NULL, radix);
+ }
+
+ // Tries to interpret input[0..length) as a dotted IPv4 address.
+ //
+ // Result (stored in host->type and returned):
+ //   HOST_TYPE_IPV4   - parsed; host->value.ipv4 holds the 32-bit value
+ //   HOST_TYPE_DOMAIN - the text is not an IPv4 address (empty input, a
+ //                      non-numeric part, an empty part, or more than four
+ //                      parts); host->value is left untouched
+ //   HOST_TYPE_FAILED - a part is numeric but greater than 255
+ static url_host_type ParseIPv4Host(url_host* host,
+                                    const char* input,
+                                    size_t length) {
+   url_host_type type = HOST_TYPE_DOMAIN;
+   const char* pointer = input;
+   const char* mark = input;  // start of the part currently being scanned
+   const char* end = pointer + length;
+   int parts = 0;
+   uint32_t val = 0;
+   unsigned numbers[4];
+   if (length == 0)
+     goto end;
+
+   while (pointer <= end) {
+     const char ch = pointer < end ? pointer[0] : kEOL;
+     const int remaining = end - pointer - 1;
+     if (ch == '.' || ch == kEOL) {
+       // More than four parts, or an empty part, means this is not an IPv4
+       // address. Bail out as a domain instead of falling through to the
+       // assembly code below, which would otherwise read numbers[4] or an
+       // uninitialized slot.
+       if (++parts > 4 || pointer - mark == 0) {
+         type = HOST_TYPE_DOMAIN;
+         goto end;
+       }
+       int n = ParseNumber(mark, pointer);
+       if (n < 0) {
+         type = HOST_TYPE_DOMAIN;
+         goto end;
+       }
+       // A ten-character part is stored as-is and ends the scan without
+       // the per-part 255 limit being applied.
+       if (pointer - mark == 10) {
+         numbers[parts - 1] = n;
+         break;
+       }
+       if (n > 255) {
+         type = HOST_TYPE_FAILED;
+         goto end;
+       }
+       numbers[parts - 1] = n;
+       mark = pointer + 1;
+       // A single trailing dot is tolerated.
+       if (ch == '.' && remaining == 0)
+         break;
+     }
+     pointer++;
+   }
+
+   type = HOST_TYPE_IPV4;
+   if (parts > 0) {
+     // The last part supplies the low-order bits; each earlier part n is
+     // placed at byte position (3 - n). Integer shifts replace the previous
+     // floating-point pow(256, 3 - n).
+     val = numbers[parts - 1];
+     for (int n = 0; n < parts - 1; n++)
+       val += static_cast<uint32_t>(numbers[n]) << (8 * (3 - n));
+   }
+
+   host->value.ipv4 = val;
+  end:
+   host->type = type;
+   return type;
+ }
+
+ // Parses input[0..length) as a URL host and records the outcome in *host.
+ //   - "[...]" is handed to ParseIPv6Host.
+ //   - Anything else is percent-decoded, checked with IsValidUTF8, passed
+ //     through ToASCII, screened for forbidden characters and then offered
+ //     to ParseIPv4Host. If that reports a domain, the text (optionally
+ //     passed through ToUnicode when `unicode` is set) is stored in
+ //     host->value.domain.
+ // Returns the resulting host type, which is also written to host->type.
+ static url_host_type ParseHost(url_host* host,
+                                const char* input,
+                                size_t length,
+                                bool unicode = false) {
+   url_host_type result = HOST_TYPE_FAILED;
+   std::string decoded;
+
+   if (length == 0)
+     goto finish;
+
+   if (input[0] == '[') {
+     if (input[length - 1] != ']')
+       goto finish;
+     return ParseIPv6Host(host, input + 1, length - 2);
+   }
+
+   // Percent-decode, reject invalid UTF-8, then convert to ASCII; each step
+   // only runs if the previous one succeeded.
+   if (PercentDecode(input, length, &decoded) < 0 ||
+       IsValidUTF8(&decoded) < 0 ||
+       ToASCII(&decoded, &decoded) < 0) {
+     goto finish;
+   }
+
+   // None of these characters may survive in the converted host.
+   for (size_t idx = 0; idx < decoded.size(); idx++) {
+     switch (decoded[idx]) {
+       case 0x00:
+       case 0x09:
+       case 0x0a:
+       case 0x0d:
+       case 0x20:
+       case '#':
+       case '%':
+       case '/':
+       case '?':
+       case '@':
+       case '[':
+       case '\\':
+       case ']':
+         goto finish;
+       default:
+         break;
+     }
+   }
+
+   // An IPv4 result or an outright failure is final.
+   result = ParseIPv4Host(host, decoded.c_str(), decoded.length());
+   if (result == HOST_TYPE_IPV4 || result == HOST_TYPE_FAILED)
+     goto finish;
+
+   if (unicode && ToUnicode(&decoded, &decoded) < 0)
+     goto finish;
+
+   // Neither IPv4 nor IPv6: treat it as a domain.
+   result = HOST_TYPE_DOMAIN;
+   host->value.domain = decoded;
+
+  finish:
+   host->type = result;
+   return result;
+ }
+
+ // Finds the first longest run of zero-valued elements in values[0..len)
+ // so that it can be written as "::" when serializing an IPv6 address.
+ // Only runs of at least two zeros qualify; returns a pointer to the start
+ // of the chosen run, or nullptr when there is none.
+ static inline uint16_t* FindLongestZeroSequence(uint16_t* values,
+                                                 size_t len) {
+   uint16_t* best_start = nullptr;
+   uint16_t* run_start = nullptr;
+   unsigned run_length = 0;
+   unsigned best_length = 1;  // a run must beat this, i.e. be >= 2 long
+
+   for (size_t idx = 0; idx < len; idx++) {
+     if (values[idx] == 0) {
+       if (run_start == nullptr)
+         run_start = values + idx;
+       run_length++;
+       continue;
+     }
+     // A non-zero element closes the current run.
+     if (run_length > best_length) {
+       best_length = run_length;
+       best_start = run_start;
+     }
+     run_length = 0;
+     run_start = nullptr;
+   }
+
+   // The array may end in the middle of a run.
+   if (run_length > best_length)
+     best_start = run_start;
+   return best_start;
+ }
+
+ // Serializes *host into *dest (which is cleared first) and returns
+ // host->type.
+ //   HOST_TYPE_DOMAIN - the stored domain string
+ //   HOST_TYPE_IPV4   - dotted decimal, e.g. "1.2.3.4"
+ //   HOST_TYPE_IPV6   - bracketed hex pieces with the longest zero run (as
+ //                      chosen by FindLongestZeroSequence) written as "::"
+ //   HOST_TYPE_FAILED - *dest is left empty
+ static url_host_type WriteHost(url_host* host, std::string* dest) {
+   dest->clear();
+   switch (host->type) {
+     case HOST_TYPE_DOMAIN:
+       *dest = host->value.domain;
+       break;
+     case HOST_TYPE_IPV4: {
+       dest->reserve(15);
+       uint32_t value = host->value.ipv4;
+       // Emit the octets from least to most significant, prepending each.
+       for (int n = 0; n < 4; n++) {
+         char buf[4];
+         snprintf(buf, sizeof(buf), "%u", static_cast<unsigned>(value % 256));
+         dest->insert(0, buf);
+         if (n < 3)
+           dest->insert(0, 1, '.');
+         value /= 256;
+       }
+       break;
+     }
+     case HOST_TYPE_IPV6: {
+       dest->reserve(41);
+       *dest += '[';
+       uint16_t* start = &host->value.ipv6[0];
+       uint16_t* compress_pointer =
+           FindLongestZeroSequence(start, 8);
+       for (int n = 0; n <= 7; n++) {
+         uint16_t* piece = &host->value.ipv6[n];
+         if (compress_pointer == piece) {
+           // The previous piece already wrote a ':' unless this run starts
+           // the address.
+           *dest += n == 0 ? "::" : ":";
+           // Skip the whole zero run. The bound is tested before the
+           // element is read so that ipv6[8] is never dereferenced.
+           while (n < 8 && host->value.ipv6[n] == 0)
+             n++;
+           if (n == 8)
+             break;
+           piece = &host->value.ipv6[n];
+         }
+         char buf[5];
+         snprintf(buf, sizeof(buf), "%x", static_cast<unsigned>(*piece));
+         *dest += buf;
+         if (n < 7)
+           *dest += ':';
+       }
+       *dest += ']';
+       break;
+     }
+     case HOST_TYPE_FAILED:
+       break;
+   }
+   return host->type;
+ }
+
+ // String-to-string convenience wrapper: parses *input as a host and writes
+ // its serialized form to *output. An empty input returns 0 without
+ // touching *output. Returns -1 when parsing fails, 0 otherwise.
+ static int ParseHost(std::string* input,
+                      std::string* output,
+                      bool unicode = false) {
+   if (input->length() == 0)
+     return 0;
+
+   url_host parsed{{""}, HOST_TYPE_DOMAIN};
+   ParseHost(&parsed, input->c_str(), input->length(), unicode);
+   const bool failed = parsed.type == HOST_TYPE_FAILED;
+   if (failed)
+     return -1;
+
+   WriteHost(&parsed, output);
+   return 0;
+ }
+
+ // Appends the UTF-8 value of every string element of the JS array `ary`
+ // to *vec, in order. Non-string elements are skipped.
+ static inline void Copy(Isolate* isolate,
+                         Local<Array> ary,
+                         std::vector<std::string>* vec) {
+   const int32_t count = ary->Length();
+   if (count == 0)
+     return;  // nothing to copy
+   vec->reserve(count);
+   for (int32_t idx = 0; idx < count; idx++) {
+     Local<Value> element = ary->Get(idx);
+     if (!element->IsString())
+       continue;
+     Utf8Value utf8(isolate, element.As<String>());
+     vec->push_back(std::string(*utf8, utf8.length()));
+   }
+ }
+
+ // Builds a new JS array holding one string per element of `vec`, in order.
+ // The vector is taken by const reference so that the path segments are
+ // not copied just to be read.
+ static inline Local<Array> Copy(Isolate* isolate,
+                                 const std::vector<std::string>& vec) {
+   Local<Array> ary = Array::New(isolate, vec.size());
+   for (size_t n = 0; n < vec.size(); n++)
+     ary->Set(n, UTF8STRING(isolate, vec[n]));
+   return ary;
+ }
+
+ // Fills *base from the properties of the JS object base_obj.
+ //   flags - copied when it is an Int32
+ //   scheme, username, password, host, query, fragment - copied when the
+ //     property is a string; the matching URL_FLAGS_HAS_* bit is set too
+ //   port  - copied when it is an Int32
+ //   path  - when it is an array, its string elements are appended to
+ //     base->path and URL_FLAGS_HAS_PATH is set
+ static inline void HarvestBase(Environment* env,
+ struct url_data* base,
+ Local<Object> base_obj) {
+ Local<Value> flags = GET(env, base_obj, "flags");
+ if (flags->IsInt32())
+ base->flags = flags->Int32Value();
+
+ // Each GET_AND_SET reads one string property and ORs in its flag, so the
+ // flags assigned above are extended rather than replaced.
+ GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME);
+ GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME);
+ GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD);
+ GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST);
+ GET_AND_SET(env, base_obj, query, base, URL_FLAGS_HAS_QUERY);
+ GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT);
+ Local<Value> port = GET(env, base_obj, "port");
+ if (port->IsInt32())
+ base->port = port->Int32Value();
+ Local<Value> path = GET(env, base_obj, "path");
+ if (path->IsArray()) {
+ base->flags |= URL_FLAGS_HAS_PATH;
+ Copy(env->isolate(), path.As<Array>(), &(base->path));
+ }
+ }
+
+ // Seeds *context from the JS object context_obj. Only the SPECIAL and
+ // CANNOT_BE_BASE bits of its "flags" property are carried over; "scheme"
+ // is copied when it is a string and "port" when it is an Int32.
+ static inline void HarvestContext(Environment* env,
+                                   struct url_data* context,
+                                   Local<Object> context_obj) {
+   Local<Value> flags_value = GET(env, context_obj, "flags");
+   if (flags_value->IsInt32()) {
+     const int32_t bits = flags_value->Int32Value();
+     if (bits & URL_FLAGS_SPECIAL)
+       context->flags |= URL_FLAGS_SPECIAL;
+     if (bits & URL_FLAGS_CANNOT_BE_BASE)
+       context->flags |= URL_FLAGS_CANNOT_BE_BASE;
+   }
+
+   Local<Value> scheme_value = GET(env, context_obj, "scheme");
+   if (scheme_value->IsString()) {
+     Utf8Value utf8(env->isolate(), scheme_value);
+     context->scheme.assign(*utf8, utf8.length());
+   }
+
+   Local<Value> port_value = GET(env, context_obj, "port");
+   if (port_value->IsInt32())
+     context->port = port_value->Int32Value();
+ }
+
+ // Single dot segment can be ".", "%2e", or "%2E"
+ // Takes the segment by const reference: it is only inspected, so there is
+ // no need to copy the string for every path segment.
+ static inline bool IsSingleDotSegment(const std::string& str) {
+   switch (str.size()) {
+     case 1:
+       return str == ".";
+     case 3:
+       return str[0] == '%' &&
+              str[1] == '2' &&
+              TO_LOWER(str[2]) == 'e';
+     default:
+       return false;
+   }
+ }
+
+ // Double dot segment can be:
+ // "..", ".%2e", ".%2E", "%2e.", "%2E.",
+ // "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
+ // Takes the segment by const reference: it is only inspected, so there is
+ // no need to copy the string for every path segment.
+ static inline bool IsDoubleDotSegment(const std::string& str) {
+   switch (str.size()) {
+     case 2:
+       return str == "..";
+     case 4:
+       if (str[0] != '.' && str[0] != '%')
+         return false;
+       return ((str[0] == '.' &&
+                str[1] == '%' &&
+                str[2] == '2' &&
+                TO_LOWER(str[3]) == 'e') ||
+               (str[0] == '%' &&
+                str[1] == '2' &&
+                TO_LOWER(str[2]) == 'e' &&
+                str[3] == '.'));
+     case 6:
+       return (str[0] == '%' &&
+               str[1] == '2' &&
+               TO_LOWER(str[2]) == 'e' &&
+               str[3] == '%' &&
+               str[4] == '2' &&
+               TO_LOWER(str[5]) == 'e');
+     default:
+       return false;
+   }
+ }
+
+ // Runs the URL parsing state machine over input[0..len) and reports the
+ // result by calling `cb` on `recv` with nine arguments, indexed by the
+ // ARG_* constants (flags, protocol, username, password, host, query,
+ // fragment, port, path). Components that were not set stay undefined, and
+ // when URL_FLAGS_FAILED is set only the flags argument is filled in.
+ //
+ //   override    - kUnknownState for a full parse starting at kSchemeStart;
+ //                 any other value starts the machine in that state
+ //                 ("state override")
+ //   base_obj    - optional base URL record, read with HarvestBase when it
+ //                 is an object
+ //   context_obj - optional record whose flags/scheme/port seed the URL
+ //                 being built, read with HarvestContext when it is an
+ //                 object
+ //
+ // The loop runs once more after the last character with ch == kEOL so
+ // that every state can flush its buffer. A `continue` re-processes the
+ // same character in the new state, while falling out of the switch
+ // advances to the next character. TERMINATE()/FAILED() jump to `done`.
+ static void Parse(Environment* env,
+                   Local<Value> recv,
+                   const char* input,
+                   const size_t len,
+                   enum url_parse_state override,
+                   Local<Object> base_obj,
+                   Local<Object> context_obj,
+                   Local<Function> cb) {
+   Isolate* isolate = env->isolate();
+   Local<Context> context = env->context();
+   HandleScope handle_scope(isolate);
+   Context::Scope context_scope(context);
+
+   const bool has_base = base_obj->IsObject();
+   bool atflag = false;   // an '@' has already been seen in the authority
+   bool sbflag = false;   // currently inside '[' ... ']' in the host
+   bool uflag = false;    // the userinfo ':' separator has been seen
+   bool base_is_file = false;
+   int wskip = 0;         // tabs/newlines skipped while in kAuthority
+
+   struct url_data base;
+   struct url_data url;
+   if (context_obj->IsObject())
+     HarvestContext(env, &url, context_obj);
+   if (has_base)
+     HarvestBase(env, &base, base_obj);
+
+   std::string buffer;
+   url.scheme.reserve(len);
+   url.username.reserve(len);
+   url.password.reserve(len);
+   url.host.reserve(len);
+   url.path.reserve(len);
+   url.query.reserve(len);
+   url.fragment.reserve(len);
+   buffer.reserve(len);
+
+   // Set the initial parse state.
+   const bool state_override = override != kUnknownState;
+   enum url_parse_state state = state_override ? override : kSchemeStart;
+
+   const char* p = input;
+   const char* end = input + len;
+
+   if (state < kSchemeStart || state > kFragment) {
+     INVALID_PARSE_STATE();
+     goto done;
+   }
+
+   while (p <= end) {
+     const char ch = p < end ? p[0] : kEOL;
+
+     if (TAB_AND_NEWLINE(ch)) {
+       if (state == kAuthority) {
+         // It's necessary to keep track of how much whitespace
+         // is being ignored when in kAuthority state because of
+         // how the buffer is managed. TODO: See if there's a better
+         // way
+         wskip++;
+       }
+       p++;
+       continue;
+     }
+
+     bool special = url.flags & URL_FLAGS_SPECIAL;
+     const bool special_back_slash = (special && ch == '\\');
+     switch (state) {
+       case kSchemeStart:
+         if (ASCII_ALPHA(ch)) {
+           buffer += TO_LOWER(ch);
+           state = kScheme;
+         } else if (!state_override) {
+           state = kNoScheme;
+           continue;
+         } else {
+           TERMINATE()
+         }
+         break;
+       case kScheme:
+         if (SCHEME_CHAR(ch)) {
+           buffer += TO_LOWER(ch);
+           p++;
+           continue;
+         } else if (ch == ':' || (state_override && ch == kEOL)) {
+           buffer += ':';
+           if (buffer.size() > 0) {
+             SET_HAVE_SCHEME()
+             url.scheme = buffer;
+           }
+           if (IsSpecial(url.scheme)) {
+             SPECIAL()
+           } else {
+             url.flags &= ~URL_FLAGS_SPECIAL;
+           }
+           if (state_override)
+             goto done;
+           buffer.clear();
+           if (url.scheme == "file:") {
+             state = kFile;
+           } else if (special &&
+                      has_base &&
+                      DOES_HAVE_SCHEME(base) &&
+                      url.scheme == base.scheme) {
+             state = kSpecialRelativeOrAuthority;
+           } else if (special) {
+             state = kSpecialAuthoritySlashes;
+           } else if (p[1] == '/') {
+             state = kPathOrAuthority;
+             p++;
+           } else {
+             CANNOT_BE_BASE()
+             SET_HAVE_PATH()
+             url.path.push_back("");
+             state = kCannotBeBase;
+           }
+         } else if (!state_override) {
+           // Not a scheme after all: start over from the first character.
+           buffer.clear();
+           state = kNoScheme;
+           p = input;
+           continue;
+         } else {
+           TERMINATE()
+         }
+         break;
+       case kNoScheme:
+         if (!has_base || (IS_CANNOT_BE_BASE(base.flags) && ch != '#')) {
+           FAILED()
+         } else if (IS_CANNOT_BE_BASE(base.flags) && ch == '#') {
+           SET_HAVE_SCHEME()
+           url.scheme = base.scheme;
+           if (IsSpecial(url.scheme)) {
+             SPECIAL()
+           } else {
+             url.flags &= ~URL_FLAGS_SPECIAL;
+           }
+           if (DOES_HAVE_PATH(base)) {
+             SET_HAVE_PATH()
+             url.path = base.path;
+           }
+           if (DOES_HAVE_QUERY(base)) {
+             SET_HAVE_QUERY()
+             url.query = base.query;
+           }
+           if (DOES_HAVE_FRAGMENT(base)) {
+             SET_HAVE_FRAGMENT()
+             url.fragment = base.fragment;
+           }
+           CANNOT_BE_BASE()
+           state = kFragment;
+         } else if (has_base &&
+                    DOES_HAVE_SCHEME(base) &&
+                    base.scheme != "file:") {
+           state = kRelative;
+           continue;
+         } else {
+           SET_HAVE_SCHEME()
+           url.scheme = "file:";
+           SPECIAL()
+           state = kFile;
+           continue;
+         }
+         break;
+       case kSpecialRelativeOrAuthority:
+         if (ch == '/' && p[1] == '/') {
+           state = kSpecialAuthorityIgnoreSlashes;
+           p++;
+         } else {
+           state = kRelative;
+           continue;
+         }
+         break;
+       case kPathOrAuthority:
+         if (ch == '/') {
+           state = kAuthority;
+         } else {
+           state = kPath;
+           continue;
+         }
+         break;
+       case kRelative:
+         SET_HAVE_SCHEME()
+         url.scheme = base.scheme;
+         if (IsSpecial(url.scheme)) {
+           SPECIAL()
+         } else {
+           url.flags &= ~URL_FLAGS_SPECIAL;
+         }
+         switch (ch) {
+           case kEOL:
+             if (DOES_HAVE_USERNAME(base)) {
+               SET_HAVE_USERNAME()
+               url.username = base.username;
+             }
+             if (DOES_HAVE_PASSWORD(base)) {
+               SET_HAVE_PASSWORD()
+               url.password = base.password;
+             }
+             if (DOES_HAVE_HOST(base)) {
+               SET_HAVE_HOST()
+               url.host = base.host;
+             }
+             if (DOES_HAVE_QUERY(base)) {
+               SET_HAVE_QUERY()
+               url.query = base.query;
+             }
+             if (DOES_HAVE_PATH(base)) {
+               SET_HAVE_PATH()
+               url.path = base.path;
+             }
+             url.port = base.port;
+             break;
+           case '/':
+             state = kRelativeSlash;
+             break;
+           case '?':
+             if (DOES_HAVE_USERNAME(base)) {
+               SET_HAVE_USERNAME()
+               url.username = base.username;
+             }
+             if (DOES_HAVE_PASSWORD(base)) {
+               SET_HAVE_PASSWORD()
+               url.password = base.password;
+             }
+             if (DOES_HAVE_HOST(base)) {
+               SET_HAVE_HOST()
+               url.host = base.host;
+             }
+             if (DOES_HAVE_PATH(base)) {
+               SET_HAVE_PATH()
+               url.path = base.path;
+             }
+             url.port = base.port;
+             state = kQuery;
+             break;
+           case '#':
+             if (DOES_HAVE_USERNAME(base)) {
+               SET_HAVE_USERNAME()
+               url.username = base.username;
+             }
+             if (DOES_HAVE_PASSWORD(base)) {
+               SET_HAVE_PASSWORD()
+               url.password = base.password;
+             }
+             if (DOES_HAVE_HOST(base)) {
+               SET_HAVE_HOST()
+               url.host = base.host;
+             }
+             if (DOES_HAVE_QUERY(base)) {
+               SET_HAVE_QUERY()
+               url.query = base.query;
+             }
+             if (DOES_HAVE_PATH(base)) {
+               SET_HAVE_PATH()
+               url.path = base.path;
+             }
+             url.port = base.port;
+             state = kFragment;
+             break;
+           default:
+             if (special_back_slash) {
+               state = kRelativeSlash;
+             } else {
+               if (DOES_HAVE_USERNAME(base)) {
+                 SET_HAVE_USERNAME()
+                 url.username = base.username;
+               }
+               if (DOES_HAVE_PASSWORD(base)) {
+                 SET_HAVE_PASSWORD()
+                 url.password = base.password;
+               }
+               if (DOES_HAVE_HOST(base)) {
+                 SET_HAVE_HOST()
+                 url.host = base.host;
+               }
+               if (DOES_HAVE_PATH(base)) {
+                 SET_HAVE_PATH()
+                 url.path = base.path;
+                 // Drop the base's last segment; the relative path
+                 // replaces it.
+                 if (!url.path.empty())
+                   url.path.pop_back();
+               }
+               url.port = base.port;
+               state = kPath;
+               continue;
+             }
+         }
+         break;
+       case kRelativeSlash:
+         if (ch == '/' || special_back_slash) {
+           state = kSpecialAuthorityIgnoreSlashes;
+         } else {
+           if (DOES_HAVE_USERNAME(base)) {
+             SET_HAVE_USERNAME()
+             url.username = base.username;
+           }
+           if (DOES_HAVE_PASSWORD(base)) {
+             SET_HAVE_PASSWORD()
+             url.password = base.password;
+           }
+           if (DOES_HAVE_HOST(base)) {
+             SET_HAVE_HOST()
+             url.host = base.host;
+           }
+           url.port = base.port;
+           state = kPath;
+           continue;
+         }
+         break;
+       case kSpecialAuthoritySlashes:
+         state = kSpecialAuthorityIgnoreSlashes;
+         if (ch == '/' && p[1] == '/') {
+           p++;
+         } else {
+           continue;
+         }
+         break;
+       case kSpecialAuthorityIgnoreSlashes:
+         if (ch != '/' && ch != '\\') {
+           state = kAuthority;
+           continue;
+         }
+         break;
+       case kAuthority:
+         if (ch == '@') {
+           // A second '@' means the earlier one was part of the userinfo.
+           if (atflag) {
+             buffer.reserve(buffer.size() + 3);
+             buffer.insert(0, "%40");
+           }
+           atflag = true;
+           const size_t blen = buffer.size();
+           if (blen > 0 && buffer[0] != ':') {
+             SET_HAVE_USERNAME()
+           }
+           for (size_t n = 0; n < blen; n++) {
+             const char bch = buffer[n];
+             if (bch == ':') {
+               SET_HAVE_PASSWORD()
+               // Only the first ':' separates username from password.
+               if (!uflag) {
+                 uflag = true;
+                 continue;
+               }
+             }
+             if (uflag) {
+               AppendOrEscape(&url.password, bch, UserinfoEncodeSet);
+             } else {
+               AppendOrEscape(&url.username, bch, UserinfoEncodeSet);
+             }
+           }
+           buffer.clear();
+         } else if (ch == kEOL ||
+                    ch == '/' ||
+                    ch == '?' ||
+                    ch == '#' ||
+                    special_back_slash) {
+           // Rewind to the start of the buffered text (plus any skipped
+           // whitespace) and re-read it as the host.
+           p -= buffer.size() + 1 + wskip;
+           buffer.clear();
+           state = kHost;
+         } else {
+           buffer += ch;
+         }
+         break;
+       case kHost:
+       case kHostname:
+         if (ch == ':' && !sbflag) {
+           if (special && buffer.size() == 0)
+             FAILED()
+           SET_HAVE_HOST()
+           if (ParseHost(&buffer, &url.host) < 0)
+             FAILED()
+           buffer.clear();
+           state = kPort;
+           if (override == kHostname)
+             TERMINATE()
+         } else if (ch == kEOL ||
+                    ch == '/' ||
+                    ch == '?' ||
+                    ch == '#' ||
+                    special_back_slash) {
+           p--;
+           if (special && buffer.size() == 0)
+             FAILED()
+           SET_HAVE_HOST()
+           if (ParseHost(&buffer, &url.host) < 0)
+             FAILED()
+           buffer.clear();
+           state = kPathStart;
+           if (state_override)
+             TERMINATE()
+         } else {
+           if (ch == '[')
+             sbflag = true;
+           if (ch == ']')
+             sbflag = false;
+           buffer += TO_LOWER(ch);
+         }
+         break;
+       case kPort:
+         if (ASCII_DIGIT(ch)) {
+           buffer += ch;
+         } else if (state_override ||
+                    ch == kEOL ||
+                    ch == '/' ||
+                    ch == '?' ||
+                    ch == '#' ||
+                    special_back_slash) {
+           if (buffer.size() > 0) {
+             int port = 0;
+             for (size_t i = 0; i < buffer.size(); i++) {
+               port = port * 10 + buffer[i] - '0';
+               // Stop once the value is out of range. Otherwise a long
+               // run of digits overflows the int (undefined behaviour)
+               // and could wrap back into the valid range.
+               if (port > 0xffff)
+                 break;
+             }
+             if (port >= 0 && port <= 0xffff) {
+               url.port = NormalizePort(url.scheme, port);
+             } else if (!state_override) {
+               FAILED()
+             }
+             buffer.clear();
+           }
+           state = kPathStart;
+           continue;
+         } else {
+           FAILED();
+         }
+         break;
+       case kFile:
+         base_is_file = (
+             has_base &&
+             DOES_HAVE_SCHEME(base) &&
+             base.scheme == "file:");
+         switch (ch) {
+           case kEOL:
+             if (base_is_file) {
+               if (DOES_HAVE_HOST(base)) {
+                 SET_HAVE_HOST()
+                 url.host = base.host;
+               }
+               if (DOES_HAVE_PATH(base)) {
+                 SET_HAVE_PATH()
+                 url.path = base.path;
+               }
+               if (DOES_HAVE_QUERY(base)) {
+                 SET_HAVE_QUERY()
+                 url.query = base.query;
+               }
+             }
+             break;
+           case '\\':
+           case '/':
+             state = kFileSlash;
+             break;
+           case '?':
+             if (base_is_file) {
+               if (DOES_HAVE_HOST(base)) {
+                 SET_HAVE_HOST()
+                 url.host = base.host;
+               }
+               if (DOES_HAVE_PATH(base)) {
+                 SET_HAVE_PATH()
+                 url.path = base.path;
+               }
+               SET_HAVE_QUERY()
+               state = kQuery;
+             }
+             break;
+           case '#':
+             if (base_is_file) {
+               if (DOES_HAVE_HOST(base)) {
+                 SET_HAVE_HOST()
+                 url.host = base.host;
+               }
+               if (DOES_HAVE_PATH(base)) {
+                 SET_HAVE_PATH()
+                 url.path = base.path;
+               }
+               if (DOES_HAVE_QUERY(base)) {
+                 SET_HAVE_QUERY()
+                 url.query = base.query;
+               }
+               state = kFragment;
+             }
+             break;
+           default:
+             // Inherit from a file: base unless the input starts with a
+             // Windows drive letter followed by a path/query/fragment
+             // delimiter.
+             if (base_is_file &&
+                 (!WINDOWS_DRIVE_LETTER(ch, p[1]) ||
+                  end - p == 1 ||
+                  (p[2] != '/' &&
+                   p[2] != '\\' &&
+                   p[2] != '?' &&
+                   p[2] != '#'))) {
+               if (DOES_HAVE_HOST(base)) {
+                 SET_HAVE_HOST()
+                 url.host = base.host;
+               }
+               if (DOES_HAVE_PATH(base)) {
+                 SET_HAVE_PATH()
+                 url.path = base.path;
+               }
+               if (!url.path.empty())
+                 url.path.pop_back();
+             }
+             state = kPath;
+             continue;
+         }
+         break;
+       case kFileSlash:
+         if (ch == '/' || ch == '\\') {
+           state = kFileHost;
+         } else {
+           // Carry over a normalized drive letter from a file: base.
+           if (has_base &&
+               DOES_HAVE_SCHEME(base) &&
+               base.scheme == "file:" &&
+               DOES_HAVE_PATH(base) &&
+               base.path.size() > 0 &&
+               NORMALIZED_WINDOWS_DRIVE_LETTER(base.path[0])) {
+             SET_HAVE_PATH()
+             url.path.push_back(base.path[0]);
+           }
+           state = kPath;
+           continue;
+         }
+         break;
+       case kFileHost:
+         if (ch == kEOL ||
+             ch == '/' ||
+             ch == '\\' ||
+             ch == '?' ||
+             ch == '#') {
+           if (buffer.size() == 2 &&
+               WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) {
+             // The "host" is really a drive letter; keep it in the buffer
+             // so that kPath turns it into the first path segment.
+             state = kPath;
+           } else if (buffer.size() == 0) {
+             state = kPathStart;
+           } else {
+             if (buffer != "localhost") {
+               SET_HAVE_HOST()
+               if (ParseHost(&buffer, &url.host) < 0)
+                 FAILED()
+             }
+             buffer.clear();
+             state = kPathStart;
+           }
+           continue;
+         } else {
+           buffer += ch;
+         }
+         break;
+       case kPathStart:
+         state = kPath;
+         if (ch != '/' && !special_back_slash)
+           continue;
+         break;
+       case kPath:
+         if (ch == kEOL ||
+             ch == '/' ||
+             special_back_slash ||
+             (!state_override && (ch == '?' || ch == '#'))) {
+           if (IsDoubleDotSegment(buffer)) {
+             if (!url.path.empty())
+               url.path.pop_back();
+             if (ch != '/' && !special_back_slash) {
+               SET_HAVE_PATH()
+               url.path.push_back("");
+             }
+           } else if (IsSingleDotSegment(buffer)) {
+             if (ch != '/' && !special_back_slash) {
+               SET_HAVE_PATH();
+               url.path.push_back("");
+             }
+           } else {
+             // A leading drive letter in a file: URL is normalized to
+             // "X:" and clears the HAS_HOST flag.
+             if (DOES_HAVE_SCHEME(url) &&
+                 url.scheme == "file:" &&
+                 url.path.empty() &&
+                 buffer.size() == 2 &&
+                 WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) {
+               url.flags &= ~URL_FLAGS_HAS_HOST;
+               buffer[1] = ':';
+             }
+             SET_HAVE_PATH()
+             std::string segment(buffer.c_str(), buffer.size());
+             url.path.push_back(segment);
+           }
+           buffer.clear();
+           if (ch == '?') {
+             SET_HAVE_QUERY()
+             state = kQuery;
+           } else if (ch == '#') {
+             state = kFragment;
+           }
+         } else {
+           // "%2e"/"%2E" is stored as a literal '.' so that the dot
+           // segment checks above see it.
+           if (ch == '%' && p[1] == '2' && TO_LOWER(p[2]) == 'e') {
+             buffer += '.';
+             p += 2;
+           } else {
+             AppendOrEscape(&buffer, ch, DefaultEncodeSet);
+           }
+         }
+         break;
+       case kCannotBeBase:
+         switch (ch) {
+           case '?':
+             state = kQuery;
+             break;
+           case '#':
+             state = kFragment;
+             break;
+           default:
+             if (url.path.size() == 0)
+               url.path.push_back("");
+             if (url.path.size() > 0 && ch != kEOL)
+               AppendOrEscape(&url.path[0], ch, SimpleEncodeSet);
+         }
+         break;
+       case kQuery:
+         if (ch == kEOL || (!state_override && ch == '#')) {
+           SET_HAVE_QUERY()
+           url.query = buffer;
+           buffer.clear();
+           if (ch == '#')
+             state = kFragment;
+         } else {
+           AppendOrEscape(&buffer, ch, QueryEncodeSet);
+         }
+         break;
+       case kFragment:
+         switch (ch) {
+           case kEOL:
+             SET_HAVE_FRAGMENT()
+             url.fragment = buffer;
+             break;
+           case 0:
+             break;
+           default:
+             buffer += ch;
+         }
+         break;
+       default:
+         INVALID_PARSE_STATE()
+         goto done;
+     }
+
+     p++;
+   }
+
+  done:
+
+   // Define the return value placeholders
+   const Local<Value> undef = Undefined(isolate);
+   Local<Value> argv[9] = {
+     undef,
+     undef,
+     undef,
+     undef,
+     undef,
+     undef,
+     undef,
+     undef,
+     undef,
+   };
+
+   argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
+   if (!IS_FAILED(url.flags)) {
+     if (DOES_HAVE_SCHEME(url))
+       argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str());
+     if (DOES_HAVE_USERNAME(url))
+       argv[ARG_USERNAME] = UTF8STRING(isolate, url.username);
+     if (DOES_HAVE_PASSWORD(url))
+       argv[ARG_PASSWORD] = UTF8STRING(isolate, url.password);
+     if (DOES_HAVE_HOST(url))
+       argv[ARG_HOST] = UTF8STRING(isolate, url.host);
+     if (DOES_HAVE_QUERY(url))
+       argv[ARG_QUERY] = UTF8STRING(isolate, url.query);
+     if (DOES_HAVE_FRAGMENT(url))
+       argv[ARG_FRAGMENT] = UTF8STRING(isolate, url.fragment);
+     if (url.port > -1)
+       argv[ARG_PORT] = Integer::New(isolate, url.port);
+     if (DOES_HAVE_PATH(url))
+       argv[ARG_PATH] = Copy(isolate, url.path);
+   }
+
+   cb->Call(context, recv, 9, argv);
+ }
+
+ // JS binding for the parser. Expected arguments:
+ //   args[0] string to parse
+ //   args[1] optional numeric parse state to start in (state override)
+ //   args[2] base record: undefined, null or an object
+ //   args[3] context record: undefined, null or an object
+ //   args[4] callback that receives the parsed components
+ static void Parse(const FunctionCallbackInfo<Value>& args) {
+   Environment* env = Environment::GetCurrent(args);
+   CHECK_GE(args.Length(), 5);
+   CHECK(args[0]->IsString());
+   CHECK(args[2]->IsUndefined() ||
+         args[2]->IsNull() ||
+         args[2]->IsObject());
+   CHECK(args[3]->IsUndefined() ||
+         args[3]->IsNull() ||
+         args[3]->IsObject());
+   CHECK(args[4]->IsFunction());
+
+   Utf8Value text(env->isolate(), args[0]);
+
+   // Without a numeric second argument the parser starts from the top.
+   enum url_parse_state start_state = kUnknownState;
+   if (args[1]->IsNumber()) {
+     start_state =
+         static_cast<enum url_parse_state>(args[1]->Uint32Value());
+   }
+
+   Local<Object> base_record = args[2].As<Object>();
+   Local<Object> context_record = args[3].As<Object>();
+   Local<Function> callback = args[4].As<Function>();
+   Parse(env, args.This(),
+         *text, text.length(),
+         start_state,
+         base_record,
+         context_record,
+         callback);
+ }
+
+ // JS binding: returns args[0] (a string) with each byte of its UTF-8 form
+ // passed through AppendOrEscape using UserinfoEncodeSet.
+ static void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
+   Environment* env = Environment::GetCurrent(args);
+   CHECK_GE(args.Length(), 1);
+   CHECK(args[0]->IsString());
+   Utf8Value value(env->isolate(), args[0]);
+
+   const size_t byte_count = value.length();
+   std::string escaped;
+   escaped.reserve(byte_count);
+   const char* bytes = *value;
+   for (size_t idx = 0; idx != byte_count; idx++)
+     AppendOrEscape(&escaped, bytes[idx], UserinfoEncodeSet);
+
+   Local<String> result =
+       String::NewFromUtf8(env->isolate(),
+                           escaped.c_str(),
+                           v8::NewStringType::kNormal).ToLocalChecked();
+   args.GetReturnValue().Set(result);
+ }
+
+ // JS binding: parses args[0] (a string) as a host without the unicode
+ // option and returns its serialized form, or "" when parsing fails.
+ static void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
+   Environment* env = Environment::GetCurrent(args);
+   CHECK_GE(args.Length(), 1);
+   CHECK(args[0]->IsString());
+   Utf8Value domain(env->isolate(), args[0]);
+
+   url_host parsed{{""}, HOST_TYPE_DOMAIN};
+   ParseHost(&parsed, *domain, domain.length());
+   if (parsed.type != HOST_TYPE_FAILED) {
+     std::string serialized;
+     WriteHost(&parsed, &serialized);
+     args.GetReturnValue().Set(
+         String::NewFromUtf8(env->isolate(),
+                             serialized.c_str(),
+                             v8::NewStringType::kNormal).ToLocalChecked());
+   } else {
+     args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
+   }
+ }
+
+ // JS binding: parses args[0] (a string) as a host with the unicode option
+ // enabled and returns its serialized form, or "" when parsing fails.
+ static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
+   Environment* env = Environment::GetCurrent(args);
+   CHECK_GE(args.Length(), 1);
+   CHECK(args[0]->IsString());
+   Utf8Value domain(env->isolate(), args[0]);
+
+   url_host parsed{{""}, HOST_TYPE_DOMAIN};
+   ParseHost(&parsed, *domain, domain.length(), true);
+   if (parsed.type != HOST_TYPE_FAILED) {
+     std::string serialized;
+     WriteHost(&parsed, &serialized);
+     args.GetReturnValue().Set(
+         String::NewFromUtf8(env->isolate(),
+                             serialized.c_str(),
+                             v8::NewStringType::kNormal).ToLocalChecked());
+   } else {
+     args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
+   }
+ }
+
+ // Module initializer: installs the binding's methods and constants on
+ // `target`. `unused` and `priv` are not read.
+ static void Init(Local<Object> target,
+ Local<Value> unused,
+ Local<Context> context,
+ void* priv) {
+ Environment* env = Environment::GetCurrent(context);
+ env->SetMethod(target, "parse", Parse);
+ env->SetMethod(target, "encodeAuth", EncodeAuthSet);
+ env->SetMethod(target, "domainToASCII", DomainToASCII);
+ env->SetMethod(target, "domainToUnicode", DomainToUnicode);
+
+ // Define one constant on target per entry of the FLAGS list; the second
+ // macro argument of each entry is ignored here.
+#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
+ FLAGS(XX)
+#undef XX
+
+ // Likewise for every entry of the ARGS and PARSESTATES lists.
+#define XX(name) NODE_DEFINE_CONSTANT(target, name);
+ ARGS(XX)
+ PARSESTATES(XX)
+#undef XX
+ }
+} // namespace url
+} // namespace node
+
+NODE_MODULE_CONTEXT_AWARE_BUILTIN(url, node::url::Init)
diff --git a/src/node_url.h b/src/node_url.h
new file mode 100644
index 0000000000..198c29938b
--- /dev/null
+++ b/src/node_url.h
@@ -0,0 +1,538 @@
+#ifndef SRC_NODE_URL_H_
+#define SRC_NODE_URL_H_
+
+#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#include "node.h"
+#include <string>
+#include <vector>
+
+namespace node {
+namespace url {
+
+// Character-classification helpers.
+// Every use of a macro parameter is parenthesized so that a compound
+// argument such as `c & 0x7f` is classified as a whole instead of being
+// re-associated by operator precedence. Arguments may be evaluated more than
+// once, so do not pass expressions with side effects.
+
+// True when bit `i` is set in the byte-array bitmap `a`, i.e. bit (i & 7) of
+// element a[i >> 3].
+#define BIT_AT(a, i) \
+  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
+      (1 << ((unsigned int) (i) & 7))))
+// True for 0x09 (TAB), 0x0a (LF) and 0x0d (CR).
+#define TAB_AND_NEWLINE(ch) \
+  ((ch) == 0x09 || (ch) == 0x0a || (ch) == 0x0d)
+// True for '0'..'9'.
+#define ASCII_DIGIT(ch) \
+  ((ch) >= 0x30 && (ch) <= 0x39)
+// True for '0'..'9', 'A'..'F' and 'a'..'f'.
+#define ASCII_HEX_DIGIT(ch) \
+  (ASCII_DIGIT(ch) || \
+   ((ch) >= 0x41 && (ch) <= 0x46) || \
+   ((ch) >= 0x61 && (ch) <= 0x66))
+// True for 'A'..'Z' and 'a'..'z'.
+#define ASCII_ALPHA(ch) \
+  (((ch) >= 0x41 && (ch) <= 0x5a) || ((ch) >= 0x61 && (ch) <= 0x7a))
+#define ASCII_ALPHANUMERIC(ch) \
+  (ASCII_DIGIT(ch) || ASCII_ALPHA(ch))
+// ASCII letters get bit 0x20 set (upper case becomes lower case); every
+// other value is returned unchanged.
+#define TO_LOWER(ch) \
+  (ASCII_ALPHA(ch) ? ((ch) | 0x20) : (ch))
+// Characters accepted inside a scheme: alphanumerics, '+', '-' and '.'.
+#define SCHEME_CHAR(ch) \
+  (ASCII_ALPHANUMERIC(ch) || (ch) == '+' || (ch) == '-' || (ch) == '.')
+// An ASCII letter followed by ':' or '|'.
+#define WINDOWS_DRIVE_LETTER(ch, next) \
+  (ASCII_ALPHA(ch) && ((next) == ':' || (next) == '|'))
+// A two-character string made of an ASCII letter followed by ':'.
+#define NORMALIZED_WINDOWS_DRIVE_LETTER(str) \
+  ((str).length() == 2 && \
+   ASCII_ALPHA((str)[0]) && \
+   (str)[1] == ':')
+
+// Lookup table mapping a byte value to its three-character percent-encoded
+// form ("%00" .. "%FF", upper-case hex digits). Both the strings and the
+// table slots are const so that no entry can be repointed at run time.
+static const char* const hex[256] = {
+  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
+  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
+  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
+  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
+  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
+  "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
+  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
+  "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
+  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
+  "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
+  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
+  "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
+  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
+  "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
+  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
+  "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
+  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
+  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
+  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
+  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
+  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
+  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
+  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
+  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
+  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
+  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
+  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
+  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
+  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
+  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
+  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
+  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
+};
+
+// Bitmap behind SimpleEncodeSet(): one bit per byte value, least-significant
+// bit first, so byte value N is bit (N & 7) of entry [N >> 3].
+// Bits set: 0x00-0x1F and 0x7F-0xFF.
+static const uint8_t SIMPLE_ENCODE_SET[32] = {
+  0xFF,  // 00-07
+  0xFF,  // 08-0F
+  0xFF,  // 10-17
+  0xFF,  // 18-1F
+  0x00,  // 20-27
+  0x00,  // 28-2F
+  0x00,  // 30-37
+  0x00,  // 38-3F
+  0x00,  // 40-47
+  0x00,  // 48-4F
+  0x00,  // 50-57
+  0x00,  // 58-5F
+  0x00,  // 60-67
+  0x00,  // 68-6F
+  0x00,  // 70-77
+  0x80,  // 78-7F: DEL
+  0xFF,  // 80-87
+  0xFF,  // 88-8F
+  0xFF,  // 90-97
+  0xFF,  // 98-9F
+  0xFF,  // A0-A7
+  0xFF,  // A8-AF
+  0xFF,  // B0-B7
+  0xFF,  // B8-BF
+  0xFF,  // C0-C7
+  0xFF,  // C8-CF
+  0xFF,  // D0-D7
+  0xFF,  // D8-DF
+  0xFF,  // E0-E7
+  0xFF,  // E8-EF
+  0xFF,  // F0-F7
+  0xFF   // F8-FF
+};
+
+// Bitmap behind DefaultEncodeSet(): one bit per byte value, least-significant
+// bit first, so byte value N is bit (N & 7) of entry [N >> 3].
+// Bits set: 0x00-0x1F, 0x7F-0xFF and the characters named in the rows below.
+static const uint8_t DEFAULT_ENCODE_SET[32] = {
+  0xFF,  // 00-07
+  0xFF,  // 08-0F
+  0xFF,  // 10-17
+  0xFF,  // 18-1F
+  0x0D,  // 20-27: ' ' '"' '#'
+  0x00,  // 28-2F
+  0x00,  // 30-37
+  0xD0,  // 38-3F: '<' '>' '?'
+  0x00,  // 40-47
+  0x00,  // 48-4F
+  0x00,  // 50-57
+  0x00,  // 58-5F
+  0x01,  // 60-67: '`'
+  0x00,  // 68-6F
+  0x00,  // 70-77
+  0xA8,  // 78-7F: '{' '}' DEL
+  0xFF,  // 80-87
+  0xFF,  // 88-8F
+  0xFF,  // 90-97
+  0xFF,  // 98-9F
+  0xFF,  // A0-A7
+  0xFF,  // A8-AF
+  0xFF,  // B0-B7
+  0xFF,  // B8-BF
+  0xFF,  // C0-C7
+  0xFF,  // C8-CF
+  0xFF,  // D0-D7
+  0xFF,  // D8-DF
+  0xFF,  // E0-E7
+  0xFF,  // E8-EF
+  0xFF,  // F0-F7
+  0xFF   // F8-FF
+};
+
+// Bitmap behind UserinfoEncodeSet(): one bit per byte value,
+// least-significant bit first, so byte value N is bit (N & 7) of entry
+// [N >> 3].
+// Bits set: 0x00-0x1F, 0x7F-0xFF and the characters named in the rows below.
+static const uint8_t USERINFO_ENCODE_SET[32] = {
+  0xFF,  // 00-07
+  0xFF,  // 08-0F
+  0xFF,  // 10-17
+  0xFF,  // 18-1F
+  0x0D,  // 20-27: ' ' '"' '#'
+  0x80,  // 28-2F: '/'
+  0x00,  // 30-37
+  0xFC,  // 38-3F: ':' ';' '<' '=' '>' '?'
+  0x01,  // 40-47: '@'
+  0x00,  // 48-4F
+  0x00,  // 50-57
+  0x78,  // 58-5F: '[' backslash ']' '^'
+  0x01,  // 60-67: '`'
+  0x00,  // 68-6F
+  0x00,  // 70-77
+  0xB8,  // 78-7F: '{' '|' '}' DEL
+  0xFF,  // 80-87
+  0xFF,  // 88-8F
+  0xFF,  // 90-97
+  0xFF,  // 98-9F
+  0xFF,  // A0-A7
+  0xFF,  // A8-AF
+  0xFF,  // B0-B7
+  0xFF,  // B8-BF
+  0xFF,  // C0-C7
+  0xFF,  // C8-CF
+  0xFF,  // D0-D7
+  0xFF,  // D8-DF
+  0xFF,  // E0-E7
+  0xFF,  // E8-EF
+  0xFF,  // F0-F7
+  0xFF   // F8-FF
+};
+
+// Bitmap behind QueryEncodeSet(): one bit per byte value, least-significant
+// bit first, so byte value N is bit (N & 7) of entry [N >> 3].
+// Bits set: 0x00-0x1F, 0x7F-0xFF and the characters named in the rows below.
+static const uint8_t QUERY_ENCODE_SET[32] = {
+  0xFF,  // 00-07
+  0xFF,  // 08-0F
+  0xFF,  // 10-17
+  0xFF,  // 18-1F
+  0x0D,  // 20-27: ' ' '"' '#'
+  0x00,  // 28-2F
+  0x00,  // 30-37
+  0x50,  // 38-3F: '<' '>'
+  0x00,  // 40-47
+  0x00,  // 48-4F
+  0x00,  // 50-57
+  0x00,  // 58-5F
+  0x00,  // 60-67
+  0x00,  // 68-6F
+  0x00,  // 70-77
+  0x80,  // 78-7F: DEL
+  0xFF,  // 80-87
+  0xFF,  // 88-8F
+  0xFF,  // 90-97
+  0xFF,  // 98-9F
+  0xFF,  // A0-A7
+  0xFF,  // A8-AF
+  0xFF,  // B0-B7
+  0xFF,  // B8-BF
+  0xFF,  // C0-C7
+  0xFF,  // C8-CF
+  0xFF,  // D0-D7
+  0xFF,  // D8-DF
+  0xFF,  // E0-E7
+  0xFF,  // E8-EF
+  0xFF,  // F0-F7
+  0xFF   // F8-FF
+};
+
+// Predicate type: returns true when `ch` has to be percent-encoded.
+using must_escape_cb = bool (*)(const unsigned char ch);
+
+// Appends `ch` to `*str`. When `test(ch)` is true the three-character "%XX"
+// entry of the `hex` table is appended instead of the raw byte.
+static inline void AppendOrEscape(std::string* str,
+                                  const unsigned char ch,
+                                  must_escape_cb test) {
+  if (!test(ch)) {
+    str->push_back(static_cast<char>(ch));
+    return;
+  }
+  str->append(hex[ch]);
+}
+
+// Returns true when the bit for `ch` is set in SIMPLE_ENCODE_SET.
+static inline bool SimpleEncodeSet(const unsigned char ch) {
+  const unsigned int mask = 1u << (ch & 7);
+  return (SIMPLE_ENCODE_SET[ch >> 3] & mask) != 0;
+}
+
+// Returns true when the bit for `ch` is set in DEFAULT_ENCODE_SET.
+static inline bool DefaultEncodeSet(const unsigned char ch) {
+  const unsigned int mask = 1u << (ch & 7);
+  return (DEFAULT_ENCODE_SET[ch >> 3] & mask) != 0;
+}
+
+// Returns true when the bit for `ch` is set in USERINFO_ENCODE_SET.
+static inline bool UserinfoEncodeSet(const unsigned char ch) {
+  const unsigned int mask = 1u << (ch & 7);
+  return (USERINFO_ENCODE_SET[ch >> 3] & mask) != 0;
+}
+
+// Returns true when the bit for `ch` is set in QUERY_ENCODE_SET.
+static inline bool QueryEncodeSet(const unsigned char ch) {
+  const unsigned int mask = 1u << (ch & 7);
+  return (QUERY_ENCODE_SET[ch >> 3] & mask) != 0;
+}
+
+// Converts one ASCII hex digit ('0'-'9', 'A'-'F', 'a'-'f') to its numeric
+// value 0-15. Any other character yields static_cast<unsigned>(-1).
+static inline unsigned hex2bin(const char ch) {
+  if (ch >= '0' && ch <= '9')
+    return ch - '0';
+  if (ch >= 'A' && ch <= 'F')
+    return 10 + (ch - 'A');
+  if (ch >= 'a' && ch <= 'f')
+    return 10 + (ch - 'a');
+  return static_cast<unsigned>(-1);
+}
+
+// Percent-decodes the `len` bytes starting at `input` and appends the result
+// to `*dest`. Each "%XX" sequence with two hex digits becomes the single byte
+// 0xXX; every other byte, including a '%' that is not followed by two hex
+// digits inside the buffer, is copied through unchanged. Always returns 0.
+static inline int PercentDecode(const char* input,
+                                size_t len,
+                                std::string* dest) {
+  if (len == 0)
+    return 0;
+  dest->reserve(dest->size() + len);
+  const char* pointer = input;
+  const char* const end = input + len;
+  while (pointer < end) {
+    // An escape needs two more bytes inside [input, end). Check that before
+    // touching pointer[1] / pointer[2], so that a trailing "%" or "%X" is
+    // never read past the end of the buffer.
+    if (pointer[0] == '%' && end - pointer >= 3) {
+      const unsigned hi = hex2bin(pointer[1]);
+      const unsigned lo = hex2bin(pointer[2]);
+      // hex2bin() reports a non-hex character with a value above 15.
+      if (hi <= 15 && lo <= 15) {
+        *dest += static_cast<char>(hi * 16 + lo);
+        pointer += 3;
+        continue;
+      }
+    }
+    *dest += *pointer;
+    pointer++;
+  }
+  return 0;
+}
+
+// X-macro list of the special schemes. Each entry is V(scheme, port): the
+// scheme is spelled with its trailing ':' and `port` is the port number that
+// NormalizePort() collapses to -1 for that scheme ("file:" carries -1).
+#define SPECIALS(V) \
+  V("ftp:", 21) \
+  V("file:", -1) \
+  V("gopher:", 70) \
+  V("http:", 80) \
+  V("https:", 443) \
+  V("ws:", 80) \
+  V("wss:", 443)
+
+// X-macro list of the parser state names, in the order in which they are
+// numbered when expanded into `enum url_parse_state`. Each entry is V(name).
+#define PARSESTATES(V) \
+  V(kSchemeStart) \
+  V(kScheme) \
+  V(kNoScheme) \
+  V(kSpecialRelativeOrAuthority) \
+  V(kPathOrAuthority) \
+  V(kRelative) \
+  V(kRelativeSlash) \
+  V(kSpecialAuthoritySlashes) \
+  V(kSpecialAuthorityIgnoreSlashes) \
+  V(kAuthority) \
+  V(kHost) \
+  V(kHostname) \
+  V(kPort) \
+  V(kFile) \
+  V(kFileSlash) \
+  V(kFileHost) \
+  V(kPathStart) \
+  V(kPath) \
+  V(kCannotBeBase) \
+  V(kQuery) \
+  V(kFragment)
+
+// X-macro list of the URL flag bits. Each entry is V(name, value); apart from
+// URL_FLAGS_NONE (0) every value is a distinct single bit, so flags can be
+// OR-ed together.
+#define FLAGS(V) \
+  V(URL_FLAGS_NONE, 0) \
+  V(URL_FLAGS_FAILED, 0x01) \
+  V(URL_FLAGS_CANNOT_BE_BASE, 0x02) \
+  V(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \
+  V(URL_FLAGS_TERMINATED, 0x08) \
+  V(URL_FLAGS_SPECIAL, 0x10) \
+  V(URL_FLAGS_HAS_SCHEME, 0x20) \
+  V(URL_FLAGS_HAS_USERNAME, 0x40) \
+  V(URL_FLAGS_HAS_PASSWORD, 0x80) \
+  V(URL_FLAGS_HAS_HOST, 0x100) \
+  V(URL_FLAGS_HAS_PATH, 0x200) \
+  V(URL_FLAGS_HAS_QUERY, 0x400) \
+  V(URL_FLAGS_HAS_FRAGMENT, 0x800)
+
+// X-macro list of argument slots, in the order in which they are numbered
+// when expanded into `enum url_cb_args`. Each entry is V(name).
+#define ARGS(V) \
+  V(ARG_FLAGS) \
+  V(ARG_PROTOCOL) \
+  V(ARG_USERNAME) \
+  V(ARG_PASSWORD) \
+  V(ARG_HOST) \
+  V(ARG_PORT) \
+  V(ARG_PATH) \
+  V(ARG_QUERY) \
+  V(ARG_FRAGMENT)
+
+// Sentinel character holding -1 converted to `char`.
+// NOTE(review): presumably the parser's end-of-input marker; confirm against
+// its uses in node_url.cc.
+static const char kEOL = static_cast<char>(-1);
+
+// Parser states: kUnknownState is -1 and the PARSESTATES entries follow in
+// list order, starting at 0.
+// Only the type is declared. A trailing declarator after the closing brace
+// would define a namespace-scope variable in every translation unit that
+// includes this header, and would hide the type name behind that variable.
+enum url_parse_state {
+  kUnknownState = -1,
+#define XX(name) name,
+  PARSESTATES(XX)
+#undef XX
+};
+
+// Flag constants generated from the FLAGS list (name = value).
+// Only the type is declared. A trailing declarator after the closing brace
+// would define a namespace-scope variable in every translation unit that
+// includes this header, and would hide the type name behind that variable.
+enum url_flags {
+#define XX(name, val) name = val,
+  FLAGS(XX)
+#undef XX
+};
+
+// Argument slot indices generated from the ARGS list, numbered from 0 in
+// list order.
+// Only the type is declared. A trailing declarator after the closing brace
+// would define a namespace-scope variable in every translation unit that
+// includes this header, and would hide the type name behind that variable.
+enum url_cb_args {
+#define XX(name) name,
+  ARGS(XX)
+#undef XX
+};
+
+// Returns true when `scheme` (spelled with its trailing ':') equals one of
+// the SPECIALS entries. Takes the string by const reference so that a lookup
+// does not copy it.
+static inline bool IsSpecial(const std::string& scheme) {
+#define XX(name, _) if (scheme == name) return true;
+  SPECIALS(XX);
+#undef XX
+  return false;
+}
+
+// Returns -1 when `p` is the port listed for `scheme` in SPECIALS, and `p`
+// unchanged otherwise. Takes the string by const reference so that a lookup
+// does not copy it.
+static inline int NormalizePort(const std::string& scheme, int p) {
+#define XX(name, port) if (scheme == name && p == port) return -1;
+  SPECIALS(XX);
+#undef XX
+  return p;
+}
+
+// Plain holder for the components of a URL.
+struct url_data {
+  // Starts as URL_FLAGS_NONE; presumably a bitwise OR of URL_FLAGS_* values.
+  int32_t flags{URL_FLAGS_NONE};
+  // Starts as -1; presumably meaning "no port" (confirm against the parser).
+  int port{-1};
+  std::string scheme;
+  std::string username;
+  std::string password;
+  std::string host;
+  std::string query;
+  std::string fragment;
+  // One string per path entry.
+  std::vector<std::string> path;
+};
+
+// Storage shared by the three host representations: a domain string, a
+// 32-bit IPv4 value or eight 16-bit IPv6 pieces. The union does not record
+// which member is in use; presumably url_host::type does.
+// NOTE(review): `domain` is a std::string and the user-provided destructor
+// below is empty, so this type never runs the string's destructor. Unless
+// the owner destroys `domain` explicitly, a heap-allocated domain is leaked,
+// and writing `ipv4`/`ipv6` over a live string discards it without cleanup.
+// Confirm how ParseHost() and its callers deal with this.
+union url_host_value {
+ std::string domain;
+ uint32_t ipv4;
+ uint16_t ipv6[8];
+ ~url_host_value() {}
+};
+
+// Kind of host held by a url_host. HOST_TYPE_FAILED is -1; the remaining
+// enumerators count up from there.
+enum url_host_type {
+  HOST_TYPE_FAILED = -1,
+  HOST_TYPE_DOMAIN,  // 0
+  HOST_TYPE_IPV4,    // 1
+  HOST_TYPE_IPV6     // 2
+};
+
+// A host value together with the tag that records its kind. Kept as a plain
+// aggregate so that it can be brace-initialized as {{...}, type}.
+struct url_host {
+  url_host_value value;
+  url_host_type type;
+};
+} // namespace url
+
+} // namespace node
+
+#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#endif // SRC_NODE_URL_H_