quickjs-tart

quickjs-based runtime for wallet-core logic
Log | Files | Refs | README | LICENSE

urlapi.c (53584B)


      1 /***************************************************************************
      2  *                                  _   _ ____  _
      3  *  Project                     ___| | | |  _ \| |
      4  *                             / __| | | | |_) | |
      5  *                            | (__| |_| |  _ <| |___
      6  *                             \___|\___/|_| \_\_____|
      7  *
      8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
      9  *
     10  * This software is licensed as described in the file COPYING, which
     11  * you should have received as part of this distribution. The terms
     12  * are also available at https://curl.se/docs/copyright.html.
     13  *
     14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
     15  * copies of the Software, and permit persons to whom the Software is
     16  * furnished to do so, under the terms of the COPYING file.
     17  *
     18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
     19  * KIND, either express or implied.
     20  *
     21  * SPDX-License-Identifier: curl
     22  *
     23  ***************************************************************************/
     24 
     25 #include "curl_setup.h"
     26 
     27 #include "urldata.h"
     28 #include "urlapi-int.h"
     29 #include "strcase.h"
     30 #include "url.h"
     31 #include "escape.h"
     32 #include "curl_ctype.h"
     33 #include "curlx/inet_pton.h"
     34 #include "curlx/inet_ntop.h"
     35 #include "strdup.h"
     36 #include "idn.h"
     37 #include "curlx/strparse.h"
     38 #include "curl_memrchr.h"
     39 
     40 /* The last 3 #include files should be in this order */
     41 #include "curl_printf.h"
     42 #include "curl_memory.h"
     43 #include "memdebug.h"
     44 
     /* MS-DOS/Windows style drive prefix, e.g. "c:" in "c:foo".
      *
      * Evaluates to non-zero when the first character of 'str' is an ASCII
      * letter and the second one is a colon. The argument is parenthesized on
      * every use so that an expression such as 'p + 1' can be passed; note
      * that it is still evaluated more than once, so it must not have side
      * effects. */
     #define STARTS_WITH_DRIVE_PREFIX(str) \
       ((('a' <= (str)[0] && (str)[0] <= 'z') || \
         ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
        ((str)[1] == ':'))
     50 
     /* Drive prefix as it may show up in a URL path: a single ASCII letter,
      * then ':' or the '|' stand-in for it, then a slash, a backslash or the
      * end of the string. The argument is evaluated several times. */
     #define STARTS_WITH_URL_DRIVE_PREFIX(str) \
       ((((str)[0] >= 'a' && (str)[0] <= 'z') || \
         ((str)[0] >= 'A' && (str)[0] <= 'Z')) && \
        ((str)[1] == '|' || (str)[1] == ':') && \
        ((str)[2] == '\0' || (str)[2] == '/' || (str)[2] == '\\'))
     58 
     /* Upper bound for the number of characters scanned when looking for a
        scheme name. The scheme is not URL encoded, the longest libcurl
        supported ones are... Buffers that receive a scheme must be larger than
        this, to also hold the terminating null byte. */
     #define MAX_SCHEME_LEN 40

     /*
      * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
      * sure we have _some_ value for AF_INET6 without polluting our fake value
      * everywhere. The fallback is only defined when neither USE_IPV6 nor
      * AF_INET6 itself is defined.
      */
     #if !defined(USE_IPV6) && !defined(AF_INET6)
     #define AF_INET6 (AF_INET + 1)
     #endif
     70 
     /* Internal representation of CURLU. Point to URL-encoded strings.
        Every string member is released with free() by free_urlhandle(). */
     struct Curl_URL {
       char *scheme;
       char *user;
       char *password;
       char *options; /* IMAP only? */
       char *host;
       char *zoneid; /* for numerical IPv6 addresses; stored by ipv6_parse()
                        without the leading '%' */
       char *port;   /* decimal string, regenerated by Curl_parse_port() */
       char *path;
       char *query;
       char *fragment;
       unsigned short portnum; /* the numerical version (if 'port' is set) */
       BIT(query_present);    /* to support blank */
       BIT(fragment_present); /* to support blank */
       BIT(guessed_scheme);   /* when a URL without scheme is parsed */
     };
     88 
     /* NOTE(review): presumably the scheme assumed for a URL given without one
        when the caller asks for a default scheme; the use is outside this
        excerpt - confirm. */
     #define DEFAULT_SCHEME "https"

     /* Forward declaration: redirect_url() below calls this function before
        it is defined. */
     static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
                                           unsigned int flags);
     93 
     94 static void free_urlhandle(struct Curl_URL *u)
     95 {
     96   free(u->scheme);
     97   free(u->user);
     98   free(u->password);
     99   free(u->options);
    100   free(u->host);
    101   free(u->zoneid);
    102   free(u->port);
    103   free(u->path);
    104   free(u->query);
    105   free(u->fragment);
    106 }
    107 
    /*
     * Return a pointer to the character that ends the hostname in 'url': the
     * first '/' or '?' found after the first "//" (or after the start of the
     * string when there is no "//"), as in http://www.example.com?id=2380.
     * When there is no such character, the returned pointer refers to the
     * terminating null byte.
     */
    static const char *find_host_sep(const char *url)
    {
      /* the hostname starts right behind the first "//", if there is one */
      const char *host = strstr(url, "//");
      host = host ? host + 2 : url;

      /* strcspn() counts the bytes up to the first '/' or '?', or up to the
         end of the string */
      return host + strcspn(host, "/?");
    }
    127 
    /* convert CURLcode to CURLUcode. Only meant for error values:
       CURLE_TOO_LARGE becomes CURLUE_TOO_LARGE and every other value,
       CURLE_OK included, becomes CURLUE_OUT_OF_MEMORY, so check for success
       before using it. */
    #define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE :   \
                      CURLUE_OUT_OF_MEMORY)
    131 
    132 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
    133  * spaces in the source URL accordingly.
    134  *
    135  * URL encoding should be skipped for hostnames, otherwise IDN resolution
    136  * will fail.
    137  */
    138 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
    139                                size_t len, bool relative,
    140                                bool query)
    141 {
    142   /* we must add this with whitespace-replacing */
    143   bool left = !query;
    144   const unsigned char *iptr;
    145   const unsigned char *host_sep = (const unsigned char *) url;
    146   CURLcode result = CURLE_OK;
    147 
    148   if(!relative) {
    149     size_t n;
    150     host_sep = (const unsigned char *) find_host_sep(url);
    151 
    152     /* output the first piece as-is */
    153     n = (const char *)host_sep - url;
    154     result = curlx_dyn_addn(o, url, n);
    155     len -= n;
    156   }
    157 
    158   for(iptr = host_sep; len && !result; iptr++, len--) {
    159     if(*iptr == ' ') {
    160       if(left)
    161         result = curlx_dyn_addn(o, "%20", 3);
    162       else
    163         result = curlx_dyn_addn(o, "+", 1);
    164     }
    165     else if((*iptr < ' ') || (*iptr >= 0x7f)) {
    166       unsigned char out[3]={'%'};
    167       Curl_hexbyte(&out[1], *iptr);
    168       result = curlx_dyn_addn(o, out, 3);
    169     }
    170     else {
    171       result = curlx_dyn_addn(o, iptr, 1);
    172       if(*iptr == '?')
    173         left = FALSE;
    174     }
    175   }
    176 
    177   if(result)
    178     return cc2cu(result);
    179   return CURLUE_OK;
    180 }
    181 
    182 /*
    183  * Returns the length of the scheme if the given URL is absolute (as opposed
    184  * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
    185  * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
    186  *
    187  * If 'guess_scheme' is TRUE, it means the URL might be provided without
    188  * scheme.
    189  */
    190 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
    191                             bool guess_scheme)
    192 {
    193   size_t i = 0;
    194   DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
    195   (void)buflen; /* only used in debug-builds */
    196   if(buf)
    197     buf[0] = 0; /* always leave a defined value in buf */
    198 #ifdef _WIN32
    199   if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
    200     return 0;
    201 #endif
    202   if(ISALPHA(url[0]))
    203     for(i = 1; i < MAX_SCHEME_LEN; ++i) {
    204       char s = url[i];
    205       if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
    206         /* RFC 3986 3.1 explains:
    207            scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    208         */
    209       }
    210       else {
    211         break;
    212       }
    213     }
    214   if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
    215     /* If this does not guess scheme, the scheme always ends with the colon so
    216        that this also detects data: URLs etc. In guessing mode, data: could
    217        be the hostname "data" with a specified port number. */
    218 
    219     /* the length of the scheme is the name part only */
    220     size_t len = i;
    221     if(buf) {
    222       Curl_strntolower(buf, url, i);
    223       buf[i] = 0;
    224     }
    225     return len;
    226   }
    227   return 0;
    228 }
    229 
    230 /*
    231  * Concatenate a relative URL onto a base URL making it absolute.
    232  */
    233 static CURLUcode redirect_url(const char *base, const char *relurl,
    234                               CURLU *u, unsigned int flags)
    235 {
    236   struct dynbuf urlbuf;
    237   bool host_changed = FALSE;
    238   const char *useurl = relurl;
    239   const char *cutoff = NULL;
    240   size_t prelen;
    241   CURLUcode uc;
    242 
    243   /* protsep points to the start of the hostname, after [scheme]:// */
    244   const char *protsep = base + strlen(u->scheme) + 3;
    245   DEBUGASSERT(base && relurl && u); /* all set here */
    246   if(!base)
    247     return CURLUE_MALFORMED_INPUT; /* should never happen */
    248 
    249   /* handle different relative URL types */
    250   switch(relurl[0]) {
    251   case '/':
    252     if(relurl[1] == '/') {
    253       /* protocol-relative URL: //example.com/path */
    254       cutoff = protsep;
    255       useurl = &relurl[2];
    256       host_changed = TRUE;
    257     }
    258     else
    259       /* absolute /path */
    260       cutoff = strchr(protsep, '/');
    261     break;
    262 
    263   case '#':
    264     /* fragment-only change */
    265     if(u->fragment)
    266       cutoff = strchr(protsep, '#');
    267     break;
    268 
    269   default:
    270     /* path or query-only change */
    271     if(u->query && u->query[0])
    272       /* remove existing query */
    273       cutoff = strchr(protsep, '?');
    274     else if(u->fragment && u->fragment[0])
    275       /* Remove existing fragment */
    276       cutoff = strchr(protsep, '#');
    277 
    278     if(relurl[0] != '?') {
    279       /* append a relative path after the last slash */
    280       cutoff = memrchr(protsep, '/',
    281                        cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
    282       if(cutoff)
    283         cutoff++; /* truncate after last slash */
    284     }
    285     break;
    286   }
    287 
    288   prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);
    289 
    290   /* build new URL */
    291   curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
    292 
    293   if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
    294      !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
    295     uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
    296                               flags & ~CURLU_PATH_AS_IS);
    297   }
    298   else
    299     uc = CURLUE_OUT_OF_MEMORY;
    300 
    301   curlx_dyn_free(&urlbuf);
    302   return uc;
    303 }
    304 
    305 /* scan for byte values <= 31, 127 and sometimes space */
    306 CURLUcode Curl_junkscan(const char *url, size_t *urllen, bool allowspace)
    307 {
    308   size_t n = strlen(url);
    309   size_t i;
    310   unsigned char control;
    311   const unsigned char *p = (const unsigned char *)url;
    312   if(n > CURL_MAX_INPUT_LENGTH)
    313     return CURLUE_MALFORMED_INPUT;
    314 
    315   control = allowspace ? 0x1f : 0x20;
    316   for(i = 0; i < n; i++) {
    317     if(p[i] <= control || p[i] == 127)
    318       return CURLUE_MALFORMED_INPUT;
    319   }
    320   *urllen = n;
    321   return CURLUE_OK;
    322 }
    323 
    324 /*
    325  * parse_hostname_login()
    326  *
    327  * Parse the login details (username, password and options) from the URL and
    328  * strip them out of the hostname
    329  *
    330  */
    331 static CURLUcode parse_hostname_login(struct Curl_URL *u,
    332                                       const char *login,
    333                                       size_t len,
    334                                       unsigned int flags,
    335                                       size_t *offset) /* to the hostname */
    336 {
    337   CURLUcode result = CURLUE_OK;
    338   CURLcode ccode;
    339   char *userp = NULL;
    340   char *passwdp = NULL;
    341   char *optionsp = NULL;
    342   const struct Curl_handler *h = NULL;
    343 
    344   /* At this point, we assume all the other special cases have been taken
    345    * care of, so the host is at most
    346    *
    347    *   [user[:password][;options]]@]hostname
    348    *
    349    * We need somewhere to put the embedded details, so do that first.
    350    */
    351   char *ptr;
    352 
    353   DEBUGASSERT(login);
    354 
    355   *offset = 0;
    356   ptr = memchr(login, '@', len);
    357   if(!ptr)
    358     goto out;
    359 
    360   /* We will now try to extract the
    361    * possible login information in a string like:
    362    * ftp://user:password@ftp.site.example:8021/README */
    363   ptr++;
    364 
    365   /* if this is a known scheme, get some details */
    366   if(u->scheme)
    367     h = Curl_get_scheme_handler(u->scheme);
    368 
    369   /* We could use the login information in the URL so extract it. Only parse
    370      options if the handler says we should. Note that 'h' might be NULL! */
    371   ccode = Curl_parse_login_details(login, ptr - login - 1,
    372                                    &userp, &passwdp,
    373                                    (h && (h->flags & PROTOPT_URLOPTIONS)) ?
    374                                    &optionsp : NULL);
    375   if(ccode) {
    376     result = CURLUE_BAD_LOGIN;
    377     goto out;
    378   }
    379 
    380   if(userp) {
    381     if(flags & CURLU_DISALLOW_USER) {
    382       /* Option DISALLOW_USER is set and URL contains username. */
    383       result = CURLUE_USER_NOT_ALLOWED;
    384       goto out;
    385     }
    386     free(u->user);
    387     u->user = userp;
    388   }
    389 
    390   if(passwdp) {
    391     free(u->password);
    392     u->password = passwdp;
    393   }
    394 
    395   if(optionsp) {
    396     free(u->options);
    397     u->options = optionsp;
    398   }
    399 
    400   /* the hostname starts at this offset */
    401   *offset = ptr - login;
    402   return CURLUE_OK;
    403 
    404 out:
    405 
    406   free(userp);
    407   free(passwdp);
    408   free(optionsp);
    409   u->user = NULL;
    410   u->password = NULL;
    411   u->options = NULL;
    412 
    413   return result;
    414 }
    415 
    416 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
    417                                    bool has_scheme)
    418 {
    419   const char *portptr;
    420   char *hostname = curlx_dyn_ptr(host);
    421   /*
    422    * Find the end of an IPv6 address on the ']' ending bracket.
    423    */
    424   if(hostname[0] == '[') {
    425     portptr = strchr(hostname, ']');
    426     if(!portptr)
    427       return CURLUE_BAD_IPV6;
    428     portptr++;
    429     /* this is a RFC2732-style specified IP-address */
    430     if(*portptr) {
    431       if(*portptr != ':')
    432         return CURLUE_BAD_PORT_NUMBER;
    433     }
    434     else
    435       portptr = NULL;
    436   }
    437   else
    438     portptr = strchr(hostname, ':');
    439 
    440   if(portptr) {
    441     curl_off_t port;
    442     size_t keep = portptr - hostname;
    443 
    444     /* Browser behavior adaptation. If there is a colon with no digits after,
    445        just cut off the name there which makes us ignore the colon and just
    446        use the default port. Firefox, Chrome and Safari all do that.
    447 
    448        Do not do it if the URL has no scheme, to make something that looks like
    449        a scheme not work!
    450     */
    451     curlx_dyn_setlen(host, keep);
    452     portptr++;
    453     if(!*portptr)
    454       return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
    455 
    456     if(curlx_str_number(&portptr, &port, 0xffff) || *portptr)
    457       return CURLUE_BAD_PORT_NUMBER;
    458 
    459     u->portnum = (unsigned short) port;
    460     /* generate a new port number string to get rid of leading zeroes etc */
    461     free(u->port);
    462     u->port = aprintf("%" CURL_FORMAT_CURL_OFF_T, port);
    463     if(!u->port)
    464       return CURLUE_OUT_OF_MEMORY;
    465   }
    466 
    467   return CURLUE_OK;
    468 }
    469 
    470 /* this assumes 'hostname' now starts with [ */
    471 static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
    472                             size_t hlen) /* length of hostname */
    473 {
    474   size_t len;
    475   DEBUGASSERT(*hostname == '[');
    476   if(hlen < 4) /* '[::]' is the shortest possible valid string */
    477     return CURLUE_BAD_IPV6;
    478   hostname++;
    479   hlen -= 2;
    480 
    481   /* only valid IPv6 letters are ok */
    482   len = strspn(hostname, "0123456789abcdefABCDEF:.");
    483 
    484   if(hlen != len) {
    485     hlen = len;
    486     if(hostname[len] == '%') {
    487       /* this could now be '%[zone id]' */
    488       char zoneid[16];
    489       int i = 0;
    490       char *h = &hostname[len + 1];
    491       /* pass '25' if present and is a URL encoded percent sign */
    492       if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
    493         h += 2;
    494       while(*h && (*h != ']') && (i < 15))
    495         zoneid[i++] = *h++;
    496       if(!i || (']' != *h))
    497         return CURLUE_BAD_IPV6;
    498       zoneid[i] = 0;
    499       u->zoneid = strdup(zoneid);
    500       if(!u->zoneid)
    501         return CURLUE_OUT_OF_MEMORY;
    502       hostname[len] = ']'; /* insert end bracket */
    503       hostname[len + 1] = 0; /* terminate the hostname */
    504     }
    505     else
    506       return CURLUE_BAD_IPV6;
    507     /* hostname is fine */
    508   }
    509 
    510   /* Normalize the IPv6 address */
    511   {
    512     char dest[16]; /* fits a binary IPv6 address */
    513     hostname[hlen] = 0; /* end the address there */
    514     if(1 != curlx_inet_pton(AF_INET6, hostname, dest))
    515       return CURLUE_BAD_IPV6;
    516     if(curlx_inet_ntop(AF_INET6, dest, hostname, hlen)) {
    517       hlen = strlen(hostname); /* might be shorter now */
    518       hostname[hlen + 1] = 0;
    519     }
    520     hostname[hlen] = ']'; /* restore ending bracket */
    521   }
    522   return CURLUE_OK;
    523 }
    524 
    525 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
    526                                 size_t hlen) /* length of hostname */
    527 {
    528   size_t len;
    529   DEBUGASSERT(hostname);
    530 
    531   if(!hlen)
    532     return CURLUE_NO_HOST;
    533   else if(hostname[0] == '[')
    534     return ipv6_parse(u, hostname, hlen);
    535   else {
    536     /* letters from the second string are not ok */
    537     len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
    538     if(hlen != len)
    539       /* hostname with bad content */
    540       return CURLUE_BAD_HOSTNAME;
    541   }
    542   return CURLUE_OK;
    543 }
    544 
    545 /*
    546  * Handle partial IPv4 numerical addresses and different bases, like
    547  * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
    548  *
    549  * If the given input string is syntactically wrong IPv4 or any part for
    550  * example is too big, this function returns HOST_NAME.
    551  *
    552  * Output the "normalized" version of that input string in plain quad decimal
    553  * integers.
    554  *
    555  * Returns the host type.
    556  */
    557 
    558 #define HOST_ERROR   -1 /* out of memory */
    559 
    560 #define HOST_NAME    1
    561 #define HOST_IPV4    2
    562 #define HOST_IPV6    3
    563 
    564 static int ipv4_normalize(struct dynbuf *host)
    565 {
    566   bool done = FALSE;
    567   int n = 0;
    568   const char *c = curlx_dyn_ptr(host);
    569   unsigned int parts[4] = {0, 0, 0, 0};
    570   CURLcode result = CURLE_OK;
    571 
    572   if(*c == '[')
    573     return HOST_IPV6;
    574 
    575   while(!done) {
    576     int rc;
    577     curl_off_t l;
    578     if(*c == '0') {
    579       if(c[1] == 'x') {
    580         c += 2; /* skip the prefix */
    581         rc = curlx_str_hex(&c, &l, UINT_MAX);
    582       }
    583       else
    584         rc = curlx_str_octal(&c, &l, UINT_MAX);
    585     }
    586     else
    587       rc = curlx_str_number(&c, &l, UINT_MAX);
    588 
    589     if(rc)
    590       return HOST_NAME;
    591 
    592     parts[n] = (unsigned int)l;
    593 
    594     switch(*c) {
    595     case '.':
    596       if(n == 3)
    597         return HOST_NAME;
    598       n++;
    599       c++;
    600       break;
    601 
    602     case '\0':
    603       done = TRUE;
    604       break;
    605 
    606     default:
    607       return HOST_NAME;
    608     }
    609   }
    610 
    611   switch(n) {
    612   case 0: /* a -- 32 bits */
    613     curlx_dyn_reset(host);
    614 
    615     result = curlx_dyn_addf(host, "%u.%u.%u.%u",
    616                             (parts[0] >> 24),
    617                             ((parts[0] >> 16) & 0xff),
    618                             ((parts[0] >> 8) & 0xff),
    619                             (parts[0] & 0xff));
    620     break;
    621   case 1: /* a.b -- 8.24 bits */
    622     if((parts[0] > 0xff) || (parts[1] > 0xffffff))
    623       return HOST_NAME;
    624     curlx_dyn_reset(host);
    625     result = curlx_dyn_addf(host, "%u.%u.%u.%u",
    626                             (parts[0]),
    627                             ((parts[1] >> 16) & 0xff),
    628                             ((parts[1] >> 8) & 0xff),
    629                             (parts[1] & 0xff));
    630     break;
    631   case 2: /* a.b.c -- 8.8.16 bits */
    632     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
    633       return HOST_NAME;
    634     curlx_dyn_reset(host);
    635     result = curlx_dyn_addf(host, "%u.%u.%u.%u",
    636                             (parts[0]),
    637                             (parts[1]),
    638                             ((parts[2] >> 8) & 0xff),
    639                             (parts[2] & 0xff));
    640     break;
    641   case 3: /* a.b.c.d -- 8.8.8.8 bits */
    642     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
    643        (parts[3] > 0xff))
    644       return HOST_NAME;
    645     curlx_dyn_reset(host);
    646     result = curlx_dyn_addf(host, "%u.%u.%u.%u",
    647                             (parts[0]),
    648                             (parts[1]),
    649                             (parts[2]),
    650                             (parts[3]));
    651     break;
    652   }
    653   if(result)
    654     return HOST_ERROR;
    655   return HOST_IPV4;
    656 }
    657 
    658 /* if necessary, replace the host content with a URL decoded version */
    659 static CURLUcode urldecode_host(struct dynbuf *host)
    660 {
    661   char *per = NULL;
    662   const char *hostname = curlx_dyn_ptr(host);
    663   per = strchr(hostname, '%');
    664   if(!per)
    665     /* nothing to decode */
    666     return CURLUE_OK;
    667   else {
    668     /* encoded */
    669     size_t dlen;
    670     char *decoded;
    671     CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
    672                                      REJECT_CTRL);
    673     if(result)
    674       return CURLUE_BAD_HOSTNAME;
    675     curlx_dyn_reset(host);
    676     result = curlx_dyn_addn(host, decoded, dlen);
    677     free(decoded);
    678     if(result)
    679       return cc2cu(result);
    680   }
    681 
    682   return CURLUE_OK;
    683 }
    684 
    685 static CURLUcode parse_authority(struct Curl_URL *u,
    686                                  const char *auth, size_t authlen,
    687                                  unsigned int flags,
    688                                  struct dynbuf *host,
    689                                  bool has_scheme)
    690 {
    691   size_t offset;
    692   CURLUcode uc;
    693   CURLcode result;
    694 
    695   /*
    696    * Parse the login details and strip them out of the hostname.
    697    */
    698   uc = parse_hostname_login(u, auth, authlen, flags, &offset);
    699   if(uc)
    700     goto out;
    701 
    702   result = curlx_dyn_addn(host, auth + offset, authlen - offset);
    703   if(result) {
    704     uc = cc2cu(result);
    705     goto out;
    706   }
    707 
    708   uc = Curl_parse_port(u, host, has_scheme);
    709   if(uc)
    710     goto out;
    711 
    712   if(!curlx_dyn_len(host))
    713     return CURLUE_NO_HOST;
    714 
    715   switch(ipv4_normalize(host)) {
    716   case HOST_IPV4:
    717     break;
    718   case HOST_IPV6:
    719     uc = ipv6_parse(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
    720     break;
    721   case HOST_NAME:
    722     uc = urldecode_host(host);
    723     if(!uc)
    724       uc = hostname_check(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
    725     break;
    726   case HOST_ERROR:
    727     uc = CURLUE_OUT_OF_MEMORY;
    728     break;
    729   default:
    730     uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
    731     break;
    732   }
    733 
    734 out:
    735   return uc;
    736 }
    737 
    738 /* used for HTTP/2 server push */
    739 CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
    740 {
    741   CURLUcode result;
    742   struct dynbuf host;
    743 
    744   DEBUGASSERT(authority);
    745   curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
    746 
    747   result = parse_authority(u, authority, strlen(authority),
    748                            CURLU_DISALLOW_USER, &host, !!u->scheme);
    749   if(result)
    750     curlx_dyn_free(&host);
    751   else {
    752     free(u->host);
    753     u->host = curlx_dyn_ptr(&host);
    754   }
    755   return result;
    756 }
    757 
    /*
     * "Remove Dot Segments"
     * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
     */

    /*
     * is_dot()
     *
     * Check if the '*clen' bytes at '*str' start with a dot, written either
     * as a plain '.' or URL encoded as "%2e" / "%2E".
     *
     * If so, '*str' is advanced past the dot, '*clen' is reduced by the
     * number of bytes passed (1 or 3) and TRUE is returned. Otherwise both
     * are left untouched and FALSE is returned.
     *
     * No byte is inspected when '*clen' is zero, so the length can never wrap
     * around and the input does not need to be null-terminated.
     */
    static bool is_dot(const char **str, size_t *clen)
    {
      const char *p = *str;
      size_t skip = 0; /* number of bytes the dot occupies, 0 for no dot */

      if(*clen && (p[0] == '.'))
        skip = 1;
      else if((*clen >= 3) &&
              (p[0] == '%') && (p[1] == '2') && ((p[2] | 0x20) == 'e'))
        /* OR-ing in 0x20 makes 'E' compare equal to 'e' */
        skip = 3;

      *str += skip;
      *clen -= skip;
      return skip != 0;
    }
    779 
    /* TRUE if the given character is a path segment separator */
    #define ISSLASH(x) ((x) == '/')

    /*
     * dedotdotify()
     * @unittest: 1395
     *
     * This function gets a path of 'clen' bytes with dot and dotdot sequences
     * passed in and strips them off according to the rules in RFC 3986 section
     * 5.2.4. Dots are recognized both as '.' and as "%2e", see is_dot().
     *
     * The function handles a path. It should not contain the query nor fragment.
     *
     * RETURNS
     *
     * Zero for success and '*outp' set to an allocated dedotdotified string,
     * which is an empty string if nothing remains of the path. For an input
     * shorter than two bytes, zero is returned with '*outp' set to NULL.
     *
     * Non-zero on failure to store the output, with '*outp' set to NULL.
     */
    UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
    UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
    {
      struct dynbuf out;
      CURLcode result = CURLE_OK;

      *outp = NULL;
      /* the path always starts with a slash, and a lone slash has no dot */
      if(clen < 2)
        return 0;

      /* the output never gets longer than the input */
      curlx_dyn_init(&out, clen + 1);

      /*  A. If the input buffer begins with a prefix of "../" or "./", then
          remove that prefix from the input buffer; otherwise, */
      /* NOTE(review): is_dot() advances 'input' itself, so a leading dot that
         is followed by neither a slash nor a second dot stays consumed here.
         Presumably harmless since paths start with a slash - confirm. */
      if(is_dot(&input, &clen)) {
        const char *p = input;
        size_t blen = clen;

        if(!clen)
          /* . [end] */
          goto end;
        else if(ISSLASH(*p)) {
          /* one dot followed by a slash */
          input = p + 1;
          clen--;
        }

        /*  D. if the input buffer consists only of "." or "..", then remove
            that from the input buffer; otherwise, */
        else if(is_dot(&p, &blen)) {
          if(!blen)
            /* .. [end] */
            goto end;
          else if(ISSLASH(*p)) {
            /* ../ */
            input = p + 1;
            clen = blen - 1;
          }
        }
      }

      while(clen && !result) { /* until end of path content */
        if(ISSLASH(*input)) {
          /* look ahead on copies, so that 'input' and 'clen' only change when
             a complete dot segment has been identified */
          const char *p = &input[1];
          size_t blen = clen - 1;
          /*  B. if the input buffer begins with a prefix of "/./" or "/.", where
              "."  is a complete path segment, then replace that prefix with "/" in
              the input buffer; otherwise, */
          if(is_dot(&p, &blen)) {
            if(!blen) { /* /. */
              result = curlx_dyn_addn(&out, "/", 1);
              break;
            }
            else if(ISSLASH(*p)) { /* /./ */
              /* continue from the slash that follows the dot */
              input = p;
              clen = blen;
              continue;
            }

            /*  C. if the input buffer begins with a prefix of "/../" or "/..",
                where ".." is a complete path segment, then replace that prefix
                with "/" in the input buffer and remove the last segment and its
                preceding "/" (if any) from the output buffer; otherwise, */
            else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
              /* remove the last segment from the output buffer */
              size_t len = curlx_dyn_len(&out);
              if(len) {
                char *ptr = curlx_dyn_ptr(&out);
                char *last = memrchr(ptr, '/', len);
                if(last)
                  /* trim the output at the slash */
                  curlx_dyn_setlen(&out, last - ptr);
              }

              if(blen) { /* /../ */
                input = p;
                clen = blen;
                continue;
              }
              /* "/.." ended the input: the output ends with a slash */
              result = curlx_dyn_addn(&out, "/", 1);
              break;
            }
          }
        }

        /*  E. move the first path segment in the input buffer to the end of
            the output buffer, including the initial "/" character (if any) and
            any subsequent characters up to, but not including, the next "/"
            character or the end of the input buffer. */

        /* done one byte per iteration, so the next slash is examined by the
           checks above */
        result = curlx_dyn_addn(&out, input, 1);
        input++;
        clen--;
      }
    end:
      if(!result) {
        if(curlx_dyn_len(&out))
          /* hand over the buffer to the caller */
          *outp = curlx_dyn_ptr(&out);
        else {
          /* nothing was stored, return an allocated empty string */
          *outp = strdup("");
          if(!*outp)
            return 1;
        }
      }
      return result ? 1 : 0; /* success */
    }
    903 
/*
 * parseurl() splits the string 'url' into its components and stores them in
 * the handle 'u'.
 *
 * 'flags' is a CURLU_* bitmask. The bits inspected directly in this function
 * are CURLU_ALLOW_SPACE, CURLU_GUESS_SCHEME, CURLU_DEFAULT_SCHEME,
 * CURLU_NON_SUPPORT_SCHEME, CURLU_NO_AUTHORITY, CURLU_URLENCODE and
 * CURLU_PATH_AS_IS; the complete bitmask is also handed to
 * parse_authority().
 *
 * Returns CURLUE_OK on success, in which case the hostname collected in the
 * local dynbuf is handed over to u->host. On every error path the host
 * buffer is freed, free_urlhandle(u) is called and the error code is
 * returned.
 */
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
{
  const char *path;
  size_t pathlen;
  char *query = NULL;
  char *fragment = NULL;
  char schemebuf[MAX_SCHEME_LEN + 1];
  size_t schemelen = 0;
  size_t urllen;
  CURLUcode result = CURLUE_OK;
  size_t fraglen = 0;
  struct dynbuf host;

  DEBUGASSERT(url);

  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);

  /* urllen is filled in by this call and used as the total input length
     from here on */
  result = Curl_junkscan(url, &urllen, !!(flags & CURLU_ALLOW_SPACE));
  if(result)
    goto fail;

  /* a non-zero schemelen means a scheme was found and copied to schemebuf */
  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
                                   flags & (CURLU_GUESS_SCHEME|
                                            CURLU_DEFAULT_SCHEME));

  /* handle the file: scheme */
  if(schemelen && !strcmp(schemebuf, "file")) {
    bool uncpath = FALSE;
    if(urllen <= 6) {
      /* file:/ is not enough to actually be a complete file: URL */
      result = CURLUE_BAD_FILE_URL;
      goto fail;
    }

    /* the path starts right after the five bytes of "file:" */
    path = &url[5];
    pathlen = urllen - 5;

    u->scheme = strdup("file");
    if(!u->scheme) {
      result = CURLUE_OUT_OF_MEMORY;
      goto fail;
    }

    /* Extra handling URLs with an authority component (i.e. that start with
     * "file://")
     *
     * We allow omitted hostname (e.g. file:/<path>) -- valid according to
     * RFC 8089, but not the (current) WHAT-WG URL spec.
     */
    if(path[0] == '/' && path[1] == '/') {
      /* swallow the two slashes */
      const char *ptr = &path[2];

      /*
       * According to RFC 8089, a file: URL can be reliably dereferenced if:
       *
       *  o it has no/blank hostname, or
       *
       *  o the hostname matches "localhost" (case-insensitively), or
       *
       *  o the hostname is a FQDN that resolves to this machine, or
       *
       *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
       *    Appendix E.3).
       *
       * For brevity, we only consider URLs with empty, "localhost", or
       * "127.0.0.1" hostnames as local, otherwise as an UNC String.
       *
       * Additionally, there is an exception for URLs with a Windows drive
       * letter in the authority (which was accidentally omitted from RFC 8089
       * Appendix E, but believe me, it was meant to be there. --MK)
       */
      if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
        /* the URL includes a hostname, it must match "localhost" or
           "127.0.0.1" to be valid */
        if(checkprefix("localhost/", ptr) ||
           checkprefix("127.0.0.1/", ptr)) {
          /* both accepted hostnames are exactly nine bytes long */
          ptr += 9; /* now points to the slash after the host */
        }
        else {
#ifdef _WIN32
          size_t len;

          /* the hostname, NetBIOS computer name, can not contain disallowed
             chars, and the delimiting slash character must be appended to the
             hostname */
          path = strpbrk(ptr, "/\\:*?\"<>|");
          if(!path || *path != '/') {
            result = CURLUE_BAD_FILE_URL;
            goto fail;
          }

          len = path - ptr;
          if(len) {
            CURLcode code = curlx_dyn_addn(&host, ptr, len);
            if(code) {
              result = cc2cu(code);
              goto fail;
            }
            uncpath = TRUE;
          }

          ptr -= 2; /* now points to the // before the host in UNC */
#else
          /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
             none */
          result = CURLUE_BAD_FILE_URL;
          goto fail;
#endif
        }
      }

      /* the path is whatever remains from ptr to the end of the input */
      path = ptr;
      pathlen = urllen - (ptr - url);
    }

    if(!uncpath)
      /* no host for file: URLs by default */
      curlx_dyn_reset(&host);

#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
    /* Do not allow Windows drive letters when not in Windows.
     * This catches both "file:/c:" and "file:c:" */
    if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
       STARTS_WITH_URL_DRIVE_PREFIX(path)) {
      /* File drive letters are only accepted in MS-DOS/Windows */
      result = CURLUE_BAD_FILE_URL;
      goto fail;
    }
#else
    /* If the path starts with a slash and a drive letter, ditch the slash */
    if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
      /* the slash is skipped by advancing the pointer; nothing is copied */
      path++;
      pathlen--;
    }
#endif

  }
  else {
    /* every scheme other than file:, and input without any scheme */
    const char *schemep = NULL;
    const char *hostp;
    size_t hostlen;

    if(schemelen) {
      /* count the slashes following "scheme:", giving up after four */
      int i = 0;
      const char *p = &url[schemelen + 1];
      while((*p == '/') && (i < 4)) {
        p++;
        i++;
      }

      schemep = schemebuf;
      if(!Curl_get_scheme_handler(schemep) &&
         !(flags & CURLU_NON_SUPPORT_SCHEME)) {
        result = CURLUE_UNSUPPORTED_SCHEME;
        goto fail;
      }

      if((i < 1) || (i > 3)) {
        /* less than one or more than three slashes */
        result = CURLUE_BAD_SLASHES;
        goto fail;
      }
      hostp = p; /* hostname starts here */
    }
    else {
      /* no scheme! */

      if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
        result = CURLUE_BAD_SCHEME;
        goto fail;
      }
      if(flags & CURLU_DEFAULT_SCHEME)
        schemep = DEFAULT_SCHEME;

      /*
       * The URL was badly formatted, let's try without scheme specified.
       */
      hostp = url;
    }

    /* schemep is still NULL here when the scheme is to be guessed from the
       hostname further down */
    if(schemep) {
      u->scheme = strdup(schemep);
      if(!u->scheme) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }

    /* find the end of the hostname + port number */
    hostlen = strcspn(hostp, "/?#");
    path = &hostp[hostlen];

    /* this pathlen also contains the query and the fragment */
    pathlen = urllen - (path - url);
    if(hostlen) {

      result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
      if(result)
        goto fail;

      if((flags & CURLU_GUESS_SCHEME) && !schemep) {
        const char *hostname = curlx_dyn_ptr(&host);
        /* legacy curl-style guess based on hostname */
        if(checkprefix("ftp.", hostname))
          schemep = "ftp";
        else if(checkprefix("dict.", hostname))
          schemep = "dict";
        else if(checkprefix("ldap.", hostname))
          schemep = "ldap";
        else if(checkprefix("imap.", hostname))
          schemep = "imap";
        else if(checkprefix("smtp.", hostname))
          schemep = "smtp";
        else if(checkprefix("pop3.", hostname))
          schemep = "pop3";
        else
          schemep = "http";

        u->scheme = strdup(schemep);
        if(!u->scheme) {
          result = CURLUE_OUT_OF_MEMORY;
          goto fail;
        }
        /* remember that the scheme was not present in the input */
        u->guessed_scheme = TRUE;
      }
    }
    else if(flags & CURLU_NO_AUTHORITY) {
      /* allowed to be empty. */
      if(curlx_dyn_add(&host, "")) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }
    else {
      result = CURLUE_NO_HOST;
      goto fail;
    }
  }

  /* the fragment is cut off first; it runs from the first '#' to the end */
  fragment = strchr(path, '#');
  if(fragment) {
    fraglen = pathlen - (fragment - path);
    u->fragment_present = TRUE;
    if(fraglen > 1) {
      /* skip the leading '#' in the copy but include the terminating null */
      if(flags & CURLU_URLENCODE) {
        struct dynbuf enc;
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
        result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
        if(result)
          goto fail;
        u->fragment = curlx_dyn_ptr(&enc);
      }
      else {
        u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
        if(!u->fragment) {
          result = CURLUE_OUT_OF_MEMORY;
          goto fail;
        }
      }
    }
    /* after this, pathlen still contains the query */
    pathlen -= fraglen;
  }

  /* only look for '?' within what remains once the fragment is removed */
  query = memchr(path, '?', pathlen);
  if(query) {
    /* qlen counts the leading '?' as well */
    size_t qlen = fragment ? (size_t)(fragment - query) :
      pathlen - (query - path);
    pathlen -= qlen;
    u->query_present = TRUE;
    if(qlen > 1) {
      if(flags & CURLU_URLENCODE) {
        struct dynbuf enc;
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
        /* skip the leading question mark */
        result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
        if(result)
          goto fail;
        u->query = curlx_dyn_ptr(&enc);
      }
      else {
        u->query = Curl_memdup0(query + 1, qlen - 1);
        if(!u->query) {
          result = CURLUE_OUT_OF_MEMORY;
          goto fail;
        }
      }
    }
    else {
      /* single byte query */
      u->query = strdup("");
      if(!u->query) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }
  }

  if(pathlen && (flags & CURLU_URLENCODE)) {
    /* store the encoded path in the handle right away and continue working
       on that copy */
    struct dynbuf enc;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
    if(result)
      goto fail;
    pathlen = curlx_dyn_len(&enc);
    path = u->path = curlx_dyn_ptr(&enc);
  }

  if(pathlen <= 1) {
    /* there is no path left or just the slash, unset */
    path = NULL;
  }
  else {
    if(!u->path) {
      u->path = Curl_memdup0(path, pathlen);
      if(!u->path) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
      path = u->path;
    }
    else if(flags & CURLU_URLENCODE)
      /* it might have encoded more than just the path so cut it */
      u->path[pathlen] = 0;

    if(!(flags & CURLU_PATH_AS_IS)) {
      /* remove ../ and ./ sequences according to RFC3986 */
      char *dedot;
      int err = dedotdotify(path, pathlen, &dedot);
      if(err) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
      if(dedot) {
        /* the cleaned-up copy replaces the stored path */
        free(u->path);
        u->path = dedot;
      }
    }
  }

  /* success: the handle takes over the buffer held by the host dynbuf */
  u->host = curlx_dyn_ptr(&host);

  return result;
fail:
  curlx_dyn_free(&host);
  free_urlhandle(u);
  return result;
}
   1257 
   1258 /*
   1259  * Parse the URL and, if successful, replace everything in the Curl_URL struct.
   1260  */
   1261 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
   1262                                       unsigned int flags)
   1263 {
   1264   CURLUcode result;
   1265   CURLU tmpurl;
   1266   memset(&tmpurl, 0, sizeof(tmpurl));
   1267   result = parseurl(url, &tmpurl, flags);
   1268   if(!result) {
   1269     free_urlhandle(u);
   1270     *u = tmpurl;
   1271   }
   1272   return result;
   1273 }
   1274 
   1275 /*
   1276  */
   1277 CURLU *curl_url(void)
   1278 {
   1279   return calloc(1, sizeof(struct Curl_URL));
   1280 }
   1281 
   1282 void curl_url_cleanup(CURLU *u)
   1283 {
   1284   if(u) {
   1285     free_urlhandle(u);
   1286     free(u);
   1287   }
   1288 }
   1289 
   1290 #define DUP(dest, src, name)                    \
   1291   do {                                          \
   1292     if(src->name) {                             \
   1293       dest->name = strdup(src->name);           \
   1294       if(!dest->name)                           \
   1295         goto fail;                              \
   1296     }                                           \
   1297   } while(0)
   1298 
   1299 CURLU *curl_url_dup(const CURLU *in)
   1300 {
   1301   struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
   1302   if(u) {
   1303     DUP(u, in, scheme);
   1304     DUP(u, in, user);
   1305     DUP(u, in, password);
   1306     DUP(u, in, options);
   1307     DUP(u, in, host);
   1308     DUP(u, in, port);
   1309     DUP(u, in, path);
   1310     DUP(u, in, query);
   1311     DUP(u, in, fragment);
   1312     DUP(u, in, zoneid);
   1313     u->portnum = in->portnum;
   1314     u->fragment_present = in->fragment_present;
   1315     u->query_present = in->query_present;
   1316   }
   1317   return u;
   1318 fail:
   1319   curl_url_cleanup(u);
   1320   return NULL;
   1321 }
   1322 
   1323 #ifndef USE_IDN
   1324 #define host_decode(x,y) CURLUE_LACKS_IDN
   1325 #define host_encode(x,y) CURLUE_LACKS_IDN
   1326 #else
   1327 static CURLUcode host_decode(const char *host, char **allochost)
   1328 {
   1329   CURLcode result = Curl_idn_decode(host, allochost);
   1330   if(result)
   1331     return (result == CURLE_OUT_OF_MEMORY) ?
   1332       CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
   1333   return CURLUE_OK;
   1334 }
   1335 
   1336 static CURLUcode host_encode(const char *host, char **allochost)
   1337 {
   1338   CURLcode result = Curl_idn_encode(host, allochost);
   1339   if(result)
   1340     return (result == CURLE_OUT_OF_MEMORY) ?
   1341       CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
   1342   return CURLUE_OK;
   1343 }
   1344 #endif
   1345 
   1346 static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
   1347                                const char *ptr, char **part,
   1348                                bool plusdecode, unsigned int flags)
   1349 {
   1350   size_t partlen = strlen(ptr);
   1351   bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
   1352   bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
   1353   bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST);
   1354   bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST);
   1355   *part = Curl_memdup0(ptr, partlen);
   1356   if(!*part)
   1357     return CURLUE_OUT_OF_MEMORY;
   1358   if(plusdecode) {
   1359     /* convert + to space */
   1360     char *plus = *part;
   1361     size_t i = 0;
   1362     for(i = 0; i < partlen; ++plus, i++) {
   1363       if(*plus == '+')
   1364         *plus = ' ';
   1365     }
   1366   }
   1367   if(urldecode) {
   1368     char *decoded;
   1369     size_t dlen;
   1370     /* this unconditional rejection of control bytes is documented
   1371        API behavior */
   1372     CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
   1373     free(*part);
   1374     if(res) {
   1375       *part = NULL;
   1376       return CURLUE_URLDECODE;
   1377     }
   1378     *part = decoded;
   1379     partlen = dlen;
   1380   }
   1381   if(urlencode) {
   1382     struct dynbuf enc;
   1383     CURLUcode uc;
   1384     curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
   1385     uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
   1386     if(uc)
   1387       return uc;
   1388     free(*part);
   1389     *part = curlx_dyn_ptr(&enc);
   1390   }
   1391   else if(punycode) {
   1392     if(!Curl_is_ASCII_name(u->host)) {
   1393       char *allochost = NULL;
   1394       CURLUcode ret = host_decode(*part, &allochost);
   1395       if(ret)
   1396         return ret;
   1397       free(*part);
   1398       *part = allochost;
   1399     }
   1400   }
   1401   else if(depunyfy) {
   1402     if(Curl_is_ASCII_name(u->host)) {
   1403       char *allochost = NULL;
   1404       CURLUcode ret = host_encode(*part, &allochost);
   1405       if(ret)
   1406         return ret;
   1407       free(*part);
   1408       *part = allochost;
   1409     }
   1410   }
   1411 
   1412   return CURLUE_OK;
   1413 }
   1414 
   1415 static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
   1416 {
   1417   char *url;
   1418   const char *scheme;
   1419   char *options = u->options;
   1420   char *port = u->port;
   1421   char *allochost = NULL;
   1422   bool show_fragment =
   1423     u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
   1424   bool show_query = (u->query && u->query[0]) ||
   1425     (u->query_present && flags & CURLU_GET_EMPTY);
   1426   bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
   1427   bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
   1428   bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
   1429   char portbuf[7];
   1430   if(u->scheme && curl_strequal("file", u->scheme)) {
   1431     url = aprintf("file://%s%s%s%s%s",
   1432                   u->path,
   1433                   show_query ? "?": "",
   1434                   u->query ? u->query : "",
   1435                   show_fragment ? "#": "",
   1436                   u->fragment ? u->fragment : "");
   1437   }
   1438   else if(!u->host)
   1439     return CURLUE_NO_HOST;
   1440   else {
   1441     const struct Curl_handler *h = NULL;
   1442     char schemebuf[MAX_SCHEME_LEN + 5];
   1443     if(u->scheme)
   1444       scheme = u->scheme;
   1445     else if(flags & CURLU_DEFAULT_SCHEME)
   1446       scheme = DEFAULT_SCHEME;
   1447     else
   1448       return CURLUE_NO_SCHEME;
   1449 
   1450     h = Curl_get_scheme_handler(scheme);
   1451     if(!port && (flags & CURLU_DEFAULT_PORT)) {
   1452       /* there is no stored port number, but asked to deliver
   1453          a default one for the scheme */
   1454       if(h) {
   1455         msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
   1456         port = portbuf;
   1457       }
   1458     }
   1459     else if(port) {
   1460       /* there is a stored port number, but asked to inhibit if it matches
   1461          the default one for the scheme */
   1462       if(h && (h->defport == u->portnum) &&
   1463          (flags & CURLU_NO_DEFAULT_PORT))
   1464         port = NULL;
   1465     }
   1466 
   1467     if(h && !(h->flags & PROTOPT_URLOPTIONS))
   1468       options = NULL;
   1469 
   1470     if(u->host[0] == '[') {
   1471       if(u->zoneid) {
   1472         /* make it '[ host %25 zoneid ]' */
   1473         struct dynbuf enc;
   1474         size_t hostlen = strlen(u->host);
   1475         curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
   1476         if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
   1477                           u->zoneid))
   1478           return CURLUE_OUT_OF_MEMORY;
   1479         allochost = curlx_dyn_ptr(&enc);
   1480       }
   1481     }
   1482     else if(urlencode) {
   1483       allochost = curl_easy_escape(NULL, u->host, 0);
   1484       if(!allochost)
   1485         return CURLUE_OUT_OF_MEMORY;
   1486     }
   1487     else if(punycode) {
   1488       if(!Curl_is_ASCII_name(u->host)) {
   1489         CURLUcode ret = host_decode(u->host, &allochost);
   1490         if(ret)
   1491           return ret;
   1492       }
   1493     }
   1494     else if(depunyfy) {
   1495       if(Curl_is_ASCII_name(u->host)) {
   1496         CURLUcode ret = host_encode(u->host, &allochost);
   1497         if(ret)
   1498           return ret;
   1499       }
   1500     }
   1501 
   1502     if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
   1503       msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
   1504     else
   1505       schemebuf[0] = 0;
   1506 
   1507     url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
   1508                   schemebuf,
   1509                   u->user ? u->user : "",
   1510                   u->password ? ":": "",
   1511                   u->password ? u->password : "",
   1512                   options ? ";" : "",
   1513                   options ? options : "",
   1514                   (u->user || u->password || options) ? "@": "",
   1515                   allochost ? allochost : u->host,
   1516                   port ? ":": "",
   1517                   port ? port : "",
   1518                   u->path ? u->path : "/",
   1519                   show_query ? "?": "",
   1520                   u->query ? u->query : "",
   1521                   show_fragment ? "#": "",
   1522                   u->fragment ? u->fragment : "");
   1523     free(allochost);
   1524   }
   1525   if(!url)
   1526     return CURLUE_OUT_OF_MEMORY;
   1527   *part = url;
   1528   return CURLUE_OK;
   1529 }
   1530 
   1531 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
   1532                        char **part, unsigned int flags)
   1533 {
   1534   const char *ptr;
   1535   CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
   1536   char portbuf[7];
   1537   bool plusdecode = FALSE;
   1538   if(!u)
   1539     return CURLUE_BAD_HANDLE;
   1540   if(!part)
   1541     return CURLUE_BAD_PARTPOINTER;
   1542   *part = NULL;
   1543 
   1544   switch(what) {
   1545   case CURLUPART_SCHEME:
   1546     ptr = u->scheme;
   1547     ifmissing = CURLUE_NO_SCHEME;
   1548     flags &= ~CURLU_URLDECODE; /* never for schemes */
   1549     if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
   1550       return CURLUE_NO_SCHEME;
   1551     break;
   1552   case CURLUPART_USER:
   1553     ptr = u->user;
   1554     ifmissing = CURLUE_NO_USER;
   1555     break;
   1556   case CURLUPART_PASSWORD:
   1557     ptr = u->password;
   1558     ifmissing = CURLUE_NO_PASSWORD;
   1559     break;
   1560   case CURLUPART_OPTIONS:
   1561     ptr = u->options;
   1562     ifmissing = CURLUE_NO_OPTIONS;
   1563     break;
   1564   case CURLUPART_HOST:
   1565     ptr = u->host;
   1566     ifmissing = CURLUE_NO_HOST;
   1567     break;
   1568   case CURLUPART_ZONEID:
   1569     ptr = u->zoneid;
   1570     ifmissing = CURLUE_NO_ZONEID;
   1571     break;
   1572   case CURLUPART_PORT:
   1573     ptr = u->port;
   1574     ifmissing = CURLUE_NO_PORT;
   1575     flags &= ~CURLU_URLDECODE; /* never for port */
   1576     if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
   1577       /* there is no stored port number, but asked to deliver
   1578          a default one for the scheme */
   1579       const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
   1580       if(h) {
   1581         msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
   1582         ptr = portbuf;
   1583       }
   1584     }
   1585     else if(ptr && u->scheme) {
   1586       /* there is a stored port number, but ask to inhibit if
   1587          it matches the default one for the scheme */
   1588       const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
   1589       if(h && (h->defport == u->portnum) &&
   1590          (flags & CURLU_NO_DEFAULT_PORT))
   1591         ptr = NULL;
   1592     }
   1593     break;
   1594   case CURLUPART_PATH:
   1595     ptr = u->path;
   1596     if(!ptr)
   1597       ptr = "/";
   1598     break;
   1599   case CURLUPART_QUERY:
   1600     ptr = u->query;
   1601     ifmissing = CURLUE_NO_QUERY;
   1602     plusdecode = flags & CURLU_URLDECODE;
   1603     if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
   1604       /* there was a blank query and the user do not ask for it */
   1605       ptr = NULL;
   1606     break;
   1607   case CURLUPART_FRAGMENT:
   1608     ptr = u->fragment;
   1609     ifmissing = CURLUE_NO_FRAGMENT;
   1610     if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
   1611       /* there was a blank fragment and the user asks for it */
   1612       ptr = "";
   1613     break;
   1614   case CURLUPART_URL:
   1615     return urlget_url(u, part, flags);
   1616   default:
   1617     ptr = NULL;
   1618     break;
   1619   }
   1620   if(ptr)
   1621     return urlget_format(u, what, ptr, part, plusdecode, flags);
   1622 
   1623   return ifmissing;
   1624 }
   1625 
   1626 static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
   1627                                 unsigned int flags)
   1628 {
   1629   size_t plen = strlen(scheme);
   1630   const struct Curl_handler *h = NULL;
   1631   if((plen > MAX_SCHEME_LEN) || (plen < 1))
   1632     /* too long or too short */
   1633     return CURLUE_BAD_SCHEME;
   1634   /* verify that it is a fine scheme */
   1635   h = Curl_get_scheme_handler(scheme);
   1636   if(!h) {
   1637     const char *s = scheme;
   1638     if(!(flags & CURLU_NON_SUPPORT_SCHEME))
   1639       return CURLUE_UNSUPPORTED_SCHEME;
   1640     if(ISALPHA(*s)) {
   1641       /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
   1642       while(--plen) {
   1643         if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
   1644           s++; /* fine */
   1645         else
   1646           return CURLUE_BAD_SCHEME;
   1647       }
   1648     }
   1649     else
   1650       return CURLUE_BAD_SCHEME;
   1651   }
   1652   u->guessed_scheme = FALSE;
   1653   return CURLUE_OK;
   1654 }
   1655 
   1656 static CURLUcode set_url_port(CURLU *u, const char *provided_port)
   1657 {
   1658   char *tmp;
   1659   curl_off_t port;
   1660   if(!ISDIGIT(provided_port[0]))
   1661     /* not a number */
   1662     return CURLUE_BAD_PORT_NUMBER;
   1663   if(curlx_str_number(&provided_port, &port, 0xffff) || *provided_port)
   1664     /* weirdly provided number, not good! */
   1665     return CURLUE_BAD_PORT_NUMBER;
   1666   tmp = aprintf("%" CURL_FORMAT_CURL_OFF_T, port);
   1667   if(!tmp)
   1668     return CURLUE_OUT_OF_MEMORY;
   1669   free(u->port);
   1670   u->port = tmp;
   1671   u->portnum = (unsigned short)port;
   1672   return CURLUE_OK;
   1673 }
   1674 
   1675 static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
   1676                          unsigned int flags)
   1677 {
   1678   /*
   1679    * Allow a new URL to replace the existing (if any) contents.
   1680    *
   1681    * If the existing contents is enough for a URL, allow a relative URL to
   1682    * replace it.
   1683    */
   1684   CURLUcode uc;
   1685   char *oldurl = NULL;
   1686 
   1687   if(!part_size) {
   1688     /* a blank URL is not a valid URL unless we already have a complete one
   1689        and this is a redirect */
   1690     if(!curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
   1691       /* success, meaning the "" is a fine relative URL, but nothing
   1692          changes */
   1693       free(oldurl);
   1694       return CURLUE_OK;
   1695     }
   1696     return CURLUE_MALFORMED_INPUT;
   1697   }
   1698 
   1699   /* if the new thing is absolute or the old one is not (we could not get an
   1700    * absolute URL in 'oldurl'), then replace the existing with the new. */
   1701   if(Curl_is_absolute_url(url, NULL, 0,
   1702                           flags & (CURLU_GUESS_SCHEME|CURLU_DEFAULT_SCHEME))
   1703      || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
   1704     return parseurl_and_replace(url, u, flags);
   1705   }
   1706   DEBUGASSERT(oldurl); /* it is set here */
   1707   /* apply the relative part to create a new URL */
   1708   uc = redirect_url(oldurl, url, u, flags);
   1709   free(oldurl);
   1710   return uc;
   1711 }
   1712 
   1713 static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
   1714 {
   1715   switch(what) {
   1716   case CURLUPART_URL:
   1717     free_urlhandle(u);
   1718     memset(u, 0, sizeof(struct Curl_URL));
   1719     break;
   1720   case CURLUPART_SCHEME:
   1721     Curl_safefree(u->scheme);
   1722     u->guessed_scheme = FALSE;
   1723     break;
   1724   case CURLUPART_USER:
   1725     Curl_safefree(u->user);
   1726     break;
   1727   case CURLUPART_PASSWORD:
   1728     Curl_safefree(u->password);
   1729     break;
   1730   case CURLUPART_OPTIONS:
   1731     Curl_safefree(u->options);
   1732     break;
   1733   case CURLUPART_HOST:
   1734     Curl_safefree(u->host);
   1735     break;
   1736   case CURLUPART_ZONEID:
   1737     Curl_safefree(u->zoneid);
   1738     break;
   1739   case CURLUPART_PORT:
   1740     u->portnum = 0;
   1741     Curl_safefree(u->port);
   1742     break;
   1743   case CURLUPART_PATH:
   1744     Curl_safefree(u->path);
   1745     break;
   1746   case CURLUPART_QUERY:
   1747     Curl_safefree(u->query);
   1748     u->query_present = FALSE;
   1749     break;
   1750   case CURLUPART_FRAGMENT:
   1751     Curl_safefree(u->fragment);
   1752     u->fragment_present = FALSE;
   1753     break;
   1754   default:
   1755     return CURLUE_UNKNOWN_PART;
   1756   }
   1757   return CURLUE_OK;
   1758 }
   1759 
   1760 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
   1761                        const char *part, unsigned int flags)
   1762 {
   1763   char **storep = NULL;
   1764   bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
   1765   bool plusencode = FALSE;
   1766   bool urlskipslash = FALSE;
   1767   bool leadingslash = FALSE;
   1768   bool appendquery = FALSE;
   1769   bool equalsencode = FALSE;
   1770   size_t nalloc;
   1771 
   1772   if(!u)
   1773     return CURLUE_BAD_HANDLE;
   1774   if(!part)
   1775     /* setting a part to NULL clears it */
   1776     return urlset_clear(u, what);
   1777 
   1778   nalloc = strlen(part);
   1779   if(nalloc > CURL_MAX_INPUT_LENGTH)
   1780     /* excessive input length */
   1781     return CURLUE_MALFORMED_INPUT;
   1782 
   1783   switch(what) {
   1784   case CURLUPART_SCHEME: {
   1785     CURLUcode status = set_url_scheme(u, part, flags);
   1786     if(status)
   1787       return status;
   1788     storep = &u->scheme;
   1789     urlencode = FALSE; /* never */
   1790     break;
   1791   }
   1792   case CURLUPART_USER:
   1793     storep = &u->user;
   1794     break;
   1795   case CURLUPART_PASSWORD:
   1796     storep = &u->password;
   1797     break;
   1798   case CURLUPART_OPTIONS:
   1799     storep = &u->options;
   1800     break;
   1801   case CURLUPART_HOST:
   1802     storep = &u->host;
   1803     Curl_safefree(u->zoneid);
   1804     break;
   1805   case CURLUPART_ZONEID:
   1806     storep = &u->zoneid;
   1807     break;
   1808   case CURLUPART_PORT:
   1809     return set_url_port(u, part);
   1810   case CURLUPART_PATH:
   1811     urlskipslash = TRUE;
   1812     leadingslash = TRUE; /* enforce */
   1813     storep = &u->path;
   1814     break;
   1815   case CURLUPART_QUERY:
   1816     plusencode = urlencode;
   1817     appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
   1818     equalsencode = appendquery;
   1819     storep = &u->query;
   1820     u->query_present = TRUE;
   1821     break;
   1822   case CURLUPART_FRAGMENT:
   1823     storep = &u->fragment;
   1824     u->fragment_present = TRUE;
   1825     break;
   1826   case CURLUPART_URL:
   1827     return set_url(u, part, nalloc, flags);
   1828   default:
   1829     return CURLUE_UNKNOWN_PART;
   1830   }
   1831   DEBUGASSERT(storep);
   1832   {
   1833     const char *newp;
   1834     struct dynbuf enc;
   1835     curlx_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
   1836 
   1837     if(leadingslash && (part[0] != '/')) {
   1838       CURLcode result = curlx_dyn_addn(&enc, "/", 1);
   1839       if(result)
   1840         return cc2cu(result);
   1841     }
   1842     if(urlencode) {
   1843       const unsigned char *i;
   1844 
   1845       for(i = (const unsigned char *)part; *i; i++) {
   1846         CURLcode result;
   1847         if((*i == ' ') && plusencode) {
   1848           result = curlx_dyn_addn(&enc, "+", 1);
   1849           if(result)
   1850             return CURLUE_OUT_OF_MEMORY;
   1851         }
   1852         else if(ISUNRESERVED(*i) ||
   1853                 ((*i == '/') && urlskipslash) ||
   1854                 ((*i == '=') && equalsencode)) {
   1855           if((*i == '=') && equalsencode)
   1856             /* only skip the first equals sign */
   1857             equalsencode = FALSE;
   1858           result = curlx_dyn_addn(&enc, i, 1);
   1859           if(result)
   1860             return cc2cu(result);
   1861         }
   1862         else {
   1863           unsigned char out[3]={'%'};
   1864           Curl_hexbyte(&out[1], *i);
   1865           result = curlx_dyn_addn(&enc, out, 3);
   1866           if(result)
   1867             return cc2cu(result);
   1868         }
   1869       }
   1870     }
   1871     else {
   1872       char *p;
   1873       CURLcode result = curlx_dyn_add(&enc, part);
   1874       if(result)
   1875         return cc2cu(result);
   1876       p = curlx_dyn_ptr(&enc);
   1877       while(*p) {
   1878         /* make sure percent encoded are lower case */
   1879         if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
   1880            (ISUPPER(p[1]) || ISUPPER(p[2]))) {
   1881           p[1] = Curl_raw_tolower(p[1]);
   1882           p[2] = Curl_raw_tolower(p[2]);
   1883           p += 3;
   1884         }
   1885         else
   1886           p++;
   1887       }
   1888     }
   1889     newp = curlx_dyn_ptr(&enc);
   1890 
   1891     if(appendquery && newp) {
   1892       /* Append the 'newp' string onto the old query. Add a '&' separator if
   1893          none is present at the end of the existing query already */
   1894 
   1895       size_t querylen = u->query ? strlen(u->query) : 0;
   1896       bool addamperand = querylen && (u->query[querylen -1] != '&');
   1897       if(querylen) {
   1898         struct dynbuf qbuf;
   1899         curlx_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
   1900 
   1901         if(curlx_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
   1902           goto nomem;
   1903 
   1904         if(addamperand) {
   1905           if(curlx_dyn_addn(&qbuf, "&", 1))
   1906             goto nomem;
   1907         }
   1908         if(curlx_dyn_add(&qbuf, newp))
   1909           goto nomem;
   1910         curlx_dyn_free(&enc);
   1911         free(*storep);
   1912         *storep = curlx_dyn_ptr(&qbuf);
   1913         return CURLUE_OK;
   1914 nomem:
   1915         curlx_dyn_free(&enc);
   1916         return CURLUE_OUT_OF_MEMORY;
   1917       }
   1918     }
   1919 
   1920     else if(what == CURLUPART_HOST) {
   1921       size_t n = curlx_dyn_len(&enc);
   1922       if(!n && (flags & CURLU_NO_AUTHORITY)) {
   1923         /* Skip hostname check, it is allowed to be empty. */
   1924       }
   1925       else {
   1926         bool bad = FALSE;
   1927         if(!n)
   1928           bad = TRUE; /* empty hostname is not okay */
   1929         else if(!urlencode) {
   1930           /* if the host name part was not URL encoded here, it was set ready
   1931              URL encoded so we need to decode it to check */
   1932           size_t dlen;
   1933           char *decoded = NULL;
   1934           CURLcode result =
   1935             Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
   1936           if(result || hostname_check(u, decoded, dlen))
   1937             bad = TRUE;
   1938           free(decoded);
   1939         }
   1940         else if(hostname_check(u, (char *)CURL_UNCONST(newp), n))
   1941           bad = TRUE;
   1942         if(bad) {
   1943           curlx_dyn_free(&enc);
   1944           return CURLUE_BAD_HOSTNAME;
   1945         }
   1946       }
   1947     }
   1948 
   1949     free(*storep);
   1950     *storep = (char *)CURL_UNCONST(newp);
   1951   }
   1952   return CURLUE_OK;
   1953 }