idn.c (10334B)
1 /*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at https://curl.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 * SPDX-License-Identifier: curl 22 * 23 ***************************************************************************/ 24 25 /* 26 * IDN conversions 27 */ 28 29 #include "curl_setup.h" 30 #include "urldata.h" 31 #include "idn.h" 32 #include "sendf.h" 33 #include "curlx/multibyte.h" 34 #include "curlx/warnless.h" 35 36 #ifdef USE_LIBIDN2 37 #include <idn2.h> 38 39 #if defined(_WIN32) && defined(UNICODE) 40 #define IDN2_LOOKUP(name, host, flags) \ 41 idn2_lookup_u8((const uint8_t *)name, (uint8_t **)host, flags) 42 #else 43 #define IDN2_LOOKUP(name, host, flags) \ 44 idn2_lookup_ul((const char *)name, (char **)host, flags) 45 #endif 46 #endif /* USE_LIBIDN2 */ 47 48 /* The last 3 #include files should be in this order */ 49 #include "curl_printf.h" 50 #include "curl_memory.h" 51 #include "memdebug.h" 52 53 /* for macOS and iOS targets */ 54 #if defined(USE_APPLE_IDN) 55 #include <unicode/uidna.h> 56 #include <iconv.h> 57 #include <langinfo.h> 58 59 #define MAX_HOST_LENGTH 512 60 61 static CURLcode iconv_to_utf8(const char *in, size_t inlen, 62 char **out, size_t *outlen) 63 { 64 iconv_t cd = iconv_open("UTF-8", nl_langinfo(CODESET)); 65 if(cd != (iconv_t)-1) { 66 size_t iconv_outlen = *outlen; 67 char *iconv_in = (char *)CURL_UNCONST(in); 68 size_t iconv_inlen = inlen; 69 size_t iconv_result = iconv(cd, &iconv_in, &iconv_inlen, 70 out, &iconv_outlen); 71 *outlen -= iconv_outlen; 72 iconv_close(cd); 73 if(iconv_result == (size_t)-1) { 74 /* !checksrc! disable ERRNOVAR 1 */ 75 if(errno == ENOMEM) 76 return CURLE_OUT_OF_MEMORY; 77 else 78 return CURLE_URL_MALFORMAT; 79 } 80 81 return CURLE_OK; 82 } 83 else { 84 /* !checksrc! disable ERRNOVAR 1 */ 85 if(errno == ENOMEM) 86 return CURLE_OUT_OF_MEMORY; 87 else 88 return CURLE_FAILED_INIT; 89 } 90 } 91 92 static CURLcode mac_idn_to_ascii(const char *in, char **out) 93 { 94 size_t inlen = strlen(in); 95 if(inlen < MAX_HOST_LENGTH) { 96 char iconv_buffer[MAX_HOST_LENGTH] = {0}; 97 char *iconv_outptr = iconv_buffer; 98 size_t iconv_outlen = sizeof(iconv_buffer); 99 CURLcode iconv_result = iconv_to_utf8(in, inlen, 100 &iconv_outptr, &iconv_outlen); 101 if(!iconv_result) { 102 UErrorCode err = U_ZERO_ERROR; 103 UIDNA* idna = uidna_openUTS46( 104 UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_ASCII, &err); 105 if(!U_FAILURE(err)) { 106 UIDNAInfo info = UIDNA_INFO_INITIALIZER; 107 char buffer[MAX_HOST_LENGTH] = {0}; 108 (void)uidna_nameToASCII_UTF8(idna, iconv_buffer, (int)iconv_outlen, 109 buffer, sizeof(buffer) - 1, &info, &err); 110 uidna_close(idna); 111 if(!U_FAILURE(err) && !info.errors) { 112 *out = strdup(buffer); 113 if(*out) 114 return CURLE_OK; 115 else 116 return CURLE_OUT_OF_MEMORY; 117 } 118 } 119 } 120 else 121 return iconv_result; 122 } 123 return CURLE_URL_MALFORMAT; 124 } 125 126 static CURLcode mac_ascii_to_idn(const char *in, char **out) 127 { 128 size_t inlen = strlen(in); 129 if(inlen < MAX_HOST_LENGTH) { 130 UErrorCode err = U_ZERO_ERROR; 131 UIDNA* idna = uidna_openUTS46( 132 UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_UNICODE, &err); 133 if(!U_FAILURE(err)) { 134 UIDNAInfo info = UIDNA_INFO_INITIALIZER; 135 char buffer[MAX_HOST_LENGTH] = {0}; 136 (void)uidna_nameToUnicodeUTF8(idna, in, -1, buffer, 137 sizeof(buffer) - 1, &info, &err); 138 uidna_close(idna); 139 if(!U_FAILURE(err)) { 140 *out = strdup(buffer); 141 if(*out) 142 return CURLE_OK; 143 else 144 return CURLE_OUT_OF_MEMORY; 145 } 146 } 147 } 148 return CURLE_URL_MALFORMAT; 149 } 150 #endif 151 152 #ifdef USE_WIN32_IDN 153 /* using Windows kernel32 and normaliz libraries. */ 154 155 #if (!defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600) && \ 156 (!defined(WINVER) || WINVER < 0x600) 157 WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags, 158 const WCHAR *lpUnicodeCharStr, 159 int cchUnicodeChar, 160 WCHAR *lpASCIICharStr, 161 int cchASCIIChar); 162 WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags, 163 const WCHAR *lpASCIICharStr, 164 int cchASCIIChar, 165 WCHAR *lpUnicodeCharStr, 166 int cchUnicodeChar); 167 #endif 168 169 #define IDN_MAX_LENGTH 255 170 171 static CURLcode win32_idn_to_ascii(const char *in, char **out) 172 { 173 wchar_t *in_w = curlx_convert_UTF8_to_wchar(in); 174 *out = NULL; 175 if(in_w) { 176 wchar_t punycode[IDN_MAX_LENGTH]; 177 int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode, 178 IDN_MAX_LENGTH); 179 curlx_unicodefree(in_w); 180 if(chars) { 181 char *mstr = curlx_convert_wchar_to_UTF8(punycode); 182 if(mstr) { 183 *out = strdup(mstr); 184 curlx_unicodefree(mstr); 185 if(!*out) 186 return CURLE_OUT_OF_MEMORY; 187 } 188 else 189 return CURLE_OUT_OF_MEMORY; 190 } 191 else 192 return CURLE_URL_MALFORMAT; 193 } 194 else 195 return CURLE_URL_MALFORMAT; 196 197 return CURLE_OK; 198 } 199 200 static CURLcode win32_ascii_to_idn(const char *in, char **output) 201 { 202 char *out = NULL; 203 204 wchar_t *in_w = curlx_convert_UTF8_to_wchar(in); 205 if(in_w) { 206 WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */ 207 int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn, 208 IDN_MAX_LENGTH); 209 if(chars) { 210 /* 'chars' is "the number of characters retrieved" */ 211 char *mstr = curlx_convert_wchar_to_UTF8(idn); 212 if(mstr) { 213 out = strdup(mstr); 214 curlx_unicodefree(mstr); 215 if(!out) 216 return CURLE_OUT_OF_MEMORY; 217 } 218 } 219 else 220 return CURLE_URL_MALFORMAT; 221 } 222 else 223 return CURLE_URL_MALFORMAT; 224 *output = out; 225 return CURLE_OK; 226 } 227 228 #endif /* USE_WIN32_IDN */ 229 230 /* 231 * Helpers for IDNA conversions. 232 */ 233 bool Curl_is_ASCII_name(const char *hostname) 234 { 235 /* get an UNSIGNED local version of the pointer */ 236 const unsigned char *ch = (const unsigned char *)hostname; 237 238 if(!hostname) /* bad input, consider it ASCII! */ 239 return TRUE; 240 241 while(*ch) { 242 if(*ch++ & 0x80) 243 return FALSE; 244 } 245 return TRUE; 246 } 247 248 #ifdef USE_IDN 249 /* 250 * Curl_idn_decode() returns an allocated IDN decoded string if it was 251 * possible. NULL on error. 252 * 253 * CURLE_URL_MALFORMAT - the hostname could not be converted 254 * CURLE_OUT_OF_MEMORY - memory problem 255 * 256 */ 257 static CURLcode idn_decode(const char *input, char **output) 258 { 259 char *decoded = NULL; 260 CURLcode result = CURLE_OK; 261 #ifdef USE_LIBIDN2 262 if(idn2_check_version(IDN2_VERSION)) { 263 int flags = IDN2_NFC_INPUT 264 #if IDN2_VERSION_NUMBER >= 0x00140000 265 /* IDN2_NFC_INPUT: Normalize input string using normalization form C. 266 IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional 267 processing. */ 268 | IDN2_NONTRANSITIONAL 269 #endif 270 ; 271 int rc = IDN2_LOOKUP(input, &decoded, flags); 272 if(rc != IDN2_OK) 273 /* fallback to TR46 Transitional mode for better IDNA2003 274 compatibility */ 275 rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL); 276 if(rc != IDN2_OK) 277 result = CURLE_URL_MALFORMAT; 278 } 279 else 280 /* a too old libidn2 version */ 281 result = CURLE_NOT_BUILT_IN; 282 #elif defined(USE_WIN32_IDN) 283 result = win32_idn_to_ascii(input, &decoded); 284 #elif defined(USE_APPLE_IDN) 285 result = mac_idn_to_ascii(input, &decoded); 286 #endif 287 if(!result) 288 *output = decoded; 289 return result; 290 } 291 292 static CURLcode idn_encode(const char *puny, char **output) 293 { 294 char *enc = NULL; 295 #ifdef USE_LIBIDN2 296 int rc = idn2_to_unicode_8z8z(puny, &enc, 0); 297 if(rc != IDNA_SUCCESS) 298 return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT; 299 #elif defined(USE_WIN32_IDN) 300 CURLcode result = win32_ascii_to_idn(puny, &enc); 301 if(result) 302 return result; 303 #elif defined(USE_APPLE_IDN) 304 CURLcode result = mac_ascii_to_idn(puny, &enc); 305 if(result) 306 return result; 307 #endif 308 *output = enc; 309 return CURLE_OK; 310 } 311 312 CURLcode Curl_idn_decode(const char *input, char **output) 313 { 314 char *d = NULL; 315 CURLcode result = idn_decode(input, &d); 316 #ifdef USE_LIBIDN2 317 if(!result) { 318 char *c = strdup(d); 319 idn2_free(d); 320 if(c) 321 d = c; 322 else 323 result = CURLE_OUT_OF_MEMORY; 324 } 325 #endif 326 if(!result) 327 *output = d; 328 return result; 329 } 330 331 CURLcode Curl_idn_encode(const char *puny, char **output) 332 { 333 char *d = NULL; 334 CURLcode result = idn_encode(puny, &d); 335 #ifdef USE_LIBIDN2 336 if(!result) { 337 char *c = strdup(d); 338 idn2_free(d); 339 if(c) 340 d = c; 341 else 342 result = CURLE_OUT_OF_MEMORY; 343 } 344 #endif 345 if(!result) 346 *output = d; 347 return result; 348 } 349 350 /* 351 * Frees data allocated by idnconvert_hostname() 352 */ 353 void Curl_free_idnconverted_hostname(struct hostname *host) 354 { 355 Curl_safefree(host->encalloc); 356 } 357 358 #endif /* USE_IDN */ 359 360 /* 361 * Perform any necessary IDN conversion of hostname 362 */ 363 CURLcode Curl_idnconvert_hostname(struct hostname *host) 364 { 365 /* set the name we use to display the hostname */ 366 host->dispname = host->name; 367 368 #ifdef USE_IDN 369 /* Check name for non-ASCII and convert hostname if we can */ 370 if(!Curl_is_ASCII_name(host->name)) { 371 char *decoded; 372 CURLcode result = Curl_idn_decode(host->name, &decoded); 373 if(result) 374 return result; 375 /* successful */ 376 host->name = host->encalloc = decoded; 377 } 378 #endif 379 return CURLE_OK; 380 }