multibyte.c (9790B)
1 /*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at https://curl.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 * SPDX-License-Identifier: curl 22 * 23 ***************************************************************************/ 24 25 /* 26 * This file is 'mem-include-scan' clean, which means its memory allocations 27 * are not tracked by the curl memory tracker memdebug, so they must not use 28 * `CURLDEBUG` macro replacements in memdebug.h for free, malloc, etc. To avoid 29 * these macro replacements, wrap the names in parentheses to call the original 30 * versions: `ptr = (malloc)(123)`, `(free)(ptr)`, etc. 31 */ 32 33 #include "../curl_setup.h" 34 35 #ifdef _WIN32 36 37 #include "multibyte.h" 38 39 /* 40 * MultiByte conversions using Windows kernel32 library. 41 */ 42 43 wchar_t *curlx_convert_UTF8_to_wchar(const char *str_utf8) 44 { 45 wchar_t *str_w = NULL; 46 47 if(str_utf8) { 48 int str_w_len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, 49 str_utf8, -1, NULL, 0); 50 if(str_w_len > 0) { 51 str_w = (malloc)(str_w_len * sizeof(wchar_t)); 52 if(str_w) { 53 if(MultiByteToWideChar(CP_UTF8, 0, str_utf8, -1, str_w, 54 str_w_len) == 0) { 55 (free)(str_w); 56 return NULL; 57 } 58 } 59 } 60 } 61 62 return str_w; 63 } 64 65 char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w) 66 { 67 char *str_utf8 = NULL; 68 69 if(str_w) { 70 int bytes = WideCharToMultiByte(CP_UTF8, 0, str_w, -1, 71 NULL, 0, NULL, NULL); 72 if(bytes > 0) { 73 str_utf8 = (malloc)(bytes); 74 if(str_utf8) { 75 if(WideCharToMultiByte(CP_UTF8, 0, str_w, -1, str_utf8, bytes, 76 NULL, NULL) == 0) { 77 (free)(str_utf8); 78 return NULL; 79 } 80 } 81 } 82 } 83 84 return str_utf8; 85 } 86 87 #ifndef UNDER_CE 88 89 /* declare GetFullPathNameW for mingw-w64 UWP builds targeting old windows */ 90 #if defined(CURL_WINDOWS_UWP) && defined(__MINGW32__) && \ 91 (_WIN32_WINNT < _WIN32_WINNT_WIN10) 92 WINBASEAPI DWORD WINAPI GetFullPathNameW(LPCWSTR, DWORD, LPWSTR, LPWSTR *); 93 #endif 94 95 /* Fix excessive paths (paths that exceed MAX_PATH length of 260). 96 * 97 * This is a helper function to fix paths that would exceed the MAX_PATH 98 * limitation check done by Windows APIs. It does so by normalizing the passed 99 * in filename or path 'in' to its full canonical path, and if that path is 100 * longer than MAX_PATH then setting 'out' to "\\?\" prefix + that full path. 101 * 102 * For example 'in' filename255chars in current directory C:\foo\bar is 103 * fixed as \\?\C:\foo\bar\filename255chars for 'out' which will tell Windows 104 * it is ok to access that filename even though the actual full path is longer 105 * than 260 chars. 106 * 107 * For non-Unicode builds this function may fail sometimes because only the 108 * Unicode versions of some Windows API functions can access paths longer than 109 * MAX_PATH, for example GetFullPathNameW which is used in this function. When 110 * the full path is then converted from Unicode to multibyte that fails if any 111 * directories in the path contain characters not in the current codepage. 112 */ 113 static bool fix_excessive_path(const TCHAR *in, TCHAR **out) 114 { 115 size_t needed, count; 116 const wchar_t *in_w; 117 wchar_t *fbuf = NULL; 118 119 /* MS documented "approximate" limit for the maximum path length */ 120 const size_t max_path_len = 32767; 121 122 #ifndef _UNICODE 123 wchar_t *ibuf = NULL; 124 char *obuf = NULL; 125 #endif 126 127 *out = NULL; 128 129 /* skip paths already normalized */ 130 if(!_tcsncmp(in, _T("\\\\?\\"), 4)) 131 goto cleanup; 132 133 #ifndef _UNICODE 134 /* convert multibyte input to unicode */ 135 needed = mbstowcs(NULL, in, 0); 136 if(needed == (size_t)-1 || needed >= max_path_len) 137 goto cleanup; 138 ++needed; /* for NUL */ 139 ibuf = (malloc)(needed * sizeof(wchar_t)); 140 if(!ibuf) 141 goto cleanup; 142 count = mbstowcs(ibuf, in, needed); 143 if(count == (size_t)-1 || count >= needed) 144 goto cleanup; 145 in_w = ibuf; 146 #else 147 in_w = in; 148 #endif 149 150 /* GetFullPathNameW returns the normalized full path in unicode. It converts 151 forward slashes to backslashes, processes .. to remove directory segments, 152 etc. Unlike GetFullPathNameA it can process paths that exceed MAX_PATH. */ 153 needed = (size_t)GetFullPathNameW(in_w, 0, NULL, NULL); 154 if(!needed || needed > max_path_len) 155 goto cleanup; 156 /* skip paths that are not excessive and do not need modification */ 157 if(needed <= MAX_PATH) 158 goto cleanup; 159 fbuf = (malloc)(needed * sizeof(wchar_t)); 160 if(!fbuf) 161 goto cleanup; 162 count = (size_t)GetFullPathNameW(in_w, (DWORD)needed, fbuf, NULL); 163 if(!count || count >= needed) 164 goto cleanup; 165 166 /* prepend \\?\ or \\?\UNC\ to the excessively long path. 167 * 168 * c:\longpath ---> \\?\c:\longpath 169 * \\.\c:\longpath ---> \\?\c:\longpath 170 * \\?\c:\longpath ---> \\?\c:\longpath (unchanged) 171 * \\server\c$\longpath ---> \\?\UNC\server\c$\longpath 172 * 173 * https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats 174 */ 175 if(!wcsncmp(fbuf, L"\\\\?\\", 4)) 176 ; /* do nothing */ 177 else if(!wcsncmp(fbuf, L"\\\\.\\", 4)) 178 fbuf[2] = '?'; 179 else if(!wcsncmp(fbuf, L"\\\\.", 3) || !wcsncmp(fbuf, L"\\\\?", 3)) { 180 /* Unexpected, not UNC. The formatting doc doesn't allow this AFAICT. */ 181 goto cleanup; 182 } 183 else { 184 wchar_t *temp; 185 186 if(!wcsncmp(fbuf, L"\\\\", 2)) { 187 /* "\\?\UNC\" + full path without "\\" + null */ 188 needed = 8 + (count - 2) + 1; 189 if(needed > max_path_len) 190 goto cleanup; 191 192 temp = (malloc)(needed * sizeof(wchar_t)); 193 if(!temp) 194 goto cleanup; 195 196 wcsncpy(temp, L"\\\\?\\UNC\\", 8); 197 wcscpy(temp + 8, fbuf + 2); 198 } 199 else { 200 /* "\\?\" + full path + null */ 201 needed = 4 + count + 1; 202 if(needed > max_path_len) 203 goto cleanup; 204 205 temp = (malloc)(needed * sizeof(wchar_t)); 206 if(!temp) 207 goto cleanup; 208 209 wcsncpy(temp, L"\\\\?\\", 4); 210 wcscpy(temp + 4, fbuf); 211 } 212 213 (free)(fbuf); 214 fbuf = temp; 215 } 216 217 #ifndef _UNICODE 218 /* convert unicode full path to multibyte output */ 219 needed = wcstombs(NULL, fbuf, 0); 220 if(needed == (size_t)-1 || needed >= max_path_len) 221 goto cleanup; 222 ++needed; /* for NUL */ 223 obuf = (malloc)(needed); 224 if(!obuf) 225 goto cleanup; 226 count = wcstombs(obuf, fbuf, needed); 227 if(count == (size_t)-1 || count >= needed) 228 goto cleanup; 229 *out = obuf; 230 obuf = NULL; 231 #else 232 *out = fbuf; 233 fbuf = NULL; 234 #endif 235 236 cleanup: 237 (free)(fbuf); 238 #ifndef _UNICODE 239 (free)(ibuf); 240 (free)(obuf); 241 #endif 242 return *out ? true : false; 243 } 244 245 int curlx_win32_open(const char *filename, int oflag, ...) 246 { 247 int pmode = 0; 248 int result = -1; 249 TCHAR *fixed = NULL; 250 const TCHAR *target = NULL; 251 252 #ifdef _UNICODE 253 wchar_t *filename_w = curlx_convert_UTF8_to_wchar(filename); 254 #endif 255 256 va_list param; 257 va_start(param, oflag); 258 if(oflag & O_CREAT) 259 pmode = va_arg(param, int); 260 va_end(param); 261 262 #ifdef _UNICODE 263 if(filename_w) { 264 if(fix_excessive_path(filename_w, &fixed)) 265 target = fixed; 266 else 267 target = filename_w; 268 result = _wopen(target, oflag, pmode); 269 curlx_unicodefree(filename_w); 270 } 271 else 272 /* !checksrc! disable ERRNOVAR 1 */ 273 CURL_SETERRNO(EINVAL); 274 #else 275 if(fix_excessive_path(filename, &fixed)) 276 target = fixed; 277 else 278 target = filename; 279 result = _open(target, oflag, pmode); 280 #endif 281 282 (free)(fixed); 283 return result; 284 } 285 286 FILE *curlx_win32_fopen(const char *filename, const char *mode) 287 { 288 FILE *result = NULL; 289 TCHAR *fixed = NULL; 290 const TCHAR *target = NULL; 291 292 #ifdef _UNICODE 293 wchar_t *filename_w = curlx_convert_UTF8_to_wchar(filename); 294 wchar_t *mode_w = curlx_convert_UTF8_to_wchar(mode); 295 if(filename_w && mode_w) { 296 if(fix_excessive_path(filename_w, &fixed)) 297 target = fixed; 298 else 299 target = filename_w; 300 result = _wfopen(target, mode_w); 301 } 302 else 303 /* !checksrc! disable ERRNOVAR 1 */ 304 CURL_SETERRNO(EINVAL); 305 curlx_unicodefree(filename_w); 306 curlx_unicodefree(mode_w); 307 #else 308 if(fix_excessive_path(filename, &fixed)) 309 target = fixed; 310 else 311 target = filename; 312 result = (fopen)(target, mode); 313 #endif 314 315 (free)(fixed); 316 return result; 317 } 318 319 int curlx_win32_stat(const char *path, struct_stat *buffer) 320 { 321 int result = -1; 322 TCHAR *fixed = NULL; 323 const TCHAR *target = NULL; 324 325 #ifdef _UNICODE 326 wchar_t *path_w = curlx_convert_UTF8_to_wchar(path); 327 if(path_w) { 328 if(fix_excessive_path(path_w, &fixed)) 329 target = fixed; 330 else 331 target = path_w; 332 #ifndef USE_WIN32_LARGE_FILES 333 result = _wstat(target, buffer); 334 #else 335 result = _wstati64(target, buffer); 336 #endif 337 curlx_unicodefree(path_w); 338 } 339 else 340 /* !checksrc! disable ERRNOVAR 1 */ 341 CURL_SETERRNO(EINVAL); 342 #else 343 if(fix_excessive_path(path, &fixed)) 344 target = fixed; 345 else 346 target = path; 347 #ifndef USE_WIN32_LARGE_FILES 348 result = _stat(target, buffer); 349 #else 350 result = _stati64(target, buffer); 351 #endif 352 #endif 353 354 (free)(fixed); 355 return result; 356 } 357 358 #endif /* UNDER_CE */ 359 360 #endif /* _WIN32 */