ares_event_win32.c (32217B)
1 /* MIT License 2 * 3 * Copyright (c) 2024 Brad House 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a copy 6 * of this software and associated documentation files (the "Software"), to deal 7 * in the Software without restriction, including without limitation the rights 8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 * copies of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 * SPDX-License-Identifier: MIT 25 */ 26 27 /* Uses an anonymous union */ 28 #if defined(__clang__) || defined(__GNUC__) 29 # pragma GCC diagnostic push 30 # if defined(__clang__) 31 # pragma GCC diagnostic ignored "-Wc11-extensions" 32 # else 33 # pragma GCC diagnostic ignored "-Wpedantic" 34 # endif 35 #endif 36 37 #include "ares_private.h" 38 #include "ares_event.h" 39 #include "ares_event_win32.h" 40 41 42 #if defined(USE_WINSOCK) && defined(CARES_THREADS) 43 44 #ifdef HAVE_LIMITS_H 45 # include <limits.h> 46 #endif 47 48 /* IMPLEMENTATION NOTES 49 * ==================== 50 * 51 * This implementation uses some undocumented functionality within Windows for 52 * monitoring sockets. The Ancillary Function Driver (AFD) is the low level 53 * implementation that Winsock2 sits on top of. Winsock2 unfortunately does 54 * not expose the equivalent of epoll() or kqueue(), but it is possible to 55 * access AFD directly and use along with IOCP to simulate the functionality. 56 * We want to use IOCP if possible as it gives us the ability to monitor more 57 * than just sockets (WSAPoll is not an option), and perform arbitrary callbacks 58 * which means we can hook in non-socket related events. 59 * 60 * The information for this implementation was gathered from "wepoll" and 61 * "libuv" which both use slight variants on this. We originally went with 62 * an implementation methodology more similar to "libuv", but we had a few 63 * user reports of crashes during shutdown and memory leaks due to some 64 * events not being delivered for cleanup of closed sockets. 65 * 66 * Initialization: 67 * 1. Dynamically load the NtDeviceIoControlFile, NtCreateFile, and 68 * NtCancelIoFileEx internal symbols from ntdll.dll. (Don't believe 69 * Microsoft's documentation for NtCancelIoFileEx as it documents an 70 * invalid prototype). These functions are to open a reference to the 71 * Ancillary Function Driver (AFD), and to submit and cancel POLL 72 * requests. 73 * 2. Create an IO Completion Port base handle via CreateIoCompletionPort() 74 * that all socket events will be delivered through. 75 * 3. Create a list of AFD Handles and track the number of poll requests 76 * per AFD handle. When we exceed a pre-determined limit of poll requests 77 * for a handle (128), we will automatically create a new handle. The 78 * reason behind this is NtCancelIoFileEx uses a horrible algorithm for 79 * issuing cancellations. See: 80 * https://github.com/python-trio/trio/issues/52#issuecomment-548215128 81 * 4. Create a callback to be used to be able to interrupt waiting for IOCP 82 * events, this may be called for allowing enqueuing of additional socket 83 * events or removing socket events. PostQueuedCompletionStatus() is the 84 * obvious choice. We can use the same container format, the event 85 * delivered won't have an OVERLAPPED pointer so we can differentiate from 86 * socket events. Use the container as the completion key. 87 * 88 * Socket Add: 89 * 1. Create/Allocate a container for holding metadata about a socket 90 * including: 91 * - SOCKET base_socket; 92 * - IO_STATUS_BLOCK iosb; -- Used by AFD POLL, returned as OVERLAPPED 93 * - AFD_POLL_INFO afd_poll_info; -- Used by AFD POLL 94 * - afd list node -- for tracking which AFD handle a POLL request was 95 * submitted to. 96 * 2. Call WSAIoctl(..., SIO_BASE_HANDLE, ...) to unwrap the SOCKET and get 97 * the "base socket" we can use for polling. It appears this may fail so 98 * we should call WSAIoctl(..., SIO_BSP_HANDLE_POLL, ...) as a fallback. 99 * 3. Submit AFD POLL request (see "AFD POLL Request" section) 100 * 4. Record a mapping between the "IO Status Block" and the socket container 101 * so when events are delivered we can dereference. 102 * 103 * Socket Delete: 104 * 1. Call 105 * NtCancelIoFileEx(afd, iosb, &temp_iosb); 106 * to cancel any pending operations. 107 * 2. Tag the socket container as being queued for deletion 108 * 3. Wait for an event to be delivered for the socket (cancel isn't 109 * immediate, it delivers an event to know its complete). Delete only once 110 * that event has been delivered. If we don't do this we could try to 111 * access free()'d memory at a later point. 112 * 113 * Socket Modify: 114 * 1. Call 115 * NtCancelIoFileEx(afd, iosb, &temp_iosb) 116 * to cancel any pending operation. 117 * 2. When the event comes through that the cancel is complete, enqueue 118 * another "AFD Poll Request" for the desired events. 119 * 120 * Event Wait: 121 * 1. Call GetQueuedCompletionStatusEx() with the base IOCP handle, a 122 * stack allocated array of OVERLAPPED_ENTRY's, and an appropriate 123 * timeout. 124 * 2. Iterate across returned events, if the lpOverlapped is NULL, then the 125 * the CompletionKey is a pointer to the container registered via 126 * PostQueuedCompletionStatus(), otherwise it is the "IO Status Block" 127 * registered with the "AFD Poll Request" which needs to be dereferenced 128 * to the "socket container". 129 * 3. If it is a "socket container", disassociate it from the afd list node 130 * it was previously submitted to. 131 * 4. If it is a "socket container" check to see if we are cleaning up, if so, 132 * clean it up. 133 * 5. If it is a "socket container" that is still valid, Submit an 134 * AFD POLL Request (see "AFD POLL Request"). We must re-enable the request 135 * each time we receive a response, it is not persistent. 136 * 6. Notify of any events received as indicated in the AFD_POLL_INFO 137 * Handles[0].Events (NOTE: check NumberOfHandles > 0, and the status in 138 * the IO_STATUS_BLOCK. If we received an AFD_POLL_LOCAL_CLOSE, clean up 139 * the connection like the integrator requested it to be cleaned up. 140 * 141 * AFD Poll Request: 142 * 1. Find an afd poll handle in the list that has fewer pending requests than 143 * the limit. 144 * 2. If an afd poll handle was not associated (e.g. due to all being over 145 * limit), create a new afd poll handle by calling NtCreateFile() 146 * with path \Device\Afd , then add the AFD handle to the IO Completion 147 * Port. We can leave the completion key as blank since events for 148 * multiple sockets will be delivered through this and we need to 149 * differentiate via the OVERLAPPED member returned. Add the new AFD 150 * handle to the list of handles. 151 * 3. Initialize the AFD_POLL_INFO structure: 152 * Exclusive = FALSE; // allow multiple requests 153 * NumberOfHandles = 1; 154 * Timeout.QuadPart = LLONG_MAX; 155 * Handles[0].Handle = (HANDLE)base_socket; 156 * Handles[0].Status = 0; 157 * Handles[0].Events = AFD_POLL_LOCAL_CLOSE + additional events to wait for 158 * such as AFD_POLL_RECEIVE, etc; 159 * 4. Zero out the IO_STATUS_BLOCK structures 160 * 5. Set the "Status" member of IO_STATUS_BLOCK to STATUS_PENDING 161 * 6. Call 162 * NtDeviceIoControlFile(afd, NULL, NULL, &iosb, 163 * &iosb, IOCTL_AFD_POLL 164 * &afd_poll_info, sizeof(afd_poll_info), 165 * &afd_poll_info, sizeof(afd_poll_info)); 166 * 167 * 168 * References: 169 * - https://github.com/piscisaureus/wepoll/ 170 * - https://github.com/libuv/libuv/ 171 */ 172 173 /* Cap the number of outstanding AFD poll requests per AFD handle due to known 174 * slowdowns with large lists and NtCancelIoFileEx() */ 175 # define AFD_POLL_PER_HANDLE 128 176 177 # include <stdarg.h> 178 179 /* # define CARES_DEBUG 1 */ 180 181 # ifdef __GNUC__ 182 # define CARES_PRINTF_LIKE(fmt, args) \ 183 __attribute__((format(printf, fmt, args))) 184 # else 185 # define CARES_PRINTF_LIKE(fmt, args) 186 # endif 187 188 static void CARES_DEBUG_LOG(const char *fmt, ...) CARES_PRINTF_LIKE(1, 2); 189 190 static void CARES_DEBUG_LOG(const char *fmt, ...) 191 { 192 va_list ap; 193 194 va_start(ap, fmt); 195 # ifdef CARES_DEBUG 196 vfprintf(stderr, fmt, ap); 197 fflush(stderr); 198 # endif 199 va_end(ap); 200 } 201 202 typedef struct { 203 /* Dynamically loaded symbols */ 204 NtCreateFile_t NtCreateFile; 205 NtDeviceIoControlFile_t NtDeviceIoControlFile; 206 NtCancelIoFileEx_t NtCancelIoFileEx; 207 208 /* Implementation details */ 209 ares_slist_t *afd_handles; 210 HANDLE iocp_handle; 211 212 /* IO_STATUS_BLOCK * -> ares_evsys_win32_eventdata_t * mapping. There is 213 * no completion key passed to IOCP with this method so we have to look 214 * up based on the lpOverlapped returned (which is mapped to IO_STATUS_BLOCK) 215 */ 216 ares_htable_vpvp_t *sockets; 217 218 /* Flag about whether or not we are shutting down */ 219 ares_bool_t is_shutdown; 220 } ares_evsys_win32_t; 221 222 typedef enum { 223 POLL_STATUS_NONE = 0, 224 POLL_STATUS_PENDING = 1, 225 POLL_STATUS_CANCEL = 2, 226 POLL_STATUS_DESTROY = 3 227 } poll_status_t; 228 229 typedef struct { 230 /*! Pointer to parent event container */ 231 ares_event_t *event; 232 /*! Socket passed in to monitor */ 233 SOCKET socket; 234 /*! Base socket derived from provided socket */ 235 SOCKET base_socket; 236 /*! Structure for submitting AFD POLL requests (Internals!) */ 237 AFD_POLL_INFO afd_poll_info; 238 /*! Status of current polling operation */ 239 poll_status_t poll_status; 240 /*! IO Status Block structure submitted with AFD POLL requests and returned 241 * with IOCP results as lpOverlapped (even though its a different structure) 242 */ 243 IO_STATUS_BLOCK iosb; 244 /*! AFD handle node an outstanding poll request is associated with */ 245 ares_slist_node_t *afd_handle_node; 246 /* Lock is only for PostQueuedCompletionStatus() to prevent multiple 247 * signals. Tracking via POLL_STATUS_PENDING/POLL_STATUS_NONE */ 248 ares_thread_mutex_t *lock; 249 } ares_evsys_win32_eventdata_t; 250 251 static size_t ares_evsys_win32_wait(ares_event_thread_t *e, 252 unsigned long timeout_ms); 253 254 static void ares_iocpevent_signal(const ares_event_t *event) 255 { 256 ares_event_thread_t *e = event->e; 257 ares_evsys_win32_t *ew = e->ev_sys_data; 258 ares_evsys_win32_eventdata_t *ed = event->data; 259 ares_bool_t queue_event = ARES_FALSE; 260 261 ares_thread_mutex_lock(ed->lock); 262 if (ed->poll_status != POLL_STATUS_PENDING) { 263 ed->poll_status = POLL_STATUS_PENDING; 264 queue_event = ARES_TRUE; 265 } 266 ares_thread_mutex_unlock(ed->lock); 267 268 if (!queue_event) { 269 return; 270 } 271 272 PostQueuedCompletionStatus(ew->iocp_handle, 0, (ULONG_PTR)event->data, NULL); 273 } 274 275 static void ares_iocpevent_cb(ares_event_thread_t *e, ares_socket_t fd, 276 void *data, ares_event_flags_t flags) 277 { 278 ares_evsys_win32_eventdata_t *ed = data; 279 (void)e; 280 (void)fd; 281 (void)flags; 282 ares_thread_mutex_lock(ed->lock); 283 ed->poll_status = POLL_STATUS_NONE; 284 ares_thread_mutex_unlock(ed->lock); 285 } 286 287 static ares_event_t *ares_iocpevent_create(ares_event_thread_t *e) 288 { 289 ares_event_t *event = NULL; 290 ares_status_t status; 291 292 status = 293 ares_event_update(&event, e, ARES_EVENT_FLAG_OTHER, ares_iocpevent_cb, 294 ARES_SOCKET_BAD, NULL, NULL, ares_iocpevent_signal); 295 if (status != ARES_SUCCESS) { 296 return NULL; 297 } 298 299 return event; 300 } 301 302 static void ares_evsys_win32_destroy(ares_event_thread_t *e) 303 { 304 ares_evsys_win32_t *ew = NULL; 305 306 if (e == NULL) { 307 return; 308 } 309 310 CARES_DEBUG_LOG("** Win32 Event Destroy\n"); 311 312 ew = e->ev_sys_data; 313 if (ew == NULL) { 314 return; 315 } 316 317 ew->is_shutdown = ARES_TRUE; 318 CARES_DEBUG_LOG(" ** waiting on %lu remaining sockets to be destroyed\n", 319 (unsigned long)ares_htable_vpvp_num_keys(ew->sockets)); 320 while (ares_htable_vpvp_num_keys(ew->sockets)) { 321 ares_evsys_win32_wait(e, 0); 322 } 323 CARES_DEBUG_LOG(" ** all sockets cleaned up\n"); 324 325 326 if (ew->iocp_handle != NULL) { 327 CloseHandle(ew->iocp_handle); 328 } 329 330 ares_slist_destroy(ew->afd_handles); 331 332 ares_htable_vpvp_destroy(ew->sockets); 333 334 ares_free(ew); 335 e->ev_sys_data = NULL; 336 } 337 338 typedef struct { 339 size_t poll_cnt; 340 HANDLE afd_handle; 341 } ares_afd_handle_t; 342 343 static void ares_afd_handle_destroy(void *arg) 344 { 345 ares_afd_handle_t *hnd = arg; 346 if (hnd != NULL && hnd->afd_handle != NULL) { 347 CloseHandle(hnd->afd_handle); 348 } 349 ares_free(hnd); 350 } 351 352 static int ares_afd_handle_cmp(const void *data1, const void *data2) 353 { 354 const ares_afd_handle_t *hnd1 = data1; 355 const ares_afd_handle_t *hnd2 = data2; 356 357 if (hnd1->poll_cnt > hnd2->poll_cnt) { 358 return 1; 359 } 360 if (hnd1->poll_cnt < hnd2->poll_cnt) { 361 return -1; 362 } 363 return 0; 364 } 365 366 static void fill_object_attributes(OBJECT_ATTRIBUTES *attr, 367 UNICODE_STRING *name, ULONG attributes) 368 { 369 memset(attr, 0, sizeof(*attr)); 370 attr->Length = sizeof(*attr); 371 attr->ObjectName = name; 372 attr->Attributes = attributes; 373 } 374 375 # define UNICODE_STRING_CONSTANT(s) \ 376 { (sizeof(s) - 1) * sizeof(wchar_t), sizeof(s) * sizeof(wchar_t), L##s } 377 378 static ares_slist_node_t *ares_afd_handle_create(ares_evsys_win32_t *ew) 379 { 380 UNICODE_STRING afd_device_name = UNICODE_STRING_CONSTANT("\\Device\\Afd"); 381 OBJECT_ATTRIBUTES afd_attributes; 382 NTSTATUS status; 383 IO_STATUS_BLOCK iosb; 384 ares_afd_handle_t *afd = ares_malloc_zero(sizeof(*afd)); 385 ares_slist_node_t *node = NULL; 386 if (afd == NULL) { 387 goto fail; 388 } 389 390 /* Open a handle to the AFD subsystem */ 391 fill_object_attributes(&afd_attributes, &afd_device_name, 0); 392 memset(&iosb, 0, sizeof(iosb)); 393 iosb.Status = STATUS_PENDING; 394 status = ew->NtCreateFile(&afd->afd_handle, SYNCHRONIZE, &afd_attributes, 395 &iosb, NULL, 0, FILE_SHARE_READ | FILE_SHARE_WRITE, 396 FILE_OPEN, 0, NULL, 0); 397 if (status != STATUS_SUCCESS) { 398 CARES_DEBUG_LOG("** Failed to create AFD endpoint\n"); 399 goto fail; 400 } 401 402 if (CreateIoCompletionPort(afd->afd_handle, ew->iocp_handle, 403 0 /* CompletionKey */, 0) == NULL) { 404 goto fail; 405 } 406 407 if (!SetFileCompletionNotificationModes(afd->afd_handle, 408 FILE_SKIP_SET_EVENT_ON_HANDLE)) { 409 goto fail; 410 } 411 412 node = ares_slist_insert(ew->afd_handles, afd); 413 if (node == NULL) { 414 goto fail; 415 } 416 417 return node; 418 419 fail: 420 421 ares_afd_handle_destroy(afd); 422 return NULL; 423 } 424 425 /* Fetch the lowest poll count entry, but if it exceeds the limit, create a 426 * new one and return that */ 427 static ares_slist_node_t *ares_afd_handle_fetch(ares_evsys_win32_t *ew) 428 { 429 ares_slist_node_t *node = ares_slist_node_first(ew->afd_handles); 430 ares_afd_handle_t *afd = ares_slist_node_val(node); 431 432 if (afd != NULL && afd->poll_cnt < AFD_POLL_PER_HANDLE) { 433 return node; 434 } 435 436 return ares_afd_handle_create(ew); 437 } 438 439 static ares_bool_t ares_evsys_win32_init(ares_event_thread_t *e) 440 { 441 ares_evsys_win32_t *ew = NULL; 442 HMODULE ntdll; 443 444 CARES_DEBUG_LOG("** Win32 Event Init\n"); 445 446 ew = ares_malloc_zero(sizeof(*ew)); 447 if (ew == NULL) { 448 return ARES_FALSE; 449 } 450 451 e->ev_sys_data = ew; 452 453 /* All apps should have ntdll.dll already loaded, so just get a handle to 454 * this */ 455 ntdll = GetModuleHandleA("ntdll.dll"); 456 if (ntdll == NULL) { 457 goto fail; 458 } 459 460 # ifdef __GNUC__ 461 # pragma GCC diagnostic push 462 # pragma GCC diagnostic ignored "-Wpedantic" 463 /* Without the (void *) cast we get: 464 * warning: cast between incompatible function types from 'FARPROC' {aka 'long 465 * long int (*)()'} to 'NTSTATUS (*)(...)'} [-Wcast-function-type] but with it 466 * we get: warning: ISO C forbids conversion of function pointer to object 467 * pointer type [-Wpedantic] look unsolvable short of killing the warning. 468 */ 469 # endif 470 471 /* Load Internal symbols not typically accessible */ 472 ew->NtCreateFile = 473 (NtCreateFile_t)(void *)GetProcAddress(ntdll, "NtCreateFile"); 474 ew->NtDeviceIoControlFile = (NtDeviceIoControlFile_t)(void *)GetProcAddress( 475 ntdll, "NtDeviceIoControlFile"); 476 ew->NtCancelIoFileEx = 477 (NtCancelIoFileEx_t)(void *)GetProcAddress(ntdll, "NtCancelIoFileEx"); 478 479 # ifdef __GNUC__ 480 # pragma GCC diagnostic pop 481 # endif 482 483 if (ew->NtCreateFile == NULL || ew->NtCancelIoFileEx == NULL || 484 ew->NtDeviceIoControlFile == NULL) { 485 goto fail; 486 } 487 488 ew->iocp_handle = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); 489 if (ew->iocp_handle == NULL) { 490 goto fail; 491 } 492 493 ew->afd_handles = ares_slist_create( 494 e->channel->rand_state, ares_afd_handle_cmp, ares_afd_handle_destroy); 495 if (ew->afd_handles == NULL) { 496 goto fail; 497 } 498 499 /* Create at least the first afd handle, so we know of any critical system 500 * issues during startup */ 501 if (ares_afd_handle_create(ew) == NULL) { 502 goto fail; 503 } 504 505 e->ev_signal = ares_iocpevent_create(e); 506 if (e->ev_signal == NULL) { 507 goto fail; 508 } 509 510 ew->sockets = ares_htable_vpvp_create(NULL, NULL); 511 if (ew->sockets == NULL) { 512 goto fail; 513 } 514 515 return ARES_TRUE; 516 517 fail: 518 ares_evsys_win32_destroy(e); 519 return ARES_FALSE; 520 } 521 522 static ares_socket_t ares_evsys_win32_basesocket(ares_socket_t socket) 523 { 524 while (1) { 525 DWORD bytes; /* Not used */ 526 ares_socket_t base_socket = ARES_SOCKET_BAD; 527 int rv; 528 529 rv = WSAIoctl(socket, SIO_BASE_HANDLE, NULL, 0, &base_socket, 530 sizeof(base_socket), &bytes, NULL, NULL); 531 if (rv != SOCKET_ERROR && base_socket != ARES_SOCKET_BAD) { 532 socket = base_socket; 533 break; 534 } 535 536 /* If we're here, an error occurred */ 537 if (GetLastError() == WSAENOTSOCK) { 538 /* This is critical, exit */ 539 return ARES_SOCKET_BAD; 540 } 541 542 /* Work around known bug in Komodia based LSPs, use ARES_BSP_HANDLE_POLL 543 * to retrieve the underlying socket to then loop and get the base socket: 544 * https://docs.microsoft.com/en-us/windows/win32/winsock/winsock-ioctls 545 * https://www.komodia.com/newwiki/index.php?title=Komodia%27s_Redirector_bug_fixes#Version_2.2.2.6 546 */ 547 base_socket = ARES_SOCKET_BAD; 548 rv = WSAIoctl(socket, SIO_BSP_HANDLE_POLL, NULL, 0, &base_socket, 549 sizeof(base_socket), &bytes, NULL, NULL); 550 551 if (rv != SOCKET_ERROR && base_socket != ARES_SOCKET_BAD && 552 base_socket != socket) { 553 socket = base_socket; 554 continue; /* loop! */ 555 } 556 557 return ARES_SOCKET_BAD; 558 } 559 560 return socket; 561 } 562 563 static ares_bool_t ares_evsys_win32_afd_enqueue(ares_event_t *event, 564 ares_event_flags_t flags) 565 { 566 ares_event_thread_t *e = event->e; 567 ares_evsys_win32_t *ew = e->ev_sys_data; 568 ares_evsys_win32_eventdata_t *ed = event->data; 569 ares_afd_handle_t *afd; 570 NTSTATUS status; 571 572 if (e == NULL || ed == NULL || ew == NULL) { 573 return ARES_FALSE; 574 } 575 576 /* Misuse */ 577 if (ed->poll_status != POLL_STATUS_NONE) { 578 return ARES_FALSE; 579 } 580 581 ed->afd_handle_node = ares_afd_handle_fetch(ew); 582 /* System resource issue? */ 583 if (ed->afd_handle_node == NULL) { 584 return ARES_FALSE; 585 } 586 587 afd = ares_slist_node_val(ed->afd_handle_node); 588 589 /* Enqueue AFD Poll */ 590 ed->afd_poll_info.Exclusive = FALSE; 591 ed->afd_poll_info.NumberOfHandles = 1; 592 ed->afd_poll_info.Timeout.QuadPart = LLONG_MAX; 593 ed->afd_poll_info.Handles[0].Handle = (HANDLE)ed->base_socket; 594 ed->afd_poll_info.Handles[0].Status = 0; 595 ed->afd_poll_info.Handles[0].Events = AFD_POLL_LOCAL_CLOSE; 596 597 if (flags & ARES_EVENT_FLAG_READ) { 598 ed->afd_poll_info.Handles[0].Events |= 599 (AFD_POLL_RECEIVE | AFD_POLL_DISCONNECT | AFD_POLL_ACCEPT | 600 AFD_POLL_ABORT); 601 } 602 if (flags & ARES_EVENT_FLAG_WRITE) { 603 ed->afd_poll_info.Handles[0].Events |= 604 (AFD_POLL_SEND | AFD_POLL_CONNECT_FAIL); 605 } 606 if (flags == 0) { 607 ed->afd_poll_info.Handles[0].Events |= AFD_POLL_DISCONNECT; 608 } 609 610 memset(&ed->iosb, 0, sizeof(ed->iosb)); 611 ed->iosb.Status = STATUS_PENDING; 612 613 status = ew->NtDeviceIoControlFile( 614 afd->afd_handle, NULL, NULL, &ed->iosb, &ed->iosb, IOCTL_AFD_POLL, 615 &ed->afd_poll_info, sizeof(ed->afd_poll_info), &ed->afd_poll_info, 616 sizeof(ed->afd_poll_info)); 617 if (status != STATUS_SUCCESS && status != STATUS_PENDING) { 618 CARES_DEBUG_LOG("** afd_enqueue ed=%p FAILED\n", (void *)ed); 619 ed->afd_handle_node = NULL; 620 return ARES_FALSE; 621 } 622 623 /* Record that we submitted a poll request to this handle and tell it to 624 * re-sort the node since we changed its sort value */ 625 afd->poll_cnt++; 626 ares_slist_node_reinsert(ed->afd_handle_node); 627 628 ed->poll_status = POLL_STATUS_PENDING; 629 CARES_DEBUG_LOG("++ afd_enqueue ed=%p flags=%X\n", (void *)ed, 630 (unsigned int)flags); 631 return ARES_TRUE; 632 } 633 634 static ares_bool_t ares_evsys_win32_afd_cancel(ares_evsys_win32_eventdata_t *ed) 635 { 636 IO_STATUS_BLOCK cancel_iosb; 637 ares_evsys_win32_t *ew; 638 NTSTATUS status; 639 ares_afd_handle_t *afd; 640 641 ew = ed->event->e->ev_sys_data; 642 643 /* Misuse */ 644 if (ed->poll_status != POLL_STATUS_PENDING) { 645 return ARES_FALSE; 646 } 647 648 afd = ares_slist_node_val(ed->afd_handle_node); 649 650 /* Misuse */ 651 if (afd == NULL) { 652 return ARES_FALSE; 653 } 654 655 ed->poll_status = POLL_STATUS_CANCEL; 656 657 /* Not pending, nothing to do. Most likely that means there is a pending 658 * event that hasn't yet been delivered otherwise it would be re-armed 659 * already */ 660 if (ed->iosb.Status != STATUS_PENDING) { 661 CARES_DEBUG_LOG("** cancel not needed for ed=%p\n", (void *)ed); 662 return ARES_FALSE; 663 } 664 665 status = ew->NtCancelIoFileEx(afd->afd_handle, &ed->iosb, &cancel_iosb); 666 667 CARES_DEBUG_LOG("** Enqueued cancel for ed=%p, status = %lX\n", (void *)ed, 668 status); 669 670 /* NtCancelIoFileEx() may return STATUS_NOT_FOUND if the operation completed 671 * just before calling NtCancelIoFileEx(), but we have not yet received the 672 * notification (but it should be queued for the next IOCP event). */ 673 if (status == STATUS_SUCCESS || status == STATUS_NOT_FOUND) { 674 return ARES_TRUE; 675 } 676 677 return ARES_FALSE; 678 } 679 680 static void ares_evsys_win32_eventdata_destroy(ares_evsys_win32_t *ew, 681 ares_evsys_win32_eventdata_t *ed) 682 { 683 if (ew == NULL || ed == NULL) { 684 return; 685 } 686 CARES_DEBUG_LOG("-- deleting ed=%p (%s)\n", (void *)ed, 687 (ed->socket == ARES_SOCKET_BAD) ? "data" : "socket"); 688 /* These type of handles are deferred destroy. Update tracking. */ 689 if (ed->socket != ARES_SOCKET_BAD) { 690 ares_htable_vpvp_remove(ew->sockets, &ed->iosb); 691 } 692 693 ares_thread_mutex_destroy(ed->lock); 694 695 if (ed->event != NULL) { 696 ed->event->data = NULL; 697 } 698 699 ares_free(ed); 700 } 701 702 static ares_bool_t ares_evsys_win32_event_add(ares_event_t *event) 703 { 704 ares_event_thread_t *e = event->e; 705 ares_evsys_win32_t *ew = e->ev_sys_data; 706 ares_evsys_win32_eventdata_t *ed; 707 ares_bool_t rc = ARES_FALSE; 708 709 ed = ares_malloc_zero(sizeof(*ed)); 710 ed->event = event; 711 ed->socket = event->fd; 712 ed->base_socket = ARES_SOCKET_BAD; 713 event->data = ed; 714 715 CARES_DEBUG_LOG("++ add ed=%p (%s) flags=%X\n", (void *)ed, 716 (ed->socket == ARES_SOCKET_BAD) ? "data" : "socket", 717 (unsigned int)event->flags); 718 719 /* Likely a signal event, not something we will directly handle. We create 720 * the ares_evsys_win32_eventdata_t as the placeholder to use as the 721 * IOCP Completion Key */ 722 if (ed->socket == ARES_SOCKET_BAD) { 723 ed->lock = ares_thread_mutex_create(); 724 if (ed->lock == NULL) { 725 goto done; 726 } 727 rc = ARES_TRUE; 728 goto done; 729 } 730 731 ed->base_socket = ares_evsys_win32_basesocket(ed->socket); 732 if (ed->base_socket == ARES_SOCKET_BAD) { 733 goto done; 734 } 735 736 if (!ares_htable_vpvp_insert(ew->sockets, &ed->iosb, ed)) { 737 goto done; 738 } 739 740 if (!ares_evsys_win32_afd_enqueue(event, event->flags)) { 741 goto done; 742 } 743 744 rc = ARES_TRUE; 745 746 done: 747 if (!rc) { 748 ares_evsys_win32_eventdata_destroy(ew, ed); 749 event->data = NULL; 750 } 751 return rc; 752 } 753 754 static void ares_evsys_win32_event_del(ares_event_t *event) 755 { 756 ares_evsys_win32_eventdata_t *ed = event->data; 757 758 /* Already cleaned up, likely a LOCAL_CLOSE */ 759 if (ed == NULL) { 760 return; 761 } 762 763 CARES_DEBUG_LOG("-- DELETE requested for ed=%p (%s)\n", (void *)ed, 764 (ed->socket != ARES_SOCKET_BAD) ? "socket" : "data"); 765 766 /* 767 * Cancel pending AFD Poll operation. 768 */ 769 if (ed->socket != ARES_SOCKET_BAD) { 770 ares_evsys_win32_afd_cancel(ed); 771 ed->poll_status = POLL_STATUS_DESTROY; 772 ed->event = NULL; 773 } else { 774 ares_evsys_win32_eventdata_destroy(event->e->ev_sys_data, ed); 775 } 776 777 event->data = NULL; 778 } 779 780 static void ares_evsys_win32_event_mod(ares_event_t *event, 781 ares_event_flags_t new_flags) 782 { 783 ares_evsys_win32_eventdata_t *ed = event->data; 784 785 /* Not for us */ 786 if (event->fd == ARES_SOCKET_BAD || ed == NULL) { 787 return; 788 } 789 790 CARES_DEBUG_LOG("** mod ed=%p new_flags=%X\n", (void *)ed, 791 (unsigned int)new_flags); 792 793 /* All we need to do is cancel the pending operation. When the event gets 794 * delivered for the cancellation, it will automatically re-enqueue a new 795 * event */ 796 ares_evsys_win32_afd_cancel(ed); 797 } 798 799 static ares_bool_t ares_evsys_win32_process_other_event( 800 ares_evsys_win32_t *ew, ares_evsys_win32_eventdata_t *ed, size_t i) 801 { 802 ares_event_t *event; 803 804 /* NOTE: do NOT dereference 'ed' if during shutdown as this could be an 805 * invalid pointer if the signal handle was cleaned up, but there was still a 806 * pending event! */ 807 808 if (ew->is_shutdown) { 809 CARES_DEBUG_LOG("\t\t** i=%lu, skip non-socket handle during shutdown\n", 810 (unsigned long)i); 811 return ARES_FALSE; 812 } 813 814 event = ed->event; 815 CARES_DEBUG_LOG("\t\t** i=%lu, ed=%p (data)\n", (unsigned long)i, (void *)ed); 816 817 event->cb(event->e, event->fd, event->data, ARES_EVENT_FLAG_OTHER); 818 return ARES_TRUE; 819 } 820 821 static ares_bool_t ares_evsys_win32_process_socket_event( 822 ares_evsys_win32_t *ew, ares_evsys_win32_eventdata_t *ed, size_t i) 823 { 824 ares_event_flags_t flags = 0; 825 ares_event_t *event = NULL; 826 ares_afd_handle_t *afd = NULL; 827 828 /* Shouldn't be possible */ 829 if (ed == NULL) { 830 CARES_DEBUG_LOG("\t\t** i=%lu, Invalid handle.\n", (unsigned long)i); 831 return ARES_FALSE; 832 } 833 834 event = ed->event; 835 836 CARES_DEBUG_LOG("\t\t** i=%lu, ed=%p (socket)\n", (unsigned long)i, 837 (void *)ed); 838 839 /* Process events */ 840 if (ed->poll_status == POLL_STATUS_PENDING && 841 ed->iosb.Status == STATUS_SUCCESS && 842 ed->afd_poll_info.NumberOfHandles > 0) { 843 if (ed->afd_poll_info.Handles[0].Events & 844 (AFD_POLL_RECEIVE | AFD_POLL_DISCONNECT | AFD_POLL_ACCEPT | 845 AFD_POLL_ABORT)) { 846 flags |= ARES_EVENT_FLAG_READ; 847 } 848 if (ed->afd_poll_info.Handles[0].Events & 849 (AFD_POLL_SEND | AFD_POLL_CONNECT_FAIL)) { 850 flags |= ARES_EVENT_FLAG_WRITE; 851 } 852 if (ed->afd_poll_info.Handles[0].Events & AFD_POLL_LOCAL_CLOSE) { 853 CARES_DEBUG_LOG("\t\t** ed=%p LOCAL CLOSE\n", (void *)ed); 854 ed->poll_status = POLL_STATUS_DESTROY; 855 } 856 } 857 858 CARES_DEBUG_LOG("\t\t** ed=%p, iosb status=%lX, poll_status=%d, flags=%X\n", 859 (void *)ed, (unsigned long)ed->iosb.Status, 860 (int)ed->poll_status, (unsigned int)flags); 861 862 /* Decrement poll count for AFD handle then resort, also disassociate 863 * with socket */ 864 afd = ares_slist_node_val(ed->afd_handle_node); 865 afd->poll_cnt--; 866 ares_slist_node_reinsert(ed->afd_handle_node); 867 ed->afd_handle_node = NULL; 868 869 /* Pending destroy, go ahead and kill it */ 870 if (ed->poll_status == POLL_STATUS_DESTROY) { 871 ares_evsys_win32_eventdata_destroy(ew, ed); 872 return ARES_FALSE; 873 } 874 875 ed->poll_status = POLL_STATUS_NONE; 876 877 /* Mask flags against current desired flags. We could have an event 878 * queued that is outdated. */ 879 flags &= event->flags; 880 881 /* Don't actually do anything with the event that was delivered as we are 882 * in a shutdown/cleanup process. Mostly just handling the delayed 883 * destruction of sockets */ 884 if (ew->is_shutdown) { 885 return ARES_FALSE; 886 } 887 888 /* Re-enqueue so we can get more events on the socket, we either 889 * received a real event, or a cancellation notice. Both cases we 890 * re-queue using the current configured event flags. 891 * 892 * If we can't re-enqueue, that likely means the socket has been 893 * closed, so we want to kill our reference to it 894 */ 895 if (!ares_evsys_win32_afd_enqueue(event, event->flags)) { 896 ares_evsys_win32_eventdata_destroy(ew, ed); 897 return ARES_FALSE; 898 } 899 900 /* No events we recognize to deliver */ 901 if (flags == 0) { 902 return ARES_FALSE; 903 } 904 905 event->cb(event->e, event->fd, event->data, flags); 906 return ARES_TRUE; 907 } 908 909 static size_t ares_evsys_win32_wait(ares_event_thread_t *e, 910 unsigned long timeout_ms) 911 { 912 ares_evsys_win32_t *ew = e->ev_sys_data; 913 OVERLAPPED_ENTRY entries[16]; 914 ULONG maxentries = sizeof(entries) / sizeof(*entries); 915 ULONG nentries; 916 BOOL status; 917 size_t i; 918 size_t cnt = 0; 919 DWORD tout = (timeout_ms == 0) ? INFINITE : (DWORD)timeout_ms; 920 921 CARES_DEBUG_LOG("** Wait Enter\n"); 922 /* Process in a loop for as long as it fills the entire entries buffer, and 923 * on subsequent attempts, ensure the timeout is 0 */ 924 do { 925 nentries = maxentries; 926 status = GetQueuedCompletionStatusEx(ew->iocp_handle, entries, nentries, 927 &nentries, tout, FALSE); 928 929 /* Next loop around, we want to return instantly if there are no events to 930 * be processed */ 931 tout = 0; 932 933 if (!status) { 934 break; 935 } 936 937 CARES_DEBUG_LOG("\t** GetQueuedCompletionStatusEx returned %lu entries\n", 938 (unsigned long)nentries); 939 for (i = 0; i < (size_t)nentries; i++) { 940 ares_evsys_win32_eventdata_t *ed = NULL; 941 ares_bool_t rc; 942 943 /* For things triggered via PostQueuedCompletionStatus() we have an 944 * lpCompletionKey we can just use. Otherwise we need to dereference the 945 * pointer returned in lpOverlapped to determine the referenced 946 * socket */ 947 if (entries[i].lpCompletionKey) { 948 ed = (ares_evsys_win32_eventdata_t *)entries[i].lpCompletionKey; 949 rc = ares_evsys_win32_process_other_event(ew, ed, i); 950 } else { 951 ed = ares_htable_vpvp_get_direct(ew->sockets, entries[i].lpOverlapped); 952 rc = ares_evsys_win32_process_socket_event(ew, ed, i); 953 } 954 955 /* We processed actual events */ 956 if (rc) { 957 cnt++; 958 } 959 } 960 } while (nentries == maxentries); 961 962 CARES_DEBUG_LOG("** Wait Exit\n"); 963 964 return cnt; 965 } 966 967 const ares_event_sys_t ares_evsys_win32 = { "win32", 968 ares_evsys_win32_init, 969 ares_evsys_win32_destroy, 970 ares_evsys_win32_event_add, 971 ares_evsys_win32_event_del, 972 ares_evsys_win32_event_mod, 973 ares_evsys_win32_wait }; 974 #endif 975 976 #if defined(__clang__) || defined(__GNUC__) 977 # pragma GCC diagnostic pop 978 #endif