quickjs-tart

quickjs-based runtime for wallet-core logic
Log | Files | Refs | README | LICENSE

ares_event_win32.c (32217B)


      1 /* MIT License
      2  *
      3  * Copyright (c) 2024 Brad House
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a copy
      6  * of this software and associated documentation files (the "Software"), to deal
      7  * in the Software without restriction, including without limitation the rights
      8  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      9  * copies of the Software, and to permit persons to whom the Software is
     10  * furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice (including the next
     13  * paragraph) shall be included in all copies or substantial portions of the
     14  * Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     22  * SOFTWARE.
     23  *
     24  * SPDX-License-Identifier: MIT
     25  */
     26 
     27 /* Uses an anonymous union */
     28 #if defined(__clang__) || defined(__GNUC__)
     29 #  pragma GCC diagnostic push
     30 #  if defined(__clang__)
     31 #    pragma GCC diagnostic ignored "-Wc11-extensions"
     32 #  else
     33 #    pragma GCC diagnostic ignored "-Wpedantic"
     34 #  endif
     35 #endif
     36 
     37 #include "ares_private.h"
     38 #include "ares_event.h"
     39 #include "ares_event_win32.h"
     40 
     41 
     42 #if defined(USE_WINSOCK) && defined(CARES_THREADS)
     43 
     44 #ifdef HAVE_LIMITS_H
     45 #  include <limits.h>
     46 #endif
     47 
     48 /* IMPLEMENTATION NOTES
     49  * ====================
     50  *
     51  * This implementation uses some undocumented functionality within Windows for
     52  * monitoring sockets. The Ancillary Function Driver (AFD) is the low level
     53  * implementation that Winsock2 sits on top of.  Winsock2 unfortunately does
     54  * not expose the equivalent of epoll() or kqueue(), but it is possible to
     55  * access AFD directly and use along with IOCP to simulate the functionality.
     56  * We want to use IOCP if possible as it gives us the ability to monitor more
     57  * than just sockets (WSAPoll is not an option), and perform arbitrary callbacks
     58  * which means we can hook in non-socket related events.
     59  *
     60  * The information for this implementation was gathered from "wepoll" and
     61  * "libuv" which both use slight variants on this.  We originally went with
     62  * an implementation methodology more similar to "libuv", but we had a few
     63  * user reports of crashes during shutdown and memory leaks due to some
     64  * events not being delivered for cleanup of closed sockets.
     65  *
     66  * Initialization:
     67  *   1. Dynamically load the NtDeviceIoControlFile, NtCreateFile, and
     68  *      NtCancelIoFileEx internal symbols from ntdll.dll. (Don't believe
     69  *      Microsoft's documentation for NtCancelIoFileEx as it documents an
     70  *      invalid prototype). These functions are to open a reference to the
     71  *      Ancillary Function Driver (AFD), and to submit and cancel POLL
     72  *      requests.
     73  *   2. Create an IO Completion Port base handle via CreateIoCompletionPort()
     74  *      that all socket events will be delivered through.
     75  *   3. Create a list of AFD Handles and track the number of poll requests
     76  *      per AFD handle.  When we exceed a pre-determined limit of poll requests
     77  *      for a handle (128), we will automatically create a new handle.  The
     78  *      reason behind this is NtCancelIoFileEx uses a horrible algorithm for
     79  *      issuing cancellations.  See:
     80  *      https://github.com/python-trio/trio/issues/52#issuecomment-548215128
     81  *   4. Create a callback to be used to be able to interrupt waiting for IOCP
     82  *      events, this may be called for allowing enqueuing of additional socket
     83  *      events or removing socket events. PostQueuedCompletionStatus() is the
     84  *      obvious choice.  We can use the same container format, the event
     85  *      delivered won't have an OVERLAPPED pointer so we can differentiate from
     86  *      socket events.  Use the container as the completion key.
     87  *
     88  * Socket Add:
     89  *   1. Create/Allocate a container for holding metadata about a socket
     90  *      including:
     91  *      - SOCKET base_socket;
     92  *      - IO_STATUS_BLOCK iosb; -- Used by AFD POLL, returned as OVERLAPPED
     93  *      - AFD_POLL_INFO afd_poll_info; -- Used by AFD POLL
     94  *      - afd list node -- for tracking which AFD handle a POLL request was
     95  *        submitted to.
     96  *   2. Call WSAIoctl(..., SIO_BASE_HANDLE, ...) to unwrap the SOCKET and get
     97  *      the "base socket" we can use for polling.  It appears this may fail so
     98  *      we should call WSAIoctl(..., SIO_BSP_HANDLE_POLL, ...) as a fallback.
     99  *   3. Submit AFD POLL request (see "AFD POLL Request" section)
    100  *   4. Record a mapping between the "IO Status Block" and the socket container
    101  *      so when events are delivered we can dereference.
    102  *
    103  * Socket Delete:
    104  *   1. Call
    105  *      NtCancelIoFileEx(afd, iosb, &temp_iosb);
    106  *      to cancel any pending operations.
    107  *   2. Tag the socket container as being queued for deletion
    108  *   3. Wait for an event to be delivered for the socket (cancel isn't
    109  *      immediate; it delivers an event once it's complete). Delete only once
    110  *      that event has been delivered.  If we don't do this we could try to
    111  *      access free()'d memory at a later point.
    112  *
    113  * Socket Modify:
    114  *   1. Call
    115  *      NtCancelIoFileEx(afd, iosb, &temp_iosb)
    116  *      to cancel any pending operation.
    117  *   2. When the event comes through that the cancel is complete, enqueue
    118  *      another "AFD Poll Request" for the desired events.
    119  *
    120  * Event Wait:
    121  *   1. Call GetQueuedCompletionStatusEx() with the base IOCP handle, a
    122  *      stack allocated array of OVERLAPPED_ENTRY's, and an appropriate
    123  *      timeout.
    124  *   2. Iterate across returned events.  If the lpOverlapped is NULL, then
    125  *      the CompletionKey is a pointer to the container registered via
    126  *      PostQueuedCompletionStatus(), otherwise it is the "IO Status Block"
    127  *      registered with the "AFD Poll Request" which needs to be dereferenced
    128  *      to the "socket container".
    129  *   3. If it is a "socket container", disassociate it from the afd list node
    130  *      it was previously submitted to.
    131  *   4. If it is a "socket container" check to see if we are cleaning up, if so,
    132  *      clean it up.
    133  *   5. If it is a "socket container" that is still valid, Submit an
    134  *      AFD POLL Request (see "AFD POLL Request"). We must re-enable the request
    135  *      each time we receive a response, it is not persistent.
    136  *   6. Notify of any events received as indicated in the AFD_POLL_INFO
    137  *      Handles[0].Events (NOTE: check NumberOfHandles > 0, and the status in
    138  *      the IO_STATUS_BLOCK).  If we received an AFD_POLL_LOCAL_CLOSE, clean up
    139  *      the connection like the integrator requested it to be cleaned up.
    140  *
    141  * AFD Poll Request:
    142  *   1. Find an afd poll handle in the list that has fewer pending requests than
    143  *      the limit.
    144  *   2. If an afd poll handle was not associated (e.g. due to all being over
    145  *      limit), create a new afd poll handle by calling NtCreateFile()
    146  *      with path \Device\Afd , then add the AFD handle to the IO Completion
    147  *      Port.  We can leave the completion key as blank since events for
    148  *      multiple sockets will be delivered through this and we need to
    149  *      differentiate via the OVERLAPPED member returned.  Add the new AFD
    150  *      handle to the list of handles.
    151  *   3. Initialize the AFD_POLL_INFO structure:
    152  *      Exclusive         = FALSE; // allow multiple requests
    153  *      NumberOfHandles   = 1;
    154  *      Timeout.QuadPart  = LLONG_MAX;
    155  *      Handles[0].Handle = (HANDLE)base_socket;
    156  *      Handles[0].Status = 0;
    157  *      Handles[0].Events = AFD_POLL_LOCAL_CLOSE + additional events to wait for
    158  *                          such as AFD_POLL_RECEIVE, etc;
    159  *   4. Zero out the IO_STATUS_BLOCK structures
    160  *   5. Set the "Status" member of IO_STATUS_BLOCK to STATUS_PENDING
    161  *   6. Call
    162  *      NtDeviceIoControlFile(afd, NULL, NULL, &iosb,
    163  *                            &iosb, IOCTL_AFD_POLL,
    164  *                            &afd_poll_info, sizeof(afd_poll_info),
    165  *                            &afd_poll_info, sizeof(afd_poll_info));
    166  *
    167  *
    168  * References:
    169  *   - https://github.com/piscisaureus/wepoll/
    170  *   - https://github.com/libuv/libuv/
    171  */
    172 
    173 /* Cap the number of outstanding AFD poll requests per AFD handle due to known
    174  * slowdowns with large lists and NtCancelIoFileEx() */
    175 #  define AFD_POLL_PER_HANDLE 128
    176 
    177 #  include <stdarg.h>
    178 
    179 /* #  define CARES_DEBUG 1 */
    180 
    181 #  ifdef __GNUC__
    182 #    define CARES_PRINTF_LIKE(fmt, args) \
    183       __attribute__((format(printf, fmt, args)))
    184 #  else
    185 #    define CARES_PRINTF_LIKE(fmt, args)
    186 #  endif
    187 
    188 static void CARES_DEBUG_LOG(const char *fmt, ...) CARES_PRINTF_LIKE(1, 2);
    189 
    190 static void CARES_DEBUG_LOG(const char *fmt, ...)
    191 {
    192   va_list ap;
    193 
    194   va_start(ap, fmt);
    195 #  ifdef CARES_DEBUG
    196   vfprintf(stderr, fmt, ap);
    197   fflush(stderr);
    198 #  endif
    199   va_end(ap);
    200 }
    201 
    202 typedef struct {
    203   /* Dynamically loaded symbols */
    204   NtCreateFile_t          NtCreateFile;
    205   NtDeviceIoControlFile_t NtDeviceIoControlFile;
    206   NtCancelIoFileEx_t      NtCancelIoFileEx;
    207 
    208   /* Implementation details */
    209   ares_slist_t           *afd_handles;
    210   HANDLE                  iocp_handle;
    211 
    212   /* IO_STATUS_BLOCK * -> ares_evsys_win32_eventdata_t * mapping.  There is
    213    * no completion key passed to IOCP with this method so we have to look
    214    * up based on the lpOverlapped returned (which is mapped to IO_STATUS_BLOCK)
    215    */
    216   ares_htable_vpvp_t     *sockets;
    217 
    218   /* Flag about whether or not we are shutting down */
    219   ares_bool_t             is_shutdown;
    220 } ares_evsys_win32_t;
    221 
    222 typedef enum {
    223   POLL_STATUS_NONE    = 0,
    224   POLL_STATUS_PENDING = 1,
    225   POLL_STATUS_CANCEL  = 2,
    226   POLL_STATUS_DESTROY = 3
    227 } poll_status_t;
    228 
    229 typedef struct {
    230   /*! Pointer to parent event container */
    231   ares_event_t        *event;
    232   /*! Socket passed in to monitor */
    233   SOCKET               socket;
    234   /*! Base socket derived from provided socket */
    235   SOCKET               base_socket;
    236   /*! Structure for submitting AFD POLL requests (Internals!) */
    237   AFD_POLL_INFO        afd_poll_info;
    238   /*! Status of current polling operation */
    239   poll_status_t        poll_status;
    240   /*! IO Status Block structure submitted with AFD POLL requests and returned
    241    *  with IOCP results as lpOverlapped (even though its a different structure)
    242    */
    243   IO_STATUS_BLOCK      iosb;
    244   /*! AFD handle node an outstanding poll request is associated with */
    245   ares_slist_node_t   *afd_handle_node;
    246   /* Lock is only for PostQueuedCompletionStatus() to prevent multiple
    247    * signals. Tracking via POLL_STATUS_PENDING/POLL_STATUS_NONE */
    248   ares_thread_mutex_t *lock;
    249 } ares_evsys_win32_eventdata_t;
    250 
    251 static size_t ares_evsys_win32_wait(ares_event_thread_t *e,
    252                                     unsigned long        timeout_ms);
    253 
    254 static void   ares_iocpevent_signal(const ares_event_t *event)
    255 {
    256   ares_event_thread_t          *e           = event->e;
    257   ares_evsys_win32_t           *ew          = e->ev_sys_data;
    258   ares_evsys_win32_eventdata_t *ed          = event->data;
    259   ares_bool_t                   queue_event = ARES_FALSE;
    260 
    261   ares_thread_mutex_lock(ed->lock);
    262   if (ed->poll_status != POLL_STATUS_PENDING) {
    263     ed->poll_status = POLL_STATUS_PENDING;
    264     queue_event     = ARES_TRUE;
    265   }
    266   ares_thread_mutex_unlock(ed->lock);
    267 
    268   if (!queue_event) {
    269     return;
    270   }
    271 
    272   PostQueuedCompletionStatus(ew->iocp_handle, 0, (ULONG_PTR)event->data, NULL);
    273 }
    274 
    275 static void ares_iocpevent_cb(ares_event_thread_t *e, ares_socket_t fd,
    276                               void *data, ares_event_flags_t flags)
    277 {
    278   ares_evsys_win32_eventdata_t *ed = data;
    279   (void)e;
    280   (void)fd;
    281   (void)flags;
    282   ares_thread_mutex_lock(ed->lock);
    283   ed->poll_status = POLL_STATUS_NONE;
    284   ares_thread_mutex_unlock(ed->lock);
    285 }
    286 
    287 static ares_event_t *ares_iocpevent_create(ares_event_thread_t *e)
    288 {
    289   ares_event_t *event = NULL;
    290   ares_status_t status;
    291 
    292   status =
    293     ares_event_update(&event, e, ARES_EVENT_FLAG_OTHER, ares_iocpevent_cb,
    294                       ARES_SOCKET_BAD, NULL, NULL, ares_iocpevent_signal);
    295   if (status != ARES_SUCCESS) {
    296     return NULL;
    297   }
    298 
    299   return event;
    300 }
    301 
    302 static void ares_evsys_win32_destroy(ares_event_thread_t *e)
    303 {
    304   ares_evsys_win32_t *ew = NULL;
    305 
    306   if (e == NULL) {
    307     return;
    308   }
    309 
    310   CARES_DEBUG_LOG("** Win32 Event Destroy\n");
    311 
    312   ew = e->ev_sys_data;
    313   if (ew == NULL) {
    314     return;
    315   }
    316 
    317   ew->is_shutdown = ARES_TRUE;
    318   CARES_DEBUG_LOG("  ** waiting on %lu remaining sockets to be destroyed\n",
    319                   (unsigned long)ares_htable_vpvp_num_keys(ew->sockets));
    320   while (ares_htable_vpvp_num_keys(ew->sockets)) {
    321     ares_evsys_win32_wait(e, 0);
    322   }
    323   CARES_DEBUG_LOG("  ** all sockets cleaned up\n");
    324 
    325 
    326   if (ew->iocp_handle != NULL) {
    327     CloseHandle(ew->iocp_handle);
    328   }
    329 
    330   ares_slist_destroy(ew->afd_handles);
    331 
    332   ares_htable_vpvp_destroy(ew->sockets);
    333 
    334   ares_free(ew);
    335   e->ev_sys_data = NULL;
    336 }
    337 
    338 typedef struct {
    339   size_t poll_cnt;
    340   HANDLE afd_handle;
    341 } ares_afd_handle_t;
    342 
    343 static void ares_afd_handle_destroy(void *arg)
    344 {
    345   ares_afd_handle_t *hnd = arg;
    346   if (hnd != NULL && hnd->afd_handle != NULL) {
    347     CloseHandle(hnd->afd_handle);
    348   }
    349   ares_free(hnd);
    350 }
    351 
    352 static int ares_afd_handle_cmp(const void *data1, const void *data2)
    353 {
    354   const ares_afd_handle_t *hnd1 = data1;
    355   const ares_afd_handle_t *hnd2 = data2;
    356 
    357   if (hnd1->poll_cnt > hnd2->poll_cnt) {
    358     return 1;
    359   }
    360   if (hnd1->poll_cnt < hnd2->poll_cnt) {
    361     return -1;
    362   }
    363   return 0;
    364 }
    365 
    366 static void fill_object_attributes(OBJECT_ATTRIBUTES *attr,
    367                                    UNICODE_STRING *name, ULONG attributes)
    368 {
    369   memset(attr, 0, sizeof(*attr));
    370   attr->Length     = sizeof(*attr);
    371   attr->ObjectName = name;
    372   attr->Attributes = attributes;
    373 }
    374 
    375 #  define UNICODE_STRING_CONSTANT(s) \
    376     { (sizeof(s) - 1) * sizeof(wchar_t), sizeof(s) * sizeof(wchar_t), L##s }
    377 
    378 static ares_slist_node_t *ares_afd_handle_create(ares_evsys_win32_t *ew)
    379 {
    380   UNICODE_STRING     afd_device_name = UNICODE_STRING_CONSTANT("\\Device\\Afd");
    381   OBJECT_ATTRIBUTES  afd_attributes;
    382   NTSTATUS           status;
    383   IO_STATUS_BLOCK    iosb;
    384   ares_afd_handle_t *afd  = ares_malloc_zero(sizeof(*afd));
    385   ares_slist_node_t *node = NULL;
    386   if (afd == NULL) {
    387     goto fail;
    388   }
    389 
    390   /* Open a handle to the AFD subsystem */
    391   fill_object_attributes(&afd_attributes, &afd_device_name, 0);
    392   memset(&iosb, 0, sizeof(iosb));
    393   iosb.Status = STATUS_PENDING;
    394   status      = ew->NtCreateFile(&afd->afd_handle, SYNCHRONIZE, &afd_attributes,
    395                                  &iosb, NULL, 0, FILE_SHARE_READ | FILE_SHARE_WRITE,
    396                                  FILE_OPEN, 0, NULL, 0);
    397   if (status != STATUS_SUCCESS) {
    398     CARES_DEBUG_LOG("** Failed to create AFD endpoint\n");
    399     goto fail;
    400   }
    401 
    402   if (CreateIoCompletionPort(afd->afd_handle, ew->iocp_handle,
    403                              0 /* CompletionKey */, 0) == NULL) {
    404     goto fail;
    405   }
    406 
    407   if (!SetFileCompletionNotificationModes(afd->afd_handle,
    408                                           FILE_SKIP_SET_EVENT_ON_HANDLE)) {
    409     goto fail;
    410   }
    411 
    412   node = ares_slist_insert(ew->afd_handles, afd);
    413   if (node == NULL) {
    414     goto fail;
    415   }
    416 
    417   return node;
    418 
    419 fail:
    420 
    421   ares_afd_handle_destroy(afd);
    422   return NULL;
    423 }
    424 
    425 /* Fetch the lowest poll count entry, but if it exceeds the limit, create a
    426  * new one and return that */
    427 static ares_slist_node_t *ares_afd_handle_fetch(ares_evsys_win32_t *ew)
    428 {
    429   ares_slist_node_t *node = ares_slist_node_first(ew->afd_handles);
    430   ares_afd_handle_t *afd  = ares_slist_node_val(node);
    431 
    432   if (afd != NULL && afd->poll_cnt < AFD_POLL_PER_HANDLE) {
    433     return node;
    434   }
    435 
    436   return ares_afd_handle_create(ew);
    437 }
    438 
    439 static ares_bool_t ares_evsys_win32_init(ares_event_thread_t *e)
    440 {
    441   ares_evsys_win32_t *ew = NULL;
    442   HMODULE             ntdll;
    443 
    444   CARES_DEBUG_LOG("** Win32 Event Init\n");
    445 
    446   ew = ares_malloc_zero(sizeof(*ew));
    447   if (ew == NULL) {
    448     return ARES_FALSE;
    449   }
    450 
    451   e->ev_sys_data = ew;
    452 
    453   /* All apps should have ntdll.dll already loaded, so just get a handle to
    454    * this */
    455   ntdll = GetModuleHandleA("ntdll.dll");
    456   if (ntdll == NULL) {
    457     goto fail;
    458   }
    459 
    460 #  ifdef __GNUC__
    461 #    pragma GCC diagnostic push
    462 #    pragma GCC diagnostic ignored "-Wpedantic"
    463 /* Without the (void *) cast we get:
    464  *  warning: cast between incompatible function types from 'FARPROC' {aka 'long
    465  * long int (*)()'} to 'NTSTATUS (*)(...)'} [-Wcast-function-type] but with it
    466  * we get: warning: ISO C forbids conversion of function pointer to object
    467  * pointer type [-Wpedantic] look unsolvable short of killing the warning.
    468  */
    469 #  endif
    470 
    471   /* Load Internal symbols not typically accessible */
    472   ew->NtCreateFile =
    473     (NtCreateFile_t)(void *)GetProcAddress(ntdll, "NtCreateFile");
    474   ew->NtDeviceIoControlFile = (NtDeviceIoControlFile_t)(void *)GetProcAddress(
    475     ntdll, "NtDeviceIoControlFile");
    476   ew->NtCancelIoFileEx =
    477     (NtCancelIoFileEx_t)(void *)GetProcAddress(ntdll, "NtCancelIoFileEx");
    478 
    479 #  ifdef __GNUC__
    480 #    pragma GCC diagnostic pop
    481 #  endif
    482 
    483   if (ew->NtCreateFile == NULL || ew->NtCancelIoFileEx == NULL ||
    484       ew->NtDeviceIoControlFile == NULL) {
    485     goto fail;
    486   }
    487 
    488   ew->iocp_handle = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
    489   if (ew->iocp_handle == NULL) {
    490     goto fail;
    491   }
    492 
    493   ew->afd_handles = ares_slist_create(
    494     e->channel->rand_state, ares_afd_handle_cmp, ares_afd_handle_destroy);
    495   if (ew->afd_handles == NULL) {
    496     goto fail;
    497   }
    498 
    499   /* Create at least the first afd handle, so we know of any critical system
    500    * issues during startup */
    501   if (ares_afd_handle_create(ew) == NULL) {
    502     goto fail;
    503   }
    504 
    505   e->ev_signal = ares_iocpevent_create(e);
    506   if (e->ev_signal == NULL) {
    507     goto fail;
    508   }
    509 
    510   ew->sockets = ares_htable_vpvp_create(NULL, NULL);
    511   if (ew->sockets == NULL) {
    512     goto fail;
    513   }
    514 
    515   return ARES_TRUE;
    516 
    517 fail:
    518   ares_evsys_win32_destroy(e);
    519   return ARES_FALSE;
    520 }
    521 
    522 static ares_socket_t ares_evsys_win32_basesocket(ares_socket_t socket)
    523 {
    524   while (1) {
    525     DWORD         bytes; /* Not used */
    526     ares_socket_t base_socket = ARES_SOCKET_BAD;
    527     int           rv;
    528 
    529     rv = WSAIoctl(socket, SIO_BASE_HANDLE, NULL, 0, &base_socket,
    530                   sizeof(base_socket), &bytes, NULL, NULL);
    531     if (rv != SOCKET_ERROR && base_socket != ARES_SOCKET_BAD) {
    532       socket = base_socket;
    533       break;
    534     }
    535 
    536     /* If we're here, an error occurred */
    537     if (GetLastError() == WSAENOTSOCK) {
    538       /* This is critical, exit */
    539       return ARES_SOCKET_BAD;
    540     }
    541 
    542     /* Work around known bug in Komodia based LSPs, use ARES_BSP_HANDLE_POLL
    543      * to retrieve the underlying socket to then loop and get the base socket:
    544      *  https://docs.microsoft.com/en-us/windows/win32/winsock/winsock-ioctls
    545      *  https://www.komodia.com/newwiki/index.php?title=Komodia%27s_Redirector_bug_fixes#Version_2.2.2.6
    546      */
    547     base_socket = ARES_SOCKET_BAD;
    548     rv          = WSAIoctl(socket, SIO_BSP_HANDLE_POLL, NULL, 0, &base_socket,
    549                            sizeof(base_socket), &bytes, NULL, NULL);
    550 
    551     if (rv != SOCKET_ERROR && base_socket != ARES_SOCKET_BAD &&
    552         base_socket != socket) {
    553       socket = base_socket;
    554       continue; /* loop! */
    555     }
    556 
    557     return ARES_SOCKET_BAD;
    558   }
    559 
    560   return socket;
    561 }
    562 
    563 static ares_bool_t ares_evsys_win32_afd_enqueue(ares_event_t      *event,
    564                                                 ares_event_flags_t flags)
    565 {
    566   ares_event_thread_t          *e  = event->e;
    567   ares_evsys_win32_t           *ew = e->ev_sys_data;
    568   ares_evsys_win32_eventdata_t *ed = event->data;
    569   ares_afd_handle_t            *afd;
    570   NTSTATUS                      status;
    571 
    572   if (e == NULL || ed == NULL || ew == NULL) {
    573     return ARES_FALSE;
    574   }
    575 
    576   /* Misuse */
    577   if (ed->poll_status != POLL_STATUS_NONE) {
    578     return ARES_FALSE;
    579   }
    580 
    581   ed->afd_handle_node = ares_afd_handle_fetch(ew);
    582   /* System resource issue? */
    583   if (ed->afd_handle_node == NULL) {
    584     return ARES_FALSE;
    585   }
    586 
    587   afd = ares_slist_node_val(ed->afd_handle_node);
    588 
    589   /* Enqueue AFD Poll */
    590   ed->afd_poll_info.Exclusive         = FALSE;
    591   ed->afd_poll_info.NumberOfHandles   = 1;
    592   ed->afd_poll_info.Timeout.QuadPart  = LLONG_MAX;
    593   ed->afd_poll_info.Handles[0].Handle = (HANDLE)ed->base_socket;
    594   ed->afd_poll_info.Handles[0].Status = 0;
    595   ed->afd_poll_info.Handles[0].Events = AFD_POLL_LOCAL_CLOSE;
    596 
    597   if (flags & ARES_EVENT_FLAG_READ) {
    598     ed->afd_poll_info.Handles[0].Events |=
    599       (AFD_POLL_RECEIVE | AFD_POLL_DISCONNECT | AFD_POLL_ACCEPT |
    600        AFD_POLL_ABORT);
    601   }
    602   if (flags & ARES_EVENT_FLAG_WRITE) {
    603     ed->afd_poll_info.Handles[0].Events |=
    604       (AFD_POLL_SEND | AFD_POLL_CONNECT_FAIL);
    605   }
    606   if (flags == 0) {
    607     ed->afd_poll_info.Handles[0].Events |= AFD_POLL_DISCONNECT;
    608   }
    609 
    610   memset(&ed->iosb, 0, sizeof(ed->iosb));
    611   ed->iosb.Status = STATUS_PENDING;
    612 
    613   status = ew->NtDeviceIoControlFile(
    614     afd->afd_handle, NULL, NULL, &ed->iosb, &ed->iosb, IOCTL_AFD_POLL,
    615     &ed->afd_poll_info, sizeof(ed->afd_poll_info), &ed->afd_poll_info,
    616     sizeof(ed->afd_poll_info));
    617   if (status != STATUS_SUCCESS && status != STATUS_PENDING) {
    618     CARES_DEBUG_LOG("** afd_enqueue ed=%p FAILED\n", (void *)ed);
    619     ed->afd_handle_node = NULL;
    620     return ARES_FALSE;
    621   }
    622 
    623   /* Record that we submitted a poll request to this handle and tell it to
    624    * re-sort the node since we changed its sort value */
    625   afd->poll_cnt++;
    626   ares_slist_node_reinsert(ed->afd_handle_node);
    627 
    628   ed->poll_status = POLL_STATUS_PENDING;
    629   CARES_DEBUG_LOG("++ afd_enqueue ed=%p flags=%X\n", (void *)ed,
    630                   (unsigned int)flags);
    631   return ARES_TRUE;
    632 }
    633 
    634 static ares_bool_t ares_evsys_win32_afd_cancel(ares_evsys_win32_eventdata_t *ed)
    635 {
    636   IO_STATUS_BLOCK     cancel_iosb;
    637   ares_evsys_win32_t *ew;
    638   NTSTATUS            status;
    639   ares_afd_handle_t  *afd;
    640 
    641   ew = ed->event->e->ev_sys_data;
    642 
    643   /* Misuse */
    644   if (ed->poll_status != POLL_STATUS_PENDING) {
    645     return ARES_FALSE;
    646   }
    647 
    648   afd = ares_slist_node_val(ed->afd_handle_node);
    649 
    650   /* Misuse */
    651   if (afd == NULL) {
    652     return ARES_FALSE;
    653   }
    654 
    655   ed->poll_status = POLL_STATUS_CANCEL;
    656 
    657   /* Not pending, nothing to do. Most likely that means there is a pending
    658    * event that hasn't yet been delivered otherwise it would be re-armed
    659    * already */
    660   if (ed->iosb.Status != STATUS_PENDING) {
    661     CARES_DEBUG_LOG("** cancel not needed for ed=%p\n", (void *)ed);
    662     return ARES_FALSE;
    663   }
    664 
    665   status = ew->NtCancelIoFileEx(afd->afd_handle, &ed->iosb, &cancel_iosb);
    666 
    667   CARES_DEBUG_LOG("** Enqueued cancel for ed=%p, status = %lX\n", (void *)ed,
    668                   status);
    669 
    670   /* NtCancelIoFileEx() may return STATUS_NOT_FOUND if the operation completed
    671    * just before calling NtCancelIoFileEx(), but we have not yet received the
    672    * notification (but it should be queued for the next IOCP event).  */
    673   if (status == STATUS_SUCCESS || status == STATUS_NOT_FOUND) {
    674     return ARES_TRUE;
    675   }
    676 
    677   return ARES_FALSE;
    678 }
    679 
    680 static void ares_evsys_win32_eventdata_destroy(ares_evsys_win32_t           *ew,
    681                                                ares_evsys_win32_eventdata_t *ed)
    682 {
    683   if (ew == NULL || ed == NULL) {
    684     return;
    685   }
    686   CARES_DEBUG_LOG("-- deleting ed=%p (%s)\n", (void *)ed,
    687                   (ed->socket == ARES_SOCKET_BAD) ? "data" : "socket");
    688   /* These type of handles are deferred destroy. Update tracking. */
    689   if (ed->socket != ARES_SOCKET_BAD) {
    690     ares_htable_vpvp_remove(ew->sockets, &ed->iosb);
    691   }
    692 
    693   ares_thread_mutex_destroy(ed->lock);
    694 
    695   if (ed->event != NULL) {
    696     ed->event->data = NULL;
    697   }
    698 
    699   ares_free(ed);
    700 }
    701 
    702 static ares_bool_t ares_evsys_win32_event_add(ares_event_t *event)
    703 {
    704   ares_event_thread_t          *e  = event->e;
    705   ares_evsys_win32_t           *ew = e->ev_sys_data;
    706   ares_evsys_win32_eventdata_t *ed;
    707   ares_bool_t                   rc = ARES_FALSE;
    708 
    709   ed              = ares_malloc_zero(sizeof(*ed));
    710   ed->event       = event;
    711   ed->socket      = event->fd;
    712   ed->base_socket = ARES_SOCKET_BAD;
    713   event->data     = ed;
    714 
    715   CARES_DEBUG_LOG("++ add ed=%p (%s) flags=%X\n", (void *)ed,
    716                   (ed->socket == ARES_SOCKET_BAD) ? "data" : "socket",
    717                   (unsigned int)event->flags);
    718 
    719   /* Likely a signal event, not something we will directly handle.  We create
    720    * the ares_evsys_win32_eventdata_t as the placeholder to use as the
    721    * IOCP Completion Key */
    722   if (ed->socket == ARES_SOCKET_BAD) {
    723     ed->lock = ares_thread_mutex_create();
    724     if (ed->lock == NULL) {
    725       goto done;
    726     }
    727     rc = ARES_TRUE;
    728     goto done;
    729   }
    730 
    731   ed->base_socket = ares_evsys_win32_basesocket(ed->socket);
    732   if (ed->base_socket == ARES_SOCKET_BAD) {
    733     goto done;
    734   }
    735 
    736   if (!ares_htable_vpvp_insert(ew->sockets, &ed->iosb, ed)) {
    737     goto done;
    738   }
    739 
    740   if (!ares_evsys_win32_afd_enqueue(event, event->flags)) {
    741     goto done;
    742   }
    743 
    744   rc = ARES_TRUE;
    745 
    746 done:
    747   if (!rc) {
    748     ares_evsys_win32_eventdata_destroy(ew, ed);
    749     event->data = NULL;
    750   }
    751   return rc;
    752 }
    753 
    754 static void ares_evsys_win32_event_del(ares_event_t *event)
    755 {
    756   ares_evsys_win32_eventdata_t *ed = event->data;
    757 
    758   /* Already cleaned up, likely a LOCAL_CLOSE */
    759   if (ed == NULL) {
    760     return;
    761   }
    762 
    763   CARES_DEBUG_LOG("-- DELETE requested for ed=%p (%s)\n", (void *)ed,
    764                   (ed->socket != ARES_SOCKET_BAD) ? "socket" : "data");
    765 
    766   /*
    767    * Cancel pending AFD Poll operation.
    768    */
    769   if (ed->socket != ARES_SOCKET_BAD) {
    770     ares_evsys_win32_afd_cancel(ed);
    771     ed->poll_status = POLL_STATUS_DESTROY;
    772     ed->event       = NULL;
    773   } else {
    774     ares_evsys_win32_eventdata_destroy(event->e->ev_sys_data, ed);
    775   }
    776 
    777   event->data = NULL;
    778 }
    779 
    780 static void ares_evsys_win32_event_mod(ares_event_t      *event,
    781                                        ares_event_flags_t new_flags)
    782 {
    783   ares_evsys_win32_eventdata_t *ed = event->data;
    784 
    785   /* Not for us */
    786   if (event->fd == ARES_SOCKET_BAD || ed == NULL) {
    787     return;
    788   }
    789 
    790   CARES_DEBUG_LOG("** mod ed=%p new_flags=%X\n", (void *)ed,
    791                   (unsigned int)new_flags);
    792 
    793   /* All we need to do is cancel the pending operation.  When the event gets
    794    * delivered for the cancellation, it will automatically re-enqueue a new
    795    * event */
    796   ares_evsys_win32_afd_cancel(ed);
    797 }
    798 
    799 static ares_bool_t ares_evsys_win32_process_other_event(
    800   ares_evsys_win32_t *ew, ares_evsys_win32_eventdata_t *ed, size_t i)
    801 {
    802   ares_event_t *event;
    803 
    804   /* NOTE: do NOT dereference 'ed' if during shutdown as this could be an
    805    * invalid pointer if the signal handle was cleaned up, but there was still a
    806    * pending event! */
    807 
    808   if (ew->is_shutdown) {
    809     CARES_DEBUG_LOG("\t\t** i=%lu, skip non-socket handle during shutdown\n",
    810                     (unsigned long)i);
    811     return ARES_FALSE;
    812   }
    813 
    814   event = ed->event;
    815   CARES_DEBUG_LOG("\t\t** i=%lu, ed=%p (data)\n", (unsigned long)i, (void *)ed);
    816 
    817   event->cb(event->e, event->fd, event->data, ARES_EVENT_FLAG_OTHER);
    818   return ARES_TRUE;
    819 }
    820 
    821 static ares_bool_t ares_evsys_win32_process_socket_event(
    822   ares_evsys_win32_t *ew, ares_evsys_win32_eventdata_t *ed, size_t i)
    823 {
    824   ares_event_flags_t flags = 0;
    825   ares_event_t      *event = NULL;
    826   ares_afd_handle_t *afd   = NULL;
    827 
    828   /* Shouldn't be possible */
    829   if (ed == NULL) {
    830     CARES_DEBUG_LOG("\t\t** i=%lu, Invalid handle.\n", (unsigned long)i);
    831     return ARES_FALSE;
    832   }
    833 
    834   event = ed->event;
    835 
    836   CARES_DEBUG_LOG("\t\t** i=%lu, ed=%p (socket)\n", (unsigned long)i,
    837                   (void *)ed);
    838 
    839   /* Process events */
    840   if (ed->poll_status == POLL_STATUS_PENDING &&
    841       ed->iosb.Status == STATUS_SUCCESS &&
    842       ed->afd_poll_info.NumberOfHandles > 0) {
    843     if (ed->afd_poll_info.Handles[0].Events &
    844         (AFD_POLL_RECEIVE | AFD_POLL_DISCONNECT | AFD_POLL_ACCEPT |
    845          AFD_POLL_ABORT)) {
    846       flags |= ARES_EVENT_FLAG_READ;
    847     }
    848     if (ed->afd_poll_info.Handles[0].Events &
    849         (AFD_POLL_SEND | AFD_POLL_CONNECT_FAIL)) {
    850       flags |= ARES_EVENT_FLAG_WRITE;
    851     }
    852     if (ed->afd_poll_info.Handles[0].Events & AFD_POLL_LOCAL_CLOSE) {
    853       CARES_DEBUG_LOG("\t\t** ed=%p LOCAL CLOSE\n", (void *)ed);
    854       ed->poll_status = POLL_STATUS_DESTROY;
    855     }
    856   }
    857 
    858   CARES_DEBUG_LOG("\t\t** ed=%p, iosb status=%lX, poll_status=%d, flags=%X\n",
    859                   (void *)ed, (unsigned long)ed->iosb.Status,
    860                   (int)ed->poll_status, (unsigned int)flags);
    861 
    862   /* Decrement poll count for AFD handle then resort, also disassociate
    863    * with socket */
    864   afd = ares_slist_node_val(ed->afd_handle_node);
    865   afd->poll_cnt--;
    866   ares_slist_node_reinsert(ed->afd_handle_node);
    867   ed->afd_handle_node = NULL;
    868 
    869   /* Pending destroy, go ahead and kill it */
    870   if (ed->poll_status == POLL_STATUS_DESTROY) {
    871     ares_evsys_win32_eventdata_destroy(ew, ed);
    872     return ARES_FALSE;
    873   }
    874 
    875   ed->poll_status = POLL_STATUS_NONE;
    876 
    877   /* Mask flags against current desired flags.  We could have an event
    878    * queued that is outdated. */
    879   flags &= event->flags;
    880 
    881   /* Don't actually do anything with the event that was delivered as we are
    882    * in a shutdown/cleanup process.  Mostly just handling the delayed
    883    * destruction of sockets */
    884   if (ew->is_shutdown) {
    885     return ARES_FALSE;
    886   }
    887 
    888   /* Re-enqueue so we can get more events on the socket, we either
    889    * received a real event, or a cancellation notice.  Both cases we
    890    * re-queue using the current configured event flags.
    891    *
    892    * If we can't re-enqueue, that likely means the socket has been
    893    * closed, so we want to kill our reference to it
    894    */
    895   if (!ares_evsys_win32_afd_enqueue(event, event->flags)) {
    896     ares_evsys_win32_eventdata_destroy(ew, ed);
    897     return ARES_FALSE;
    898   }
    899 
    900   /* No events we recognize to deliver */
    901   if (flags == 0) {
    902     return ARES_FALSE;
    903   }
    904 
    905   event->cb(event->e, event->fd, event->data, flags);
    906   return ARES_TRUE;
    907 }
    908 
    909 static size_t ares_evsys_win32_wait(ares_event_thread_t *e,
    910                                     unsigned long        timeout_ms)
    911 {
    912   ares_evsys_win32_t *ew = e->ev_sys_data;
    913   OVERLAPPED_ENTRY    entries[16];
    914   ULONG               maxentries = sizeof(entries) / sizeof(*entries);
    915   ULONG               nentries;
    916   BOOL                status;
    917   size_t              i;
    918   size_t              cnt  = 0;
    919   DWORD               tout = (timeout_ms == 0) ? INFINITE : (DWORD)timeout_ms;
    920 
    921   CARES_DEBUG_LOG("** Wait Enter\n");
    922   /* Process in a loop for as long as it fills the entire entries buffer, and
    923    * on subsequent attempts, ensure the timeout is 0 */
    924   do {
    925     nentries = maxentries;
    926     status   = GetQueuedCompletionStatusEx(ew->iocp_handle, entries, nentries,
    927                                            &nentries, tout, FALSE);
    928 
    929     /* Next loop around, we want to return instantly if there are no events to
    930      * be processed */
    931     tout = 0;
    932 
    933     if (!status) {
    934       break;
    935     }
    936 
    937     CARES_DEBUG_LOG("\t** GetQueuedCompletionStatusEx returned %lu entries\n",
    938                     (unsigned long)nentries);
    939     for (i = 0; i < (size_t)nentries; i++) {
    940       ares_evsys_win32_eventdata_t *ed = NULL;
    941       ares_bool_t                   rc;
    942 
    943       /* For things triggered via PostQueuedCompletionStatus() we have an
    944        * lpCompletionKey we can just use.  Otherwise we need to dereference the
    945        * pointer returned in lpOverlapped to determine the referenced
    946        * socket */
    947       if (entries[i].lpCompletionKey) {
    948         ed = (ares_evsys_win32_eventdata_t *)entries[i].lpCompletionKey;
    949         rc = ares_evsys_win32_process_other_event(ew, ed, i);
    950       } else {
    951         ed = ares_htable_vpvp_get_direct(ew->sockets, entries[i].lpOverlapped);
    952         rc = ares_evsys_win32_process_socket_event(ew, ed, i);
    953       }
    954 
    955       /* We processed actual events */
    956       if (rc) {
    957         cnt++;
    958       }
    959     }
    960   } while (nentries == maxentries);
    961 
    962   CARES_DEBUG_LOG("** Wait Exit\n");
    963 
    964   return cnt;
    965 }
    966 
    967 const ares_event_sys_t ares_evsys_win32 = { "win32",
    968                                             ares_evsys_win32_init,
    969                                             ares_evsys_win32_destroy,
    970                                             ares_evsys_win32_event_add,
    971                                             ares_evsys_win32_event_del,
    972                                             ares_evsys_win32_event_mod,
    973                                             ares_evsys_win32_wait };
    974 #endif
    975 
    976 #if defined(__clang__) || defined(__GNUC__)
    977 #  pragma GCC diagnostic pop
    978 #endif