ares_metrics.c (9442B)
1 /* MIT License 2 * 3 * Copyright (c) 2024 Brad House 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a copy 6 * of this software and associated documentation files (the "Software"), to deal 7 * in the Software without restriction, including without limitation the rights 8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 * copies of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 * SPDX-License-Identifier: MIT 25 */ 26 27 28 /* IMPLEMENTATION NOTES 29 * ==================== 30 * 31 * With very little effort we should be able to determine fairly proper timeouts 32 * we can use based on prior query history. We track in order to be able to 33 * auto-scale when network conditions change (e.g. maybe there is a provider 34 * failover and timings change due to that). Apple appears to do this within 35 * their system resolver in MacOS. Obviously we should have a minimum, maximum, 36 * and initial value to make sure the algorithm doesn't somehow go off the 37 * rails. 38 * 39 * Values: 40 * - Minimum Timeout: 250ms (approximate RTT half-way around the globe) 41 * - Maximum Timeout: 5000ms (Recommended timeout in RFC 1123), can be reduced 42 * by ARES_OPT_MAXTIMEOUTMS, but otherwise the bound specified by the option 43 * caps the retry timeout. 44 * - Initial Timeout: User-specified via configuration or ARES_OPT_TIMEOUTMS 45 * - Average latency multiplier: 5x (a local DNS server returning a cached value 46 * will be quicker than if it needs to recurse so we need to account for this) 47 * - Minimum Count for Average: 3. This is the minimum number of queries we 48 * need to form an average for the bucket. 49 * 50 * Per-server buckets for tracking latency over time (these are ephemeral 51 * meaning they don't persist once a channel is destroyed). We record both the 52 * current timespan for the bucket and the immediate preceding timespan in case 53 * of roll-overs we can still maintain recent metrics for calculations: 54 * - 1 minute 55 * - 15 minutes 56 * - 1 hr 57 * - 1 day 58 * - since inception 59 * 60 * Each bucket would contain: 61 * - timestamp (divided by interval) 62 * - minimum latency 63 * - maximum latency 64 * - total time 65 * - count 66 * NOTE: average latency is (total time / count), we will calculate this 67 * dynamically when needed 68 * 69 * Basic algorithm for calculating timeout to use would be: 70 * - Scan from most recent bucket to least recent 71 * - Check timestamp of bucket, if doesn't match current time, continue to next 72 * bucket 73 * - Check count of bucket, if its not at least the "Minimum Count for Average", 74 * check the previous bucket, otherwise continue to next bucket 75 * - If we reached the end with no bucket match, use "Initial Timeout" 76 * - If bucket is selected, take ("total time" / count) as Average latency, 77 * multiply by "Average Latency Multiplier", bound by "Minimum Timeout" and 78 * "Maximum Timeout" 79 * NOTE: The timeout calculated may not be the timeout used. If we are retrying 80 * the query on the same server another time, then it will use a larger value 81 * 82 * On each query reply where the response is legitimate (proper response or 83 * NXDOMAIN) and not something like a server error: 84 * - Cycle through each bucket in order 85 * - Check timestamp of bucket against current timestamp, if out of date 86 * overwrite previous entry with values, clear current values 87 * - Compare current minimum and maximum recorded latency against query time and 88 * adjust if necessary 89 * - Increment "count" by 1 and "total time" by the query time 90 * 91 * Other Notes: 92 * - This is always-on, the only user-configurable value is the initial 93 * timeout which will simply re-uses the current option. 94 * - Minimum and Maximum latencies for a bucket are currently unused but are 95 * there in case we find a need for them in the future. 96 */ 97 98 #include "ares_private.h" 99 100 /*! Minimum timeout value. Chosen due to it being approximately RTT half-way 101 * around the world */ 102 #define MIN_TIMEOUT_MS 250 103 104 /*! Multiplier to apply to average latency to come up with an initial timeout */ 105 #define AVG_TIMEOUT_MULTIPLIER 5 106 107 /*! Upper timeout bounds, only used if channel->maxtimeout not set */ 108 #define MAX_TIMEOUT_MS 5000 109 110 /*! Minimum queries required to form an average */ 111 #define MIN_COUNT_FOR_AVERAGE 3 112 113 static time_t ares_metric_timestamp(ares_server_bucket_t bucket, 114 const ares_timeval_t *now, 115 ares_bool_t is_previous) 116 { 117 time_t divisor = 1; /* Silence bogus MSVC warning by setting default value */ 118 119 switch (bucket) { 120 case ARES_METRIC_1MINUTE: 121 divisor = 60; 122 break; 123 case ARES_METRIC_15MINUTES: 124 divisor = 15 * 60; 125 break; 126 case ARES_METRIC_1HOUR: 127 divisor = 60 * 60; 128 break; 129 case ARES_METRIC_1DAY: 130 divisor = 24 * 60 * 60; 131 break; 132 case ARES_METRIC_INCEPTION: 133 return is_previous ? 0 : 1; 134 case ARES_METRIC_COUNT: 135 return 0; /* Invalid! */ 136 } 137 138 if (is_previous) { 139 if (divisor >= now->sec) { 140 return 0; 141 } 142 return (time_t)((now->sec - divisor) / divisor); 143 } 144 145 return (time_t)(now->sec / divisor); 146 } 147 148 void ares_metrics_record(const ares_query_t *query, ares_server_t *server, 149 ares_status_t status, const ares_dns_record_t *dnsrec) 150 { 151 ares_timeval_t now; 152 ares_timeval_t tvdiff; 153 unsigned int query_ms; 154 ares_dns_rcode_t rcode; 155 ares_server_bucket_t i; 156 157 if (status != ARES_SUCCESS) { 158 return; 159 } 160 161 if (server == NULL) { 162 return; 163 } 164 165 ares_tvnow(&now); 166 167 rcode = ares_dns_record_get_rcode(dnsrec); 168 if (rcode != ARES_RCODE_NOERROR && rcode != ARES_RCODE_NXDOMAIN) { 169 return; 170 } 171 172 ares_timeval_diff(&tvdiff, &query->ts, &now); 173 query_ms = (unsigned int)((tvdiff.sec * 1000) + (tvdiff.usec / 1000)); 174 if (query_ms == 0) { 175 query_ms = 1; 176 } 177 178 /* Place in each bucket */ 179 for (i = 0; i < ARES_METRIC_COUNT; i++) { 180 time_t ts = ares_metric_timestamp(i, &now, ARES_FALSE); 181 182 /* Copy metrics to prev and clear */ 183 if (ts != server->metrics[i].ts) { 184 server->metrics[i].prev_ts = server->metrics[i].ts; 185 server->metrics[i].prev_total_ms = server->metrics[i].total_ms; 186 server->metrics[i].prev_total_count = server->metrics[i].total_count; 187 server->metrics[i].ts = ts; 188 server->metrics[i].latency_min_ms = 0; 189 server->metrics[i].latency_max_ms = 0; 190 server->metrics[i].total_ms = 0; 191 server->metrics[i].total_count = 0; 192 } 193 194 if (server->metrics[i].latency_min_ms == 0 || 195 server->metrics[i].latency_min_ms > query_ms) { 196 server->metrics[i].latency_min_ms = query_ms; 197 } 198 199 if (query_ms > server->metrics[i].latency_max_ms) { 200 server->metrics[i].latency_max_ms = query_ms; 201 } 202 203 server->metrics[i].total_count++; 204 server->metrics[i].total_ms += (ares_uint64_t)query_ms; 205 } 206 } 207 208 size_t ares_metrics_server_timeout(const ares_server_t *server, 209 const ares_timeval_t *now) 210 { 211 const ares_channel_t *channel = server->channel; 212 ares_server_bucket_t i; 213 size_t timeout_ms = 0; 214 size_t max_timeout_ms; 215 216 for (i = 0; i < ARES_METRIC_COUNT; i++) { 217 time_t ts = ares_metric_timestamp(i, now, ARES_FALSE); 218 219 /* This ts has been invalidated, see if we should use the previous 220 * time period */ 221 if (ts != server->metrics[i].ts || 222 server->metrics[i].total_count < MIN_COUNT_FOR_AVERAGE) { 223 time_t prev_ts = ares_metric_timestamp(i, now, ARES_TRUE); 224 if (prev_ts != server->metrics[i].prev_ts || 225 server->metrics[i].prev_total_count < MIN_COUNT_FOR_AVERAGE) { 226 /* Move onto next bucket */ 227 continue; 228 } 229 /* Calculate average time for previous bucket */ 230 timeout_ms = (size_t)(server->metrics[i].prev_total_ms / 231 server->metrics[i].prev_total_count); 232 } else { 233 /* Calculate average time for current bucket*/ 234 timeout_ms = 235 (size_t)(server->metrics[i].total_ms / server->metrics[i].total_count); 236 } 237 238 /* Multiply average by constant to get timeout value */ 239 timeout_ms *= AVG_TIMEOUT_MULTIPLIER; 240 break; 241 } 242 243 /* If we're here, that means its the first query for the server, so we just 244 * use the initial default timeout */ 245 if (timeout_ms == 0) { 246 timeout_ms = channel->timeout; 247 } 248 249 /* don't go below lower bounds */ 250 if (timeout_ms < MIN_TIMEOUT_MS) { 251 timeout_ms = MIN_TIMEOUT_MS; 252 } 253 254 /* don't go above upper bounds */ 255 max_timeout_ms = channel->maxtimeout ? channel->maxtimeout : MAX_TIMEOUT_MS; 256 if (timeout_ms > max_timeout_ms) { 257 timeout_ms = max_timeout_ms; 258 } 259 260 return timeout_ms; 261 }