parsedate.c (17899B)
1 /*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at https://curl.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 * SPDX-License-Identifier: curl 22 * 23 ***************************************************************************/ 24 /* 25 A brief summary of the date string formats this parser groks: 26 27 RFC 2616 3.3.1 28 29 Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 30 Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 31 Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format 32 33 we support dates without week day name: 34 35 06 Nov 1994 08:49:37 GMT 36 06-Nov-94 08:49:37 GMT 37 Nov 6 08:49:37 1994 38 39 without the time zone: 40 41 06 Nov 1994 08:49:37 42 06-Nov-94 08:49:37 43 44 weird order: 45 46 1994 Nov 6 08:49:37 (GNU date fails) 47 GMT 08:49:37 06-Nov-94 Sunday 48 94 6 Nov 08:49:37 (GNU date fails) 49 50 time left out: 51 52 1994 Nov 6 53 06-Nov-94 54 Sun Nov 6 94 55 56 unusual separators: 57 58 1994.Nov.6 59 Sun/Nov/6/94/GMT 60 61 commonly used time zone names: 62 63 Sun, 06 Nov 1994 08:49:37 CET 64 06 Nov 1994 08:49:37 EST 65 66 time zones specified using RFC822 style: 67 68 Sun, 12 Sep 2004 15:05:58 -0700 69 Sat, 11 Sep 2004 21:32:11 +0200 70 71 compact numerical date strings: 72 73 20040912 15:05:58 -0700 74 20040911 +0200 75 76 */ 77 78 #include "curl_setup.h" 79 80 #include <limits.h> 81 82 #include <curl/curl.h> 83 #include "curlx/warnless.h" 84 #include "parsedate.h" 85 #include "curlx/strparse.h" 86 87 /* 88 * parsedate() 89 * 90 * Returns: 91 * 92 * PARSEDATE_OK - a fine conversion 93 * PARSEDATE_FAIL - failed to convert 94 * PARSEDATE_LATER - time overflow at the far end of time_t 95 * PARSEDATE_SOONER - time underflow at the low end of time_t 96 */ 97 98 static int parsedate(const char *date, time_t *output); 99 100 #define PARSEDATE_OK 0 101 #define PARSEDATE_FAIL -1 102 #define PARSEDATE_LATER 1 103 #if defined(HAVE_TIME_T_UNSIGNED) || (SIZEOF_TIME_T < 5) 104 #define PARSEDATE_SOONER 2 105 #endif 106 107 #if !defined(CURL_DISABLE_PARSEDATE) || !defined(CURL_DISABLE_FTP) || \ 108 !defined(CURL_DISABLE_FILE) || defined(USE_GNUTLS) 109 /* These names are also used by FTP and FILE code */ 110 const char * const Curl_wkday[] = 111 {"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"}; 112 const char * const Curl_month[]= 113 { "Jan", "Feb", "Mar", "Apr", "May", "Jun", 114 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; 115 #endif 116 117 #ifndef CURL_DISABLE_PARSEDATE 118 static const char * const weekday[] = 119 { "Monday", "Tuesday", "Wednesday", "Thursday", 120 "Friday", "Saturday", "Sunday" }; 121 122 struct tzinfo { 123 char name[5]; 124 int offset; /* +/- in minutes */ 125 }; 126 127 /* Here's a bunch of frequently used time zone names. These were supported 128 by the old getdate parser. */ 129 #define tDAYZONE -60 /* offset for daylight savings time */ 130 static const struct tzinfo tz[]= { 131 {"GMT", 0}, /* Greenwich Mean */ 132 {"UT", 0}, /* Universal Time */ 133 {"UTC", 0}, /* Universal (Coordinated) */ 134 {"WET", 0}, /* Western European */ 135 {"BST", 0 tDAYZONE}, /* British Summer */ 136 {"WAT", 60}, /* West Africa */ 137 {"AST", 240}, /* Atlantic Standard */ 138 {"ADT", 240 tDAYZONE}, /* Atlantic Daylight */ 139 {"EST", 300}, /* Eastern Standard */ 140 {"EDT", 300 tDAYZONE}, /* Eastern Daylight */ 141 {"CST", 360}, /* Central Standard */ 142 {"CDT", 360 tDAYZONE}, /* Central Daylight */ 143 {"MST", 420}, /* Mountain Standard */ 144 {"MDT", 420 tDAYZONE}, /* Mountain Daylight */ 145 {"PST", 480}, /* Pacific Standard */ 146 {"PDT", 480 tDAYZONE}, /* Pacific Daylight */ 147 {"YST", 540}, /* Yukon Standard */ 148 {"YDT", 540 tDAYZONE}, /* Yukon Daylight */ 149 {"HST", 600}, /* Hawaii Standard */ 150 {"HDT", 600 tDAYZONE}, /* Hawaii Daylight */ 151 {"CAT", 600}, /* Central Alaska */ 152 {"AHST", 600}, /* Alaska-Hawaii Standard */ 153 {"NT", 660}, /* Nome */ 154 {"IDLW", 720}, /* International Date Line West */ 155 {"CET", -60}, /* Central European */ 156 {"MET", -60}, /* Middle European */ 157 {"MEWT", -60}, /* Middle European Winter */ 158 {"MEST", -60 tDAYZONE}, /* Middle European Summer */ 159 {"CEST", -60 tDAYZONE}, /* Central European Summer */ 160 {"MESZ", -60 tDAYZONE}, /* Middle European Summer */ 161 {"FWT", -60}, /* French Winter */ 162 {"FST", -60 tDAYZONE}, /* French Summer */ 163 {"EET", -120}, /* Eastern Europe, USSR Zone 1 */ 164 {"WAST", -420}, /* West Australian Standard */ 165 {"WADT", -420 tDAYZONE}, /* West Australian Daylight */ 166 {"CCT", -480}, /* China Coast, USSR Zone 7 */ 167 {"JST", -540}, /* Japan Standard, USSR Zone 8 */ 168 {"EAST", -600}, /* Eastern Australian Standard */ 169 {"EADT", -600 tDAYZONE}, /* Eastern Australian Daylight */ 170 {"GST", -600}, /* Guam Standard, USSR Zone 9 */ 171 {"NZT", -720}, /* New Zealand */ 172 {"NZST", -720}, /* New Zealand Standard */ 173 {"NZDT", -720 tDAYZONE}, /* New Zealand Daylight */ 174 {"IDLE", -720}, /* International Date Line East */ 175 /* Next up: Military timezone names. RFC822 allowed these, but (as noted in 176 RFC 1123) had their signs wrong. Here we use the correct signs to match 177 actual military usage. 178 */ 179 {"A", 1 * 60}, /* Alpha */ 180 {"B", 2 * 60}, /* Bravo */ 181 {"C", 3 * 60}, /* Charlie */ 182 {"D", 4 * 60}, /* Delta */ 183 {"E", 5 * 60}, /* Echo */ 184 {"F", 6 * 60}, /* Foxtrot */ 185 {"G", 7 * 60}, /* Golf */ 186 {"H", 8 * 60}, /* Hotel */ 187 {"I", 9 * 60}, /* India */ 188 /* "J", Juliet is not used as a timezone, to indicate the observer's local 189 time */ 190 {"K", 10 * 60}, /* Kilo */ 191 {"L", 11 * 60}, /* Lima */ 192 {"M", 12 * 60}, /* Mike */ 193 {"N", -1 * 60}, /* November */ 194 {"O", -2 * 60}, /* Oscar */ 195 {"P", -3 * 60}, /* Papa */ 196 {"Q", -4 * 60}, /* Quebec */ 197 {"R", -5 * 60}, /* Romeo */ 198 {"S", -6 * 60}, /* Sierra */ 199 {"T", -7 * 60}, /* Tango */ 200 {"U", -8 * 60}, /* Uniform */ 201 {"V", -9 * 60}, /* Victor */ 202 {"W", -10 * 60}, /* Whiskey */ 203 {"X", -11 * 60}, /* X-ray */ 204 {"Y", -12 * 60}, /* Yankee */ 205 {"Z", 0}, /* Zulu, zero meridian, a.k.a. UTC */ 206 }; 207 208 /* returns: 209 -1 no day 210 0 monday - 6 sunday 211 */ 212 213 static int checkday(const char *check, size_t len) 214 { 215 int i; 216 const char * const *what; 217 if(len > 3) 218 what = &weekday[0]; 219 else if(len == 3) 220 what = &Curl_wkday[0]; 221 else 222 return -1; /* too short */ 223 for(i = 0; i < 7; i++) { 224 size_t ilen = strlen(what[0]); 225 if((ilen == len) && 226 curl_strnequal(check, what[0], len)) 227 return i; 228 what++; 229 } 230 return -1; 231 } 232 233 static int checkmonth(const char *check, size_t len) 234 { 235 int i; 236 const char * const *what = &Curl_month[0]; 237 if(len != 3) 238 return -1; /* not a month */ 239 240 for(i = 0; i < 12; i++) { 241 if(curl_strnequal(check, what[0], 3)) 242 return i; 243 what++; 244 } 245 return -1; /* return the offset or -1, no real offset is -1 */ 246 } 247 248 /* return the time zone offset between GMT and the input one, in number 249 of seconds or -1 if the timezone was not found/legal */ 250 251 static int checktz(const char *check, size_t len) 252 { 253 unsigned int i; 254 const struct tzinfo *what = tz; 255 if(len > 4) /* longer than any valid timezone */ 256 return -1; 257 258 for(i = 0; i < CURL_ARRAYSIZE(tz); i++) { 259 size_t ilen = strlen(what->name); 260 if((ilen == len) && 261 curl_strnequal(check, what->name, len)) 262 return what->offset*60; 263 what++; 264 } 265 return -1; 266 } 267 268 static void skip(const char **date) 269 { 270 /* skip everything that are not letters or digits */ 271 while(**date && !ISALNUM(**date)) 272 (*date)++; 273 } 274 275 enum assume { 276 DATE_MDAY, 277 DATE_YEAR, 278 DATE_TIME 279 }; 280 281 /* 282 * time2epoch: time stamp to seconds since epoch in GMT time zone. Similar to 283 * mktime but for GMT only. 284 */ 285 static time_t time2epoch(int sec, int min, int hour, 286 int mday, int mon, int year) 287 { 288 static const int month_days_cumulative [12] = 289 { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; 290 int leap_days = year - (mon <= 1); 291 leap_days = ((leap_days / 4) - (leap_days / 100) + (leap_days / 400) 292 - (1969 / 4) + (1969 / 100) - (1969 / 400)); 293 return ((((time_t) (year - 1970) * 365 294 + leap_days + month_days_cumulative[mon] + mday - 1) * 24 295 + hour) * 60 + min) * 60 + sec; 296 } 297 298 /* Returns the value of a single-digit or two-digit decimal number, return 299 then pointer to after the number. The 'date' pointer is known to point to a 300 digit. */ 301 static int oneortwodigit(const char *date, const char **endp) 302 { 303 int num = date[0] - '0'; 304 if(ISDIGIT(date[1])) { 305 *endp = &date[2]; 306 return num*10 + (date[1] - '0'); 307 } 308 *endp = &date[1]; 309 return num; 310 } 311 312 313 /* HH:MM:SS or HH:MM and accept single-digits too */ 314 static bool match_time(const char *date, 315 int *h, int *m, int *s, char **endp) 316 { 317 const char *p; 318 int hh, mm, ss = 0; 319 hh = oneortwodigit(date, &p); 320 if((hh < 24) && (*p == ':') && ISDIGIT(p[1])) { 321 mm = oneortwodigit(&p[1], &p); 322 if(mm < 60) { 323 if((*p == ':') && ISDIGIT(p[1])) { 324 ss = oneortwodigit(&p[1], &p); 325 if(ss <= 60) { 326 /* valid HH:MM:SS */ 327 goto match; 328 } 329 } 330 else { 331 /* valid HH:MM */ 332 goto match; 333 } 334 } 335 } 336 return FALSE; /* not a time string */ 337 match: 338 *h = hh; 339 *m = mm; 340 *s = ss; 341 *endp = (char *)CURL_UNCONST(p); 342 return TRUE; 343 } 344 345 /* 346 * parsedate() 347 * 348 * Returns: 349 * 350 * PARSEDATE_OK - a fine conversion 351 * PARSEDATE_FAIL - failed to convert 352 * PARSEDATE_LATER - time overflow at the far end of time_t 353 * PARSEDATE_SOONER - time underflow at the low end of time_t 354 */ 355 356 /* Wednesday is the longest name this parser knows about */ 357 #define NAME_LEN 12 358 359 static int parsedate(const char *date, time_t *output) 360 { 361 time_t t = 0; 362 int wdaynum = -1; /* day of the week number, 0-6 (mon-sun) */ 363 int monnum = -1; /* month of the year number, 0-11 */ 364 int mdaynum = -1; /* day of month, 1 - 31 */ 365 int hournum = -1; 366 int minnum = -1; 367 int secnum = -1; 368 int yearnum = -1; 369 int tzoff = -1; 370 enum assume dignext = DATE_MDAY; 371 const char *indate = date; /* save the original pointer */ 372 int part = 0; /* max 6 parts */ 373 374 while(*date && (part < 6)) { 375 bool found = FALSE; 376 377 skip(&date); 378 379 if(ISALPHA(*date)) { 380 /* a name coming up */ 381 size_t len = 0; 382 const char *p = date; 383 while(ISALPHA(*p) && (len < NAME_LEN)) { 384 p++; 385 len++; 386 } 387 388 if(len != NAME_LEN) { 389 if(wdaynum == -1) { 390 wdaynum = checkday(date, len); 391 if(wdaynum != -1) 392 found = TRUE; 393 } 394 if(!found && (monnum == -1)) { 395 monnum = checkmonth(date, len); 396 if(monnum != -1) 397 found = TRUE; 398 } 399 400 if(!found && (tzoff == -1)) { 401 /* this just must be a time zone string */ 402 tzoff = checktz(date, len); 403 if(tzoff != -1) 404 found = TRUE; 405 } 406 } 407 if(!found) 408 return PARSEDATE_FAIL; /* bad string */ 409 410 date += len; 411 } 412 else if(ISDIGIT(*date)) { 413 /* a digit */ 414 unsigned int val; 415 char *end; 416 if((secnum == -1) && 417 match_time(date, &hournum, &minnum, &secnum, &end)) { 418 /* time stamp */ 419 date = end; 420 } 421 else { 422 curl_off_t lval; 423 int num_digits = 0; 424 const char *p = date; 425 if(curlx_str_number(&p, &lval, 99999999)) 426 return PARSEDATE_FAIL; 427 428 /* we know num_digits cannot be larger than 8 */ 429 num_digits = (int)(p - date); 430 val = (unsigned int)lval; 431 432 if((tzoff == -1) && 433 (num_digits == 4) && 434 (val <= 1400) && 435 (indate < date) && 436 ((date[-1] == '+' || date[-1] == '-'))) { 437 /* four digits and a value less than or equal to 1400 (to take into 438 account all sorts of funny time zone diffs) and it is preceded 439 with a plus or minus. This is a time zone indication. 1400 is 440 picked since +1300 is frequently used and +1400 is mentioned as 441 an edge number in the document "ISO C 200X Proposal: Timezone 442 Functions" at http://david.tribble.com/text/c0xtimezone.html If 443 anyone has a more authoritative source for the exact maximum time 444 zone offsets, please speak up! */ 445 found = TRUE; 446 tzoff = (val/100 * 60 + val%100)*60; 447 448 /* the + and - prefix indicates the local time compared to GMT, 449 this we need their reversed math to get what we want */ 450 tzoff = date[-1]=='+' ? -tzoff : tzoff; 451 } 452 453 else if((num_digits == 8) && 454 (yearnum == -1) && 455 (monnum == -1) && 456 (mdaynum == -1)) { 457 /* 8 digits, no year, month or day yet. This is YYYYMMDD */ 458 found = TRUE; 459 yearnum = val/10000; 460 monnum = (val%10000)/100-1; /* month is 0 - 11 */ 461 mdaynum = val%100; 462 } 463 464 if(!found && (dignext == DATE_MDAY) && (mdaynum == -1)) { 465 if((val > 0) && (val < 32)) { 466 mdaynum = val; 467 found = TRUE; 468 } 469 dignext = DATE_YEAR; 470 } 471 472 if(!found && (dignext == DATE_YEAR) && (yearnum == -1)) { 473 yearnum = val; 474 found = TRUE; 475 if(yearnum < 100) { 476 if(yearnum > 70) 477 yearnum += 1900; 478 else 479 yearnum += 2000; 480 } 481 if(mdaynum == -1) 482 dignext = DATE_MDAY; 483 } 484 485 if(!found) 486 return PARSEDATE_FAIL; 487 488 date = p; 489 } 490 } 491 492 part++; 493 } 494 495 if(-1 == secnum) 496 secnum = minnum = hournum = 0; /* no time, make it zero */ 497 498 if((-1 == mdaynum) || 499 (-1 == monnum) || 500 (-1 == yearnum)) 501 /* lacks vital info, fail */ 502 return PARSEDATE_FAIL; 503 504 #ifdef HAVE_TIME_T_UNSIGNED 505 if(yearnum < 1970) { 506 /* only positive numbers cannot return earlier */ 507 *output = TIME_T_MIN; 508 return PARSEDATE_SOONER; 509 } 510 #endif 511 512 #if (SIZEOF_TIME_T < 5) 513 514 #ifdef HAVE_TIME_T_UNSIGNED 515 /* an unsigned 32-bit time_t can only hold dates to 2106 */ 516 if(yearnum > 2105) { 517 *output = TIME_T_MAX; 518 return PARSEDATE_LATER; 519 } 520 #else 521 /* a signed 32-bit time_t can only hold dates to the beginning of 2038 */ 522 if(yearnum > 2037) { 523 *output = TIME_T_MAX; 524 return PARSEDATE_LATER; 525 } 526 if(yearnum < 1903) { 527 *output = TIME_T_MIN; 528 return PARSEDATE_SOONER; 529 } 530 #endif 531 532 #else 533 /* The Gregorian calendar was introduced 1582 */ 534 if(yearnum < 1583) 535 return PARSEDATE_FAIL; 536 #endif 537 538 if((mdaynum > 31) || (monnum > 11) || 539 (hournum > 23) || (minnum > 59) || (secnum > 60)) 540 return PARSEDATE_FAIL; /* clearly an illegal date */ 541 542 /* time2epoch() returns a time_t. time_t is often 32 bits, sometimes even on 543 architectures that feature a 64 bits 'long' but ultimately time_t is the 544 correct data type to use. 545 */ 546 t = time2epoch(secnum, minnum, hournum, mdaynum, monnum, yearnum); 547 548 /* Add the time zone diff between local time zone and GMT. */ 549 if(tzoff == -1) 550 tzoff = 0; 551 552 if((tzoff > 0) && (t > (time_t)(TIME_T_MAX - tzoff))) { 553 *output = TIME_T_MAX; 554 return PARSEDATE_LATER; /* time_t overflow */ 555 } 556 557 t += tzoff; 558 559 *output = t; 560 561 return PARSEDATE_OK; 562 } 563 #else 564 /* disabled */ 565 static int parsedate(const char *date, time_t *output) 566 { 567 (void)date; 568 *output = 0; 569 return PARSEDATE_OK; /* a lie */ 570 } 571 #endif 572 573 time_t curl_getdate(const char *p, const time_t *now) 574 { 575 time_t parsed = -1; 576 int rc = parsedate(p, &parsed); 577 (void)now; /* legacy argument from the past that we ignore */ 578 579 if(rc == PARSEDATE_OK) { 580 if(parsed == (time_t)-1) 581 /* avoid returning -1 for a working scenario */ 582 parsed++; 583 return parsed; 584 } 585 /* everything else is fail */ 586 return -1; 587 } 588 589 /* Curl_getdate_capped() differs from curl_getdate() in that this will return 590 TIME_T_MAX in case the parsed time value was too big, instead of an 591 error. */ 592 593 time_t Curl_getdate_capped(const char *p) 594 { 595 time_t parsed = -1; 596 int rc = parsedate(p, &parsed); 597 598 switch(rc) { 599 case PARSEDATE_OK: 600 if(parsed == (time_t)-1) 601 /* avoid returning -1 for a working scenario */ 602 parsed++; 603 return parsed; 604 case PARSEDATE_LATER: 605 /* this returns the maximum time value */ 606 return parsed; 607 default: 608 return -1; /* everything else is fail */ 609 } 610 /* UNREACHABLE */ 611 } 612 613 /* 614 * Curl_gmtime() is a gmtime() replacement for portability. Do not use the 615 * gmtime_r() or gmtime() functions anywhere else but here. 616 * 617 */ 618 619 CURLcode Curl_gmtime(time_t intime, struct tm *store) 620 { 621 const struct tm *tm; 622 #ifdef HAVE_GMTIME_R 623 /* thread-safe version */ 624 tm = (struct tm *)gmtime_r(&intime, store); 625 #else 626 /* !checksrc! disable BANNEDFUNC 1 */ 627 tm = gmtime(&intime); 628 if(tm) 629 *store = *tm; /* copy the pointed struct to the local copy */ 630 #endif 631 632 if(!tm) 633 return CURLE_BAD_FUNCTION_ARGUMENT; 634 return CURLE_OK; 635 }