commit c12a679468f52e4641ef9c38362456d5454eb2fc
parent 02109cfb9015b59212490ac0e724ddc38f51001f
Author: Charlie Gordon <github@chqrlie.org>
Date: Sun, 25 Feb 2024 23:47:26 +0100
Improve Date.parse
- rewrite Date.parse() with separate parsers
- return `NaN` for out of bounds field values as specified
- accept up to 9 decimals for millisecond fraction but truncate at 3
- accept many more alternative date/time formats
- add test cases in tests/test_builtin.js
Diffstat:
2 files changed, 360 insertions(+), 242 deletions(-)
diff --git a/quickjs/quickjs.c b/quickjs/quickjs.c
@@ -49983,7 +49983,7 @@ static JSValue js_Date_UTC(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv)
{
// UTC(y, mon, d, h, m, s, ms)
- double fields[] = { 0, 0, 1, 0, 0, 0, 0 };
+ double fields[9] = { 0, 0, 1, 0, 0, 0, 0, 0, 0 };
int i, n;
double a;
@@ -50004,145 +50004,338 @@ static JSValue js_Date_UTC(JSContext *ctx, JSValueConst this_val,
return JS_NewFloat64(ctx, set_date_fields(fields, 0));
}
-static void string_skip_spaces(JSString *sp, int *pp) {
- while (*pp < sp->len && string_get(sp, *pp) == ' ')
+/* Date string parsing */
+
+static BOOL string_skip_char(const uint8_t *sp, int *pp, int c) {
+ if (sp[*pp] == c) {
*pp += 1;
+ return TRUE;
+ } else {
+ return FALSE;
+ }
}
-static void string_skip_non_spaces(JSString *sp, int *pp) {
- while (*pp < sp->len && string_get(sp, *pp) != ' ')
+/* skip spaces, update offset */
+static void string_skip_spaces(const uint8_t *sp, int *pp) {
+ while (sp[*pp] == ' ')
*pp += 1;
}
-/* parse a numeric field with an optional sign if accept_sign is TRUE */
-static int string_get_digits(JSString *sp, int *pp, int64_t *pval) {
- int64_t v = 0;
+/* skip dashes dots and commas */
+static void string_skip_separators(const uint8_t *sp, int *pp) {
+ int c;
+ while ((c = sp[*pp]) == '-' || c == '.' || c == ',')
+ *pp += 1;
+}
+
+/* skip non spaces, update offset */
+static void string_skip_non_spaces(const uint8_t *sp, int *pp) {
+ while (sp[*pp] != '\0' && sp[*pp] != ' ')
+ *pp += 1;
+}
+
+/* parse a numeric field (max_digits = 0 -> no maximum) */
+static BOOL string_get_digits(const uint8_t *sp, int *pp, int *pval,
+ int min_digits, int max_digits)
+{
+ int v = 0;
int c, p = *pp, p_start;
- if (p >= sp->len)
- return -1;
p_start = p;
- while (p < sp->len) {
- c = string_get(sp, p);
- if (!(c >= '0' && c <= '9')) {
- if (p == p_start)
- return -1;
- else
- break;
- }
+ while ((c = sp[p]) >= '0' && c <= '9') {
v = v * 10 + c - '0';
p++;
+ if (p - p_start == max_digits)
+ break;
}
+ if (p - p_start < min_digits)
+ return FALSE;
*pval = v;
*pp = p;
- return 0;
+ return TRUE;
}
-static int string_get_signed_digits(JSString *sp, int *pp, int64_t *pval) {
- int res, sgn, p = *pp;
+static BOOL string_get_milliseconds(const uint8_t *sp, int *pp, int *pval) {
+ /* parse optional fractional part as milliseconds and truncate. */
+ /* spec does not indicate which rounding should be used */
+ int mul = 1000, ms = 0, c, p_start, p = *pp;
- if (p >= sp->len)
- return -1;
-
- sgn = string_get(sp, p);
- if (sgn == '-' || sgn == '+')
+ c = sp[p];
+ if (c == '.' || c == ',') {
p++;
-
- res = string_get_digits(sp, &p, pval);
- if (res == 0 && sgn == '-') {
- if (*pval == 0)
- return -1; // reject negative zero
- *pval = -*pval;
+ p_start = p;
+ while ((c = sp[p]) >= '0' && c <= '9') {
+ ms += (c - '0') * (mul /= 10);
+ p++;
+ if (p - p_start == 9)
+ break;
+ }
+ if (p > p_start) {
+ /* only consume the separator if digits are present */
+ *pval = ms;
+ *pp = p;
+ }
}
- *pp = p;
- return res;
+ return TRUE;
}
-/* parse a fixed width numeric field */
-static int string_get_fixed_width_digits(JSString *sp, int *pp, int n, int64_t *pval) {
- int64_t v = 0;
- int i, c, p = *pp;
+static BOOL string_get_timezone(const uint8_t *sp, int *pp, int *tzp) {
+ int tz = 0, sgn, hh, mm, p = *pp;
- for(i = 0; i < n; i++) {
- if (p >= sp->len)
- return -1;
- c = string_get(sp, p);
- if (!(c >= '0' && c <= '9'))
- return -1;
- v = v * 10 + c - '0';
+ sgn = sp[p];
+ if (sgn == '+' || sgn == '-') {
p++;
+ if (!string_get_digits(sp, &p, &hh, 2, 2))
+ return FALSE;
+ string_skip_char(sp, &p, ':'); /* optional separator */
+ if (!string_get_digits(sp, &p, &mm, 2, 2))
+ return FALSE;
+ if (hh > 23 || mm > 59)
+ return FALSE;
+ tz = hh * 60 + mm;
+ if (sgn != '+')
+ tz = -tz;
+ } else
+ if (sgn == 'Z') {
+ p++;
+ } else {
+ return FALSE;
}
- *pval = v;
*pp = p;
- return 0;
+ *tzp = tz;
+ return TRUE;
}
-static int string_get_milliseconds(JSString *sp, int *pp, int64_t *pval) {
- /* parse milliseconds as a fractional part, round to nearest */
- /* XXX: the spec does not indicate which rounding should be used */
- int mul = 1000, ms = 0, p = *pp, c, p_start;
- if (p >= sp->len)
- return -1;
- p_start = p;
- while (p < sp->len) {
- c = string_get(sp, p);
- if (!(c >= '0' && c <= '9')) {
- if (p == p_start)
- return -1;
- else
- break;
- }
- if (mul == 1 && c >= '5')
- ms += 1;
- ms += (c - '0') * (mul /= 10);
+static BOOL string_match(const uint8_t *sp, int *pp, const char *s) {
+ int p = *pp;
+ while (*s != '\0') {
+ if (sp[p] != (uint8_t)*s++)
+ return FALSE;
p++;
}
- *pval = ms;
*pp = p;
- return 0;
+ return TRUE;
}
-
-static int find_abbrev(JSString *sp, int p, const char *list, int count) {
+static int find_abbrev(const uint8_t *sp, int p, const char *list, int count) {
int n, i;
- if (p + 3 <= sp->len) {
- for (n = 0; n < count; n++) {
- for (i = 0; i < 3; i++) {
- if (string_get(sp, p + i) != month_names[n * 3 + i])
- goto next;
- }
- return n;
- next:;
+ for (n = 0; n < count; n++) {
+ for (i = 0;; i++) {
+ if (sp[p + i] != (uint8_t)month_names[n * 3 + i])
+ break;
+ if (i == 2)
+ return n;
}
}
return -1;
}
-static int string_get_month(JSString *sp, int *pp, int64_t *pval) {
+static BOOL string_get_month(const uint8_t *sp, int *pp, int *pval) {
int n;
- string_skip_spaces(sp, pp);
n = find_abbrev(sp, *pp, month_names, 12);
if (n < 0)
- return -1;
+ return FALSE;
- *pval = n;
+ *pval = n + 1;
*pp += 3;
- return 0;
+ return TRUE;
+}
+
+/* parse toISOString format */
+static BOOL js_date_parse_isostring(const uint8_t *sp, int fields[9], BOOL *is_local) {
+ int sgn, i, p = 0;
+
+ /* initialize fields to the beginning of the Epoch */
+ for (i = 0; i < 9; i++) {
+ fields[i] = (i == 2);
+ }
+ *is_local = FALSE;
+
+ /* year is either yyyy digits or [+-]yyyyyy */
+ sgn = sp[p];
+ if (sgn == '-' || sgn == '+') {
+ p++;
+ if (!string_get_digits(sp, &p, &fields[0], 6, 6))
+ return FALSE;
+ if (sgn == '-') {
+ if (fields[0] == 0)
+ return FALSE; // reject -000000
+ fields[0] = -fields[0];
+ }
+ } else {
+ if (!string_get_digits(sp, &p, &fields[0], 4, 4))
+ return FALSE;
+ }
+ if (string_skip_char(sp, &p, '-')) {
+ if (!string_get_digits(sp, &p, &fields[1], 2, 2)) /* month */
+ return FALSE;
+ if (fields[1] < 1)
+ return FALSE;
+ fields[1] -= 1;
+ if (string_skip_char(sp, &p, '-')) {
+ if (!string_get_digits(sp, &p, &fields[2], 2, 2)) /* day */
+ return FALSE;
+ if (fields[2] < 1)
+ return FALSE;
+ }
+ }
+ if (string_skip_char(sp, &p, 'T')) {
+ *is_local = TRUE;
+ if (!string_get_digits(sp, &p, &fields[3], 2, 2) /* hour */
+ || !string_skip_char(sp, &p, ':')
+ || !string_get_digits(sp, &p, &fields[4], 2, 2)) /* minute */
+ return FALSE;
+ if (string_skip_char(sp, &p, ':')) {
+ if (!string_get_digits(sp, &p, &fields[5], 2, 2)) /* second */
+ return FALSE;
+ string_get_milliseconds(sp, &p, &fields[6]);
+ }
+ }
+ /* parse the time zone offset if present: [+-]HH:mm or [+-]HHmm */
+ if (sp[p]) {
+ *is_local = FALSE;
+ if (!string_get_timezone(sp, &p, &fields[8]))
+ return FALSE;
+ }
+ /* error if extraneous characters */
+ return sp[p] == '\0';
+}
+
+/* parse toString, toUTCString and other formats */
+static BOOL js_date_parse_otherstring(const uint8_t *sp, int fields[9], BOOL *is_local) {
+ int c, i, val, p = 0, p_start;
+ int num[3];
+ BOOL has_year = FALSE;
+ BOOL has_mon = FALSE;
+ BOOL has_time = FALSE;
+ int num_index = 0;
+
+ /* initialize fields to the beginning of 2001-01-01 */
+ fields[0] = 2001;
+ fields[1] = 1;
+ fields[2] = 1;
+ for (i = 3; i < 9; i++) {
+ fields[i] = 0;
+ }
+ *is_local = TRUE;
+
+ while (sp[p] != '\0') {
+ string_skip_spaces(sp, &p);
+ p_start = p;
+ if ((c = sp[p]) == '+' || c == '-') {
+ if (has_time && string_get_timezone(sp, &p, &fields[8])) {
+ *is_local = FALSE;
+ } else {
+ p++;
+ if (string_get_digits(sp, &p, &val, 1, 9)) {
+ if (c == '-') {
+ if (val == 0)
+ return FALSE;
+ val = -val;
+ }
+ fields[0] = val;
+ has_year = TRUE;
+ }
+ }
+ } else
+ if (string_get_digits(sp, &p, &val, 1, 9)) {
+ if (string_skip_char(sp, &p, ':')) {
+ /* time part */
+ fields[3] = val;
+ if (!string_get_digits(sp, &p, &fields[4], 1, 2))
+ return FALSE;
+ if (string_skip_char(sp, &p, ':')) {
+ if (!string_get_digits(sp, &p, &fields[5], 1, 2))
+ return FALSE;
+ string_get_milliseconds(sp, &p, &fields[6]);
+ }
+ has_time = TRUE;
+ } else {
+ if (p - p_start > 2) {
+ fields[0] = val;
+ has_year = TRUE;
+ } else
+ if (val < 1 || val > 31) {
+ fields[0] = val + (val < 100) * 1900;
+ has_year = TRUE;
+ } else {
+ if (num_index == 3)
+ return FALSE;
+ num[num_index++] = val;
+ }
+ }
+ } else
+ if (string_get_month(sp, &p, &fields[1])) {
+ has_mon = TRUE;
+ } else
+ if (c == 'Z') {
+ *is_local = FALSE;
+ p++;
+ continue;
+ } else
+ if (string_match(sp, &p, "GMT") || string_match(sp, &p, "UTC")) {
+ *is_local = FALSE;
+ continue;
+ } else {
+ /* skip a word */
+ string_skip_non_spaces(sp, &p);
+ continue;
+ }
+ string_skip_separators(sp, &p);
+ }
+ if (num_index + has_year + has_mon > 3)
+ return FALSE;
+
+ switch (num_index) {
+ case 0:
+ if (!has_year)
+ return FALSE;
+ break;
+ case 1:
+ if (has_mon)
+ fields[2] = num[0];
+ else
+ fields[1] = num[0];
+ break;
+ case 2:
+ if (has_year) {
+ fields[1] = num[0];
+ fields[2] = num[1];
+ } else
+ if (has_mon) {
+ fields[0] = num[1] + (num[1] < 100) * 1900;
+ fields[2] = num[0];
+ } else {
+ fields[1] = num[0];
+ fields[2] = num[1];
+ }
+ break;
+ case 3:
+ fields[0] = num[2] + (num[2] < 100) * 1900;
+ fields[1] = num[0];
+ fields[2] = num[1];
+ break;
+ default:
+ return FALSE;
+ }
+ if (fields[1] < 1 || fields[2] < 1)
+ return FALSE;
+ fields[1] -= 1;
+ return TRUE;
}
static JSValue js_Date_parse(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv)
{
- // parse(s)
JSValue s, rv;
- int64_t fields[] = { 0, 1, 1, 0, 0, 0, 0 };
- double fields1[7];
- int64_t tz, hh, mm;
+ int fields[9];
+ double fields1[9];
double d;
- int p, i, c, sgn, l;
+ int i, c;
JSString *sp;
+ uint8_t buf[128];
BOOL is_local;
rv = JS_NAN;
@@ -50152,145 +50345,33 @@ static JSValue js_Date_parse(JSContext *ctx, JSValueConst this_val,
return JS_EXCEPTION;
sp = JS_VALUE_GET_STRING(s);
- p = 0;
- if (p < sp->len && (((c = string_get(sp, p)) >= '0' && c <= '9') || c == '+' || c == '-')) {
- /* ISO format */
- /* year field can be negative */
- if (string_get_signed_digits(sp, &p, &fields[0]))
- goto done;
-
- for (i = 1; i < 7; i++) {
- if (p >= sp->len)
- break;
- switch(i) {
- case 1:
- case 2:
- c = '-';
- break;
- case 3:
- c = 'T';
- break;
- case 4:
- case 5:
- c = ':';
- break;
- case 6:
- c = '.';
- break;
- }
- if (string_get(sp, p) != c)
- break;
- p++;
- if (i == 6) {
- if (string_get_milliseconds(sp, &p, &fields[i]))
- goto done;
- } else {
- if (string_get_digits(sp, &p, &fields[i]))
- goto done;
- }
- }
- /* no time: UTC by default */
- is_local = (i > 3);
- fields[1] -= 1;
-
- /* parse the time zone offset if present: [+-]HH:mm or [+-]HHmm */
- tz = 0;
- if (p < sp->len) {
- sgn = string_get(sp, p);
- if (sgn == '+' || sgn == '-') {
- p++;
- l = sp->len - p;
- if (l != 4 && l != 5)
- goto done;
- if (string_get_fixed_width_digits(sp, &p, 2, &hh))
- goto done;
- if (l == 5) {
- if (string_get(sp, p) != ':')
- goto done;
- p++;
- }
- if (string_get_fixed_width_digits(sp, &p, 2, &mm))
- goto done;
- tz = hh * 60 + mm;
- if (sgn == '-')
- tz = -tz;
- is_local = FALSE;
- } else if (sgn == 'Z') {
- p++;
- is_local = FALSE;
- } else {
- goto done;
- }
- /* error if extraneous characters */
- if (p != sp->len)
- goto done;
- }
- } else {
- /* toString or toUTCString format */
- /* skip the day of the week */
- string_skip_non_spaces(sp, &p);
- string_skip_spaces(sp, &p);
- if (p >= sp->len)
- goto done;
- c = string_get(sp, p);
- if (c >= '0' && c <= '9') {
- /* day of month first */
- if (string_get_digits(sp, &p, &fields[2]))
- goto done;
- if (string_get_month(sp, &p, &fields[1]))
- goto done;
- } else {
- /* month first */
- if (string_get_month(sp, &p, &fields[1]))
- goto done;
- string_skip_spaces(sp, &p);
- if (string_get_digits(sp, &p, &fields[2]))
- goto done;
- }
- /* year */
- string_skip_spaces(sp, &p);
- if (string_get_signed_digits(sp, &p, &fields[0]))
- goto done;
-
- /* hour, min, seconds */
- string_skip_spaces(sp, &p);
- for(i = 0; i < 3; i++) {
- if (i == 1 || i == 2) {
- if (p >= sp->len)
- goto done;
- if (string_get(sp, p) != ':')
- goto done;
- p++;
- }
- if (string_get_digits(sp, &p, &fields[3 + i]))
- goto done;
- }
- // XXX: parse optional milliseconds?
-
- /* parse the time zone offset if present: [+-]HHmm */
- is_local = FALSE;
- tz = 0;
- for (tz = 0; p < sp->len; p++) {
- sgn = string_get(sp, p);
- if (sgn == '+' || sgn == '-') {
- p++;
- if (string_get_fixed_width_digits(sp, &p, 2, &hh))
- goto done;
- if (string_get_fixed_width_digits(sp, &p, 2, &mm))
- goto done;
- tz = hh * 60 + mm;
- if (sgn == '-')
- tz = -tz;
- break;
- }
+ /* convert the string as a byte array */
+ for (i = 0; i < sp->len && i < (int)countof(buf) - 1; i++) {
+ c = string_get(sp, i);
+ if (c > 255)
+ c = (c == 0x2212) ? '-' : 'x';
+ buf[i] = c;
+ }
+ buf[i] = '\0';
+ if (js_date_parse_isostring(buf, fields, &is_local)
+ || js_date_parse_otherstring(buf, fields, &is_local)) {
+ static int const field_max[6] = { 0, 11, 31, 24, 59, 59 };
+ BOOL valid = TRUE;
+ /* check field maximum values */
+ for (i = 1; i < 6; i++) {
+ if (fields[i] > field_max[i])
+ valid = FALSE;
+ }
+ /* special case 24:00:00.000 */
+ if (fields[3] == 24 && (fields[4] | fields[5] | fields[6]))
+ valid = FALSE;
+ if (valid) {
+ for(i = 0; i < 7; i++)
+ fields1[i] = fields[i];
+ d = set_date_fields(fields1, is_local) - fields[8] * 60000;
+ rv = JS_NewFloat64(ctx, d);
}
}
- for(i = 0; i < 7; i++)
- fields1[i] = fields[i];
- d = set_date_fields(fields1, is_local) - tz * 60000;
- rv = JS_NewFloat64(ctx, d);
-
-done:
JS_FreeValue(ctx, s);
return rv;
}
diff --git a/quickjs/tests/test_builtin.js b/quickjs/tests/test_builtin.js
@@ -540,28 +540,65 @@ function test_date()
// is not a valid instance of this format.
// Hence the fractional part after . should have 3 digits and how
// a different number of digits is handled is implementation defined.
- var d = new Date(1506098258091), a, s;
+ assert(Date.parse(""), NaN);
+ assert(Date.parse("2000"), 946684800000);
+ assert(Date.parse("2000-01"), 946684800000);
+ assert(Date.parse("2000-01-01"), 946684800000);
+ //assert(Date.parse("2000-01-01T"), NaN);
+ //assert(Date.parse("2000-01-01T00Z"), NaN);
+ assert(Date.parse("2000-01-01T00:00Z"), 946684800000);
+ assert(Date.parse("2000-01-01T00:00:00Z"), 946684800000);
+ assert(Date.parse("2000-01-01T00:00:00.1Z"), 946684800100);
+ assert(Date.parse("2000-01-01T00:00:00.10Z"), 946684800100);
+ assert(Date.parse("2000-01-01T00:00:00.100Z"), 946684800100);
+ assert(Date.parse("2000-01-01T00:00:00.1000Z"), 946684800100);
+ assert(Date.parse("2000-01-01T00:00:00+00:00"), 946684800000);
+ //assert(Date.parse("2000-01-01T00:00:00+00:30"), 946686600000);
+ var d = new Date("2000T00:00"); // Jan 1st 2000, 0:00:00 local time
+ assert(typeof d === 'object' && d.toString() != 'Invalid Date');
+ assert((new Date('Jan 1 2000')).toISOString(),
+ d.toISOString());
+ assert((new Date('Jan 1 2000 00:00')).toISOString(),
+ d.toISOString());
+ assert((new Date('Jan 1 2000 00:00:00')).toISOString(),
+ d.toISOString());
+ assert((new Date('Jan 1 2000 00:00:00 GMT+0100')).toISOString(),
+ '1999-12-31T23:00:00.000Z');
+ assert((new Date('Jan 1 2000 00:00:00 GMT+0200')).toISOString(),
+ '1999-12-31T22:00:00.000Z');
+ assert((new Date('Sat Jan 1 2000')).toISOString(),
+ d.toISOString());
+ assert((new Date('Sat Jan 1 2000 00:00')).toISOString(),
+ d.toISOString());
+ assert((new Date('Sat Jan 1 2000 00:00:00')).toISOString(),
+ d.toISOString());
+ assert((new Date('Sat Jan 1 2000 00:00:00 GMT+0100')).toISOString(),
+ '1999-12-31T23:00:00.000Z');
+ assert((new Date('Sat Jan 1 2000 00:00:00 GMT+0200')).toISOString(),
+ '1999-12-31T22:00:00.000Z');
+
+ var d = new Date(1506098258091);
assert(d.toISOString(), "2017-09-22T16:37:38.091Z");
d.setUTCHours(18, 10, 11);
assert(d.toISOString(), "2017-09-22T18:10:11.091Z");
- a = Date.parse(d.toISOString());
+ var a = Date.parse(d.toISOString());
assert((new Date(a)).toISOString(), d.toISOString());
- s = new Date("2020-01-01T01:01:01.1Z").toISOString();
- assert(s, "2020-01-01T01:01:01.100Z");
- s = new Date("2020-01-01T01:01:01.12Z").toISOString();
- assert(s, "2020-01-01T01:01:01.120Z");
- s = new Date("2020-01-01T01:01:01.123Z").toISOString();
- assert(s, "2020-01-01T01:01:01.123Z");
- s = new Date("2020-01-01T01:01:01.1234Z").toISOString();
- assert(s, "2020-01-01T01:01:01.123Z");
- s = new Date("2020-01-01T01:01:01.12345Z").toISOString();
- assert(s, "2020-01-01T01:01:01.123Z");
- s = new Date("2020-01-01T01:01:01.1235Z").toISOString();
- assert(s == "2020-01-01T01:01:01.124Z" || // QuickJS
- s == "2020-01-01T01:01:01.123Z"); // nodeJS
- s = new Date("2020-01-01T01:01:01.9999Z").toISOString();
- assert(s == "2020-01-01T01:01:02.000Z" || // QuickJS
- s == "2020-01-01T01:01:01.999Z"); // nodeJS
+
+ assert((new Date("2020-01-01T01:01:01.123Z")).toISOString(),
+ "2020-01-01T01:01:01.123Z");
+ /* implementation defined behavior */
+ assert((new Date("2020-01-01T01:01:01.1Z")).toISOString(),
+ "2020-01-01T01:01:01.100Z");
+ assert((new Date("2020-01-01T01:01:01.12Z")).toISOString(),
+ "2020-01-01T01:01:01.120Z");
+ assert((new Date("2020-01-01T01:01:01.1234Z")).toISOString(),
+ "2020-01-01T01:01:01.123Z");
+ assert((new Date("2020-01-01T01:01:01.12345Z")).toISOString(),
+ "2020-01-01T01:01:01.123Z");
+ assert((new Date("2020-01-01T01:01:01.1235Z")).toISOString(),
+ "2020-01-01T01:01:01.123Z");
+ assert((new Date("2020-01-01T01:01:01.9999Z")).toISOString(),
+ "2020-01-01T01:01:01.999Z");
assert(Date.UTC(NaN), NaN);
assert(Date.UTC(2017, NaN), NaN);