commit 556fcf7ecf1df74c2f1bceb11b5d5e4222088bdd
parent 7179c30aba07627c6ea95ffacaa67a3fcd5ad797
Author: Charlie Gordon <github@chqrlie.org>
Date: Thu, 15 Feb 2024 10:30:04 +0100
Improve string concatenation hack
- add more cases of in place string concatenation
this temporary hack improves the microbench timing by 30% but
has little impact on the test262 timings.
Diffstat:
3 files changed, 98 insertions(+), 79 deletions(-)
diff --git a/quickjs/Makefile b/quickjs/Makefile
@@ -48,9 +48,10 @@ PREFIX?=/usr/local
#CONFIG_ASAN=y
# use memory sanitizer
#CONFIG_MSAN=y
-# include the code for BigFloat/BigDecimal, math mode and faster large integers
# use UB sanitizer
#CONFIG_UBSAN=y
+
+# include the code for BigFloat/BigDecimal and math mode
CONFIG_BIGNUM=y
OBJDIR=.obj
diff --git a/quickjs/VERSION b/quickjs/VERSION
@@ -1 +1 @@
-2024-01-13
+2024-02-14
diff --git a/quickjs/quickjs.c b/quickjs/quickjs.c
@@ -1458,6 +1458,10 @@ static inline int is_digit(int c) {
return c >= '0' && c <= '9';
}
+static inline int string_get(const JSString *p, int idx) {
+ return p->is_wide_char ? p->u.str16[idx] : p->u.str8[idx];
+}
+
typedef struct JSClassShortDef {
JSAtom class_name;
JSClassFinalizer *finalizer;
@@ -2431,10 +2435,7 @@ static inline BOOL is_num_string(uint32_t *pval, const JSString *p)
len = p->len;
if (len == 0 || len > 10)
return FALSE;
- if (p->is_wide_char)
- c = p->u.str16[0];
- else
- c = p->u.str8[0];
+ c = string_get(p, 0);
if (is_num(c)) {
if (c == '0') {
if (len != 1)
@@ -2443,10 +2444,7 @@ static inline BOOL is_num_string(uint32_t *pval, const JSString *p)
} else {
n = c - '0';
for(i = 1; i < len; i++) {
- if (p->is_wide_char)
- c = p->u.str16[i];
- else
- c = p->u.str8[i];
+ c = string_get(p, i);
if (!is_num(c))
return FALSE;
n64 = (uint64_t)n * 10 + (c - '0');
@@ -2491,10 +2489,24 @@ static uint32_t hash_string(const JSString *str, uint32_t h)
return h;
}
-static __maybe_unused void JS_DumpString(JSRuntime *rt,
- const JSString *p)
+static __maybe_unused void JS_DumpChar(JSRuntime *rt, int c, int sep)
+{
+ if (c == sep || c == '\\') {
+ putchar('\\');
+ putchar(c);
+ } else if (c >= ' ' && c <= 126) {
+ putchar(c);
+ } else if (c == '\n') {
+ putchar('\\');
+ putchar('n');
+ } else {
+ printf("\\u%04x", c);
+ }
+}
+
+static __maybe_unused void JS_DumpString(JSRuntime *rt, const JSString *p)
{
- int i, c, sep;
+ int i, sep;
if (p == NULL) {
printf("<null>");
@@ -2504,21 +2516,7 @@ static __maybe_unused void JS_DumpString(JSRuntime *rt,
sep = (p->header.ref_count == 1) ? '\"' : '\'';
putchar(sep);
for(i = 0; i < p->len; i++) {
- if (p->is_wide_char)
- c = p->u.str16[i];
- else
- c = p->u.str8[i];
- if (c == sep || c == '\\') {
- putchar('\\');
- putchar(c);
- } else if (c >= ' ' && c <= 126) {
- putchar(c);
- } else if (c == '\n') {
- putchar('\\');
- putchar('n');
- } else {
- printf("\\u%04x", c);
- }
+ JS_DumpChar(rt, string_get(p, i), sep);
}
putchar(sep);
}
@@ -2861,6 +2859,7 @@ static JSAtom __JS_NewAtomInit(JSRuntime *rt, const char *str, int len,
return __JS_NewAtom(rt, p, atom_type);
}
+/* Warning: str must be ASCII only */
static JSAtom __JS_FindAtom(JSRuntime *rt, const char *str, size_t len,
int atom_type)
{
@@ -2955,11 +2954,13 @@ static JSAtom JS_NewAtomStr(JSContext *ctx, JSString *p)
return __JS_NewAtom(rt, p, JS_ATOM_TYPE_STRING);
}
+/* str is UTF-8 encoded */
JSAtom JS_NewAtomLen(JSContext *ctx, const char *str, size_t len)
{
JSValue val;
if (len == 0 || !is_digit(*str)) {
+ // XXX: this will not work if UTF-8 encoded str contains non ASCII bytes
JSAtom atom = __JS_FindAtom(ctx->rt, str, len, JS_ATOM_TYPE_STRING);
if (atom)
return atom;
@@ -3065,10 +3066,7 @@ static const char *JS_AtomGetStrRT(JSRuntime *rt, char *buf, int buf_size,
return (const char *)str->u.str8;
}
for(i = 0; i < str->len; i++) {
- if (str->is_wide_char)
- c = str->u.str16[i];
- else
- c = str->u.str8[i];
+ c = string_get(str, i);
if ((q - buf) >= buf_size - UTF8_CHAR_LEN_MAX)
break;
if (c < 128) {
@@ -3699,10 +3697,6 @@ static int string_buffer_putc(StringBuffer *s, uint32_t c)
return string_buffer_putc16(s, c);
}
-static int string_get(const JSString *p, int idx) {
- return p->is_wide_char ? p->u.str16[idx] : p->u.str8[idx];
-}
-
static int string_getc(const JSString *p, int *pidx)
{
int idx, c, c1;
@@ -4189,6 +4183,42 @@ static JSValue JS_ConcatString1(JSContext *ctx,
return JS_MKPTR(JS_TAG_STRING, p);
}
+static BOOL JS_ConcatStringInPlace(JSContext *ctx, JSString *p1, JSValueConst op2) {
+ if (JS_VALUE_GET_TAG(op2) == JS_TAG_STRING) {
+ JSString *p2 = JS_VALUE_GET_STRING(op2);
+ size_t size1;
+
+ if (p2->len == 0)
+ return TRUE;
+ if (p1->header.ref_count != 1)
+ return FALSE;
+ size1 = js_malloc_usable_size(ctx, p1);
+ if (p1->is_wide_char) {
+ if (size1 >= sizeof(*p1) + ((p1->len + p2->len) << 1)) {
+ if (p2->is_wide_char) {
+ memcpy(p1->u.str16 + p1->len, p2->u.str16, p2->len << 1);
+ p1->len += p2->len;
+ return TRUE;
+ } else {
+ size_t i;
+ for (i = 0; i < p2->len; i++) {
+ p1->u.str16[p1->len++] = p2->u.str8[i];
+ }
+ return TRUE;
+ }
+ }
+ } else if (!p2->is_wide_char) {
+ if (size1 >= sizeof(*p1) + p1->len + p2->len + 1) {
+ memcpy(p1->u.str8 + p1->len, p2->u.str8, p2->len);
+ p1->len += p2->len;
+ p1->u.str8[p1->len] = '\0';
+ return TRUE;
+ }
+ }
+ }
+ return FALSE;
+}
+
/* op1 and op2 are converted to strings. For convenience, op1 or op2 =
JS_EXCEPTION are accepted and return JS_EXCEPTION. */
static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2)
@@ -4211,27 +4241,11 @@ static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2)
}
}
p1 = JS_VALUE_GET_STRING(op1);
- p2 = JS_VALUE_GET_STRING(op2);
-
- /* XXX: could also check if p1 is empty */
- if (p2->len == 0) {
- goto ret_op1;
- }
- if (p1->header.ref_count == 1 && p1->is_wide_char == p2->is_wide_char
- && js_malloc_usable_size(ctx, p1) >= sizeof(*p1) + ((p1->len + p2->len) << p2->is_wide_char) + 1 - p1->is_wide_char) {
- /* Concatenate in place in available space at the end of p1 */
- if (p1->is_wide_char) {
- memcpy(p1->u.str16 + p1->len, p2->u.str16, p2->len << 1);
- p1->len += p2->len;
- } else {
- memcpy(p1->u.str8 + p1->len, p2->u.str8, p2->len);
- p1->len += p2->len;
- p1->u.str8[p1->len] = '\0';
- }
- ret_op1:
+ if (JS_ConcatStringInPlace(ctx, p1, op2)) {
JS_FreeValue(ctx, op2);
return op1;
}
+ p2 = JS_VALUE_GET_STRING(op2);
ret = JS_ConcatString1(ctx, p1, p2);
JS_FreeValue(ctx, op1);
JS_FreeValue(ctx, op2);
@@ -17793,6 +17807,11 @@ static JSValue __JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
sp[-2] = __JS_NewFloat64(ctx, JS_VALUE_GET_FLOAT64(op1) +
JS_VALUE_GET_FLOAT64(op2));
sp--;
+ } else if (JS_IsString(op1) && JS_IsString(op2)) {
+ sp[-2] = JS_ConcatString(ctx, op1, op2);
+ sp--;
+ if (JS_IsException(sp[-1]))
+ goto exception;
} else {
add_slow:
if (js_add_slow(ctx, sp))
@@ -17803,38 +17822,45 @@ static JSValue __JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj,
BREAK;
CASE(OP_add_loc):
{
+ JSValue op2;
JSValue *pv;
int idx;
idx = *pc;
pc += 1;
+ op2 = sp[-1];
pv = &var_buf[idx];
- if (likely(JS_VALUE_IS_BOTH_INT(*pv, sp[-1]))) {
+ if (likely(JS_VALUE_IS_BOTH_INT(*pv, op2))) {
int64_t r;
- r = (int64_t)JS_VALUE_GET_INT(*pv) +
- JS_VALUE_GET_INT(sp[-1]);
+ r = (int64_t)JS_VALUE_GET_INT(*pv) + JS_VALUE_GET_INT(op2);
if (unlikely((int)r != r))
goto add_loc_slow;
*pv = JS_NewInt32(ctx, r);
sp--;
+ } else if (JS_VALUE_IS_BOTH_FLOAT(*pv, op2)) {
+ *pv = __JS_NewFloat64(ctx, JS_VALUE_GET_FLOAT64(*pv) +
+ JS_VALUE_GET_FLOAT64(op2));
+ sp--;
} else if (JS_VALUE_GET_TAG(*pv) == JS_TAG_STRING) {
- JSValue op1;
- op1 = sp[-1];
sp--;
- op1 = JS_ToPrimitiveFree(ctx, op1, HINT_NONE);
- if (JS_IsException(op1))
- goto exception;
- op1 = JS_ConcatString(ctx, JS_DupValue(ctx, *pv), op1);
- if (JS_IsException(op1))
+ op2 = JS_ToPrimitiveFree(ctx, op2, HINT_NONE);
+ if (JS_IsException(op2))
goto exception;
- set_value(ctx, pv, op1);
+ if (JS_ConcatStringInPlace(ctx, JS_VALUE_GET_STRING(*pv), op2)) {
+ JS_FreeValue(ctx, op2);
+ } else {
+ op2 = JS_ConcatString(ctx, JS_DupValue(ctx, *pv), op2);
+ if (JS_IsException(op2))
+ goto exception;
+ set_value(ctx, pv, op2);
+ }
} else {
JSValue ops[2];
add_loc_slow:
/* In case of exception, js_add_slow frees ops[0]
and ops[1], so we must duplicate *pv */
ops[0] = JS_DupValue(ctx, *pv);
- ops[1] = sp[-1];
+ ops[1] = op2;
sp--;
if (js_add_slow(ctx, ops + 2))
goto exception;
@@ -35876,6 +35902,7 @@ static JSString *JS_ReadString(BCReaderState *s)
js_free_string(s->ctx->rt, p);
return NULL;
}
+ // XXX: potential endianness issue
memcpy(p->u.str8, s->ptr, size);
s->ptr += size;
if (!is_wide_char) {
@@ -41360,10 +41387,7 @@ static int js_string_get_own_property(JSContext *ctx,
idx = __JS_AtomToUInt32(prop);
if (idx < p1->len) {
if (desc) {
- if (p1->is_wide_char)
- ch = p1->u.str16[idx];
- else
- ch = p1->u.str8[idx];
+ ch = string_get(p1, idx);
desc->flags = JS_PROP_ENUMERABLE;
desc->value = js_new_string_char(ctx, ch);
desc->getter = JS_UNDEFINED;
@@ -41629,10 +41653,7 @@ static JSValue js_string_charCodeAt(JSContext *ctx, JSValueConst this_val,
if (idx < 0 || idx >= p->len) {
ret = JS_NAN;
} else {
- if (p->is_wide_char)
- c = p->u.str16[idx];
- else
- c = p->u.str8[idx];
+ c = string_get(p, idx);
ret = JS_NewInt32(ctx, c);
}
JS_FreeValue(ctx, val);
@@ -41662,10 +41683,7 @@ static JSValue js_string_charAt(JSContext *ctx, JSValueConst this_val,
else
ret = js_new_string8(ctx, NULL, 0);
} else {
- if (p->is_wide_char)
- c = p->u.str16[idx];
- else
- c = p->u.str8[idx];
+ c = string_get(p, idx);
ret = js_new_string_char(ctx, c);
}
JS_FreeValue(ctx, val);
@@ -45542,7 +45560,7 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
has_content = TRUE;
}
}
- if (has_content && JS_VALUE_GET_STRING(jsc->gap)->len != 0) {
+ if (has_content && !JS_IsEmptyString(jsc->gap)) {
string_buffer_putc8(jsc->b, '\n');
string_buffer_concat_value(jsc->b, indent);
}