From cb92d243e8b3603cf4abd231abe83179950927fc Mon Sep 17 00:00:00 2001 From: Refael Ackermann Date: Thu, 30 May 2019 11:54:09 -0400 Subject: tools: fix js2c regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-URL: https://github.com/nodejs/node/pull/27980 Reviewed-By: Michaël Zasso Reviewed-By: Ben Noordhuis Reviewed-By: Joyee Cheung --- tools/js2c.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'tools/js2c.py') diff --git a/tools/js2c.py b/tools/js2c.py index 4131587655..c3ac53f14b 100755 --- a/tools/js2c.py +++ b/tools/js2c.py @@ -200,6 +200,12 @@ UnionBytes NativeModuleLoader::GetConfig() {{ }} // namespace node """ +ONE_BYTE_STRING = """ +static const uint8_t {0}[] = {{ +{1} +}}; +""" + TWO_BYTE_STRING = """ static const uint16_t {0}[] = {{ {1} @@ -215,15 +221,25 @@ SLUGGER_RE =re.compile('[.\-/]') is_verbose = False def GetDefinition(var, source, step=30): - encoded_source = bytearray(source, 'utf-16le') - code_points = [encoded_source[i] + (encoded_source[i+1] * 256) for i in range(0, len(encoded_source), 2)] + template = ONE_BYTE_STRING + code_points = [ord(c) for c in source] + if any(c > 127 for c in code_points): + template = TWO_BYTE_STRING + # Treat non-ASCII as UTF-8 and encode as UTF-16 Little Endian. + encoded_source = bytearray(source, 'utf-16le') + code_points = [ + encoded_source[i] + (encoded_source[i + 1] * 256) + for i in range(0, len(encoded_source), 2) + ] + # For easier debugging, align to the common 3 char for code-points. elements_s = ['%3s' % x for x in code_points] # Put no more then `step` code-points in a line. slices = [elements_s[i:i + step] for i in range(0, len(elements_s), step)] lines = [','.join(s) for s in slices] array_content = ',\n'.join(lines) - definition = TWO_BYTE_STRING.format(var, array_content) + definition = template.format(var, array_content) + return definition, len(code_points) -- cgit v1.2.3