summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames M Snell <jasnell@gmail.com>2017-06-12 08:25:53 -0700
committerJames M Snell <jasnell@gmail.com>2017-07-24 14:04:13 -0700
commited21cb1774d3e146f84a94400db0008a940656c3 (patch)
tree666b947e272b665045994da4476d7aa01bbe418f
parent7f5a745e35bec2bde067c2922f6a9146beed99e6 (diff)
downloadandroid-node-v8-ed21cb1774d3e146f84a94400db0008a940656c3.tar.gz
android-node-v8-ed21cb1774d3e146f84a94400db0008a940656c3.tar.bz2
android-node-v8-ed21cb1774d3e146f84a94400db0008a940656c3.zip
util: implement WHATWG Encoding Standard API
Provide an (initially experimental) implementation of the WHATWG Encoding Standard API (`TextDecoder` and `TextEncoder`). This is the same API implemented on the browser side. By default, with small-icu, only the UTF-8, UTF-16le and UTF-16be decoders are supported. With full-icu enabled, every encoding other than iso-8859-16 is supported. This provides a basic test, but does not include the full web platform tests. Note: many of the web platform tests for this would fail by default because we ship with small-icu by default. A process warning will be emitted on first use to indicate that the API is still experimental. No runtime flag is required to use the feature. Refs: https://encoding.spec.whatwg.org/ PR-URL: https://github.com/nodejs/node/pull/13644 Reviewed-By: Timothy Gu <timothygu99@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
-rw-r--r--doc/api/buffer.md14
-rw-r--r--doc/api/util.md151
-rw-r--r--lib/internal/encoding.js458
-rw-r--r--lib/internal/errors.js4
-rw-r--r--lib/util.js3
-rw-r--r--node.gyp1
-rw-r--r--src/node_buffer.cc23
-rw-r--r--src/node_i18n.cc155
-rw-r--r--src/node_i18n.h1
-rw-r--r--src/node_util.cc1
-rw-r--r--test/parallel/test-whatwg-encoding.js385
-rw-r--r--tools/icu/icu-generic.gyp9
12 files changed, 1189 insertions, 16 deletions
diff --git a/doc/api/buffer.md b/doc/api/buffer.md
index d73af5fd16..f8681c6be8 100644
--- a/doc/api/buffer.md
+++ b/doc/api/buffer.md
@@ -193,11 +193,12 @@ The character encodings currently supported by Node.js include:
* `'hex'` - Encode each byte as two hexadecimal characters.
-*Note*: Today's browsers follow the [WHATWG spec] which aliases both 'latin1'
-and ISO-8859-1 to win-1252. This means that while doing something like
-`http.get()`, if the returned charset is one of those listed in the WHATWG spec
-it's possible that the server actually returned win-1252-encoded data, and
-using `'latin1'` encoding may incorrectly decode the characters.
+*Note*: Today's browsers follow the [WHATWG Encoding Standard][] which aliases
+both 'latin1' and ISO-8859-1 to win-1252. This means that while doing something
+like `http.get()`, if the returned charset is one of those listed in the WHATWG
+specification it is possible that the server actually returned
+win-1252-encoded data, and using `'latin1'` encoding may incorrectly decode the
+characters.
## Buffers and TypedArray
<!-- YAML
@@ -2662,7 +2663,6 @@ buf.fill(0);
console.log(buf);
```
-
## Buffer Constants
<!-- YAML
added: 8.2.0
@@ -2730,5 +2730,5 @@ This value may depend on the JS engine that is being used.
[`util.inspect()`]: util.html#util_util_inspect_object_options
[RFC1345]: https://tools.ietf.org/html/rfc1345
[RFC4648, Section 5]: https://tools.ietf.org/html/rfc4648#section-5
-[WHATWG spec]: https://encoding.spec.whatwg.org/
+[WHATWG Encoding Standard]: https://encoding.spec.whatwg.org/
[iterator]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols
diff --git a/doc/api/util.md b/doc/api/util.md
index 616e989835..ab26ed9c28 100644
--- a/doc/api/util.md
+++ b/doc/api/util.md
@@ -536,6 +536,156 @@ added: v8.0.0
A Symbol that can be used to declare custom promisified variants of functions,
see [Custom promisified functions][].
+### Class: util.TextDecoder
+<!-- YAML
+added: REPLACEME
+-->
+
+> Stability: 1 - Experimental
+
+An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API.
+
+```js
+const decoder = new TextDecoder('shift_jis');
+let string = '';
+let buffer;
+while (buffer = getNextChunkSomehow()) {
+ string += decoder.decode(buffer, { stream: true });
+}
+string += decoder.decode(); // end-of-stream
+```
+
+#### WHATWG Supported Encodings
+
+Per the [WHATWG Encoding Standard][], the encodings supported by the
+`TextDecoder` API are outlined in the tables below. For each encoding,
+one or more aliases may be used. Support for some encodings is enabled
+only when Node.js is using the full ICU data.
+
+##### Encodings Supported By Default
+
+| Encoding | Aliases |
+| ----------- | --------------------------------- |
+| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` |
+| `'utf-16be'`| |
+| `'utf-16le'`| `'utf-16'` |
+
+##### Encodings Requiring Full-ICU
+
+| Encoding | Aliases |
+| ----------------- | -------------------------------- |
+| `'ibm866'` | `'866'`, `'cp866'`, `'csibm866'` |
+| `'iso-8859-2'` | `'csisolatin2'`, `'iso-ir-101'`, `'iso8859-2'`, `'iso88592'`, `'iso_8859-2'`, `'iso_8859-2:1987'`, `'l2'`, `'latin2'` |
+| `'iso-8859-3'` | `'csisolatin3'`, `'iso-ir-109'`, `'iso8859-3'`, `'iso88593'`, `'iso_8859-3'`, `'iso_8859-3:1988'`, `'l3'`, `'latin3'` |
+| `'iso-8859-4'` | `'csisolatin4'`, `'iso-ir-110'`, `'iso8859-4'`, `'iso88594'`, `'iso_8859-4'`, `'iso_8859-4:1988'`, `'l4'`, `'latin4'` |
+| `'iso-8859-5'` | `'csisolatincyrillic'`, `'cyrillic'`, `'iso-ir-144'`, `'iso8859-5'`, `'iso88595'`, `'iso_8859-5'`, `'iso_8859-5:1988'`|
+| `'iso-8859-6'` | `'arabic'`, `'asmo-708'`, `'csiso88596e'`, `'csiso88596i'`, `'csisolatinarabic'`, `'ecma-114'`, `'iso-8859-6-e'`, `'iso-8859-6-i'`, `'iso-ir-127'`, `'iso8859-6'`, `'iso88596'`, `'iso_8859-6'`, `'iso_8859-6:1987'` |
+| `'iso-8859-7'` | `'csisolatingreek'`, `'ecma-118'`, `'elot_928'`, `'greek'`, `'greek8'`, `'iso-ir-126'`, `'iso8859-7'`, `'iso88597'`, `'iso_8859-7'`, `'iso_8859-7:1987'`, `'sun_eu_greek'` |
+| `'iso-8859-8'` | `'csiso88598e'`, `'csisolatinhebrew'`, `'hebrew'`, `'iso-8859-8-e'`, `'iso-ir-138'`, `'iso8859-8'`, `'iso88598'`, `'iso_8859-8'`, `'iso_8859-8:1988'`, `'visual'` |
+| `'iso-8859-8-i'` | `'csiso88598i'`, `'logical'` |
+| `'iso-8859-10'` | `'csisolatin6'`, `'iso-ir-157'`, `'iso8859-10'`, `'iso885910'`, `'l6'`, `'latin6'` |
+| `'iso-8859-13'` | `'iso8859-13'`, `'iso885913'` |
+| `'iso-8859-14'` | `'iso8859-14'`, `'iso885914'` |
+| `'iso-8859-15'` | `'csisolatin9'`, `'iso8859-15'`, `'iso885915'`, `'iso_8859-15'`, `'l9'` |
+| `'koi8-r'` | `'cskoi8r'`, `'koi'`, `'koi8'`, `'koi8_r'` |
+| `'koi8-u'` | `'koi8-ru'` |
+| `'macintosh'` | `'csmacintosh'`, `'mac'`, `'x-mac-roman'` |
+| `'windows-874'` | `'dos-874'`, `'iso-8859-11'`, `'iso8859-11'`, `'iso885911'`, `'tis-620'` |
+| `'windows-1250'` | `'cp1250'`, `'x-cp1250'` |
+| `'windows-1251'` | `'cp1251'`, `'x-cp1251'` |
+| `'windows-1252'` | `'ansi_x3.4-1968'`, `'ascii'`, `'cp1252'`, `'cp819'`, `'csisolatin1'`, `'ibm819'`, `'iso-8859-1'`, `'iso-ir-100'`, `'iso8859-1'`, `'iso88591'`, `'iso_8859-1'`, `'iso_8859-1:1987'`, `'l1'`, `'latin1'`, `'us-ascii'`, `'x-cp1252'` |
+| `'windows-1253'` | `'cp1253'`, `'x-cp1253'` |
+| `'windows-1254'` | `'cp1254'`, `'csisolatin5'`, `'iso-8859-9'`, `'iso-ir-148'`, `'iso8859-9'`, `'iso88599'`, `'iso_8859-9'`, `'iso_8859-9:1989'`, `'l5'`, `'latin5'`, `'x-cp1254'` |
+| `'windows-1255'` | `'cp1255'`, `'x-cp1255'` |
+| `'windows-1256'` | `'cp1256'`, `'x-cp1256'` |
+| `'windows-1257'` | `'cp1257'`, `'x-cp1257'` |
+| `'windows-1258'` | `'cp1258'`, `'x-cp1258'` |
+| `'x-mac-cyrillic'`| `'x-mac-ukrainian'` |
+| `'gbk'` | `'chinese'`, `'csgb2312'`, `'csiso58gb231280'`, `'gb2312'`, `'gb_2312'`, `'gb_2312-80'`, `'iso-ir-58'`, `'x-gbk'` |
+| `'gb18030'` | |
+| `'big5'` | `'big5-hkscs'`, `'cn-big5'`, `'csbig5'`, `'x-x-big5'` |
+| `'euc-jp'` | `'cseucpkdfmtjapanese'`, `'x-euc-jp'` |
+| `'iso-2022-jp'` | `'csiso2022jp'` |
+| `'shift_jis'` | `'csshiftjis'`, `'ms932'`, `'ms_kanji'`, `'shift-jis'`, `'sjis'`, `'windows-31j'`, `'x-sjis'` |
+| `'euc-kr'` | `'cseuckr'`, `'csksc56011987'`, `'iso-ir-149'`, `'korean'`, `'ks_c_5601-1987'`, `'ks_c_5601-1989'`, `'ksc5601'`, `'ksc_5601'`, `'windows-949'` |
+
+*Note*: The `'iso-8859-16'` encoding listed in the [WHATWG Encoding Standard][]
+is not supported.
+
+#### new TextDecoder([encoding[, options]])
+
+* `encoding` {string} Identifies the `encoding` that this `TextDecoder` instance
+ supports. Defaults to `'utf-8'`.
+* `options` {Object}
+ * `fatal` {boolean} `true` if decoding failures are fatal. Defaults to
+ `false`.
+ * `ignoreBOM` {boolean} When `true`, the `TextDecoder` will include the byte
+ order mark in the decoded result. When `false`, the byte order mark will
+ be removed from the output. This option is only used when `encoding` is
+ `'utf-8'`, `'utf-16be'` or `'utf-16le'`. Defaults to `false`.
+
+Creates a new `TextDecoder` instance. The `encoding` may specify one of the
+supported encodings or an alias.
+
+#### textDecoder.decode([input[, options]])
+
+* `input` {ArrayBuffer|DataView|TypedArray} An `ArrayBuffer`, `DataView` or
+ Typed Array instance containing the encoded data.
+* `options` {Object}
+ * `stream` {boolean} `true` if additional chunks of data are expected.
+ Defaults to `false`.
+* Returns: {string}
+
+Decodes the `input` and returns a string. If `options.stream` is `true`, any
+incomplete byte sequences occurring at the end of the `input` are buffered
+internally and emitted after the next call to `textDecoder.decode()`.
+
+If `textDecoder.fatal` is `true`, decoding errors that occur will result in a
+`TypeError` being thrown.
+
+#### textDecoder.encoding
+
+* Value: {string}
+
+The encoding supported by the `TextDecoder` instance.
+
+#### textDecoder.fatal
+
+* Value: {boolean}
+
+The value will be `true` if decoding errors result in a `TypeError` being
+thrown.
+
+#### textDecoder.ignoreBOM
+
+* Value: {boolean}
+
+The value will be `true` if the decoding result will include the byte order
+mark.
+
+### Class: util.TextEncoder
+<!-- YAML
+added: REPLACEME
+-->
+
+> Stability: 1 - Experimental
+
+An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All
+instances of `TextEncoder` only support `UTF-8` encoding.
+
+```js
+const encoder = new TextEncoder();
+const uint8array = encoder.encode('this is some data');
+```
+
+#### textEncoder.encode([input])
+
+* `input` {string} The text to encode. Defaults to an empty string.
+* Returns: {Uint8Array}
+
+UTF-8 encodes the `input` string and returns a `Uint8Array` containing the
+encoded bytes.
+
## Deprecated APIs
The following APIs have been deprecated and should no longer be used. Existing
@@ -1022,3 +1172,4 @@ Deprecated predecessor of `console.log`.
[Custom promisified functions]: #util_custom_promisified_functions
[constructor]: https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/Object/constructor
[semantically incompatible]: https://github.com/nodejs/node/issues/4179
+[WHATWG Encoding Standard]: https://encoding.spec.whatwg.org/
diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
new file mode 100644
index 0000000000..22ae5c6c0d
--- /dev/null
+++ b/lib/internal/encoding.js
@@ -0,0 +1,458 @@
+'use strict';
+
+// An implementation of the WHATWG Encoding Standard
+// https://encoding.spec.whatwg.org
+
+const errors = require('internal/errors');
+// Private property keys. kHandle, kFlags and kEncoding hold per-instance
+// TextDecoder state; kDecoder and kEncoder are defined as `true` on the
+// respective prototypes and are checked by every method to validate `this`.
+const kHandle = Symbol('handle');
+const kFlags = Symbol('flags');
+const kEncoding = Symbol('encoding');
+const kDecoder = Symbol('decoder');
+const kEncoder = Symbol('encoder');
+
+// Set to true once the experimental warning below has been emitted, so that
+// it is emitted only by the first TextDecoder/TextEncoder construction.
+let warned = false;
+const experimental =
+ 'The WHATWG Encoding Standard implementation is an experimental API. It ' +
+ 'should not yet be used in production applications.';
+
+const {
+ getConstructorOf,
+ customInspectSymbol: inspect
+} = require('internal/util');
+
+const {
+ isArrayBuffer
+} = process.binding('util');
+
+const {
+ encodeUtf8String
+} = process.binding('buffer');
+
+const {
+ decode: _decode,
+ getConverter,
+ hasConverter
+} = process.binding('icu');
+
+// Bit flags passed to the native converter; the values match the
+// ConverterFlags enum declared by ConverterObject in src/node_i18n.cc.
+const CONVERTER_FLAGS_FLUSH = 0x1;
+const CONVERTER_FLAGS_FATAL = 0x2;
+const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
+
+// Default `input` for TextDecoder.prototype.decode() when none is given.
+const empty = new Uint8Array(0);
+
+// Maps every supported encoding label (all keys are lowercase) to the
+// encoding name it resolves to. Each encoding name is also present as a key
+// that maps to itself. Consulted by getEncodingFromLabel().
+const encodings = new Map([
+ ['unicode-1-1-utf-8', 'utf-8'],
+ ['utf8', 'utf-8'],
+ ['utf-8', 'utf-8'],
+ ['866', 'ibm866'],
+ ['cp866', 'ibm866'],
+ ['csibm866', 'ibm866'],
+ ['ibm866', 'ibm866'],
+ ['csisolatin2', 'iso-8859-2'],
+ ['iso-8859-2', 'iso-8859-2'],
+ ['iso-ir-101', 'iso-8859-2'],
+ ['iso8859-2', 'iso-8859-2'],
+ ['iso88592', 'iso-8859-2'],
+ ['iso_8859-2', 'iso-8859-2'],
+ ['iso_8859-2:1987', 'iso-8859-2'],
+ ['l2', 'iso-8859-2'],
+ ['latin2', 'iso-8859-2'],
+ ['csisolatin3', 'iso-8859-3'],
+ ['iso-8859-3', 'iso-8859-3'],
+ ['iso-ir-109', 'iso-8859-3'],
+ ['iso8859-3', 'iso-8859-3'],
+ ['iso88593', 'iso-8859-3'],
+ ['iso_8859-3', 'iso-8859-3'],
+ ['iso_8859-3:1988', 'iso-8859-3'],
+ ['l3', 'iso-8859-3'],
+ ['latin3', 'iso-8859-3'],
+ ['csisolatin4', 'iso-8859-4'],
+ ['iso-8859-4', 'iso-8859-4'],
+ ['iso-ir-110', 'iso-8859-4'],
+ ['iso8859-4', 'iso-8859-4'],
+ ['iso88594', 'iso-8859-4'],
+ ['iso_8859-4', 'iso-8859-4'],
+ ['iso_8859-4:1988', 'iso-8859-4'],
+ ['l4', 'iso-8859-4'],
+ ['latin4', 'iso-8859-4'],
+ ['csisolatincyrillic', 'iso-8859-5'],
+ ['cyrillic', 'iso-8859-5'],
+ ['iso-8859-5', 'iso-8859-5'],
+ ['iso-ir-144', 'iso-8859-5'],
+ ['iso8859-5', 'iso-8859-5'],
+ ['iso88595', 'iso-8859-5'],
+ ['iso_8859-5', 'iso-8859-5'],
+ ['iso_8859-5:1988', 'iso-8859-5'],
+ ['arabic', 'iso-8859-6'],
+ ['asmo-708', 'iso-8859-6'],
+ ['csiso88596e', 'iso-8859-6'],
+ ['csiso88596i', 'iso-8859-6'],
+ ['csisolatinarabic', 'iso-8859-6'],
+ ['ecma-114', 'iso-8859-6'],
+ ['iso-8859-6', 'iso-8859-6'],
+ ['iso-8859-6-e', 'iso-8859-6'],
+ ['iso-8859-6-i', 'iso-8859-6'],
+ ['iso-ir-127', 'iso-8859-6'],
+ ['iso8859-6', 'iso-8859-6'],
+ ['iso88596', 'iso-8859-6'],
+ ['iso_8859-6', 'iso-8859-6'],
+ ['iso_8859-6:1987', 'iso-8859-6'],
+ ['csisolatingreek', 'iso-8859-7'],
+ ['ecma-118', 'iso-8859-7'],
+ ['elot_928', 'iso-8859-7'],
+ ['greek', 'iso-8859-7'],
+ ['greek8', 'iso-8859-7'],
+ ['iso-8859-7', 'iso-8859-7'],
+ ['iso-ir-126', 'iso-8859-7'],
+ ['iso8859-7', 'iso-8859-7'],
+ ['iso88597', 'iso-8859-7'],
+ ['iso_8859-7', 'iso-8859-7'],
+ ['iso_8859-7:1987', 'iso-8859-7'],
+ ['sun_eu_greek', 'iso-8859-7'],
+ ['csiso88598e', 'iso-8859-8'],
+ ['csisolatinhebrew', 'iso-8859-8'],
+ ['hebrew', 'iso-8859-8'],
+ ['iso-8859-8', 'iso-8859-8'],
+ ['iso-8859-8-e', 'iso-8859-8'],
+ ['iso-ir-138', 'iso-8859-8'],
+ ['iso8859-8', 'iso-8859-8'],
+ ['iso88598', 'iso-8859-8'],
+ ['iso_8859-8', 'iso-8859-8'],
+ ['iso_8859-8:1988', 'iso-8859-8'],
+ ['visual', 'iso-8859-8'],
+ ['csiso88598i', 'iso-8859-8-i'],
+ ['iso-8859-8-i', 'iso-8859-8-i'],
+ ['logical', 'iso-8859-8-i'],
+ ['csisolatin6', 'iso-8859-10'],
+ ['iso-8859-10', 'iso-8859-10'],
+ ['iso-ir-157', 'iso-8859-10'],
+ ['iso8859-10', 'iso-8859-10'],
+ ['iso885910', 'iso-8859-10'],
+ ['l6', 'iso-8859-10'],
+ ['latin6', 'iso-8859-10'],
+ ['iso-8859-13', 'iso-8859-13'],
+ ['iso8859-13', 'iso-8859-13'],
+ ['iso885913', 'iso-8859-13'],
+ ['iso-8859-14', 'iso-8859-14'],
+ ['iso8859-14', 'iso-8859-14'],
+ ['iso885914', 'iso-8859-14'],
+ ['csisolatin9', 'iso-8859-15'],
+ ['iso-8859-15', 'iso-8859-15'],
+ ['iso8859-15', 'iso-8859-15'],
+ ['iso885915', 'iso-8859-15'],
+ ['iso_8859-15', 'iso-8859-15'],
+ ['l9', 'iso-8859-15'],
+ ['cskoi8r', 'koi8-r'],
+ ['koi', 'koi8-r'],
+ ['koi8', 'koi8-r'],
+ ['koi8-r', 'koi8-r'],
+ ['koi8_r', 'koi8-r'],
+ ['koi8-ru', 'koi8-u'],
+ ['koi8-u', 'koi8-u'],
+ ['csmacintosh', 'macintosh'],
+ ['mac', 'macintosh'],
+ ['macintosh', 'macintosh'],
+ ['x-mac-roman', 'macintosh'],
+ ['dos-874', 'windows-874'],
+ ['iso-8859-11', 'windows-874'],
+ ['iso8859-11', 'windows-874'],
+ ['iso885911', 'windows-874'],
+ ['tis-620', 'windows-874'],
+ ['windows-874', 'windows-874'],
+ ['cp1250', 'windows-1250'],
+ ['windows-1250', 'windows-1250'],
+ ['x-cp1250', 'windows-1250'],
+ ['cp1251', 'windows-1251'],
+ ['windows-1251', 'windows-1251'],
+ ['x-cp1251', 'windows-1251'],
+ ['ansi_x3.4-1968', 'windows-1252'],
+ ['ascii', 'windows-1252'],
+ ['cp1252', 'windows-1252'],
+ ['cp819', 'windows-1252'],
+ ['csisolatin1', 'windows-1252'],
+ ['ibm819', 'windows-1252'],
+ ['iso-8859-1', 'windows-1252'],
+ ['iso-ir-100', 'windows-1252'],
+ ['iso8859-1', 'windows-1252'],
+ ['iso88591', 'windows-1252'],
+ ['iso_8859-1', 'windows-1252'],
+ ['iso_8859-1:1987', 'windows-1252'],
+ ['l1', 'windows-1252'],
+ ['latin1', 'windows-1252'],
+ ['us-ascii', 'windows-1252'],
+ ['windows-1252', 'windows-1252'],
+ ['x-cp1252', 'windows-1252'],
+ ['cp1253', 'windows-1253'],
+ ['windows-1253', 'windows-1253'],
+ ['x-cp1253', 'windows-1253'],
+ ['cp1254', 'windows-1254'],
+ ['csisolatin5', 'windows-1254'],
+ ['iso-8859-9', 'windows-1254'],
+ ['iso-ir-148', 'windows-1254'],
+ ['iso8859-9', 'windows-1254'],
+ ['iso88599', 'windows-1254'],
+ ['iso_8859-9', 'windows-1254'],
+ ['iso_8859-9:1989', 'windows-1254'],
+ ['l5', 'windows-1254'],
+ ['latin5', 'windows-1254'],
+ ['windows-1254', 'windows-1254'],
+ ['x-cp1254', 'windows-1254'],
+ ['cp1255', 'windows-1255'],
+ ['windows-1255', 'windows-1255'],
+ ['x-cp1255', 'windows-1255'],
+ ['cp1256', 'windows-1256'],
+ ['windows-1256', 'windows-1256'],
+ ['x-cp1256', 'windows-1256'],
+ ['cp1257', 'windows-1257'],
+ ['windows-1257', 'windows-1257'],
+ ['x-cp1257', 'windows-1257'],
+ ['cp1258', 'windows-1258'],
+ ['windows-1258', 'windows-1258'],
+ ['x-cp1258', 'windows-1258'],
+ ['x-mac-cyrillic', 'x-mac-cyrillic'],
+ ['x-mac-ukrainian', 'x-mac-cyrillic'],
+ ['chinese', 'gbk'],
+ ['csgb2312', 'gbk'],
+ ['csiso58gb231280', 'gbk'],
+ ['gb2312', 'gbk'],
+ ['gb_2312', 'gbk'],
+ ['gb_2312-80', 'gbk'],
+ ['gbk', 'gbk'],
+ ['iso-ir-58', 'gbk'],
+ ['x-gbk', 'gbk'],
+ ['gb18030', 'gb18030'],
+ ['big5', 'big5'],
+ ['big5-hkscs', 'big5'],
+ ['cn-big5', 'big5'],
+ ['csbig5', 'big5'],
+ ['x-x-big5', 'big5'],
+ ['cseucpkdfmtjapanese', 'euc-jp'],
+ ['euc-jp', 'euc-jp'],
+ ['x-euc-jp', 'euc-jp'],
+ ['csiso2022jp', 'iso-2022-jp'],
+ ['iso-2022-jp', 'iso-2022-jp'],
+ ['csshiftjis', 'shift_jis'],
+ ['ms932', 'shift_jis'],
+ ['ms_kanji', 'shift_jis'],
+ ['shift-jis', 'shift_jis'],
+ ['shift_jis', 'shift_jis'],
+ ['sjis', 'shift_jis'],
+ ['windows-31j', 'shift_jis'],
+ ['x-sjis', 'shift_jis'],
+ ['cseuckr', 'euc-kr'],
+ ['csksc56011987', 'euc-kr'],
+ ['euc-kr', 'euc-kr'],
+ ['iso-ir-149', 'euc-kr'],
+ ['korean', 'euc-kr'],
+ ['ks_c_5601-1987', 'euc-kr'],
+ ['ks_c_5601-1989', 'euc-kr'],
+ ['ksc5601', 'euc-kr'],
+ ['ksc_5601', 'euc-kr'],
+ ['windows-949', 'euc-kr'],
+ ['utf-16be', 'utf-16be'],
+ ['utf-16le', 'utf-16le'],
+ ['utf-16', 'utf-16le']
+]);
+
+// String.prototype.trim() cannot be used here because it also strips
+// non-ASCII whitespace. Only tab, line feed, form feed, carriage return and
+// space are removed from both ends of `label`; everything else is kept.
+function trimAsciiWhitespace(label) {
+  const isAsciiWhitespace = (ch) => {
+    switch (ch) {
+      case '\u0009':
+      case '\u000a':
+      case '\u000c':
+      case '\u000d':
+      case '\u0020':
+        return true;
+      default:
+        return false;
+    }
+  };
+
+  // Advance past leading whitespace.
+  let first = 0;
+  let last = label.length;
+  for (; first < last; first++) {
+    if (!isAsciiWhitespace(label[first]))
+      break;
+  }
+  // Back up over trailing whitespace, never crossing `first`.
+  for (; last > first; last--) {
+    if (!isAsciiWhitespace(label[last - 1]))
+      break;
+  }
+  return label.slice(first, last);
+}
+
+// Resolves an encoding label to its encoding name, or undefined when the
+// label is not in `encodings`. The label is tried verbatim first and only
+// lowercased and stripped of ASCII whitespace when that lookup misses.
+function getEncodingFromLabel(label) {
+  const exact = encodings.get(label);
+  if (exact === undefined) {
+    const normalized = trimAsciiWhitespace(label.toLowerCase());
+    return encodings.get(normalized);
+  }
+  return exact;
+}
+
+// Reports whether a native converter is available for the encoding that
+// `encoding` (a label, 'utf-8' by default) resolves to.
+// Throws ERR_INVALID_ARG_TYPE (a TypeError, matching the other argument
+// checks in this file) when `encoding` is not a string.
+function hasTextDecoder(encoding = 'utf-8') {
+  if (typeof encoding !== 'string')
+    throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'encoding', 'string');
+  const enc = getEncodingFromLabel(encoding);
+  // An unknown label resolves to undefined; answer false directly instead of
+  // handing undefined to the native binding.
+  return enc !== undefined && hasConverter(enc);
+}
+
+// Cached Buffer constructor; stays undefined until lazyBuffer() first runs.
+var Buffer;
+// Returns the Buffer constructor, requiring 'buffer' on first use only.
+function lazyBuffer() {
+  if (Buffer !== undefined)
+    return Buffer;
+  Buffer = require('buffer').Buffer;
+  return Buffer;
+}
+
+// WHATWG Encoding Standard TextDecoder, backed by a native converter handle
+// obtained from the `icu` binding.
+class TextDecoder {
+  // `encoding` is coerced to a string and resolved with
+  // getEncodingFromLabel(). `options.fatal` and `options.ignoreBOM` are
+  // folded into the flag bits handed to the native converter.
+  // Throws:
+  //  - ERR_INVALID_ARG_TYPE (TypeError) when `options` is not an object;
+  //  - ERR_ENCODING_NOT_SUPPORTED (RangeError) when the label is unknown or
+  //    when no native converter could be created for it.
+  constructor(encoding = 'utf-8', options = {}) {
+    if (!warned) {
+      warned = true;
+      process.emitWarning(experimental, 'ExperimentalWarning');
+    }
+
+    encoding = `${encoding}`;
+    // TypeError keeps this check consistent with the argument checks in
+    // decode().
+    if (typeof options !== 'object')
+      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'options', 'object');
+
+    const enc = getEncodingFromLabel(encoding);
+    if (enc === undefined)
+      throw new errors.RangeError('ERR_ENCODING_NOT_SUPPORTED', encoding);
+
+    // `typeof null` is 'object', so null passes the check above and simply
+    // leaves every flag cleared.
+    var flags = 0;
+    if (options !== null) {
+      flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
+      flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
+    }
+
+    // The binding returns undefined when it cannot open a converter. That is
+    // the same condition as an unknown label, so raise the same RangeError.
+    const handle = getConverter(enc, flags);
+    if (handle === undefined)
+      throw new errors.RangeError('ERR_ENCODING_NOT_SUPPORTED', encoding);
+
+    this[kHandle] = handle;
+    this[kFlags] = flags;
+    this[kEncoding] = enc;
+  }
+
+  // The encoding name this decoder was created for.
+  get encoding() {
+    if (this == null || this[kDecoder] !== true)
+      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
+    return this[kEncoding];
+  }
+
+  // True when the decoder was created with a truthy `fatal` option.
+  get fatal() {
+    if (this == null || this[kDecoder] !== true)
+      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
+    return (this[kFlags] & CONVERTER_FLAGS_FATAL) === CONVERTER_FLAGS_FATAL;
+  }
+
+  // True when the decoder was created with a truthy `ignoreBOM` option.
+  get ignoreBOM() {
+    if (this == null || this[kDecoder] !== true)
+      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
+    return (this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM) ===
+           CONVERTER_FLAGS_IGNORE_BOM;
+  }
+
+  // Decodes `input` (an ArrayBuffer or ArrayBufferView, empty by default)
+  // and returns the resulting string. Unless `options.stream` is truthy the
+  // native converter is asked to flush.
+  // Throws a TypeError with code ERR_INVALID_THIS, ERR_INVALID_ARG_TYPE or
+  // ERR_ENCODING_INVALID_ENCODED_DATA.
+  decode(input = empty, options = {}) {
+    if (this == null || this[kDecoder] !== true)
+      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
+    if (isArrayBuffer(input)) {
+      input = lazyBuffer().from(input);
+    } else if (!ArrayBuffer.isView(input)) {
+      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'input',
+                                 ['ArrayBuffer', 'ArrayBufferView']);
+    }
+    if (typeof options !== 'object') {
+      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'options', 'object');
+    }
+
+    var flags = 0;
+    if (options !== null)
+      flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
+
+    // The binding returns a number (exposed as `errno` below) instead of a
+    // result when decoding fails.
+    const ret = _decode(this[kHandle], input, flags);
+    if (typeof ret === 'number') {
+      const err = new errors.TypeError('ERR_ENCODING_INVALID_ENCODED_DATA',
+                                       this.encoding);
+      err.errno = ret;
+      throw err;
+    }
+    return ret.toString('ucs2');
+  }
+
+  // Custom util.inspect() hook: shows encoding, fatal and ignoreBOM, plus the
+  // internal flags and handle when `opts.showHidden` is set.
+  [inspect](depth, opts) {
+    if (this == null || this[kDecoder] !== true)
+      throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
+    if (typeof depth === 'number' && depth < 0)
+      return opts.stylize('[Object]', 'special');
+    var ctor = getConstructorOf(this);
+    var obj = Object.create({
+      constructor: ctor === null ? TextDecoder : ctor
+    });
+    obj.encoding = this.encoding;
+    obj.fatal = this.fatal;
+    obj.ignoreBOM = this.ignoreBOM;
+    if (opts.showHidden) {
+      obj[kFlags] = this[kFlags];
+      obj[kHandle] = this[kHandle];
+    }
+    // Lazy to avoid circular dependency
+    return require('util').inspect(obj, opts);
+  }
+}
+
+// WHATWG Encoding Standard TextEncoder; it only ever produces UTF-8.
+class TextEncoder {
+  // Emits the one-time experimental warning if nothing has emitted it yet.
+  constructor() {
+    if (warned)
+      return;
+    warned = true;
+    process.emitWarning(experimental, 'ExperimentalWarning');
+  }
+
+  // Always 'utf-8'.
+  get encoding() {
+    if (!(this != null && this[kEncoder] === true))
+      throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
+    return 'utf-8';
+  }
+
+  // Coerces `input` to a string (empty by default) and returns the result of
+  // the native encodeUtf8String() binding for it.
+  encode(input = '') {
+    if (!(this != null && this[kEncoder] === true))
+      throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
+    const text = `${input}`;
+    return encodeUtf8String(text);
+  }
+
+  // Custom util.inspect() hook: renders an object exposing only `encoding`.
+  [inspect](depth, opts) {
+    if (!(this != null && this[kEncoder] === true))
+      throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
+    const tooDeep = typeof depth === 'number' && depth < 0;
+    if (tooDeep)
+      return opts.stylize('[Object]', 'special');
+    const ctor = getConstructorOf(this);
+    const proto = { constructor: ctor === null ? TextEncoder : ctor };
+    const summary = Object.create(proto);
+    summary.encoding = this.encoding;
+    // Lazy to avoid circular dependency
+    return require('util').inspect(summary, opts);
+  }
+}
+
+// Brand TextDecoder.prototype with kDecoder (the marker every method checks
+// on `this`), make the public members enumerable and set the toStringTag.
+Object.defineProperties(
+ TextDecoder.prototype, {
+ [kDecoder]: { enumerable: false, value: true, configurable: false },
+ 'decode': { enumerable: true },
+ 'encoding': { enumerable: true },
+ 'fatal': { enumerable: true },
+ 'ignoreBOM': { enumerable: true },
+ [Symbol.toStringTag]: {
+ configurable: true,
+ value: 'TextDecoder'
+ } });
+// Same treatment for TextEncoder.prototype, branded with kEncoder.
+Object.defineProperties(
+ TextEncoder.prototype, {
+ [kEncoder]: { enumerable: false, value: true, configurable: false },
+ 'encode': { enumerable: true },
+ 'encoding': { enumerable: true },
+ [Symbol.toStringTag]: {
+ configurable: true,
+ value: 'TextEncoder'
+ } });
+
+module.exports = {
+ getEncodingFromLabel,
+ hasTextDecoder,
+ TextDecoder,
+ TextEncoder
+};
diff --git a/lib/internal/errors.js b/lib/internal/errors.js
index aa5e4ca1b3..df26230a6f 100644
--- a/lib/internal/errors.js
+++ b/lib/internal/errors.js
@@ -109,6 +109,10 @@ E('ERR_CPU_USAGE', 'Unable to obtain cpu usage %s');
E('ERR_DNS_SET_SERVERS_FAILED', (err, servers) =>
`c-ares failed to set servers: "${err}" [${servers}]`);
E('ERR_FALSY_VALUE_REJECTION', 'Promise was rejected with falsy value');
+// Raised by TextDecoder when an encoding label cannot be used.
+E('ERR_ENCODING_NOT_SUPPORTED',
+ (enc) => `The "${enc}" encoding is not supported`);
+// Raised by TextDecoder.prototype.decode() when native decoding fails.
+E('ERR_ENCODING_INVALID_ENCODED_DATA',
+ (enc) => `The encoded data was not valid for encoding ${enc}`);
E('ERR_HTTP_HEADERS_SENT',
'Cannot render headers after they are sent to the client');
E('ERR_HTTP_INVALID_STATUS_CODE', 'Invalid status code: %s');
diff --git a/lib/util.js b/lib/util.js
index ad58f9ef68..86be8612b9 100644
--- a/lib/util.js
+++ b/lib/util.js
@@ -22,6 +22,7 @@
'use strict';
const errors = require('internal/errors');
+const { TextDecoder, TextEncoder } = require('internal/encoding');
const { errname } = process.binding('uv');
@@ -1125,6 +1126,8 @@ module.exports = exports = {
isPrimitive,
log,
promisify,
+ TextDecoder,
+ TextEncoder,
// Deprecated Old Stuff
debug: deprecate(debug,
diff --git a/node.gyp b/node.gyp
index b0e4676a96..31b4191690 100644
--- a/node.gyp
+++ b/node.gyp
@@ -82,6 +82,7 @@
'lib/internal/cluster/shared_handle.js',
'lib/internal/cluster/utils.js',
'lib/internal/cluster/worker.js',
+ 'lib/internal/encoding.js',
'lib/internal/errors.js',
'lib/internal/freelist.js',
'lib/internal/fs.js',
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index d05858ecbd..b3f5793f89 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -1200,6 +1200,27 @@ void Swap64(const FunctionCallbackInfo<Value>& args) {
}
+// Encode a single string to a UTF-8 Uint8Array (not Buffer).
+// Used in TextEncoder.prototype.encode.
+// Expects args[0] to be a string (enforced with CHECK) and returns a
+// Uint8Array spanning the whole of a newly created ArrayBuffer.
+static void EncodeUtf8String(const FunctionCallbackInfo<Value>& args) {
+ Environment* env = Environment::GetCurrent(args);
+ CHECK_GE(args.Length(), 1);
+ CHECK(args[0]->IsString());
+
+ Local<String> str = args[0].As<String>();
+ // The buffer is sized from the string's UTF-8 length, with no room for a
+ // terminator; WriteUtf8 is told not to write one below.
+ size_t length = str->Utf8Length();
+ // NOTE(review): the UncheckedMalloc result is used without a null check;
+ // confirm how allocation failure for a non-empty string should be handled.
+ char* data = node::UncheckedMalloc(length);
+ str->WriteUtf8(data,
+ -1, // We are certain that `data` is sufficiently large
+ NULL,
+ String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8);
+ // kInternalized: presumably hands ownership of `data` to the ArrayBuffer,
+ // since it is not freed here; confirm against the V8 API.
+ auto array_buf = ArrayBuffer::New(env->isolate(), data, length,
+ ArrayBufferCreationMode::kInternalized);
+ auto array = Uint8Array::New(array_buf, 0, length);
+ args.GetReturnValue().Set(array);
+}
+
+
// pass Buffer object to load prototype methods
void SetupBufferJS(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
@@ -1266,6 +1287,8 @@ void Initialize(Local<Object> target,
env->SetMethod(target, "swap32", Swap32);
env->SetMethod(target, "swap64", Swap64);
+ env->SetMethod(target, "encodeUtf8String", EncodeUtf8String);
+
target->Set(env->context(),
FIXED_ONE_BYTE_STRING(env->isolate(), "kMaxLength"),
Integer::NewFromUnsigned(env->isolate(), kMaxLength)).FromJust();
diff --git a/src/node_i18n.cc b/src/node_i18n.cc
index 3b33744949..2e1aeaa4cb 100644
--- a/src/node_i18n.cc
+++ b/src/node_i18n.cc
@@ -50,6 +50,8 @@
#include "env-inl.h"
#include "util.h"
#include "util-inl.h"
+#include "base-object.h"
+#include "base-object-inl.h"
#include "v8.h"
#include <unicode/utypes.h>
@@ -86,10 +88,12 @@ namespace node {
using v8::Context;
using v8::FunctionCallbackInfo;
+using v8::HandleScope;
using v8::Isolate;
using v8::Local;
using v8::MaybeLocal;
using v8::Object;
+using v8::ObjectTemplate;
using v8::String;
using v8::Value;
@@ -123,6 +127,15 @@ struct Converter {
}
}
+ // Adopts an already-opened ICU converter (must not be null); the destructor
+ // closes it. When `sub` is given it is installed as the converter's
+ // substitution characters.
+ explicit Converter(UConverter* converter,
+ const char* sub = NULL) : conv(converter) {
+ CHECK_NE(conv, nullptr);
+ UErrorCode status = U_ZERO_ERROR;
+ if (sub != NULL) {
+ // NOTE(review): `status` is not inspected after this call; confirm that
+ // ignoring a failure to set the substitution characters is intended.
+ ucnv_setSubstChars(conv, sub, strlen(sub), &status);
+ }
+ }
+
~Converter() {
ucnv_close(conv);
}
@@ -130,6 +143,143 @@ struct Converter {
UConverter* conv;
};
+// A Converter owned by a JavaScript wrapper object. Create() builds the
+// wrapper and returns it to JavaScript; Decode() unwraps it again to run the
+// ICU converter over one chunk of input. Used by TextDecoder in
+// lib/internal/encoding.js.
+class ConverterObject : public BaseObject, Converter {
+ public:
+  // Bit flags received from JavaScript. lib/internal/encoding.js declares
+  // constants with the same names and values.
+  enum ConverterFlags {
+    CONVERTER_FLAGS_FLUSH      = 0x1,
+    CONVERTER_FLAGS_FATAL      = 0x2,
+    CONVERTER_FLAGS_IGNORE_BOM = 0x4
+  };
+
+  ~ConverterObject() override {}
+
+  // hasConverter(label): returns true when ICU can open a converter for
+  // `label`, false otherwise.
+  static void Has(const FunctionCallbackInfo<Value>& args) {
+    Environment* env = Environment::GetCurrent(args);
+    HandleScope scope(env->isolate());
+
+    CHECK_GE(args.Length(), 1);
+    Utf8Value label(env->isolate(), args[0]);
+
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter* conv = ucnv_open(*label, &status);
+    args.GetReturnValue().Set(!!U_SUCCESS(status));
+    ucnv_close(conv);
+  }
+
+  // getConverter(label, flags): opens an ICU converter for `label` and
+  // returns a wrapper object for it. Nothing is returned (undefined in
+  // JavaScript) when ICU cannot open the converter.
+  static void Create(const FunctionCallbackInfo<Value>& args) {
+    Environment* env = Environment::GetCurrent(args);
+    HandleScope scope(env->isolate());
+
+    CHECK_GE(args.Length(), 2);
+    Utf8Value label(env->isolate(), args[0]);
+    int flags = args[1]->Uint32Value(env->context()).ToChecked();
+    bool fatal =
+        (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL;
+    bool ignoreBOM =
+        (flags & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM;
+
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter* conv = ucnv_open(*label, &status);
+    if (U_FAILURE(status))
+      return;
+
+    // In fatal mode, install the STOP callback so that invalid input makes
+    // the conversion fail.
+    if (fatal) {
+      status = U_ZERO_ERROR;
+      ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP,
+                          nullptr, nullptr, nullptr, &status);
+    }
+
+    Local<ObjectTemplate> t = ObjectTemplate::New(env->isolate());
+    t->SetInternalFieldCount(1);
+    Local<Object> obj = t->NewInstance(env->context()).ToLocalChecked();
+    // The new object is attached to `obj` and made weak in the constructor.
+    new ConverterObject(env, obj, conv, ignoreBOM);
+    args.GetReturnValue().Set(obj);
+  }
+
+  // decode(converter, buffer, flags): converts the bytes in `buffer` to
+  // UTF-16. On success the return value is the result of ToBufferEndian();
+  // on failure it is the ICU status code as a number.
+  static void Decode(const FunctionCallbackInfo<Value>& args) {
+    Environment* env = Environment::GetCurrent(args);
+
+    CHECK_GE(args.Length(), 3);  // Converter, Buffer, Flags
+
+    ConverterObject* converter;
+    ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>());
+    SPREAD_BUFFER_ARG(args[1], input_obj);
+    int flags = args[2]->Uint32Value(env->context()).ToChecked();
+
+    UErrorCode status = U_ZERO_ERROR;
+    MaybeStackBuffer<UChar> result;
+    MaybeLocal<Object> ret;
+
+    UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
+
+    // Size the output from the input length. When flushing, also account for
+    // input the converter is still holding from earlier calls, so that a
+    // final call with little or no input still has room to emit it.
+    size_t units = input_obj_length;
+    if (flush) {
+      int32_t pending = ucnv_toUCountPending(converter->conv, &status);
+      if (U_SUCCESS(status) && pending > 0 &&
+          static_cast<size_t>(pending) > units) {
+        units = static_cast<size_t>(pending);
+      }
+      status = U_ZERO_ERROR;
+    }
+
+    // Capacity in UChars. The factor of two keeps the amount of room offered
+    // to ICU the same as before, but the storage is now really that large.
+    size_t limit = 2 * ucnv_getMinCharSize(converter->conv) * units;
+    if (limit > 0)
+      result.AllocateSufficientStorage(limit);
+
+    const char* source = input_obj_data;
+    size_t source_length = input_obj_length;
+
+    // For Unicode converters, unless ignoreBOM was requested, skip the
+    // signature reported by ICU. This is attempted once, until the next
+    // flush resets bomSeen_.
+    // NOTE(review): the detected signature is not compared with this
+    // converter's own encoding; confirm that this is intended.
+    if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) {
+      int32_t bomOffset = 0;
+      ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status);
+      source += bomOffset;
+      source_length -= bomOffset;
+      converter->bomSeen_ = true;
+    }
+
+    // `target` is a UChar*, so pointer arithmetic already advances in UChar
+    // units; the limit must not be scaled by sizeof(UChar) again.
+    UChar* target = *result;
+    ucnv_toUnicode(converter->conv,
+                   &target, target + limit,
+                   &source, source + source_length,
+                   NULL, flush, &status);
+
+    if (U_SUCCESS(status)) {
+      if (limit > 0)
+        result.SetLength(target - &result[0]);
+      ret = ToBufferEndian(env, &result);
+      args.GetReturnValue().Set(ret.ToLocalChecked());
+    } else {
+      args.GetReturnValue().Set(status);
+    }
+
+    if (flush) {
+      // Reset the converter state
+      converter->bomSeen_ = false;
+      ucnv_reset(converter->conv);
+    }
+  }
+
+ protected:
+  // Takes over `converter`, attaches this object to `wrap` and makes the
+  // reference weak. Records whether the converter is one of the Unicode
+  // types that receive BOM handling in Decode().
+  ConverterObject(Environment* env,
+                  v8::Local<v8::Object> wrap,
+                  UConverter* converter,
+                  bool ignoreBOM,
+                  const char* sub = NULL) :
+                  BaseObject(env, wrap),
+                  Converter(converter, sub),
+                  ignoreBOM_(ignoreBOM) {
+    MakeWeak<ConverterObject>(this);
+
+    switch (ucnv_getType(converter)) {
+      case UCNV_UTF8:
+      case UCNV_UTF16_BigEndian:
+      case UCNV_UTF16_LittleEndian:
+        unicode_ = true;
+        break;
+      default:
+        unicode_ = false;
+    }
+  }
+
+ private:
+  bool unicode_ = false;    // True if this is a Unicode converter
+  bool ignoreBOM_ = false;  // True if the BOM should be ignored on Unicode
+  bool bomSeen_ = false;    // True if the BOM has been seen
+};
+
// One-Shot Converters
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
@@ -717,6 +867,11 @@ void Init(Local<Object> target,
// One-shot converters
env->SetMethod(target, "icuErrName", ICUErrorName);
env->SetMethod(target, "transcode", Transcode);
+
+ // ConverterObject
+ env->SetMethod(target, "getConverter", ConverterObject::Create);
+ env->SetMethod(target, "decode", ConverterObject::Decode);
+ env->SetMethod(target, "hasConverter", ConverterObject::Has);
}
} // namespace i18n
diff --git a/src/node_i18n.h b/src/node_i18n.h
index adf9feb414..f7801ce666 100644
--- a/src/node_i18n.h
+++ b/src/node_i18n.h
@@ -25,6 +25,7 @@
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
#include "node.h"
+#include <unicode/ucnv.h>
#include <string>
#if defined(NODE_HAVE_I18N_SUPPORT)
diff --git a/src/node_util.cc b/src/node_util.cc
index 50de94bfb2..c1dff77386 100644
--- a/src/node_util.cc
+++ b/src/node_util.cc
@@ -21,6 +21,7 @@ using v8::Value;
#define VALUE_METHOD_MAP(V) \
+ V(isArrayBuffer, IsArrayBuffer) \
V(isAsyncFunction, IsAsyncFunction) \
V(isDataView, IsDataView) \
V(isDate, IsDate) \
diff --git a/test/parallel/test-whatwg-encoding.js b/test/parallel/test-whatwg-encoding.js
new file mode 100644
index 0000000000..c181df860c
--- /dev/null
+++ b/test/parallel/test-whatwg-encoding.js
@@ -0,0 +1,385 @@
+// Flags: --expose-internals
+'use strict';
+
+const common = require('../common');
+const assert = require('assert');
+const { TextEncoder, TextDecoder } = require('util');
+const { customInspectSymbol: inspect } = require('internal/util');
+const { getEncodingFromLabel } = require('internal/encoding');
+
+const encoded = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x65,
+ 0x73, 0x74, 0xe2, 0x82, 0xac]);
+
+// Without ICU there is nothing to exercise here.
+if (!common.hasIntl)
+  common.skip('WHATWG Encoding tests because ICU is not present.');
+
+// Both constructors must be exported from util.
+assert.ok(TextDecoder);
+assert.ok(TextEncoder);
+
+// TextEncoder produces UTF-8 and keeps the leading U+FEFF as a BOM.
+const enc = new TextEncoder();
+assert.ok(enc);
+const buf = enc.encode('\ufefftest€');
+
+assert.strictEqual(Buffer.compare(buf, encoded), 0);
+
+
+// TextDecoder, UTF-8, default options (fatal: false, ignoreBOM: false)
+{
+  const labels = ['unicode-1-1-utf-8', 'utf8', 'utf-8'];
+  // One-shot decode: the leading BOM does not appear in the output.
+  for (const label of labels) {
+    const decoder = new TextDecoder(label);
+    assert.strictEqual(decoder.decode(buf), 'test€');
+  }
+  // Streaming decode, with the input split inside the 3-byte euro sign.
+  for (const label of labels) {
+    const decoder = new TextDecoder(label);
+    const head = decoder.decode(buf.slice(0, 8), { stream: true });
+    const tail = decoder.decode(buf.slice(8));
+    assert.strictEqual(head + tail, 'test€');
+  }
+}
+
+// TextDecoder, UTF-8, ignoreBOM: true (fatal left at its default)
+{
+  const labels = ['unicode-1-1-utf-8', 'utf8', 'utf-8'];
+  // One-shot decode: the BOM is kept as U+FEFF in the output.
+  for (const label of labels) {
+    const decoder = new TextDecoder(label, { ignoreBOM: true });
+    assert.strictEqual(decoder.decode(buf), '\ufefftest€');
+  }
+  // Streaming decode, with the input split inside the 3-byte euro sign.
+  for (const label of labels) {
+    const decoder = new TextDecoder(label, { ignoreBOM: true });
+    const head = decoder.decode(buf.slice(0, 8), { stream: true });
+    const tail = decoder.decode(buf.slice(8));
+    assert.strictEqual(head + tail, '\ufefftest€');
+  }
+}
+
+// TextDecoder, UTF-8, fatal: true (ignoreBOM left at its default)
+{
+  const labels = ['unicode-1-1-utf-8', 'utf8', 'utf-8'];
+  // A truncated multi-byte sequence must throw when the input is final.
+  for (const label of labels) {
+    const strict = new TextDecoder(label, { fatal: true });
+    assert.throws(() => strict.decode(buf.slice(0, 8)), common.expectsError({
+      code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
+      type: TypeError,
+      message: /^The encoded data was not valid for encoding utf-8$/
+    }));
+  }
+  // The same bytes are accepted while streaming, then completed.
+  for (const label of labels) {
+    const strict = new TextDecoder(label, { fatal: true });
+    assert.doesNotThrow(() => strict.decode(buf.slice(0, 8), { stream: true }));
+    assert.doesNotThrow(() => strict.decode(buf.slice(8)));
+  }
+}
+
+// TextDecoder, UTF-16LE: decode bytes produced by Buffer's utf-16le encoder
+{
+  const littleEndian = Buffer.from('test€', 'utf-16le');
+  const decoder = new TextDecoder('utf-16le');
+  assert.strictEqual(decoder.decode(littleEndian), 'test€');
+}
+
+// TextDecoder, UTF-16BE: each code unit is given high byte first
+{
+  const bigEndian = Buffer.from([0x00, 0x74, 0x00, 0x65, 0x00, 0x73,
+                                 0x00, 0x74, 0x20, 0xac]);
+  const decoder = new TextDecoder('utf-16be');
+  assert.strictEqual(decoder.decode(bigEndian), 'test€');
+}
+
+{
+  // The inspect hook runs on a real TextDecoder and rejects any other `this`.
+  const hook = TextDecoder.prototype[inspect];
+  hook.call(new TextDecoder(), Infinity, {});
+  for (const receiver of [{}, [], true, 1, '', new TextEncoder()]) {
+    assert.throws(() => hook.call(receiver, Infinity, {}),
+                  common.expectsError({
+                    code: 'ERR_INVALID_THIS',
+                    message: 'Value of "this" must be of type TextDecoder'
+                  }));
+  }
+}
+
+{
+  // The inspect hook runs on a real TextEncoder and rejects any other `this`.
+  const hook = TextEncoder.prototype[inspect];
+  hook.call(new TextEncoder(), Infinity, {});
+  for (const receiver of [{}, [], true, 1, '', new TextDecoder()]) {
+    assert.throws(() => hook.call(receiver, Infinity, {}),
+                  common.expectsError({
+                    code: 'ERR_INVALID_THIS',
+                    message: 'Value of "this" must be of type TextEncoder'
+                  }));
+  }
+}
+
+// Test Encoding Mappings
+{
+
+ const mappings = {
+ 'utf-8': [
+ 'unicode-1-1-utf-8',
+ 'utf8'
+ ],
+ 'utf-16be': [],
+ 'utf-16le': [
+ 'utf-16'
+ ],
+ 'ibm866': [
+ '866',
+ 'cp866',
+ 'csibm866'
+ ],
+ 'iso-8859-2': [
+ 'csisolatin2',
+ 'iso-ir-101',
+ 'iso8859-2',
+ 'iso88592',
+ 'iso_8859-2',
+ 'iso_8859-2:1987',
+ 'l2',
+ 'latin2'
+ ],
+ 'iso-8859-3': [
+ 'csisolatin3',
+ 'iso-ir-109',
+ 'iso8859-3',
+ 'iso88593',
+ 'iso_8859-3',
+ 'iso_8859-3:1988',
+ 'l3',
+ 'latin3'
+ ],
+ 'iso-8859-4': [
+ 'csisolatin4',
+ 'iso-ir-110',
+ 'iso8859-4',
+ 'iso88594',
+ 'iso_8859-4',
+ 'iso_8859-4:1988',
+ 'l4',
+ 'latin4'
+ ],
+ 'iso-8859-5': [
+ 'csisolatincyrillic',
+ 'cyrillic',
+ 'iso-ir-144',
+ 'iso8859-5',
+ 'iso88595',
+ 'iso_8859-5',
+ 'iso_8859-5:1988'
+ ],
+ 'iso-8859-6': [
+ 'arabic',
+ 'asmo-708',
+ 'csiso88596e',
+ 'csiso88596i',
+ 'csisolatinarabic',
+ 'ecma-114',
+ 'iso-8859-6-e',
+ 'iso-8859-6-i',
+ 'iso-ir-127',
+ 'iso8859-6',
+ 'iso88596',
+ 'iso_8859-6',
+ 'iso_8859-6:1987'
+ ],
+ 'iso-8859-7': [
+ 'csisolatingreek',
+ 'ecma-118',
+ 'elot_928',
+ 'greek',
+ 'greek8',
+ 'iso-ir-126',
+ 'iso8859-7',
+ 'iso88597',
+ 'iso_8859-7',
+ 'iso_8859-7:1987',
+ 'sun_eu_greek'
+ ],
+ 'iso-8859-8': [
+ 'csiso88598e',
+ 'csisolatinhebrew',
+ 'hebrew',
+ 'iso-8859-8-e',
+ 'iso-ir-138',
+ 'iso8859-8',
+ 'iso88598',
+ 'iso_8859-8',
+ 'iso_8859-8:1988',
+ 'visual'
+ ],
+ 'iso-8859-8-i': [
+ 'csiso88598i',
+ 'logical'
+ ],
+ 'iso-8859-10': [
+ 'csisolatin6',
+ 'iso-ir-157',
+ 'iso8859-10',
+ 'iso885910',
+ 'l6',
+ 'latin6'
+ ],
+ 'iso-8859-13': [
+ 'iso8859-13',
+ 'iso885913'
+ ],
+ 'iso-8859-14': [
+ 'iso8859-14',
+ 'iso885914'
+ ],
+ 'iso-8859-15': [
+ 'csisolatin9',
+ 'iso8859-15',
+ 'iso885915',
+ 'iso_8859-15',
+ 'l9'
+ ],
+ 'koi8-r': [
+ 'cskoi8r',
+ 'koi',
+ 'koi8',
+ 'koi8_r'
+ ],
+ 'koi8-u': [
+ 'koi8-ru'
+ ],
+ 'macintosh': [
+ 'csmacintosh',
+ 'mac',
+ 'x-mac-roman'
+ ],
+ 'windows-874': [
+ 'dos-874',
+ 'iso-8859-11',
+ 'iso8859-11',
+ 'iso885911',
+ 'tis-620'
+ ],
+ 'windows-1250': [
+ 'cp1250',
+ 'x-cp1250'
+ ],
+ 'windows-1251': [
+ 'cp1251',
+ 'x-cp1251'
+ ],
+ 'windows-1252': [
+ 'ansi_x3.4-1968',
+ 'ascii',
+ 'cp1252',
+ 'cp819',
+ 'csisolatin1',
+ 'ibm819',
+ 'iso-8859-1',
+ 'iso-ir-100',
+ 'iso8859-1',
+ 'iso88591',
+ 'iso_8859-1',
+ 'iso_8859-1:1987',
+ 'l1',
+ 'latin1',
+ 'us-ascii',
+ 'x-cp1252'
+ ],
+ 'windows-1253': [
+ 'cp1253',
+ 'x-cp1253'
+ ],
+ 'windows-1254': [
+ 'cp1254',
+ 'csisolatin5',
+ 'iso-8859-9',
+ 'iso-ir-148',
+ 'iso8859-9',
+ 'iso88599',
+ 'iso_8859-9',
+ 'iso_8859-9:1989',
+ 'l5',
+ 'latin5',
+ 'x-cp1254'
+ ],
+ 'windows-1255': [
+ 'cp1255',
+ 'x-cp1255'
+ ],
+ 'windows-1256': [
+ 'cp1256',
+ 'x-cp1256'
+ ],
+ 'windows-1257': [
+ 'cp1257',
+ 'x-cp1257'
+ ],
+ 'windows-1258': [
+ 'cp1258',
+ 'x-cp1258'
+ ],
+ 'x-mac-cyrillic': [
+ 'x-mac-ukrainian'
+ ],
+ 'gbk': [
+ 'chinese',
+ 'csgb2312',
+ 'csiso58gb231280',
+ 'gb2312',
+ 'gb_2312',
+ 'gb_2312-80',
+ 'iso-ir-58',
+ 'x-gbk'
+ ],
+ 'gb18030': [ ],
+ 'big5': [
+ 'big5-hkscs',
+ 'cn-big5',
+ 'csbig5',
+ 'x-x-big5'
+ ],
+ 'euc-jp': [
+ 'cseucpkdfmtjapanese',
+ 'x-euc-jp'
+ ],
+ 'iso-2022-jp': [
+ 'csiso2022jp'
+ ],
+ 'shift_jis': [
+ 'csshiftjis',
+ 'ms932',
+ 'ms_kanji',
+ 'shift-jis',
+ 'sjis',
+ 'windows-31j',
+ 'x-sjis'
+ ],
+ 'euc-kr': [
+ ' euc-kr \t',
+ 'EUC-kr \n',
+ 'cseuckr',
+ 'csksc56011987',
+ 'iso-ir-149',
+ 'korean',
+ 'ks_c_5601-1987',
+ 'ks_c_5601-1989',
+ 'ksc5601',
+ 'ksc_5601',
+ 'windows-949'
+ ]
+ };
+ // Each canonical name maps to itself and every alias maps to that name.
+ for (const [name, aliases] of Object.entries(mappings)) {
+   assert.strictEqual(getEncodingFromLabel(name), name);
+   for (const alias of aliases)
+     assert.strictEqual(getEncodingFromLabel(alias), name);
+ }
+ // A label that is not in the table resolves to undefined.
+ assert.strictEqual(getEncodingFromLabel('made-up'), undefined);
+}
diff --git a/tools/icu/icu-generic.gyp b/tools/icu/icu-generic.gyp
index 4c2125a043..93d7cd5f6d 100644
--- a/tools/icu/icu-generic.gyp
+++ b/tools/icu/icu-generic.gyp
@@ -30,15 +30,6 @@
'type': 'none',
'toolsets': [ 'host', 'target' ],
'direct_dependent_settings': {
- 'conditions': [
- [ 'icu_endianness == "l"', {
- 'defines': [
- # ICU cannot swap the initial data without this.
- # http://bugs.icu-project.org/trac/ticket/11046
- 'UCONFIG_NO_LEGACY_CONVERSION=1'
- ],
- }],
- ],
'defines': [
'UCONFIG_NO_SERVICE=1',
'UCONFIG_NO_REGULAR_EXPRESSIONS=1',