summaryrefslogtreecommitdiff
path: root/lib/internal/encoding.js
diff options
context:
space:
mode:
Diffstat (limited to 'lib/internal/encoding.js')
-rw-r--r--lib/internal/encoding.js458
1 files changed, 458 insertions, 0 deletions
diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
new file mode 100644
index 0000000000..22ae5c6c0d
--- /dev/null
+++ b/lib/internal/encoding.js
@@ -0,0 +1,458 @@
+'use strict';
+
+// An implementation of the WHATWG Encoding Standard
+// https://encoding.spec.whatwg.org
+
+const errors = require('internal/errors');
+const kHandle = Symbol('handle');
+const kFlags = Symbol('flags');
+const kEncoding = Symbol('encoding');
+const kDecoder = Symbol('decoder');
+const kEncoder = Symbol('encoder');
+
+let warned = false;
+const experimental =
+ 'The WHATWG Encoding Standard implementation is an experimental API. It ' +
+ 'should not yet be used in production applications.';
+
+const {
+ getConstructorOf,
+ customInspectSymbol: inspect
+} = require('internal/util');
+
+const {
+ isArrayBuffer
+} = process.binding('util');
+
+const {
+ encodeUtf8String
+} = process.binding('buffer');
+
+const {
+ decode: _decode,
+ getConverter,
+ hasConverter
+} = process.binding('icu');
+
+const CONVERTER_FLAGS_FLUSH = 0x1;
+const CONVERTER_FLAGS_FATAL = 0x2;
+const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
+
+const empty = new Uint8Array(0);
+
+const encodings = new Map([
+ ['unicode-1-1-utf-8', 'utf-8'],
+ ['utf8', 'utf-8'],
+ ['utf-8', 'utf-8'],
+ ['866', 'ibm866'],
+ ['cp866', 'ibm866'],
+ ['csibm866', 'ibm866'],
+ ['ibm866', 'ibm866'],
+ ['csisolatin2', 'iso-8859-2'],
+ ['iso-8859-2', 'iso-8859-2'],
+ ['iso-ir-101', 'iso-8859-2'],
+ ['iso8859-2', 'iso-8859-2'],
+ ['iso88592', 'iso-8859-2'],
+ ['iso_8859-2', 'iso-8859-2'],
+ ['iso_8859-2:1987', 'iso-8859-2'],
+ ['l2', 'iso-8859-2'],
+ ['latin2', 'iso-8859-2'],
+ ['csisolatin3', 'iso-8859-3'],
+ ['iso-8859-3', 'iso-8859-3'],
+ ['iso-ir-109', 'iso-8859-3'],
+ ['iso8859-3', 'iso-8859-3'],
+ ['iso88593', 'iso-8859-3'],
+ ['iso_8859-3', 'iso-8859-3'],
+ ['iso_8859-3:1988', 'iso-8859-3'],
+ ['l3', 'iso-8859-3'],
+ ['latin3', 'iso-8859-3'],
+ ['csisolatin4', 'iso-8859-4'],
+ ['iso-8859-4', 'iso-8859-4'],
+ ['iso-ir-110', 'iso-8859-4'],
+ ['iso8859-4', 'iso-8859-4'],
+ ['iso88594', 'iso-8859-4'],
+ ['iso_8859-4', 'iso-8859-4'],
+ ['iso_8859-4:1988', 'iso-8859-4'],
+ ['l4', 'iso-8859-4'],
+ ['latin4', 'iso-8859-4'],
+ ['csisolatincyrillic', 'iso-8859-5'],
+ ['cyrillic', 'iso-8859-5'],
+ ['iso-8859-5', 'iso-8859-5'],
+ ['iso-ir-144', 'iso-8859-5'],
+ ['iso8859-5', 'iso-8859-5'],
+ ['iso88595', 'iso-8859-5'],
+ ['iso_8859-5', 'iso-8859-5'],
+ ['iso_8859-5:1988', 'iso-8859-5'],
+ ['arabic', 'iso-8859-6'],
+ ['asmo-708', 'iso-8859-6'],
+ ['csiso88596e', 'iso-8859-6'],
+ ['csiso88596i', 'iso-8859-6'],
+ ['csisolatinarabic', 'iso-8859-6'],
+ ['ecma-114', 'iso-8859-6'],
+ ['iso-8859-6', 'iso-8859-6'],
+ ['iso-8859-6-e', 'iso-8859-6'],
+ ['iso-8859-6-i', 'iso-8859-6'],
+ ['iso-ir-127', 'iso-8859-6'],
+ ['iso8859-6', 'iso-8859-6'],
+ ['iso88596', 'iso-8859-6'],
+ ['iso_8859-6', 'iso-8859-6'],
+ ['iso_8859-6:1987', 'iso-8859-6'],
+ ['csisolatingreek', 'iso-8859-7'],
+ ['ecma-118', 'iso-8859-7'],
+ ['elot_928', 'iso-8859-7'],
+ ['greek', 'iso-8859-7'],
+ ['greek8', 'iso-8859-7'],
+ ['iso-8859-7', 'iso-8859-7'],
+ ['iso-ir-126', 'iso-8859-7'],
+ ['iso8859-7', 'iso-8859-7'],
+ ['iso88597', 'iso-8859-7'],
+ ['iso_8859-7', 'iso-8859-7'],
+ ['iso_8859-7:1987', 'iso-8859-7'],
+ ['sun_eu_greek', 'iso-8859-7'],
+ ['csiso88598e', 'iso-8859-8'],
+ ['csisolatinhebrew', 'iso-8859-8'],
+ ['hebrew', 'iso-8859-8'],
+ ['iso-8859-8', 'iso-8859-8'],
+ ['iso-8859-8-e', 'iso-8859-8'],
+ ['iso-ir-138', 'iso-8859-8'],
+ ['iso8859-8', 'iso-8859-8'],
+ ['iso88598', 'iso-8859-8'],
+ ['iso_8859-8', 'iso-8859-8'],
+ ['iso_8859-8:1988', 'iso-8859-8'],
+ ['visual', 'iso-8859-8'],
+ ['csiso88598i', 'iso-8859-8-i'],
+ ['iso-8859-8-i', 'iso-8859-8-i'],
+ ['logical', 'iso-8859-8-i'],
+ ['csisolatin6', 'iso-8859-10'],
+ ['iso-8859-10', 'iso-8859-10'],
+ ['iso-ir-157', 'iso-8859-10'],
+ ['iso8859-10', 'iso-8859-10'],
+ ['iso885910', 'iso-8859-10'],
+ ['l6', 'iso-8859-10'],
+ ['latin6', 'iso-8859-10'],
+ ['iso-8859-13', 'iso-8859-13'],
+ ['iso8859-13', 'iso-8859-13'],
+ ['iso885913', 'iso-8859-13'],
+ ['iso-8859-14', 'iso-8859-14'],
+ ['iso8859-14', 'iso-8859-14'],
+ ['iso885914', 'iso-8859-14'],
+ ['csisolatin9', 'iso-8859-15'],
+ ['iso-8859-15', 'iso-8859-15'],
+ ['iso8859-15', 'iso-8859-15'],
+ ['iso885915', 'iso-8859-15'],
+ ['iso_8859-15', 'iso-8859-15'],
+ ['l9', 'iso-8859-15'],
+ ['cskoi8r', 'koi8-r'],
+ ['koi', 'koi8-r'],
+ ['koi8', 'koi8-r'],
+ ['koi8-r', 'koi8-r'],
+ ['koi8_r', 'koi8-r'],
+ ['koi8-ru', 'koi8-u'],
+ ['koi8-u', 'koi8-u'],
+ ['csmacintosh', 'macintosh'],
+ ['mac', 'macintosh'],
+ ['macintosh', 'macintosh'],
+ ['x-mac-roman', 'macintosh'],
+ ['dos-874', 'windows-874'],
+ ['iso-8859-11', 'windows-874'],
+ ['iso8859-11', 'windows-874'],
+ ['iso885911', 'windows-874'],
+ ['tis-620', 'windows-874'],
+ ['windows-874', 'windows-874'],
+ ['cp1250', 'windows-1250'],
+ ['windows-1250', 'windows-1250'],
+ ['x-cp1250', 'windows-1250'],
+ ['cp1251', 'windows-1251'],
+ ['windows-1251', 'windows-1251'],
+ ['x-cp1251', 'windows-1251'],
+ ['ansi_x3.4-1968', 'windows-1252'],
+ ['ascii', 'windows-1252'],
+ ['cp1252', 'windows-1252'],
+ ['cp819', 'windows-1252'],
+ ['csisolatin1', 'windows-1252'],
+ ['ibm819', 'windows-1252'],
+ ['iso-8859-1', 'windows-1252'],
+ ['iso-ir-100', 'windows-1252'],
+ ['iso8859-1', 'windows-1252'],
+ ['iso88591', 'windows-1252'],
+ ['iso_8859-1', 'windows-1252'],
+ ['iso_8859-1:1987', 'windows-1252'],
+ ['l1', 'windows-1252'],
+ ['latin1', 'windows-1252'],
+ ['us-ascii', 'windows-1252'],
+ ['windows-1252', 'windows-1252'],
+ ['x-cp1252', 'windows-1252'],
+ ['cp1253', 'windows-1253'],
+ ['windows-1253', 'windows-1253'],
+ ['x-cp1253', 'windows-1253'],
+ ['cp1254', 'windows-1254'],
+ ['csisolatin5', 'windows-1254'],
+ ['iso-8859-9', 'windows-1254'],
+ ['iso-ir-148', 'windows-1254'],
+ ['iso8859-9', 'windows-1254'],
+ ['iso88599', 'windows-1254'],
+ ['iso_8859-9', 'windows-1254'],
+ ['iso_8859-9:1989', 'windows-1254'],
+ ['l5', 'windows-1254'],
+ ['latin5', 'windows-1254'],
+ ['windows-1254', 'windows-1254'],
+ ['x-cp1254', 'windows-1254'],
+ ['cp1255', 'windows-1255'],
+ ['windows-1255', 'windows-1255'],
+ ['x-cp1255', 'windows-1255'],
+ ['cp1256', 'windows-1256'],
+ ['windows-1256', 'windows-1256'],
+ ['x-cp1256', 'windows-1256'],
+ ['cp1257', 'windows-1257'],
+ ['windows-1257', 'windows-1257'],
+ ['x-cp1257', 'windows-1257'],
+ ['cp1258', 'windows-1258'],
+ ['windows-1258', 'windows-1258'],
+ ['x-cp1258', 'windows-1258'],
+ ['x-mac-cyrillic', 'x-mac-cyrillic'],
+ ['x-mac-ukrainian', 'x-mac-cyrillic'],
+ ['chinese', 'gbk'],
+ ['csgb2312', 'gbk'],
+ ['csiso58gb231280', 'gbk'],
+ ['gb2312', 'gbk'],
+ ['gb_2312', 'gbk'],
+ ['gb_2312-80', 'gbk'],
+ ['gbk', 'gbk'],
+ ['iso-ir-58', 'gbk'],
+ ['x-gbk', 'gbk'],
+ ['gb18030', 'gb18030'],
+ ['big5', 'big5'],
+ ['big5-hkscs', 'big5'],
+ ['cn-big5', 'big5'],
+ ['csbig5', 'big5'],
+ ['x-x-big5', 'big5'],
+ ['cseucpkdfmtjapanese', 'euc-jp'],
+ ['euc-jp', 'euc-jp'],
+ ['x-euc-jp', 'euc-jp'],
+ ['csiso2022jp', 'iso-2022-jp'],
+ ['iso-2022-jp', 'iso-2022-jp'],
+ ['csshiftjis', 'shift_jis'],
+ ['ms932', 'shift_jis'],
+ ['ms_kanji', 'shift_jis'],
+ ['shift-jis', 'shift_jis'],
+ ['shift_jis', 'shift_jis'],
+ ['sjis', 'shift_jis'],
+ ['windows-31j', 'shift_jis'],
+ ['x-sjis', 'shift_jis'],
+ ['cseuckr', 'euc-kr'],
+ ['csksc56011987', 'euc-kr'],
+ ['euc-kr', 'euc-kr'],
+ ['iso-ir-149', 'euc-kr'],
+ ['korean', 'euc-kr'],
+ ['ks_c_5601-1987', 'euc-kr'],
+ ['ks_c_5601-1989', 'euc-kr'],
+ ['ksc5601', 'euc-kr'],
+ ['ksc_5601', 'euc-kr'],
+ ['windows-949', 'euc-kr'],
+ ['utf-16be', 'utf-16be'],
+ ['utf-16le', 'utf-16le'],
+ ['utf-16', 'utf-16le']
+]);
+
+// Unfortunately, String.prototype.trim also removes non-ascii whitespace,
+// so we have to do this manually
+function trimAsciiWhitespace(label) {
+ var s = 0;
+ var e = label.length;
+ while (s < e && (
+ label[s] === '\u0009' ||
+ label[s] === '\u000a' ||
+ label[s] === '\u000c' ||
+ label[s] === '\u000d' ||
+ label[s] === '\u0020')) {
+ s++;
+ }
+ while (e > s && (
+ label[e - 1] === '\u0009' ||
+ label[e - 1] === '\u000a' ||
+ label[e - 1] === '\u000c' ||
+ label[e - 1] === '\u000d' ||
+ label[e - 1] === '\u0020')) {
+ e--;
+ }
+ return label.slice(s, e);
+}
+
+function getEncodingFromLabel(label) {
+ const enc = encodings.get(label);
+ if (enc !== undefined) return enc;
+ return encodings.get(trimAsciiWhitespace(label.toLowerCase()));
+}
+
+function hasTextDecoder(encoding = 'utf-8') {
+ if (typeof encoding !== 'string')
+ throw new errors.Error('ERR_INVALID_ARG_TYPE', 'encoding', 'string');
+ return hasConverter(getEncodingFromLabel(encoding));
+}
+
+var Buffer;
+function lazyBuffer() {
+ if (Buffer === undefined)
+ Buffer = require('buffer').Buffer;
+ return Buffer;
+}
+
+class TextDecoder {
+ constructor(encoding = 'utf-8', options = {}) {
+ if (!warned) {
+ warned = true;
+ process.emitWarning(experimental, 'ExperimentalWarning');
+ }
+
+ encoding = `${encoding}`;
+ if (typeof options !== 'object')
+ throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
+
+ const enc = getEncodingFromLabel(encoding);
+ if (enc === undefined)
+ throw new errors.RangeError('ERR_ENCODING_NOT_SUPPORTED', encoding);
+
+ var flags = 0;
+ if (options !== null) {
+ flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
+ flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
+ }
+
+ const handle = getConverter(enc, flags);
+ if (handle === undefined)
+ throw new errors.Error('ERR_ENCODING_NOT_SUPPORTED', encoding);
+
+ this[kHandle] = handle;
+ this[kFlags] = flags;
+ this[kEncoding] = enc;
+ }
+
+ get encoding() {
+ if (this == null || this[kDecoder] !== true)
+ throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
+ return this[kEncoding];
+ }
+
+ get fatal() {
+ if (this == null || this[kDecoder] !== true)
+ throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
+ return (this[kFlags] & CONVERTER_FLAGS_FATAL) === CONVERTER_FLAGS_FATAL;
+ }
+
+ get ignoreBOM() {
+ if (this == null || this[kDecoder] !== true)
+ throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
+ return (this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM) ===
+ CONVERTER_FLAGS_IGNORE_BOM;
+ }
+
+ decode(input = empty, options = {}) {
+ if (this == null || this[kDecoder] !== true)
+ throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
+ if (isArrayBuffer(input)) {
+ input = lazyBuffer().from(input);
+ } else if (!ArrayBuffer.isView(input)) {
+ throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'input',
+ ['ArrayBuffer', 'ArrayBufferView']);
+ }
+ if (typeof options !== 'object') {
+ throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'options', 'object');
+ }
+
+ var flags = 0;
+ if (options !== null)
+ flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
+
+ const ret = _decode(this[kHandle], input, flags);
+ if (typeof ret === 'number') {
+ const err = new errors.TypeError('ERR_ENCODING_INVALID_ENCODED_DATA',
+ this.encoding);
+ err.errno = ret;
+ throw err;
+ }
+ return ret.toString('ucs2');
+ }
+
+ [inspect](depth, opts) {
+ if (this == null || this[kDecoder] !== true)
+ throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder');
+ if (typeof depth === 'number' && depth < 0)
+ return opts.stylize('[Object]', 'special');
+ var ctor = getConstructorOf(this);
+ var obj = Object.create({
+ constructor: ctor === null ? TextDecoder : ctor
+ });
+ obj.encoding = this.encoding;
+ obj.fatal = this.fatal;
+ obj.ignoreBOM = this.ignoreBOM;
+ if (opts.showHidden) {
+ obj[kFlags] = this[kFlags];
+ obj[kHandle] = this[kHandle];
+ }
+ // Lazy to avoid circular dependency
+ return require('util').inspect(obj, opts);
+ }
+}
+
+class TextEncoder {
+ constructor() {
+ if (!warned) {
+ warned = true;
+ process.emitWarning(experimental, 'ExperimentalWarning');
+ }
+ }
+
+ get encoding() {
+ if (this == null || this[kEncoder] !== true)
+ throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
+ return 'utf-8';
+ }
+
+ encode(input = '') {
+ if (this == null || this[kEncoder] !== true)
+ throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
+ return encodeUtf8String(`${input}`);
+ }
+
+ [inspect](depth, opts) {
+ if (this == null || this[kEncoder] !== true)
+ throw new errors.TypeError('ERR_INVALID_THIS', 'TextEncoder');
+ if (typeof depth === 'number' && depth < 0)
+ return opts.stylize('[Object]', 'special');
+ var ctor = getConstructorOf(this);
+ var obj = Object.create({
+ constructor: ctor === null ? TextEncoder : ctor
+ });
+ obj.encoding = this.encoding;
+ // Lazy to avoid circular dependency
+ return require('util').inspect(obj, opts);
+ }
+}
+
+Object.defineProperties(
+ TextDecoder.prototype, {
+ [kDecoder]: { enumerable: false, value: true, configurable: false },
+ 'decode': { enumerable: true },
+ 'encoding': { enumerable: true },
+ 'fatal': { enumerable: true },
+ 'ignoreBOM': { enumerable: true },
+ [Symbol.toStringTag]: {
+ configurable: true,
+ value: 'TextDecoder'
+ } });
+Object.defineProperties(
+ TextEncoder.prototype, {
+ [kEncoder]: { enumerable: false, value: true, configurable: false },
+ 'encode': { enumerable: true },
+ 'encoding': { enumerable: true },
+ [Symbol.toStringTag]: {
+ configurable: true,
+ value: 'TextEncoder'
+ } });
+
+module.exports = {
+ getEncodingFromLabel,
+ hasTextDecoder,
+ TextDecoder,
+ TextEncoder
+};