diff options
author | Brian White <mscdex@mscdex.net> | 2015-03-19 17:31:34 -0400 |
---|---|---|
committer | Brian White <mscdex@mscdex.net> | 2015-03-25 00:34:34 -0400 |
commit | 8a945814dd61ddb547a4690788ea47cd7757f165 (patch) | |
tree | 0bbe36f54ad561fff62346a483776376d4d497cf /lib/string_decoder.js | |
parent | 3d46fefe0cb5abf45bb6a21e5d0a377c58f0f896 (diff) | |
download | android-node-v8-8a945814dd61ddb547a4690788ea47cd7757f165.tar.gz android-node-v8-8a945814dd61ddb547a4690788ea47cd7757f165.tar.bz2 android-node-v8-8a945814dd61ddb547a4690788ea47cd7757f165.zip |
string_decoder: optimize write()
By limiting property gets and sets to only where they are
absolutely necessary, we can achieve greater performance,
especially with small UTF-8 inputs and base64 inputs of any size.
PR-URL: https://github.com/iojs/io.js/pull/1209
Reviewed-By: Rod Vagg <rod@vagg.org>
Reviewed-By: Nicu Micleușanu <micnic90@gmail.com>
Reviewed-By: Chris Dickinson <christopher.s.dickinson@gmail.com>
Diffstat (limited to 'lib/string_decoder.js')
-rw-r--r-- | lib/string_decoder.js | 105 |
1 file changed, 68 insertions, 37 deletions
diff --git a/lib/string_decoder.js b/lib/string_decoder.js index ad85ee1331..61a3bb20d5 100644 --- a/lib/string_decoder.js +++ b/lib/string_decoder.js @@ -1,7 +1,9 @@ 'use strict'; +const isEncoding = Buffer.isEncoding; + function assertEncoding(encoding) { - if (encoding && !Buffer.isEncoding(encoding)) { + if (encoding && !isEncoding(encoding)) { throw new Error('Unknown encoding: ' + encoding); } } @@ -59,65 +61,83 @@ const StringDecoder = exports.StringDecoder = function(encoding) { // replacement character. See https://codereview.chromium.org/121173009/ . StringDecoder.prototype.write = function(buffer) { var charStr = ''; + var buflen = buffer.length; + var charBuffer = this.charBuffer; + var charLength = this.charLength; + var charReceived = this.charReceived; + var surrogateSize = this.surrogateSize; + var encoding = this.encoding; // if our last write ended with an incomplete multibyte character - while (this.charLength) { + while (charLength) { // determine how many remaining bytes this buffer has to offer for this char - var available = (buffer.length >= this.charLength - this.charReceived) ? - this.charLength - this.charReceived : - buffer.length; + var diff = charLength - charReceived; + var available = (buflen >= diff) ? diff : buflen; // add the new bytes to the char buffer - buffer.copy(this.charBuffer, this.charReceived, 0, available); - this.charReceived += available; + buffer.copy(charBuffer, charReceived, 0, available); + charReceived += available; - if (this.charReceived < this.charLength) { + if (charReceived < charLength) { // still not enough chars in this buffer? wait for more ... 
+ + this.charLength = charLength; + this.charReceived = charReceived; + return ''; } // remove bytes belonging to the current character from the buffer - buffer = buffer.slice(available, buffer.length); + buffer = buffer.slice(available, buflen); + buflen = buffer.length; // get the character that was split - charStr = this.charBuffer.slice(0, this.charLength).toString(this.encoding); + charStr = charBuffer.toString(encoding, 0, charLength); // CESU-8: lead surrogate (D800-DBFF) is also the incomplete character var charCode = charStr.charCodeAt(charStr.length - 1); if (charCode >= 0xD800 && charCode <= 0xDBFF) { - this.charLength += this.surrogateSize; + charLength += surrogateSize; charStr = ''; continue; } - this.charReceived = this.charLength = 0; + charReceived = charLength = 0; // if there are no more bytes in this buffer, just emit our char - if (buffer.length === 0) { + if (buflen === 0) { + this.charLength = charLength; + this.charReceived = charReceived; + return charStr; } - break; } // determine and set charLength / charReceived - this.detectIncompleteChar(buffer); + if (this.detectIncompleteChar(buffer)) + charLength = this.charLength; + charReceived = this.charReceived; - var end = buffer.length; - if (this.charLength) { + var end = buflen; + if (charLength) { // buffer the incomplete character bytes we got - buffer.copy(this.charBuffer, 0, buffer.length - this.charReceived, end); - end -= this.charReceived; + buffer.copy(charBuffer, 0, buflen - charReceived, end); + end -= charReceived; } - charStr += buffer.toString(this.encoding, 0, end); + this.charLength = charLength; + charStr += buffer.toString(encoding, 0, end); var end = charStr.length - 1; var charCode = charStr.charCodeAt(end); // CESU-8: lead surrogate (D800-DBFF) is also the incomplete character if (charCode >= 0xD800 && charCode <= 0xDBFF) { - var size = this.surrogateSize; - this.charLength += size; - this.charReceived += size; - this.charBuffer.copy(this.charBuffer, size, 0, size); - 
buffer.copy(this.charBuffer, 0, 0, size); + charLength += surrogateSize; + charReceived += surrogateSize; + charBuffer.copy(charBuffer, surrogateSize, 0, surrogateSize); + buffer.copy(charBuffer, 0, 0, surrogateSize); + + this.charLength = charLength; + this.charReceived = charReceived; + return charStr.substring(0, end); } @@ -130,35 +150,43 @@ StringDecoder.prototype.write = function(buffer) { // length that character, and sets this.charReceived to the number of bytes // that are available for this character. StringDecoder.prototype.detectIncompleteChar = function(buffer) { + var buflen = buffer.length; // determine how many bytes we have to check at the end of this buffer - var i = (buffer.length >= 3) ? 3 : buffer.length; + var i = (buflen >= 3) ? 3 : buflen; + var newlen = false; // Figure out if one of the last i bytes of our buffer announces an // incomplete char. for (; i > 0; i--) { - var c = buffer[buffer.length - i]; + var c = buffer[buflen - i]; // See http://en.wikipedia.org/wiki/UTF-8#Description // 110XXXXX - if (i == 1 && c >> 5 == 0x06) { + if (i === 1 && c >> 5 === 0x06) { this.charLength = 2; + newlen = true; break; } // 1110XXXX - if (i <= 2 && c >> 4 == 0x0E) { + if (i <= 2 && c >> 4 === 0x0E) { this.charLength = 3; + newlen = true; break; } // 11110XXX - if (i <= 3 && c >> 3 == 0x1E) { + if (i <= 3 && c >> 3 === 0x1E) { this.charLength = 4; + newlen = true; break; } } + this.charReceived = i; + + return newlen; }; StringDecoder.prototype.end = function(buffer) { @@ -166,11 +194,12 @@ StringDecoder.prototype.end = function(buffer) { if (buffer && buffer.length) res = this.write(buffer); - if (this.charReceived) { - var cr = this.charReceived; + var charReceived = this.charReceived; + if (charReceived) { + var cr = charReceived; var buf = this.charBuffer; var enc = this.encoding; - res += buf.slice(0, cr).toString(enc); + res += buf.toString(enc, 0, cr); } return res; @@ -181,11 +210,13 @@ function passThroughWrite(buffer) { } function 
utf16DetectIncompleteChar(buffer) { - this.charReceived = buffer.length % 2; - this.charLength = this.charReceived ? 2 : 0; + var charReceived = this.charReceived = buffer.length % 2; + this.charLength = charReceived ? 2 : 0; + return true; } function base64DetectIncompleteChar(buffer) { - this.charReceived = buffer.length % 3; - this.charLength = this.charReceived ? 3 : 0; + var charReceived = this.charReceived = buffer.length % 3; + this.charLength = charReceived ? 3 : 0; + return true; } |