From 9fbd0f0f7df16536533c331e3c634b203087d521 Mon Sep 17 00:00:00 2001 From: Felix Geisendörfer Date: Tue, 13 May 2014 17:36:40 +0200 Subject: string_decoder: Fix failures from new test cases This patch simplifies the implementation of StringDecoder, fixes the failures from the new test cases, and also no longer relies on v8's WriteUtf8 function to encode individual surrogates. --- lib/string_decoder.js | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) (limited to 'lib/string_decoder.js') diff --git a/lib/string_decoder.js b/lib/string_decoder.js index 6b1e30895a..fe4c9fc170 100644 --- a/lib/string_decoder.js +++ b/lib/string_decoder.js @@ -57,29 +57,29 @@ var StringDecoder = exports.StringDecoder = function(encoding) { StringDecoder.prototype.write = function(buffer) { var charStr = ''; - var offset = 0; - // if our last write ended with an incomplete multibyte character while (this.charLength) { // determine how many remaining bytes this buffer has to offer for this char - var i = (buffer.length >= this.charLength - this.charReceived) ? + var available = (buffer.length >= this.charLength - this.charReceived) ? this.charLength - this.charReceived : buffer.length; // add the new bytes to the char buffer - buffer.copy(this.charBuffer, this.charReceived, offset, i); - this.charReceived += (i - offset); - offset = i; + buffer.copy(this.charBuffer, this.charReceived, 0, available); + this.charReceived += available; if (this.charReceived < this.charLength) { // still not enough chars in this buffer? wait for more ... return ''; } + // remove bytes belonging to the current character from the buffer + buffer = buffer.slice(available, buffer.length); + // get the character that was split charStr = this.charBuffer.slice(0, this.charLength).toString(this.encoding); - // lead surrogate (D800-DBFF) is also the incomplete character + // CESU-8: lead surrogate (D800-DBFF) is also the incomplete character var charCode = charStr.charCodeAt(charStr.length - 1); if (charCode >= 0xD800 && charCode <= 0xDBFF) { this.charLength += this.surrogateSize; @@ -89,34 +89,33 @@ StringDecoder.prototype.write = function(buffer) { this.charReceived = this.charLength = 0; // if there are no more bytes in this buffer, just emit our char - if (i == buffer.length) return charStr; - - // otherwise cut off the characters end from the beginning of this buffer - buffer = buffer.slice(i, buffer.length); + if (buffer.length === 0) { + return charStr; + } break; } - var lenIncomplete = this.detectIncompleteChar(buffer); + // determine and set charLength / charReceived + this.detectIncompleteChar(buffer); var end = buffer.length; if (this.charLength) { // buffer the incomplete character bytes we got - buffer.copy(this.charBuffer, 0, buffer.length - lenIncomplete, end); - this.charReceived = lenIncomplete; - end -= lenIncomplete; + buffer.copy(this.charBuffer, 0, buffer.length - this.charReceived, end); + end -= this.charReceived; } charStr += buffer.toString(this.encoding, 0, end); var end = charStr.length - 1; var charCode = charStr.charCodeAt(end); - // lead surrogate (D800-DBFF) is also the incomplete character + // CESU-8: lead surrogate (D800-DBFF) is also the incomplete character if (charCode >= 0xD800 && charCode <= 0xDBFF) { var size = this.surrogateSize; this.charLength += size; this.charReceived += size; this.charBuffer.copy(this.charBuffer, size, 0, size); - this.charBuffer.write(charStr.charAt(charStr.length - 1), this.encoding); + buffer.copy(this.charBuffer, 0, 0, size); return charStr.substring(0, end); } @@ -153,8 +152,7 @@ StringDecoder.prototype.detectIncompleteChar = function(buffer) { break; } } - - return i; + this.charReceived = i; }; StringDecoder.prototype.end = function(buffer) { @@ -177,13 +175,11 @@ function passThroughWrite(buffer) { } function utf16DetectIncompleteChar(buffer) { - var incomplete = this.charReceived = buffer.length % 2; - this.charLength = incomplete ? 2 : 0; - return incomplete; + this.charReceived = buffer.length % 2; + this.charLength = this.charReceived ? 2 : 0; } function base64DetectIncompleteChar(buffer) { - var incomplete = this.charReceived = buffer.length % 3; - this.charLength = incomplete ? 3 : 0; - return incomplete; + this.charReceived = buffer.length % 3; + this.charLength = this.charReceived ? 3 : 0; } -- cgit v1.2.3