From 9140b97848fb284ec17b87e8933a1e2cb2a7521c Mon Sep 17 00:00:00 2001 From: James M Snell Date: Mon, 23 May 2016 14:30:48 -0700 Subject: doc: general improvements to string_decoder.md copy PR-URL: https://github.com/nodejs/node/pull/6940 Reviewed-By: Anna Henningsen Reviewed-By: Brian White --- doc/api/string_decoder.md | 56 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 9 deletions(-) (limited to 'doc/api/string_decoder.md') diff --git a/doc/api/string_decoder.md b/doc/api/string_decoder.md index 06b97bab48..43ea739f4b 100644 --- a/doc/api/string_decoder.md +++ b/doc/api/string_decoder.md @@ -2,9 +2,15 @@ Stability: 2 - Stable -To use this module, do `require('string_decoder')`. StringDecoder decodes a -buffer to a string. It is a simple interface to `buffer.toString()` but provides -additional support for utf8. +The `string_decoder` module provides an API for decoding `Buffer` objects into +strings in a manner that preserves encoded multibyte UTF-8 and UTF-16 +characters. It can be accessed using: + +```js +const StringDecoder = require('string_decoder').StringDecoder; +``` + +The following example shows the basic use of the `StringDecoder` class. ```js const StringDecoder = require('string_decoder').StringDecoder; @@ -17,23 +23,55 @@ const euro = Buffer.from([0xE2, 0x82, 0xAC]); console.log(decoder.write(euro)); ``` -## Class: StringDecoder +When a `Buffer` instance is written to the `StringDecoder` instance, an +internal buffer is used to ensure that the decoded string does not contain +any incomplete multibyte characters. These are held in the buffer until the +next call to `stringDecoder.write()` or until `stringDecoder.end()` is called. 
+ +In the following example, the three UTF-8 encoded bytes of the European euro +symbol are written over three separate operations: + +```js +const StringDecoder = require('string_decoder').StringDecoder; +const decoder = new StringDecoder('utf8'); + +decoder.write(Buffer.from([0xE2])); +decoder.write(Buffer.from([0x82])); +console.log(decoder.end(Buffer.from([0xAC]))); +``` + +## Class: new StringDecoder([encoding]) -Accepts a single argument, `encoding` which defaults to `'utf8'`. +* `encoding` {string} The character encoding the `StringDecoder` will use. + Defaults to `'utf8'`. -### decoder.end() +Creates a new `StringDecoder` instance. + +### stringDecoder.end([buffer]) -Returns any trailing bytes that were left in the buffer. +* `buffer` {Buffer} A `Buffer` containing the bytes to decode. + +Returns any remaining input stored in the internal buffer as a string. Bytes +representing incomplete UTF-8 and UTF-16 characters will be replaced with +substitution characters appropriate for the character encoding. -### decoder.write(buffer) +If the `buffer` argument is provided, one final call to `stringDecoder.write()` +is performed before returning the remaining input. + +### stringDecoder.write(buffer) -Returns a decoded string. +* `buffer` {Buffer} A `Buffer` containing the bytes to decode. + +Returns a decoded string, ensuring that any incomplete multibyte characters at +the end of the `Buffer` are omitted from the returned string and stored in an +internal buffer for the next call to `stringDecoder.write()` or +`stringDecoder.end()`. -- cgit v1.2.3