From b010c8716498dca398e61c388859fea92296feb3 Mon Sep 17 00:00:00 2001
From: Fedor Indutny <fedor@indutny.com>
Date: Wed, 2 Mar 2016 14:43:39 +0300
Subject: crypto, string_bytes: treat `buffer` str as `utf8`

Do not treat crypto inputs as `binary` strings, convert them to Buffers
using `new Buffer(..., 'utf8')`, or using newly updated StringBytes
APIs.

PR-URL: https://github.com/nodejs/node/pull/5522
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: James M Snell <jasnell@gmail.com>
---
 doc/api/crypto.markdown           |  4 ++--
 lib/crypto.js                     |  9 +++------
 src/node_crypto.cc                | 15 +++++++--------
 src/string_bytes.cc               |  6 +++---
 test/parallel/test-crypto-hash.js | 13 ++++++++++++-
 5 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/doc/api/crypto.markdown b/doc/api/crypto.markdown
index dd31ae7ac3..46d1e5f3b0 100644
--- a/doc/api/crypto.markdown
+++ b/doc/api/crypto.markdown
@@ -601,7 +601,7 @@ called. Multiple calls will cause an error to be thrown.
 Updates the hash content with the given `data`, the encoding of which
 is given in `input_encoding` and can be `'utf8'`, `'ascii'` or
 `'binary'`. If `encoding` is not provided, and the `data` is a string, an
-encoding of `'binary'` is enforced. If `data` is a [`Buffer`][] then
+encoding of `'utf8'` is enforced. If `data` is a [`Buffer`][] then
 `input_encoding` is ignored.
 
 This can be called many times with new data as it is streamed.
@@ -811,7 +811,7 @@ or [buffers][`Buffer`]. The default value is `'buffer'`, which makes methods
 default to [`Buffer`][] objects.
 
 The `crypto.DEFAULT_ENCODING` mechanism is provided for backwards compatibility
-with legacy programs that expect `'binary'` to be the default encoding.
+with legacy programs that expect `'utf8'` to be the default encoding.
 
 New applications should expect the default to be `'buffer'`. This property may
 become deprecated in a future Node.js release.
diff --git a/lib/crypto.js b/lib/crypto.js
index cf92eff1ee..81eee114be 100644
--- a/lib/crypto.js
+++ b/lib/crypto.js
@@ -30,11 +30,10 @@ const DH_GENERATOR = 2;
 // any explicit encoding in older versions of node, and we don't want
 // to break them unnecessarily.
 function toBuf(str, encoding) {
-  encoding = encoding || 'binary';
   if (typeof str === 'string') {
-    if (encoding === 'buffer')
-      encoding = 'binary';
-    str = new Buffer(str, encoding);
+    if (encoding === 'buffer' || !encoding)
+      encoding = 'utf8';
+    return new Buffer(str, encoding);
   }
   return str;
 }
@@ -67,8 +66,6 @@ Hash.prototype._flush = function(callback) {
 
 Hash.prototype.update = function(data, encoding) {
   encoding = encoding || exports.DEFAULT_ENCODING;
-  if (encoding === 'buffer' && typeof data === 'string')
-    encoding = 'binary';
   this._handle.update(data, encoding);
   return this;
 };
diff --git a/src/node_crypto.cc b/src/node_crypto.cc
index 2cdc31ee4b..ccaa2c7657 100644
--- a/src/node_crypto.cc
+++ b/src/node_crypto.cc
@@ -3369,7 +3369,7 @@ void CipherBase::Update(const FunctionCallbackInfo<Value>& args) {
   // Only copy the data if we have to, because it's a string
   if (args[0]->IsString()) {
     StringBytes::InlineDecoder decoder;
-    if (!decoder.Decode(env, args[0].As<String>(), args[1], BINARY))
+    if (!decoder.Decode(env, args[0].As<String>(), args[1], UTF8))
       return;
     r = cipher->Update(decoder.out(), decoder.size(), &out, &out_len);
   } else {
@@ -3548,7 +3548,7 @@ void Hmac::HmacUpdate(const FunctionCallbackInfo<Value>& args) {
   bool r;
   if (args[0]->IsString()) {
     StringBytes::InlineDecoder decoder;
-    if (!decoder.Decode(env, args[0].As<String>(), args[1], BINARY))
+    if (!decoder.Decode(env, args[0].As<String>(), args[1], UTF8))
       return;
     r = hmac->HmacUpdate(decoder.out(), decoder.size());
   } else {
@@ -3666,7 +3666,7 @@ void Hash::HashUpdate(const FunctionCallbackInfo<Value>& args) {
   bool r;
   if (args[0]->IsString()) {
     StringBytes::InlineDecoder decoder;
-    if (!decoder.Decode(env, args[0].As<String>(), args[1], BINARY))
+    if (!decoder.Decode(env, args[0].As<String>(), args[1], UTF8))
       return;
     r = hash->HashUpdate(decoder.out(), decoder.size());
   } else {
@@ -3818,7 +3818,7 @@ void Sign::SignUpdate(const FunctionCallbackInfo<Value>& args) {
   Error err;
   if (args[0]->IsString()) {
     StringBytes::InlineDecoder decoder;
-    if (!decoder.Decode(env, args[0].As<String>(), args[1], BINARY))
+    if (!decoder.Decode(env, args[0].As<String>(), args[1], UTF8))
       return;
     err = sign->SignUpdate(decoder.out(), decoder.size());
   } else {
@@ -4020,7 +4020,7 @@ void Verify::VerifyUpdate(const FunctionCallbackInfo<Value>& args) {
   Error err;
   if (args[0]->IsString()) {
     StringBytes::InlineDecoder decoder;
-    if (!decoder.Decode(env, args[0].As<String>(), args[1], BINARY))
+    if (!decoder.Decode(env, args[0].As<String>(), args[1], UTF8))
       return;
     err = verify->VerifyUpdate(decoder.out(), decoder.size());
   } else {
@@ -4119,12 +4119,11 @@ void Verify::VerifyFinal(const FunctionCallbackInfo<Value>& args) {
 
   THROW_AND_RETURN_IF_NOT_STRING_OR_BUFFER(args[1]);
 
-  // BINARY works for both buffers and binary strings.
-  enum encoding encoding = BINARY;
+  enum encoding encoding = UTF8;
   if (args.Length() >= 3) {
     encoding = ParseEncoding(env->isolate(),
                              args[2]->ToString(env->isolate()),
-                             BINARY);
+                             UTF8);
   }
 
   ssize_t hlen = StringBytes::Size(env->isolate(), args[1], encoding);
diff --git a/src/string_bytes.cc b/src/string_bytes.cc
index a916caf75e..d9ca6e565e 100644
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -368,7 +368,6 @@ size_t StringBytes::Write(Isolate* isolate,
   switch (encoding) {
     case ASCII:
     case BINARY:
-    case BUFFER:
       if (is_extern && str->IsOneByte()) {
         memcpy(buf, data, nbytes);
       } else {
@@ -379,6 +378,7 @@ size_t StringBytes::Write(Isolate* isolate,
         *chars_written = nbytes;
       break;
 
+    case BUFFER:
     case UTF8:
       nbytes = str->WriteUtf8(buf, buflen, chars_written, flags);
       break;
@@ -480,11 +480,11 @@ size_t StringBytes::StorageSize(Isolate* isolate,
 
   switch (encoding) {
     case BINARY:
-    case BUFFER:
     case ASCII:
       data_size = str->Length();
       break;
 
+    case BUFFER:
     case UTF8:
       // A single UCS2 codepoint never takes up more than 3 utf8 bytes.
       // It is an exercise for the caller to decide when a string is
@@ -532,11 +532,11 @@ size_t StringBytes::Size(Isolate* isolate,
 
   switch (encoding) {
     case BINARY:
-    case BUFFER:
     case ASCII:
       data_size = str->Length();
       break;
 
+    case BUFFER:
     case UTF8:
       data_size = str->Utf8Length();
       break;
diff --git a/test/parallel/test-crypto-hash.js b/test/parallel/test-crypto-hash.js
index 90ff1fd727..3f8c902ab8 100644
--- a/test/parallel/test-crypto-hash.js
+++ b/test/parallel/test-crypto-hash.js
@@ -13,7 +13,7 @@ var crypto = require('crypto');
 // Test hashing
 var a1 = crypto.createHash('sha1').update('Test123').digest('hex');
 var a2 = crypto.createHash('sha256').update('Test123').digest('base64');
-var a3 = crypto.createHash('sha512').update('Test123').digest(); // binary
+var a3 = crypto.createHash('sha512').update('Test123').digest(); // buffer
 var a4 = crypto.createHash('sha1').update('Test123').digest('buffer');
 
 // stream interface
@@ -87,3 +87,14 @@ fileStream.on('close', function() {
 assert.throws(function() {
   crypto.createHash('xyzzy');
 });
+
+// Default UTF-8 encoding
+var hutf8 = crypto.createHash('sha512').update('УТФ-8 text').digest('hex');
+assert.equal(
+    hutf8,
+    '4b21bbd1a68e690a730ddcb5a8bc94ead9879ffe82580767ad7ec6fa8ba2dea6' +
+        '43a821af66afa9a45b6a78c712fecf0e56dc7f43aef4bcfc8eb5b4d8dca6ea5b');
+
+assert.notEqual(
+    hutf8,
+    crypto.createHash('sha512').update('УТФ-8 text', 'binary').digest('hex'));
-- 
cgit v1.2.3