diff options
author | James M Snell <jasnell@gmail.com> | 2016-10-11 14:12:31 -0700 |
---|---|---|
committer | James M Snell <jasnell@gmail.com> | 2016-10-25 10:12:02 -0700 |
commit | e8eaaa772414e6193702f3933cbee919d6fcdc95 (patch) | |
tree | 22268649a84bd380c35c54d0f3a06337589d36f5 | |
parent | 1cf55f806b8a483398272d32a65882fc7b1933d0 (diff) | |
download | android-node-v8-e8eaaa772414e6193702f3933cbee919d6fcdc95.tar.gz android-node-v8-e8eaaa772414e6193702f3933cbee919d6fcdc95.tar.bz2 android-node-v8-e8eaaa772414e6193702f3933cbee919d6fcdc95.zip |
buffer: add buffer.transcode
Add buffer.transcode(source, from, to) method. Primarily uses ICU
to transcode a buffer's content from one of Node.js' supported
encodings to another.
Originally part of a proposal to add a new unicode module. Decided
to refactor the approach towards individual PRs without a new module.
Refs: https://github.com/nodejs/node/pull/8075
PR-URL: https://github.com/nodejs/node/pull/9038
Reviewed-By: Anna Henningsen <anna@addaleax.net>
-rw-r--r-- | doc/api/buffer.md | 27 | ||||
-rw-r--r-- | lib/buffer.js | 4 | ||||
-rw-r--r-- | lib/internal/buffer.js | 30 | ||||
-rw-r--r-- | node.gyp | 1 | ||||
-rw-r--r-- | src/node_buffer.cc | 55 | ||||
-rw-r--r-- | src/node_i18n.cc | 280 | ||||
-rw-r--r-- | src/util.h | 27 | ||||
-rw-r--r-- | test/parallel/test-icu-transcode.js | 48 | ||||
-rw-r--r-- | tools/icu/icu-generic.gyp | 4 |
9 files changed, 437 insertions, 39 deletions
diff --git a/doc/api/buffer.md b/doc/api/buffer.md index 6d06ae9ddd..3877cc5569 100644 --- a/doc/api/buffer.md +++ b/doc/api/buffer.md @@ -2302,6 +2302,33 @@ added: v3.0.0 On 32-bit architectures, this value is `(2^30)-1` (~1GB). On 64-bit architectures, this value is `(2^31)-1` (~2GB). +## buffer.transcode(source, fromEnc, toEnc) +<!-- YAML +added: REPLACEME +--> + +* `source` {Buffer} A `Buffer` instance +* `fromEnc` {String} The current encoding +* `toEnc` {String} To target encoding + +Re-encodes the given `Buffer` instance from one character encoding to another. +Returns a new `Buffer` instance. + +Throws if the `fromEnc` or `toEnc` specify invalid character encodings or if +conversion from `fromEnc` to `toEnc` is not permitted. + +The transcoding process will use substitution characters if a given byte +sequence cannot be adequately represented in the target encoding. For instance: + +```js +const newBuf = buffer.transcode(Buffer.from('€'), 'utf8', 'ascii'); +console.log(newBuf.toString('ascii')); + // prints '?' +``` + +Because the Euro (`€`) sign is not representable in US-ASCII, it is replaced +with `?` in the transcoded `Buffer`. + ## Class: SlowBuffer <!-- YAML deprecated: v6.0.0 diff --git a/lib/buffer.js b/lib/buffer.js index 667561c3db..4d11aa835f 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -1360,3 +1360,7 @@ Buffer.prototype.swap64 = function swap64() { }; Buffer.prototype.toLocaleString = Buffer.prototype.toString; + +// Put this at the end because internal/buffer has a circular +// dependency on Buffer. 
+exports.transcode = require('internal/buffer').transcode; diff --git a/lib/internal/buffer.js b/lib/internal/buffer.js new file mode 100644 index 0000000000..fdb34b9efd --- /dev/null +++ b/lib/internal/buffer.js @@ -0,0 +1,30 @@ +'use strict'; + +if (!process.binding('config').hasIntl) { + return; +} + +const normalizeEncoding = require('internal/util').normalizeEncoding; +const Buffer = require('buffer').Buffer; + +const icu = process.binding('icu'); + +// Transcodes the Buffer from one encoding to another, returning a new +// Buffer instance. +exports.transcode = function transcode(source, fromEncoding, toEncoding) { + if (!Buffer.isBuffer(source)) + throw new TypeError('"source" argument must be a Buffer'); + if (source.length === 0) return Buffer.alloc(0); + + fromEncoding = normalizeEncoding(fromEncoding) || fromEncoding; + toEncoding = normalizeEncoding(toEncoding) || toEncoding; + const result = icu.transcode(source, fromEncoding, toEncoding); + if (Buffer.isBuffer(result)) + return result; + + const code = icu.icuErrName(result); + const err = new Error(`Unable to transcode Buffer [${code}]`); + err.code = code; + err.errno = result; + throw err; +}; @@ -74,6 +74,7 @@ 'lib/v8.js', 'lib/vm.js', 'lib/zlib.js', + 'lib/internal/buffer.js', 'lib/internal/child_process.js', 'lib/internal/cluster.js', 'lib/internal/freelist.js', diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 07a4106642..540de1827f 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -22,23 +22,6 @@ if (!(r)) return env->ThrowRangeError("out of range index"); \ } while (0) -#define THROW_AND_RETURN_UNLESS_BUFFER(env, obj) \ - do { \ - if (!HasInstance(obj)) \ - return env->ThrowTypeError("argument should be a Buffer"); \ - } while (0) - -#define SPREAD_ARG(val, name) \ - CHECK((val)->IsUint8Array()); \ - Local<Uint8Array> name = (val).As<Uint8Array>(); \ - ArrayBuffer::Contents name##_c = name->Buffer()->GetContents(); \ - const size_t name##_offset = name->ByteOffset(); \ - 
const size_t name##_length = name->ByteLength(); \ - char* const name##_data = \ - static_cast<char*>(name##_c.Data()) + name##_offset; \ - if (name##_length > 0) \ - CHECK_NE(name##_data, nullptr); - #define SLICE_START_END(start_arg, end_arg, end_max) \ size_t start; \ size_t end; \ @@ -448,7 +431,7 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) { Isolate* isolate = env->isolate(); THROW_AND_RETURN_UNLESS_BUFFER(env, args.This()); - SPREAD_ARG(args.This(), ts_obj); + SPREAD_BUFFER_ARG(args.This(), ts_obj); if (ts_obj_length == 0) return args.GetReturnValue().SetEmptyString(); @@ -465,7 +448,7 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args.This()); - SPREAD_ARG(args.This(), ts_obj); + SPREAD_BUFFER_ARG(args.This(), ts_obj); if (ts_obj_length == 0) return args.GetReturnValue().SetEmptyString(); @@ -543,8 +526,8 @@ void Copy(const FunctionCallbackInfo<Value> &args) { THROW_AND_RETURN_UNLESS_BUFFER(env, args.This()); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); Local<Object> target_obj = args[0].As<Object>(); - SPREAD_ARG(args.This(), ts_obj); - SPREAD_ARG(target_obj, target); + SPREAD_BUFFER_ARG(args.This(), ts_obj); + SPREAD_BUFFER_ARG(target_obj, target); size_t target_start; size_t source_start; @@ -577,7 +560,7 @@ void Fill(const FunctionCallbackInfo<Value>& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); - SPREAD_ARG(args[0], ts_obj); + SPREAD_BUFFER_ARG(args[0], ts_obj); size_t start = args[2]->Uint32Value(); size_t end = args[3]->Uint32Value(); @@ -590,7 +573,7 @@ void Fill(const FunctionCallbackInfo<Value>& args) { // First check if Buffer has been passed. 
if (Buffer::HasInstance(args[1])) { - SPREAD_ARG(args[1], fill_obj); + SPREAD_BUFFER_ARG(args[1], fill_obj); str_length = fill_obj_length; memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length)); goto start_fill; @@ -669,7 +652,7 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args.This()); - SPREAD_ARG(args.This(), ts_obj); + SPREAD_BUFFER_ARG(args.This(), ts_obj); if (!args[0]->IsString()) return env->ThrowTypeError("Argument must be a string"); @@ -747,7 +730,7 @@ static inline void Swizzle(char* start, unsigned int len) { template <typename T, enum Endianness endianness> void ReadFloatGeneric(const FunctionCallbackInfo<Value>& args) { THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); - SPREAD_ARG(args[0], ts_obj); + SPREAD_BUFFER_ARG(args[0], ts_obj); uint32_t offset = args[1]->Uint32Value(); CHECK_LE(offset + sizeof(T), ts_obj_length); @@ -881,8 +864,8 @@ void CompareOffset(const FunctionCallbackInfo<Value> &args) { THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]); - SPREAD_ARG(args[0], ts_obj); - SPREAD_ARG(args[1], target); + SPREAD_BUFFER_ARG(args[0], ts_obj); + SPREAD_BUFFER_ARG(args[1], target); size_t target_start; size_t source_start; @@ -921,8 +904,8 @@ void Compare(const FunctionCallbackInfo<Value> &args) { THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]); - SPREAD_ARG(args[0], obj_a); - SPREAD_ARG(args[1], obj_b); + SPREAD_BUFFER_ARG(args[0], obj_a); + SPREAD_BUFFER_ARG(args[1], obj_b); size_t cmp_length = MIN(obj_a_length, obj_b_length); @@ -977,7 +960,7 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) { UTF8); THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); - SPREAD_ARG(args[0], ts_obj); + SPREAD_BUFFER_ARG(args[0], ts_obj); Local<String> needle = args[1].As<String>(); int64_t offset_i64 = 
args[2]->IntegerValue(); @@ -1084,8 +1067,8 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) { THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]); - SPREAD_ARG(args[0], ts_obj); - SPREAD_ARG(args[1], buf); + SPREAD_BUFFER_ARG(args[0], ts_obj); + SPREAD_BUFFER_ARG(args[1], buf); int64_t offset_i64 = args[2]->IntegerValue(); bool is_forward = args[4]->IsTrue(); @@ -1143,7 +1126,7 @@ void IndexOfNumber(const FunctionCallbackInfo<Value>& args) { ASSERT(args[3]->IsBoolean()); THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); - SPREAD_ARG(args[0], ts_obj); + SPREAD_BUFFER_ARG(args[0], ts_obj); uint32_t needle = args[1]->Uint32Value(); int64_t offset_i64 = args[2]->IntegerValue(); @@ -1171,7 +1154,7 @@ void IndexOfNumber(const FunctionCallbackInfo<Value>& args) { void Swap16(const FunctionCallbackInfo<Value>& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); - SPREAD_ARG(args[0], ts_obj); + SPREAD_BUFFER_ARG(args[0], ts_obj); SwapBytes16(ts_obj_data, ts_obj_length); args.GetReturnValue().Set(args[0]); } @@ -1180,7 +1163,7 @@ void Swap16(const FunctionCallbackInfo<Value>& args) { void Swap32(const FunctionCallbackInfo<Value>& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); - SPREAD_ARG(args[0], ts_obj); + SPREAD_BUFFER_ARG(args[0], ts_obj); SwapBytes32(ts_obj_data, ts_obj_length); args.GetReturnValue().Set(args[0]); } @@ -1189,7 +1172,7 @@ void Swap32(const FunctionCallbackInfo<Value>& args) { void Swap64(const FunctionCallbackInfo<Value>& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); - SPREAD_ARG(args[0], ts_obj); + SPREAD_BUFFER_ARG(args[0], ts_obj); SwapBytes64(ts_obj_data, ts_obj_length); args.GetReturnValue().Set(args[0]); } diff --git a/src/node_i18n.cc b/src/node_i18n.cc 
index e77591babf..7ac50423b2 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -24,6 +24,7 @@ #if defined(NODE_HAVE_I18N_SUPPORT) #include "node.h" +#include "node_buffer.h" #include "env.h" #include "env-inl.h" #include "util.h" @@ -34,6 +35,10 @@ #include <unicode/uchar.h> #include <unicode/udata.h> #include <unicode/uidna.h> +#include <unicode/utypes.h> +#include <unicode/ucnv.h> +#include <unicode/utf8.h> +#include <unicode/utf16.h> #ifdef NODE_HAVE_SMALL_ICU /* if this is defined, we have a 'secondary' entry point. @@ -54,7 +59,9 @@ namespace node { using v8::Context; using v8::FunctionCallbackInfo; +using v8::Isolate; using v8::Local; +using v8::MaybeLocal; using v8::Object; using v8::String; using v8::Value; @@ -63,6 +70,275 @@ bool flag_icu_data_dir = false; namespace i18n { +const size_t kStorageSize = 1024; + +// TODO(jasnell): This could potentially become a member of MaybeStackBuffer +// at some point in the future. Care would need to be taken with the +// MaybeStackBuffer<UChar> variant below. 
+MaybeLocal<Object> AsBuffer(Isolate* isolate, + MaybeStackBuffer<char>* buf, + size_t len) { + if (buf->IsAllocated()) { + MaybeLocal<Object> ret = Buffer::New(isolate, buf->out(), len); + if (!ret.IsEmpty()) buf->Release(); + return ret; + } + return Buffer::Copy(isolate, buf->out(), len); +} + +MaybeLocal<Object> AsBuffer(Isolate* isolate, + MaybeStackBuffer<UChar>* buf, + size_t len) { + char* dst = reinterpret_cast<char*>(**buf); + MaybeLocal<Object> ret; + if (buf->IsAllocated()) { + ret = Buffer::New(isolate, dst, len); + if (!ret.IsEmpty()) buf->Release(); + } else { + ret = Buffer::Copy(isolate, dst, len); + } + if (!ret.IsEmpty() && IsBigEndian()) { + SPREAD_BUFFER_ARG(ret.ToLocalChecked(), buf); + SwapBytes16(buf_data, buf_length); + } + return ret; +} + +struct Converter { + explicit Converter(const char* name, const char* sub = NULL) + : conv(nullptr) { + UErrorCode status = U_ZERO_ERROR; + conv = ucnv_open(name, &status); + CHECK(U_SUCCESS(status)); + if (sub != NULL) { + ucnv_setSubstChars(conv, sub, strlen(sub), &status); + } + } + + ~Converter() { + ucnv_close(conv); + } + + UConverter* conv; +}; + +// One-Shot Converters + +void CopySourceBuffer(MaybeStackBuffer<UChar>* dest, + const char* data, + const size_t length, + const size_t length_in_chars) { + dest->AllocateSufficientStorage(length_in_chars); + char* dst = reinterpret_cast<char*>(**dest); + memcpy(dst, data, length); + if (IsBigEndian()) { + SwapBytes16(dst, length); + } +} + +typedef MaybeLocal<Object> (*TranscodeFunc)(Isolate* isolate, + const char* fromEncoding, + const char* toEncoding, + const char* source, + const size_t source_length, + UErrorCode* status); + +MaybeLocal<Object> Transcode(Isolate* isolate, + const char* fromEncoding, + const char* toEncoding, + const char* source, + const size_t source_length, + UErrorCode* status) { + *status = U_ZERO_ERROR; + MaybeLocal<Object> ret; + MaybeStackBuffer<char> result; + Converter to(toEncoding, "?"); + Converter from(fromEncoding); 
+ const uint32_t limit = source_length * ucnv_getMaxCharSize(to.conv); + result.AllocateSufficientStorage(limit); + char* target = *result; + ucnv_convertEx(to.conv, from.conv, &target, target + limit, + &source, source + source_length, nullptr, nullptr, + nullptr, nullptr, true, true, status); + if (U_SUCCESS(*status)) + ret = AsBuffer(isolate, &result, target - &result[0]); + return ret; +} + +MaybeLocal<Object> TranscodeToUcs2(Isolate* isolate, + const char* fromEncoding, + const char* toEncoding, + const char* source, + const size_t source_length, + UErrorCode* status) { + *status = U_ZERO_ERROR; + MaybeLocal<Object> ret; + MaybeStackBuffer<UChar> destbuf(source_length); + Converter from(fromEncoding); + const size_t length_in_chars = source_length * sizeof(*destbuf); + ucnv_toUChars(from.conv, *destbuf, length_in_chars, + source, source_length, status); + if (U_SUCCESS(*status)) + ret = AsBuffer(isolate, &destbuf, length_in_chars); + return ret; +} + +MaybeLocal<Object> TranscodeFromUcs2(Isolate* isolate, + const char* fromEncoding, + const char* toEncoding, + const char* source, + const size_t source_length, + UErrorCode* status) { + *status = U_ZERO_ERROR; + MaybeStackBuffer<UChar> sourcebuf; + MaybeLocal<Object> ret; + Converter to(toEncoding, "?"); + const size_t length_in_chars = source_length / sizeof(UChar); + CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars); + MaybeStackBuffer<char> destbuf(length_in_chars); + const uint32_t len = ucnv_fromUChars(to.conv, *destbuf, length_in_chars, + *sourcebuf, length_in_chars, status); + if (U_SUCCESS(*status)) + ret = AsBuffer(isolate, &destbuf, len); + return ret; +} + +MaybeLocal<Object> TranscodeUcs2FromUtf8(Isolate* isolate, + const char* fromEncoding, + const char* toEncoding, + const char* source, + const size_t source_length, + UErrorCode* status) { + *status = U_ZERO_ERROR; + MaybeStackBuffer<UChar, kStorageSize> destbuf; + int32_t result_length; + u_strFromUTF8(*destbuf, kStorageSize, 
&result_length, + source, source_length, status); + MaybeLocal<Object> ret; + if (U_SUCCESS(*status)) { + ret = AsBuffer(isolate, &destbuf, result_length * sizeof(**destbuf)); + } else if (*status == U_BUFFER_OVERFLOW_ERROR) { + *status = U_ZERO_ERROR; + destbuf.AllocateSufficientStorage(result_length); + u_strFromUTF8(*destbuf, result_length, &result_length, + source, source_length, status); + if (U_SUCCESS(*status)) + ret = AsBuffer(isolate, &destbuf, result_length * sizeof(**destbuf)); + } + return ret; +} + +MaybeLocal<Object> TranscodeUtf8FromUcs2(Isolate* isolate, + const char* fromEncoding, + const char* toEncoding, + const char* source, + const size_t source_length, + UErrorCode* status) { + *status = U_ZERO_ERROR; + MaybeLocal<Object> ret; + const size_t length_in_chars = source_length / sizeof(UChar); + int32_t result_length; + MaybeStackBuffer<UChar> sourcebuf; + MaybeStackBuffer<char, kStorageSize> destbuf; + CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars); + u_strToUTF8(*destbuf, kStorageSize, &result_length, + *sourcebuf, length_in_chars, status); + if (U_SUCCESS(*status)) { + ret = AsBuffer(isolate, &destbuf, result_length); + } else if (*status == U_BUFFER_OVERFLOW_ERROR) { + *status = U_ZERO_ERROR; + destbuf.AllocateSufficientStorage(result_length); + u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf, + length_in_chars, status); + if (U_SUCCESS(*status)) { + ret = Buffer::New(isolate, *destbuf, result_length); + destbuf.Release(); + } + } + return ret; +} + +const char* EncodingName(const enum encoding encoding) { + switch (encoding) { + case ASCII: return "us-ascii"; + case LATIN1: return "iso8859-1"; + case UCS2: return "utf16le"; + case UTF8: return "utf-8"; + default: return NULL; + } +} + +bool SupportedEncoding(const enum encoding encoding) { + switch (encoding) { + case ASCII: + case LATIN1: + case UCS2: + case UTF8: return true; + default: return false; + } +} + +void Transcode(const 
FunctionCallbackInfo<Value>&args) { + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + UErrorCode status = U_ZERO_ERROR; + MaybeLocal<Object> result; + + THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); + SPREAD_BUFFER_ARG(args[0], ts_obj); + const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER); + const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER); + + if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) { + TranscodeFunc tfn = &Transcode; + switch (fromEncoding) { + case ASCII: + case LATIN1: + if (toEncoding == UCS2) + tfn = &TranscodeToUcs2; + break; + case UTF8: + if (toEncoding == UCS2) + tfn = &TranscodeUcs2FromUtf8; + break; + case UCS2: + switch (toEncoding) { + case UCS2: + tfn = &Transcode; + break; + case UTF8: + tfn = &TranscodeUtf8FromUcs2; + break; + default: + tfn = TranscodeFromUcs2; + } + break; + default: + // This should not happen because of the SupportedEncoding checks + ABORT(); + } + + result = tfn(isolate, EncodingName(fromEncoding), EncodingName(toEncoding), + ts_obj_data, ts_obj_length, &status); + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; + } + + if (result.IsEmpty()) + return args.GetReturnValue().Set(status); + + return args.GetReturnValue().Set(result.ToLocalChecked()); +} + +static void ICUErrorName(const FunctionCallbackInfo<Value>& args) { + Environment* env = Environment::GetCurrent(args); + UErrorCode status = static_cast<UErrorCode>(args[0]->Int32Value()); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + u_errorName(status), + v8::NewStringType::kNormal).ToLocalChecked()); +} + bool InitializeICUDirectory(const char* icu_data_path) { if (icu_data_path != nullptr) { flag_icu_data_dir = true; @@ -282,6 +558,10 @@ void Init(Local<Object> target, env->SetMethod(target, "toUnicode", ToUnicode); env->SetMethod(target, "toASCII", ToASCII); env->SetMethod(target, "getStringWidth", GetStringWidth); + + // One-shot 
converters + env->SetMethod(target, "icuErrName", ICUErrorName); + env->SetMethod(target, "transcode", Transcode); } } // namespace i18n diff --git a/src/util.h b/src/util.h index e2f9df02bc..d1c5ac0285 100644 --- a/src/util.h +++ b/src/util.h @@ -343,6 +343,15 @@ class MaybeStackBuffer { buf_ = nullptr; } + bool IsAllocated() { + return buf_ != buf_st_; + } + + void Release() { + buf_ = buf_st_; + length_ = 0; + } + MaybeStackBuffer() : length_(0), buf_(buf_st_) { // Default to a zero-length, null-terminated buffer. buf_[0] = T(); @@ -378,6 +387,24 @@ class BufferValue : public MaybeStackBuffer<char> { explicit BufferValue(v8::Isolate* isolate, v8::Local<v8::Value> value); }; +#define THROW_AND_RETURN_UNLESS_BUFFER(env, obj) \ + do { \ + if (!Buffer::HasInstance(obj)) \ + return env->ThrowTypeError("argument should be a Buffer"); \ + } while (0) + +#define SPREAD_BUFFER_ARG(val, name) \ + CHECK((val)->IsUint8Array()); \ + Local<v8::Uint8Array> name = (val).As<v8::Uint8Array>(); \ + v8::ArrayBuffer::Contents name##_c = name->Buffer()->GetContents(); \ + const size_t name##_offset = name->ByteOffset(); \ + const size_t name##_length = name->ByteLength(); \ + char* const name##_data = \ + static_cast<char*>(name##_c.Data()) + name##_offset; \ + if (name##_length > 0) \ + CHECK_NE(name##_data, nullptr); + + } // namespace node #endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS diff --git a/test/parallel/test-icu-transcode.js b/test/parallel/test-icu-transcode.js new file mode 100644 index 0000000000..f45b997436 --- /dev/null +++ b/test/parallel/test-icu-transcode.js @@ -0,0 +1,48 @@ +'use strict'; + +require('../common'); +const buffer = require('buffer'); +const assert = require('assert'); + +const orig = Buffer.from('tést €', 'utf8'); + +// Test Transcoding +const tests = { + 'latin1': [0x74, 0xe9, 0x73, 0x74, 0x20, 0x3f], + 'ascii': [0x74, 0x3f, 0x73, 0x74, 0x20, 0x3f], + 'ucs2': [0x74, 0x00, 0xe9, 0x00, 0x73, + 0x00, 0x74, 0x00, 0x20, 0x00, + 0xac, 
0x20] +}; + +for (const test in tests) { + const dest = buffer.transcode(orig, 'utf8', test); + assert.strictEqual(dest.length, tests[test].length); + for (var n = 0; n < tests[test].length; n++) + assert.strictEqual(dest[n], tests[test][n]); +} + +{ + const dest = buffer.transcode(Buffer.from(tests.ucs2), 'ucs2', 'utf8'); + assert.strictEqual(dest.toString(), orig.toString()); +} + +{ + const utf8 = Buffer.from('€'.repeat(4000), 'utf8'); + const ucs2 = Buffer.from('€'.repeat(4000), 'ucs2'); + const utf8_to_ucs2 = buffer.transcode(utf8, 'utf8', 'ucs2'); + const ucs2_to_utf8 = buffer.transcode(ucs2, 'ucs2', 'utf8'); + assert.deepStrictEqual(utf8, ucs2_to_utf8); + assert.deepStrictEqual(ucs2, utf8_to_ucs2); + assert.strictEqual(ucs2_to_utf8.toString('utf8'), + utf8_to_ucs2.toString('ucs2')); +} + +assert.throws( + () => buffer.transcode(Buffer.from('a'), 'b', 'utf8'), + /Unable to transcode Buffer \[U_ILLEGAL_ARGUMENT_ERROR\]/ +); +assert.throws( + () => buffer.transcode(Buffer.from('a'), 'uf8', 'b'), + /Unable to transcode Buffer \[U_ILLEGAL_ARGUMENT_ERROR\]/ +); diff --git a/tools/icu/icu-generic.gyp b/tools/icu/icu-generic.gyp index 3a284461d8..cf615717e8 100644 --- a/tools/icu/icu-generic.gyp +++ b/tools/icu/icu-generic.gyp @@ -20,9 +20,7 @@ 'type': 'none', 'toolsets': [ 'target' ], 'direct_dependent_settings': { - 'defines': [ - 'UCONFIG_NO_CONVERSION=1', - ] + 'defines': [] }, }, { |