diff options
author | Karl Skomski <karl@skomski.com> | 2015-09-03 10:10:30 +0200 |
---|---|---|
committer | James M Snell <jasnell@gmail.com> | 2015-10-07 21:09:53 -0700 |
commit | a18dd7b7882ca955075ad977a5e7838c792bb76f (patch) | |
tree | 29b747e1919cec49963ba6a55c55ca7607eda5b9 /src/node_buffer.cc | |
parent | aeee956ac73de7af914b747926168714fe4d1c4c (diff) | |
download | android-node-v8-a18dd7b7882ca955075ad977a5e7838c792bb76f.tar.gz android-node-v8-a18dd7b7882ca955075ad977a5e7838c792bb76f.tar.bz2 android-node-v8-a18dd7b7882ca955075ad977a5e7838c792bb76f.zip |
src: replace naive search in Buffer::IndexOf
Adds the string search implementation from v8
which uses naive search if pattern length < 8
or to a specific badness then uses Boyer-Moore-Horspool
Added benchmark shows the expected improvements
Added option to use ucs2 encoding with Buffer::IndexOf
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
PR-URL: https://github.com/nodejs/node/pull/2539
Diffstat (limited to 'src/node_buffer.cc')
-rw-r--r-- | src/node_buffer.cc | 178 |
1 files changed, 124 insertions, 54 deletions
diff --git a/src/node_buffer.cc b/src/node_buffer.cc index eb727cf9c8..0492987570 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -4,6 +4,7 @@ #include "env.h" #include "env-inl.h" #include "string_bytes.h" +#include "string_search.h" #include "util.h" #include "util-inl.h" #include "v8-profiler.h" @@ -792,87 +793,156 @@ void Compare(const FunctionCallbackInfo<Value> &args) { } -int32_t IndexOf(const char* haystack, - size_t h_length, - const char* needle, - size_t n_length) { - CHECK_GE(h_length, n_length); - // TODO(trevnorris): Implement Boyer-Moore string search algorithm. - for (size_t i = 0; i < h_length - n_length + 1; i++) { - if (haystack[i] == needle[0]) { - if (memcmp(haystack + i, needle, n_length) == 0) - return i; - } - } - return -1; -} - - void IndexOfString(const FunctionCallbackInfo<Value>& args) { ASSERT(args[1]->IsString()); ASSERT(args[2]->IsNumber()); + enum encoding enc = ParseEncoding(args.GetIsolate(), + args[3], + UTF8); + THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); SPREAD_ARG(args[0], ts_obj); - node::Utf8Value str(args.GetIsolate(), args[1]); - int32_t offset_i32 = args[2]->Int32Value(); - uint32_t offset; + Local<String> needle = args[1].As<String>(); + const char* haystack = ts_obj_data; + const size_t haystack_length = ts_obj_length; + const size_t needle_length = needle->Utf8Length(); + + + if (needle_length == 0 || haystack_length == 0) { + return args.GetReturnValue().Set(-1); + } + + int64_t offset_i64 = args[2]->IntegerValue(); + size_t offset = 0; - if (offset_i32 < 0) { - if (offset_i32 + static_cast<int32_t>(ts_obj_length) < 0) + if (offset_i64 < 0) { + if (offset_i64 + static_cast<int64_t>(haystack_length) < 0) { offset = 0; - else - offset = static_cast<uint32_t>(ts_obj_length + offset_i32); + } else { + offset = static_cast<size_t>(haystack_length + offset_i64); + } } else { - offset = static_cast<uint32_t>(offset_i32); + offset = static_cast<size_t>(offset_i64); } - if (str.length() == 0 || - ts_obj_length == 0 || - (offset != 0 && str.length() + offset <= str.length()) || - str.length() + offset > ts_obj_length) + if (haystack_length < offset || needle_length + offset > haystack_length) { return args.GetReturnValue().Set(-1); + } - int32_t r = - IndexOf(ts_obj_data + offset, ts_obj_length - offset, *str, str.length()); - args.GetReturnValue().Set(r == -1 ? -1 : static_cast<int32_t>(r + offset)); -} + size_t result = haystack_length; + + if (enc == UCS2) { + String::Value needle_value(needle); + if (*needle_value == nullptr) + return args.GetReturnValue().Set(-1); + + if (haystack_length < 2 || needle_value.length() < 1) { + return args.GetReturnValue().Set(-1); + } + + result = SearchString(reinterpret_cast<const uint16_t*>(haystack), + haystack_length / 2, + reinterpret_cast<const uint16_t*>(*needle_value), + needle_value.length(), + offset / 2); + result *= 2; + } else if (enc == UTF8) { + String::Utf8Value needle_value(needle); + if (*needle_value == nullptr) + return args.GetReturnValue().Set(-1); + + result = SearchString(reinterpret_cast<const uint8_t*>(haystack), + haystack_length, + reinterpret_cast<const uint8_t*>(*needle_value), + needle_length, + offset); + } else if (enc == BINARY) { + uint8_t* needle_data = static_cast<uint8_t*>(malloc(needle_length)); + if (needle_data == nullptr) { + return args.GetReturnValue().Set(-1); + } + needle->WriteOneByte( + needle_data, 0, needle_length, String::NO_NULL_TERMINATION); + + result = SearchString(reinterpret_cast<const uint8_t*>(haystack), + haystack_length, + needle_data, + needle_length, + offset); + free(needle_data); + } + args.GetReturnValue().Set( + result == haystack_length ? -1 : static_cast<int>(result)); +} void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) { ASSERT(args[1]->IsObject()); ASSERT(args[2]->IsNumber()); + enum encoding enc = ParseEncoding(args.GetIsolate(), + args[3], + UTF8); + THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); SPREAD_ARG(args[0], ts_obj); SPREAD_ARG(args[1], buf); - const int32_t offset_i32 = args[2]->Int32Value(); - uint32_t offset; if (buf_length > 0) CHECK_NE(buf_data, nullptr); - if (offset_i32 < 0) { - if (offset_i32 + static_cast<int32_t>(ts_obj_length) < 0) + const char* haystack = ts_obj_data; + const size_t haystack_length = ts_obj_length; + const char* needle = buf_data; + const size_t needle_length = buf_length; + + if (needle_length == 0 || haystack_length == 0) { + return args.GetReturnValue().Set(-1); + } + + int64_t offset_i64 = args[2]->IntegerValue(); + size_t offset = 0; + + if (offset_i64 < 0) { + if (offset_i64 + static_cast<int64_t>(haystack_length) < 0) offset = 0; else - offset = static_cast<uint32_t>(ts_obj_length + offset_i32); + offset = static_cast<size_t>(haystack_length + offset_i64); } else { - offset = static_cast<uint32_t>(offset_i32); + offset = static_cast<size_t>(offset_i64); } - if (buf_length == 0 || - ts_obj_length == 0 || - (offset != 0 && buf_length + offset <= buf_length) || - buf_length + offset > ts_obj_length) + if (haystack_length < offset || needle_length + offset > haystack_length) { return args.GetReturnValue().Set(-1); + } - int32_t r = - IndexOf(ts_obj_data + offset, ts_obj_length - offset, buf_data, buf_length); - args.GetReturnValue().Set(r == -1 ? -1 : static_cast<int32_t>(r + offset)); -} + size_t result = haystack_length; + if (enc == UCS2) { + if (haystack_length < 2 || needle_length < 2) { + return args.GetReturnValue().Set(-1); + } + result = SearchString( + reinterpret_cast<const uint16_t*>(haystack), + haystack_length / 2, + reinterpret_cast<const uint16_t*>(needle), + needle_length / 2, + offset / 2); + result *= 2; + } else { + result = SearchString( + reinterpret_cast<const uint8_t*>(haystack), + haystack_length, + reinterpret_cast<const uint8_t*>(needle), + needle_length, + offset); + } + + args.GetReturnValue().Set( + result == haystack_length ? -1 : static_cast<int>(result)); +} void IndexOfNumber(const FunctionCallbackInfo<Value>& args) { ASSERT(args[1]->IsNumber()); @@ -882,16 +952,16 @@ void IndexOfNumber(const FunctionCallbackInfo<Value>& args) { SPREAD_ARG(args[0], ts_obj); uint32_t needle = args[1]->Uint32Value(); - int32_t offset_i32 = args[2]->Int32Value(); - uint32_t offset; + int64_t offset_i64 = args[2]->IntegerValue(); + size_t offset; - if (offset_i32 < 0) { - if (offset_i32 + static_cast<int32_t>(ts_obj_length) < 0) + if (offset_i64 < 0) { + if (offset_i64 + static_cast<int64_t>(ts_obj_length) < 0) offset = 0; else - offset = static_cast<uint32_t>(ts_obj_length + offset_i32); + offset = static_cast<size_t>(ts_obj_length + offset_i64); } else { - offset = static_cast<uint32_t>(offset_i32); + offset = static_cast<size_t>(offset_i64); } if (ts_obj_length == 0 || offset + 1 > ts_obj_length) @@ -899,8 +969,8 @@ void IndexOfNumber(const FunctionCallbackInfo<Value>& args) { void* ptr = memchr(ts_obj_data + offset, needle, ts_obj_length - offset); char* ptr_char = static_cast<char*>(ptr); - args.GetReturnValue().Set( - ptr ? static_cast<int32_t>(ptr_char - ts_obj_data) : -1); + args.GetReturnValue().Set(ptr ? static_cast<int>(ptr_char - ts_obj_data) + : -1); } |