summaryrefslogtreecommitdiff
path: root/deps/v8/src/strings/string-case.cc
blob: 88370a81e321c3bcd5bf030f92815ddd1424f16e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/strings/string-case.h"

#include "src/base/logging.h"
#include "src/common/assert-scope.h"
#include "src/common/globals.h"
#include "src/utils/utils.h"

namespace v8 {
namespace internal {

// FastAsciiConvert tries to do character processing on a word_t basis if
// source and destination strings are properly aligned. Natural alignment of
// string data depends on kTaggedSize so we define word_t via Tagged_t.
using word_t = std::make_unsigned<Tagged_t>::type;

const word_t kWordTAllBitsSet = std::numeric_limits<word_t>::max();
const word_t kOneInEveryByte = kWordTAllBitsSet / 0xFF;
const word_t kAsciiMask = kOneInEveryByte << 7;

#ifdef DEBUG
bool CheckFastAsciiConvert(char* dst, const char* src, int length, bool changed,
                           bool is_to_lower) {
  bool expected_changed = false;
  for (int i = 0; i < length; i++) {
    if (dst[i] == src[i]) continue;
    expected_changed = true;
    if (is_to_lower) {
      DCHECK('A' <= src[i] && src[i] <= 'Z');
      DCHECK(dst[i] == src[i] + ('a' - 'A'));
    } else {
      DCHECK('a' <= src[i] && src[i] <= 'z');
      DCHECK(dst[i] == src[i] - ('a' - 'A'));
    }
  }
  return (expected_changed == changed);
}
#endif

// Given a word and two range boundaries returns a word with high bit
// set in every byte iff the corresponding input byte was strictly in
// the range (m, n). All the other bits in the result are cleared.
// This function is only useful when it can be inlined and the
// boundaries are statically known.
// Requires: all bytes in the input word and the boundaries must be
// ASCII (less than 0x7F).
static inline word_t AsciiRangeMask(word_t w, char m, char n) {
  // Use strict inequalities since in edge cases the function could be
  // further simplified.
  DCHECK(0 < m && m < n);
  // Has high bit set in every w byte less than n.
  word_t tmp1 = kOneInEveryByte * (0x7F + n) - w;
  // Has high bit set in every w byte greater than m.
  word_t tmp2 = w + kOneInEveryByte * (0x7F - m);
  return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));
}

template <bool is_lower>
int FastAsciiConvert(char* dst, const char* src, int length,
                     bool* changed_out) {
#ifdef DEBUG
  char* saved_dst = dst;
#endif
  const char* saved_src = src;
  DisallowHeapAllocation no_gc;
  // We rely on the distance between upper and lower case letters
  // being a known power of 2.
  DCHECK_EQ('a' - 'A', 1 << 5);
  // Boundaries for the range of input characters than require conversion.
  static const char lo = is_lower ? 'A' - 1 : 'a' - 1;
  static const char hi = is_lower ? 'Z' + 1 : 'z' + 1;
  bool changed = false;
  const char* const limit = src + length;

  // dst is newly allocated and always aligned.
  DCHECK(IsAligned(reinterpret_cast<Address>(dst), sizeof(word_t)));
  // Only attempt processing one word at a time if src is also aligned.
  if (IsAligned(reinterpret_cast<Address>(src), sizeof(word_t))) {
    // Process the prefix of the input that requires no conversion one aligned
    // (machine) word at a time.
    while (src <= limit - sizeof(word_t)) {
      const word_t w = *reinterpret_cast<const word_t*>(src);
      if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
      if (AsciiRangeMask(w, lo, hi) != 0) {
        changed = true;
        break;
      }
      *reinterpret_cast<word_t*>(dst) = w;
      src += sizeof(word_t);
      dst += sizeof(word_t);
    }
    // Process the remainder of the input performing conversion when
    // required one word at a time.
    while (src <= limit - sizeof(word_t)) {
      const word_t w = *reinterpret_cast<const word_t*>(src);
      if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
      word_t m = AsciiRangeMask(w, lo, hi);
      // The mask has high (7th) bit set in every byte that needs
      // conversion and we know that the distance between cases is
      // 1 << 5.
      *reinterpret_cast<word_t*>(dst) = w ^ (m >> 2);
      src += sizeof(word_t);
      dst += sizeof(word_t);
    }
  }
  // Process the last few bytes of the input (or the whole input if
  // unaligned access is not supported).
  while (src < limit) {
    char c = *src;
    if ((c & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
    if (lo < c && c < hi) {
      c ^= (1 << 5);
      changed = true;
    }
    *dst = c;
    ++src;
    ++dst;
  }

  DCHECK(
      CheckFastAsciiConvert(saved_dst, saved_src, length, changed, is_lower));

  *changed_out = changed;
  return length;
}

template int FastAsciiConvert<false>(char* dst, const char* src, int length,
                                     bool* changed_out);
template int FastAsciiConvert<true>(char* dst, const char* src, int length,
                                    bool* changed_out);

}  // namespace internal
}  // namespace v8