summaryrefslogtreecommitdiff
path: root/deps/v8/src/builtins/regexp-replace.tq
blob: f13724b476ce5d3f07f6920b398fe86d7c48cd06 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include 'src/builtins/builtins-regexp-gen.h'

namespace regexp {

  // Builtins implemented outside this file.
  // StringIndexOf is called below as (string, search string, start index);
  // the caller in this file treats a result of -1 as "not found".
  extern builtin
  StringIndexOf(implicit context: Context)(String, String, Smi): Smi;
  // SubString is called below as (string, from, to) to copy pieces of the
  // subject string into the result.
  extern builtin
  SubString(implicit context: Context)(String, Smi, Smi): String;

  // Runtime functions implemented outside this file.
  // RegExpExecMultiple is called below with the regexp, the subject string,
  // the last match info and a freshly allocated JSArray. A Null result is
  // treated by the caller as "no matches".
  extern runtime RegExpExecMultiple(implicit context: Context)(
      JSRegExp, String, RegExpMatchInfo, JSArray): Null | JSArray;
  // RegExpReplaceRT is the fallback used below whenever a fast path cannot be
  // taken. It is called as (context, regexp, subject string, replace value).
  extern transitioning runtime
  RegExpReplaceRT(Context, JSReceiver, String, Object): String;
  // StringBuilderConcat is called below with the matches array, its length
  // and the subject string to produce the final result string.
  extern transitioning runtime
  StringBuilderConcat(implicit context: Context)(JSArray, Smi, String): String;
  // Used below for a non-global regexp combined with a callable replace value.
  extern transitioning runtime
  StringReplaceNonGlobalRegExpWithFunction(implicit context: Context)(
      String, JSRegExp, Callable): String;

  // Used below to advance lastIndex after an empty match. It is called as
  // (subject string, current lastIndex, unicode flag of the regexp).
  extern macro
  RegExpBuiltinsAssembler::AdvanceStringIndexFast(String, Smi, bool): Smi;

  // Applies {replaceFn} to every matched substring stored in the first
  // {matchesLength} entries of {matchesElements}. Entries are either smis
  // (describing a slice of the subject string) or strings (a match). Each
  // match is overwritten in place with ToString(replaceFn(match, position,
  // string)); slice entries are only used to keep track of the position and
  // are left unchanged.
  transitioning macro RegExpReplaceCallableNoExplicitCaptures(implicit context:
                                                                  Context)(
      matchesElements: FixedArray, matchesLength: intptr, string: String,
      replaceFn: Callable) {
    // Position that is handed to the callback for the next match.
    let position: Smi = 0;
    for (let index: intptr = 0; index < matchesLength; index++) {
      typeswitch (matchesElements.objects[index]) {
        // A smi entry describes a slice. Slices are encoded in either one or
        // two smis; a positive smi means the single smi encoding (see
        // ReplacementStringBuilder::AddSubjectSlice()).
        case (sliceSmi: Smi): {
          if (sliceSmi > 0) {
            // Single smi encoding: two bit fields packed into one value, see
            // StringBuilderSubstringLength::encode() and
            // StringBuilderSubstringPosition::encode(). The next match starts
            // at the sum of both fields.
            const packed: intptr = Convert<intptr>(sliceSmi);
            const upperField: intptr = packed >> 11;
            const lowerField: intptr = packed & 0x7FF;
            position = Convert<Smi>(upperField + lowerField);
          } else {
            // Two smi encoding: this entry is the negated length and the
            // entry right after it is the start. Consume that second entry
            // here as well.
            ++index;
            const sliceStart: Smi =
                UnsafeCast<Smi>(matchesElements.objects[index]);
            position = sliceStart - sliceSmi;
          }
        }
        // A string entry is a matched substring. Hand it to the callback and
        // store the stringified return value in its place.
        case (matchedString: String): {
          const callResult: JSAny = Call(
              context, replaceFn, Undefined, matchedString, position, string);
          matchesElements.objects[index] =
              ToString_Inline(context, callResult);
          position = position + matchedString.length_smi;
        }
        case (Object): deferred {
          unreachable;
        }
      }
    }
  }

  // Applies {replaceFn} to every JSArray stored in the first {matchesLength}
  // entries of {matchesElements}. The array is spread into the callback's
  // arguments and the entry is overwritten in place with the stringified
  // return value. Entries that are not JSArrays are skipped.
  transitioning macro
  RegExpReplaceCallableWithExplicitCaptures(implicit context: Context)(
      matchesElements: FixedArray, matchesLength: intptr, replaceFn: Callable) {
    for (let index: intptr = 0; index < matchesLength; index++) {
      // Anything that is not a JSArray needs no callback invocation.
      const callbackArgs: JSArray =
          Cast<JSArray>(matchesElements.objects[index]) otherwise continue;

      // Reflect.apply() expands the JSArray into the function arguments.
      // TODO(jgruber): Remove indirection through Call->ReflectApply.
      const callResult: JSAny = Call(
          context, GetReflectApply(), Undefined, replaceFn, Undefined,
          callbackArgs);

      // Replace the argument array with the string produced by the callback.
      const replacement: String = ToString_Inline(context, callResult);
      matchesElements.objects[index] = replacement;
    }
  }

  // Replace with a callable on a global fast regexp. Collects all matches
  // through RegExpExecMultiple, lets {replaceFn} compute a replacement for
  // each of them and concatenates the pieces with StringBuilderConcat.
  // Returns {string} itself if nothing matched. lastIndex of {regexp} is set
  // to 0 both before and after the matches are collected.
  transitioning macro RegExpReplaceFastGlobalCallable(implicit context:
                                                          Context)(
      regexp: FastJSRegExp, string: String, replaceFn: Callable): String {
    regexp.lastIndex = 0;

    // Array handed to the runtime function: initial capacity 16, length 0.
    const kInitialCapacity: Smi = 16;
    const kInitialLength: Smi = 0;
    const lastMatchInfo = GetRegExpLastMatchInfo();
    const resultStorage = AllocateJSArray(
        PACKED_ELEMENTS, GetFastPackedElementsJSArrayMap(), kInitialCapacity,
        kInitialLength);
    const execResult: Null | JSArray =
        RegExpExecMultiple(regexp, string, lastMatchInfo, resultStorage);

    regexp.lastIndex = 0;

    // Without any match the subject string is the result.
    if (execResult == Null) {
      return string;
    }

    const matches: JSArray = UnsafeCast<JSArray>(execResult);
    const matchCount: Smi = Cast<Smi>(matches.length) otherwise unreachable;
    const matchCountInt: intptr = Convert<intptr>(matchCount);
    const elements: FixedArray = UnsafeCast<FixedArray>(matches.elements);

    // The last match info might have changed, so it is fetched again.
    const captureCount: Smi = GetRegExpLastMatchInfo().NumberOfCaptures();

    // A capture count of two means that the regexp has no explicit captures,
    // only the implicit one covering the whole match. That case is handled by
    // a simpler and faster macro.
    if (captureCount != 2) {
      RegExpReplaceCallableWithExplicitCaptures(
          elements, matchCountInt, replaceFn);
    } else {
      RegExpReplaceCallableNoExplicitCaptures(
          elements, matchCountInt, string, replaceFn);
    }

    return StringBuilderConcat(matches, matchCount, string);
  }

  // Simple string replacement on a fast regexp. This path is reached only if
  // {receiver} is an unmodified JSRegExp instance, {replace_value} is
  // non-callable, and ToString({replace_value}) does not contain '$'.
  // Returns {string} with the first match (non-global regexp) or every match
  // (global regexp) substituted by {replaceString}.
  transitioning macro RegExpReplaceFastString(implicit context: Context)(
      regexp: FastJSRegExp, string: String, replaceString: String): String {
    const isGlobal: bool = regexp.global;
    const replaceLength: Smi = replaceString.length_smi;

    // The unicode flag is only consulted when advancing past empty matches,
    // which happens for global regexps only.
    let isUnicode: bool = false;
    if (isGlobal) {
      isUnicode = regexp.unicode;
      regexp.lastIndex = 0;
    }

    // String built up so far, and the end of the most recent match in
    // {string}.
    let accumulated: String = kEmptyString;
    let copiedUpTo: Smi = 0;

    while (true) {
      const matchInfo: RegExpMatchInfo =
          regexp::RegExpPrototypeExecBodyWithoutResultFast(regexp, string)
          otherwise break;
      const start: Smi = matchInfo.GetStartOfCapture(0);
      const end: Smi = matchInfo.GetEndOfCapture(0);

      // Append the part of the subject between the previous match and this
      // one, followed by the replacement (unless it is empty).
      // TODO(jgruber): We could skip many of the checks that using SubString
      // here entails.
      accumulated = accumulated + SubString(string, copiedUpTo, start);
      if (replaceLength != 0) {
        accumulated = accumulated + replaceString;
      }
      copiedUpTo = end;

      // A non-global regexp replaces the first match only.
      if (!isGlobal) break;

      // lastIndex has to be incremented after an empty match.
      if (end == start) {
        regexp.lastIndex =
            AdvanceStringIndexFast(string, regexp.lastIndex, isUnicode);
      }
    }

    // Append whatever follows the last match.
    return accumulated + SubString(string, copiedUpTo, string.length_smi);
  }

  // Replace on a fast regexp; dispatches on the kind of {replaceValue}.
  // A callable goes to RegExpReplaceFastGlobalCallable (global regexp) or to
  // the runtime function StringReplaceNonGlobalRegExpWithFunction (non-global
  // regexp). Any other value is converted to a string and then handled by
  // RegExpReplaceFastString, unless the regexp stopped being fast or the
  // string contains '$', in which case RegExpReplaceRT takes over.
  transitioning builtin RegExpReplace(implicit context: Context)(
      regexp: FastJSRegExp, string: String, replaceValue: JSAny): String {
    // TODO(pwong): Remove assert when all callers (StringPrototypeReplace) are
    // from Torque.
    assert(Is<FastJSRegExp>(regexp));

    // 2. Is {replace_value} callable?
    typeswitch (replaceValue) {
      case (replaceFn: Callable): {
        if (regexp.global) {
          return RegExpReplaceFastGlobalCallable(regexp, string, replaceFn);
        } else {
          return StringReplaceNonGlobalRegExpWithFunction(
              string, regexp, replaceFn);
        }
      }
      case (JSAny): {
        // Keep the regexp under its plain JSRegExp type across the ToString
        // call below.
        const stableRegexp: JSRegExp = regexp;
        const replaceString: String = ToString_Inline(context, replaceValue);

        try {
          // The shape of the RegExp object may have been changed by
          // ToString(replaceValue), so the fast path check is repeated here;
          // on failure the runtime function does the work.
          const fastRegexp =
              Cast<FastJSRegExp>(stableRegexp) otherwise SlowPath;

          // A '$' in the replacement is not handled by the simple string
          // path.
          const dollarIndex: Smi = StringIndexOf(
              replaceString, SingleCharacterStringConstant('$'), 0);
          if (dollarIndex != -1) goto SlowPath;

          return RegExpReplaceFastString(fastRegexp, string, replaceString);
        }
        label SlowPath deferred {
          return RegExpReplaceRT(context, stableRegexp, string, replaceString);
        }
      }
    }
  }

  // JavaScript entry point for RegExp.prototype[@@replace]. Throws a
  // TypeError if the receiver is not a JSReceiver, converts the first
  // argument to a string and then hands over to RegExpReplace (fast regexp)
  // or to the runtime function RegExpReplaceRT (anything else).
  transitioning javascript builtin RegExpPrototypeReplace(
      js-implicit context: Context, receiver: JSAny)(...arguments): JSAny {
    const methodName: constexpr string = 'RegExp.prototype.@@replace';

    // Overview of the complete dispatch, including the parts that live in
    // RegExpReplace:
    //
    // receiver is not a fast regexp   -> runtime (RegExpReplaceRT)
    // replace value is callable
    //   regexp is global              -> RegExpReplaceFastGlobalCallable()
    //                                    (called 'fast-path' but contains
    //                                    several runtime calls)
    //   regexp is not global          -> runtime
    //                                    (StringReplaceNonGlobalRegExpWithFunction)
    // replace value is not callable
    //   replacement contains "$"      -> runtime (RegExpReplaceRT)
    //   otherwise                     -> RegExpReplaceFastString()

    // Let rx be the this value.
    // If Type(rx) is not Object, throw a TypeError exception.
    const regexpReceiver = Cast<JSReceiver>(receiver)
        otherwise ThrowTypeError(kIncompatibleMethodReceiver, methodName);

    const stringArg: JSAny = arguments[0];
    const replaceArg: JSAny = arguments[1];

    // Let S be ? ToString(string).
    const subject = ToString_Inline(context, stringArg);

    // Fast-path checks: 1. Is the {receiver} an unmodified JSRegExp instance?
    try {
      const fastRegexp: FastJSRegExp =
          Cast<FastJSRegExp>(regexpReceiver) otherwise SlowPath;
      return RegExpReplace(fastRegexp, subject, replaceArg);
    }
    label SlowPath deferred {
      return RegExpReplaceRT(context, regexpReceiver, subject, replaceArg);
    }
  }

}