diff options
Diffstat (limited to 'deps/icu-small/source/i18n/regexcmp.cpp')
-rw-r--r-- | deps/icu-small/source/i18n/regexcmp.cpp | 64 |
1 files changed, 35 insertions, 29 deletions
diff --git a/deps/icu-small/source/i18n/regexcmp.cpp b/deps/icu-small/source/i18n/regexcmp.cpp index 8d60986fd3..3a6368b07a 100644 --- a/deps/icu-small/source/i18n/regexcmp.cpp +++ b/deps/icu-small/source/i18n/regexcmp.cpp @@ -561,7 +561,7 @@ UBool RegexCompile::doParseActions(int32_t action) // sequence; don't change without making updates there too. // // Compiles to - // 1 START_LA dataLoc Saves SP, Input Pos + // 1 LA_START dataLoc Saves SP, Input Pos, Active input region. // 2. STATE_SAVE 4 on failure of lookahead, goto 4 // 3 JMP 6 continue ... // @@ -575,10 +575,14 @@ UBool RegexCompile::doParseActions(int32_t action) // 8. code for parenthesized stuff. // 9. LA_END // - // Two data slots are reserved, for saving the stack ptr and the input position. + // Four data slots are reserved, for saving state on entry to the look-around + // 0: stack pointer on entry. + // 1: input position on entry. + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. { fixLiterals(); - int32_t dataLoc = allocateData(2); + int32_t dataLoc = allocateData(4); appendOp(URX_LA_START, dataLoc); appendOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2); appendOp(URX_JMP, fRXPat->fCompiledPat->size()+ 3); @@ -599,18 +603,23 @@ UBool RegexCompile::doParseActions(int32_t action) case doOpenLookAheadNeg: // Negated Lookahead. (?! stuff ) // Compiles to - // 1. START_LA dataloc + // 1. LA_START dataloc // 2. SAVE_STATE 7 // Fail within look-ahead block restores to this state, // // which continues with the match. // 3. NOP // Std. Open Paren sequence, for possible '|' // 4. code for parenthesized stuff. - // 5. END_LA // Cut back stack, remove saved state from step 2. + // 5. LA_END // Cut back stack, remove saved state from step 2. // 6. BACKTRACK // code in block succeeded, so neg. lookahead fails. // 7. END_LA // Restore match region, in case look-ahead was using // an alternate (transparent) region. + // Four data slots are reserved, for saving state on entry to the look-around + // 0: stack pointer on entry. + // 1: input position on entry. + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. { fixLiterals(); - int32_t dataLoc = allocateData(2); + int32_t dataLoc = allocateData(4); appendOp(URX_LA_START, dataLoc); appendOp(URX_STATE_SAVE, 0); // dest address will be patched later. appendOp(URX_NOP, 0); @@ -644,14 +653,16 @@ UBool RegexCompile::doParseActions(int32_t action) // Allocate a block of matcher data, to contain (when running a match) // 0: Stack ptr on entry // 1: Input Index on entry - // 2: Start index of match current match attempt. - // 3: Original Input String len. + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. + // 4: Start index of match current match attempt. + // The first four items must match the layout of data for LA_START / LA_END // Generate match code for any pending literals. fixLiterals(); // Allocate data space - int32_t dataLoc = allocateData(4); + int32_t dataLoc = allocateData(5); // Emit URX_LB_START appendOp(URX_LB_START, dataLoc); @@ -696,14 +707,16 @@ UBool RegexCompile::doParseActions(int32_t action) // Allocate a block of matcher data, to contain (when running a match) // 0: Stack ptr on entry // 1: Input Index on entry - // 2: Start index of match current match attempt. - // 3: Original Input String len. + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. + // 4: Start index of match current match attempt. + // The first four items must match the layout of data for LA_START / LA_END // Generate match code for any pending literals. fixLiterals(); // Allocate data space - int32_t dataLoc = allocateData(4); + int32_t dataLoc = allocateData(5); // Emit URX_LB_START appendOp(URX_LB_START, dataLoc); @@ -2285,7 +2298,7 @@ void RegexCompile::handleCloseParen() { error(U_REGEX_LOOK_BEHIND_LIMIT); break; } - if (minML == INT32_MAX && maxML == 0) { + if (minML == INT32_MAX) { // This condition happens when no match is possible, such as with a // [set] expression containing no elements. // In principle, the generated code to evaluate the expression could be deleted, @@ -2328,7 +2341,7 @@ void RegexCompile::handleCloseParen() { error(U_REGEX_LOOK_BEHIND_LIMIT); break; } - if (minML == INT32_MAX && maxML == 0) { + if (minML == INT32_MAX) { // This condition happens when no match is possible, such as with a // [set] expression containing no elements. // In principle, the generated code to evaluate the expression could be deleted, @@ -3381,7 +3394,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { // it assumes that the look-ahead match might be zero-length. // TODO: Positive lookahead could recursively do the block, then continue // with the longer of the block or the value coming in. Ticket 6060 - int32_t depth = (opType == URX_LA_START? 2: 1);; + int32_t depth = (opType == URX_LA_START? 2: 1); for (;;) { loc++; op = (int32_t)fRXPat->fCompiledPat->elementAti(loc); @@ -3463,7 +3476,6 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { U_ASSERT(start <= end); U_ASSERT(end < fRXPat->fCompiledPat->size()); - int32_t loc; int32_t op; int32_t opType; @@ -3672,7 +3684,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { case URX_CTR_LOOP: case URX_CTR_LOOP_NG: - // These opcodes will be skipped over by code for URX_CRT_INIT. + // These opcodes will be skipped over by code for URX_CTR_INIT. // We shouldn't encounter them here. UPRV_UNREACHABLE; @@ -3700,21 +3712,15 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { { // Look-behind. Scan forward until the matching look-around end, // without processing the look-behind block. - int32_t depth = 0; - for (;;) { - loc++; + int32_t dataLoc = URX_VAL(op); + for (loc = loc + 1; loc < end; ++loc) { op = (int32_t)fRXPat->fCompiledPat->elementAti(loc); - if (URX_TYPE(op) == URX_LA_START || URX_TYPE(op) == URX_LB_START) { - depth++; - } - if (URX_TYPE(op) == URX_LA_END || URX_TYPE(op)==URX_LBN_END) { - if (depth == 0) { - break; - } - depth--; + int32_t opType = URX_TYPE(op); + if ((opType == URX_LA_END || opType == URX_LBN_END) && (URX_VAL(op) == dataLoc)) { + break; } - U_ASSERT(loc < end); } + U_ASSERT(loc < end); } break; |