diff options
Diffstat (limited to 'deps/icu-small/source/i18n/rematch.cpp')
-rw-r--r-- | deps/icu-small/source/i18n/rematch.cpp | 144 |
1 files changed, 78 insertions, 66 deletions
diff --git a/deps/icu-small/source/i18n/rematch.cpp b/deps/icu-small/source/i18n/rematch.cpp index 3b8d2333d8..d9af2b3dda 100644 --- a/deps/icu-small/source/i18n/rematch.cpp +++ b/deps/icu-small/source/i18n/rematch.cpp @@ -2069,7 +2069,7 @@ int32_t RegexMatcher::split(UText *input, // if (U_FAILURE(status)) { return 0; - }; + } if (destCapacity < 1) { status = U_ILLEGAL_ARGUMENT_ERROR; @@ -3805,11 +3805,13 @@ GC_Done: case URX_LA_START: { - // Entering a lookahead block. + // Entering a look around block. // Save Stack Ptr, Input Pos. - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); + U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize); fData[opValue] = fStack->size(); fData[opValue+1] = fp->fInputIdx; + fData[opValue+2] = fActiveStart; + fData[opValue+3] = fActiveLimit; fActiveStart = fLookStart; // Set the match region change for fActiveLimit = fLookLimit; // transparent bounds. } @@ -3819,7 +3821,7 @@ GC_Done: { // Leaving a look-ahead block. // restore Stack Ptr, Input Pos to positions they had on entry to block. - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); + U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize); int32_t stackSize = fStack->size(); int32_t newStackSize =(int32_t)fData[opValue]; U_ASSERT(stackSize >= newStackSize); @@ -3839,8 +3841,10 @@ GC_Done: // Restore the active region bounds in the input string; they may have // been changed because of transparent bounds on a Region. - fActiveStart = fRegionStart; - fActiveLimit = fRegionLimit; + fActiveStart = fData[opValue+2]; + fActiveLimit = fData[opValue+3]; + U_ASSERT(fActiveStart >= 0); + U_ASSERT(fActiveLimit <= fInputLength); } break; @@ -3916,17 +3920,19 @@ GC_Done: case URX_LB_START: { // Entering a look-behind block. - // Save Stack Ptr, Input Pos. + // Save Stack Ptr, Input Pos and active input region. // TODO: implement transparent bounds. Ticket #6067 - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); + U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize); fData[opValue] = fStack->size(); fData[opValue+1] = fp->fInputIdx; - // Init the variable containing the start index for attempted matches. - fData[opValue+2] = -1; // Save input string length, then reset to pin any matches to end at // the current position. + fData[opValue+2] = fActiveStart; fData[opValue+3] = fActiveLimit; + fActiveStart = fRegionStart; fActiveLimit = fp->fInputIdx; + // Init the variable containing the start index for attempted matches. + fData[opValue+4] = -1; } break; @@ -3949,8 +3955,8 @@ GC_Done: U_ASSERT(minML >= 0); // Fetch (from data) the last input index where a match was attempted. - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); - int64_t &lbStartIdx = fData[opValue+2]; + U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize); + int64_t &lbStartIdx = fData[opValue+4]; if (lbStartIdx < 0) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; @@ -3976,10 +3982,10 @@ GC_Done: // getting a match. Backtrack out, and out of the // Look Behind altogether. fp = (REStackFrame *)fStack->popFrame(fFrameSize); - int64_t restoreInputLen = fData[opValue+3]; - U_ASSERT(restoreInputLen >= fActiveLimit); - U_ASSERT(restoreInputLen <= fInputLength); - fActiveLimit = restoreInputLen; + fActiveStart = fData[opValue+2]; + fActiveLimit = fData[opValue+3]; + U_ASSERT(fActiveStart >= 0); + U_ASSERT(fActiveLimit <= fInputLength); break; } @@ -3993,7 +3999,7 @@ GC_Done: case URX_LB_END: // End of a look-behind block, after a successful match. { - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); + U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize); if (fp->fInputIdx != fActiveLimit) { // The look-behind expression matched, but the match did not // extend all the way to the point that we are looking behind from. @@ -4004,13 +4010,13 @@ GC_Done: break; } - // Look-behind match is good. Restore the orignal input string length, + // Look-behind match is good. Restore the orignal input string region, // which had been truncated to pin the end of the lookbehind match to the // position being looked-behind. - int64_t originalInputLen = fData[opValue+3]; - U_ASSERT(originalInputLen >= fActiveLimit); - U_ASSERT(originalInputLen <= fInputLength); - fActiveLimit = originalInputLen; + fActiveStart = fData[opValue+2]; + fActiveLimit = fData[opValue+3]; + U_ASSERT(fActiveStart >= 0); + U_ASSERT(fActiveLimit <= fInputLength); } break; @@ -4035,8 +4041,8 @@ GC_Done: U_ASSERT(continueLoc > fp->fPatIdx); // Fetch (from data) the last input index where a match was attempted. - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); - int64_t &lbStartIdx = fData[opValue+2]; + U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize); + int64_t &lbStartIdx = fData[opValue+4]; if (lbStartIdx < 0) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; @@ -4061,10 +4067,10 @@ GC_Done: // We have tried all potential match starting points without // getting a match, which means that the negative lookbehind as // a whole has succeeded. Jump forward to the continue location - int64_t restoreInputLen = fData[opValue+3]; - U_ASSERT(restoreInputLen >= fActiveLimit); - U_ASSERT(restoreInputLen <= fInputLength); - fActiveLimit = restoreInputLen; + fActiveStart = fData[opValue+2]; + fActiveLimit = fData[opValue+3]; + U_ASSERT(fActiveStart >= 0); + U_ASSERT(fActiveLimit <= fInputLength); fp->fPatIdx = continueLoc; break; } @@ -4079,7 +4085,7 @@ GC_Done: case URX_LBN_END: // End of a negative look-behind block, after a successful match. { - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); + U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize); if (fp->fInputIdx != fActiveLimit) { // The look-behind expression matched, but the match did not // extend all the way to the point that we are looking behind from. @@ -4096,10 +4102,10 @@ GC_Done: // Restore the orignal input string length, which had been truncated // inorder to pin the end of the lookbehind match // to the position being looked-behind. - int64_t originalInputLen = fData[opValue+3]; - U_ASSERT(originalInputLen >= fActiveLimit); - U_ASSERT(originalInputLen <= fInputLength); - fActiveLimit = originalInputLen; + fActiveStart = fData[opValue+2]; + fActiveLimit = fData[opValue+3]; + U_ASSERT(fActiveStart >= 0); + U_ASSERT(fActiveLimit <= fInputLength); // Restore original stack position, discarding any state saved // by the successful pattern match. @@ -5336,11 +5342,13 @@ GC_Done: case URX_LA_START: { - // Entering a lookahead block. + // Entering a look around block. // Save Stack Ptr, Input Pos. - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); + U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize); fData[opValue] = fStack->size(); fData[opValue+1] = fp->fInputIdx; + fData[opValue+2] = fActiveStart; + fData[opValue+3] = fActiveLimit; fActiveStart = fLookStart; // Set the match region change for fActiveLimit = fLookLimit; // transparent bounds. } @@ -5348,9 +5356,9 @@ GC_Done: case URX_LA_END: { - // Leaving a look-ahead block. + // Leaving a look around block. // restore Stack Ptr, Input Pos to positions they had on entry to block. - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); + U_ASSERT(opValue>=0 && opValue+3<fPattern->fDataSize); int32_t stackSize = fStack->size(); int32_t newStackSize = (int32_t)fData[opValue]; U_ASSERT(stackSize >= newStackSize); @@ -5370,8 +5378,10 @@ GC_Done: // Restore the active region bounds in the input string; they may have // been changed because of transparent bounds on a Region. - fActiveStart = fRegionStart; - fActiveLimit = fRegionLimit; + fActiveStart = fData[opValue+2]; + fActiveLimit = fData[opValue+3]; + U_ASSERT(fActiveStart >= 0); + U_ASSERT(fActiveLimit <= fInputLength); } break; @@ -5434,17 +5444,19 @@ GC_Done: case URX_LB_START: { // Entering a look-behind block. - // Save Stack Ptr, Input Pos. + // Save Stack Ptr, Input Pos and active input region. // TODO: implement transparent bounds. Ticket #6067 - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); + U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize); fData[opValue] = fStack->size(); fData[opValue+1] = fp->fInputIdx; - // Init the variable containing the start index for attempted matches. - fData[opValue+2] = -1; // Save input string length, then reset to pin any matches to end at // the current position. + fData[opValue+2] = fActiveStart; fData[opValue+3] = fActiveLimit; + fActiveStart = fRegionStart; fActiveLimit = fp->fInputIdx; + // Init the variable containing the start index for attempted matches. + fData[opValue+4] = -1; } break; @@ -5462,8 +5474,8 @@ GC_Done: U_ASSERT(minML >= 0); // Fetch (from data) the last input index where a match was attempted. - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); - int64_t &lbStartIdx = fData[opValue+2]; + U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize); + int64_t &lbStartIdx = fData[opValue+4]; if (lbStartIdx < 0) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; @@ -5485,10 +5497,10 @@ GC_Done: // getting a match. Backtrack out, and out of the // Look Behind altogether. fp = (REStackFrame *)fStack->popFrame(fFrameSize); - int64_t restoreInputLen = fData[opValue+3]; - U_ASSERT(restoreInputLen >= fActiveLimit); - U_ASSERT(restoreInputLen <= fInputLength); - fActiveLimit = restoreInputLen; + fActiveStart = fData[opValue+2]; + fActiveLimit = fData[opValue+3]; + U_ASSERT(fActiveStart >= 0); + U_ASSERT(fActiveLimit <= fInputLength); break; } @@ -5502,7 +5514,7 @@ GC_Done: case URX_LB_END: // End of a look-behind block, after a successful match. { - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); + U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize); if (fp->fInputIdx != fActiveLimit) { // The look-behind expression matched, but the match did not // extend all the way to the point that we are looking behind from. @@ -5513,13 +5525,13 @@ GC_Done: break; } - // Look-behind match is good. Restore the orignal input string length, + // Look-behind match is good. Restore the orignal input string region, // which had been truncated to pin the end of the lookbehind match to the // position being looked-behind. - int64_t originalInputLen = fData[opValue+3]; - U_ASSERT(originalInputLen >= fActiveLimit); - U_ASSERT(originalInputLen <= fInputLength); - fActiveLimit = originalInputLen; + fActiveStart = fData[opValue+2]; + fActiveLimit = fData[opValue+3]; + U_ASSERT(fActiveStart >= 0); + U_ASSERT(fActiveLimit <= fInputLength); } break; @@ -5539,8 +5551,8 @@ GC_Done: U_ASSERT(continueLoc > fp->fPatIdx); // Fetch (from data) the last input index where a match was attempted. - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); - int64_t &lbStartIdx = fData[opValue+2]; + U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize); + int64_t &lbStartIdx = fData[opValue+4]; if (lbStartIdx < 0) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; @@ -5561,10 +5573,10 @@ GC_Done: // We have tried all potential match starting points without // getting a match, which means that the negative lookbehind as // a whole has succeeded. Jump forward to the continue location - int64_t restoreInputLen = fData[opValue+3]; - U_ASSERT(restoreInputLen >= fActiveLimit); - U_ASSERT(restoreInputLen <= fInputLength); - fActiveLimit = restoreInputLen; + fActiveStart = fData[opValue+2]; + fActiveLimit = fData[opValue+3]; + U_ASSERT(fActiveStart >= 0); + U_ASSERT(fActiveLimit <= fInputLength); fp->fPatIdx = continueLoc; break; } @@ -5579,7 +5591,7 @@ GC_Done: case URX_LBN_END: // End of a negative look-behind block, after a successful match. { - U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); + U_ASSERT(opValue>=0 && opValue+4<fPattern->fDataSize); if (fp->fInputIdx != fActiveLimit) { // The look-behind expression matched, but the match did not // extend all the way to the point that we are looking behind from. @@ -5596,10 +5608,10 @@ GC_Done: // Restore the orignal input string length, which had been truncated // inorder to pin the end of the lookbehind match // to the position being looked-behind. - int64_t originalInputLen = fData[opValue+3]; - U_ASSERT(originalInputLen >= fActiveLimit); - U_ASSERT(originalInputLen <= fInputLength); - fActiveLimit = originalInputLen; + fActiveStart = fData[opValue+2]; + fActiveLimit = fData[opValue+3]; + U_ASSERT(fActiveStart >= 0); + U_ASSERT(fActiveLimit <= fInputLength); // Restore original stack position, discarding any state saved // by the successful pattern match. |