summaryrefslogtreecommitdiff
path: root/deps/icu-small/source/common/unicode/edits.h
diff options
context:
space:
mode:
Diffstat (limited to 'deps/icu-small/source/common/unicode/edits.h')
-rw-r--r--deps/icu-small/source/common/unicode/edits.h150
1 files changed, 129 insertions, 21 deletions
diff --git a/deps/icu-small/source/common/unicode/edits.h b/deps/icu-small/source/common/unicode/edits.h
index 5a72574c14..f767a8d3b4 100644
--- a/deps/icu-small/source/common/unicode/edits.h
+++ b/deps/icu-small/source/common/unicode/edits.h
@@ -17,10 +17,57 @@
U_NAMESPACE_BEGIN
+class UnicodeString;
+
/**
- * Records lengths of string edits but not replacement text.
- * Supports replacements, insertions, deletions in linear progression.
- * Does not support moving/reordering of text.
+ * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
+ * in linear progression. Does not support moving/reordering of text.
+ *
+ * There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
+ * instances of this class using {@link #addReplace(int, int)} (for change edits) and
+ * {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity,
+ * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
+ * mapping between code points in the source and destination strings.
+ *
+ * After all edits have been added, instances of this class should be considered immutable, and an
+ * {@link Edits::Iterator} can be used for queries.
+ *
+ * There are four flavors of Edits::Iterator:
+ *
+ * <ul>
+ * <li>{@link #getFineIterator()} retains full granularity of change edits.
+ * <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
+ * next() on the iterator, skips over no-change edits (unchanged regions).
+ * <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
+ * edits are automatically merged during the construction phase.)
+ * <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
+ * calling next() on the iterator, skips over no-change edits (unchanged regions).
+ * </ul>
+ *
+ * For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
+ * following fine edits:
+ * <ul>
+ * <li>abc ⇨ abc (no-change)
+ * <li>ß ⇨ ss (change)
+ * <li>D ⇨ d (change)
+ * <li>e ⇨ e (no-change)
+ * <li>F ⇨ f (change)
+ * </ul>
+ * and the following coarse edits (note how adjacent change edits get merged together):
+ * <ul>
+ * <li>abc ⇨ abc (no-change)
+ * <li>ßD ⇨ ssd (change)
+ * <li>e ⇨ e (no-change)
+ * <li>F ⇨ f (change)
+ * </ul>
+ *
+ * The "fine changes" and "coarse changes" iterators will step through only the change edits when their
+ * {@link Edits::Iterator#next()} methods are called. They are identical to the non-change iterators when
+ * their {@link Edits::Iterator#findSourceIndex(int)} or {@link Edits::Iterator#findDestinationIndex(int)}
+ * methods are used to walk through the string.
+ *
+ * For examples of how to use this class, see the test <code>TestCaseMapEditsIteratorDocs</code> in
+ * UCharacterCaseTest.java.
*
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
@@ -91,13 +138,13 @@ public:
void reset() U_NOEXCEPT;
/**
- * Adds a record for an unchanged segment of text.
+ * Adds a no-change edit: a record for an unchanged segment of text.
* Normally called from inside ICU string transformation functions, not user code.
* @stable ICU 59
*/
void addUnchanged(int32_t unchangedLength);
/**
- * Adds a record for a text replacement/insertion/deletion.
+ * Adds a change edit: a record for a text replacement/insertion/deletion.
* Normally called from inside ICU string transformation functions, not user code.
* @stable ICU 59
*/
@@ -136,6 +183,18 @@ public:
/**
* Access to the list of edits.
+ *
+ * At any moment in time, an instance of this class points to a single edit: a "window" into a span
+ * of the source string and the corresponding span of the destination string. The source string span
+ * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
+ * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
+ *
+ * The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)},
+ * and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator
+ * to make it point to the corresponding edit.
+ *
+ * For more information, see the documentation for {@link Edits}.
+ *
* @see getCoarseIterator
* @see getFineIterator
* @stable ICU 59
@@ -162,7 +221,7 @@ public:
Iterator &operator=(const Iterator &other) = default;
/**
- * Advances to the next edit.
+ * Advances the iterator to the next edit.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
@@ -172,9 +231,9 @@ public:
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
/**
- * Finds the edit that contains the source index.
- * The source index may be found in a non-change
- * even if normal iteration would skip non-changes.
+ * Moves the iterator to the edit that contains the source index.
+ * The source index may be found in a no-change edit
+ * even if normal iteration would skip no-change edits.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
@@ -196,9 +255,9 @@ public:
#ifndef U_HIDE_DRAFT_API
/**
- * Finds the edit that contains the destination index.
- * The destination index may be found in a non-change
- * even if normal iteration would skip non-changes.
+ * Moves the iterator to the edit that contains the destination index.
+ * The destination index may be found in a no-change edit
+ * even if normal iteration would skip no-change edits.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
@@ -219,7 +278,7 @@ public:
}
/**
- * Returns the destination index corresponding to the given source index.
+ * Computes the destination index corresponding to the given source index.
* If the source index is inside a change edit (not at its start),
* then the destination index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
@@ -243,7 +302,7 @@ public:
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
/**
- * Returns the source index corresponding to the given destination index.
+ * Computes the source index corresponding to the given destination index.
* If the destination index is inside a change edit (not at its start),
* then the source index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
@@ -268,17 +327,27 @@ public:
#endif // U_HIDE_DRAFT_API
/**
+ * Returns whether the edit currently represented by the iterator is a change edit.
+ *
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
* FALSE if oldLength units remain unchanged.
* @stable ICU 59
*/
UBool hasChange() const { return changed; }
+
/**
+ * The length of the current span in the source string, which starts at {@link #sourceIndex}.
+ *
* @return the number of units in the original string which are replaced or remain unchanged.
* @stable ICU 59
*/
int32_t oldLength() const { return oldLength_; }
+
/**
+ * The length of the current span in the destination string, which starts at
+ * {@link #destinationIndex}, or in the replacement string, which starts at
+ * {@link #replacementIndex}.
+ *
* @return the number of units in the modified string, if hasChange() is TRUE.
* Same as oldLength if hasChange() is FALSE.
* @stable ICU 59
@@ -286,22 +355,52 @@ public:
int32_t newLength() const { return newLength_; }
/**
+ * The start index of the current span in the source string; the span has length
+ * {@link #oldLength}.
+ *
* @return the current index into the source string
* @stable ICU 59
*/
int32_t sourceIndex() const { return srcIndex; }
+
/**
+ * The start index of the current span in the replacement string; the span has length
+ * {@link #newLength}. Well-defined only if the current edit is a change edit.
+ * <p>
+ * The <em>replacement string</em> is the concatenation of all substrings of the destination
+ * string corresponding to change edits.
+ * <p>
+ * This method is intended to be used together with operations that write only replacement
+ * characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified
+ * in-place.
+ *
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
* @stable ICU 59
*/
- int32_t replacementIndex() const { return replIndex; }
+ int32_t replacementIndex() const {
+ // TODO: Throw an exception if we aren't in a change edit?
+ return replIndex;
+ }
+
/**
+ * The start index of the current span in the destination string; the span has length
+ * {@link #newLength}.
+ *
* @return the current index into the full destination string
* @stable ICU 59
*/
int32_t destinationIndex() const { return destIndex; }
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * A string representation of the current edit represented by the iterator for debugging. You
+ * should not depend on the contents of the return string.
+ * @internal
+ */
+ UnicodeString& toString(UnicodeString& appendTo) const;
+#endif // U_HIDE_INTERNAL_API
+
private:
friend class Edits;
@@ -330,8 +429,10 @@ public:
};
/**
- * Returns an Iterator for coarse-grained changes for simple string updates.
- * Skips non-changes.
+ * Returns an Iterator for coarse-grained change edits
+ * (adjacent change edits are treated as one).
+ * Can be used to perform simple string updates.
+ * Skips no-change edits.
* @return an Iterator that merges adjacent changes.
* @stable ICU 59
*/
@@ -340,7 +441,10 @@ public:
}
/**
- * Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
+ * Returns an Iterator for coarse-grained change and no-change edits
+ * (adjacent change edits are treated as one).
+ * Can be used to perform simple string updates.
+ * Adjacent change edits are treated as one edit.
* @return an Iterator that merges adjacent changes.
* @stable ICU 59
*/
@@ -349,8 +453,10 @@ public:
}
/**
- * Returns an Iterator for fine-grained changes for modifying styled text.
- * Skips non-changes.
+ * Returns an Iterator for fine-grained change edits
+ * (full granularity of change edits is retained).
+ * Can be used for modifying styled text.
+ * Skips no-change edits.
* @return an Iterator that separates adjacent changes.
* @stable ICU 59
*/
@@ -359,7 +465,9 @@ public:
}
/**
- * Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
+ * Returns an Iterator for fine-grained change and no-change edits
+ * (full granularity of change edits is retained).
+ * Can be used for modifying styled text.
* @return an Iterator that separates adjacent changes.
* @stable ICU 59
*/