summaryrefslogtreecommitdiff
path: root/deps/icu-small/source/common/unicode/filteredbrk.h
blob: 2444114e9a14bfd47f3b12f5ea2e91a75e63703e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
********************************************************************************
*   Copyright (C) 1997-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
********************************************************************************
*/

#ifndef FILTEREDBRK_H
#define FILTEREDBRK_H

#include "unicode/utypes.h"
#include "unicode/brkiter.h"

#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION

U_NAMESPACE_BEGIN

/**
 * \file
 * \brief C++ API: FilteredBreakIteratorBuilder
 */

/**
 * The BreakIteratorFilter is used to modify the behavior of a BreakIterator
 *  by constructing a new BreakIterator which suppresses certain segment boundaries.
 *  See  http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions .
 *  For example, a typical English Sentence Break Iterator would break on the space
 *  in the string "Mr. Smith" (resulting in two segments),
 *  but with "Mr." as an exception, a filtered break iterator
 *  would consider the string "Mr. Smith" to be a single segment.
 *
 * @stable ICU 56
 */
class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
 public:
  /**
   *  destructor.
   * @stable ICU 56
   */
  virtual ~FilteredBreakIteratorBuilder();

  /**
   * Construct a FilteredBreakIteratorBuilder based on rules in a locale.
   * The rules are taken from CLDR exception data for the locale,
   *  see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions
   *  This is the equivalent of calling createInstance(UErrorCode&)
   *    and then repeatedly calling addNoBreakAfter(...) with the contents
   *    of the CLDR exception data.
   * @param where the locale.
   * @param status The error code.
   * @return the new builder
   * @stable ICU 56
   */
  static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);

#ifndef U_HIDE_DEPRECATED_API
  /**
   * This function has been deprecated in favor of createEmptyInstance, which has
   * identical behavior.
   * @param status The error code.
   * @return the new builder
   * @deprecated ICU 60 use createEmptyInstance instead
   * @see createEmptyInstance()
   */
  static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
#endif  /* U_HIDE_DEPRECATED_API */

  /**
   * Construct an empty FilteredBreakIteratorBuilder.
   * In this state, it will not suppress any segment boundaries.
   * @param status The error code.
   * @return the new builder
   * @stable ICU 60
   */
  static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);

  /**
   * Suppress a certain string from being the end of a segment.
   * For example, suppressing "Mr.", then segments ending in "Mr." will not be returned
   * by the iterator.
   * @param string the string to suppress, such as "Mr."
   * @param status error code
   * @return returns TRUE if the string was not present and now added,
   * FALSE if the call was a no-op because the string was already being suppressed.
   * @stable ICU 56
   */
  virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;

  /**
   * Stop suppressing a certain string from being the end of the segment.
   * This function does not create any new segment boundaries, but only serves to un-do
   * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of
   * locale data which may be suppressing certain strings.
   * @param string the exception to remove
   * @param status error code
   * @return returns TRUE if the string was present and now removed,
   * FALSE if the call was a no-op because the string was not being suppressed.
   * @stable ICU 56
   */
  virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;

  /**
   * This function has been deprecated in favor of wrapIteratorWithFilter()
   * The behavior is identical.
   * @param adoptBreakIterator the break iterator to adopt
   * @param status error code
   * @return the new BreakIterator, owned by the caller.
   * @deprecated ICU 60 use wrapIteratorWithFilter() instead
   * @see wrapBreakIteratorWithFilter()
   */
  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;

  /**
   * Wrap (adopt) an existing break iterator in a new filtered instance.
   * The resulting BreakIterator is owned by the caller.
   * The BreakIteratorFilter may be destroyed before the BreakIterator is destroyed.
   * Note that the adoptBreakIterator is adopted by the new BreakIterator
   * and should no longer be used by the caller.
   * The FilteredBreakIteratorBuilder may be reused.
   * This function is an alias for build()
   * @param adoptBreakIterator the break iterator to adopt
   * @param status error code
   * @return the new BreakIterator, owned by the caller.
   * @stable ICU 60
   */
  inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
    return build(adoptBreakIterator, status);
  }

 protected:
  /**
   * For subclass use
   * @stable ICU 56
   */
  FilteredBreakIteratorBuilder();
};


U_NAMESPACE_END

#endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION

#endif // #ifndef FILTEREDBRK_H