summaryrefslogtreecommitdiff
path: root/deps/icu-small/source/i18n/affixpatternparser.h
blob: 2105540a1aa7ba41daa99a25ca70609f1052a261 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
/*
*******************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* affixpatternparser.h
*
* created on: 2015jan06
* created by: Travis Keep
*/

#ifndef __AFFIX_PATTERN_PARSER_H__
#define __AFFIX_PATTERN_PARSER_H__

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "pluralaffix.h"

U_NAMESPACE_BEGIN

class PluralRules;
class FixedPrecision;
class DecimalFormatSymbols;

/**
 * A representation of the various forms of a particular currency according
 * to some locale and usage context.
 *
 * Includes the symbol, ISO code form, and long form(s) of the currency name
 * for each plural variation.
 */
class U_I18N_API CurrencyAffixInfo : public UMemory {
public:
    /**
     * Symbol is \u00a4; ISO form is \u00a4\u00a4;
     *  long form is \u00a4\u00a4\u00a4.
     */
    CurrencyAffixInfo();

    /** Returns the symbol form of the currency. */
    const UnicodeString &getSymbol() const {
        return fSymbol;
    }

    /** Returns the ISO form of the currency. */
    const UnicodeString &getISO() const {
        return fISO;
    }

    /** Returns the long forms of the currency keyed by plural variation. */
    const PluralAffix &getLong() const {
        return fLong;
    }

    /**
     * Replaces the symbol form of the currency.
     * Afterwards isDefault() returns FALSE.
     */
    void setSymbol(const UnicodeString &symbol) {
        fIsDefault = FALSE;
        fSymbol = symbol;
    }

    /**
     * Replaces the ISO form of the currency.
     * Afterwards isDefault() returns FALSE.
     */
    void setISO(const UnicodeString &iso) {
        fIsDefault = FALSE;
        fISO = iso;
    }

    /**
     * Returns TRUE only when the symbol, the ISO form, the long forms and
     * the is-default flag of both instances all match.
     */
    UBool equals(const CurrencyAffixInfo &other) const {
        if (!(fSymbol == other.fSymbol)) {
            return FALSE;
        }
        if (!(fISO == other.fISO)) {
            return FALSE;
        }
        if (!fLong.equals(other.fLong)) {
            return FALSE;
        }
        return fIsDefault == other.fIsDefault;
    }

    /**
     * Initializes this instance.
     *
     * @param locale the locale for the currency forms.
     * @param rules The plural rules for the locale.
     * @param currency the null terminated, 3 character ISO code of the
     * currency. If NULL, resets this instance as if it were just created.
     * In this case, the first 2 parameters may be NULL as well.
     * @param status any error returned here.
     */
    void set(
            const char *locale,
            const PluralRules *rules,
            const UChar *currency,
            UErrorCode &status);

    /**
     * Returns true if this instance is the default, that is, it has no
     * real currency. For instance, it was never initialized with set(),
     * or it was reset with set(NULL, NULL, NULL, status).
     */
    UBool isDefault() const {
        return fIsDefault;
    }

    /**
     * Adjusts the precision used for a particular currency.
     * @param currency the null terminated, 3 character ISO code of the
     * currency.
     * @param usage the usage of the currency
     * @param precision min/max fraction digits and rounding increment
     *  adjusted.
     * @param status any error reported here.
     */
    static void adjustPrecision(
            const UChar *currency,
            const UCurrencyUsage usage,
            FixedPrecision &precision,
            UErrorCode &status);

private:
    /**
     * The symbol form of the currency.
     */
    UnicodeString fSymbol;

    /**
     * The ISO form of the currency, usually three letter abbreviation.
     */
    UnicodeString fISO;

    /**
     * The long forms of the currency keyed by plural variation.
     */
    PluralAffix fLong;

    /**
     * Value reported by isDefault(). The inline setters setSymbol() and
     * setISO() clear it to FALSE.
     */
    UBool fIsDefault;

};

class AffixPatternIterator;

/**
 * A locale agnostic representation of an affix pattern.
 */
class U_I18N_API AffixPattern : public UMemory {
public:

    /**
     * The token types that can appear in an affix pattern.
     */
    enum ETokenType {
        kLiteral,
        kPercent,
        kPerMill,
        kCurrency,
        kNegative,
        kPositive
    };

    /**
     * An empty affix pattern.
     */
    AffixPattern()
            : tokens(),
              literals(),
              hasCurrencyToken(FALSE),
              hasPercentToken(FALSE),
              hasPermillToken(FALSE),
              char32Count(0) {
    }

    /**
     * Adds a string literal to this affix pattern.
     */
    void addLiteral(const UChar *, int32_t start, int32_t len);

    /**
     * Adds a token to this affix pattern. t must not be kLiteral as
     * the addLiteral() method adds literals.
     * @param t the token type to add
     */
    void add(ETokenType t);

    /**
     * Adds a currency token with specific count to this affix pattern.
     * @param count the token count. Used to distinguish between
     *  one, two, or three currency symbols. Note that adding a currency
     *  token with count=2 (Use ISO code) is different than adding two
     *  currency tokens each with count=1 (two currency symbols).
     */
    void addCurrency(uint8_t count);

    /**
     * Makes this instance be an empty affix pattern.
     */
    void remove();

    /**
     * Provides an iterator over the tokens in this instance.
     * @param result this is initialized to point just before the
     *   first token of this instance. Caller must call nextToken()
     *   on the iterator once it is set up to have it actually point
     *   to the first token. This first call to nextToken() will return
     *   FALSE if the AffixPattern being iterated over is empty.
     * @return result
     */
    AffixPatternIterator &iterator(AffixPatternIterator &result) const;

    /**
     * Returns TRUE if this instance has currency tokens in it.
     */
    UBool usesCurrency() const { return hasCurrencyToken; }

    /**
     * Reports the hasPercentToken flag; the percent counterpart of
     * usesCurrency().
     */
    UBool usesPercent() const { return hasPercentToken; }

    /**
     * Reports the hasPermillToken flag; the per-mille counterpart of
     * usesCurrency().
     */
    UBool usesPermill() const { return hasPermillToken; }

    /**
     * Returns the number of code points a string of this instance
     * would have if none of the special tokens were escaped.
     * Used to compute the padding size.
     */
    int32_t countChar32() const { return char32Count; }

    /**
     * Appends other to this instance mutating this instance in place.
     * @param other The pattern appended to the end of this one.
     * @return a reference to this instance for chaining.
     */
    AffixPattern &append(const AffixPattern &other);

    /**
     * Converts this AffixPattern back into a user string.
     * It is the inverse of parseUserAffixString.
     */
    UnicodeString &toUserString(UnicodeString &appendTo) const;

    /**
     * Converts this AffixPattern back into a string.
     * It is the inverse of parseAffixString.
     */
    UnicodeString &toString(UnicodeString &appendTo) const;

    /**
     * Parses an affix pattern string appending it to an AffixPattern.
     * Parses affix pattern strings produced from using
     * DecimalFormatPatternParser to parse a format pattern. Affix patterns
     * include the positive prefix and suffix and the negative prefix
     * and suffix. This method expects affix pattern strings to be in the
     * same format that DecimalFormatPatternParser produces. Namely, special
     * characters in the affix that correspond to a field type must be
     * prefixed with an apostrophe ('). These special character sequences
     * include minus (-), percent (%), per mille (U+2030), plus (+),
     * short currency (U+00a4), medium currency (U+00a4 * 2),
     * long currency (U+00a4 * 3), and apostrophe (')
     * (apostrophe does not correspond to a field type but has to be escaped
     * because it itself is the escape character).
     * Since the expansion of these special character
     * sequences is locale dependent, these sequences are not expanded in
     * an AffixPattern instance.
     * If these special characters are not prefixed with an apostrophe in
     * the affix pattern string, then they are treated verbatim just as
     * any other character. If an apostrophe prefixes a non special
     * character in the affix pattern, the apostrophe is simply ignored.
     *
     * @param affixStr the string from DecimalFormatPatternParser
     * @param appendTo parsed result appended here.
     * @param status any error parsing returned here.
     */
    static AffixPattern &parseAffixString(
            const UnicodeString &affixStr,
            AffixPattern &appendTo,
            UErrorCode &status);

    /**
     * Parses an affix pattern string appending it to an AffixPattern.
     * Parses affix pattern strings as the user would supply them.
     * In this function, quoting makes special characters like normal
     * characters whereas in parseAffixString, quoting makes special
     * characters special.
     *
     * @param affixStr the string from the user
     * @param appendTo parsed result appended here.
     * @param status any error parsing returned here.
     */
    static AffixPattern &parseUserAffixString(
            const UnicodeString &affixStr,
            AffixPattern &appendTo,
            UErrorCode &status);

    /**
     * Returns TRUE only when the token string, the literal string, the
     * three uses-token flags and the code point count of both instances
     * all match.
     */
    UBool equals(const AffixPattern &other) const {
        if (!(tokens == other.tokens)) {
            return FALSE;
        }
        if (!(literals == other.literals)) {
            return FALSE;
        }
        if (!(hasCurrencyToken == other.hasCurrencyToken)) {
            return FALSE;
        }
        if (!(hasPercentToken == other.hasPercentToken)) {
            return FALSE;
        }
        if (!(hasPermillToken == other.hasPermillToken)) {
            return FALSE;
        }
        return char32Count == other.char32Count;
    }

private:
    /*
     * Tokens stored here. Each UChar generally stands for one token.
     * Each token is of form 'etttttttllllllll'. llllllll is the length of
     * the token and ranges from 0-255. ttttttt is the token type and ranges
     * from 0-127. If e is set it means this is an extendo token (described
     * next). To accommodate token lengths above 255, each normal
     * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
     * the same type. Right now only kLiteral tokens have extendo tokens.
     * Each extendo token provides the next 8 higher bits for the length.
     * If a kLiteral token is followed by 2 extendo tokens, then the
     * llllllll of the first extendo token contains bits 8-15 of the length
     * and the last extendo token contains bits 16-23 of the length.
     */
    UnicodeString tokens;

    /*
     * The characters of the kLiteral tokens are concatenated together here.
     * The first characters go with the first kLiteral token, the next
     * characters go with the next kLiteral token etc.
     */
    UnicodeString literals;

    /* Flags reported by usesCurrency(), usesPercent() and usesPermill(). */
    UBool hasCurrencyToken;
    UBool hasPercentToken;
    UBool hasPermillToken;

    /* Code point count reported by countChar32(). */
    int32_t char32Count;

    /*
     * Private overload taking both a token type and a count.
     * NOTE(review): presumably the shared worker behind add(ETokenType)
     * and addCurrency(uint8_t) -- confirm against the .cpp.
     */
    void add(ETokenType t, uint8_t count);

};

/**
 * An iterator over the tokens in an AffixPattern instance.
 */
class U_I18N_API AffixPatternIterator : public UMemory {
public:

    /**
     * Creates an iterator with zeroed indexes and NULL token/literal
     * pointers. Using an iterator without first calling iterator() on an
     * AffixPattern instance to initialize it results in undefined behavior.
     */
    AffixPatternIterator()
            : nextLiteralIndex(0),
              lastLiteralLength(0),
              nextTokenIndex(0),
              tokens(NULL),
              literals(NULL) {
    }

    /**
     * Advances this iterator to the next token. Returns FALSE when there
     * are no more tokens. Calling the other methods after nextToken()
     * returns FALSE results in undefined behavior.
     */
    UBool nextToken();

    /**
     * Returns the type of token.
     */
    AffixPattern::ETokenType getTokenType() const;

    /**
     * For literal tokens, returns the literal string. Calling this for
     * other token types results in undefined behavior.
     * @param result replaced with a read-only alias to the literal string.
     * @return result
     */
    UnicodeString &getLiteral(UnicodeString &result) const;

    /**
     * Returns the token length. Usually 1, but for currency tokens may
     * be 2 for ISO code and 3 for long form.
     */
    int32_t getTokenLength() const;

private:
    // AffixPattern gets access to the private state below.
    friend class AffixPattern;

    int32_t nextLiteralIndex;
    int32_t lastLiteralLength;
    int32_t nextTokenIndex;

    // Non-owning pointers; NULL until set.
    // NOTE(review): presumably pointed at the AffixPattern's own token and
    // literal strings by AffixPattern::iterator() -- confirm in the .cpp.
    const UnicodeString *tokens;
    const UnicodeString *literals;

    // Copy construction and copy assignment are private, so code outside
    // this class and its friend cannot copy an iterator. No definitions
    // are visible in this header.
    AffixPatternIterator(const AffixPatternIterator &);
    AffixPatternIterator &operator=(const AffixPatternIterator &);
};

/**
 * A locale aware class that converts locale independent AffixPattern
 * instances into locale dependent PluralAffix instances.
 */
class U_I18N_API AffixPatternParser : public UMemory {
public:
    /**
     * Default constructor.
     * NOTE(review): the initial values of the four affix strings are set
     * in the .cpp and are not visible from this header.
     */
    AffixPatternParser();

    /**
     * Constructs a parser from the given symbols.
     * @param symbols the decimal format symbols to use.
     */
    AffixPatternParser(const DecimalFormatSymbols &symbols);

    /**
     * Changes the symbols this parser uses.
     * @param symbols the decimal format symbols to use.
     */
    void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);

    /**
     * Parses affixPattern appending the result to appendTo.
     * @param affixPattern The affix pattern.
     * @param currencyAffixInfo contains the currency forms.
     * @param appendTo The result of parsing affixPattern is appended here.
     * @param status any error returned here.
     * @return appendTo.
     */
    PluralAffix &parse(
            const AffixPattern &affixPattern,
            const CurrencyAffixInfo &currencyAffixInfo,
            PluralAffix &appendTo,
            UErrorCode &status) const;

    /**
     * Returns TRUE only when the percent, per-mille, negative and positive
     * strings of both parsers all match.
     */
    UBool equals(const AffixPatternParser &other) const {
        if (!(fPercent == other.fPercent)) {
            return FALSE;
        }
        if (!(fPermill == other.fPermill)) {
            return FALSE;
        }
        if (!(fNegative == other.fNegative)) {
            return FALSE;
        }
        return fPositive == other.fPositive;
    }

private:
    // NOTE(review): presumably the locale-specific percent, per-mille,
    // minus and plus strings taken from DecimalFormatSymbols -- confirm
    // against the .cpp.
    UnicodeString fPercent;
    UnicodeString fPermill;
    UnicodeString fNegative;
    UnicodeString fPositive;
};


U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif  // __AFFIX_PATTERN_PARSER_H__