summaryrefslogtreecommitdiff
path: root/deps/icu-small/source/common/unicode/localematcher.h
blob: 701123f750b222d59a6254652d3a8be0a3c76405 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License

// localematcher.h
// created: 2019may08 Markus W. Scherer

#ifndef __LOCALEMATCHER_H__
#define __LOCALEMATCHER_H__

#include "unicode/utypes.h"

#if U_SHOW_CPLUSPLUS_API

#include "unicode/locid.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"

/**
 * \file
 * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
 */

#ifndef U_HIDE_DRAFT_API

/**
 * Builder option for whether the language subtag or the script subtag is most important.
 *
 * @see Builder#setFavorSubtag(FavorSubtag)
 * @draft ICU 65
 */
enum ULocMatchFavorSubtag {
    /**
     * Language differences are most important, then script differences, then region differences.
     * (This is the default behavior.)
     *
     * @draft ICU 65
     */
    ULOCMATCH_FAVOR_LANGUAGE,
    /**
     * Makes script differences matter relatively more than language differences.
     *
     * @draft ICU 65
     */
    ULOCMATCH_FAVOR_SCRIPT
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
#endif

/**
 * Builder option for whether all desired locales are treated equally or
 * earlier ones are preferred.
 *
 * @see Builder#setDemotionPerDesiredLocale(Demotion)
 * @draft ICU 65
 */
enum ULocMatchDemotion {
    /**
     * All desired locales are treated equally.
     *
     * @draft ICU 65
     */
    ULOCMATCH_DEMOTION_NONE,
    /**
     * Earlier desired locales are preferred.
     *
     * <p>From each desired locale to the next,
     * the distance to any supported locale is increased by an additional amount
     * which is at least as large as most region mismatches.
     * A later desired locale has to have a better match with some supported locale
     * due to more than merely having the same region subtag.
     *
     * <p>For example: <code>Supported={en, sv}  desired=[en-GB, sv]</code>
     * yields <code>Result(en-GB, en)</code> because
     * with the demotion of sv its perfect match is no better than
     * the region distance between the earlier desired locale en-GB and en=en-US.
     *
     * <p>Notes:
     * <ul>
     *   <li>In some cases, language and/or script differences can be as small as
     *       the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
     *   <li>It is possible for certain region differences to be larger than usual,
     *       and larger than the demotion.
     *       (As of CLDR 35 there is no such case, but
     *        this is possible in future versions of the data.)
     * </ul>
     *
     * @draft ICU 65
     */
    ULOCMATCH_DEMOTION_REGION
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchDemotion ULocMatchDemotion;
#endif

struct UHashtable;

U_NAMESPACE_BEGIN

struct LSR;

class LocaleDistance;
class LocaleLsrIterator;
class UVector;
class XLikelySubtags;

/**
 * Immutable class that picks the best match between a user's desired locales and
 * an application's supported locales.
 * Movable but not copyable.
 *
 * <p>Example:
 * <pre>
 * UErrorCode errorCode = U_ZERO_ERROR;
 * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocales("fr, en-GB, en").build(errorCode);
 * Locale *bestSupported = matcher.getBestLocale(Locale.US, errorCode);  // "en"
 * </pre>
 *
 * <p>A matcher takes into account when languages are close to one another,
 * such as Danish and Norwegian,
 * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
 *
 * <p>If there are multiple supported locales with the same (language, script, region)
 * likely subtags, then the current implementation returns the first of those locales.
 * It ignores variant subtags (except for pseudolocale variants) and extensions.
 * This may change in future versions.
 *
 * <p>For example, the current implementation does not distinguish between
 * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
 *
 * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
 * or place it earlier in the list of supported locales.
 *
 * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
 * The current implementation compares each desired locale with supported locales
 * in the following order:
 * 1. Default locale, if supported;
 * 2. CLDR "paradigm locales" like en-GB and es-419;
 * 3. other supported locales.
 * This may change in future versions.
 *
 * <p>Often a product will just need one matcher instance, built with the languages
 * that it supports. However, it may want multiple instances with different
 * default languages based on additional information, such as the domain.
 *
 * <p>This class is not intended for public subclassing.
 *
 * @draft ICU 65
 */
class U_COMMON_API LocaleMatcher : public UMemory {
public:
    /**
     * Data for the best-matching pair of a desired and a supported locale.
     * Movable but not copyable.
     *
     * @draft ICU 65
     */
    class U_COMMON_API Result : public UMemory {
    public:
        /**
         * Move constructor; might modify the source.
         * This object will have the same contents that the source object had.
         *
         * @param src Result to move contents from.
         * @draft ICU 65
         */
        Result(Result &&src) U_NOEXCEPT;

        /**
         * Destructor.
         *
         * @draft ICU 65
         */
        ~Result();

        /**
         * Move assignment; might modify the source.
         * This object will have the same contents that the source object had.
         *
         * @param src Result to move contents from.
         * @draft ICU 65
         */
        Result &operator=(Result &&src) U_NOEXCEPT;

        /**
         * Returns the best-matching desired locale.
         * nullptr if the list of desired locales is empty or if none matched well enough.
         *
         * @return the best-matching desired locale, or nullptr.
         * @draft ICU 65
         */
        inline const Locale *getDesiredLocale() const { return desiredLocale; }

        /**
         * Returns the best-matching supported locale.
         * If none matched well enough, this is the default locale.
         * The default locale is nullptr if the list of supported locales is empty and
         * no explicit default locale is set.
         *
         * @return the best-matching supported locale, or nullptr.
         * @draft ICU 65
         */
        inline const Locale *getSupportedLocale() const { return supportedLocale; }

        /**
         * Returns the index of the best-matching desired locale in the input Iterable order.
         * -1 if the list of desired locales is empty or if none matched well enough.
         *
         * @return the index of the best-matching desired locale, or -1.
         * @draft ICU 65
         */
        inline int32_t getDesiredIndex() const { return desiredIndex; }

        /**
         * Returns the index of the best-matching supported locale in the
         * constructor’s or builder’s input order (“set” Collection plus “added” locales).
         * If the matcher was built from a locale list string, then the iteration order is that
         * of a LocalePriorityList built from the same string.
         * -1 if the list of supported locales is empty or if none matched well enough.
         *
         * @return the index of the best-matching supported locale, or -1.
         * @draft ICU 65
         */
        inline int32_t getSupportedIndex() const { return supportedIndex; }

        /**
         * Takes the best-matching supported locale and adds relevant fields of the
         * best-matching desired locale, such as the -t- and -u- extensions.
         * May replace some fields of the supported locale.
         * The result is the locale that should be used for date and number formatting, collation, etc.
         * Returns the root locale if getSupportedLocale() returns nullptr.
         *
         * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
         *
         * @return a locale combining the best-matching desired and supported locales.
         * @draft ICU 65
         */
        Locale makeResolvedLocale(UErrorCode &errorCode) const;

    private:
        Result(const Locale *desired, const Locale *supported,
               int32_t desIndex, int32_t suppIndex, UBool owned) :
                desiredLocale(desired), supportedLocale(supported),
                desiredIndex(desIndex), supportedIndex(suppIndex),
                desiredIsOwned(owned) {}

        Result(const Result &other) = delete;
        Result &operator=(const Result &other) = delete;

        const Locale *desiredLocale;
        const Locale *supportedLocale;
        int32_t desiredIndex;
        int32_t supportedIndex;
        UBool desiredIsOwned;

        friend class LocaleMatcher;
    };

    /**
     * LocaleMatcher builder.
     * Movable but not copyable.
     *
     * @see LocaleMatcher#builder()
     * @draft ICU 65
     */
    class U_COMMON_API Builder : public UMemory {
    public:
        /**
         * Constructs a builder used in chaining parameters for building a LocaleMatcher.
         *
         * @return a new Builder object
         * @draft ICU 65
         */
        Builder() {}

        /**
         * Move constructor; might modify the source.
         * This builder will have the same contents that the source builder had.
         *
         * @param src Builder to move contents from.
         * @draft ICU 65
         */
        Builder(Builder &&src) U_NOEXCEPT;

        /**
         * Destructor.
         *
         * @draft ICU 65
         */
        ~Builder();

        /**
         * Move assignment; might modify the source.
         * This builder will have the same contents that the source builder had.
         *
         * @param src Builder to move contents from.
         * @draft ICU 65
         */
        Builder &operator=(Builder &&src) U_NOEXCEPT;

        /**
         * Parses an Accept-Language string
         * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
         * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
         * Allows whitespace in more places but does not allow "*".
         * Clears any previously set/added supported locales first.
         *
         * @param locales the Accept-Language string of locales to set
         * @return this Builder object
         * @draft ICU 65
         */
        Builder &setSupportedLocalesFromListString(StringPiece locales);

        /**
         * Copies the supported locales, preserving iteration order.
         * Clears any previously set/added supported locales first.
         * Duplicates are allowed, and are not removed.
         *
         * @param locales the list of locale
         * @return this Builder object
         * @draft ICU 65
         */
        Builder &setSupportedLocales(Locale::Iterator &locales);

        /**
         * Copies the supported locales from the begin/end range, preserving iteration order.
         * Clears any previously set/added supported locales first.
         * Duplicates are allowed, and are not removed.
         *
         * Each of the iterator parameter values must be an
         * input iterator whose value is convertible to const Locale &.
         *
         * @param begin Start of range.
         * @param end Exclusive end of range.
         * @return this Builder object
         * @draft ICU 65
         */
        template<typename Iter>
        Builder &setSupportedLocales(Iter begin, Iter end) {
            if (U_FAILURE(errorCode_)) { return *this; }
            clearSupportedLocales();
            while (begin != end) {
                addSupportedLocale(*begin++);
            }
            return *this;
        }

        /**
         * Copies the supported locales from the begin/end range, preserving iteration order.
         * Calls the converter to convert each *begin to a Locale or const Locale &.
         * Clears any previously set/added supported locales first.
         * Duplicates are allowed, and are not removed.
         *
         * Each of the iterator parameter values must be an
         * input iterator whose value is convertible to const Locale &.
         *
         * @param begin Start of range.
         * @param end Exclusive end of range.
         * @param converter Converter from *begin to const Locale & or compatible.
         * @return this Builder object
         * @draft ICU 65
         */
        template<typename Iter, typename Conv>
        Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
            if (U_FAILURE(errorCode_)) { return *this; }
            clearSupportedLocales();
            while (begin != end) {
                addSupportedLocale(converter(*begin++));
            }
            return *this;
        }

        /**
         * Adds another supported locale.
         * Duplicates are allowed, and are not removed.
         *
         * @param locale another locale
         * @return this Builder object
         * @draft ICU 65
         */
        Builder &addSupportedLocale(const Locale &locale);

        /**
         * Sets the default locale; if nullptr, or if it is not set explicitly,
         * then the first supported locale is used as the default locale.
         *
         * @param defaultLocale the default locale (will be copied)
         * @return this Builder object
         * @draft ICU 65
         */
        Builder &setDefaultLocale(const Locale *defaultLocale);

        /**
         * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
         * differences.
         * This is used in situations (such as maps) where
         * it is better to fall back to the same script than a similar language.
         *
         * @param subtag the subtag to favor
         * @return this Builder object
         * @draft ICU 65
         */
        Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);

        /**
         * Option for whether all desired locales are treated equally or
         * earlier ones are preferred (this is the default).
         *
         * @param demotion the demotion per desired locale to set.
         * @return this Builder object
         * @draft ICU 65
         */
        Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);

        /**
         * Sets the UErrorCode if an error occurred while setting parameters.
         * Preserves older error codes in the outErrorCode.
         *
         * @param outErrorCode Set to an error code if it does not contain one already
         *                  and an error occurred while setting parameters.
         *                  Otherwise unchanged.
         * @return TRUE if U_FAILURE(outErrorCode)
         * @draft ICU 65
         */
        UBool copyErrorTo(UErrorCode &outErrorCode) const;

        /**
         * Builds and returns a new locale matcher.
         * This builder can continue to be used.
         *
         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
         *                  or else the function returns immediately. Check for U_FAILURE()
         *                  on output or use with function chaining. (See User Guide for details.)
         * @return new LocaleMatcher.
         * @draft ICU 65
         */
        LocaleMatcher build(UErrorCode &errorCode) const;

    private:
        friend class LocaleMatcher;

        Builder(const Builder &other) = delete;
        Builder &operator=(const Builder &other) = delete;

        void clearSupportedLocales();
        bool ensureSupportedLocaleVector();

        UErrorCode errorCode_ = U_ZERO_ERROR;
        UVector *supportedLocales_ = nullptr;
        int32_t thresholdDistance_ = -1;
        ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;
        Locale *defaultLocale_ = nullptr;
        ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
    };

    // FYI No public LocaleMatcher constructors in C++; use the Builder.

    /**
     * Move copy constructor; might modify the source.
     * This matcher will have the same settings that the source matcher had.
     * @param src source matcher
     * @draft ICU 65
     */
    LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT;

    /**
     * Destructor.
     * @draft ICU 65
     */
    ~LocaleMatcher();

    /**
     * Move assignment operator; might modify the source.
     * This matcher will have the same settings that the source matcher had.
     * The behavior is undefined if *this and src are the same object.
     * @param src source matcher
     * @return *this
     * @draft ICU 65
     */
    LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT;

    /**
     * Returns the supported locale which best matches the desired locale.
     *
     * @param desiredLocale Typically a user's language.
     * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
     *                  or else the function returns immediately. Check for U_FAILURE()
     *                  on output or use with function chaining. (See User Guide for details.)
     * @return the best-matching supported locale.
     * @draft ICU 65
     */
    const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;

    /**
     * Returns the supported locale which best matches one of the desired locales.
     *
     * @param desiredLocales Typically a user's languages, in order of preference (descending).
     * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
     *                  or else the function returns immediately. Check for U_FAILURE()
     *                  on output or use with function chaining. (See User Guide for details.)
     * @return the best-matching supported locale.
     * @draft ICU 65
     */
    const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;

    /**
     * Parses an Accept-Language string
     * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
     * such as "af, en, fr;q=0.9",
     * and returns the supported locale which best matches one of the desired locales.
     * Allows whitespace in more places but does not allow "*".
     *
     * @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
     * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
     *                  or else the function returns immediately. Check for U_FAILURE()
     *                  on output or use with function chaining. (See User Guide for details.)
     * @return the best-matching supported locale.
     * @draft ICU 65
     */
    const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;

    /**
     * Returns the best match between the desired locale and the supported locales.
     * If the result's desired locale is not nullptr, then it is the address of the input locale.
     * It has not been cloned.
     *
     * @param desiredLocale Typically a user's language.
     * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
     *                  or else the function returns immediately. Check for U_FAILURE()
     *                  on output or use with function chaining. (See User Guide for details.)
     * @return the best-matching pair of the desired and a supported locale.
     * @draft ICU 65
     */
    Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;

    /**
     * Returns the best match between the desired and supported locales.
     * If the result's desired locale is not nullptr, then it is a clone of
     * the best-matching desired locale. The Result object owns the clone.
     *
     * @param desiredLocales Typically a user's languages, in order of preference (descending).
     * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
     *                  or else the function returns immediately. Check for U_FAILURE()
     *                  on output or use with function chaining. (See User Guide for details.)
     * @return the best-matching pair of a desired and a supported locale.
     * @draft ICU 65
     */
    Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;

#ifndef U_HIDE_INTERNAL_API
    /**
     * Returns a fraction between 0 and 1, where 1 means that the languages are a
     * perfect match, and 0 means that they are completely different.
     *
     * <p>This is mostly an implementation detail, and the precise values may change over time.
     * The implementation may use either the maximized forms or the others ones, or both.
     * The implementation may or may not rely on the forms to be consistent with each other.
     *
     * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
     *
     * @param desired Desired locale.
     * @param supported Supported locale.
     * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
     *                  or else the function returns immediately. Check for U_FAILURE()
     *                  on output or use with function chaining. (See User Guide for details.)
     * @return value between 0 and 1, inclusive.
     * @internal (has a known user)
     */
    double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
#endif  // U_HIDE_INTERNAL_API

private:
    LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
    LocaleMatcher(const LocaleMatcher &other) = delete;
    LocaleMatcher &operator=(const LocaleMatcher &other) = delete;

    int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;

    const XLikelySubtags &likelySubtags;
    const LocaleDistance &localeDistance;
    int32_t thresholdDistance;
    int32_t demotionPerDesiredLocale;
    ULocMatchFavorSubtag favorSubtag;

    // These are in input order.
    const Locale ** supportedLocales;
    LSR *lsrs;
    int32_t supportedLocalesLength;
    // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
    UHashtable *supportedLsrToIndex;  // Map<LSR, Integer> stores index+1 because 0 is "not found"
    // Array versions of the supportedLsrToIndex keys and values.
    // The distance lookup loops over the supportedLSRs and returns the index of the best match.
    const LSR **supportedLSRs;
    int32_t *supportedIndexes;
    int32_t supportedLSRsLength;
    Locale *ownedDefaultLocale;
    const Locale *defaultLocale;
    int32_t defaultLocaleIndex;
};

U_NAMESPACE_END

#endif  // U_HIDE_DRAFT_API
#endif  // U_SHOW_CPLUSPLUS_API
#endif  // __LOCALEMATCHER_H__