deps/icu-small/source/i18n/string_segment.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT

#include "numparse_types.h"
#include "string_segment.h"
#include "putilimp.h"
#include "unicode/utf16.h"
#include "unicode/uniset.h"

U_NAMESPACE_BEGIN


StringSegment::StringSegment(const UnicodeString& str, bool ignoreCase)
        : fStr(str), fStart(0), fEnd(str.length()),
          fFoldCase(ignoreCase) {}

int32_t StringSegment::getOffset() const {
    return fStart;
}

void StringSegment::setOffset(int32_t start) {
    fStart = start;
}

void StringSegment::adjustOffset(int32_t delta) {
    fStart += delta;
}

void StringSegment::adjustOffsetByCodePoint() {
    fStart += U16_LENGTH(getCodePoint());
}

void StringSegment::setLength(int32_t length) {
    fEnd = fStart + length;
}

void StringSegment::resetLength() {
    fEnd = fStr.length();
}

int32_t StringSegment::length() const {
    return fEnd - fStart;
}

char16_t StringSegment::charAt(int32_t index) const {
    return fStr.charAt(index + fStart);
}

UChar32 StringSegment::codePointAt(int32_t index) const {
    return fStr.char32At(index + fStart);
}

UnicodeString StringSegment::toUnicodeString() const {
    return UnicodeString(fStr.getBuffer() + fStart, fEnd - fStart);
}

const UnicodeString StringSegment::toTempUnicodeString() const {
    // Use the readonly-aliasing constructor for efficiency.
    return UnicodeString(FALSE, fStr.getBuffer() + fStart, fEnd - fStart);
}

UChar32 StringSegment::getCodePoint() const {
    char16_t lead = fStr.charAt(fStart);
    if (U16_IS_LEAD(lead) && fStart + 1 < fEnd) {
        return fStr.char32At(fStart);
    } else if (U16_IS_SURROGATE(lead)) {
        return -1;
    } else {
        return lead;
    }
}

bool StringSegment::startsWith(UChar32 otherCp) const {
    return codePointsEqual(getCodePoint(), otherCp, fFoldCase);
}

bool StringSegment::startsWith(const UnicodeSet& uniset) const {
    // TODO: Move UnicodeSet case-folding logic here.
    // TODO: Handle string matches here instead of separately.
    UChar32 cp = getCodePoint();
    if (cp == -1) {
        return false;
    }
    return uniset.contains(cp);
}

bool StringSegment::startsWith(const UnicodeString& other) const {
    if (other.isBogus() || other.length() == 0 || length() == 0) {
        return false;
    }
    int cp1 = getCodePoint();
    int cp2 = other.char32At(0);
    return codePointsEqual(cp1, cp2, fFoldCase);
}

int32_t StringSegment::getCommonPrefixLength(const UnicodeString& other) {
    return getPrefixLengthInternal(other, fFoldCase);
}

int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other) {
    return getPrefixLengthInternal(other, false);
}

int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) {
    U_ASSERT(other.length() > 0);
    int32_t offset = 0;
    for (; offset < uprv_min(length(), other.length());) {
        // TODO: case-fold code points, not chars
        char16_t c1 = charAt(offset);
        char16_t c2 = other.charAt(offset);
        if (!codePointsEqual(c1, c2, foldCase)) {
            break;
        }
        offset++;
    }
    return offset;
}

bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) {
    if (cp1 == cp2) {
        return true;
    }
    if (!foldCase) {
        return false;
    }
    cp1 = u_foldCase(cp1, TRUE);
    cp2 = u_foldCase(cp2, TRUE);
    return cp1 == cp2;
}

bool StringSegment::operator==(const UnicodeString& other) const {
    return toTempUnicodeString() == other;
}


U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */