summaryrefslogtreecommitdiff
path: root/deps/icu-small/source/tools/toolutil/denseranges.cpp
blob: 7d81f2e9442f0133f8032df123f7ddee1c5e5da9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
/*
*******************************************************************************
*   Copyright (C) 2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  denseranges.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*
* Helper code for finding a small number of dense ranges.
*/

#include "unicode/utypes.h"
#include "denseranges.h"

// Definitions in the anonymous namespace are invisible outside this file.
namespace {

/**
 * Collect up to 15 range gaps and sort them by ascending gap size.
 */
class LargestGaps {
public:
    LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {}

    void add(int32_t gapStart, int64_t gapLength) {
        int32_t i=length;
        while(i>0 && gapLength>gapLengths[i-1]) {
            --i;
        }
        if(i<maxLength) {
            // The new gap is now one of the maxLength largest.
            // Insert the new gap, moving up smaller ones of the previous
            // length largest.
            int32_t j= length<maxLength ? length++ : maxLength-1;
            while(j>i) {
                gapStarts[j]=gapStarts[j-1];
                gapLengths[j]=gapLengths[j-1];
                --j;
            }
            gapStarts[i]=gapStart;
            gapLengths[i]=gapLength;
        }
    }

    void truncate(int32_t newLength) {
        if(newLength<length) {
            length=newLength;
        }
    }

    int32_t count() const { return length; }
    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
    int64_t gapLength(int32_t i) const { return gapLengths[i]; }

    int32_t firstAfter(int32_t value) const {
        if(length==0) {
            return -1;
        }
        int32_t minValue=0;
        int32_t minIndex=-1;
        for(int32_t i=0; i<length; ++i) {
            if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
                minValue=gapStarts[i];
                minIndex=i;
            }
        }
        return minIndex;
    }

private:
    static const int32_t kCapacity=15;

    int32_t maxLength;
    int32_t length;
    int32_t gapStarts[kCapacity];
    int64_t gapLengths[kCapacity];
};

}  // namespace

/**
 * Does it make sense to write 1..capacity ranges?
 * Returns 0 if not, otherwise the number of ranges.
 * @param values Sorted array of signed-integer values.
 * @param length Number of values.
 * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
 *                Should be 0x80..0x100, must be 1..0x100.
 * @param ranges Output ranges array.
 * @param capacity Maximum number of ranges.
 * @return Minimum number of ranges (at most capacity) that have the desired density,
 *         or 0 if that density cannot be achieved.
 */
U_CAPI int32_t U_EXPORT2
uprv_makeDenseRanges(const int32_t values[], int32_t length,
                     int32_t density,
                     int32_t ranges[][2], int32_t capacity) {
    if(length<=2) {
        return 0;
    }
    int32_t minValue=values[0];
    int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
    // Use int64_t variables for intermediate-value precision and to avoid
    // signed-int32_t overflow of maxValue-minValue.
    int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
    if(length>=(density*maxLength)/0x100) {
        // Use one range.
        ranges[0][0]=minValue;
        ranges[0][1]=maxValue;
        return 1;
    }
    if(length<=4) {
        return 0;
    }
    // See if we can split [minValue, maxValue] into 2..capacity ranges,
    // divided by the 1..(capacity-1) largest gaps.
    LargestGaps gaps(capacity-1);
    int32_t i;
    int32_t expectedValue=minValue;
    for(i=1; i<length; ++i) {
        ++expectedValue;
        int32_t actualValue=values[i];
        if(expectedValue!=actualValue) {
            gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
            expectedValue=actualValue;
        }
    }
    // We know gaps.count()>=1 because we have fewer values (length) than
    // the length of the [minValue..maxValue] range (maxLength).
    // (Otherwise we would have returned with the one range above.)
    int32_t num;
    for(i=0, num=2;; ++i, ++num) {
        if(i>=gaps.count()) {
            // The values are too sparse for capacity or fewer ranges
            // of the requested density.
            return 0;
        }
        maxLength-=gaps.gapLength(i);
        if(length>num*2 && length>=(density*maxLength)/0x100) {
            break;
        }
    }
    // Use the num ranges with the num-1 largest gaps.
    gaps.truncate(num-1);
    ranges[0][0]=minValue;
    for(i=0; i<=num-2; ++i) {
        int32_t gapIndex=gaps.firstAfter(minValue);
        int32_t gapStart=gaps.gapStart(gapIndex);
        ranges[i][1]=gapStart-1;
        ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
    }
    ranges[num-1][1]=maxValue;
    return num;
}