diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/common/filteredbrk.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/common/filteredbrk.cpp | 710 |
1 files changed, 0 insertions, 710 deletions
diff --git a/deps/node/deps/icu-small/source/common/filteredbrk.cpp b/deps/node/deps/icu-small/source/common/filteredbrk.cpp deleted file mode 100644 index 162b38de..00000000 --- a/deps/node/deps/icu-small/source/common/filteredbrk.cpp +++ /dev/null @@ -1,710 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2014-2015, International Business Machines Corporation and -* others. All Rights Reserved. -******************************************************************************* -*/ - -#include "unicode/utypes.h" -#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION - -#include "cmemory.h" - -#include "unicode/filteredbrk.h" -#include "unicode/ucharstriebuilder.h" -#include "unicode/ures.h" - -#include "uresimp.h" // ures_getByKeyWithFallback -#include "ubrkimpl.h" // U_ICUDATA_BRKITR -#include "uvector.h" -#include "cmemory.h" - -U_NAMESPACE_BEGIN - -#ifndef FB_DEBUG -#define FB_DEBUG 0 -#endif - -#if FB_DEBUG -#include <stdio.h> -static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) { - char buf[2048]; - if(s) { - s->extract(0,s->length(),buf,2048); - } else { - strcpy(buf,"NULL"); - } - fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n", - f, l, m, buf, (const void*)s, b?'T':'F',(int)d); -} - -#define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__) -#else -#define FB_TRACE(m,s,b,d) -#endif - -/** - * Used with sortedInsert() - */ -static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { - const UnicodeString &a = *(const UnicodeString*)t1.pointer; - const UnicodeString &b = *(const UnicodeString*)t2.pointer; - return a.compare(b); -} - -/** - * A UVector which implements a set of strings. - */ -class U_COMMON_API UStringSet : public UVector { - public: - UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject, - uhash_compareUnicodeString, - 1, - status) {} - virtual ~UStringSet(); - /** - * Is this UnicodeSet contained? - */ - inline UBool contains(const UnicodeString& s) { - return contains((void*) &s); - } - using UVector::contains; - /** - * Return the ith UnicodeString alias - */ - inline const UnicodeString* getStringAt(int32_t i) const { - return (const UnicodeString*)elementAt(i); - } - /** - * Adopt the UnicodeString if not already contained. - * Caller no longer owns the pointer in any case. - * @return true if adopted successfully, false otherwise (error, or else duplicate) - */ - inline UBool adopt(UnicodeString *str, UErrorCode &status) { - if(U_FAILURE(status) || contains(*str)) { - delete str; - return false; - } else { - sortedInsert(str, compareUnicodeString, status); - if(U_FAILURE(status)) { - delete str; - return false; - } - return true; - } - } - /** - * Add by value. - * @return true if successfully adopted. - */ - inline UBool add(const UnicodeString& str, UErrorCode &status) { - if(U_FAILURE(status)) return false; - UnicodeString *t = new UnicodeString(str); - if(t==NULL) { - status = U_MEMORY_ALLOCATION_ERROR; return false; - } - return adopt(t, status); - } - /** - * Remove this string. - * @return true if successfully removed, false otherwise (error, or else it wasn't there) - */ - inline UBool remove(const UnicodeString &s, UErrorCode &status) { - if(U_FAILURE(status)) return false; - return removeElement((void*) &s); - } -}; - -/** - * Virtual, won't be inlined - */ -UStringSet::~UStringSet() {} - -/* ----------------------------------------------------------- */ - - -/* Filtered Break constants */ -static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie -static const int32_t kMATCH = (1<<1); //< exact match - skip this one. -static const int32_t kSuppressInReverse = (1<<0); -static const int32_t kAddToForward = (1<<1); -static const UChar kFULLSTOP = 0x002E; // '.' - -/** - * Shared data for SimpleFilteredSentenceBreakIterator - */ -class SimpleFilteredSentenceBreakData : public UMemory { -public: - SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards ) - : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { } - SimpleFilteredSentenceBreakData *incr() { refcount++; return this; } - SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; } - virtual ~SimpleFilteredSentenceBreakData(); - - LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M." - LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs. - int32_t refcount; -}; - -SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {} - -/** - * Concrete implementation - */ -class SimpleFilteredSentenceBreakIterator : public BreakIterator { -public: - SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status); - SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other); - virtual ~SimpleFilteredSentenceBreakIterator(); -private: - SimpleFilteredSentenceBreakData *fData; - LocalPointer<BreakIterator> fDelegate; - LocalUTextPointer fText; - - /* -- subclass interface -- */ -public: - /* -- cloning and other subclass stuff -- */ - virtual BreakIterator * createBufferClone(void * /*stackBuffer*/, - int32_t &/*BufferSize*/, - UErrorCode &status) { - // for now - always deep clone - status = U_SAFECLONE_ALLOCATED_WARNING; - return clone(); - } - virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); } - virtual UClassID getDynamicClassID(void) const { return NULL; } - virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; } - - /* -- text modifying -- */ - virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); } - virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; } - virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); } - virtual void setText(const UnicodeString &text) { fDelegate->setText(text); } - - /* -- other functions that are just delegated -- */ - virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); } - virtual CharacterIterator& getText(void) const { return fDelegate->getText(); } - - /* -- ITERATION -- */ - virtual int32_t first(void); - virtual int32_t preceding(int32_t offset); - virtual int32_t previous(void); - virtual UBool isBoundary(int32_t offset); - virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct. - - virtual int32_t next(void); - - virtual int32_t next(int32_t n); - virtual int32_t following(int32_t offset); - virtual int32_t last(void); - -private: - /** - * Given that the fDelegate has already given its "initial" answer, - * find the NEXT actual (non-excepted) break. - * @param n initial position from delegate - * @return new break position or UBRK_DONE - */ - int32_t internalNext(int32_t n); - /** - * Given that the fDelegate has already given its "initial" answer, - * find the PREV actual (non-excepted) break. - * @param n initial position from delegate - * @return new break position or UBRK_DONE - */ - int32_t internalPrev(int32_t n); - /** - * set up the UText with the value of the fDelegate. - * Call this before calling breakExceptionAt. - * May be able to avoid excess calls - */ - void resetState(UErrorCode &status); - /** - * Is there a match (exception) at this spot? - */ - enum EFBMatchResult { kNoExceptionHere, kExceptionHere }; - /** - * Determine if there is an exception at this spot - * @param n spot to check - * @return kNoExceptionHere or kExceptionHere - **/ - enum EFBMatchResult breakExceptionAt(int32_t n); -}; - -SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other) - : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate->clone()) -{ -} - - -SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) : - BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)), - fData(new SimpleFilteredSentenceBreakData(forwards, backwards)), - fDelegate(adopt) -{ - // all set.. -} - -SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() { - fData = fData->decr(); -} - -void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) { - fText.adoptInstead(fDelegate->getUText(fText.orphan(), status)); -} - -SimpleFilteredSentenceBreakIterator::EFBMatchResult -SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) { - int64_t bestPosn = -1; - int32_t bestValue = -1; - // loops while 'n' points to an exception. - utext_setNativeIndex(fText.getAlias(), n); // from n.. - fData->fBackwardsTrie->reset(); - UChar32 uch; - - //if(debug2) u_printf(" n@ %d\n", n); - // Assume a space is following the '.' (so we handle the case: "Mr. /Brown") - if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here?? - // TODO only do this the 1st time? - //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); - } else { - //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); - uch = utext_next32(fText.getAlias()); - //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); - } - - UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; - - while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and.. - USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie - if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far - bestPosn = utext_getNativeIndex(fText.getAlias()); - bestValue = fData->fBackwardsTrie->getValue(); - } - //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias())); - } - - if(USTRINGTRIE_MATCHES(r)) { // exact match? - //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); - bestValue = fData->fBackwardsTrie->getValue(); - bestPosn = utext_getNativeIndex(fText.getAlias()); - //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); - } - - if(bestPosn>=0) { - //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); - - //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? - //int32_t bestValue = fBackwardsTrie->getValue(); - ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue); - - if(bestValue == kMATCH) { // exact match! - //if(debug2) u_printf(" exact backward match\n"); - return kExceptionHere; // See if the next is another exception. - } else if(bestValue == kPARTIAL - && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie - //if(debug2) u_printf(" partial backward match\n"); - // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie - // to see if it matches something going forward. - fData->fForwardsPartialTrie->reset(); - UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; - utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close .. - //if(debug2) u_printf("Retrying at %d\n", bestPosn); - while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && - USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) { - //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias())); - } - if(USTRINGTRIE_MATCHES(rfwd)) { - //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch); - // only full matches here, nothing to check - // skip the next: - return kExceptionHere; - } else { - //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch); - // no match (no exception) -return the 'underlying' break - return kNoExceptionHere; - } - } else { - return kNoExceptionHere; // internal error and/or no forwards trie - } - } else { - //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // no best match - return kNoExceptionHere; // No match - so exit. Not an exception. - } -} - -// the workhorse single next. -int32_t -SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) { - if(n == UBRK_DONE || // at end or - fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions - return n; - } - // OK, do we need to break here? - UErrorCode status = U_ZERO_ERROR; - // refresh text - resetState(status); - if(U_FAILURE(status)) return UBRK_DONE; // bail out - int64_t utextLen = utext_nativeLength(fText.getAlias()); - - //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias())); - while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate). - SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n); - - switch(m) { - case kExceptionHere: - n = fDelegate->next(); // skip this one. Find the next lowerlevel break. - continue; - - default: - case kNoExceptionHere: - return n; - } - } - return n; -} - -int32_t -SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) { - if(n == 0 || n == UBRK_DONE || // at end or - fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions - return n; - } - // OK, do we need to break here? - UErrorCode status = U_ZERO_ERROR; - // refresh text - resetState(status); - if(U_FAILURE(status)) return UBRK_DONE; // bail out - - //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias())); - while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate). - SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n); - - switch(m) { - case kExceptionHere: - n = fDelegate->previous(); // skip this one. Find the next lowerlevel break. - continue; - - default: - case kNoExceptionHere: - return n; - } - } - return n; -} - - -int32_t -SimpleFilteredSentenceBreakIterator::next() { - return internalNext(fDelegate->next()); -} - -int32_t -SimpleFilteredSentenceBreakIterator::first(void) { - // Don't suppress a break opportunity at the beginning of text. - return fDelegate->first(); -} - -int32_t -SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) { - return internalPrev(fDelegate->preceding(offset)); -} - -int32_t -SimpleFilteredSentenceBreakIterator::previous(void) { - return internalPrev(fDelegate->previous()); -} - -UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) { - if (!fDelegate->isBoundary(offset)) return false; // no break to suppress - - if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions - - UErrorCode status = U_ZERO_ERROR; - resetState(status); - - SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset); - - switch(m) { - case kExceptionHere: - return false; - default: - case kNoExceptionHere: - return true; - } -} - -int32_t -SimpleFilteredSentenceBreakIterator::next(int32_t offset) { - return internalNext(fDelegate->next(offset)); -} - -int32_t -SimpleFilteredSentenceBreakIterator::following(int32_t offset) { - return internalNext(fDelegate->following(offset)); -} - -int32_t -SimpleFilteredSentenceBreakIterator::last(void) { - // Don't suppress a break opportunity at the end of text. - return fDelegate->last(); -} - - -/** - * Concrete implementation of builder class. - */ -class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder { -public: - virtual ~SimpleFilteredBreakIteratorBuilder(); - SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status); - SimpleFilteredBreakIteratorBuilder(UErrorCode &status); - virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status); - virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status); - virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status); -private: - UStringSet fSet; -}; - -SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder() -{ -} - -SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status) - : fSet(status) -{ -} - -SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status) - : fSet(status) -{ - if(U_SUCCESS(status)) { - UErrorCode subStatus = U_ZERO_ERROR; - LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &subStatus)); - if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) { - status = subStatus; // copy the failing status -#if FB_DEBUG - fprintf(stderr, "open BUNDLE %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status)); -#endif - return; // leaves the builder empty, if you try to use it. - } - LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &subStatus)); - if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) { - status = subStatus; // copy the failing status -#if FB_DEBUG - fprintf(stderr, "open EXCEPTIONS %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status)); -#endif - return; // leaves the builder empty, if you try to use it. - } - LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &subStatus)); - -#if FB_DEBUG - { - UErrorCode subsub = subStatus; - fprintf(stderr, "open SentenceBreak %s => %s, %s\n", fromLocale.getBaseName(), ures_getLocale(breaks.getAlias(), &subsub), u_errorName(subStatus)); - } -#endif - - if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) { - status = subStatus; // copy the failing status -#if FB_DEBUG - fprintf(stderr, "open %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status)); -#endif - return; // leaves the builder empty, if you try to use it. - } - - LocalUResourceBundlePointer strs; - subStatus = status; // Pick up inherited warning status now - do { - strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus)); - if(strs.isValid() && U_SUCCESS(subStatus)) { - UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status)); - suppressBreakAfter(str, status); // load the string - } - } while (strs.isValid() && U_SUCCESS(subStatus)); - if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) { - status = subStatus; - } - } -} - -UBool -SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status) -{ - UBool r = fSet.add(exception, status); - FB_TRACE("suppressBreakAfter",&exception,r,0); - return r; -} - -UBool -SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status) -{ - UBool r = fSet.remove(exception, status); - FB_TRACE("unsuppressBreakAfter",&exception,r,0); - return r; -} - -/** - * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly. - * Work around this. - * - * Note: "new UnicodeString[subCount]" ends up calling global operator new - * on MSVC2012 for some reason. - */ -static inline UnicodeString* newUnicodeStringArray(size_t count) { - return new UnicodeString[count ? count : 1]; -} - -BreakIterator * -SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) { - LocalPointer<BreakIterator> adopt(adoptBreakIterator); - - LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status); - LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status); - if(U_FAILURE(status)) { - return NULL; - } - - int32_t revCount = 0; - int32_t fwdCount = 0; - - int32_t subCount = fSet.size(); - - UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount); - - LocalArray<UnicodeString> ustrs(ustrs_ptr); - - LocalMemory<int> partials; - partials.allocateInsteadAndReset(subCount); - - LocalPointer<UCharsTrie> backwardsTrie; // i.e. ".srM" for Mrs. - LocalPointer<UCharsTrie> forwardsPartialTrie; // Has ".a" for "a.M." - - int n=0; - for ( int32_t i = 0; - i<fSet.size(); - i++) { - const UnicodeString *abbr = fSet.getStringAt(i); - if(abbr) { - FB_TRACE("build",abbr,TRUE,i); - ustrs[n] = *abbr; // copy by value - FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i); - } else { - FB_TRACE("build",abbr,FALSE,i); - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - partials[n] = 0; // default: not partial - n++; - } - // first pass - find partials. - for(int i=0;i<subCount;i++) { - int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations - if(nn>-1 && (nn+1)!=ustrs[i].length()) { - FB_TRACE("partial",&ustrs[i],FALSE,i); - // is partial. - // is it unique? - int sameAs = -1; - for(int j=0;j<subCount;j++) { - if(j==i) continue; - if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) { - FB_TRACE("prefix",&ustrs[j],FALSE,nn+1); - //UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn - if(partials[j]==0) { // hasn't been processed yet - partials[j] = kSuppressInReverse | kAddToForward; - FB_TRACE("suppressing",&ustrs[j],FALSE,j); - } else if(partials[j] & kSuppressInReverse) { - sameAs = j; // the other entry is already in the reverse table. - } - } - } - FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs); - FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]); - UnicodeString prefix(ustrs[i], 0, nn+1); - if(sameAs == -1 && partials[i] == 0) { - // first one - add the prefix to the reverse table. - prefix.reverse(); - builder->add(prefix, kPARTIAL, status); - revCount++; - FB_TRACE("Added partial",&prefix,FALSE, i); - FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i); - partials[i] = kSuppressInReverse | kAddToForward; - } else { - FB_TRACE("NOT adding partial",&prefix,FALSE, i); - FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i); - } - } - } - for(int i=0;i<subCount;i++) { - if(partials[i]==0) { - ustrs[i].reverse(); - builder->add(ustrs[i], kMATCH, status); - revCount++; - FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i); - } else { - FB_TRACE("Adding fwd",&ustrs[i], FALSE, i); - - // an optimization would be to only add the portion after the '.' - // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward, - // instead of "Ph.D." since we already know the "Ph." part is a match. - // would need the trie to be able to hold 0-length strings, though. - builder2->add(ustrs[i], kMATCH, status); // forward - fwdCount++; - //ustrs[i].reverse(); - ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status)); - } - } - FB_TRACE("AbbrCount",NULL,FALSE, subCount); - - if(revCount>0) { - backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status)); - if(U_FAILURE(status)) { - FB_TRACE(u_errorName(status),NULL,FALSE, -1); - return NULL; - } - } - - if(fwdCount>0) { - forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status)); - if(U_FAILURE(status)) { - FB_TRACE(u_errorName(status),NULL,FALSE, -1); - return NULL; - } - } - - return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status); -} - - -// ----------- Base class implementation - -FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { -} - -FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { -} - -FilteredBreakIteratorBuilder * -FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) { - if(U_FAILURE(status)) return NULL; - LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status); - return (U_SUCCESS(status))? ret.orphan(): NULL; -} - -FilteredBreakIteratorBuilder * -FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) { - return createEmptyInstance(status); -} - -FilteredBreakIteratorBuilder * -FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) { - if(U_FAILURE(status)) return NULL; - LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status); - return (U_SUCCESS(status))? ret.orphan(): NULL; -} - -U_NAMESPACE_END - -#endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |