initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
This commit is contained in:
203
thirdparty/icu4c/common/rbbi_cache.h
vendored
Normal file
203
thirdparty/icu4c/common/rbbi_cache.h
vendored
Normal file
@@ -0,0 +1,203 @@
|
||||
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// file: rbbi_cache.h
|
||||
//
|
||||
#ifndef RBBI_CACHE_H
|
||||
#define RBBI_CACHE_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/rbbi.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/* DictionaryCache stores the boundaries obtained from a run of dictionary characters.
|
||||
* Dictionary boundaries are moved first to this cache, then from here
|
||||
* to the main BreakCache, where they may inter-leave with non-dictionary
|
||||
* boundaries. The public BreakIterator API always fetches directly
|
||||
* from the main BreakCache, not from here.
|
||||
*
|
||||
* In common situations, the number of boundaries in a single dictionary run
|
||||
* should be quite small, it will be terminated by punctuation, spaces,
|
||||
* or any other non-dictionary characters. The main BreakCache may end
|
||||
* up with boundaries from multiple dictionary based runs.
|
||||
*
|
||||
* The boundaries are stored in a simple ArrayList (vector), with the
|
||||
* assumption that they will be accessed sequentially.
|
||||
*/
|
||||
class RuleBasedBreakIterator::DictionaryCache: public UMemory {
|
||||
public:
|
||||
DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status);
|
||||
~DictionaryCache();
|
||||
|
||||
void reset();
|
||||
|
||||
UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
|
||||
UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
|
||||
|
||||
/**
|
||||
* Populate the cache with the dictionary based boundaries within a region of text.
|
||||
* @param startPos The start position of a range of text
|
||||
* @param endPos The end position of a range of text
|
||||
* @param firstRuleStatus The rule status index that applies to the break at startPos
|
||||
* @param otherRuleStatus The rule status index that applies to boundaries other than startPos
|
||||
* @internal
|
||||
*/
|
||||
void populateDictionary(int32_t startPos, int32_t endPos,
|
||||
int32_t firstRuleStatus, int32_t otherRuleStatus);
|
||||
|
||||
|
||||
|
||||
RuleBasedBreakIterator *fBI;
|
||||
|
||||
UVector32 fBreaks; // A vector containing the boundaries.
|
||||
int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following()
|
||||
// or preceding(). Optimizes sequential access.
|
||||
int32_t fStart; // Text position of first boundary in cache.
|
||||
int32_t fLimit; // Last boundary in cache. Which is the limit of the
|
||||
// text segment being handled by the dictionary.
|
||||
int32_t fFirstRuleStatusIndex; // Rule status info for first boundary.
|
||||
int32_t fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries.
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* class BreakCache
|
||||
*
|
||||
* Cache of break boundary positions and rule status values.
|
||||
* Break iterator API functions, next(), previous(), etc., will use cached results
|
||||
* when possible, and otherwise cache new results as they are obtained.
|
||||
*
|
||||
* Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
|
||||
*
|
||||
* The cache is implemented as a single circular buffer.
|
||||
*/
|
||||
|
||||
/*
|
||||
* size of the circular cache buffer.
|
||||
*/
|
||||
|
||||
class RuleBasedBreakIterator::BreakCache: public UMemory {
|
||||
public:
|
||||
BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status);
|
||||
virtual ~BreakCache();
|
||||
void reset(int32_t pos = 0, int32_t ruleStatus = 0);
|
||||
void next() { if (fBufIdx == fEndBufIdx) {
|
||||
nextOL();
|
||||
} else {
|
||||
fBufIdx = modChunkSize(fBufIdx + 1);
|
||||
fTextIdx = fBI->fPosition = fBoundaries[fBufIdx];
|
||||
fBI->fRuleStatusIndex = fStatuses[fBufIdx];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nextOL();
|
||||
void previous(UErrorCode &status);
|
||||
|
||||
// Move the iteration state to the position following the startPosition.
|
||||
// Input position must be pinned to the input length.
|
||||
void following(int32_t startPosition, UErrorCode &status);
|
||||
|
||||
void preceding(int32_t startPosition, UErrorCode &status);
|
||||
|
||||
/*
|
||||
* Update the state of the public BreakIterator (fBI) to reflect the
|
||||
* current state of the break iterator cache (this).
|
||||
*/
|
||||
int32_t current();
|
||||
|
||||
/**
|
||||
* Add boundaries to the cache near the specified position.
|
||||
* The given position need not be a boundary itself.
|
||||
* The input position must be within the range of the text, and
|
||||
* on a code point boundary.
|
||||
* If the requested position is a break boundary, leave the iteration
|
||||
* position on it.
|
||||
* If the requested position is not a boundary, leave the iteration
|
||||
* position on the preceding boundary and include both the
|
||||
* preceding and following boundaries in the cache.
|
||||
* Additional boundaries, either preceding or following, may be added
|
||||
* to the cache as a side effect.
|
||||
*
|
||||
* Return false if the operation failed.
|
||||
*/
|
||||
UBool populateNear(int32_t position, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Add boundary(s) to the cache following the current last boundary.
|
||||
* Return false if at the end of the text, and no more boundaries can be added.
|
||||
* Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
|
||||
*/
|
||||
UBool populateFollowing();
|
||||
|
||||
/**
|
||||
* Add one or more boundaries to the cache preceding the first currently cached boundary.
|
||||
* Leave the iteration position on the first added boundary.
|
||||
* Return false if no boundaries could be added (if at the start of the text.)
|
||||
*/
|
||||
UBool populatePreceding(UErrorCode &status);
|
||||
|
||||
enum UpdatePositionValues {
|
||||
RetainCachePosition = 0,
|
||||
UpdateCachePosition = 1
|
||||
};
|
||||
|
||||
/*
|
||||
* Add the boundary following the current position.
|
||||
* The current position can be left as it was, or changed to the newly added boundary,
|
||||
* as specified by the update parameter.
|
||||
*/
|
||||
void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
|
||||
|
||||
|
||||
/*
|
||||
* Add the boundary preceding the current position.
|
||||
* The current position can be left as it was, or changed to the newly added boundary,
|
||||
* as specified by the update parameter.
|
||||
*/
|
||||
bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
|
||||
|
||||
/**
|
||||
* Set the cache position to the specified position, or, if the position
|
||||
* falls between to cached boundaries, to the preceding boundary.
|
||||
* Fails if the requested position is outside of the range of boundaries currently held by the cache.
|
||||
* The startPosition must be on a code point boundary.
|
||||
*
|
||||
* Return true if successful, false if the specified position is after
|
||||
* the last cached boundary or before the first.
|
||||
*/
|
||||
UBool seek(int32_t startPosition);
|
||||
|
||||
void dumpCache();
|
||||
|
||||
private:
|
||||
static inline int32_t modChunkSize(int index) { return index & (CACHE_SIZE - 1); }
|
||||
|
||||
static constexpr int32_t CACHE_SIZE = 128;
|
||||
static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");
|
||||
|
||||
RuleBasedBreakIterator *fBI;
|
||||
int32_t fStartBufIdx;
|
||||
int32_t fEndBufIdx; // inclusive
|
||||
|
||||
int32_t fTextIdx;
|
||||
int32_t fBufIdx;
|
||||
|
||||
int32_t fBoundaries[CACHE_SIZE];
|
||||
uint16_t fStatuses[CACHE_SIZE];
|
||||
|
||||
UVector32 fSideBuffer;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // #if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#endif // RBBI_CACHE_H
|
Reference in New Issue
Block a user