initial commit, 4.5 stable
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
Some checks failed
🔗 GHA / 📊 Static checks (push) Has been cancelled
🔗 GHA / 🤖 Android (push) Has been cancelled
🔗 GHA / 🍏 iOS (push) Has been cancelled
🔗 GHA / 🐧 Linux (push) Has been cancelled
🔗 GHA / 🍎 macOS (push) Has been cancelled
🔗 GHA / 🏁 Windows (push) Has been cancelled
🔗 GHA / 🌐 Web (push) Has been cancelled
This commit is contained in:
239
thirdparty/icu4c/common/unicode/appendable.h
vendored
Normal file
239
thirdparty/icu4c/common/unicode/appendable.h
vendored
Normal file
@@ -0,0 +1,239 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: appendable.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010dec07
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __APPENDABLE_H__
|
||||
#define __APPENDABLE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (char16_ts).
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeString;
|
||||
|
||||
/**
|
||||
* Base class for objects to which Unicode characters and strings can be appended.
|
||||
* Combines elements of Java Appendable and ICU4C ByteSink.
|
||||
*
|
||||
* This class can be used in APIs where it does not matter whether the actual destination is
|
||||
* a UnicodeString, a char16_t[] array, a UnicodeSet, or any other object
|
||||
* that receives and processes characters and/or strings.
|
||||
*
|
||||
* Implementation classes must implement at least appendCodeUnit(char16_t).
|
||||
* The base class provides default implementations for the other methods.
|
||||
*
|
||||
* The methods do not take UErrorCode parameters.
|
||||
* If an error occurs (e.g., out-of-memory),
|
||||
* in addition to returning false from failing operations,
|
||||
* the implementation must prevent unexpected behavior (e.g., crashes)
|
||||
* from further calls and should make the error condition available separately
|
||||
* (e.g., store a UErrorCode, make/keep a UnicodeString bogus).
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API Appendable : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
~Appendable();
|
||||
|
||||
/**
|
||||
* Appends a 16-bit code unit.
|
||||
* @param c code unit
|
||||
* @return true if the operation succeeded
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual UBool appendCodeUnit(char16_t c) = 0;
|
||||
|
||||
/**
|
||||
* Appends a code point.
|
||||
* The default implementation calls appendCodeUnit(char16_t) once or twice.
|
||||
* @param c code point 0..0x10ffff
|
||||
* @return true if the operation succeeded
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual UBool appendCodePoint(UChar32 c);
|
||||
|
||||
/**
|
||||
* Appends a string.
|
||||
* The default implementation calls appendCodeUnit(char16_t) for each code unit.
|
||||
* @param s string, must not be nullptr if length!=0
|
||||
* @param length string length, or -1 if NUL-terminated
|
||||
* @return true if the operation succeeded
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual UBool appendString(const char16_t *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Tells the object that the caller is going to append roughly
|
||||
* appendCapacity char16_ts. A subclass might use this to pre-allocate
|
||||
* a larger buffer if necessary.
|
||||
* The default implementation does nothing. (It always returns true.)
|
||||
* @param appendCapacity estimated number of char16_ts that will be appended
|
||||
* @return true if the operation succeeded
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual UBool reserveAppendCapacity(int32_t appendCapacity);
|
||||
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* *resultCapacity. Guarantees *resultCapacity>=minCapacity.
|
||||
* May return a pointer to the caller-owned scratch buffer which must have
|
||||
* scratchCapacity>=minCapacity.
|
||||
* The returned buffer is only valid until the next operation
|
||||
* on this Appendable.
|
||||
*
|
||||
* After writing at most *resultCapacity char16_ts, call appendString() with the
|
||||
* pointer returned from this function and the number of char16_ts written.
|
||||
* Many appendString() implementations will avoid copying char16_ts if this function
|
||||
* returned an internal buffer.
|
||||
*
|
||||
* Partial usage example:
|
||||
* \code
|
||||
* int32_t capacity;
|
||||
* char16_t* buffer = app.getAppendBuffer(..., &capacity);
|
||||
* ... Write n char16_ts into buffer, with n <= capacity.
|
||||
* app.appendString(buffer, n);
|
||||
* \endcode
|
||||
* In many implementations, that call to append will avoid copying char16_ts.
|
||||
*
|
||||
* If the Appendable allocates or reallocates an internal buffer, it should use
|
||||
* the desiredCapacityHint if appropriate.
|
||||
* If a caller cannot provide a reasonable guess at the desired capacity,
|
||||
* it should pass desiredCapacityHint=0.
|
||||
*
|
||||
* If a non-scratch buffer is returned, the caller may only pass
|
||||
* a prefix to it to appendString().
|
||||
* That is, it is not correct to pass an interior pointer to appendString().
|
||||
*
|
||||
* The default implementation always returns the scratch buffer.
|
||||
*
|
||||
* @param minCapacity required minimum capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param desiredCapacityHint desired capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param scratch default caller-owned buffer
|
||||
* @param scratchCapacity capacity of the scratch buffer
|
||||
* @param resultCapacity pointer to an integer which will be set to the
|
||||
* capacity of the returned buffer
|
||||
* @return a buffer with *resultCapacity>=minCapacity
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual char16_t *getAppendBuffer(int32_t minCapacity,
|
||||
int32_t desiredCapacityHint,
|
||||
char16_t *scratch, int32_t scratchCapacity,
|
||||
int32_t *resultCapacity);
|
||||
};
|
||||
|
||||
/**
|
||||
* An Appendable implementation which writes to a UnicodeString.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API UnicodeStringAppendable : public Appendable {
|
||||
public:
|
||||
/**
|
||||
* Aliases the UnicodeString (keeps its reference) for writing.
|
||||
* @param s The UnicodeString to which this Appendable will write.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
explicit UnicodeStringAppendable(UnicodeString &s) : str(s) {}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
~UnicodeStringAppendable();
|
||||
|
||||
/**
|
||||
* Appends a 16-bit code unit to the string.
|
||||
* @param c code unit
|
||||
* @return true if the operation succeeded
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual UBool appendCodeUnit(char16_t c) override;
|
||||
|
||||
/**
|
||||
* Appends a code point to the string.
|
||||
* @param c code point 0..0x10ffff
|
||||
* @return true if the operation succeeded
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual UBool appendCodePoint(UChar32 c) override;
|
||||
|
||||
/**
|
||||
* Appends a string to the UnicodeString.
|
||||
* @param s string, must not be nullptr if length!=0
|
||||
* @param length string length, or -1 if NUL-terminated
|
||||
* @return true if the operation succeeded
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual UBool appendString(const char16_t *s, int32_t length) override;
|
||||
|
||||
/**
|
||||
* Tells the UnicodeString that the caller is going to append roughly
|
||||
* appendCapacity char16_ts.
|
||||
* @param appendCapacity estimated number of char16_ts that will be appended
|
||||
* @return true if the operation succeeded
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual UBool reserveAppendCapacity(int32_t appendCapacity) override;
|
||||
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* *resultCapacity. Guarantees *resultCapacity>=minCapacity.
|
||||
* May return a pointer to the caller-owned scratch buffer which must have
|
||||
* scratchCapacity>=minCapacity.
|
||||
* The returned buffer is only valid until the next write operation
|
||||
* on the UnicodeString.
|
||||
*
|
||||
* For details see Appendable::getAppendBuffer().
|
||||
*
|
||||
* @param minCapacity required minimum capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param desiredCapacityHint desired capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param scratch default caller-owned buffer
|
||||
* @param scratchCapacity capacity of the scratch buffer
|
||||
* @param resultCapacity pointer to an integer which will be set to the
|
||||
* capacity of the returned buffer
|
||||
* @return a buffer with *resultCapacity>=minCapacity
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual char16_t *getAppendBuffer(int32_t minCapacity,
|
||||
int32_t desiredCapacityHint,
|
||||
char16_t *scratch, int32_t scratchCapacity,
|
||||
int32_t *resultCapacity) override;
|
||||
|
||||
private:
|
||||
UnicodeString &str;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __APPENDABLE_H__
|
||||
672
thirdparty/icu4c/common/unicode/brkiter.h
vendored
Normal file
672
thirdparty/icu4c/common/unicode/brkiter.h
vendored
Normal file
@@ -0,0 +1,672 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 1997-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
*
|
||||
* File brkiter.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
|
||||
* 05/07/97 aliu Fixed DLL declaration.
|
||||
* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
|
||||
* 08/11/98 helena Sync-up JDK1.2.
|
||||
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef BRKITER_H
|
||||
#define BRKITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Break Iterator.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* Allow the declaration of APIs with pointers to BreakIterator
|
||||
* even when break iteration is removed from the build.
|
||||
*/
|
||||
class BreakIterator;
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#else
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/strenum.h"
|
||||
#include "unicode/utext.h"
|
||||
#include "unicode/umisc.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class CharString;
|
||||
|
||||
/**
|
||||
* The BreakIterator class implements methods for finding the location
|
||||
* of boundaries in text. BreakIterator is an abstract base class.
|
||||
* Instances of BreakIterator maintain a current position and scan over
|
||||
* text returning the index of characters where boundaries occur.
|
||||
* <p>
|
||||
* Line boundary analysis determines where a text string can be broken
|
||||
* when line-wrapping. The mechanism correctly handles punctuation and
|
||||
* hyphenated words.
|
||||
* <p>
|
||||
* Sentence boundary analysis allows selection with correct
|
||||
* interpretation of periods within numbers and abbreviations, and
|
||||
* trailing punctuation marks such as quotation marks and parentheses.
|
||||
* <p>
|
||||
* Word boundary analysis is used by search and replace functions, as
|
||||
* well as within text editing applications that allow the user to
|
||||
* select words with a double click. Word selection provides correct
|
||||
* interpretation of punctuation marks within and following
|
||||
* words. Characters that are not part of a word, such as symbols or
|
||||
* punctuation marks, have word-breaks on both sides.
|
||||
* <p>
|
||||
* Character boundary analysis allows users to interact with
|
||||
* characters as they expect to, for example, when moving the cursor
|
||||
* through a text string. Character boundary analysis provides correct
|
||||
* navigation through character strings, regardless of how the
|
||||
* character is stored. For example, an accented character might be
|
||||
* stored as a base character and a diacritical mark. What users
|
||||
* consider to be a character can differ between languages.
|
||||
* <p>
|
||||
* The text boundary positions are found according to the rules
|
||||
* described in Unicode Standard Annex #29, Text Boundaries, and
|
||||
* Unicode Standard Annex #14, Line Breaking Properties. These
|
||||
* are available at http://www.unicode.org/reports/tr14/ and
|
||||
* http://www.unicode.org/reports/tr29/.
|
||||
* <p>
|
||||
* In addition to the C++ API defined in this header file, a
|
||||
* plain C API with equivalent functionality is defined in the
|
||||
* file ubrk.h
|
||||
* <p>
|
||||
* Code snippets illustrating the use of the Break Iterator APIs
|
||||
* are available in the ICU User Guide,
|
||||
* https://unicode-org.github.io/icu/userguide/boundaryanalysis/
|
||||
* and in the sample program icu/source/samples/break/break.cpp
|
||||
*
|
||||
*/
|
||||
class U_COMMON_API BreakIterator : public UObject {
|
||||
public:
|
||||
/**
|
||||
* destructor
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~BreakIterator();
|
||||
|
||||
/**
|
||||
* Return true if another object is semantically equal to this
|
||||
* one. The other object should be an instance of the same subclass of
|
||||
* BreakIterator. Objects of different subclasses are considered
|
||||
* unequal.
|
||||
* <P>
|
||||
* Return true if this BreakIterator is at the same position in the
|
||||
* same text, and is the same class and type (word, line, etc.) of
|
||||
* BreakIterator, as the argument. Text is considered the same if
|
||||
* it contains the same characters, it need not be the same
|
||||
* object, and styles are not considered.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual bool operator==(const BreakIterator&) const = 0;
|
||||
|
||||
/**
|
||||
* Returns the complement of the result of operator==
|
||||
* @param rhs The BreakIterator to be compared for inequality
|
||||
* @return the complement of the result of operator==
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
|
||||
|
||||
/**
|
||||
* Return a polymorphic copy of this object. This is an abstract
|
||||
* method which subclasses implement.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual BreakIterator* clone() const = 0;
|
||||
|
||||
/**
|
||||
* Return a polymorphic class ID for this object. Different subclasses
|
||||
* will return distinct unequal values.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const override = 0;
|
||||
|
||||
/**
|
||||
* Return a CharacterIterator over the text being analyzed.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual CharacterIterator& getText() const = 0;
|
||||
|
||||
/**
|
||||
* Get a UText for the text being analyzed.
|
||||
* The returned UText is a shallow clone of the UText used internally
|
||||
* by the break iterator implementation. It can safely be used to
|
||||
* access the text without impacting any break iterator operations,
|
||||
* but the underlying text itself must not be altered.
|
||||
*
|
||||
* @param fillIn A UText to be filled in. If nullptr, a new UText will be
|
||||
* allocated to hold the result.
|
||||
* @param status receives any error codes.
|
||||
* @return The current UText for this break iterator. If an input
|
||||
* UText was provided, it will always be returned.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
|
||||
|
||||
/**
|
||||
* Change the text over which this operates. The text boundary is
|
||||
* reset to the start.
|
||||
*
|
||||
* The BreakIterator will retain a reference to the supplied string.
|
||||
* The caller must not modify or delete the text while the BreakIterator
|
||||
* retains the reference.
|
||||
*
|
||||
* @param text The UnicodeString used to change the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void setText(const UnicodeString &text) = 0;
|
||||
|
||||
/**
|
||||
* Reset the break iterator to operate over the text represented by
|
||||
* the UText. The iterator position is reset to the start.
|
||||
*
|
||||
* This function makes a shallow clone of the supplied UText. This means
|
||||
* that the caller is free to immediately close or otherwise reuse the
|
||||
* Utext that was passed as a parameter, but that the underlying text itself
|
||||
* must not be altered while being referenced by the break iterator.
|
||||
*
|
||||
* All index positions returned by break iterator functions are
|
||||
* native indices from the UText. For example, when breaking UTF-8
|
||||
* encoded text, the break positions returned by next(), previous(), etc.
|
||||
* will be UTF-8 string indices, not UTF-16 positions.
|
||||
*
|
||||
* @param text The UText used to change the text.
|
||||
* @param status receives any error codes.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
virtual void setText(UText *text, UErrorCode &status) = 0;
|
||||
|
||||
/**
|
||||
* Change the text over which this operates. The text boundary is
|
||||
* reset to the start.
|
||||
* Note that setText(UText *) provides similar functionality to this function,
|
||||
* and is more efficient.
|
||||
* @param it The CharacterIterator used to change the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void adoptText(CharacterIterator* it) = 0;
|
||||
|
||||
enum {
|
||||
/**
|
||||
* DONE is returned by previous() and next() after all valid
|
||||
* boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
DONE = static_cast<int32_t>(-1)
|
||||
};
|
||||
|
||||
/**
|
||||
* Sets the current iteration position to the beginning of the text, position zero.
|
||||
* @return The offset of the beginning of the text, zero.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t first() = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
|
||||
* @return The index immediately BEYOND the last character in the text being scanned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t last() = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the boundary preceding the current boundary.
|
||||
* @return The character index of the previous text boundary or DONE if all
|
||||
* boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t previous() = 0;
|
||||
|
||||
/**
|
||||
* Advance the iterator to the boundary following the current boundary.
|
||||
* @return The character index of the next text boundary or DONE if all
|
||||
* boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t next() = 0;
|
||||
|
||||
/**
|
||||
* Return character index of the current iterator position within the text.
|
||||
* @return The boundary most recently returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t current() const = 0;
|
||||
|
||||
/**
|
||||
* Advance the iterator to the first boundary following the specified offset.
|
||||
* The value returned is always greater than the offset or
|
||||
* the value BreakIterator.DONE
|
||||
* @param offset the offset to begin scanning.
|
||||
* @return The first boundary after the specified offset.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t following(int32_t offset) = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the first boundary preceding the specified offset.
|
||||
* The value returned is always smaller than the offset or
|
||||
* the value BreakIterator.DONE
|
||||
* @param offset the offset to begin scanning.
|
||||
* @return The first boundary before the specified offset.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t preceding(int32_t offset) = 0;
|
||||
|
||||
/**
|
||||
* Return true if the specified position is a boundary position.
|
||||
* As a side effect, the current position of the iterator is set
|
||||
* to the first boundary position at or following the specified offset.
|
||||
* @param offset the offset to check.
|
||||
* @return True if "offset" is a boundary position.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UBool isBoundary(int32_t offset) = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the nth boundary from the current boundary
|
||||
* @param n the number of boundaries to move by. A value of 0
|
||||
* does nothing. Negative values move to previous boundaries
|
||||
* and positive values move to later boundaries.
|
||||
* @return The new iterator position, or
|
||||
* DONE if there are fewer than |n| boundaries in the specified direction.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t next(int32_t n) = 0;
|
||||
|
||||
/**
|
||||
* For RuleBasedBreakIterators, return the status tag from the break rule
|
||||
* that determined the boundary at the current iteration position.
|
||||
* <p>
|
||||
* For break iterator types that do not support a rule status,
|
||||
* a default value of 0 is returned.
|
||||
* <p>
|
||||
* @return the status from the break rule that determined the boundary at
|
||||
* the current iteration position.
|
||||
* @see RuleBasedBreakIterator::getRuleStatus()
|
||||
* @see UWordBreak
|
||||
* @stable ICU 52
|
||||
*/
|
||||
virtual int32_t getRuleStatus() const;
|
||||
|
||||
/**
|
||||
* For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
|
||||
* that determined the boundary at the current iteration position.
|
||||
* <p>
|
||||
* For break iterator types that do not support rule status,
|
||||
* no values are returned.
|
||||
* <p>
|
||||
* The returned status value(s) are stored into an array provided by the caller.
|
||||
* The values are stored in sorted (ascending) order.
|
||||
* If the capacity of the output array is insufficient to hold the data,
|
||||
* the output will be truncated to the available length, and a
|
||||
* U_BUFFER_OVERFLOW_ERROR will be signaled.
|
||||
* <p>
|
||||
* @see RuleBasedBreakIterator::getRuleStatusVec
|
||||
*
|
||||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the boundary at the current iteration position.
|
||||
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
|
||||
* is the total number of status values that were available,
|
||||
* not the reduced number that were actually returned.
|
||||
* @see getRuleStatus
|
||||
* @stable ICU 52
|
||||
*/
|
||||
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for word-breaks using the given locale.
|
||||
* Returns an instance of a BreakIterator implementing word breaks.
|
||||
* WordBreak is useful for word selection (ex. double click)
|
||||
* @param where the locale.
|
||||
* @param status the error code
|
||||
* @return A BreakIterator for word-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createWordInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for line-breaks using specified locale.
|
||||
* Returns an instance of a BreakIterator implementing line breaks. Line
|
||||
* breaks are logically possible line breaks, actual line breaks are
|
||||
* usually determined based on display width.
|
||||
* LineBreak is useful for word wrapping text.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for line-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createLineInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for character-breaks using specified locale
|
||||
* Returns an instance of a BreakIterator implementing character breaks.
|
||||
* Character breaks are boundaries of combining character sequences.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for character-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createCharacterInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for sentence-breaks using specified locale
|
||||
* Returns an instance of a BreakIterator implementing sentence breaks.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for sentence-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createSentenceInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Create BreakIterator for title-casing breaks using the specified locale
|
||||
* Returns an instance of a BreakIterator implementing title breaks.
|
||||
* The iterator returned locates title boundaries as described for
|
||||
* Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
|
||||
* please use a word boundary iterator. See {@link #createWordInstance }.
|
||||
*
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for title-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @deprecated ICU 64 Use createWordInstance instead.
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createTitleInstance(const Locale& where, UErrorCode& status);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Get the set of Locales for which TextBoundaries are installed.
|
||||
* <p><b>Note:</b> this will not return locales added through the register
|
||||
* call. To see the registered locales too, use the getAvailableLocales
|
||||
* function that returns a StringEnumeration object </p>
|
||||
* @param count the output parameter of number of elements in the locale list
|
||||
* @return available locales
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
|
||||
|
||||
/**
|
||||
* Get name of the object for the desired Locale, in the desired language.
|
||||
* @param objectLocale must be from getAvailableLocales.
|
||||
* @param displayLocale specifies the desired locale for output.
|
||||
* @param name the fill-in parameter of the return value
|
||||
* Uses best match.
|
||||
* @return user-displayable name
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
|
||||
const Locale& displayLocale,
|
||||
UnicodeString& name);
|
||||
|
||||
/**
|
||||
* Get name of the object for the desired Locale, in the language of the
|
||||
* default locale.
|
||||
* @param objectLocale must be from getAvailableLocales.
|
||||
* @param name the fill-in parameter of the return value
|
||||
* @return user-displayable name
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
|
||||
UnicodeString& name);
|
||||
|
||||
#ifndef U_FORCE_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Deprecated functionality. Use clone() instead.
|
||||
*
|
||||
* Thread safe client-buffer-based cloning operation
|
||||
* Do NOT call delete on a safeclone, since 'new' is not used to create it.
|
||||
* @param stackBuffer user allocated space for the new clone. If nullptr new memory will be allocated.
|
||||
* If buffer is not large enough, new memory will be allocated.
|
||||
* @param BufferSize reference to size of allocated space.
|
||||
* If BufferSize == 0, a sufficient size for use in cloning will
|
||||
* be returned ('pre-flighting')
|
||||
* If BufferSize is not enough for a stack-based safe clone,
|
||||
* new memory will be allocated.
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
|
||||
* necessary.
|
||||
* @return pointer to the new clone
|
||||
*
|
||||
* @deprecated ICU 52. Use clone() instead.
|
||||
*/
|
||||
virtual BreakIterator * createBufferClone(void *stackBuffer,
|
||||
int32_t &BufferSize,
|
||||
UErrorCode &status) = 0;
|
||||
#endif // U_FORCE_HIDE_DEPRECATED_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Determine whether the BreakIterator was created in user memory by
|
||||
* createBufferClone(), and thus should not be deleted. Such objects
|
||||
* must be closed by an explicit call to the destructor (not delete).
|
||||
* @deprecated ICU 52. Always delete the BreakIterator.
|
||||
*/
|
||||
inline UBool isBufferClone();
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
/**
|
||||
* Register a new break iterator of the indicated kind, to use in the given locale.
|
||||
* The break iterator will be adopted. Clones of the iterator will be returned
|
||||
* if a request for a break iterator of the given kind matches or falls back to
|
||||
* this locale.
|
||||
* Because ICU may choose to cache BreakIterators internally, this must
|
||||
* be called at application startup, prior to any calls to
|
||||
* BreakIterator::createXXXInstance to avoid undefined behavior.
|
||||
* @param toAdopt the BreakIterator instance to be adopted
|
||||
* @param locale the Locale for which this instance is to be registered
|
||||
* @param kind the type of iterator for which this instance is to be registered
|
||||
* @param status the in/out status code, no special meanings are assigned
|
||||
* @return a registry key that can be used to unregister this instance
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
|
||||
const Locale& locale,
|
||||
UBreakIteratorType kind,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Unregister a previously-registered BreakIterator using the key returned from the
|
||||
* register call. Key becomes invalid after a successful call and should not be used again.
|
||||
* The BreakIterator corresponding to the key will be deleted.
|
||||
* Because ICU may choose to cache BreakIterators internally, this should
|
||||
* be called during application shutdown, after all calls to
|
||||
* BreakIterator::createXXXInstance to avoid undefined behavior.
|
||||
* @param key the registry key returned by a previous call to registerInstance
|
||||
* @param status the in/out status code, no special meanings are assigned
|
||||
* @return true if the iterator for the key was successfully unregistered
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Return a StringEnumeration over the locales available at the time of the call,
|
||||
* including registered locales.
|
||||
* @return a StringEnumeration over the locales available at the time of the call
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static StringEnumeration* U_EXPORT2 getAvailableLocales();
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the locale for this break iterator. Two flavors are available: valid and
|
||||
* actual locale.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Get the locale for this break iterator object. You can choose between valid and actual locale.
|
||||
* @param type type of the locale we're looking for (valid or actual)
|
||||
* @param status error code for the operation
|
||||
* @return the locale
|
||||
* @internal
|
||||
*/
|
||||
const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the break iterator is operating
|
||||
* without changing any other aspect of the matching state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the break iterator implementation never copies the underlying text
|
||||
* of a string being processed, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized,
|
||||
* system-level code. One example use case is with garbage collection that moves
|
||||
* the text in memory.
|
||||
*
|
||||
* @param input The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
* @return *this
|
||||
*
|
||||
* @stable ICU 49
|
||||
*/
|
||||
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
|
||||
|
||||
private:
|
||||
static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
|
||||
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||
static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||
|
||||
friend class ICUBreakIteratorFactory;
|
||||
friend class ICUBreakIteratorService;
|
||||
|
||||
protected:
|
||||
// Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
|
||||
// or else the compiler will create public ones.
|
||||
/** @internal */
|
||||
BreakIterator();
|
||||
/** @internal */
|
||||
BreakIterator (const BreakIterator &other);
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** @internal */
|
||||
BreakIterator (const Locale& valid, const Locale &actual);
|
||||
/** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
|
||||
BreakIterator &operator = (const BreakIterator &other);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
private:
|
||||
|
||||
/** @internal (private) */
|
||||
CharString* actualLocale = nullptr;
|
||||
CharString* validLocale = nullptr;
|
||||
CharString* requestLocale = nullptr;
|
||||
};
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
// Deprecated query kept for API compatibility: this implementation
// unconditionally reports false.
inline UBool BreakIterator::isBufferClone()
{
    return false;
}
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // BRKITER_H
|
||||
//eof
|
||||
307
thirdparty/icu4c/common/unicode/bytestream.h
vendored
Normal file
307
thirdparty/icu4c/common/unicode/bytestream.h
vendored
Normal file
@@ -0,0 +1,307 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
// Copyright (C) 2009-2012, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Copyright 2007 Google Inc. All Rights Reserved.
|
||||
// Author: sanjay@google.com (Sanjay Ghemawat)
|
||||
//
|
||||
// Abstract interface that consumes a sequence of bytes (ByteSink).
|
||||
//
|
||||
// Used so that we can write a single piece of code that can operate
|
||||
// on a variety of output string types.
|
||||
//
|
||||
// Various implementations of this interface are provided:
|
||||
// ByteSink:
|
||||
// CheckedArrayByteSink Write to a flat array, with bounds checking
|
||||
// StringByteSink Write to an STL string
|
||||
|
||||
// This code is a contribution of Google code, and the style used here is
|
||||
// a compromise between the original Google code and the ICU coding guidelines.
|
||||
// For example, data types are ICU-ified (size_t,int->int32_t),
|
||||
// and API comments doxygen-ified, but function names and behavior are
|
||||
// as in the original, if possible.
|
||||
// Assertion-style error handling, not available in ICU, was changed to
|
||||
// parameter "pinning" similar to UnicodeString.
|
||||
//
|
||||
// In addition, this is only a partial port of the original Google code,
|
||||
// limited to what was needed so far. The (nearly) complete original code
|
||||
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
|
||||
// (see ICU ticket 6765, r25517).
|
||||
|
||||
#ifndef __BYTESTREAM_H__
|
||||
#define __BYTESTREAM_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Interface for writing bytes, and implementation classes.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/std_string.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A ByteSink can be filled with bytes.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
class U_COMMON_API ByteSink : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Default constructor.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
ByteSink() { }
|
||||
/**
|
||||
* Virtual destructor.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual ~ByteSink();
|
||||
|
||||
/**
|
||||
* Append "bytes[0,n-1]" to this.
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Append(const char* bytes, int32_t n) = 0;
|
||||
|
||||
/**
|
||||
* Appends n bytes to this. Same as Append().
|
||||
* Call AppendU8() with u8"string literals" which are const char * in C++11
|
||||
* but const char8_t * in C++20.
|
||||
* If the compiler does support char8_t as a distinct type,
|
||||
* then an AppendU8() overload for that is defined and will be chosen.
|
||||
*
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void AppendU8(const char* bytes, int32_t n) {
|
||||
Append(bytes, n);
|
||||
}
|
||||
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Appends n bytes to this. Same as Append() but for a const char8_t * pointer.
|
||||
* Call AppendU8() with u8"string literals" which are const char * in C++11
|
||||
* but const char8_t * in C++20.
|
||||
* If the compiler does support char8_t as a distinct type,
|
||||
* then this AppendU8() overload for that is defined and will be chosen.
|
||||
*
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void AppendU8(const char8_t* bytes, int32_t n) {
|
||||
Append(reinterpret_cast<const char*>(bytes), n);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* *result_capacity. Guarantees *result_capacity>=min_capacity.
|
||||
* May return a pointer to the caller-owned scratch buffer which must have
|
||||
* scratch_capacity>=min_capacity.
|
||||
* The returned buffer is only valid until the next operation
|
||||
* on this ByteSink.
|
||||
*
|
||||
* After writing at most *result_capacity bytes, call Append() with the
|
||||
* pointer returned from this function and the number of bytes written.
|
||||
* Many Append() implementations will avoid copying bytes if this function
|
||||
* returned an internal buffer.
|
||||
*
|
||||
* Partial usage example:
|
||||
* int32_t capacity;
|
||||
* char* buffer = sink->GetAppendBuffer(..., &capacity);
|
||||
* ... Write n bytes into buffer, with n <= capacity.
|
||||
* sink->Append(buffer, n);
|
||||
* In many implementations, that call to Append will avoid copying bytes.
|
||||
*
|
||||
* If the ByteSink allocates or reallocates an internal buffer, it should use
|
||||
* the desired_capacity_hint if appropriate.
|
||||
* If a caller cannot provide a reasonable guess at the desired capacity,
|
||||
* it should pass desired_capacity_hint=0.
|
||||
*
|
||||
* If a non-scratch buffer is returned, the caller may only pass
|
||||
* a prefix to it to Append().
|
||||
* That is, it is not correct to pass an interior pointer to Append().
|
||||
*
|
||||
* The default implementation always returns the scratch buffer.
|
||||
*
|
||||
* @param min_capacity required minimum capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param desired_capacity_hint desired capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param scratch default caller-owned buffer
|
||||
* @param scratch_capacity capacity of the scratch buffer
|
||||
* @param result_capacity pointer to an integer which will be set to the
|
||||
* capacity of the returned buffer
|
||||
* @return a buffer with *result_capacity>=min_capacity
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual char* GetAppendBuffer(int32_t min_capacity,
|
||||
int32_t desired_capacity_hint,
|
||||
char* scratch, int32_t scratch_capacity,
|
||||
int32_t* result_capacity);
|
||||
|
||||
/**
|
||||
* Flush internal buffers.
|
||||
* Some byte sinks use internal buffers or provide buffering
|
||||
* and require calling Flush() at the end of the stream.
|
||||
* The ByteSink should be ready for further Append() calls after Flush().
|
||||
* The default implementation of Flush() does nothing.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Flush();
|
||||
|
||||
private:
|
||||
ByteSink(const ByteSink &) = delete;
|
||||
ByteSink &operator=(const ByteSink &) = delete;
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------
|
||||
// Some standard implementations
|
||||
|
||||
/**
|
||||
* Implementation of ByteSink that writes to a flat byte array,
|
||||
* with bounds-checking:
|
||||
* This sink will not write more than capacity bytes to outbuf.
|
||||
* If more than capacity bytes are Append()ed, then excess bytes are ignored,
|
||||
* and Overflowed() will return true.
|
||||
* Overflow does not cause a runtime error.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
class U_COMMON_API CheckedArrayByteSink : public ByteSink {
|
||||
public:
|
||||
/**
|
||||
* Constructs a ByteSink that will write to outbuf[0..capacity-1].
|
||||
* @param outbuf buffer to write to
|
||||
* @param capacity size of the buffer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
CheckedArrayByteSink(char* outbuf, int32_t capacity);
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual ~CheckedArrayByteSink();
|
||||
/**
|
||||
* Returns the sink to its original state, without modifying the buffer.
|
||||
* Useful for reusing both the buffer and the sink for multiple streams.
|
||||
* Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
|
||||
* and Overflowed()=false.
|
||||
* @return *this
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual CheckedArrayByteSink& Reset();
|
||||
/**
|
||||
* Append "bytes[0,n-1]" to this.
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Append(const char* bytes, int32_t n) override;
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* *result_capacity. For details see the base class documentation.
|
||||
* @param min_capacity required minimum capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param desired_capacity_hint desired capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param scratch default caller-owned buffer
|
||||
* @param scratch_capacity capacity of the scratch buffer
|
||||
* @param result_capacity pointer to an integer which will be set to the
|
||||
* capacity of the returned buffer
|
||||
* @return a buffer with *result_capacity>=min_capacity
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual char* GetAppendBuffer(int32_t min_capacity,
|
||||
int32_t desired_capacity_hint,
|
||||
char* scratch, int32_t scratch_capacity,
|
||||
int32_t* result_capacity) override;
|
||||
/**
|
||||
* Returns the number of bytes actually written to the sink.
|
||||
* @return number of bytes written to the buffer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
int32_t NumberOfBytesWritten() const { return size_; }
|
||||
/**
|
||||
* Returns true if any bytes were discarded, i.e., if there was an
|
||||
* attempt to write more than 'capacity' bytes.
|
||||
* @return true if more than 'capacity' bytes were Append()ed
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
UBool Overflowed() const { return overflowed_; }
|
||||
/**
|
||||
* Returns the number of bytes appended to the sink.
|
||||
* If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
|
||||
* else they return the same number.
|
||||
* @return number of bytes written to the buffer
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
int32_t NumberOfBytesAppended() const { return appended_; }
|
||||
private:
|
||||
char* outbuf_;
|
||||
const int32_t capacity_;
|
||||
int32_t size_;
|
||||
int32_t appended_;
|
||||
UBool overflowed_;
|
||||
|
||||
CheckedArrayByteSink() = delete;
|
||||
CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
|
||||
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
|
||||
};
|
||||
|
||||
/**
|
||||
* Implementation of ByteSink that writes to a "string".
|
||||
* The StringClass is usually instantiated with a std::string.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
template<typename StringClass>
|
||||
class StringByteSink : public ByteSink {
|
||||
public:
|
||||
/**
|
||||
* Constructs a ByteSink that will append bytes to the dest string.
|
||||
* @param dest pointer to string object to append to
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringByteSink(StringClass* dest) : dest_(dest) { }
|
||||
/**
|
||||
* Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
|
||||
*
|
||||
* @param dest pointer to string object to append to
|
||||
* @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
|
||||
* @stable ICU 60
|
||||
*/
|
||||
StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
|
||||
if (initialAppendCapacity > 0 &&
|
||||
static_cast<uint32_t>(initialAppendCapacity) > dest->capacity() - dest->length()) {
|
||||
dest->reserve(dest->length() + initialAppendCapacity);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Append "bytes[0,n-1]" to this.
|
||||
* @param data the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Append(const char* data, int32_t n) override { dest_->append(data, n); }
|
||||
private:
|
||||
StringClass* dest_;
|
||||
|
||||
StringByteSink() = delete;
|
||||
StringByteSink(const StringByteSink &) = delete;
|
||||
StringByteSink &operator=(const StringByteSink &) = delete;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __BYTESTREAM_H__
|
||||
568
thirdparty/icu4c/common/unicode/bytestrie.h
vendored
Normal file
568
thirdparty/icu4c/common/unicode/bytestrie.h
vendored
Normal file
@@ -0,0 +1,568 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytestrie.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010sep25
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __BYTESTRIE_H__
|
||||
#define __BYTESTRIE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Trie for mapping byte sequences to integer values.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ustringtrie.h"
|
||||
|
||||
class BytesTrieTest;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ByteSink;
|
||||
class BytesTrieBuilder;
|
||||
class CharString;
|
||||
class UVector32;
|
||||
|
||||
/**
|
||||
* Light-weight, non-const reader class for a BytesTrie.
|
||||
* Traverses a byte-serialized data structure with minimal state,
|
||||
* for mapping byte sequences to non-negative integer values.
|
||||
*
|
||||
* This class owns the serialized trie data only if it was constructed by
|
||||
* the builder's build() method.
|
||||
* The public constructor and the copy constructor only alias the data (only copy the pointer).
|
||||
* There is no assignment operator.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API BytesTrie : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs a BytesTrie reader instance.
|
||||
*
|
||||
* The trieBytes must contain a copy of a byte sequence from the BytesTrieBuilder,
|
||||
* starting with the first byte of that sequence.
|
||||
* The BytesTrie object will not read more bytes than
|
||||
* the BytesTrieBuilder generated in the corresponding build() call.
|
||||
*
|
||||
* The array is not copied/cloned and must not be modified while
|
||||
* the BytesTrie object is in use.
|
||||
*
|
||||
* @param trieBytes The byte array that contains the serialized trie.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
BytesTrie(const void *trieBytes)
|
||||
: ownedArray_(nullptr), bytes_(static_cast<const uint8_t *>(trieBytes)),
|
||||
pos_(bytes_), remainingMatchLength_(-1) {}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
~BytesTrie();
|
||||
|
||||
/**
|
||||
* Copy constructor, copies the other trie reader object and its state,
|
||||
* but not the byte array which will be shared. (Shallow copy.)
|
||||
* @param other Another BytesTrie object.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
BytesTrie(const BytesTrie &other)
|
||||
: ownedArray_(nullptr), bytes_(other.bytes_),
|
||||
pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
|
||||
|
||||
/**
|
||||
* Resets this trie to its initial state.
|
||||
* @return *this
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
BytesTrie &reset() {
    // Back to the first trie byte, with the same -1 remaining match length
    // that the public constructor sets.
    pos_=bytes_;
    remainingMatchLength_=-1;
    return *this;
}
|
||||
|
||||
/**
|
||||
* Returns the state of this trie as a 64-bit integer.
|
||||
* The state value is never 0.
|
||||
*
|
||||
* @return opaque state value
|
||||
* @see resetToState64
|
||||
* @stable ICU 65
|
||||
*/
|
||||
uint64_t getState64() const {
|
||||
return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
|
||||
static_cast<uint64_t>(pos_ - bytes_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this trie to the saved state.
|
||||
* Unlike resetToState(State), the 64-bit state value
|
||||
* must be from getState64() from the same trie object or
|
||||
* from one initialized the exact same way.
|
||||
* Because of no validation, this method is faster.
|
||||
*
|
||||
* @param state The opaque trie state value from getState64().
|
||||
* @return *this
|
||||
* @see getState64
|
||||
* @see resetToState
|
||||
* @see reset
|
||||
* @stable ICU 65
|
||||
*/
|
||||
BytesTrie &resetToState64(uint64_t state) {
    // Inverse of getState64(): undo the +2 bias on the upper field and turn
    // the masked lower field back into a pointer into this trie's bytes.
    // No validation is performed on the state value.
    remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2;
    pos_ = bytes_ + (state & kState64PosMask);
    return *this;
}
|
||||
|
||||
/**
|
||||
* BytesTrie state object, for saving a trie's current state
|
||||
* and resetting the trie back to this state later.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class State : public UMemory {
public:
    /**
     * Constructs an empty State.
     * An empty State has bytes==nullptr, so BytesTrie::resetToState() ignores it.
     * All members are initialized so that copying an empty State never reads
     * indeterminate values; -1 mirrors the value BytesTrie::reset() uses.
     * @stable ICU 4.8
     */
    State() : bytes(nullptr), pos(nullptr), remainingMatchLength(-1) {}
private:
    friend class BytesTrie;

    const uint8_t *bytes;          // trie start captured by saveState(); nullptr when empty
    const uint8_t *pos;            // saved BytesTrie::pos_
    int32_t remainingMatchLength;  // saved BytesTrie::remainingMatchLength_
};
|
||||
|
||||
/**
|
||||
* Saves the state of this trie.
|
||||
* @param state The State object to hold the trie's state.
|
||||
* @return *this
|
||||
* @see resetToState
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
const BytesTrie &saveState(State &state) const {
    // Record the trie start as well as the position, so that resetToState()
    // can tell whether the state belongs to this trie.
    state.bytes=bytes_;
    state.pos=pos_;
    state.remainingMatchLength=remainingMatchLength_;
    return *this;
}
|
||||
|
||||
/**
|
||||
* Resets this trie to the saved state.
|
||||
* If the state object contains no state, or the state of a different trie,
|
||||
* then this trie remains unchanged.
|
||||
* @param state The State object which holds a saved trie state.
|
||||
* @return *this
|
||||
* @see saveState
|
||||
* @see reset
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
BytesTrie &resetToState(const State &state) {
    // Restore only when the state was saved from the same byte array and that
    // array is non-null; otherwise leave this trie unchanged.
    if(bytes_==state.bytes && bytes_!=nullptr) {
        pos_=state.pos;
        remainingMatchLength_=state.remainingMatchLength;
    }
    return *this;
}
|
||||
|
||||
/**
|
||||
* Determines whether the byte sequence so far matches, whether it has a value,
|
||||
* and whether another input byte can continue a matching byte sequence.
|
||||
* @return The match/value Result.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UStringTrieResult current() const;
|
||||
|
||||
/**
|
||||
* Traverses the trie from the initial state for this input byte.
|
||||
* Equivalent to reset().next(inByte).
|
||||
* @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
|
||||
* Values below -0x100 and above 0xff will never match.
|
||||
* @return The match/value Result.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline UStringTrieResult first(int32_t inByte) {
    // Same remaining-match-length value that reset() sets.
    remainingMatchLength_=-1;
    // Shift negative inputs up by 0x100, so -0x100..-1 become 0..0xff.
    if(inByte<0) {
        inByte+=0x100;
    }
    // Search from the first trie byte rather than from pos_.
    return nextImpl(bytes_, inByte);
}
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this input byte.
|
||||
* @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
|
||||
* Values below -0x100 and above 0xff will never match.
|
||||
* @return The match/value Result.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UStringTrieResult next(int32_t inByte);
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this byte sequence.
|
||||
* Equivalent to
|
||||
* \code
|
||||
* Result result=current();
|
||||
* for(each c in s)
|
||||
* if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
|
||||
* result=next(c);
|
||||
* return result;
|
||||
* \endcode
|
||||
* @param s A string or byte sequence. Can be nullptr if length is 0.
|
||||
* @param length The length of the byte sequence. Can be -1 if NUL-terminated.
|
||||
* @return The match/value Result.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UStringTrieResult next(const char *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Returns a matching byte sequence's value if called immediately after
|
||||
* current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
|
||||
* getValue() can be called multiple times.
|
||||
*
|
||||
* Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
|
||||
* @return The value for the byte sequence so far.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline int32_t getValue() const {
|
||||
const uint8_t *pos=pos_;
|
||||
int32_t leadByte=*pos++;
|
||||
// U_ASSERT(leadByte>=kMinValueLead);
|
||||
return readValue(pos, leadByte>>1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether all byte sequences reachable from the current state
|
||||
* map to the same value.
|
||||
* @param uniqueValue Receives the unique value, if this function returns true.
|
||||
* (output-only)
|
||||
* @return true if all byte sequences reachable from the current state
|
||||
* map to the same value.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline UBool hasUniqueValue(int32_t &uniqueValue) const {
    const uint8_t *pos=pos_;
    // A null position yields false without touching uniqueValue.
    // Skip the rest of a pending linear-match node.
    return pos!=nullptr && findUniqueValue(pos+remainingMatchLength_+1, false, uniqueValue);
}
|
||||
|
||||
/**
|
||||
* Finds each byte which continues the byte sequence from the current state.
|
||||
* That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now.
|
||||
* @param out Each next byte is appended to this object.
|
||||
* (Only uses the out.Append(s, length) method.)
|
||||
* @return the number of bytes which continue the byte sequence from here
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t getNextBytes(ByteSink &out) const;
|
||||
|
||||
/**
|
||||
* Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API Iterator : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Iterates from the root of a byte-serialized BytesTrie.
|
||||
* @param trieBytes The trie bytes.
|
||||
* @param maxStringLength If 0, the iterator returns full strings/byte sequences.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
Iterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Iterates from the current state of the specified BytesTrie.
|
||||
* @param trie The trie whose state will be copied for iteration.
|
||||
* @param maxStringLength If 0, the iterator returns full strings/byte sequences.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
Iterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
~Iterator();
|
||||
|
||||
/**
|
||||
* Resets this iterator to its initial state.
|
||||
* @return *this
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
Iterator &reset();
|
||||
|
||||
/**
|
||||
* @return true if there are more elements.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UBool hasNext() const;
|
||||
|
||||
/**
|
||||
* Finds the next (byte sequence, value) pair if there is one.
|
||||
*
|
||||
* If the byte sequence is truncated to the maximum length and does not
|
||||
* have a real value, then the value is set to -1.
|
||||
* In this case, this "not a real value" is indistinguishable from
|
||||
* a real value of -1.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return true if there is another element.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return The NUL-terminated byte sequence for the last successful next().
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
StringPiece getString() const;
|
||||
/**
|
||||
* @return The value for the last successful next().
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t getValue() const { return value_; }
|
||||
|
||||
private:
|
||||
UBool truncateAndStop();
|
||||
|
||||
const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode);
|
||||
|
||||
const uint8_t *bytes_;
|
||||
const uint8_t *pos_;
|
||||
const uint8_t *initialPos_;
|
||||
int32_t remainingMatchLength_;
|
||||
int32_t initialRemainingMatchLength_;
|
||||
|
||||
CharString *str_;
|
||||
int32_t maxLength_;
|
||||
int32_t value_;
|
||||
|
||||
// The stack stores pairs of integers for backtracking to another
|
||||
// outbound edge of a branch node.
|
||||
// The first integer is an offset from bytes_.
|
||||
// The second integer has the str_->length() from before the node in bits 15..0,
|
||||
// and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
|
||||
// (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
|
||||
// but the code looks more confusing that way.)
|
||||
UVector32 *stack_;
|
||||
};
|
||||
|
||||
private:
|
||||
friend class BytesTrieBuilder;
|
||||
friend class ::BytesTrieTest;
|
||||
|
||||
/**
|
||||
* Constructs a BytesTrie reader instance.
|
||||
* Unlike the public constructor which just aliases an array,
|
||||
* this constructor adopts the builder's array.
|
||||
* This constructor is only called by the builder.
|
||||
*/
|
||||
BytesTrie(void *adoptBytes, const void *trieBytes)
|
||||
: ownedArray_(static_cast<uint8_t *>(adoptBytes)),
|
||||
bytes_(static_cast<const uint8_t *>(trieBytes)),
|
||||
pos_(bytes_), remainingMatchLength_(-1) {}
|
||||
|
||||
// No assignment operator.
|
||||
BytesTrie &operator=(const BytesTrie &other) = delete;
|
||||
|
||||
// Clears the read position; a nullptr pos_ means there are no more matches.
inline void stop() { pos_=nullptr; }
|
||||
|
||||
// Reads a compact 32-bit integer.
|
||||
// pos is already after the leadByte, and the lead byte is already shifted right by 1.
|
||||
static int32_t readValue(const uint8_t *pos, int32_t leadByte);
|
||||
static inline const uint8_t *skipValue(const uint8_t *pos, int32_t leadByte) {
|
||||
// U_ASSERT(leadByte>=kMinValueLead);
|
||||
if(leadByte>=(kMinTwoByteValueLead<<1)) {
|
||||
if(leadByte<(kMinThreeByteValueLead<<1)) {
|
||||
++pos;
|
||||
} else if(leadByte<(kFourByteValueLead<<1)) {
|
||||
pos+=2;
|
||||
} else {
|
||||
pos+=3+((leadByte>>1)&1);
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
static inline const uint8_t *skipValue(const uint8_t *pos) {
|
||||
int32_t leadByte=*pos++;
|
||||
return skipValue(pos, leadByte);
|
||||
}
|
||||
|
||||
// Reads a jump delta and jumps.
|
||||
static const uint8_t *jumpByDelta(const uint8_t *pos);
|
||||
|
||||
static inline const uint8_t *skipDelta(const uint8_t *pos) {
|
||||
int32_t delta=*pos++;
|
||||
if(delta>=kMinTwoByteDeltaLead) {
|
||||
if(delta<kMinThreeByteDeltaLead) {
|
||||
++pos;
|
||||
} else if(delta<kFourByteDeltaLead) {
|
||||
pos+=2;
|
||||
} else {
|
||||
pos+=3+(delta&1);
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
// Converts a value node's lead byte into a match result:
// USTRINGTRIE_INTERMEDIATE_VALUE when the final bit (bit 0) is clear,
// and the enum constant one below it when the bit is set
// (presumably USTRINGTRIE_FINAL_VALUE -- confirm against ustringtrie.h).
static inline UStringTrieResult valueResult(int32_t node) {
    const int32_t finalBit=node&kValueIsFinal;
    return static_cast<UStringTrieResult>(USTRINGTRIE_INTERMEDIATE_VALUE-finalBit);
}
|
||||
|
||||
// Handles a branch node for both next(byte) and next(string).
|
||||
UStringTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);
|
||||
|
||||
// Requires remainingMatchLength_<0.
|
||||
UStringTrieResult nextImpl(const uint8_t *pos, int32_t inByte);
|
||||
|
||||
// Helper functions for hasUniqueValue().
|
||||
// Recursively finds a unique value (or whether there is not a unique one)
|
||||
// from a branch.
|
||||
static const uint8_t *findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
|
||||
UBool haveUniqueValue, int32_t &uniqueValue);
|
||||
// Recursively finds a unique value (or whether there is not a unique one)
|
||||
// starting from a position on a node lead byte.
|
||||
static UBool findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
|
||||
|
||||
// Helper functions for getNextBytes().
|
||||
// getNextBytes() when pos is on a branch node.
|
||||
static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out);
|
||||
static void append(ByteSink &out, int c);
|
||||
|
||||
// BytesTrie data structure
|
||||
//
|
||||
// The trie consists of a series of byte-serialized nodes for incremental
|
||||
// string/byte sequence matching. The root node is at the beginning of the trie data.
|
||||
//
|
||||
// Types of nodes are distinguished by their node lead byte ranges.
|
||||
// After each node, except a final-value node, another node follows to
|
||||
// encode match values or continue matching further bytes.
|
||||
//
|
||||
// Node types:
|
||||
// - Value node: Stores a 32-bit integer in a compact, variable-length format.
|
||||
// The value is for the string/byte sequence so far.
|
||||
// One node bit indicates whether the value is final or whether
|
||||
// matching continues with the next node.
|
||||
// - Linear-match node: Matches a number of bytes.
|
||||
// - Branch node: Branches to other nodes according to the current input byte.
|
||||
// The node byte is the length of the branch (number of bytes to select from)
|
||||
// minus 1. It is followed by a sub-node:
|
||||
// - If the length is at most kMaxBranchLinearSubNodeLength, then
|
||||
// there are length-1 (key, value) pairs and then one more comparison byte.
|
||||
// If one of the key bytes matches, then the value is either a final value for
|
||||
// the string/byte sequence so far, or a "jump" delta to the next node.
|
||||
// If the last byte matches, then matching continues with the next node.
|
||||
// (Values have the same encoding as value nodes.)
|
||||
// - If the length is greater than kMaxBranchLinearSubNodeLength, then
|
||||
// there is one byte and one "jump" delta.
|
||||
// If the input byte is less than the sub-node byte, then "jump" by delta to
|
||||
// the next sub-node which will have a length of length/2.
|
||||
// (The delta has its own compact encoding.)
|
||||
// Otherwise, skip the "jump" delta to the next sub-node
|
||||
// which will have a length of length-length/2.
|
||||
|
||||
// Node lead byte values.
|
||||
|
||||
// 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
|
||||
// the length is one more than the next byte.
|
||||
|
||||
// For a branch sub-node with at most this many entries, we drop down
|
||||
// to a linear search.
|
||||
static const int32_t kMaxBranchLinearSubNodeLength=5;
|
||||
|
||||
// 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
|
||||
static const int32_t kMinLinearMatch=0x10;
|
||||
static const int32_t kMaxLinearMatchLength=0x10;
|
||||
|
||||
// 20..ff: Variable-length value node.
|
||||
// If odd, the value is final. (Otherwise, intermediate value or jump delta.)
|
||||
// Then shift-right by 1 bit.
|
||||
// The remaining lead byte value indicates the number of following bytes (0..4)
|
||||
// and contains the value's top bits.
|
||||
static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x20
|
||||
// It is a final value if bit 0 is set.
|
||||
static const int32_t kValueIsFinal=1;
|
||||
|
||||
// Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
|
||||
static const int32_t kMinOneByteValueLead=kMinValueLead/2; // 0x10
|
||||
static const int32_t kMaxOneByteValue=0x40; // At least 6 bits in the first byte.
|
||||
|
||||
static const int32_t kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1; // 0x51
|
||||
static const int32_t kMaxTwoByteValue=0x1aff;
|
||||
|
||||
static const int32_t kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1; // 0x6c
|
||||
static const int32_t kFourByteValueLead=0x7e;
|
||||
|
||||
// A little more than Unicode code points. (0x11ffff)
|
||||
static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
|
||||
|
||||
static const int32_t kFiveByteValueLead=0x7f;
|
||||
|
||||
// Compact delta integers.
|
||||
static const int32_t kMaxOneByteDelta=0xbf;
|
||||
static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1; // 0xc0
|
||||
static const int32_t kMinThreeByteDeltaLead=0xf0;
|
||||
static const int32_t kFourByteDeltaLead=0xfe;
|
||||
static const int32_t kFiveByteDeltaLead=0xff;
|
||||
|
||||
static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff
|
||||
static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff
|
||||
|
||||
// For getState64():
|
||||
// The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2
|
||||
// so we need at least 5 bits for that.
|
||||
// We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength.
|
||||
static constexpr int32_t kState64RemainingShift = 59;
|
||||
static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
|
||||
|
||||
uint8_t *ownedArray_;
|
||||
|
||||
// Fixed value referencing the BytesTrie bytes.
|
||||
const uint8_t *bytes_;
|
||||
|
||||
// Iterator variables.
|
||||
|
||||
// Pointer to next trie byte to read. nullptr if no more matches.
|
||||
const uint8_t *pos_;
|
||||
// Remaining length of a linear-match node, minus 1. Negative if not in such a node.
|
||||
int32_t remainingMatchLength_;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __BYTESTRIE_H__
|
||||
193
thirdparty/icu4c/common/unicode/bytestriebuilder.h
vendored
Normal file
193
thirdparty/icu4c/common/unicode/bytestriebuilder.h
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytestriebuilder.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010sep25
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Builder for icu::BytesTrie
|
||||
*/
|
||||
|
||||
#ifndef __BYTESTRIEBUILDER_H__
|
||||
#define __BYTESTRIEBUILDER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/stringtriebuilder.h"
|
||||
|
||||
class BytesTrieTest;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class BytesTrieElement;
|
||||
class CharString;
|
||||
/**
|
||||
* Builder class for BytesTrie.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder {
|
||||
public:
|
||||
/**
|
||||
* Constructs an empty builder.
|
||||
* @param errorCode Standard ICU error code.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
BytesTrieBuilder(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual ~BytesTrieBuilder();
|
||||
|
||||
/**
|
||||
* Adds a (byte sequence, value) pair.
|
||||
* The byte sequence must be unique.
|
||||
* The bytes will be copied; the builder does not keep
|
||||
* a reference to the input StringPiece or its data().
|
||||
* @param s The input byte sequence.
|
||||
* @param value The value associated with this byte sequence.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
BytesTrieBuilder &add(StringPiece s, int32_t value, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Builds a BytesTrie for the add()ed data.
|
||||
* Once built, no further data can be add()ed until clear() is called.
|
||||
*
|
||||
* A BytesTrie cannot be empty. At least one (byte sequence, value) pair
|
||||
* must have been add()ed.
|
||||
*
|
||||
* This method passes ownership of the builder's internal result array to the new trie object.
|
||||
* Another call to any build() variant will re-serialize the trie.
|
||||
* After clear() has been called, a new array will be used as well.
|
||||
* @param buildOption Build option, see UStringTrieBuildOption.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return A new BytesTrie for the add()ed data.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Builds a BytesTrie for the add()ed data and byte-serializes it.
|
||||
* Once built, no further data can be add()ed until clear() is called.
|
||||
*
|
||||
* A BytesTrie cannot be empty. At least one (byte sequence, value) pair
|
||||
* must have been add()ed.
|
||||
*
|
||||
* Multiple calls to buildStringPiece() return StringPieces referring to the
|
||||
* builder's same byte array, without rebuilding.
|
||||
* If buildStringPiece() is called after build(), the trie will be
|
||||
* re-serialized into a new array (because build() passes on ownership).
|
||||
* If build() is called after buildStringPiece(), the trie object returned
|
||||
* by build() will become the owner of the underlying string for the
|
||||
* previously returned StringPiece.
|
||||
* After clear() has been called, a new array will be used as well.
|
||||
* @param buildOption Build option, see UStringTrieBuildOption.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Removes all (byte sequence, value) pairs.
|
||||
* New data can then be add()ed and a new trie can be built.
|
||||
* @return *this
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
BytesTrieBuilder &clear();
|
||||
|
||||
private:
|
||||
friend class ::BytesTrieTest;
|
||||
|
||||
BytesTrieBuilder(const BytesTrieBuilder &other) = delete; // no copy constructor
|
||||
BytesTrieBuilder &operator=(const BytesTrieBuilder &other) = delete; // no assignment operator
|
||||
|
||||
void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
virtual int32_t getElementStringLength(int32_t i) const override;
|
||||
virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const override;
|
||||
virtual int32_t getElementValue(int32_t i) const override;
|
||||
|
||||
virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const override;
|
||||
|
||||
virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const override;
|
||||
virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const override;
|
||||
virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const override;
|
||||
|
||||
virtual UBool matchNodesCanHaveValues() const override { return false; }
|
||||
|
||||
virtual int32_t getMaxBranchLinearSubNodeLength() const override { return BytesTrie::kMaxBranchLinearSubNodeLength; }
|
||||
virtual int32_t getMinLinearMatch() const override { return BytesTrie::kMinLinearMatch; }
|
||||
virtual int32_t getMaxLinearMatchLength() const override { return BytesTrie::kMaxLinearMatchLength; }
|
||||
|
||||
/**
|
||||
* @internal (private)
|
||||
*/
|
||||
class BTLinearMatchNode : public LinearMatchNode {
|
||||
public:
|
||||
BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
|
||||
virtual bool operator==(const Node &other) const override;
|
||||
virtual void write(StringTrieBuilder &builder) override;
|
||||
private:
|
||||
const char *s;
|
||||
};
|
||||
|
||||
virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
|
||||
Node *nextNode) const override;
|
||||
|
||||
UBool ensureCapacity(int32_t length);
|
||||
virtual int32_t write(int32_t byte) override;
|
||||
int32_t write(const char *b, int32_t length);
|
||||
virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) override;
|
||||
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) override;
|
||||
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) override;
|
||||
virtual int32_t writeDeltaTo(int32_t jumpTarget) override;
|
||||
static int32_t internalEncodeDelta(int32_t i, char intBytes[]);
|
||||
|
||||
CharString *strings; // Pointer not object so we need not #include internal charstr.h.
|
||||
BytesTrieElement *elements;
|
||||
int32_t elementsCapacity;
|
||||
int32_t elementsLength;
|
||||
|
||||
// Byte serialization of the trie.
|
||||
// Grows from the back: bytesLength measures from the end of the buffer!
|
||||
char *bytes;
|
||||
int32_t bytesCapacity;
|
||||
int32_t bytesLength;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __BYTESTRIEBUILDER_H__
|
||||
215
thirdparty/icu4c/common/unicode/caniter.h
vendored
Normal file
215
thirdparty/icu4c/common/unicode/caniter.h
vendored
Normal file
@@ -0,0 +1,215 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2014, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CANITER_H
|
||||
#define CANITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Canonical Iterator
|
||||
*/
|
||||
|
||||
/** Should permutation skip characters with combining class zero
|
||||
* Should be either true or false. This is a compile time option
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef CANITER_SKIP_ZEROES
|
||||
#define CANITER_SKIP_ZEROES true
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class Hashtable;
|
||||
class Normalizer2;
|
||||
class Normalizer2Impl;
|
||||
|
||||
/**
|
||||
* This class allows one to iterate through all the strings that are canonically equivalent to a given
|
||||
* string. For example, here are some sample results:
|
||||
Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
1: \\u0041\\u030A\\u0064\\u0307\\u0327
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
2: \\u0041\\u030A\\u0064\\u0327\\u0307
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
|
||||
3: \\u0041\\u030A\\u1E0B\\u0327
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
|
||||
4: \\u0041\\u030A\\u1E11\\u0307
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
|
||||
5: \\u00C5\\u0064\\u0307\\u0327
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
6: \\u00C5\\u0064\\u0327\\u0307
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
|
||||
7: \\u00C5\\u1E0B\\u0327
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
|
||||
8: \\u00C5\\u1E11\\u0307
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
|
||||
9: \\u212B\\u0064\\u0307\\u0327
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
10: \\u212B\\u0064\\u0327\\u0307
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
|
||||
11: \\u212B\\u1E0B\\u0327
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
|
||||
12: \\u212B\\u1E11\\u0307
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
|
||||
*<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
|
||||
* since it has not been optimized for that situation.
|
||||
* Note, CanonicalIterator is not intended to be subclassed.
|
||||
* @author M. Davis
|
||||
* @author C++ port by V. Weinstein
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
class U_COMMON_API CanonicalIterator final : public UObject {
public:
    /**
     * Constructs a CanonicalIterator object.
     * @param source    string to get results for
     * @param status    Fill-in parameter which receives the status of this operation.
     * @stable ICU 2.4
     */
    CanonicalIterator(const UnicodeString &source, UErrorCode &status);

    /**
     * Destructor. Cleans pieces.
     * @stable ICU 2.4
     */
    virtual ~CanonicalIterator();

    /**
     * Gets the NFD form of the current source we are iterating over.
     * @return the source. NOTE: it is the NFD form of the source.
     * @stable ICU 2.4
     */
    UnicodeString getSource();

    /**
     * Resets the iterator so that one can start again from the beginning.
     * @stable ICU 2.4
     */
    void reset();

    /**
     * Gets the next canonically equivalent string.
     * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
     * @return the next string that is canonically equivalent. A bogus string is
     *         returned when the iteration is done.
     * @stable ICU 2.4
     */
    UnicodeString next();

    /**
     * Sets a new source for this iterator. Allows object reuse.
     * @param newSource the source string to iterate against. This allows the same
     *                  iterator to be used while changing the source string,
     *                  saving object creation.
     * @param status    Fill-in parameter which receives the status of this operation.
     * @stable ICU 2.4
     */
    void setSource(const UnicodeString &newSource, UErrorCode &status);

#ifndef U_HIDE_INTERNAL_API
    /**
     * Dumb recursive implementation of permutation.
     * TODO: optimize
     * @param source     the string to find permutations for
     * @param skipZeros  whether to skip zeros
     * @param result     the results in a set.
     * @param status     Fill-in parameter which receives the status of this operation.
     * @param depth      depth of the call.
     * @internal
     */
    static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth=0);
#endif  /* U_HIDE_INTERNAL_API */

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @stable ICU 2.2
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @stable ICU 2.2
     */
    virtual UClassID getDynamicClassID() const override;

private:
    // ===================== PRIVATES ==============================

    // Default construction is disabled.
    CanonicalIterator() = delete;

    /**
     * Copy constructor. Disabled for now.
     * @internal (private)
     */
    CanonicalIterator(const CanonicalIterator& other) = delete;

    /**
     * Assignment operator. Disabled for now.
     * @internal (private)
     */
    CanonicalIterator& operator=(const CanonicalIterator& other) = delete;

    // fields
    UnicodeString source;
    UBool done;

    // 2 dimensional array: holds the pieces of the string with
    // their different canonically equivalent representations
    UnicodeString **pieces;
    int32_t pieces_length;
    int32_t *pieces_lengths;

    // current is used in iterating to combine pieces
    int32_t *current;
    int32_t current_length;

    // transient fields
    UnicodeString buffer;

    const Normalizer2 *nfd;
    const Normalizer2Impl *nfcImpl;

    // We have a segment, in NFD. Find all the strings that are canonically equivalent to it.
    // (Java original: private String[] getEquivalents(String segment))
    UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status);

    // (Java original: Set getEquivalents2(String segment))
    Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);

    /**
     * See if the decomposition of the code point (called cp2 in the original
     * note; presumably the comp parameter) is at segment starting at segmentPos
     * (with canonical rearrangement!).
     * If so, take the remainder, and return the equivalents.
     * (Java original: Set extract(int comp, String segment, int segmentPos, StringBuffer buffer))
     */
    Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);

    void cleanPieces();
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
497
thirdparty/icu4c/common/unicode/casemap.h
vendored
Normal file
497
thirdparty/icu4c/common/unicode/casemap.h
vendored
Normal file
@@ -0,0 +1,497 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// casemap.h
|
||||
// created: 2017jan12 Markus W. Scherer
|
||||
|
||||
#ifndef __CASEMAP_H__
|
||||
#define __CASEMAP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Low-level C++ case mapping functions.
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class BreakIterator;
|
||||
class ByteSink;
|
||||
class Edits;
|
||||
|
||||
/**
|
||||
* Low-level C++ case mapping functions.
|
||||
*
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API CaseMap final : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Lowercases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, nullptr = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
|
||||
* dest may be nullptr and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents are undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
* When the result would be longer than destCapacity,
|
||||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see u_strToLower
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t toLower(
|
||||
const char *locale, uint32_t options,
|
||||
const char16_t *src, int32_t srcLength,
|
||||
char16_t *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Uppercases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, nullptr = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
|
||||
* dest may be nullptr and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
* When the result would be longer than destCapacity,
|
||||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see u_strToUpper
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t toUpper(
|
||||
const char *locale, uint32_t options,
|
||||
const char16_t *src, int32_t srcLength,
|
||||
char16_t *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Titlecases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, nullptr = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
|
||||
* U_TITLECASE_NO_LOWERCASE,
|
||||
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
|
||||
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string (setText())
|
||||
* and used one or more times for iteration (first() and next()).
|
||||
* If nullptr, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
|
||||
* dest may be nullptr and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
* When the result would be longer than destCapacity,
|
||||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see u_strToTitle
|
||||
* @see ucasemap_toTitle
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t toTitle(
|
||||
const char *locale, uint32_t options, BreakIterator *iter,
|
||||
const char16_t *src, int32_t srcLength,
|
||||
char16_t *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif // UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Case-folds a UTF-16 string and optionally records edits.
|
||||
*
|
||||
* Case folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
|
||||
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
|
||||
* dest may be nullptr and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
* When the result would be longer than destCapacity,
|
||||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see u_strFoldCase
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t fold(
|
||||
uint32_t options,
|
||||
const char16_t *src, int32_t srcLength,
|
||||
char16_t *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Lowercases a UTF-8 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, nullptr = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param sink A ByteSink to which the result string is written.
|
||||
* sink.Flush() is called at the end.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_utf8ToLower
|
||||
* @stable ICU 60
|
||||
*/
|
||||
static void utf8ToLower(
|
||||
const char *locale, uint32_t options,
|
||||
StringPiece src, ByteSink &sink, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Uppercases a UTF-8 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, nullptr = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param sink A ByteSink to which the result string is written.
|
||||
* sink.Flush() is called at the end.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_utf8ToUpper
|
||||
* @stable ICU 60
|
||||
*/
|
||||
static void utf8ToUpper(
|
||||
const char *locale, uint32_t options,
|
||||
StringPiece src, ByteSink &sink, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Titlecases a UTF-8 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, nullptr = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
|
||||
* U_TITLECASE_NO_LOWERCASE,
|
||||
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
|
||||
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string (setUText())
|
||||
* and used one or more times for iteration (first() and next()).
|
||||
* If nullptr, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param src The original string.
|
||||
* @param sink A ByteSink to which the result string is written.
|
||||
* sink.Flush() is called at the end.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @stable ICU 60
|
||||
*/
|
||||
static void utf8ToTitle(
|
||||
const char *locale, uint32_t options, BreakIterator *iter,
|
||||
StringPiece src, ByteSink &sink, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif // UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Case-folds a UTF-8 string and optionally records edits.
|
||||
*
|
||||
* Case folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param sink A ByteSink to which the result string is written.
|
||||
* sink.Flush() is called at the end.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_utf8FoldCase
|
||||
* @stable ICU 60
|
||||
*/
|
||||
static void utf8Fold(
|
||||
uint32_t options,
|
||||
StringPiece src, ByteSink &sink, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Lowercases a UTF-8 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, nullptr = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be nullptr and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
* When the result would be longer than destCapacity,
|
||||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see ucasemap_utf8ToLower
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t utf8ToLower(
|
||||
const char *locale, uint32_t options,
|
||||
const char *src, int32_t srcLength,
|
||||
char *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Uppercases a UTF-8 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, nullptr = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be nullptr and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
* When the result would be longer than destCapacity,
|
||||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see ucasemap_utf8ToUpper
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t utf8ToUpper(
|
||||
const char *locale, uint32_t options,
|
||||
const char *src, int32_t srcLength,
|
||||
char *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Titlecases a UTF-8 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID. ("" = root locale, nullptr = default locale.)
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
|
||||
* U_TITLECASE_NO_LOWERCASE,
|
||||
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
|
||||
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string (setUText())
|
||||
* and used one or more times for iteration (first() and next()).
|
||||
* If nullptr, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be nullptr and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
* When the result would be longer than destCapacity,
|
||||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t utf8ToTitle(
|
||||
const char *locale, uint32_t options, BreakIterator *iter,
|
||||
const char *src, int32_t srcLength,
|
||||
char *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif // UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Case-folds a UTF-8 string and optionally records edits.
|
||||
*
|
||||
* Case folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
|
||||
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be nullptr and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The Edits contents is undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful.
|
||||
* When the result would be longer than destCapacity,
|
||||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see ucasemap_utf8FoldCase
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t utf8Fold(
|
||||
uint32_t options,
|
||||
const char *src, int32_t srcLength,
|
||||
char *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
CaseMap() = delete;
|
||||
CaseMap(const CaseMap &other) = delete;
|
||||
CaseMap &operator=(const CaseMap &other) = delete;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __CASEMAP_H__
|
||||
453
thirdparty/icu4c/common/unicode/char16ptr.h
vendored
Normal file
453
thirdparty/icu4c/common/unicode/char16ptr.h
vendored
Normal file
@@ -0,0 +1,453 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// char16ptr.h
|
||||
// created: 2017feb28 Markus W. Scherer
|
||||
|
||||
#ifndef __CHAR16PTR_H__
|
||||
#define __CHAR16PTR_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
|
||||
|
||||
#include <cstddef>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: char16_t pointer wrappers with
|
||||
* implicit conversion from bit-compatible raw pointer types.
|
||||
* Also conversion functions from char16_t * to UChar * and OldUChar *.
|
||||
*/
|
||||
|
||||
/**
 * \def U_ALIASING_BARRIER
 * Barrier for pointer anti-aliasing optimizations even across function boundaries.
 *
 * A definition supplied from outside is kept as is. Otherwise, with clang or GCC
 * (except on the Native Client platform) the macro expands to an empty asm
 * statement that takes the pointer as an input operand and declares a "memory"
 * clobber. For Doxygen runs it expands to nothing. In every other configuration
 * the macro remains undefined, so users must test for it with \#ifdef.
 * @internal
 */
#ifdef U_ALIASING_BARRIER
    // Use the predefined value.
#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT
#   define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
#elif defined(U_IN_DOXYGEN)
#   define U_ALIASING_BARRIER(ptr)
#endif
|
||||
|
||||
// ICU DLL-exported
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API Char16Ptr final {
|
||||
public:
|
||||
/**
|
||||
* Copies the pointer.
|
||||
* @param p pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(char16_t *p);
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(uint16_t *p);
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* (Only defined if U_SIZEOF_WCHAR_T==2.)
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(wchar_t *p);
|
||||
#endif
|
||||
/**
|
||||
* nullptr constructor.
|
||||
* @param p nullptr
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(std::nullptr_t p);
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ~Char16Ptr();
|
||||
|
||||
/**
|
||||
* Pointer access.
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline char16_t *get() const;
|
||||
/**
|
||||
* char16_t pointer access via type conversion (e.g., static_cast).
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline operator char16_t *() const { return get(); }
|
||||
|
||||
private:
|
||||
Char16Ptr() = delete;
|
||||
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
template<typename T> static char16_t *cast(T *t) {
|
||||
U_ALIASING_BARRIER(t);
|
||||
return reinterpret_cast<char16_t *>(t);
|
||||
}
|
||||
|
||||
char16_t *p_;
|
||||
#else
|
||||
union {
|
||||
char16_t *cp;
|
||||
uint16_t *up;
|
||||
wchar_t *wp;
|
||||
} u_;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// \cond
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
||||
Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
|
||||
Char16Ptr::~Char16Ptr() {
|
||||
U_ALIASING_BARRIER(p_);
|
||||
}
|
||||
|
||||
char16_t *Char16Ptr::get() const { return p_; }
|
||||
|
||||
#else
|
||||
|
||||
Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; }
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; }
|
||||
#endif
|
||||
Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
|
||||
Char16Ptr::~Char16Ptr() {}
|
||||
|
||||
char16_t *Char16Ptr::get() const { return u_.cp; }
|
||||
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API ConstChar16Ptr final {
|
||||
public:
|
||||
/**
|
||||
* Copies the pointer.
|
||||
* @param p pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const char16_t *p);
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const uint16_t *p);
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* (Only defined if U_SIZEOF_WCHAR_T==2.)
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const wchar_t *p);
|
||||
#endif
|
||||
/**
|
||||
* nullptr constructor.
|
||||
* @param p nullptr
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const std::nullptr_t p);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ~ConstChar16Ptr();
|
||||
|
||||
/**
|
||||
* Pointer access.
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const char16_t *get() const;
|
||||
/**
|
||||
* char16_t pointer access via type conversion (e.g., static_cast).
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline operator const char16_t *() const { return get(); }
|
||||
|
||||
private:
|
||||
ConstChar16Ptr() = delete;
|
||||
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
template<typename T> static const char16_t *cast(const T *t) {
|
||||
U_ALIASING_BARRIER(t);
|
||||
return reinterpret_cast<const char16_t *>(t);
|
||||
}
|
||||
|
||||
const char16_t *p_;
|
||||
#else
|
||||
union {
|
||||
const char16_t *cp;
|
||||
const uint16_t *up;
|
||||
const wchar_t *wp;
|
||||
} u_;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// \cond
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
||||
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
|
||||
ConstChar16Ptr::~ConstChar16Ptr() {
|
||||
U_ALIASING_BARRIER(p_);
|
||||
}
|
||||
|
||||
const char16_t *ConstChar16Ptr::get() const { return p_; }
|
||||
|
||||
#else
|
||||
|
||||
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; }
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; }
|
||||
#endif
|
||||
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
|
||||
ConstChar16Ptr::~ConstChar16Ptr() {}
|
||||
|
||||
const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
|
||||
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
// Usable in header-only definitions
|
||||
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
|
||||
|
||||
namespace U_ICU_NAMESPACE_OR_INTERNAL {
|
||||
|
||||
#ifndef U_FORCE_HIDE_INTERNAL_API
|
||||
/** @internal */
|
||||
template<typename T, typename = std::enable_if_t<std::is_same_v<T, UChar>>>
|
||||
inline const char16_t *uprv_char16PtrFromUChar(const T *p) {
|
||||
if constexpr (std::is_same_v<UChar, char16_t>) {
|
||||
return p;
|
||||
} else {
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
return ConstChar16Ptr(p).get();
|
||||
#else
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<const char16_t *>(p);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
|
||||
/**
 * Converts a const uint16_t * to a const char16_t *.
 *
 * Goes through ConstChar16Ptr when the C++ API is visible (U_SHOW_CPLUSPLUS_API);
 * otherwise applies U_ALIASING_BARRIER (if defined) and reinterpret_casts.
 *
 * @param p pointer to be converted
 * @return p as const char16_t *
 * @internal
 */
inline const char16_t *uprv_char16PtrFromUint16(const uint16_t *p) {
#if U_SHOW_CPLUSPLUS_API
    return ConstChar16Ptr(p).get();
#else
#ifdef U_ALIASING_BARRIER
    U_ALIASING_BARRIER(p);
#endif
    return reinterpret_cast<const char16_t *>(p);
#endif
}
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
/**
 * Converts a const wchar_t * to a const char16_t *.
 *
 * Goes through ConstChar16Ptr when the C++ API is visible (U_SHOW_CPLUSPLUS_API);
 * otherwise applies U_ALIASING_BARRIER (if defined) and reinterpret_casts.
 *
 * @param p pointer to be converted
 * @return p as const char16_t *
 * @internal
 */
inline const char16_t *uprv_char16PtrFromWchar(const wchar_t *p) {
#if U_SHOW_CPLUSPLUS_API
    return ConstChar16Ptr(p).get();
#else
#ifdef U_ALIASING_BARRIER
    U_ALIASING_BARRIER(p);
#endif
    return reinterpret_cast<const char16_t *>(p);
#endif
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Converts from const char16_t * to const UChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as const UChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const UChar *toUCharPtr(const char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<const UChar *>(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts from char16_t * to UChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as UChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline UChar *toUCharPtr(char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<UChar *>(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts from const char16_t * to const OldUChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as const OldUChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const OldUChar *toOldUCharPtr(const char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<const OldUChar *>(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts from char16_t * to OldUChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as OldUChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline OldUChar *toOldUCharPtr(char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<OldUChar *>(p);
|
||||
}
|
||||
|
||||
} // U_ICU_NAMESPACE_OR_INTERNAL
|
||||
|
||||
#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
|
||||
|
||||
// ICU DLL-exported
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef U_FORCE_HIDE_INTERNAL_API
|
||||
/**
 * Is T convertible to a std::u16string_view or some other 16-bit string view?
 *
 * Always accepts types convertible to std::u16string_view. Depending on the
 * configuration macros below, it additionally accepts types convertible to
 * std::basic_string_view<uint16_t> and/or std::wstring_view.
 * @internal
 */
template<typename T>
constexpr bool ConvertibleToU16StringView =
    std::is_convertible_v<T, std::u16string_view>
#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
    || std::is_convertible_v<T, std::basic_string_view<uint16_t>>
#endif
#if U_SIZEOF_WCHAR_T==2
    || std::is_convertible_v<T, std::wstring_view>
#endif
    ;
|
||||
|
||||
namespace internal {
|
||||
/**
|
||||
* Pass-through overload.
|
||||
* @internal
|
||||
*/
|
||||
inline std::u16string_view toU16StringView(std::u16string_view sv) {
    // Already the target view type: hand it back unchanged.
    return sv;
}
|
||||
|
||||
#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
|
||||
/**
|
||||
* Basically undefined behavior but sometimes necessary conversion
|
||||
* from std::basic_string_view<uint16_t> to std::u16string_view.
|
||||
* @internal
|
||||
*/
|
||||
inline std::u16string_view toU16StringView(std::basic_string_view<uint16_t> sv) {
|
||||
return { ConstChar16Ptr(sv.data()), sv.length() };
|
||||
}
|
||||
#endif
|
||||
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
/**
|
||||
* Basically undefined behavior but sometimes necessary conversion
|
||||
* from std::wstring_view to std::u16string_view.
|
||||
* @internal
|
||||
*/
|
||||
inline std::u16string_view toU16StringView(std::wstring_view sv) {
|
||||
return { ConstChar16Ptr(sv.data()), sv.length() };
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * Pass-through overload.
 * Selected when T is not a pointer type; simply forwards to toU16StringView().
 * @param text a non-pointer string-like value
 * @return the result of toU16StringView(text)
 * @internal
 */
template <typename T,
          typename = typename std::enable_if_t<!std::is_pointer_v<std::remove_reference_t<T>>>>
inline std::u16string_view toU16StringViewNullable(const T& text) {
    return toU16StringView(text);
}
|
||||
|
||||
/**
 * In case of nullptr, return an empty view.
 * Selected when T is a pointer type. The extra `typename = void` parameter
 * gives this overload a template signature distinct from the non-pointer
 * overload, which otherwise differs only in a default template argument.
 * @param text a pointer to string data, or nullptr
 * @return an empty view for nullptr, otherwise the result of toU16StringView(text)
 * @internal
 */
template <typename T,
          typename = typename std::enable_if_t<std::is_pointer_v<std::remove_reference_t<T>>>,
          typename = void>
inline std::u16string_view toU16StringViewNullable(const T& text) {
    if (text == nullptr) return {};  // For backward compatibility.
    return toU16StringView(text);
}
|
||||
|
||||
} // internal
|
||||
#endif // U_FORCE_HIDE_INTERNAL_API
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#endif // __CHAR16PTR_H__
|
||||
734
thirdparty/icu4c/common/unicode/chariter.h
vendored
Normal file
734
thirdparty/icu4c/common/unicode/chariter.h
vendored
Normal file
@@ -0,0 +1,734 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
********************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CHARITER_H
|
||||
#define CHARITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Character Iterator
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/**
|
||||
* Abstract class that defines an API for forward-only iteration
|
||||
* on text objects.
|
||||
* This is a minimal interface for iteration without random access
|
||||
* or backwards iteration. It is especially useful for wrapping
|
||||
* streams with converters into an object for collation or
|
||||
* normalization.
|
||||
*
|
||||
* <p>Characters can be accessed in two ways: as code units or as
|
||||
* code points.
|
||||
* Unicode code points are 21-bit integers and are the scalar values
|
||||
* of Unicode characters. ICU uses the type UChar32 for them.
|
||||
* Unicode code units are the storage units of a given
|
||||
* Unicode/UCS Transformation Format (a character encoding scheme).
|
||||
* With UTF-16, all code points can be represented with either one
|
||||
* or two code units ("surrogates").
|
||||
* String storage is typically based on code units, while properties
|
||||
* of characters are typically determined using code point values.
|
||||
* Some processes may be designed to work with sequences of code units,
|
||||
* or it may be known that all characters that are important to an
|
||||
* algorithm can be represented with single code units.
|
||||
* Other processes will need to use the code point access functions.</p>
|
||||
*
|
||||
* <p>ForwardCharacterIterator provides nextPostInc() to access
|
||||
* a code unit and advance an internal position into the text object,
|
||||
* similar to a <code>return text[position++]</code>.<br>
|
||||
* It provides next32PostInc() to access a code point and advance an internal
|
||||
* position.</p>
|
||||
*
|
||||
* <p>next32PostInc() assumes that the current position is that of
|
||||
* the beginning of a code point, i.e., of its first code unit.
|
||||
* After next32PostInc(), this will be true again.
|
||||
* In general, access to code units and code points in the same
|
||||
* iteration loop should not be mixed. In UTF-16, if the current position
|
||||
* is on a second code unit (Low Surrogate), then only that code unit
|
||||
* is returned even by next32PostInc().</p>
|
||||
*
|
||||
* <p>For iteration with either function, there are two ways to
|
||||
* check for the end of the iteration. When there are no more
|
||||
* characters in the text object:
|
||||
* <ul>
|
||||
* <li>The hasNext() function returns false.</li>
|
||||
* <li>nextPostInc() and next32PostInc() return DONE
|
||||
* when one attempts to read beyond the end of the text object.</li>
|
||||
* </ul>
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* void function1(ForwardCharacterIterator &it) {
|
||||
* UChar32 c;
|
||||
* while(it.hasNext()) {
|
||||
* c=it.next32PostInc();
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* void function2(ForwardCharacterIterator &it) {
|
||||
* char16_t c;
|
||||
* while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </p>
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API ForwardCharacterIterator : public UObject {
public:
    /**
     * Value returned by most of ForwardCharacterIterator's functions
     * when the iterator has reached the limits of its iteration.
     * The value is 0xffff.
     * @stable ICU 2.0
     */
    enum { DONE = 0xffff };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~ForwardCharacterIterator();

    /**
     * Returns true when both iterators refer to the same
     * character in the same character-storage object.
     * @param that The ForwardCharacterIterator to be compared for equality
     * @return true when both iterators refer to the same
     * character in the same character-storage object
     * @stable ICU 2.0
     */
    virtual bool operator==(const ForwardCharacterIterator& that) const = 0;

    /**
     * Returns true when the iterators refer to different
     * text-storage objects, or to different characters in the
     * same text-storage object.
     * @param that The ForwardCharacterIterator to be compared for inequality
     * @return true when the iterators refer to different
     * text-storage objects, or to different characters in the
     * same text-storage object
     * @stable ICU 2.0
     */
    inline bool operator!=(const ForwardCharacterIterator& that) const;

    /**
     * Generates a hash code for this iterator.
     * @return the hash code.
     * @stable ICU 2.0
     */
    virtual int32_t hashCode() const = 0;

    /**
     * Returns a UClassID for this ForwardCharacterIterator ("poor man's
     * RTTI").<P> Despite the fact that this function is public,
     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
     * @return a UClassID for this ForwardCharacterIterator
     * @stable ICU 2.0
     */
    virtual UClassID getDynamicClassID() const override = 0;

    /**
     * Gets the current code unit for returning and advances to the next code unit
     * in the iteration range
     * (toward endIndex()). If there are
     * no more code units to return, returns DONE.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual char16_t nextPostInc() = 0;

    /**
     * Gets the current code point for returning and advances to the next code point
     * in the iteration range
     * (toward endIndex()). If there are
     * no more code points to return, returns DONE.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32PostInc() = 0;

    /**
     * Returns false if there are no more code units or code points
     * at or after the current position in the iteration range.
     * This is used with nextPostInc() or next32PostInc() in forward
     * iteration.
     * @return false if there are no more code units or code points
     * at or after the current position in the iteration range.
     * @stable ICU 2.0
     */
    virtual UBool hasNext() = 0;

protected:
    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0 */
    ForwardCharacterIterator();

    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0 */
    ForwardCharacterIterator(const ForwardCharacterIterator &other);

    /**
     * Assignment operator to be overridden in the implementing class.
     * This class declares no data members of its own, so the base
     * implementation only returns *this.
     * @stable ICU 2.0
     */
    ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
};
|
||||
|
||||
/**
|
||||
* Abstract class that defines an API for iteration
|
||||
* on text objects.
|
||||
* This is an interface for forward and backward iteration
|
||||
* and random access into a text object.
|
||||
*
|
||||
* <p>The API provides backward compatibility to the Java and older ICU
|
||||
* CharacterIterator classes but extends them significantly:
|
||||
* <ol>
|
||||
* <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
|
||||
* <li>While the old API functions provided forward iteration with
|
||||
* "pre-increment" semantics, the new one also provides functions
|
||||
* with "post-increment" semantics. They are more efficient and should
|
||||
* be the preferred iterator functions for new implementations.
|
||||
* The backward iteration always had "pre-decrement" semantics, which
|
||||
* are efficient.</li>
|
||||
* <li>Just like ForwardCharacterIterator, it provides access to
|
||||
* both code units and code points. Code point access versions are available
|
||||
* for the old and the new iteration semantics.</li>
|
||||
* <li>There are new functions for setting and moving the current position
|
||||
* without returning a character, for efficiency.</li>
|
||||
* </ol>
|
||||
*
|
||||
* See ForwardCharacterIterator for examples for using the new forward iteration
|
||||
* functions. For backward iteration, there is also a hasPrevious() function
|
||||
* that can be used analogously to hasNext().
|
||||
* The old functions work as before and are shown below.</p>
|
||||
*
|
||||
* <p>Examples for some of the new functions:</p>
|
||||
*
|
||||
* Forward iteration with hasNext():
|
||||
* \code
|
||||
* void forward1(CharacterIterator &it) {
|
||||
* UChar32 c;
|
||||
* for(it.setToStart(); it.hasNext();) {
|
||||
* c=it.next32PostInc();
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* Forward iteration more similar to loops with the old forward iteration,
|
||||
* showing a way to convert simple for() loops:
|
||||
* \code
|
||||
* void forward2(CharacterIterator &it) {
|
||||
* char16_t c;
|
||||
* for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* Backward iteration with setToEnd() and hasPrevious():
|
||||
* \code
|
||||
* void backward1(CharacterIterator &it) {
|
||||
* UChar32 c;
|
||||
* for(it.setToEnd(); it.hasPrevious();) {
|
||||
* c=it.previous32();
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* Backward iteration with a more traditional for() loop:
|
||||
* \code
|
||||
* void backward2(CharacterIterator &it) {
|
||||
* char16_t c;
|
||||
* for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* Example for random access:
|
||||
* \code
|
||||
* void random(CharacterIterator &it) {
|
||||
* // set to the third code point from the beginning
|
||||
* it.move32(3, CharacterIterator::kStart);
|
||||
* // get a code point from here without moving the position
|
||||
* UChar32 c=it.current32();
|
||||
* // get the position
|
||||
* int32_t pos=it.getIndex();
|
||||
* // get the previous code unit
|
||||
* char16_t u=it.previous();
|
||||
* // move back one more code unit
|
||||
* it.move(-1, CharacterIterator::kCurrent);
|
||||
* // set the position back to where it was
|
||||
* // and read the same code point c and move beyond it
|
||||
* it.setIndex(pos);
|
||||
* if(c!=it.next32PostInc()) {
|
||||
* exit(1); // CharacterIterator inconsistent
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* <p>Examples, especially for the old API:</p>
|
||||
*
|
||||
* Function processing characters, in this example simple output
|
||||
* <pre>
|
||||
* \code
|
||||
* void processChar( char16_t c )
|
||||
* {
|
||||
* cout << " " << c;
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Traverse the text from start to finish
|
||||
* <pre>
|
||||
* \code
|
||||
* void traverseForward(CharacterIterator& iter)
|
||||
* {
|
||||
* for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
|
||||
* processChar(c);
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Traverse the text backwards, from end to start
|
||||
* <pre>
|
||||
* \code
|
||||
* void traverseBackward(CharacterIterator& iter)
|
||||
* {
|
||||
* for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
|
||||
* processChar(c);
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Traverse both forward and backward from a given position in the text.
|
||||
* The Unicode::isLetter()/Unicode::isDigit() tests in this example stand in for some additional stopping criteria.
|
||||
* <pre>
|
||||
* \code
|
||||
* void traverseOut(CharacterIterator& iter, int32_t pos)
|
||||
* {
|
||||
* char16_t c;
|
||||
* for (c = iter.setIndex(pos);
|
||||
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
||||
* c = iter.next()) {}
|
||||
* int32_t end = iter.getIndex();
|
||||
* for (c = iter.setIndex(pos);
|
||||
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
||||
* c = iter.previous()) {}
|
||||
* int32_t start = iter.getIndex() + 1;
|
||||
*
|
||||
* cout << "start: " << start << " end: " << end << endl;
|
||||
* for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
|
||||
* processChar(c);
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Creating a StringCharacterIterator and calling the test functions
|
||||
* <pre>
|
||||
* \code
|
||||
* void CharacterIterator_Example( void )
|
||||
* {
|
||||
* cout << endl << "===== CharacterIterator_Example: =====" << endl;
|
||||
* UnicodeString text("Ein kleiner Satz.");
|
||||
* StringCharacterIterator iterator(text);
|
||||
* cout << "----- traverseForward: -----------" << endl;
|
||||
* traverseForward( iterator );
|
||||
* cout << endl << endl << "----- traverseBackward: ----------" << endl;
|
||||
* traverseBackward( iterator );
|
||||
* cout << endl << endl << "----- traverseOut: ---------------" << endl;
|
||||
* traverseOut( iterator, 7 );
|
||||
* cout << endl << endl << "-----" << endl;
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
public:
    /**
     * Origin enumeration for the move() and move32() functions.
     * @stable ICU 2.0
     */
    enum EOrigin { kStart, kCurrent, kEnd };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~CharacterIterator();

    /**
     * Returns a pointer to a new CharacterIterator of the same
     * concrete class as this one, and referring to the same
     * character in the same text-storage object as this one. The
     * caller is responsible for deleting the new clone.
     * @return a pointer to a new CharacterIterator
     * @stable ICU 2.0
     */
    virtual CharacterIterator* clone() const = 0;

    /**
     * Sets the iterator to refer to the first code unit in its
     * iteration range, and returns that code unit.
     * This can be used to begin an iteration with next().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual char16_t first() = 0;

    /**
     * Sets the iterator to refer to the first code unit in its
     * iteration range, returns that code unit, and moves the position
     * to the second code unit. This is an alternative to setToStart()
     * for forward iteration with nextPostInc().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual char16_t firstPostInc();

    /**
     * Sets the iterator to refer to the first code point in its
     * iteration range, and returns that code point.
     * This can be used to begin an iteration with next32().
     * Note that an iteration with next32PostInc(), beginning with,
     * e.g., setToStart() or firstPostInc(), is more efficient.
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32() = 0;

    /**
     * Sets the iterator to refer to the first code point in its
     * iteration range, returns that code point, and moves the position
     * to the second code point. This is an alternative to setToStart()
     * for forward iteration with next32PostInc().
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32PostInc();

    /**
     * Sets the iterator to refer to the first code unit or code point in its
     * iteration range. This can be used to begin a forward
     * iteration with nextPostInc() or next32PostInc().
     * @return the start position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToStart();

    /**
     * Sets the iterator to refer to the last code unit in its
     * iteration range, and returns that code unit.
     * This can be used to begin an iteration with previous().
     * @return the last code unit.
     * @stable ICU 2.0
     */
    virtual char16_t last() = 0;

    /**
     * Sets the iterator to refer to the last code point in its
     * iteration range, and returns that code point.
     * This can be used to begin an iteration with previous32().
     * @return the last code point.
     * @stable ICU 2.0
     */
    virtual UChar32 last32() = 0;

    /**
     * Sets the iterator to the end of its iteration range, just behind
     * the last code unit or code point. This can be used to begin a backward
     * iteration with previous() or previous32().
     * @return the end position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToEnd();

    /**
     * Sets the iterator to refer to the "position"-th code unit
     * in the text-storage object the iterator refers to, and
     * returns that code unit.
     * @param position the "position"-th code unit in the text-storage object
     * @return the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual char16_t setIndex(int32_t position) = 0;

    /**
     * Sets the iterator to refer to the beginning of the code point
     * that contains the "position"-th code unit
     * in the text-storage object the iterator refers to, and
     * returns that code point.
     * The current position is adjusted to the beginning of the code point
     * (its first code unit).
     * @param position the "position"-th code unit in the text-storage object
     * @return the code point that contains the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual UChar32 setIndex32(int32_t position) = 0;

    /**
     * Returns the code unit the iterator currently refers to.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual char16_t current() const = 0;

    /**
     * Returns the code point the iterator currently refers to.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 current32() const = 0;

    /**
     * Advances to the next code unit in the iteration range
     * (toward endIndex()), and returns that code unit. If there are
     * no more code units to return, returns DONE.
     * @return the next code unit.
     * @stable ICU 2.0
     */
    virtual char16_t next() = 0;

    /**
     * Advances to the next code point in the iteration range
     * (toward endIndex()), and returns that code point. If there are
     * no more code points to return, returns DONE.
     * Note that iteration with "pre-increment" semantics is less
     * efficient than iteration with "post-increment" semantics
     * that is provided by next32PostInc().
     * @return the next code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32() = 0;

    /**
     * Advances to the previous code unit in the iteration range
     * (toward startIndex()), and returns that code unit. If there are
     * no more code units to return, returns DONE.
     * @return the previous code unit.
     * @stable ICU 2.0
     */
    virtual char16_t previous() = 0;

    /**
     * Advances to the previous code point in the iteration range
     * (toward startIndex()), and returns that code point. If there are
     * no more code points to return, returns DONE.
     * @return the previous code point.
     * @stable ICU 2.0
     */
    virtual UChar32 previous32() = 0;

    /**
     * Returns false if there are no more code units or code points
     * before the current position in the iteration range.
     * This is used with previous() or previous32() in backward
     * iteration.
     * @return false if there are no more code units or code points
     * before the current position in the iteration range, return true otherwise.
     * @stable ICU 2.0
     */
    virtual UBool hasPrevious() = 0;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the character returned by first(). Since it's
     * possible to create an iterator that iterates across only
     * part of a text-storage object, this number isn't
     * necessarily 0.
     * @return the numeric index in the underlying text-storage
     * object of the character returned by first().
     * @stable ICU 2.0
     */
    inline int32_t startIndex() const;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the position immediately BEYOND the character
     * returned by last().
     * @return the numeric index in the underlying text-storage
     * object of the position immediately BEYOND the character
     * returned by last().
     * @stable ICU 2.0
     */
    inline int32_t endIndex() const;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the character the iterator currently refers to
     * (i.e., the character returned by current()).
     * @return the numeric index in the text-storage object of
     * the character the iterator currently refers to
     * @stable ICU 2.0
     */
    inline int32_t getIndex() const;

    /**
     * Returns the length of the entire text in the underlying
     * text-storage object.
     * @return the length of the entire text in the text-storage object
     * @stable ICU 2.0
     */
    inline int32_t getLength() const;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The movement is expressed in numbers of code units forward
     * or backward by specifying a positive or negative delta.
     * @param delta the position relative to origin. A positive delta means forward;
     * a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move(int32_t delta, EOrigin origin) = 0;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The movement is expressed in numbers of code points forward
     * or backward by specifying a positive or negative delta.
     * @param delta the position relative to origin. A positive delta means forward;
     * a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
#ifdef move32
    // One of the system headers right now is sometimes defining a conflicting macro we don't use
#undef move32
#endif
    virtual int32_t move32(int32_t delta, EOrigin origin) = 0;

    /**
     * Copies the text under iteration into the UnicodeString
     * referred to by "result".
     * @param result Receives a copy of the text under iteration.
     * @stable ICU 2.0
     */
    virtual void getText(UnicodeString& result) = 0;

protected:
    /**
     * Empty constructor.
     * @stable ICU 2.0
     */
    CharacterIterator();

    /**
     * Constructor, just setting the length field in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length);

    /**
     * Constructor, just setting the length and position fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t position);

    /**
     * Constructor, just setting the length, start, end, and position fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);

    /**
     * Copy constructor.
     *
     * @param that The CharacterIterator to be copied
     * @stable ICU 2.0
     */
    CharacterIterator(const CharacterIterator &that);

    /**
     * Assignment operator. Sets this CharacterIterator to have the same behavior
     * as the one passed in.
     * @param that The CharacterIterator passed in.
     * @return the newly set CharacterIterator.
     * @stable ICU 2.0
     */
    CharacterIterator &operator=(const CharacterIterator &that);

    /**
     * Base class text length field.
     * This is necessary for correct getText() and hashCode().
     * @stable ICU 2.0
     */
    int32_t textLength;

    /**
     * Base class field for the current position.
     * @stable ICU 2.0
     */
    int32_t pos;

    /**
     * Base class field for the start of the iteration range.
     * @stable ICU 2.0
     */
    int32_t begin;

    /**
     * Base class field for the end of the iteration range.
     * @stable ICU 2.0
     */
    int32_t end;
};
|
||||
|
||||
inline bool
|
||||
ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
|
||||
return !operator==(that);
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::setToStart() {
|
||||
return move(0, kStart);
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::setToEnd() {
|
||||
return move(0, kEnd);
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::startIndex() const {
|
||||
return begin;
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::endIndex() const {
|
||||
return end;
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::getIndex() const {
|
||||
return pos;
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::getLength() const {
|
||||
return textLength;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
48
thirdparty/icu4c/common/unicode/dbbi.h
vendored
Normal file
48
thirdparty/icu4c/common/unicode/dbbi.h
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2006,2013 IBM Corp. All rights reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 12/1/99 rgillam Complete port from Java.
|
||||
* 01/13/2000 helena Added UErrorCode to ctors.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef DBBI_H
|
||||
#define DBBI_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/rbbi.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Dictionary Based Break Iterator
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* An obsolete subclass of RuleBasedBreakIterator. Handling of dictionary-
|
||||
* based break iteration has been folded into the base class. This class
|
||||
* is deprecated as of ICU 3.6.
|
||||
* @deprecated ICU 3.6
|
||||
*/
|
||||
using DictionaryBasedBreakIterator = RuleBasedBreakIterator;  // old name, same type
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
247
thirdparty/icu4c/common/unicode/docmain.h
vendored
Normal file
247
thirdparty/icu4c/common/unicode/docmain.h
vendored
Normal file
@@ -0,0 +1,247 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2012, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*
|
||||
* FILE NAME: DOCMAIN.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 12/11/2000 Ram Creation.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief (Non-API: contains Doxygen definitions)
|
||||
*
|
||||
* This file contains documentation for Doxygen and does not have
|
||||
* any significance with respect to C or C++ API
|
||||
*/
|
||||
|
||||
/*! \mainpage
|
||||
*
|
||||
* \section API API Reference Usage
|
||||
*
|
||||
* <h3>C++ Programmers:</h3>
|
||||
* <p>Use <a href="hierarchy.html">Class Hierarchy</a> or <a href="classes.html"> Alphabetical List </a>
|
||||
* or <a href="annotated.html"> Compound List</a>
|
||||
* to find the class you are interested in. For example, to find BreakIterator,
|
||||
* you can go to the <a href="classes.html"> Alphabetical List</a>, then click on
|
||||
* "BreakIterator". Once you are at the class, you will find an inheritance
|
||||
* chart, a list of the public members, a detailed description of the class,
|
||||
* then detailed member descriptions.</p>
|
||||
*
|
||||
* <h3>C Programmers:</h3>
|
||||
* <p>Use <a href="#Module">Module List</a> or <a href="globals_u.html">File Members</a>
|
||||
* to find a list of all the functions and constants.
|
||||
* For example, to find BreakIterator functions you would click on
|
||||
* <a href="files.html"> File List</a>,
|
||||
* then find "ubrk.h" and click on it. You will find descriptions of Defines,
|
||||
* Typedefs, Enumerations, and Functions, with detailed descriptions below.
|
||||
* If you want to find a specific function, such as ubrk_next(), then click
|
||||
* first on <a href="globals.html"> File Members</a>, then use your browser's
|
||||
* Find dialog to search for "ubrk_next()".</p>
|
||||
*
|
||||
*
|
||||
* <h3>API References for Previous Releases</h3>
|
||||
* <p>The API References for each release of ICU are also available as
|
||||
* a zip file from the ICU
|
||||
* <a href="https://icu.unicode.org/download">download page</a>.</p>
|
||||
*
|
||||
* <hr>
|
||||
*
|
||||
* <h2>Architecture (User's Guide)</h2>
|
||||
* <ul>
|
||||
* <li><a href="https://unicode-org.github.io/icu/userguide/">Introduction</a></li>
|
||||
* <li><a href="https://unicode-org.github.io/icu/userguide/i18n">Internationalization</a></li>
|
||||
* <li><a href="https://unicode-org.github.io/icu/userguide/design">Locale Model, Multithreading, Error Handling, etc.</a></li>
|
||||
* <li><a href="https://unicode-org.github.io/icu/userguide/conversion">Conversion</a></li>
|
||||
* </ul>
|
||||
*
|
||||
* <hr>
|
||||
*\htmlonly <h2><a NAME="Module">Module List</a></h2> \endhtmlonly
|
||||
* <table border="1" cols="3" align="center">
|
||||
* <tr>
|
||||
* <td><strong>Module Name</strong></td>
|
||||
* <td><strong>C</strong></td>
|
||||
* <td><strong>C++</strong></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Basic Types and Constants</td>
|
||||
* <td>utypes.h</td>
|
||||
* <td>utypes.h</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Strings and Character Iteration</td>
|
||||
* <td>ustring.h, utf8.h, utf16.h, icu::StringPiece, UText, UCharIterator, icu::ByteSink</td>
|
||||
* <td>icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece, icu::ByteSink</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Unicode Character<br/>Properties and Names</td>
|
||||
* <td>uchar.h, uscript.h</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Sets of Unicode Code Points and Strings</td>
|
||||
* <td>uset.h</td>
|
||||
* <td>icu::UnicodeSet</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Maps from Unicode Code Points to Integer Values</td>
|
||||
* <td>ucptrie.h, umutablecptrie.h</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Maps from Strings to Integer Values</td>
|
||||
* <td>(no C API)</td>
|
||||
* <td>icu::BytesTrie, icu::UCharsTrie</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Codepage Conversion</td>
|
||||
* <td>ucnv.h, ucnvsel.h</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Codepage Detection</td>
|
||||
* <td>ucsdet.h</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Unicode Text Compression</td>
|
||||
* <td>ucnv.h<br/>(encoding name "SCSU" or "BOCU-1")</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Locales </td>
|
||||
* <td>uloc.h, ulocale.h, ulocbuilder.h</td>
|
||||
* <td>icu::Locale, icu::LocaleBuilder, icu::LocaleMatcher</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Resource Bundles</td>
|
||||
* <td>ures.h</td>
|
||||
* <td>icu::ResourceBundle</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Normalization</td>
|
||||
* <td>unorm2.h</td>
|
||||
* <td>icu::Normalizer2</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Calendars and Time Zones</td>
|
||||
* <td>ucal.h</td>
|
||||
* <td>icu::Calendar, icu::TimeZone</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Date and Time Formatting</td>
|
||||
* <td>udat.h</td>
|
||||
* <td>icu::DateFormat</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Relative Date and Time Formatting</td>
|
||||
* <td>ureldatefmt.h</td>
|
||||
* <td>icu::RelativeDateTimeFormatter</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Message Formatting</td>
|
||||
* <td>umsg.h</td>
|
||||
* <td>icu::MessageFormat</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Message Formatting 2<br/>(technology preview)</td>
|
||||
* <td>(no C API)</td>
|
||||
* <td>icu::message2::MessageFormatter</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>List Formatting</td>
|
||||
* <td>ulistformatter.h</td>
|
||||
* <td>icu::ListFormatter</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Number Formatting<br/>(includes currency and unit formatting)</td>
|
||||
* <td>unumberformatter.h, unum.h, usimplenumberformatter.h</td>
|
||||
* <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)<br>icu::number::SimpleNumberFormatter (ICU 73+)</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Number Range Formatting<br />(includes currency and unit ranges)</td>
|
||||
* <td>unumberrangeformatter.h</td>
|
||||
* <td>icu::number::NumberRangeFormatter</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Number Spellout<br/>(Rule Based Number Formatting)</td>
|
||||
* <td>unum.h<br/>(use UNUM_SPELLOUT)</td>
|
||||
* <td>icu::RuleBasedNumberFormat</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Text Transformation<br/>(Transliteration)</td>
|
||||
* <td>utrans.h</td>
|
||||
* <td>icu::Transliterator</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Bidirectional Algorithm</td>
|
||||
* <td>ubidi.h, ubiditransform.h</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Arabic Shaping</td>
|
||||
* <td>ushape.h</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Collation</td>
|
||||
* <td>ucol.h</td>
|
||||
* <td>icu::Collator</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>String Searching</td>
|
||||
* <td>usearch.h</td>
|
||||
* <td>icu::StringSearch</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Index Characters/<br/>Bucketing for Sorted Lists</td>
|
||||
* <td>(no C API)</td>
|
||||
* <td>icu::AlphabeticIndex</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Text Boundary Analysis<br/>(Break Iteration)</td>
|
||||
* <td>ubrk.h</td>
|
||||
* <td>icu::BreakIterator</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Regular Expressions</td>
|
||||
* <td>uregex.h</td>
|
||||
* <td>icu::RegexPattern, icu::RegexMatcher</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>StringPrep</td>
|
||||
* <td>usprep.h</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>International Domain Names in Applications:<br/>
|
||||
* UTS #46 in C/C++, IDNA2003 only via C API</td>
|
||||
* <td>uidna.h</td>
|
||||
* <td>idna.h</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Identifier Spoofing &amp; Confusability</td>
|
||||
* <td>uspoof.h</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Universal Time Scale</td>
|
||||
* <td>utmscale.h</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Paragraph Layout / Complex Text Layout</td>
|
||||
* <td>playout.h</td>
|
||||
* <td>icu::ParagraphLayout</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>ICU I/O</td>
|
||||
* <td>ustdio.h</td>
|
||||
* <td>ustream.h</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
* <i>This main page is generated from docmain.h</i>
|
||||
*/
|
||||
163
thirdparty/icu4c/common/unicode/dtintrv.h
vendored
Normal file
163
thirdparty/icu4c/common/unicode/dtintrv.h
vendored
Normal file
@@ -0,0 +1,163 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2008-2009, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
* File DTINTRV.H
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef __DTINTRV_H__
|
||||
#define __DTINTRV_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Date Interval data type
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
/**
|
||||
* This class represents a date interval.
|
||||
* It is a pair of UDate values representing the interval from the first date to the second.
|
||||
* @stable ICU 4.0
|
||||
**/
|
||||
class U_COMMON_API DateInterval : public UObject {
|
||||
public:
|
||||
|
||||
/**
|
||||
* Construct a DateInterval given a from date and a to date.
|
||||
* @param fromDate The from (start) date of the interval.
|
||||
* @param toDate The to (end) date of the interval.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
DateInterval(UDate fromDate, UDate toDate);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
virtual ~DateInterval();
|
||||
|
||||
/**
|
||||
* Get the from date.
|
||||
* @return the from date of this DateInterval.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
inline UDate getFromDate() const;
|
||||
|
||||
/**
|
||||
* Get the to date.
|
||||
* @return the to date of this DateInterval.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
inline UDate getToDate() const;
|
||||
|
||||
|
||||
/**
|
||||
* Return the class ID for this class. This is useful only for comparing to
|
||||
* a return value from getDynamicClassID(). For example:
|
||||
* <pre>
|
||||
* . Base* polymorphic_pointer = createPolymorphicObject();
|
||||
* . if (polymorphic_pointer->getDynamicClassID() ==
|
||||
* . derived::getStaticClassID()) ...
|
||||
* </pre>
|
||||
* @return The class ID for all objects of this class.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
/**
|
||||
* Returns a unique class ID POLYMORPHICALLY. Overrides a base-class virtual. This
|
||||
* method is to implement a simple version of RTTI, since not all C++
|
||||
* compilers support genuine RTTI. Polymorphic operator==() and clone()
|
||||
* methods call this method.
|
||||
*
|
||||
* @return The class ID for this object. All objects of a
|
||||
* given class have the same class ID. Objects of
|
||||
* other classes have different class IDs.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const override;
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
DateInterval(const DateInterval& other);
|
||||
|
||||
/**
|
||||
* Copy assignment operator.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
DateInterval& operator=(const DateInterval&);
|
||||
|
||||
/**
|
||||
* Equality operator.
|
||||
* @return true if the two DateIntervals are the same
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
virtual bool operator==(const DateInterval& other) const;
|
||||
|
||||
/**
|
||||
* Non-equality operator.
|
||||
* @return true if the two DateIntervals are not the same
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
inline bool operator!=(const DateInterval& other) const;
|
||||
|
||||
|
||||
/**
|
||||
* Clone this object.
|
||||
* The caller owns the result and should delete it when done.
|
||||
* @return a cloned DateInterval
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
virtual DateInterval* clone() const;
|
||||
|
||||
private:
|
||||
/**
|
||||
* Default constructor, deleted: use the (fromDate, toDate) or copy constructor.
|
||||
*/
|
||||
DateInterval() = delete;
|
||||
|
||||
// Interval start; the value returned by getFromDate().
UDate fromDate;
|
||||
// Interval end; the value returned by getToDate().
UDate toDate;
|
||||
|
||||
} ;// end class DateInterval
|
||||
|
||||
|
||||
/** Inline const accessor: returns the stored `fromDate` member. */
inline UDate
|
||||
DateInterval::getFromDate() const {
|
||||
return fromDate;
|
||||
}
|
||||
|
||||
|
||||
/** Inline const accessor: returns the stored `toDate` member. */
inline UDate
|
||||
DateInterval::getToDate() const {
|
||||
return toDate;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Inequality is defined purely as the negation of operator==(other).
 * operator== is declared virtual in the class, so the call below dispatches
 * to any override and the two operators stay consistent.
 */
inline bool
|
||||
DateInterval::operator!=(const DateInterval& other) const {
|
||||
return ( !operator==(other) );
|
||||
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
531
thirdparty/icu4c/common/unicode/edits.h
vendored
Normal file
531
thirdparty/icu4c/common/unicode/edits.h
vendored
Normal file
@@ -0,0 +1,531 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// edits.h
|
||||
// created: 2016dec30 Markus W. Scherer
|
||||
|
||||
#ifndef __EDITS_H__
|
||||
#define __EDITS_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: C++ class Edits for low-level string transformations on styled text.
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeString;
|
||||
|
||||
/**
|
||||
* Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
|
||||
* in linear progression. Does not support moving/reordering of text.
|
||||
*
|
||||
* There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
|
||||
* instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and
|
||||
* {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity,
|
||||
* whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
|
||||
* mapping between code points in the source and destination strings.
|
||||
*
|
||||
* After all edits have been added, instances of this class should be considered immutable, and an
|
||||
* {@link Edits::Iterator} can be used for queries.
|
||||
*
|
||||
* There are four flavors of Edits::Iterator:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link #getFineIterator()} retains full granularity of change edits.
|
||||
* <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
|
||||
* next() on the iterator, skips over no-change edits (unchanged regions).
|
||||
* <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
|
||||
* edits are automatically merged during the construction phase.)
|
||||
* <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
|
||||
* calling next() on the iterator, skips over no-change edits (unchanged regions).
|
||||
* </ul>
|
||||
*
|
||||
* For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
|
||||
* following fine edits:
|
||||
* <ul>
|
||||
* <li>abc ⇨ abc (no-change)
|
||||
* <li>ß ⇨ ss (change)
|
||||
* <li>D ⇨ d (change)
|
||||
* <li>e ⇨ e (no-change)
|
||||
* <li>F ⇨ f (change)
|
||||
* </ul>
|
||||
* and the following coarse edits (note how adjacent change edits get merged together):
|
||||
* <ul>
|
||||
* <li>abc ⇨ abc (no-change)
|
||||
* <li>ßD ⇨ ssd (change)
|
||||
* <li>e ⇨ e (no-change)
|
||||
* <li>F ⇨ f (change)
|
||||
* </ul>
|
||||
*
|
||||
* The "fine changes" and "coarse changes" iterators will step through only the change edits when their
|
||||
* `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when
|
||||
* their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()`
|
||||
* methods are used to walk through the string.
|
||||
*
|
||||
* For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in
|
||||
* UCharacterCaseTest.java.
|
||||
*
|
||||
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
|
||||
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
|
||||
*
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API Edits final : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs an empty object.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Edits() :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
|
||||
errorCode_(U_ZERO_ERROR) {}
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @param other source edits
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Edits(const Edits &other) :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(other.length),
|
||||
delta(other.delta), numChanges(other.numChanges),
|
||||
errorCode_(other.errorCode_) {
|
||||
copyArray(other);
|
||||
}
|
||||
/**
|
||||
* Move constructor, might leave src empty.
|
||||
* This object will have the same contents that the source object had.
|
||||
* @param src source edits
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Edits(Edits &&src) noexcept :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(src.length),
|
||||
delta(src.delta), numChanges(src.numChanges),
|
||||
errorCode_(src.errorCode_) {
|
||||
moveArray(src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
~Edits();
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
* @param other source edits
|
||||
* @return *this
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Edits &operator=(const Edits &other);
|
||||
|
||||
/**
|
||||
* Move assignment operator, might leave src empty.
|
||||
* This object will have the same contents that the source object had.
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source edits
|
||||
* @return *this
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Edits &operator=(Edits &&src) noexcept;
|
||||
|
||||
/**
|
||||
* Resets the data but may not release memory.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
void reset() noexcept;
|
||||
|
||||
/**
|
||||
* Adds a no-change edit: a record for an unchanged segment of text.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
void addUnchanged(int32_t unchangedLength);
|
||||
/**
|
||||
* Adds a change edit: a record for a text replacement/insertion/deletion.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
void addReplace(int32_t oldLength, int32_t newLength);
|
||||
/**
|
||||
* Sets the UErrorCode if an error occurred while recording edits.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @param outErrorCode Set to an error code if it does not contain one already
|
||||
* and an error occurred while recording edits.
|
||||
* Otherwise unchanged.
|
||||
* @return true if U_FAILURE(outErrorCode)
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool copyErrorTo(UErrorCode &outErrorCode) const;
|
||||
|
||||
/**
|
||||
* How much longer is the new text compared with the old text?
|
||||
* @return new length minus old length
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t lengthDelta() const { return delta; }
|
||||
/**
|
||||
* @return true if there are any change edits
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool hasChanges() const { return numChanges != 0; }
|
||||
|
||||
/**
|
||||
* @return the number of change edits
|
||||
* @stable ICU 60
|
||||
*/
|
||||
int32_t numberOfChanges() const { return numChanges; }
|
||||
|
||||
/**
|
||||
* Access to the list of edits.
|
||||
*
|
||||
* At any moment in time, an instance of this class points to a single edit: a "window" into a span
|
||||
* of the source string and the corresponding span of the destination string. The source string span
|
||||
* starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
|
||||
* span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
|
||||
*
|
||||
* The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`,
|
||||
* and `findDestinationIndex(int32_t, UErrorCode &)` methods.
|
||||
* Calling any of these methods mutates the iterator to make it point to the corresponding edit.
|
||||
*
|
||||
* For more information, see the documentation for {@link Edits}.
|
||||
*
|
||||
* @see getCoarseIterator
|
||||
* @see getFineIterator
|
||||
* @stable ICU 59
|
||||
*/
|
||||
struct U_COMMON_API Iterator final : public UMemory {
|
||||
/**
|
||||
* Default constructor, empty iterator.
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Iterator() :
|
||||
array(nullptr), index(0), length(0),
|
||||
remaining(0), onlyChanges_(false), coarse(false),
|
||||
dir(0), changed(false), oldLength_(0), newLength_(0),
|
||||
srcIndex(0), replIndex(0), destIndex(0) {}
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator(const Iterator &other) = default;
|
||||
/**
|
||||
* Assignment operator.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator &operator=(const Iterator &other) = default;
|
||||
|
||||
/**
|
||||
* Advances the iterator to the next edit.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return true if there is another edit
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
|
||||
|
||||
/**
|
||||
* Moves the iterator to the edit that contains the source index.
|
||||
* The source index may be found in a no-change edit
|
||||
* even if normal iteration would skip no-change edits.
|
||||
* Normal iteration can continue from a found edit.
|
||||
*
|
||||
* The iterator state before this search logically does not matter.
|
||||
* (It may affect the performance of the search.)
|
||||
*
|
||||
* The iterator state after this search is undefined
|
||||
* if the source index is out of bounds for the source string.
|
||||
*
|
||||
* @param i source index
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return true if the edit for the source index was found
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
|
||||
return findIndex(i, true, errorCode) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Moves the iterator to the edit that contains the destination index.
|
||||
* The destination index may be found in a no-change edit
|
||||
* even if normal iteration would skip no-change edits.
|
||||
* Normal iteration can continue from a found edit.
|
||||
*
|
||||
* The iterator state before this search logically does not matter.
|
||||
* (It may affect the performance of the search.)
|
||||
*
|
||||
* The iterator state after this search is undefined
|
||||
* if the destination index is out of bounds for the destination string.
|
||||
*
|
||||
* @param i destination index
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return true if the edit for the destination index was found
|
||||
* @stable ICU 60
|
||||
*/
|
||||
UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
|
||||
return findIndex(i, false, errorCode) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the destination index corresponding to the given source index.
|
||||
* If the source index is inside a change edit (not at its start),
|
||||
* then the destination index at the end of that edit is returned,
|
||||
* since there is no information about index mapping inside a change edit.
|
||||
*
|
||||
* (This means that indexes to the start and middle of an edit,
|
||||
* for example around a grapheme cluster, are mapped to indexes
|
||||
* encompassing the entire edit.
|
||||
* The alternative, mapping an interior index to the start,
|
||||
* would map such an interval to an empty one.)
|
||||
*
|
||||
* This operation will usually but not always modify this object.
|
||||
* The iterator state after this search is undefined.
|
||||
*
|
||||
* @param i source index
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return destination index; undefined if i is not 0..string length
|
||||
* @stable ICU 60
|
||||
*/
|
||||
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Computes the source index corresponding to the given destination index.
|
||||
* If the destination index is inside a change edit (not at its start),
|
||||
* then the source index at the end of that edit is returned,
|
||||
* since there is no information about index mapping inside a change edit.
|
||||
*
|
||||
* (This means that indexes to the start and middle of an edit,
|
||||
* for example around a grapheme cluster, are mapped to indexes
|
||||
* encompassing the entire edit.
|
||||
* The alternative, mapping an interior index to the start,
|
||||
* would map such an interval to an empty one.)
|
||||
*
|
||||
* This operation will usually but not always modify this object.
|
||||
* The iterator state after this search is undefined.
|
||||
*
|
||||
* @param i destination index
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return source index; undefined if i is not 0..string length
|
||||
* @stable ICU 60
|
||||
*/
|
||||
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns whether the edit currently represented by the iterator is a change edit.
|
||||
*
|
||||
* @return true if this edit replaces oldLength() units with newLength() different ones.
|
||||
* false if oldLength units remain unchanged.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool hasChange() const { return changed; }
|
||||
|
||||
/**
|
||||
* The length of the current span in the source string, which starts at {@link #sourceIndex}.
|
||||
*
|
||||
* @return the number of units in the original string which are replaced or remain unchanged.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t oldLength() const { return oldLength_; }
|
||||
|
||||
/**
|
||||
* The length of the current span in the destination string, which starts at
|
||||
* {@link #destinationIndex}, or in the replacement string, which starts at
|
||||
* {@link #replacementIndex}.
|
||||
*
|
||||
* @return the number of units in the modified string, if hasChange() is true.
|
||||
* Same as oldLength if hasChange() is false.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t newLength() const { return newLength_; }
|
||||
|
||||
/**
|
||||
* The start index of the current span in the source string; the span has length
|
||||
* {@link #oldLength}.
|
||||
*
|
||||
* @return the current index into the source string
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t sourceIndex() const { return srcIndex; }
|
||||
|
||||
/**
|
||||
* The start index of the current span in the replacement string; the span has length
|
||||
* {@link #newLength}. Well-defined only if the current edit is a change edit.
|
||||
*
|
||||
* The *replacement string* is the concatenation of all substrings of the destination
|
||||
* string corresponding to change edits.
|
||||
*
|
||||
* This method is intended to be used together with operations that write only replacement
|
||||
* characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option).
|
||||
* The source string can then be modified in-place.
|
||||
*
|
||||
* @return the current index into the replacement-characters-only string,
|
||||
* not counting unchanged spans
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t replacementIndex() const {
|
||||
// TODO: Throw an exception if we aren't in a change edit?
|
||||
return replIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* The start index of the current span in the destination string; the span has length
|
||||
* {@link #newLength}.
|
||||
*
|
||||
* @return the current index into the full destination string
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t destinationIndex() const { return destIndex; }
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* A string representation of the current edit represented by the iterator for debugging. You
|
||||
* should not depend on the contents of the return string.
|
||||
* @internal
|
||||
*/
|
||||
UnicodeString& toString(UnicodeString& appendTo) const;
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
|
||||
private:
|
||||
friend class Edits;
|
||||
|
||||
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
|
||||
|
||||
int32_t readLength(int32_t head);
|
||||
void updateNextIndexes();
|
||||
void updatePreviousIndexes();
|
||||
UBool noNext();
|
||||
UBool next(UBool onlyChanges, UErrorCode &errorCode);
|
||||
UBool previous(UErrorCode &errorCode);
|
||||
/** @return -1: error or i<0; 0: found; 1: i>=string length */
|
||||
int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
|
||||
|
||||
const uint16_t *array;
|
||||
int32_t index, length;
|
||||
// 0 if we are not within compressed equal-length changes.
|
||||
// Otherwise the number of remaining changes, including the current one.
|
||||
int32_t remaining;
|
||||
UBool onlyChanges_, coarse;
|
||||
|
||||
int8_t dir; // iteration direction: back(<0), initial(0), forward(>0)
|
||||
UBool changed;
|
||||
int32_t oldLength_, newLength_;
|
||||
int32_t srcIndex, replIndex, destIndex;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained change edits
|
||||
* (adjacent change edits are treated as one).
|
||||
* Can be used to perform simple string updates.
|
||||
* Skips no-change edits.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator getCoarseChangesIterator() const {
    // Change edits only (no-change edits are skipped), adjacent changes merged.
    const UBool onlyChanges = true;
    const UBool coarse = true;
    return Iterator(array, length, onlyChanges, coarse);
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained change and no-change edits
|
||||
* (adjacent change edits are treated as one).
|
||||
* Can be used to perform simple string updates.
|
||||
* Adjacent change edits are treated as one edit.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator getCoarseIterator() const {
    // Change and no-change edits are both visited; adjacent changes are merged.
    const UBool onlyChanges = false;
    const UBool coarse = true;
    return Iterator(array, length, onlyChanges, coarse);
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for fine-grained change edits
|
||||
* (full granularity of change edits is retained).
|
||||
* Can be used for modifying styled text.
|
||||
* Skips no-change edits.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator getFineChangesIterator() const {
    // Change edits only (no-change edits are skipped), full granularity kept.
    const UBool onlyChanges = true;
    const UBool coarse = false;
    return Iterator(array, length, onlyChanges, coarse);
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator for fine-grained change and no-change edits
|
||||
* (full granularity of change edits is retained).
|
||||
* Can be used for modifying styled text.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator getFineIterator() const {
    // Change and no-change edits are both visited, full granularity kept.
    const UBool onlyChanges = false;
    const UBool coarse = false;
    return Iterator(array, length, onlyChanges, coarse);
}
|
||||
|
||||
/**
|
||||
* Merges the two input Edits and appends the result to this object.
|
||||
*
|
||||
* Consider two string transformations (for example, normalization and case mapping)
|
||||
* where each records Edits in addition to writing an output string.<br>
|
||||
* Edits ab reflect how substrings of input string a
|
||||
* map to substrings of intermediate string b.<br>
|
||||
* Edits bc reflect how substrings of intermediate string b
|
||||
* map to substrings of output string c.<br>
|
||||
* This function merges ab and bc such that the additional edits
|
||||
* recorded in this object reflect how substrings of input string a
|
||||
* map to substrings of output string c.
|
||||
*
|
||||
* If unrelated Edits are passed in where the output string of the first
|
||||
* has a different length than the input string of the second,
|
||||
* then a U_ILLEGAL_ARGUMENT_ERROR is reported.
|
||||
*
|
||||
* @param ab reflects how substrings of input string a
|
||||
* map to substrings of intermediate string b.
|
||||
* @param bc reflects how substrings of intermediate string b
|
||||
* map to substrings of output string c.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return *this, with the merged edits appended
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
void releaseArray() noexcept;
|
||||
// Counterpart of moveArray() below; returns a reference to an Edits object.
// NOTE(review): presumably copies other's edit array into this object and
// returns *this -- the implementation is not visible in this header.
Edits &copyArray(const Edits &other);
|
||||
Edits &moveArray(Edits &src) noexcept;
|
||||
|
||||
void setLastUnit(int32_t last) {
    // Overwrite the final stored unit; the value is converted to 16 bits.
    const int32_t lastPos = length - 1;
    array[lastPos] = static_cast<uint16_t>(last);
}
|
||||
int32_t lastUnit() const {
    // With nothing stored yet there is no last unit; 0xffff is returned instead.
    if (length <= 0) {
        return 0xffff;
    }
    return array[length - 1];
}
|
||||
|
||||
void append(int32_t r);
|
||||
UBool growArray();
|
||||
|
||||
static const int32_t STACK_CAPACITY = 100;
|
||||
uint16_t *array;
|
||||
int32_t capacity;
|
||||
int32_t length;
|
||||
int32_t delta;
|
||||
int32_t numChanges;
|
||||
UErrorCode errorCode_;
|
||||
uint16_t stackArray[STACK_CAPACITY];
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __EDITS_H__
|
||||
69
thirdparty/icu4c/common/unicode/enumset.h
vendored
Normal file
69
thirdparty/icu4c/common/unicode/enumset.h
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2012,2014 International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++: internal template EnumSet<>
|
||||
*/
|
||||
|
||||
#ifndef ENUMSET_H
|
||||
#define ENUMSET_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/* Can't use #ifndef U_HIDE_INTERNAL_API for the entire EnumSet class, needed in .h file declarations */
|
||||
/**
|
||||
* enum bitset for boolean fields. Similar to Java EnumSet<>.
|
||||
* Needs to range check. Used for private instance variables.
|
||||
* @internal
|
||||
* \cond
|
||||
*/
|
||||
template<typename T, uint32_t minValue, uint32_t limitValue>
|
||||
class EnumSet {
|
||||
public:
|
||||
inline EnumSet() : fBools(0) {}
|
||||
inline EnumSet(const EnumSet<T,minValue,limitValue>& other) : fBools(other.fBools) {}
|
||||
inline ~EnumSet() {}
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
inline void clear() { fBools=0; }
|
||||
inline void add(T toAdd) { set(toAdd, 1); }
|
||||
inline void remove(T toRemove) { set(toRemove, 0); }
|
||||
inline int32_t contains(T toCheck) const { return get(toCheck); }
|
||||
inline void set(T toSet, int32_t v) { fBools=(fBools&(~flag(toSet)))|(v?(flag(toSet)):0); }
|
||||
inline int32_t get(T toCheck) const { return (fBools & flag(toCheck))?1:0; }
|
||||
inline UBool isValidEnum(T toCheck) const { return (toCheck>=minValue&&toCheck<limitValue); }
|
||||
inline UBool isValidValue(int32_t v) const { return (v==0||v==1); }
|
||||
inline const EnumSet<T,minValue,limitValue>& operator=(const EnumSet<T,minValue,limitValue>& other) {
|
||||
fBools = other.fBools;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline uint32_t getAll() const {
|
||||
return fBools;
|
||||
}
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
private:
|
||||
inline uint32_t flag(T toCheck) const { return (1<<(toCheck-minValue)); }
|
||||
private:
|
||||
uint32_t fBools;
|
||||
};
|
||||
|
||||
/** \endcond */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
#endif /* ENUMSET_H */
|
||||
144
thirdparty/icu4c/common/unicode/errorcode.h
vendored
Normal file
144
thirdparty/icu4c/common/unicode/errorcode.h
vendored
Normal file
@@ -0,0 +1,144 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: errorcode.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009mar10
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __ERRORCODE_H__
|
||||
#define __ERRORCODE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: ErrorCode class intended to make it easier to use
|
||||
* ICU C and C++ APIs from C++ user code.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Wrapper class for UErrorCode, with conversion operators for direct use
|
||||
* in ICU C and C++ APIs.
|
||||
* Intended to be used as a base class, where a subclass overrides
|
||||
* the handleFailure() function so that it throws an exception,
|
||||
* does an assert(), logs an error, etc.
|
||||
* This is not an abstract base class. This class can be used and instantiated
|
||||
* by itself, although it will be more useful when subclassed.
|
||||
*
|
||||
* Features:
|
||||
* - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
|
||||
* removing one common source of errors.
|
||||
* - Same use in C APIs taking a UErrorCode * (pointer)
|
||||
* and C++ taking UErrorCode & (reference) via conversion operators.
|
||||
* - Possible automatic checking for success when it goes out of scope.
|
||||
*
|
||||
* Note: For automatic checking for success in the destructor, a subclass
|
||||
* must implement such logic in its own destructor because the base class
|
||||
* destructor cannot call a subclass function (like handleFailure()).
|
||||
* The ErrorCode base class destructor does nothing.
|
||||
*
|
||||
* Note also: While it is possible for a destructor to throw an exception,
|
||||
* it is generally unsafe to do so. This means that in a subclass the destructor
|
||||
* and the handleFailure() function may need to take different actions.
|
||||
*
|
||||
* Sample code:
|
||||
* \code
|
||||
* class IcuErrorCode: public icu::ErrorCode {
|
||||
* public:
|
||||
* virtual ~IcuErrorCode() { // should be defined in .cpp as "key function"
|
||||
* // Safe because our handleFailure() does not throw exceptions.
|
||||
* if(isFailure()) { handleFailure(); }
|
||||
* }
|
||||
* protected:
|
||||
* virtual void handleFailure() const {
|
||||
* log_failure(u_errorName(errorCode));
|
||||
* exit(errorCode);
|
||||
* }
|
||||
* };
|
||||
* IcuErrorCode error_code;
|
||||
* UConverter *cnv = ucnv_open("Shift-JIS", error_code);
|
||||
* length = ucnv_fromUChars(dest, capacity, src, length, error_code);
|
||||
* ucnv_close(cnv);
|
||||
* // IcuErrorCode destructor checks for success.
|
||||
* \endcode
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
class U_COMMON_API ErrorCode: public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Default constructor. Initializes its UErrorCode to U_ZERO_ERROR.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
ErrorCode() : errorCode(U_ZERO_ERROR) {}
|
||||
/** Destructor, does nothing. See class documentation for details. @stable ICU 4.2 */
|
||||
virtual ~ErrorCode();
|
||||
/** Conversion operator, returns a reference. @stable ICU 4.2 */
|
||||
operator UErrorCode & () { return errorCode; }
|
||||
/** Conversion operator, returns a pointer. @stable ICU 4.2 */
|
||||
operator UErrorCode * () { return &errorCode; }
|
||||
/** Tests for U_SUCCESS(). @stable ICU 4.2 */
|
||||
UBool isSuccess() const { return U_SUCCESS(errorCode); }
|
||||
/** Tests for U_FAILURE(). @stable ICU 4.2 */
|
||||
UBool isFailure() const { return U_FAILURE(errorCode); }
|
||||
/** Returns the UErrorCode value. @stable ICU 4.2 */
|
||||
UErrorCode get() const { return errorCode; }
|
||||
/** Sets the UErrorCode value. @stable ICU 4.2 */
|
||||
void set(UErrorCode value) { errorCode=value; }
|
||||
/** Returns the UErrorCode value and resets it to U_ZERO_ERROR. @stable ICU 4.2 */
|
||||
UErrorCode reset();
|
||||
/**
|
||||
* Asserts isSuccess().
|
||||
* In other words, this method checks for a failure code,
|
||||
* and the base class handles it like this:
|
||||
* \code
|
||||
* if(isFailure()) { handleFailure(); }
|
||||
* \endcode
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
void assertSuccess() const;
|
||||
/**
|
||||
* Return a string for the UErrorCode value.
|
||||
* The string will be the same as the name of the error code constant
|
||||
* in the UErrorCode enum.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
const char* errorName() const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Internal UErrorCode, accessible to subclasses.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
UErrorCode errorCode;
|
||||
/**
|
||||
* Called by assertSuccess() if isFailure() is true.
|
||||
* A subclass should override this function to deal with a failure code:
|
||||
* Throw an exception, log an error, terminate the program, or similar.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void handleFailure() const {}
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __ERRORCODE_H__
|
||||
152
thirdparty/icu4c/common/unicode/filteredbrk.h
vendored
Normal file
152
thirdparty/icu4c/common/unicode/filteredbrk.h
vendored
Normal file
@@ -0,0 +1,152 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 1997-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef FILTEREDBRK_H
|
||||
#define FILTEREDBRK_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/brkiter.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: FilteredBreakIteratorBuilder
|
||||
*/
|
||||
|
||||
/**
|
||||
* The FilteredBreakIteratorBuilder is used to modify the behavior of a BreakIterator
|
||||
* by constructing a new BreakIterator which suppresses certain segment boundaries.
|
||||
* See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions .
|
||||
* For example, a typical English Sentence Break Iterator would break on the space
|
||||
* in the string "Mr. Smith" (resulting in two segments),
|
||||
* but with "Mr." as an exception, a filtered break iterator
|
||||
* would consider the string "Mr. Smith" to be a single segment.
|
||||
*
|
||||
* @stable ICU 56
|
||||
*/
|
||||
class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
|
||||
public:
|
||||
/**
|
||||
* destructor.
|
||||
* @stable ICU 56
|
||||
*/
|
||||
virtual ~FilteredBreakIteratorBuilder();
|
||||
|
||||
/**
|
||||
* Construct a FilteredBreakIteratorBuilder based on rules in a locale.
|
||||
* The rules are taken from CLDR exception data for the locale,
|
||||
* see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions
|
||||
* This is the equivalent of calling createInstance(UErrorCode&)
|
||||
* and then repeatedly calling suppressBreakAfter(...) with the contents
|
||||
* of the CLDR exception data.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return the new builder
|
||||
* @stable ICU 56
|
||||
*/
|
||||
static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* This function has been deprecated in favor of createEmptyInstance, which has
|
||||
* identical behavior.
|
||||
* @param status The error code.
|
||||
* @return the new builder
|
||||
* @deprecated ICU 60 use createEmptyInstance instead
|
||||
* @see createEmptyInstance()
|
||||
*/
|
||||
static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Construct an empty FilteredBreakIteratorBuilder.
|
||||
* In this state, it will not suppress any segment boundaries.
|
||||
* @param status The error code.
|
||||
* @return the new builder
|
||||
* @stable ICU 60
|
||||
*/
|
||||
static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Suppress a certain string from being the end of a segment.
|
||||
* For example, suppressing "Mr.", then segments ending in "Mr." will not be returned
|
||||
* by the iterator.
|
||||
* @param string the string to suppress, such as "Mr."
|
||||
* @param status error code
|
||||
* @return returns true if the string was not present and now added,
|
||||
* false if the call was a no-op because the string was already being suppressed.
|
||||
* @stable ICU 56
|
||||
*/
|
||||
virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
|
||||
|
||||
/**
|
||||
* Stop suppressing a certain string from being the end of the segment.
|
||||
* This function does not create any new segment boundaries, but only serves to un-do
|
||||
* the effect of earlier calls to suppressBreakAfter, or to un-do the effect of
|
||||
* locale data which may be suppressing certain strings.
|
||||
* @param string the exception to remove
|
||||
* @param status error code
|
||||
* @return returns true if the string was present and now removed,
|
||||
* false if the call was a no-op because the string was not being suppressed.
|
||||
* @stable ICU 56
|
||||
*/
|
||||
virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
|
||||
|
||||
#ifndef U_FORCE_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* This function has been deprecated in favor of wrapIteratorWithFilter()
|
||||
* The behavior is identical.
|
||||
* @param adoptBreakIterator the break iterator to adopt
|
||||
* @param status error code
|
||||
* @return the new BreakIterator, owned by the caller.
|
||||
* @deprecated ICU 60 use wrapIteratorWithFilter() instead
|
||||
* @see wrapIteratorWithFilter()
|
||||
*/
|
||||
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
|
||||
#endif // U_FORCE_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Wrap (adopt) an existing break iterator in a new filtered instance.
|
||||
* The resulting BreakIterator is owned by the caller.
|
||||
* The FilteredBreakIteratorBuilder may be destroyed before the BreakIterator is destroyed.
|
||||
* Note that the adoptBreakIterator is adopted by the new BreakIterator
|
||||
* and should no longer be used by the caller.
|
||||
* The FilteredBreakIteratorBuilder may be reused.
|
||||
* This function is an alias for build()
|
||||
* @param adoptBreakIterator the break iterator to adopt
|
||||
* @param status error code
|
||||
* @return the new BreakIterator, owned by the caller.
|
||||
* @stable ICU 60
|
||||
*/
|
||||
inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
|
||||
return build(adoptBreakIterator, status);
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* For subclass use
|
||||
* @stable ICU 56
|
||||
*/
|
||||
FilteredBreakIteratorBuilder();
|
||||
};
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // #ifndef FILTEREDBRK_H
|
||||
43
thirdparty/icu4c/common/unicode/icudataver.h
vendored
Normal file
43
thirdparty/icu4c/common/unicode/icudataver.h
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: access to ICU Data Version number
|
||||
*/
|
||||
|
||||
#ifndef __ICU_DATA_VER_H__
|
||||
#define __ICU_DATA_VER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* @stable ICU 49
|
||||
*/
|
||||
#define U_ICU_VERSION_BUNDLE "icuver"
|
||||
|
||||
/**
|
||||
* @stable ICU 49
|
||||
*/
|
||||
#define U_ICU_DATA_KEY "DataVersion"
|
||||
|
||||
/**
|
||||
* Retrieves the data version from icuver and stores it in dataVersionFillin.
|
||||
*
|
||||
* @param dataVersionFillin icuver data version information to be filled in if not-null
|
||||
* @param status stores the error code from the calls to resource bundle
|
||||
*
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status);
|
||||
|
||||
#endif
|
||||
391
thirdparty/icu4c/common/unicode/icuplug.h
vendored
Normal file
391
thirdparty/icu4c/common/unicode/icuplug.h
vendored
Normal file
@@ -0,0 +1,391 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : icuplug.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 10/29/2009 sl New.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: ICU Plugin API
|
||||
*
|
||||
* <h2>C API: ICU Plugin API</h2>
|
||||
*
|
||||
* <p>C API allowing run-time loadable modules that extend or modify ICU functionality.</p>
|
||||
*
|
||||
* <h3>Loading and Configuration</h3>
|
||||
*
|
||||
* <p>At ICU startup time, the environment variable "ICU_PLUGINS" will be
|
||||
* queried for a directory name. If it is not set, the preprocessor symbol
|
||||
* "DEFAULT_ICU_PLUGINS" will be checked for a default value.</p>
|
||||
*
|
||||
* <p>Within the above-named directory, the file "icuplugins##.txt" will be
|
||||
* opened, if present, where ## is the major+minor number of the currently
|
||||
* running ICU (such as, 44 for ICU 4.4, thus icuplugins44.txt)</p>
|
||||
*
|
||||
* <p>The configuration file has this format:</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>Hash (#) begins a comment line</li>
|
||||
*
|
||||
* <li>Non-comment lines have two or three components:
|
||||
* LIBRARYNAME ENTRYPOINT [ CONFIGURATION .. ]</li>
|
||||
*
|
||||
* <li>Tabs or spaces separate the three items.</li>
|
||||
*
|
||||
* <li>LIBRARYNAME is the name of a shared library, either a short name if
|
||||
* it is on the loader path, or a full pathname.</li>
|
||||
*
|
||||
* <li>ENTRYPOINT is the short (undecorated) symbol name of the plugin's
|
||||
* entrypoint, as above.</li>
|
||||
*
|
||||
* <li>CONFIGURATION is the entire rest of the line. It's passed as-is to
|
||||
* the plugin.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>An example configuration file is, in its entirety:</p>
|
||||
*
|
||||
* \code
|
||||
* # this is icuplugins44.txt
|
||||
* testplug.dll myPlugin hello=world
|
||||
* \endcode
|
||||
* <p>Plugins are categorized as "high" or "low" level. Low level are those
|
||||
* which must be run BEFORE high level plugins, and before any operations
|
||||
* which cause ICU to be 'initialized'. If a plugin is low level but
|
||||
* causes ICU to allocate memory or become initialized, that plugin is said
|
||||
* to cause a 'level change'. </p>
|
||||
*
|
||||
* <p>At load time, ICU first queries all plugins to determine their level,
|
||||
* then loads all 'low' plugins first, and then loads all 'high' plugins.
|
||||
* Plugins are otherwise loaded in the order listed in the configuration file.</p>
|
||||
*
|
||||
* <h3>Implementing a Plugin</h3>
|
||||
* \code
|
||||
* U_CAPI UPlugTokenReturn U_EXPORT2
|
||||
* myPlugin (UPlugData *plug, UPlugReason reason, UErrorCode *status) {
|
||||
* if(reason==UPLUG_REASON_QUERY) {
|
||||
* uplug_setPlugName(plug, "Simple Plugin");
|
||||
* uplug_setPlugLevel(plug, UPLUG_LEVEL_HIGH);
|
||||
* } else if(reason==UPLUG_REASON_LOAD) {
|
||||
* ... Set up some ICU things here....
|
||||
* } else if(reason==UPLUG_REASON_UNLOAD) {
|
||||
* ... unload, clean up ...
|
||||
* }
|
||||
* return UPLUG_TOKEN;
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* <p>The UPlugData* is an opaque pointer to the plugin-specific data, and is
|
||||
* used in all other API calls.</p>
|
||||
*
|
||||
* <p>The API contract is:</p>
|
||||
* <ol><li>The plugin MUST always return UPLUG_TOKEN as a return value- to
|
||||
* indicate that it is a valid plugin.</li>
|
||||
*
|
||||
* <li>When the 'reason' parameter is set to UPLUG_REASON_QUERY, the
|
||||
* plugin MUST call uplug_setPlugLevel() to indicate whether it is a high
|
||||
* level or low level plugin.</li>
|
||||
*
|
||||
* <li>When the 'reason' parameter is UPLUG_REASON_QUERY, the plugin
|
||||
* SHOULD call uplug_setPlugName to indicate a human readable plugin name.</li></ol>
|
||||
*
|
||||
*
|
||||
* \internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
|
||||
|
||||
#ifndef ICUPLUG_H
|
||||
#define ICUPLUG_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
#if UCONFIG_ENABLE_PLUGINS || defined(U_IN_DOXYGEN)
|
||||
|
||||
|
||||
|
||||
/* === Basic types === */
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
struct UPlugData;
|
||||
/**
|
||||
* @{
|
||||
* Typedef for opaque structure passed to/from a plugin.
|
||||
* Use the APIs to access it.
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
typedef struct UPlugData UPlugData;
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Random Token to identify a valid ICU plugin. Plugins must return this
|
||||
* from the entrypoint.
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
#define UPLUG_TOKEN 0x54762486
|
||||
|
||||
/**
|
||||
* Max width of names, symbols, and configuration strings
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
#define UPLUG_NAME_MAX 100
|
||||
|
||||
|
||||
/**
|
||||
* Return value from a plugin entrypoint.
|
||||
* Must always be set to UPLUG_TOKEN
|
||||
* @see UPLUG_TOKEN
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
typedef uint32_t UPlugTokenReturn;
|
||||
|
||||
/**
|
||||
* Reason code for the entrypoint's call
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
typedef enum {
|
||||
UPLUG_REASON_QUERY = 0, /**< The plugin is being queried for info. **/
|
||||
UPLUG_REASON_LOAD = 1, /**< The plugin is being loaded. **/
|
||||
UPLUG_REASON_UNLOAD = 2, /**< The plugin is being unloaded. **/
|
||||
/**
|
||||
* Number of known reasons.
|
||||
* @internal The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UPLUG_REASON_COUNT
|
||||
} UPlugReason;
|
||||
|
||||
|
||||
/**
|
||||
* Level of plugin loading
|
||||
* INITIAL: UNKNOWN
|
||||
* QUERY: INVALID -> { LOW | HIGH }
|
||||
* ERR -> INVALID
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
typedef enum {
|
||||
UPLUG_LEVEL_INVALID = 0, /**< The plugin is invalid, hasn't called uplug_setPlugLevel, or can't load. **/
|
||||
UPLUG_LEVEL_UNKNOWN = 1, /**< The plugin is waiting to be installed. **/
|
||||
UPLUG_LEVEL_LOW = 2, /**< The plugin must be called before u_init completes **/
|
||||
UPLUG_LEVEL_HIGH = 3, /**< The plugin can run at any time. **/
|
||||
/**
|
||||
* Number of known levels.
|
||||
* @internal The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UPLUG_LEVEL_COUNT
|
||||
} UPlugLevel;
|
||||
|
||||
/**
|
||||
* Entrypoint for an ICU plugin.
|
||||
* @param plug the UPlugData handle.
|
||||
* @param reason the reason code for the entrypoint's call.
|
||||
* @param status Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return A valid plugin must return UPLUG_TOKEN
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
typedef UPlugTokenReturn (U_EXPORT2 UPlugEntrypoint) (
|
||||
UPlugData *plug,
|
||||
UPlugReason reason,
|
||||
UErrorCode *status);
|
||||
|
||||
/* === Needed for Implementing === */
|
||||
|
||||
/**
|
||||
* Request that this plugin not be unloaded at cleanup time.
|
||||
* This is appropriate for plugins which cannot be cleaned up.
|
||||
* @see u_cleanup()
|
||||
* @param plug plugin
|
||||
* @param dontUnload set true if this plugin can't be unloaded
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uplug_setPlugNoUnload(UPlugData *plug, UBool dontUnload);
|
||||
|
||||
/**
|
||||
* Set the level of this plugin.
|
||||
* @param plug plugin data handle
|
||||
* @param level the level of this plugin
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uplug_setPlugLevel(UPlugData *plug, UPlugLevel level);
|
||||
|
||||
/**
|
||||
* Get the level of this plugin.
|
||||
* @param plug plugin data handle
|
||||
* @return the level of this plugin
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI UPlugLevel U_EXPORT2
|
||||
uplug_getPlugLevel(UPlugData *plug);
|
||||
|
||||
/**
|
||||
* Get the lowest level of plug which can currently load.
|
||||
* For example, if UPLUG_LEVEL_LOW is returned, then low level plugins may load;
|
||||
* if UPLUG_LEVEL_HIGH is returned, then only high level plugins may load.
|
||||
* @return the lowest level of plug which can currently load
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI UPlugLevel U_EXPORT2
|
||||
uplug_getCurrentLevel(void);
|
||||
|
||||
|
||||
/**
|
||||
* Get plug load status
|
||||
* @return The error code of this plugin's load attempt.
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI UErrorCode U_EXPORT2
|
||||
uplug_getPlugLoadStatus(UPlugData *plug);
|
||||
|
||||
/**
|
||||
* Set the human-readable name of this plugin.
|
||||
* @param plug plugin data handle
|
||||
* @param name the name of this plugin. The first UPLUG_NAME_MAX characters will be copied into a new buffer.
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uplug_setPlugName(UPlugData *plug, const char *name);
|
||||
|
||||
/**
|
||||
* Get the human-readable name of this plugin.
|
||||
* @param plug plugin data handle
|
||||
* @return the name of this plugin
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
uplug_getPlugName(UPlugData *plug);
|
||||
|
||||
/**
|
||||
* Return the symbol name for this plugin, if known.
|
||||
* @param plug plugin data handle
|
||||
* @return the symbol name, or NULL
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
uplug_getSymbolName(UPlugData *plug);
|
||||
|
||||
/**
|
||||
* Return the library name for this plugin, if known.
|
||||
* @param plug plugin data handle
|
||||
* @param status error code
|
||||
* @return the library name, or NULL
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
uplug_getLibraryName(UPlugData *plug, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Return the library used for this plugin, if known.
|
||||
* Plugins could use this to load data out of their own library.
|
||||
* @param plug plugin data handle
|
||||
* @return the library, or NULL
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI void * U_EXPORT2
|
||||
uplug_getLibrary(UPlugData *plug);
|
||||
|
||||
/**
|
||||
* Return the plugin-specific context data.
|
||||
* @param plug plugin data handle
|
||||
* @return the context, or NULL if not set
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI void * U_EXPORT2
|
||||
uplug_getContext(UPlugData *plug);
|
||||
|
||||
/**
|
||||
* Set the plugin-specific context data.
|
||||
* @param plug plugin data handle
|
||||
* @param context new context to set
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uplug_setContext(UPlugData *plug, void *context);
|
||||
|
||||
|
||||
/**
|
||||
* Get the configuration string, if available.
|
||||
* The string is in the platform default codepage.
|
||||
* @param plug plugin data handle
|
||||
* @return configuration string, or else null.
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
uplug_getConfiguration(UPlugData *plug);
|
||||
|
||||
/**
|
||||
* Return all currently installed plugins, from newest to oldest
|
||||
* Usage Example:
|
||||
* \code
|
||||
* UPlugData *plug = NULL;
|
||||
* while(plug=uplug_nextPlug(plug)) {
|
||||
* ... do something with 'plug' ...
|
||||
* }
|
||||
* \endcode
|
||||
* Not thread safe- do not call while plugs are added or removed.
|
||||
* @param prior pass in 'NULL' to get the first (most recent) plug,
|
||||
* otherwise pass the value returned on a prior call to uplug_nextPlug
|
||||
* @return the next oldest plugin, or NULL if no more.
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI UPlugData* U_EXPORT2
|
||||
uplug_nextPlug(UPlugData *prior);
|
||||
|
||||
/**
|
||||
* Inject a plugin as if it were loaded from a library.
|
||||
* This is useful for testing plugins.
|
||||
* Note that it will have a 'NULL' library pointer associated
|
||||
* with it, and therefore no library will be closed at cleanup time.
|
||||
* Low level plugins may not be able to load, as ordering can't be enforced.
|
||||
* @param entrypoint entrypoint to install
|
||||
* @param config user specified configuration string, if available, or NULL.
|
||||
* @param status error result
|
||||
* @return the new UPlugData associated with this plugin, or NULL if error.
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI UPlugData* U_EXPORT2
|
||||
uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Inject a plugin from a library, as if the information came from a config file.
|
||||
* Low level plugins may not be able to load, and ordering can't be enforced.
|
||||
* @param libName DLL name to load
|
||||
* @param sym symbol of plugin (UPlugEntrypoint function)
|
||||
* @param config configuration string, or NULL
|
||||
* @param status error result
|
||||
* @return the new UPlugData associated with this plugin, or NULL if error.
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI UPlugData* U_EXPORT2
|
||||
uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Remove a plugin.
|
||||
* Will request the plugin to be unloaded, and close the library if needed
|
||||
* @param plug plugin handle to close
|
||||
* @param status error result
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uplug_removePlug(UPlugData *plug, UErrorCode *status);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
#endif /* UCONFIG_ENABLE_PLUGINS */
|
||||
|
||||
#endif /* _ICUPLUG */
|
||||
|
||||
333
thirdparty/icu4c/common/unicode/idna.h
vendored
Normal file
333
thirdparty/icu4c/common/unicode/idna.h
vendored
Normal file
@@ -0,0 +1,333 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: idna.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010mar05
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __IDNA_H__
|
||||
#define __IDNA_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uidna.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class IDNAInfo;
|
||||
|
||||
/**
|
||||
* Abstract base class for IDNA processing.
|
||||
* See http://www.unicode.org/reports/tr46/
|
||||
* and http://www.ietf.org/rfc/rfc3490.txt
|
||||
*
|
||||
* The IDNA class is not intended for public subclassing.
|
||||
*
|
||||
* This C++ API currently only implements UTS #46.
|
||||
* The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
|
||||
* and IDNA2003 (functions that do not use a service object).
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
class U_COMMON_API IDNA : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
~IDNA();
|
||||
|
||||
/**
|
||||
* Returns an IDNA instance which implements UTS #46.
|
||||
* Returns an unmodifiable instance, owned by the caller.
|
||||
* Cache it for multiple operations, and delete it when done.
|
||||
* The instance is thread-safe, that is, it can be used concurrently.
|
||||
*
|
||||
* UTS #46 defines Unicode IDNA Compatibility Processing,
|
||||
* updated to the latest version of Unicode and compatible with both
|
||||
* IDNA2003 and IDNA2008.
|
||||
*
|
||||
* The worker functions use transitional processing, including deviation mappings,
|
||||
* unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
|
||||
* is used in which case the deviation characters are passed through without change.
|
||||
* <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
|
||||
*
|
||||
* Disallowed characters are mapped to U+FFFD.
|
||||
*
|
||||
* For available options see the uidna.h header.
|
||||
* Operations with the UTS #46 instance do not support the
|
||||
* UIDNA_ALLOW_UNASSIGNED option.
|
||||
*
|
||||
* By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
|
||||
* When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
|
||||
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
|
||||
*
|
||||
* @param options Bit set to modify the processing and error checking.
|
||||
* These should include UIDNA_DEFAULT, or
|
||||
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
|
||||
* See option bit set values in uidna.h.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the UTS #46 IDNA instance, if successful
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
static IDNA *
|
||||
createUTS46Instance(uint32_t options, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Converts a single domain name label into its ASCII form for DNS lookup.
|
||||
* If any processing step fails, then info.hasErrors() will be true and
|
||||
* the result might not be an ASCII string.
|
||||
* The label might be modified according to the types of errors.
|
||||
* Labels with severe errors will be left in (or turned into) their Unicode form.
|
||||
*
|
||||
* The UErrorCode indicates an error only in exceptional cases,
|
||||
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||||
*
|
||||
* @param label Input domain name label
|
||||
* @param dest Destination string object
|
||||
* @param info Output container of IDNA processing details.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return dest
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual UnicodeString &
|
||||
labelToASCII(const UnicodeString &label, UnicodeString &dest,
|
||||
IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
||||
|
||||
/**
|
||||
* Converts a single domain name label into its Unicode form for human-readable display.
|
||||
* If any processing step fails, then info.hasErrors() will be true.
|
||||
* The label might be modified according to the types of errors.
|
||||
*
|
||||
* The UErrorCode indicates an error only in exceptional cases,
|
||||
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||||
*
|
||||
* @param label Input domain name label
|
||||
* @param dest Destination string object
|
||||
* @param info Output container of IDNA processing details.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return dest
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual UnicodeString &
|
||||
labelToUnicode(const UnicodeString &label, UnicodeString &dest,
|
||||
IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
||||
|
||||
/**
|
||||
* Converts a whole domain name into its ASCII form for DNS lookup.
|
||||
* If any processing step fails, then info.hasErrors() will be true and
|
||||
* the result might not be an ASCII string.
|
||||
* The domain name might be modified according to the types of errors.
|
||||
* Labels with severe errors will be left in (or turned into) their Unicode form.
|
||||
*
|
||||
* The UErrorCode indicates an error only in exceptional cases,
|
||||
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||||
*
|
||||
* @param name Input domain name
|
||||
* @param dest Destination string object
|
||||
* @param info Output container of IDNA processing details.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return dest
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual UnicodeString &
|
||||
nameToASCII(const UnicodeString &name, UnicodeString &dest,
|
||||
IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
||||
|
||||
/**
|
||||
* Converts a whole domain name into its Unicode form for human-readable display.
|
||||
* If any processing step fails, then info.hasErrors() will be true.
|
||||
* The domain name might be modified according to the types of errors.
|
||||
*
|
||||
* The UErrorCode indicates an error only in exceptional cases,
|
||||
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||||
*
|
||||
* @param name Input domain name
|
||||
* @param dest Destination string object
|
||||
* @param info Output container of IDNA processing details.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return dest
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual UnicodeString &
|
||||
nameToUnicode(const UnicodeString &name, UnicodeString &dest,
|
||||
IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
||||
|
||||
// UTF-8 versions of the processing methods ---------------------------- ***
|
||||
|
||||
/**
|
||||
* Converts a single domain name label into its ASCII form for DNS lookup.
|
||||
* UTF-8 version of labelToASCII(), same behavior.
|
||||
*
|
||||
* @param label Input domain name label
|
||||
* @param dest Destination byte sink; Flush()ed if successful
|
||||
* @param info Output container of IDNA processing details.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @note This function returns void; the converted label is written to dest.
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual void
|
||||
labelToASCII_UTF8(StringPiece label, ByteSink &dest,
|
||||
IDNAInfo &info, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Converts a single domain name label into its Unicode form for human-readable display.
|
||||
* UTF-8 version of labelToUnicode(), same behavior.
|
||||
*
|
||||
* @param label Input domain name label
|
||||
* @param dest Destination byte sink; Flush()ed if successful
|
||||
* @param info Output container of IDNA processing details.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @note This function returns void; the converted label is written to dest.
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual void
|
||||
labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
|
||||
IDNAInfo &info, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Converts a whole domain name into its ASCII form for DNS lookup.
|
||||
* UTF-8 version of nameToASCII(), same behavior.
|
||||
*
|
||||
* @param name Input domain name
|
||||
* @param dest Destination byte sink; Flush()ed if successful
|
||||
* @param info Output container of IDNA processing details.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @note This function returns void; the converted domain name is written to dest.
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual void
|
||||
nameToASCII_UTF8(StringPiece name, ByteSink &dest,
|
||||
IDNAInfo &info, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Converts a whole domain name into its Unicode form for human-readable display.
|
||||
* UTF-8 version of nameToUnicode(), same behavior.
|
||||
*
|
||||
* @param name Input domain name
|
||||
* @param dest Destination byte sink; Flush()ed if successful
|
||||
* @param info Output container of IDNA processing details.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @note This function returns void; the converted domain name is written to dest.
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual void
|
||||
nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
|
||||
IDNAInfo &info, UErrorCode &errorCode) const;
|
||||
};
|
||||
|
||||
class UTS46;
|
||||
|
||||
/**
|
||||
* Output container for IDNA processing errors.
|
||||
* The IDNAInfo class is not suitable for subclassing.
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
class U_COMMON_API IDNAInfo : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructor for stack allocation.
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {}
|
||||
/**
|
||||
* Were there IDNA processing errors?
|
||||
* @return true if there were processing errors
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
UBool hasErrors() const { return errors!=0; }
|
||||
/**
|
||||
* Returns a bit set indicating IDNA processing errors.
|
||||
* See UIDNA_ERROR_... constants in uidna.h.
|
||||
* @return bit set of processing errors
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
uint32_t getErrors() const { return errors; }
|
||||
/**
|
||||
* Returns true if transitional and nontransitional processing produce different results.
|
||||
* This is the case when the input label or domain name contains
|
||||
* one or more deviation characters outside a Punycode label (see UTS #46).
|
||||
* <ul>
|
||||
* <li>With nontransitional processing, such characters are
|
||||
* copied to the destination string.
|
||||
* <li>With transitional processing, such characters are
|
||||
* mapped (sharp s/sigma) or removed (joiner/nonjoiner).
|
||||
* </ul>
|
||||
* @return true if transitional and nontransitional processing produce different results
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
UBool isTransitionalDifferent() const { return isTransDiff; }
|
||||
|
||||
private:
|
||||
friend class UTS46;
|
||||
|
||||
IDNAInfo(const IDNAInfo &other) = delete; // no copying
|
||||
IDNAInfo &operator=(const IDNAInfo &other) = delete; // no copying
|
||||
|
||||
void reset() {
|
||||
errors=labelErrors=0;
|
||||
isTransDiff=false;
|
||||
isBiDi=false;
|
||||
isOkBiDi=true;
|
||||
}
|
||||
|
||||
uint32_t errors, labelErrors;
|
||||
UBool isTransDiff;
|
||||
UBool isBiDi;
|
||||
UBool isOkBiDi;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // UCONFIG_NO_IDNA
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __IDNA_H__
|
||||
309
thirdparty/icu4c/common/unicode/localebuilder.h
vendored
Normal file
309
thirdparty/icu4c/common/unicode/localebuilder.h
vendored
Normal file
@@ -0,0 +1,309 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
#ifndef __LOCALEBUILDER_H__
|
||||
#define __LOCALEBUILDER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/localematcher.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Builder API for Locale
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
class CharString;
|
||||
|
||||
/**
|
||||
* <code>LocaleBuilder</code> is used to build instances of <code>Locale</code>
|
||||
* from values configured by the setters. Unlike the <code>Locale</code>
|
||||
* constructors, the <code>LocaleBuilder</code> checks if a value configured by a
|
||||
* setter satisfies the syntax requirements defined by the <code>Locale</code>
|
||||
* class. A <code>Locale</code> object created by a <code>LocaleBuilder</code> is
|
||||
* well-formed and can be transformed to a well-formed IETF BCP 47 language tag
|
||||
* without losing information.
|
||||
*
|
||||
* <p>The following example shows how to create a <code>Locale</code> object
|
||||
* with the <code>LocaleBuilder</code>.
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* UErrorCode status = U_ZERO_ERROR;
|
||||
* Locale aLocale = LocaleBuilder()
|
||||
* .setLanguage("sr")
|
||||
* .setScript("Latn")
|
||||
* .setRegion("RS")
|
||||
* .build(status);
|
||||
* if (U_SUCCESS(status)) {
|
||||
* // ...
|
||||
* }
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>LocaleBuilders can be reused; <code>clear()</code> resets all
|
||||
* fields to their default values.
|
||||
*
|
||||
* <p>LocaleBuilder tracks errors in an internal UErrorCode. For all setters,
|
||||
* except setLanguageTag and setLocale, LocaleBuilder will return immediately
|
||||
* if the internal UErrorCode is in error state.
|
||||
* To reset internal state and error code, call clear method.
|
||||
* The setLanguageTag and setLocale method will first clear the internal
|
||||
* UErrorCode, then track the error of the validation of the input parameter
|
||||
* into the internal UErrorCode.
|
||||
*
|
||||
* @stable ICU 64
|
||||
*/
|
||||
class U_COMMON_API LocaleBuilder : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Constructs an empty LocaleBuilder. The default value of all
|
||||
* fields, extensions, and private use information is the
|
||||
* empty string.
|
||||
*
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder();
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
* @stable ICU 64
|
||||
*/
|
||||
virtual ~LocaleBuilder();
|
||||
|
||||
/**
|
||||
* Resets the <code>LocaleBuilder</code> to match the provided
|
||||
* <code>locale</code>. Existing state is discarded.
|
||||
*
|
||||
* <p>All fields of the locale must be well-formed.
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return This builder.
|
||||
*
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& setLocale(const Locale& locale);
|
||||
|
||||
/**
|
||||
* Resets the LocaleBuilder to match the provided IETF BCP 47 language tag.
|
||||
* Discards the existing state.
|
||||
* The empty string causes the builder to be reset, like {@link #clear}.
|
||||
* Legacy language tags (marked as “Type: grandfathered” in BCP 47)
|
||||
* are converted to their canonical form before being processed.
|
||||
* Otherwise, the <code>language tag</code> must be well-formed,
|
||||
* or else the build() method will later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @param tag the language tag, defined as IETF BCP 47 language tag.
|
||||
* @return This builder.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& setLanguageTag(StringPiece tag);
|
||||
|
||||
/**
|
||||
* Sets the language. If <code>language</code> is the empty string, the
|
||||
* language in this <code>LocaleBuilder</code> is removed. Otherwise, the
|
||||
* <code>language</code> must be well-formed, or else the build() method will
|
||||
* later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>The syntax of language value is defined as
|
||||
* [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag).
|
||||
*
|
||||
* @param language the language
|
||||
* @return This builder.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& setLanguage(StringPiece language);
|
||||
|
||||
/**
|
||||
* Sets the script. If <code>script</code> is the empty string, the script in
|
||||
* this <code>LocaleBuilder</code> is removed.
|
||||
* Otherwise, the <code>script</code> must be well-formed, or else the build()
|
||||
* method will later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>The script value is a four-letter script code as
|
||||
* [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag)
|
||||
* defined by ISO 15924
|
||||
*
|
||||
* @param script the script
|
||||
* @return This builder.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& setScript(StringPiece script);
|
||||
|
||||
/**
|
||||
* Sets the region. If region is the empty string, the region in this
|
||||
* <code>LocaleBuilder</code> is removed. Otherwise, the <code>region</code>
|
||||
* must be well-formed, or else the build() method will later report an
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>The region value is defined by
|
||||
* [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag)
|
||||
* as a two-letter ISO 3166 code or a three-digit UN M.49 area code.
|
||||
*
|
||||
* <p>The region value in the <code>Locale</code> created by the
|
||||
* <code>LocaleBuilder</code> is always normalized to upper case.
|
||||
*
|
||||
* @param region the region
|
||||
* @return This builder.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& setRegion(StringPiece region);
|
||||
|
||||
/**
|
||||
* Sets the variant. If variant is the empty string, the variant in this
|
||||
* <code>LocaleBuilder</code> is removed. Otherwise, the <code>variant</code>
|
||||
* must be well-formed, or else the build() method will later report an
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p><b>Note:</b> This method checks if <code>variant</code>
|
||||
* satisfies the
|
||||
* [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag)
|
||||
* syntax requirements, and normalizes the value to lowercase letters. However,
|
||||
* the <code>Locale</code> class does not impose any syntactic
|
||||
* restriction on variant. To set an ill-formed variant, use a Locale constructor.
|
||||
* If there are multiple unicode_variant_subtag, the caller must concatenate
|
||||
* them with '-' as separator (ex: "foobar-fibar").
|
||||
*
|
||||
* @param variant the variant
|
||||
* @return This builder.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& setVariant(StringPiece variant);
|
||||
|
||||
/**
|
||||
* Sets the extension for the given key. If the value is the empty string,
|
||||
* the extension is removed. Otherwise, the <code>key</code> and
|
||||
* <code>value</code> must be well-formed, or else the build() method will
|
||||
* later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p><b>Note:</b> The key ('u') is used for the Unicode locale extension.
|
||||
* Setting a value for this key replaces any existing Unicode locale key/type
|
||||
* pairs with those defined in the extension.
|
||||
*
|
||||
* <p><b>Note:</b> The key ('x') is used for the private use code. To be
|
||||
* well-formed, the value for this key needs only to have subtags of one to
|
||||
* eight alphanumeric characters, not two to eight as in the general case.
|
||||
*
|
||||
* @param key the extension key
|
||||
* @param value the extension value
|
||||
* @return This builder.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& setExtension(char key, StringPiece value);
|
||||
|
||||
/**
|
||||
* Sets the Unicode locale keyword type for the given key. If the type
|
||||
* StringPiece is constructed with a nullptr, the keyword is removed.
|
||||
* If the type is the empty string, the keyword is set without type subtags.
|
||||
* Otherwise, the key and type must be well-formed, or else the build()
|
||||
* method will later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>Keys and types are converted to lower case.
|
||||
*
|
||||
* <p><b>Note:</b> Setting the 'u' extension via {@link #setExtension}
|
||||
* replaces all Unicode locale keywords with those defined in the
|
||||
* extension.
|
||||
*
|
||||
* @param key the Unicode locale key
|
||||
* @param type the Unicode locale type
|
||||
* @return This builder.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& setUnicodeLocaleKeyword(
|
||||
StringPiece key, StringPiece type);
|
||||
|
||||
/**
|
||||
* Adds a unicode locale attribute, if not already present, otherwise
|
||||
* has no effect. The attribute must not be the empty string and must be
|
||||
* well-formed or U_ILLEGAL_ARGUMENT_ERROR will be set to status
|
||||
* during the build() call.
|
||||
*
|
||||
* @param attribute the attribute
|
||||
* @return This builder.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& addUnicodeLocaleAttribute(StringPiece attribute);
|
||||
|
||||
/**
|
||||
* Removes a unicode locale attribute, if present, otherwise has no
|
||||
* effect. The attribute must not be the empty string and must be well-formed
|
||||
* or U_ILLEGAL_ARGUMENT_ERROR will be set to status during the build() call.
|
||||
*
|
||||
* <p>Attribute comparison for removal is case-insensitive.
|
||||
*
|
||||
* @param attribute the attribute
|
||||
* @return This builder.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& removeUnicodeLocaleAttribute(StringPiece attribute);
|
||||
|
||||
/**
|
||||
* Resets the builder to its initial, empty state.
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @return this builder
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& clear();
|
||||
|
||||
/**
|
||||
* Resets the extensions to their initial, empty state.
|
||||
* Language, script, region and variant are unchanged.
|
||||
*
|
||||
* @return this builder
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocaleBuilder& clearExtensions();
|
||||
|
||||
/**
|
||||
* Returns an instance of <code>Locale</code> created from the fields set
|
||||
* on this builder.
|
||||
* If a setter or the build() call itself needs to allocate memory and the
|
||||
* allocation fails, status will be set to U_MEMORY_ALLOCATION_ERROR.
|
||||
* If any of the fields set by the setters are not well-formed, the status
|
||||
* will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
|
||||
* not change after the build() call and the caller is free to keep using
|
||||
* the same builder to build more locales.
|
||||
*
|
||||
* @return a new Locale
|
||||
* @stable ICU 64
|
||||
*/
|
||||
Locale build(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Sets the UErrorCode if an error occurred while recording sets.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
* @param outErrorCode Set to an error code that occurred while setting subtags.
|
||||
* Unchanged if there is no such error or if outErrorCode
|
||||
* already contained an error.
|
||||
* @return true if U_FAILURE(outErrorCode)
|
||||
* @stable ICU 65
|
||||
*/
|
||||
UBool copyErrorTo(UErrorCode &outErrorCode) const;
|
||||
|
||||
private:
|
||||
friend class LocaleMatcher::Result;
|
||||
|
||||
void copyExtensionsFrom(const Locale& src, UErrorCode& errorCode);
|
||||
|
||||
UErrorCode status_;
|
||||
char language_[9];
|
||||
char script_[5];
|
||||
char region_[4];
|
||||
CharString *variant_; // Pointer not object so we need not #include internal charstr.h.
|
||||
icu::Locale *extensions_; // Pointer not object. Storage for all other fields.
|
||||
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __LOCALEBUILDER_H__
|
||||
710
thirdparty/icu4c/common/unicode/localematcher.h
vendored
Normal file
710
thirdparty/icu4c/common/unicode/localematcher.h
vendored
Normal file
@@ -0,0 +1,710 @@
|
||||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// localematcher.h
|
||||
// created: 2019may08 Markus W. Scherer
|
||||
|
||||
#ifndef __LOCALEMATCHER_H__
|
||||
#define __LOCALEMATCHER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Builder option for whether the language subtag or the script subtag is most important.
|
||||
*
|
||||
* @see LocaleMatcher::Builder#setFavorSubtag(ULocMatchFavorSubtag)
|
||||
* @stable ICU 65
|
||||
*/
|
||||
enum ULocMatchFavorSubtag {
|
||||
/**
|
||||
* Language differences are most important, then script differences, then region differences.
|
||||
* (This is the default behavior.)
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
ULOCMATCH_FAVOR_LANGUAGE,
|
||||
/**
|
||||
* Makes script differences matter relatively more than language differences.
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
ULOCMATCH_FAVOR_SCRIPT
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Builder option for whether all desired locales are treated equally or
|
||||
* earlier ones are preferred.
|
||||
*
|
||||
* @see LocaleMatcher::Builder#setDemotionPerDesiredLocale(ULocMatchDemotion)
|
||||
* @stable ICU 65
|
||||
*/
|
||||
enum ULocMatchDemotion {
|
||||
/**
|
||||
* All desired locales are treated equally.
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
ULOCMATCH_DEMOTION_NONE,
|
||||
/**
|
||||
* Earlier desired locales are preferred.
|
||||
*
|
||||
* <p>From each desired locale to the next,
|
||||
* the distance to any supported locale is increased by an additional amount
|
||||
* which is at least as large as most region mismatches.
|
||||
* A later desired locale has to have a better match with some supported locale
|
||||
* due to more than merely having the same region subtag.
|
||||
*
|
||||
* <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
|
||||
* yields <code>Result(en-GB, en)</code> because
|
||||
* with the demotion of sv its perfect match is no better than
|
||||
* the region distance between the earlier desired locale en-GB and en=en-US.
|
||||
*
|
||||
* <p>Notes:
|
||||
* <ul>
|
||||
* <li>In some cases, language and/or script differences can be as small as
|
||||
* the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
|
||||
* <li>It is possible for certain region differences to be larger than usual,
|
||||
* and larger than the demotion.
|
||||
* (As of CLDR 35 there is no such case, but
|
||||
* this is possible in future versions of the data.)
|
||||
* </ul>
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
ULOCMATCH_DEMOTION_REGION
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum ULocMatchDemotion ULocMatchDemotion;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Builder option for whether to include or ignore one-way (fallback) match data.
|
||||
* The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries.
|
||||
* Sometimes it is desirable to ignore those.
|
||||
*
|
||||
* <p>For example, consider a web application with the UI in a given language,
|
||||
* with a link to another, related web app.
|
||||
* The link should include the UI language, and the target server may also use
|
||||
* the client’s Accept-Language header data.
|
||||
* The target server has its own list of supported languages.
|
||||
* One may want to favor UI language consistency, that is,
|
||||
* if there is a decent match for the original UI language, we want to use it,
|
||||
* but not if it is merely a fallback.
|
||||
*
|
||||
* @see LocaleMatcher::Builder#setDirection(ULocMatchDirection)
|
||||
* @stable ICU 67
|
||||
*/
|
||||
enum ULocMatchDirection {
|
||||
/**
|
||||
* Locale matching includes one-way matches such as Breton→French. (default)
|
||||
*
|
||||
* @stable ICU 67
|
||||
*/
|
||||
ULOCMATCH_DIRECTION_WITH_ONE_WAY,
|
||||
/**
|
||||
* Locale matching limited to two-way matches including e.g. Danish↔Norwegian
|
||||
* but ignoring one-way matches.
|
||||
*
|
||||
* @stable ICU 67
|
||||
*/
|
||||
ULOCMATCH_DIRECTION_ONLY_TWO_WAY
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum ULocMatchDirection ULocMatchDirection;
|
||||
#endif
|
||||
|
||||
struct UHashtable;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
struct LSR;
|
||||
|
||||
class LikelySubtags;
|
||||
class LocaleDistance;
|
||||
class LocaleLsrIterator;
|
||||
class UVector;
|
||||
|
||||
/**
|
||||
* Immutable class that picks the best match between a user's desired locales and
|
||||
* an application's supported locales.
|
||||
* Movable but not copyable.
|
||||
*
|
||||
* <p>Example:
|
||||
* <pre>
|
||||
* UErrorCode errorCode = U_ZERO_ERROR;
|
||||
* LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode);
|
||||
* const Locale *bestSupported = matcher.getBestMatch(Locale::getUS(), errorCode);  // "en"
|
||||
* </pre>
|
||||
*
|
||||
* <p>A matcher takes into account when languages are close to one another,
|
||||
* such as Danish and Norwegian,
|
||||
* and when regional variants are close, like en-GB and en-AU as opposed to en-US.
|
||||
*
|
||||
* <p>If there are multiple supported locales with the same (language, script, region)
|
||||
* likely subtags, then the current implementation returns the first of those locales.
|
||||
* It ignores variant subtags (except for pseudolocale variants) and extensions.
|
||||
* This may change in future versions.
|
||||
*
|
||||
* <p>For example, the current implementation does not distinguish between
|
||||
* de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
|
||||
*
|
||||
* <p>If you prefer one equivalent locale over another, then provide only the preferred one,
|
||||
* or place it earlier in the list of supported locales.
|
||||
*
|
||||
* <p>Otherwise, the order of supported locales may have no effect on the best-match results.
|
||||
* The current implementation compares each desired locale with supported locales
|
||||
* in the following order:
|
||||
* 1. Default locale, if supported;
|
||||
* 2. CLDR "paradigm locales" like en-GB and es-419;
|
||||
* 3. other supported locales.
|
||||
* This may change in future versions.
|
||||
*
|
||||
* <p>Often a product will just need one matcher instance, built with the languages
|
||||
* that it supports. However, it may want multiple instances with different
|
||||
* default languages based on additional information, such as the domain.
|
||||
*
|
||||
* <p>This class is not intended for public subclassing.
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
class U_COMMON_API LocaleMatcher : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Data for the best-matching pair of a desired and a supported locale.
|
||||
* Movable but not copyable.
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
class U_COMMON_API Result : public UMemory {
public:
    /**
     * Move constructor. This object takes over the contents of src;
     * src might be modified in the process.
     *
     * @param src Result whose contents are moved into this object.
     * @stable ICU 65
     */
    Result(Result &&src) noexcept;

    /**
     * Destructor.
     *
     * @stable ICU 65
     */
    ~Result();

    /**
     * Move assignment. This object takes over the contents of src;
     * src might be modified in the process.
     *
     * @param src Result whose contents are moved into this object.
     * @stable ICU 65
     */
    Result &operator=(Result &&src) noexcept;

    /**
     * Best-matching desired locale.
     * This is nullptr when the list of desired locales was empty or
     * when no desired locale matched well enough.
     *
     * @return the best-matching desired locale, or nullptr.
     * @stable ICU 65
     */
    const Locale *getDesiredLocale() const { return desiredLocale; }

    /**
     * Best-matching supported locale.
     * When nothing matched well enough, this is the default locale.
     * The default locale is nullptr if Builder::setNoDefaultLocale() was called,
     * or if there are no supported locales and no explicit default locale was set.
     *
     * @return the best-matching supported locale, or nullptr.
     * @stable ICU 65
     */
    const Locale *getSupportedLocale() const { return supportedLocale; }

    /**
     * Position of the best-matching desired locale, counted in the order in
     * which the desired locales were supplied.
     * This is -1 when the list of desired locales was empty or
     * when no desired locale matched well enough.
     *
     * @return the index of the best-matching desired locale, or -1.
     * @stable ICU 65
     */
    int32_t getDesiredIndex() const { return desiredIndex; }

    /**
     * Position of the best-matching supported locale, counted in the order in
     * which the supported locales were given to the builder
     * (the "set" locales followed by the "added" locales).
     * If the matcher was built from a locale list string, the order is that
     * of a LocalePriorityList built from the same string.
     * This is -1 when the list of supported locales is empty or
     * when no supported locale matched well enough.
     *
     * @return the index of the best-matching supported locale, or -1.
     * @stable ICU 65
     */
    int32_t getSupportedIndex() const { return supportedIndex; }

    /**
     * Combines the best-matching supported locale with the relevant fields of
     * the best-matching desired locale, such as its -t- and -u- extensions.
     * Some fields of the supported locale may be replaced.
     * The resulting locale is the one to use for date and number formatting,
     * collation, and similar services.
     * If getSupportedLocale() returns nullptr, the root locale is returned.
     *
     * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
     *
     * @param errorCode ICU error code (in/out).
     * @return a locale combining the best-matching desired and supported locales.
     * @stable ICU 65
     */
    Locale makeResolvedLocale(UErrorCode &errorCode) const;

private:
    // Member-wise constructor; it stores its arguments without copying the locales.
    // Private, so Results are created only by this class and its friend LocaleMatcher.
    Result(const Locale *desired, const Locale *supported,
           int32_t desIndex, int32_t suppIndex, UBool owned)
            : desiredLocale(desired),
              supportedLocale(supported),
              desiredIndex(desIndex),
              supportedIndex(suppIndex),
              desiredIsOwned(owned) {}

    // Movable but not copyable.
    Result(const Result &other) = delete;
    Result &operator=(const Result &other) = delete;

    const Locale *desiredLocale;
    const Locale *supportedLocale;
    int32_t desiredIndex;
    int32_t supportedIndex;
    // NOTE(review): presumably true when this Result owns (and must delete)
    // desiredLocale; the destructor is defined elsewhere -- confirm there.
    UBool desiredIsOwned;

    friend class LocaleMatcher;
};
|
||||
|
||||
/**
|
||||
* LocaleMatcher builder.
|
||||
* Movable but not copyable.
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
class U_COMMON_API Builder : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs a builder used in chaining parameters for building a LocaleMatcher.
|
||||
*
|
||||
* @return a new Builder object
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Builder() {}
|
||||
|
||||
/**
|
||||
* Move constructor; might modify the source.
|
||||
* This builder will have the same contents that the source builder had.
|
||||
*
|
||||
* @param src Builder to move contents from.
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Builder(Builder &&src) noexcept;
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
~Builder();
|
||||
|
||||
/**
|
||||
* Move assignment; might modify the source.
|
||||
* This builder will have the same contents that the source builder had.
|
||||
*
|
||||
* @param src Builder to move contents from.
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Builder &operator=(Builder &&src) noexcept;
|
||||
|
||||
/**
|
||||
* Parses an Accept-Language string
|
||||
* (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
|
||||
* such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
|
||||
* Allows whitespace in more places but does not allow "*".
|
||||
* Clears any previously set/added supported locales first.
|
||||
*
|
||||
* @param locales the Accept-Language string of locales to set
|
||||
* @return this Builder object
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Builder &setSupportedLocalesFromListString(StringPiece locales);
|
||||
|
||||
/**
|
||||
* Copies the supported locales, preserving iteration order.
|
||||
* Clears any previously set/added supported locales first.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* @param locales the list of locales
|
||||
* @return this Builder object
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Builder &setSupportedLocales(Locale::Iterator &locales);
|
||||
|
||||
/**
|
||||
* Copies the supported locales from the begin/end range, preserving iteration order.
|
||||
* Clears any previously set/added supported locales first.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* Each of the iterator parameter values must be an
|
||||
* input iterator whose value is convertible to const Locale &.
|
||||
*
|
||||
* @param begin Start of range.
|
||||
* @param end Exclusive end of range.
|
||||
* @return this Builder object
|
||||
* @stable ICU 65
|
||||
*/
|
||||
template<typename Iter>
|
||||
Builder &setSupportedLocales(Iter begin, Iter end) {
|
||||
if (U_FAILURE(errorCode_)) { return *this; }
|
||||
clearSupportedLocales();
|
||||
while (begin != end) {
|
||||
addSupportedLocale(*begin++);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the supported locales from the begin/end range, preserving iteration order.
|
||||
* Calls the converter to convert each *begin to a Locale or const Locale &.
|
||||
* Clears any previously set/added supported locales first.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* Each of the iterator parameter values must be an
|
||||
* input iterator whose value is convertible to const Locale &.
|
||||
*
|
||||
* @param begin Start of range.
|
||||
* @param end Exclusive end of range.
|
||||
* @param converter Converter from *begin to const Locale & or compatible.
|
||||
* @return this Builder object
|
||||
* @stable ICU 65
|
||||
*/
|
||||
template<typename Iter, typename Conv>
|
||||
Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
|
||||
if (U_FAILURE(errorCode_)) { return *this; }
|
||||
clearSupportedLocales();
|
||||
while (begin != end) {
|
||||
addSupportedLocale(converter(*begin++));
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds another supported locale.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* @param locale another locale
|
||||
* @return this Builder object
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Builder &addSupportedLocale(const Locale &locale);
|
||||
|
||||
/**
|
||||
* Sets no default locale.
|
||||
* There will be no explicit or implicit default locale.
|
||||
* If there is no good match, then the matcher will return nullptr for the
|
||||
* best supported locale.
|
||||
*
|
||||
* @stable ICU 68
|
||||
*/
|
||||
Builder &setNoDefaultLocale();
|
||||
|
||||
/**
|
||||
* Sets the default locale; if nullptr, or if it is not set explicitly,
|
||||
* then the first supported locale is used as the default locale.
|
||||
* There is no default locale at all (nullptr will be returned instead)
|
||||
* if setNoDefaultLocale() is called.
|
||||
*
|
||||
* @param defaultLocale the default locale (will be copied)
|
||||
* @return this Builder object
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Builder &setDefaultLocale(const Locale *defaultLocale);
|
||||
|
||||
/**
|
||||
* If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
|
||||
* differences.
|
||||
* This is used in situations (such as maps) where
|
||||
* it is better to fall back to the same script than a similar language.
|
||||
*
|
||||
* @param subtag the subtag to favor
|
||||
* @return this Builder object
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);
|
||||
|
||||
/**
|
||||
* Option for whether all desired locales are treated equally or
|
||||
* earlier ones are preferred (this is the default).
|
||||
*
|
||||
* @param demotion the demotion per desired locale to set.
|
||||
* @return this Builder object
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
|
||||
|
||||
/**
|
||||
* Option for whether to include or ignore one-way (fallback) match data.
|
||||
* By default, they are included.
|
||||
*
|
||||
* @param matchDirection the match direction to set.
|
||||
* @return this Builder object
|
||||
* @stable ICU 67
|
||||
*/
|
||||
Builder &setDirection(ULocMatchDirection matchDirection) {
|
||||
if (U_SUCCESS(errorCode_)) {
|
||||
direction_ = matchDirection;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maximum distance for an acceptable match.
|
||||
* The matcher will return a match for a pair of locales only if
|
||||
* they match at least as well as the pair given here.
|
||||
*
|
||||
* For example, setMaxDistance(en-US, en-GB) limits matches to ones where the
|
||||
* (desired, supported) locales have a distance no greater than a region subtag difference.
|
||||
* This is much stricter than the CLDR default.
|
||||
*
|
||||
* The details of locale matching are subject to changes in
|
||||
* CLDR data and in the algorithm.
|
||||
* Specifying a maximum distance in relative terms via a sample pair of locales
|
||||
* insulates from changes that affect all distance metrics similarly,
|
||||
* but some changes will necessarily affect relative distances between
|
||||
* different pairs of locales.
|
||||
*
|
||||
* @param desired the desired locale for distance comparison.
|
||||
* @param supported the supported locale for distance comparison.
|
||||
* @return this Builder object
|
||||
* @stable ICU 68
|
||||
*/
|
||||
Builder &setMaxDistance(const Locale &desired, const Locale &supported);
|
||||
|
||||
/**
|
||||
* Sets the UErrorCode if an error occurred while setting parameters.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
*
|
||||
* @param outErrorCode Set to an error code if it does not contain one already
|
||||
* and an error occurred while setting parameters.
|
||||
* Otherwise unchanged.
|
||||
* @return true if U_FAILURE(outErrorCode)
|
||||
* @stable ICU 65
|
||||
*/
|
||||
UBool copyErrorTo(UErrorCode &outErrorCode) const;
|
||||
|
||||
/**
|
||||
* Builds and returns a new locale matcher.
|
||||
* This builder can continue to be used.
|
||||
*
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return LocaleMatcher
|
||||
* @stable ICU 65
|
||||
*/
|
||||
LocaleMatcher build(UErrorCode &errorCode) const;
|
||||
|
||||
private:
|
||||
friend class LocaleMatcher;
|
||||
|
||||
Builder(const Builder &other) = delete;
|
||||
Builder &operator=(const Builder &other) = delete;
|
||||
|
||||
void clearSupportedLocales();
|
||||
bool ensureSupportedLocaleVector();
|
||||
|
||||
UErrorCode errorCode_ = U_ZERO_ERROR;
|
||||
UVector *supportedLocales_ = nullptr;
|
||||
int32_t thresholdDistance_ = -1;
|
||||
ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;
|
||||
Locale *defaultLocale_ = nullptr;
|
||||
bool withDefault_ = true;
|
||||
ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
|
||||
ULocMatchDirection direction_ = ULOCMATCH_DIRECTION_WITH_ONE_WAY;
|
||||
Locale *maxDistanceDesired_ = nullptr;
|
||||
Locale *maxDistanceSupported_ = nullptr;
|
||||
};
|
||||
|
||||
// FYI No public LocaleMatcher constructors in C++; use the Builder.
|
||||
|
||||
/**
|
||||
* Move constructor; might modify the source.
|
||||
* This matcher will have the same settings that the source matcher had.
|
||||
* @param src source matcher
|
||||
* @stable ICU 65
|
||||
*/
|
||||
LocaleMatcher(LocaleMatcher &&src) noexcept;
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 65
|
||||
*/
|
||||
~LocaleMatcher();
|
||||
|
||||
/**
|
||||
* Move assignment operator; might modify the source.
|
||||
* This matcher will have the same settings that the source matcher had.
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source matcher
|
||||
* @return *this
|
||||
* @stable ICU 65
|
||||
*/
|
||||
LocaleMatcher &operator=(LocaleMatcher &&src) noexcept;
|
||||
|
||||
/**
|
||||
* Returns the supported locale which best matches the desired locale.
|
||||
*
|
||||
* @param desiredLocale Typically a user's language.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return the best-matching supported locale.
|
||||
* @stable ICU 65
|
||||
*/
|
||||
const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Returns the supported locale which best matches one of the desired locales.
|
||||
*
|
||||
* @param desiredLocales Typically a user's languages, in order of preference (descending).
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return the best-matching supported locale.
|
||||
* @stable ICU 65
|
||||
*/
|
||||
const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Parses an Accept-Language string
|
||||
* (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
|
||||
* such as "af, en, fr;q=0.9",
|
||||
* and returns the supported locale which best matches one of the desired locales.
|
||||
* Allows whitespace in more places but does not allow "*".
|
||||
*
|
||||
* @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return the best-matching supported locale.
|
||||
* @stable ICU 65
|
||||
*/
|
||||
const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Returns the best match between the desired locale and the supported locales.
|
||||
* If the result's desired locale is not nullptr, then it is the address of the input locale.
|
||||
* It has not been cloned.
|
||||
*
|
||||
* @param desiredLocale Typically a user's language.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return the best-matching pair of the desired and a supported locale.
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Returns the best match between the desired and supported locales.
|
||||
* If the result's desired locale is not nullptr, then it is a clone of
|
||||
* the best-matching desired locale. The Result object owns the clone.
|
||||
*
|
||||
* @param desiredLocales Typically a user's languages, in order of preference (descending).
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return the best-matching pair of a desired and a supported locale.
|
||||
* @stable ICU 65
|
||||
*/
|
||||
Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Returns true if the pair of locales matches acceptably.
|
||||
* This is influenced by Builder options such as setDirection(), setFavorSubtag(),
|
||||
* and setMaxDistance().
|
||||
*
|
||||
* @param desired The desired locale.
|
||||
* @param supported The supported locale.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return true if the pair of locales matches acceptably.
|
||||
* @stable ICU 68
|
||||
*/
|
||||
UBool isMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Returns a fraction between 0 and 1, where 1 means that the languages are a
|
||||
* perfect match, and 0 means that they are completely different.
|
||||
*
|
||||
* <p>This is mostly an implementation detail, and the precise values may change over time.
|
||||
* The implementation may use either the maximized forms or the other ones, or both.
|
||||
* The implementation may or may not rely on the forms to be consistent with each other.
|
||||
*
|
||||
* <p>Callers should construct and use a matcher rather than match pairs of locales directly.
|
||||
*
|
||||
* @param desired Desired locale.
|
||||
* @param supported Supported locale.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return value between 0 and 1, inclusive.
|
||||
* @internal (has a known user)
|
||||
*/
|
||||
double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
|
||||
private:
|
||||
LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
|
||||
LocaleMatcher(const LocaleMatcher &other) = delete;
|
||||
LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
|
||||
|
||||
int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode);
|
||||
|
||||
std::optional<int32_t> getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
|
||||
|
||||
const LikelySubtags &likelySubtags;
|
||||
const LocaleDistance &localeDistance;
|
||||
int32_t thresholdDistance;
|
||||
int32_t demotionPerDesiredLocale;
|
||||
ULocMatchFavorSubtag favorSubtag;
|
||||
ULocMatchDirection direction;
|
||||
|
||||
// These are in input order.
|
||||
const Locale ** supportedLocales;
|
||||
LSR *lsrs;
|
||||
int32_t supportedLocalesLength;
|
||||
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
|
||||
UHashtable *supportedLsrToIndex; // Map<LSR, Integer>
|
||||
// Array versions of the supportedLsrToIndex keys and values.
|
||||
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
|
||||
const LSR **supportedLSRs;
|
||||
int32_t *supportedIndexes;
|
||||
int32_t supportedLSRsLength;
|
||||
Locale *ownedDefaultLocale;
|
||||
const Locale *defaultLocale;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
#endif // __LOCALEMATCHER_H__
|
||||
609
thirdparty/icu4c/common/unicode/localpointer.h
vendored
Normal file
609
thirdparty/icu4c/common/unicode/localpointer.h
vendored
Normal file
@@ -0,0 +1,609 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: localpointer.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009nov13
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __LOCALPOINTER_H__
|
||||
#define __LOCALPOINTER_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code.
|
||||
*
|
||||
* These classes are inspired by
|
||||
* - std::auto_ptr
|
||||
* - boost::scoped_ptr & boost::scoped_array
|
||||
* - Taligent Safe Pointers (TOnlyPointerTo)
|
||||
*
|
||||
* but none of those provide for all of the goals for ICU smart pointers:
|
||||
* - Smart pointer owns the object and releases it when it goes out of scope.
|
||||
* - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust.
|
||||
* - ICU-compatible: No exceptions.
|
||||
* - Need to be able to orphan/release the pointer and its ownership.
|
||||
* - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects.
|
||||
*
|
||||
* For details see https://icu.unicode.org/design/cpp/scoped_ptr
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <memory>
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* "Smart pointer" base class; do not use directly: use LocalPointer etc.
|
||||
*
|
||||
* Base class for smart pointer classes that do not throw exceptions.
|
||||
*
|
||||
* Do not use this base class directly, since it does not delete its pointer.
|
||||
* A subclass must implement methods that delete the pointer:
|
||||
* Destructor and adoptInstead().
|
||||
*
|
||||
* There is no operator T *() provided because the programmer must decide
|
||||
* whether to use getAlias() (without transfer of ownership) or orphan()
|
||||
* (with transfer of ownership and NULLing of the pointer).
|
||||
*
|
||||
* @see LocalPointer
|
||||
* @see LocalArray
|
||||
* @see U_DEFINE_LOCAL_OPEN_POINTER
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
template<typename T>
|
||||
class LocalPointerBase {
|
||||
public:
|
||||
// No heap allocation. Use only on the stack.
|
||||
static void* U_EXPORT2 operator new(size_t) = delete;
|
||||
static void* U_EXPORT2 operator new[](size_t) = delete;
|
||||
#if U_HAVE_PLACEMENT_NEW
|
||||
static void* U_EXPORT2 operator new(size_t, void*) = delete;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Constructor takes ownership.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
explicit LocalPointerBase(T *p=nullptr) : ptr(p) {}
|
||||
/**
|
||||
* Destructor deletes the object it owns.
|
||||
* Subclass must override: Base class does nothing.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~LocalPointerBase() { /* delete ptr; */ }
|
||||
/**
|
||||
* nullptr check.
|
||||
* @return true if ==nullptr
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UBool isNull() const { return ptr==nullptr; }
|
||||
/**
|
||||
* nullptr check.
|
||||
* @return true if !=nullptr
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UBool isValid() const { return ptr!=nullptr; }
|
||||
/**
|
||||
* Comparison with a simple pointer, so that existing code
|
||||
* with ==nullptr need not be changed.
|
||||
* @param other simple pointer for comparison
|
||||
* @return true if this pointer value equals other
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
bool operator==(const T *other) const { return ptr==other; }
|
||||
/**
|
||||
* Comparison with a simple pointer, so that existing code
|
||||
* with !=nullptr need not be changed.
|
||||
* @param other simple pointer for comparison
|
||||
* @return true if this pointer value differs from other
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
bool operator!=(const T *other) const { return ptr!=other; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the pointer value
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T *getAlias() const { return ptr; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the pointer value as a reference
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T &operator*() const { return *ptr; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the pointer value
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T *operator->() const { return ptr; }
|
||||
/**
|
||||
* Gives up ownership; the internal pointer becomes nullptr.
|
||||
* @return the pointer value;
|
||||
* caller becomes responsible for deleting the object
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T *orphan() {
|
||||
T *p=ptr;
|
||||
ptr=nullptr;
|
||||
return p;
|
||||
}
|
||||
/**
|
||||
* Deletes the object it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
* Subclass must override: Base class does not delete the object.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
void adoptInstead(T *p) {
|
||||
// delete ptr;
|
||||
ptr=p;
|
||||
}
|
||||
protected:
|
||||
/**
|
||||
* Actual pointer.
|
||||
* @internal
|
||||
*/
|
||||
T *ptr;
|
||||
private:
|
||||
// No comparison operators with other LocalPointerBases.
|
||||
bool operator==(const LocalPointerBase<T> &other) = delete;
|
||||
bool operator!=(const LocalPointerBase<T> &other) = delete;
|
||||
// No ownership sharing: No copy constructor, no assignment operator.
|
||||
LocalPointerBase(const LocalPointerBase<T> &other) = delete;
|
||||
void operator=(const LocalPointerBase<T> &other) = delete;
|
||||
};
|
||||
|
||||
/**
|
||||
* "Smart pointer" class, deletes objects via the standard C++ delete operator.
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* Usage example:
|
||||
* \code
|
||||
* LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
|
||||
* int32_t length=s->length(); // 2
|
||||
* char16_t lead=s->charAt(0); // 0xd900
|
||||
* if(some condition) { return; } // no need to explicitly delete the pointer
|
||||
* s.adoptInstead(new UnicodeString((char16_t)0xfffc));
|
||||
* length=s->length(); // 1
|
||||
* // no need to explicitly delete the pointer
|
||||
* \endcode
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
template<typename T>
|
||||
class LocalPointer : public LocalPointerBase<T> {
|
||||
public:
|
||||
using LocalPointerBase<T>::operator*;
|
||||
using LocalPointerBase<T>::operator->;
|
||||
/**
|
||||
* Constructor takes ownership.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
explicit LocalPointer(T *p=nullptr) : LocalPointerBase<T>(p) {}
|
||||
/**
|
||||
* Constructor takes ownership and reports an error if nullptr.
|
||||
*
|
||||
* This constructor is intended to be used with other-class constructors
|
||||
* that may report a failure UErrorCode,
|
||||
* so that callers need to check only for U_FAILURE(errorCode)
|
||||
* and not also separately for isNull().
|
||||
*
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==nullptr and no other failure code had been set
|
||||
* @stable ICU 55
|
||||
*/
|
||||
LocalPointer(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
|
||||
if(p==nullptr && U_SUCCESS(errorCode)) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Move constructor, leaves src with isNull().
|
||||
* @param src source smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalPointer(LocalPointer<T> &&src) noexcept : LocalPointerBase<T>(src.ptr) {
|
||||
src.ptr=nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a LocalPointer from a C++11 std::unique_ptr.
|
||||
* The LocalPointer steals the object owned by the std::unique_ptr.
|
||||
*
|
||||
* This constructor works via move semantics. If your std::unique_ptr is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the pointer will be stolen.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
explicit LocalPointer(std::unique_ptr<T> &&p)
|
||||
: LocalPointerBase<T>(p.release()) {}
|
||||
|
||||
/**
|
||||
* Destructor deletes the object it owns.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~LocalPointer() {
|
||||
delete LocalPointerBase<T>::ptr;
|
||||
}
|
||||
/**
|
||||
* Move assignment operator, leaves src with isNull().
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source smart pointer
|
||||
* @return *this
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalPointer<T> &operator=(LocalPointer<T> &&src) noexcept {
|
||||
delete LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=src.ptr;
|
||||
src.ptr=nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Move-assign from an std::unique_ptr to this LocalPointer.
|
||||
* Steals the pointer from the std::unique_ptr.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the pointer will be stolen.
|
||||
* @return *this
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocalPointer<T> &operator=(std::unique_ptr<T> &&p) noexcept {
|
||||
adoptInstead(p.release());
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap pointers.
|
||||
* @param other other smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
void swap(LocalPointer<T> &other) noexcept {
|
||||
T *temp=LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=other.ptr;
|
||||
other.ptr=temp;
|
||||
}
|
||||
/**
|
||||
* Non-member LocalPointer swap function.
|
||||
* @param p1 will get p2's pointer
|
||||
* @param p2 will get p1's pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
friend inline void swap(LocalPointer<T> &p1, LocalPointer<T> &p2) noexcept {
|
||||
p1.swap(p2);
|
||||
}
|
||||
/**
|
||||
* Deletes the object it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
void adoptInstead(T *p) {
|
||||
delete LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
}
|
||||
/**
|
||||
* Deletes the object it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
*
|
||||
* If U_FAILURE(errorCode), then the current object is retained and the new one deleted.
|
||||
*
|
||||
* If U_SUCCESS(errorCode) but the input pointer is nullptr,
|
||||
* then U_MEMORY_ALLOCATION_ERROR is set,
|
||||
* the current object is deleted, and nullptr is set.
|
||||
*
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==nullptr and no other failure code had been set
|
||||
* @stable ICU 55
|
||||
*/
|
||||
void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
delete LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
if(p==nullptr) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
} else {
|
||||
delete p;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Conversion operator to a C++11 std::unique_ptr.
|
||||
* Disowns the object and gives it to the returned std::unique_ptr.
|
||||
*
|
||||
* This operator works via move semantics. If your LocalPointer is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @return An std::unique_ptr owning the pointer previously owned by this
|
||||
* icu::LocalPointer.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
operator std::unique_ptr<T> () && {
|
||||
return std::unique_ptr<T>(LocalPointerBase<T>::orphan());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* "Smart pointer" class, deletes objects via the C++ array delete[] operator.
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
* Adds operator[] for array item access.
|
||||
*
|
||||
* Usage example:
|
||||
* \code
|
||||
* LocalArray<UnicodeString> a(new UnicodeString[2]);
|
||||
* a[0].append((char16_t)0x61);
|
||||
* if(some condition) { return; } // no need to explicitly delete the array
|
||||
* a.adoptInstead(new UnicodeString[4]);
|
||||
* a[3].append((char16_t)0x62).append((char16_t)0x63).reverse();
|
||||
* // no need to explicitly delete the array
|
||||
* \endcode
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
template<typename T>
|
||||
class LocalArray : public LocalPointerBase<T> {
|
||||
public:
|
||||
using LocalPointerBase<T>::operator*;
|
||||
using LocalPointerBase<T>::operator->;
|
||||
/**
|
||||
* Constructor takes ownership.
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
explicit LocalArray(T *p=nullptr) : LocalPointerBase<T>(p) {}
|
||||
/**
|
||||
* Constructor takes ownership and reports an error if nullptr.
|
||||
*
|
||||
* This constructor is intended to be used with other-class constructors
|
||||
* that may report a failure UErrorCode,
|
||||
* so that callers need to check only for U_FAILURE(errorCode)
|
||||
* and not also separately for isNull().
|
||||
*
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==nullptr and no other failure code had been set
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
|
||||
if(p==nullptr && U_SUCCESS(errorCode)) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Move constructor, leaves src with isNull().
|
||||
* @param src source smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalArray(LocalArray<T> &&src) noexcept : LocalPointerBase<T>(src.ptr) {
|
||||
src.ptr=nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a LocalArray from a C++11 std::unique_ptr of an array type.
|
||||
* The LocalPointer steals the array owned by the std::unique_ptr.
|
||||
*
|
||||
* This constructor works via move semantics. If your std::unique_ptr is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the array will be stolen.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
explicit LocalArray(std::unique_ptr<T[]> &&p)
|
||||
: LocalPointerBase<T>(p.release()) {}
|
||||
|
||||
/**
|
||||
* Destructor deletes the array it owns.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~LocalArray() {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
}
|
||||
/**
|
||||
* Move assignment operator, leaves src with isNull().
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source smart pointer
|
||||
* @return *this
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalArray<T> &operator=(LocalArray<T> &&src) noexcept {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=src.ptr;
|
||||
src.ptr=nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Move-assign from an std::unique_ptr to this LocalPointer.
|
||||
* Steals the array from the std::unique_ptr.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the array will be stolen.
|
||||
* @return *this
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocalArray<T> &operator=(std::unique_ptr<T[]> &&p) noexcept {
|
||||
adoptInstead(p.release());
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap pointers.
|
||||
* @param other other smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
void swap(LocalArray<T> &other) noexcept {
|
||||
T *temp=LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=other.ptr;
|
||||
other.ptr=temp;
|
||||
}
|
||||
/**
|
||||
* Non-member LocalArray swap function.
|
||||
* @param p1 will get p2's pointer
|
||||
* @param p2 will get p1's pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
friend inline void swap(LocalArray<T> &p1, LocalArray<T> &p2) noexcept {
|
||||
p1.swap(p2);
|
||||
}
|
||||
/**
|
||||
* Deletes the array it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
void adoptInstead(T *p) {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
}
|
||||
/**
|
||||
* Deletes the array it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
*
|
||||
* If U_FAILURE(errorCode), then the current array is retained and the new one deleted.
|
||||
*
|
||||
* If U_SUCCESS(errorCode) but the input pointer is nullptr,
|
||||
* then U_MEMORY_ALLOCATION_ERROR is set,
|
||||
* the current array is deleted, and nullptr is set.
|
||||
*
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==nullptr and no other failure code had been set
|
||||
* @stable ICU 56
|
||||
*/
|
||||
void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
if(p==nullptr) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
} else {
|
||||
delete[] p;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Array item access (writable).
|
||||
* No index bounds check.
|
||||
* @param i array index
|
||||
* @return reference to the array item
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
|
||||
|
||||
/**
|
||||
* Conversion operator to a C++11 std::unique_ptr.
|
||||
* Disowns the object and gives it to the returned std::unique_ptr.
|
||||
*
|
||||
* This operator works via move semantics. If your LocalPointer is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @return An std::unique_ptr owning the pointer previously owned by this
|
||||
* icu::LocalPointer.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
operator std::unique_ptr<T[]> () && {
|
||||
return std::unique_ptr<T[]>(LocalPointerBase<T>::orphan());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \def U_DEFINE_LOCAL_OPEN_POINTER
|
||||
* "Smart pointer" definition macro, deletes objects via the closeFunction.
|
||||
* Defines a subclass of LocalPointerBase which works just
|
||||
* like LocalPointer<Type> except that this subclass will use the closeFunction
|
||||
* rather than the C++ delete operator.
|
||||
*
|
||||
* Usage example:
|
||||
* \code
|
||||
* LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode));
|
||||
* utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
|
||||
* utf8Out, (int32_t)sizeof(utf8Out),
|
||||
* utf8In, utf8InLength, &errorCode);
|
||||
* if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap
|
||||
* \endcode
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
    using LocalPointerClassName = internal::LocalOpenPointer<Type, closeFunction>
|
||||
|
||||
#ifndef U_IN_DOXYGEN
|
||||
namespace internal {
|
||||
/**
|
||||
* Implementation, do not use directly: use U_DEFINE_LOCAL_OPEN_POINTER.
|
||||
*
|
||||
* @see U_DEFINE_LOCAL_OPEN_POINTER
|
||||
* @internal
|
||||
*/
|
||||
template <typename Type, auto closeFunction>
|
||||
class LocalOpenPointer : public LocalPointerBase<Type> {
|
||||
using LocalPointerBase<Type>::ptr;
|
||||
public:
|
||||
using LocalPointerBase<Type>::operator*;
|
||||
using LocalPointerBase<Type>::operator->;
|
||||
explicit LocalOpenPointer(Type *p=nullptr) : LocalPointerBase<Type>(p) {}
|
||||
LocalOpenPointer(LocalOpenPointer &&src) noexcept
|
||||
: LocalPointerBase<Type>(src.ptr) {
|
||||
src.ptr=nullptr;
|
||||
}
|
||||
/* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */
|
||||
explicit LocalOpenPointer(std::unique_ptr<Type, decltype(closeFunction)> &&p)
|
||||
: LocalPointerBase<Type>(p.release()) {}
|
||||
~LocalOpenPointer() { if (ptr != nullptr) { closeFunction(ptr); } }
|
||||
LocalOpenPointer &operator=(LocalOpenPointer &&src) noexcept {
|
||||
if (ptr != nullptr) { closeFunction(ptr); }
|
||||
LocalPointerBase<Type>::ptr=src.ptr;
|
||||
src.ptr=nullptr;
|
||||
return *this;
|
||||
}
|
||||
/* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */
|
||||
LocalOpenPointer &operator=(std::unique_ptr<Type, decltype(closeFunction)> &&p) {
|
||||
adoptInstead(p.release());
|
||||
return *this;
|
||||
}
|
||||
void swap(LocalOpenPointer &other) noexcept {
|
||||
Type *temp=LocalPointerBase<Type>::ptr;
|
||||
LocalPointerBase<Type>::ptr=other.ptr;
|
||||
other.ptr=temp;
|
||||
}
|
||||
friend inline void swap(LocalOpenPointer &p1, LocalOpenPointer &p2) noexcept {
|
||||
p1.swap(p2);
|
||||
}
|
||||
void adoptInstead(Type *p) {
|
||||
if (ptr != nullptr) { closeFunction(ptr); }
|
||||
ptr=p;
|
||||
}
|
||||
operator std::unique_ptr<Type, decltype(closeFunction)> () && {
|
||||
return std::unique_ptr<Type, decltype(closeFunction)>(LocalPointerBase<Type>::orphan(), closeFunction);
|
||||
}
|
||||
};
|
||||
} // namespace internal
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
#endif /* __LOCALPOINTER_H__ */
|
||||
211
thirdparty/icu4c/common/unicode/locdspnm.h
vendored
Normal file
211
thirdparty/icu4c/common/unicode/locdspnm.h
vendored
Normal file
@@ -0,0 +1,211 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 2010-2016, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef LOCDSPNM_H
|
||||
#define LOCDSPNM_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Provides display names of Locale and its components.
|
||||
*/
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/strenum.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/uldnames.h"
|
||||
#include "unicode/udisplaycontext.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Returns display names of Locales and components of Locales. For
|
||||
* more information on language, script, region, variant, key, and
|
||||
* values, see Locale.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
class U_COMMON_API LocaleDisplayNames : public UObject {
public:
    /**
     * Destructor.
     * @stable ICU 4.4
     */
    virtual ~LocaleDisplayNames();

    /**
     * Convenience overload of
     * {@link #createInstance(const Locale& locale, UDialectHandling dialectHandling)}
     * that specifies STANDARD dialect handling.
     * @param locale the display locale
     * @return a LocaleDisplayNames instance
     * @stable ICU 4.4
     */
    static inline LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale);

    /**
     * Returns an instance of LocaleDisplayNames that returns names
     * formatted for the provided locale, using the provided
     * dialectHandling.
     *
     * @param locale the display locale
     * @param dialectHandling how to select names for locales
     * @return a LocaleDisplayNames instance
     * @stable ICU 4.4
     */
    static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale,
                                                        UDialectHandling dialectHandling);

    /**
     * Returns an instance of LocaleDisplayNames that returns names formatted
     * for the provided locale, using the provided UDisplayContext settings.
     *
     * @param locale the display locale
     * @param contexts List of one or more context settings (e.g. for dialect
     *               handling, capitalization, etc.)
     * @param length Number of items in the contexts list
     * @return a LocaleDisplayNames instance
     * @stable ICU 51
     */
    static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale,
                                                        UDisplayContext *contexts, int32_t length);

    // getters for state

    /**
     * Returns the locale used to determine the display names. This is
     * not necessarily the same locale passed to {@link #createInstance}.
     * @return the display locale
     * @stable ICU 4.4
     */
    virtual const Locale& getLocale() const = 0;

    /**
     * Returns the dialect handling used in the display names.
     * @return the dialect handling enum
     * @stable ICU 4.4
     */
    virtual UDialectHandling getDialectHandling() const = 0;

    /**
     * Returns the UDisplayContext value for the specified UDisplayContextType.
     * @param type the UDisplayContextType whose value to return
     * @return the UDisplayContext for the specified type.
     * @stable ICU 51
     */
    virtual UDisplayContext getContext(UDisplayContextType type) const = 0;

    // names for entire locales

    /**
     * Returns the display name of the provided locale.
     * @param locale the locale whose display name to return
     * @param result receives the locale's display name
     * @return the display name of the provided locale
     * @stable ICU 4.4
     */
    virtual UnicodeString& localeDisplayName(const Locale& locale, UnicodeString& result) const = 0;

    /**
     * Returns the display name of the provided locale id.
     * @param localeId the id of the locale whose display name to return
     * @param result receives the locale's display name
     * @return the display name of the provided locale
     * @stable ICU 4.4
     */
    virtual UnicodeString& localeDisplayName(const char* localeId, UnicodeString& result) const = 0;

    // names for components of a locale id

    /**
     * Returns the display name of the provided language code.
     * @param lang the language code
     * @param result receives the language code's display name
     * @return the display name of the provided language code
     * @stable ICU 4.4
     */
    virtual UnicodeString& languageDisplayName(const char* lang, UnicodeString& result) const = 0;

    /**
     * Returns the display name of the provided script code.
     * @param script the script code
     * @param result receives the script code's display name
     * @return the display name of the provided script code
     * @stable ICU 4.4
     */
    virtual UnicodeString& scriptDisplayName(const char* script, UnicodeString& result) const = 0;

    /**
     * Returns the display name of the provided script code.
     * @param scriptCode the script code number
     * @param result receives the script code's display name
     * @return the display name of the provided script code
     * @stable ICU 4.4
     */
    virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode, UnicodeString& result) const = 0;

    /**
     * Returns the display name of the provided region code.
     * @param region the region code
     * @param result receives the region code's display name
     * @return the display name of the provided region code
     * @stable ICU 4.4
     */
    virtual UnicodeString& regionDisplayName(const char* region, UnicodeString& result) const = 0;

    /**
     * Returns the display name of the provided variant.
     * @param variant the variant string
     * @param result receives the variant's display name
     * @return the display name of the provided variant
     * @stable ICU 4.4
     */
    virtual UnicodeString& variantDisplayName(const char* variant, UnicodeString& result) const = 0;

    /**
     * Returns the display name of the provided locale key.
     * @param key the locale key name
     * @param result receives the locale key's display name
     * @return the display name of the provided locale key
     * @stable ICU 4.4
     */
    virtual UnicodeString& keyDisplayName(const char* key, UnicodeString& result) const = 0;

    /**
     * Returns the display name of the provided value (used with the provided key).
     * @param key the locale key name
     * @param value the locale key's value
     * @param result receives the value's display name
     * @return the display name of the provided value
     * @stable ICU 4.4
     */
    virtual UnicodeString& keyValueDisplayName(const char* key, const char* value,
                                               UnicodeString& result) const = 0;
};
|
||||
|
||||
// Single-argument convenience overload: forwards to the
// (locale, dialectHandling) factory with ULDN_STANDARD_NAMES.
inline LocaleDisplayNames* LocaleDisplayNames::createInstance(const Locale& locale) {
    return createInstance(locale, ULDN_STANDARD_NAMES);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
1297
thirdparty/icu4c/common/unicode/locid.h
vendored
Normal file
1297
thirdparty/icu4c/common/unicode/locid.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
950
thirdparty/icu4c/common/unicode/messagepattern.h
vendored
Normal file
950
thirdparty/icu4c/common/unicode/messagepattern.h
vendored
Normal file
@@ -0,0 +1,950 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: messagepattern.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2011mar14
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __MESSAGEPATTERN_H__
|
||||
#define __MESSAGEPATTERN_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/**
|
||||
* Mode for when an apostrophe starts quoted literal text for MessageFormat output.
|
||||
* The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
|
||||
* (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
|
||||
* <p>
|
||||
* A pair of adjacent apostrophes always results in a single apostrophe in the output,
|
||||
* even when the pair is between two single, text-quoting apostrophes.
|
||||
* <p>
|
||||
* The following table shows examples of desired MessageFormat.format() output
|
||||
* with the pattern strings that yield that output.
|
||||
* <p>
|
||||
* <table>
|
||||
* <tr>
|
||||
* <th>Desired output</th>
|
||||
* <th>DOUBLE_OPTIONAL</th>
|
||||
* <th>DOUBLE_REQUIRED</th>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>I see {many}</td>
|
||||
* <td>I see '{many}'</td>
|
||||
* <td>(same)</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>I said {'Wow!'}</td>
|
||||
* <td>I said '{''Wow!''}'</td>
|
||||
* <td>(same)</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>I don't know</td>
|
||||
* <td>I don't know OR<br> I don''t know</td>
|
||||
* <td>I don''t know</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
* @stable ICU 4.8
|
||||
* @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
|
||||
*/
|
||||
enum UMessagePatternApostropheMode {
    /**
     * A literal apostrophe is represented by
     * either a single or a double apostrophe pattern character.
     * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
     * if it immediately precedes a curly brace {},
     * or a pipe symbol | if inside a choice format,
     * or a pound symbol # if inside a plural format.
     * <p>
     * This is the default behavior starting with ICU 4.8.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_OPTIONAL = 0,
    /**
     * A literal apostrophe must be represented by
     * a double apostrophe pattern character.
     * A single apostrophe always starts quoted literal text.
     * <p>
     * This is the behavior of ICU 4.6 and earlier, and of the JDK.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_REQUIRED = 1
};
|
||||
/**
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
|
||||
|
||||
/**
|
||||
* MessagePattern::Part type constants.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
enum UMessagePatternPartType {
    /**
     * Start of a message pattern (main or nested).
     * The length is 0 for the top-level message
     * and for a choice argument sub-message, otherwise 1 for the '{'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * <p>
     * There is always a later MSG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_START = 0,
    /**
     * End of a message pattern (main or nested).
     * The length is 0 for the top-level message and
     * the last sub-message of a choice argument,
     * otherwise 1 for the '}' or (in a choice argument style) the '|'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_LIMIT = 1,
    /**
     * Indicates a substring of the pattern string which is to be skipped when formatting.
     * For example, an apostrophe that begins or ends quoted text
     * would be indicated with such a part.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX = 2,
    /**
     * Indicates that a syntax character needs to be inserted for auto-quoting.
     * The length is 0.
     * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_INSERT_CHAR = 3,
    /**
     * Indicates a syntactic (non-escaped) # symbol in a plural variant.
     * When formatting, replace this part's substring with the
     * (value-offset) for the plural argument value.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER = 4,
    /**
     * Start of an argument.
     * The length is 1 for the '{'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * <p>
     * This part is followed by either an ARG_NUMBER or ARG_NAME,
     * followed by optional argument sub-parts (see UMessagePatternArgType constants)
     * and finally an ARG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_START = 5,
    /**
     * End of an argument.
     * The length is 1 for the '}'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_LIMIT = 6,
    /**
     * The argument number, provided by the value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NUMBER = 7,
    /**
     * The argument name.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NAME = 8,
    /**
     * The argument type.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_TYPE = 9,
    /**
     * The argument style text.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_STYLE = 10,
    /**
     * A selector substring in a "complex" argument style.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_SELECTOR = 11,
    /**
     * An integer value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is the integer value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_INT = 12,
    /**
     * A numeric value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is an index into an internal array of numeric values;
     * use getNumericValue().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_DOUBLE = 13
};
|
||||
/**
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
typedef enum UMessagePatternPartType UMessagePatternPartType;
|
||||
|
||||
/**
|
||||
* Argument type constants.
|
||||
* Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
|
||||
*
|
||||
* Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
|
||||
* with a nesting level one greater than the surrounding message.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
enum UMessagePatternArgType {
    /**
     * No type was specified for the argument.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_NONE = 0,
    /**
     * "Simple" argument type; the type itself is given by the ARG_TYPE part,
     * which may be followed by an ARG_STYLE part.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SIMPLE = 1,
    /**
     * ChoiceFormat argument, consisting of one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_CHOICE = 2,
    /**
     * Cardinal-number PluralFormat argument.
     * It has an optional ARG_INT or ARG_DOUBLE offset (e.g., offset:1)
     * followed by one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * When a selector has an explicit value (e.g., =2), that value is provided
     * by the ARG_INT or ARG_DOUBLE part preceding the message;
     * otherwise the message immediately follows the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_PLURAL = 3,
    /**
     * SelectFormat argument with one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SELECT = 4,
    /**
     * Ordinal-number PluralFormat argument; its style parts sequence and
     * semantics are the same as for UMSGPAT_ARG_TYPE_PLURAL.
     * @stable ICU 50
     */
    UMSGPAT_ARG_TYPE_SELECTORDINAL = 5
};
|
||||
/**
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
typedef enum UMessagePatternArgType UMessagePatternArgType;
|
||||
|
||||
/**
|
||||
* \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
|
||||
* Returns true if the argument type has a plural style part sequence and semantics,
|
||||
* for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
|
||||
* @stable ICU 50
|
||||
*/
|
||||
#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
|
||||
((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
|
||||
|
||||
enum {
|
||||
/**
|
||||
* Return value from MessagePattern.validateArgumentName() for when
|
||||
* the string is a valid "pattern identifier" but not a number.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
|
||||
|
||||
/**
|
||||
* Return value from MessagePattern.validateArgumentName() for when
|
||||
* the string is invalid.
|
||||
* It might not be a valid "pattern identifier",
|
||||
* or it might consist only of ASCII digits but have a leading zero or be too large a number.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UMSGPAT_ARG_NAME_NOT_VALID=-2
|
||||
};
|
||||
|
||||
/**
|
||||
* Special value that is returned by getNumericValue(Part) when no
|
||||
* numeric value is defined for a part.
|
||||
* @see MessagePattern.getNumericValue()
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class MessagePatternDoubleList;
|
||||
class MessagePatternPartsList;
|
||||
|
||||
/**
|
||||
* Parses and represents ICU MessageFormat patterns.
|
||||
* Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
|
||||
* Used in the implementations of those classes as well as in tools
|
||||
* for message validation, translation and format conversion.
|
||||
* <p>
|
||||
* The parser handles all syntax relevant for identifying message arguments.
|
||||
* This includes "complex" arguments whose style strings contain
|
||||
* nested MessageFormat pattern substrings.
|
||||
* For "simple" arguments (with no nested MessageFormat pattern substrings),
|
||||
* the argument style is not parsed any further.
|
||||
* <p>
|
||||
* The parser handles named and numbered message arguments and allows both in one message.
|
||||
* <p>
|
||||
* Once a pattern has been parsed successfully, iterate through the parsed data
|
||||
* with countParts(), getPart() and related methods.
|
||||
* <p>
|
||||
* The data logically represents a parse tree, but is stored and accessed
|
||||
* as a list of "parts" for fast and simple parsing and to minimize object allocations.
|
||||
* Arguments and nested messages are best handled via recursion.
|
||||
* For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
|
||||
* the index of the corresponding _LIMIT "part".
|
||||
* <p>
|
||||
* List of "parts":
|
||||
* <pre>
|
||||
* message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
|
||||
* argument = noneArg | simpleArg | complexArg
|
||||
* complexArg = choiceArg | pluralArg | selectArg
|
||||
*
|
||||
* noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
|
||||
* simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
|
||||
* choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
|
||||
* pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
|
||||
* selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
|
||||
*
|
||||
* choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
|
||||
* pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
|
||||
* selectStyle = (ARG_SELECTOR message)+
|
||||
* </pre>
|
||||
* <ul>
|
||||
* <li>Literal output text is not represented directly by "parts" but accessed
|
||||
* between parts of a message, from one part's getLimit() to the next part's getIndex().
|
||||
* <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
|
||||
* <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
|
||||
* the less-than-or-equal-to sign (U+2264).
|
||||
* <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
|
||||
* The optional numeric Part between each (ARG_SELECTOR, message) pair
|
||||
* is the value of an explicit-number selector like "=2",
|
||||
* otherwise the selector is a non-numeric identifier.
|
||||
* <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
|
||||
* </ul>
|
||||
* <p>
|
||||
* This class is not intended for public subclassing.
|
||||
*
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API MessagePattern : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
MessagePattern(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Constructs an empty MessagePattern.
|
||||
* @param mode Explicit UMessagePatternApostropheMode.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Constructs a MessagePattern with default UMessagePatternApostropheMode and
|
||||
* parses the MessageFormat pattern string.
|
||||
* @param pattern a MessageFormat pattern string
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be nullptr.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* TODO: turn @throws into UErrorCode specifics?
|
||||
* @throws IllegalArgumentException for syntax errors in the pattern string
|
||||
* @throws IndexOutOfBoundsException if certain limits are exceeded
|
||||
* (e.g., argument number too high, argument name too long, etc.)
|
||||
* @throws NumberFormatException if a number could not be parsed
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @param other Object to copy.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
MessagePattern(const MessagePattern &other);
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
* @param other Object to copy.
|
||||
* @return *this=other
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
MessagePattern &operator=(const MessagePattern &other);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual ~MessagePattern();
|
||||
|
||||
/**
|
||||
* Parses a MessageFormat pattern string.
|
||||
* @param pattern a MessageFormat pattern string
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be nullptr.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
* @throws IllegalArgumentException for syntax errors in the pattern string
|
||||
* @throws IndexOutOfBoundsException if certain limits are exceeded
|
||||
* (e.g., argument number too high, argument name too long, etc.)
|
||||
* @throws NumberFormatException if a number could not be parsed
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
MessagePattern &parse(const UnicodeString &pattern,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Parses a ChoiceFormat pattern string.
|
||||
* @param pattern a ChoiceFormat pattern string
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be nullptr.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
* @throws IllegalArgumentException for syntax errors in the pattern string
|
||||
* @throws IndexOutOfBoundsException if certain limits are exceeded
|
||||
* (e.g., argument number too high, argument name too long, etc.)
|
||||
* @throws NumberFormatException if a number could not be parsed
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Parses a PluralFormat pattern string.
|
||||
* @param pattern a PluralFormat pattern string
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be nullptr.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
* @throws IllegalArgumentException for syntax errors in the pattern string
|
||||
* @throws IndexOutOfBoundsException if certain limits are exceeded
|
||||
* (e.g., argument number too high, argument name too long, etc.)
|
||||
* @throws NumberFormatException if a number could not be parsed
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
MessagePattern &parsePluralStyle(const UnicodeString &pattern,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Parses a SelectFormat pattern string.
|
||||
* @param pattern a SelectFormat pattern string
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be nullptr.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
* @throws IllegalArgumentException for syntax errors in the pattern string
|
||||
* @throws IndexOutOfBoundsException if certain limits are exceeded
|
||||
* (e.g., argument number too high, argument name too long, etc.)
|
||||
* @throws NumberFormatException if a number could not be parsed
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
MessagePattern &parseSelectStyle(const UnicodeString &pattern,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Clears this MessagePattern.
|
||||
* countParts() will return 0.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
void clear();
|
||||
|
||||
/**
|
||||
* Clears this MessagePattern and sets the UMessagePatternApostropheMode.
|
||||
* countParts() will return 0.
|
||||
* @param mode The new UMessagePatternApostropheMode.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
    // Clear the pattern first, then store the requested apostrophe mode.
    this->clear();
    this->aposMode = mode;
}
|
||||
|
||||
/**
|
||||
* @param other another object to compare with.
|
||||
* @return true if this object is equivalent to the other one.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
bool operator==(const MessagePattern &other) const;
|
||||
|
||||
/**
|
||||
* @param other another object to compare with.
|
||||
* @return false if this object is equivalent to the other one.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline bool operator!=(const MessagePattern &other) const {
    // Inequality is defined purely as the negation of operator==.
    const bool isSame = operator==(other);
    return !isSame;
}
|
||||
|
||||
/**
|
||||
* @return A hash code for this object.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t hashCode() const;
|
||||
|
||||
/**
|
||||
* @return this instance's UMessagePatternApostropheMode.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UMessagePatternApostropheMode getApostropheMode() const {
    // Plain accessor for the stored apostrophe mode.
    return this->aposMode;
}
|
||||
|
||||
// Java has package-private jdkAposMode() here.
|
||||
// In C++, this is declared in the MessageImpl class.
|
||||
|
||||
/**
|
||||
* @return the parsed pattern string, as a reference to this object's internal string (so it is never null).
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
const UnicodeString &getPatternString() const {
    // Hands out a reference to the member string; no copy is made here.
    return this->msg;
}
|
||||
|
||||
/**
|
||||
* Does the parsed pattern have named arguments like {first_name}?
|
||||
* @return true if the parsed pattern has at least one named argument.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UBool hasNamedArguments() const {
    // Plain accessor for the hasArgNames flag.
    return this->hasArgNames;
}
|
||||
|
||||
/**
|
||||
* Does the parsed pattern have numbered arguments like {2}?
|
||||
* @return true if the parsed pattern has at least one numbered argument.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UBool hasNumberedArguments() const {
    // Plain accessor for the hasArgNumbers flag.
    return this->hasArgNumbers;
}
|
||||
|
||||
/**
|
||||
* Validates and parses an argument name or argument number string.
|
||||
* An argument name must be a "pattern identifier", that is, it must contain
|
||||
* no Unicode Pattern_Syntax or Pattern_White_Space characters.
|
||||
* If it only contains ASCII digits, then it must be a small integer with no leading zero.
|
||||
* @param name Input string.
|
||||
* @return >=0 if the name is a valid number,
|
||||
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
|
||||
* ARG_NAME_NOT_VALID (-2) if it is neither.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
static int32_t validateArgumentName(const UnicodeString &name);
|
||||
|
||||
/**
|
||||
* Returns a version of the parsed pattern string where each ASCII apostrophe
|
||||
* is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
|
||||
* <p>
|
||||
* For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
|
||||
* into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
|
||||
* @return the deep-auto-quoted version of the parsed pattern string.
|
||||
* @see MessageFormat.autoQuoteApostrophe()
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UnicodeString autoQuoteApostropheDeep() const;
|
||||
|
||||
class Part;
|
||||
|
||||
/**
|
||||
* Returns the number of "parts" created by parsing the pattern string.
|
||||
* Returns 0 if no pattern has been parsed or clear() was called.
|
||||
* @return the number of pattern parts.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t countParts() const {
|
||||
return partsLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the i-th pattern "part".
|
||||
* @param i The index of the Part data. (0..countParts()-1)
|
||||
* @return the i-th pattern "part".
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
const Part &getPart(int32_t i) const {
    // No range check is performed here; the caller must pass 0..countParts()-1.
    const Part &selected = parts[i];
    return selected;
}
|
||||
|
||||
/**
|
||||
* Returns the UMessagePatternPartType of the i-th pattern "part".
|
||||
* Convenience method for getPart(i).getType().
|
||||
* @param i The index of the Part data. (0..countParts()-1)
|
||||
* @return The UMessagePatternPartType of the i-th Part.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UMessagePatternPartType getPartType(int32_t i) const {
    // Look up the i-th part, then read its type field directly.
    const Part &part = getPart(i);
    return part.type;
}
|
||||
|
||||
/**
|
||||
* Returns the pattern index of the specified pattern "part".
|
||||
* Convenience method for getPart(partIndex).getIndex().
|
||||
* @param partIndex The index of the Part data. (0..countParts()-1)
|
||||
* @return The pattern index of this Part.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t getPatternIndex(int32_t partIndex) const {
|
||||
return getPart(partIndex).index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the substring of the pattern string indicated by the Part.
|
||||
* Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
|
||||
* @param part a part of this MessagePattern.
|
||||
* @return the substring associated with part.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UnicodeString getSubstring(const Part &part) const {
    // The part records where its substring starts in the pattern and how long it is.
    const int32_t start = part.index;
    const int32_t count = part.length;
    return msg.tempSubString(start, count);
}
|
||||
|
||||
/**
|
||||
* Compares the part's substring with the input string s.
|
||||
* @param part a part of this MessagePattern.
|
||||
* @param s a string.
|
||||
* @return true if getSubstring(part).equals(s).
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
    // A comparison result of zero means the part's pattern range equals s.
    return msg.compare(part.index, part.length, s) == 0;
}
|
||||
|
||||
/**
|
||||
* Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
|
||||
* @param part a part of this MessagePattern.
|
||||
* @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
double getNumericValue(const Part &part) const;
|
||||
|
||||
/**
|
||||
* Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
|
||||
* @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
|
||||
* @return the "offset:" value.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
double getPluralOffset(int32_t pluralStart) const;
|
||||
|
||||
/**
|
||||
* Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
|
||||
* @param start The index of some Part data (0..countParts()-1);
|
||||
* this Part should be of Type ARG_START or MSG_START.
|
||||
* @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
|
||||
* or start itself if getPartType(start)!=ARG|MSG_START.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t getLimitPartIndex(int32_t start) const {
|
||||
int32_t limit=getPart(start).limitPartIndex;
|
||||
if(limit<start) {
|
||||
return start;
|
||||
}
|
||||
return limit;
|
||||
}
|
||||
|
||||
/**
|
||||
* A message pattern "part", representing a pattern parsing event.
|
||||
* There is a part for the start and end of a message or argument,
|
||||
* for quoting and escaping of and with ASCII apostrophes,
|
||||
* and for syntax elements of "complex" arguments.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class Part : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Default constructor, do not use.
|
||||
* @internal
|
||||
*/
|
||||
// Leaves every field uninitialized: the body is empty and no member has a default initializer.
Part() {}
|
||||
|
||||
/**
|
||||
* Returns the type of this part.
|
||||
* @return the part type.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UMessagePatternPartType getType() const {
    // Plain accessor for the part type.
    return this->type;
}
|
||||
|
||||
/**
|
||||
* Returns the pattern string index associated with this Part.
|
||||
* @return this part's pattern string index.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t getIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the length of the pattern substring associated with this Part.
|
||||
* This is 0 for some parts.
|
||||
* @return this part's pattern substring length.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t getLength() const {
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the pattern string limit (exclusive-end) index associated with this Part.
|
||||
* Convenience method for getIndex()+getLength().
|
||||
* @return this part's pattern string limit index, same as getIndex()+getLength().
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t getLimit() const {
|
||||
return index+length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a value associated with this part.
|
||||
* See the documentation of each part type for details.
|
||||
* @return the part value.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t getValue() const {
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
|
||||
* otherwise UMSGPAT_ARG_TYPE_NONE.
|
||||
* @return the argument type for this part.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UMessagePatternArgType getArgType() const {
    // Only ARG_START and ARG_LIMIT parts carry an argument type in their value field;
    // every other part type reports UMSGPAT_ARG_TYPE_NONE.
    switch(getType()) {
    case UMSGPAT_PART_TYPE_ARG_START:
    case UMSGPAT_PART_TYPE_ARG_LIMIT:
        return static_cast<UMessagePatternArgType>(value);
    default:
        return UMSGPAT_ARG_TYPE_NONE;
    }
}
|
||||
|
||||
/**
|
||||
* Indicates whether the Part type has a numeric value.
|
||||
* If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
|
||||
* @param type The Part type to be tested.
|
||||
* @return true if the Part type has a numeric value.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
static UBool hasNumericValue(UMessagePatternPartType type) {
    // Only the two numeric part types (ARG_INT and ARG_DOUBLE) have a numeric value.
    switch(type) {
    case UMSGPAT_PART_TYPE_ARG_INT:
    case UMSGPAT_PART_TYPE_ARG_DOUBLE:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
/**
|
||||
* @param other another object to compare with.
|
||||
* @return true if this object is equivalent to the other one.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
bool operator==(const Part &other) const;
|
||||
|
||||
/**
|
||||
* @param other another object to compare with.
|
||||
* @return false if this object is equivalent to the other one.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline bool operator!=(const Part &other) const {
    // Inequality is defined purely as the negation of operator==.
    const bool isSame = operator==(other);
    return !isSame;
}
|
||||
|
||||
/**
|
||||
* @return A hash code for this object.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t hashCode() const {
|
||||
return ((type*37+index)*37+length)*37+value;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class MessagePattern;
|
||||
|
||||
static const int32_t MAX_LENGTH=0xffff;
|
||||
static const int32_t MAX_VALUE=0x7fff;
|
||||
static const int32_t MAX_NESTED_LEVELS=0x03ff;
|
||||
|
||||
// Some fields are not final because they are modified during pattern parsing.
|
||||
// After pattern parsing, the parts are effectively immutable.
|
||||
UMessagePatternPartType type;
|
||||
int32_t index;
|
||||
uint16_t length;
|
||||
int16_t value;
|
||||
int32_t limitPartIndex;
|
||||
};
|
||||
|
||||
private:
|
||||
void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
void postParse();
|
||||
|
||||
int32_t parseMessage(int32_t index, int32_t msgStartLength,
|
||||
int32_t nestingLevel, UMessagePatternArgType parentType,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Validates and parses an argument name or argument number string.
|
||||
* This internal method assumes that the input substring is a "pattern identifier".
|
||||
* @return >=0 if the name is a valid number,
|
||||
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
|
||||
* ARG_NAME_NOT_VALID (-2) if it is neither.
|
||||
* @see #validateArgumentName(String)
|
||||
*/
|
||||
static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
|
||||
|
||||
int32_t parseArgNumber(int32_t start, int32_t limit) {
|
||||
return parseArgNumber(msg, start, limit);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a number from the specified message substring.
|
||||
* @param start start index into the message string
|
||||
* @param limit limit index into the message string, must be start<limit
|
||||
* @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
|
||||
* @param parseError
|
||||
* @param errorCode
|
||||
*/
|
||||
void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
// Java has package-private appendReducedApostrophes() here.
|
||||
// In C++, this is declared in the MessageImpl class.
|
||||
|
||||
int32_t skipWhiteSpace(int32_t index);
|
||||
|
||||
int32_t skipIdentifier(int32_t index);
|
||||
|
||||
/**
|
||||
* Skips a sequence of characters that could occur in a double value.
|
||||
* Does not fully parse or validate the value.
|
||||
*/
|
||||
int32_t skipDouble(int32_t index);
|
||||
|
||||
static UBool isArgTypeChar(UChar32 c);
|
||||
|
||||
UBool isChoice(int32_t index);
|
||||
|
||||
UBool isPlural(int32_t index);
|
||||
|
||||
UBool isSelect(int32_t index);
|
||||
|
||||
UBool isOrdinal(int32_t index);
|
||||
|
||||
/**
|
||||
* @return true if we are inside a MessageFormat (sub-)pattern,
|
||||
* as opposed to inside a top-level choice/plural/select pattern.
|
||||
*/
|
||||
UBool inMessageFormatPattern(int32_t nestingLevel);
|
||||
|
||||
/**
|
||||
* @return true if we are in a MessageFormat sub-pattern
|
||||
* of a top-level ChoiceFormat pattern.
|
||||
*/
|
||||
UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
|
||||
|
||||
void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
|
||||
int32_t value, UErrorCode &errorCode);
|
||||
|
||||
void addLimitPart(int32_t start,
|
||||
UMessagePatternPartType type, int32_t index, int32_t length,
|
||||
int32_t value, UErrorCode &errorCode);
|
||||
|
||||
void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
|
||||
|
||||
void setParseError(UParseError *parseError, int32_t index);
|
||||
|
||||
UBool init(UErrorCode &errorCode);
|
||||
UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
|
||||
|
||||
UMessagePatternApostropheMode aposMode;
|
||||
UnicodeString msg;
|
||||
// ArrayList<Part> parts=new ArrayList<Part>();
|
||||
MessagePatternPartsList *partsList;
|
||||
Part *parts;
|
||||
int32_t partsLength;
|
||||
// ArrayList<Double> numericValues;
|
||||
MessagePatternDoubleList *numericValuesList;
|
||||
double *numericValues;
|
||||
int32_t numericValuesLength;
|
||||
UBool hasArgNames;
|
||||
UBool hasArgNumbers;
|
||||
UBool needsAutoQuoting;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // !UCONFIG_NO_FORMATTING
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __MESSAGEPATTERN_H__
|
||||
791
thirdparty/icu4c/common/unicode/normalizer2.h
vendored
Normal file
791
thirdparty/icu4c/common/unicode/normalizer2.h
vendored
Normal file
@@ -0,0 +1,791 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: normalizer2.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009nov22
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __NORMALIZER2_H__
|
||||
#define __NORMALIZER2_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: New API for Unicode Normalization.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm2.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ByteSink;
|
||||
|
||||
/**
|
||||
* Unicode normalization functionality for standard Unicode normalization or
|
||||
* for using custom mapping tables.
|
||||
* All instances of this class are unmodifiable/immutable.
|
||||
* Instances returned by getInstance() are singletons that must not be deleted by the caller.
|
||||
* The Normalizer2 class is not intended for public subclassing.
|
||||
*
|
||||
* The primary functions are to produce a normalized string and to detect whether
|
||||
* a string is already normalized.
|
||||
* The most commonly used normalization forms are those defined in
|
||||
* http://www.unicode.org/unicode/reports/tr15/
|
||||
* However, this API supports additional normalization forms for specialized purposes.
|
||||
* For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
|
||||
* and can be used in implementations of UTS #46.
|
||||
*
|
||||
* Not only are the standard compose and decompose modes supplied,
|
||||
* but additional modes are provided as documented in the Mode enum.
|
||||
*
|
||||
* Some of the functions in this class identify normalization boundaries.
|
||||
* At a normalization boundary, the portions of the string
|
||||
* before it and starting from it do not interact and can be handled independently.
|
||||
*
|
||||
* The spanQuickCheckYes() stops at a normalization boundary.
|
||||
* When the goal is a normalized string, then the text before the boundary
|
||||
* can be copied, and the remainder can be processed with normalizeSecondAndAppend().
|
||||
*
|
||||
* The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
|
||||
* a character is guaranteed to be at a normalization boundary,
|
||||
* regardless of context.
|
||||
* This is used for moving from one normalization boundary to the next
|
||||
* or preceding boundary, and for performing iterative normalization.
|
||||
*
|
||||
* Iterative normalization is useful when only a small portion of a
|
||||
* longer string needs to be processed.
|
||||
* For example, in ICU, iterative normalization is used by the NormalizationTransliterator
|
||||
* (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
|
||||
* (to process only the substring for which sort key bytes are computed).
|
||||
*
|
||||
* The set of normalization boundaries returned by these functions may not be
|
||||
* complete: There may be more boundaries that could be returned.
|
||||
* Different functions may return different boundaries.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
class U_COMMON_API Normalizer2 : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~Normalizer2();
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFC normalization.
|
||||
* Same as getInstance(nullptr, "nfc", UNORM2_COMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 49
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFCInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFD normalization.
|
||||
* Same as getInstance(nullptr, "nfc", UNORM2_DECOMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 49
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFDInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFKC normalization.
|
||||
* Same as getInstance(nullptr, "nfkc", UNORM2_COMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 49
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFKCInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFKD normalization.
|
||||
* Same as getInstance(nullptr, "nfkc", UNORM2_DECOMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 49
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFKDInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode toNFKC_Casefold() normalization
|
||||
* which is equivalent to applying the NFKC_Casefold mappings and then NFC.
|
||||
* See https://www.unicode.org/reports/tr44/#NFKC_Casefold
|
||||
*
|
||||
* Same as getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 49
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFKCCasefoldInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
|
||||
* which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
|
||||
* See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold
|
||||
*
|
||||
* Same as getInstance(nullptr, "nfkc_scf", UNORM2_COMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 74
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFKCSimpleCasefoldInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance which uses the specified data file
|
||||
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
|
||||
* and which composes or decomposes text according to the specified mode.
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
*
|
||||
* Use packageName=nullptr for data files that are part of ICU's own data.
|
||||
* Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
|
||||
* Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
|
||||
* Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
* Use name="nfkc_scf" and UNORM2_COMPOSE for NFKC_SCF=NFKC_Simple_Casefold.
|
||||
*
|
||||
* @param packageName nullptr for ICU built-in data, otherwise application data package name
|
||||
* @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file
|
||||
* @param mode normalization mode (compose or decompose etc.)
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getInstance(const char *packageName,
|
||||
const char *name,
|
||||
UNormalization2Mode mode,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns the normalized form of the source string.
|
||||
* @param src source string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return normalized src
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UnicodeString
|
||||
normalize(const UnicodeString &src, UErrorCode &errorCode) const {
|
||||
UnicodeString result;
|
||||
normalize(src, result, errorCode);
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* Writes the normalized form of the source string to the destination string
|
||||
* (replacing its contents) and returns the destination string.
|
||||
* The source and destination strings must be different objects.
|
||||
* @param src source string
|
||||
* @param dest destination string; its contents are replaced with normalized src
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return dest
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UnicodeString &
|
||||
normalize(const UnicodeString &src,
|
||||
UnicodeString &dest,
|
||||
UErrorCode &errorCode) const = 0;
|
||||
|
||||
/**
|
||||
* Normalizes a UTF-8 string and optionally records how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
*
|
||||
* Implemented completely for all built-in modes except for FCD.
|
||||
* The base class implementation converts to & from UTF-16 and does not support edits.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src Source UTF-8 string.
|
||||
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
|
||||
* sink.Flush() is called at the end.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The contents of the Edits object are undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @stable ICU 60
|
||||
*/
|
||||
virtual void
|
||||
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
|
||||
Edits *edits, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Appends the normalized form of the second string to the first string
|
||||
* (merging them at the boundary) and returns the first string.
|
||||
* The result is normalized if the first string was normalized.
|
||||
* The first and second strings must be different objects.
|
||||
* @param first string, should be normalized
|
||||
* @param second string, will be normalized
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return first
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const = 0;
|
||||
/**
|
||||
* Appends the second string to the first string
|
||||
* (merging them at the boundary) and returns the first string.
|
||||
* The result is normalized if both the strings were normalized.
|
||||
* The first and second strings must be different objects.
|
||||
* @param first string, should be normalized
|
||||
* @param second string, should be normalized
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return first
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UnicodeString &
|
||||
append(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const = 0;
|
||||
|
||||
/**
|
||||
* Gets the decomposition mapping of c.
|
||||
* Roughly equivalent to normalizing the String form of c
|
||||
* on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
|
||||
* returns false and does not write a string
|
||||
* if c does not have a decomposition mapping in this instance's data.
|
||||
* This function is independent of the mode of the Normalizer2.
|
||||
* @param c code point
|
||||
* @param decomposition String object which will be set to c's
|
||||
* decomposition mapping, if there is one.
|
||||
* @return true if c has a decomposition, otherwise false
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual UBool
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
|
||||
|
||||
/**
|
||||
* Gets the raw decomposition mapping of c.
|
||||
*
|
||||
* This is similar to the getDecomposition() method but returns the
|
||||
* raw decomposition mapping as specified in UnicodeData.txt or
|
||||
* (for custom data) in the mapping files processed by the gennorm2 tool.
|
||||
* By contrast, getDecomposition() returns the processed,
|
||||
* recursively-decomposed version of this mapping.
|
||||
*
|
||||
* When used on a standard NFKC Normalizer2 instance,
|
||||
* getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
|
||||
*
|
||||
* When used on a standard NFC Normalizer2 instance,
|
||||
* it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
|
||||
* in this case, the result contains either one or two code points (=1..4 char16_ts).
|
||||
*
|
||||
* This function is independent of the mode of the Normalizer2.
|
||||
* The default implementation returns false.
|
||||
* @param c code point
|
||||
* @param decomposition String object which will be set to c's
|
||||
* raw decomposition mapping, if there is one.
|
||||
* @return true if c has a decomposition, otherwise false
|
||||
* @stable ICU 49
|
||||
*/
|
||||
virtual UBool
|
||||
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
|
||||
|
||||
/**
|
||||
* Performs pairwise composition of a & b and returns the composite if there is one.
|
||||
*
|
||||
* Returns a composite code point c only if c has a two-way mapping to a+b.
|
||||
* In standard Unicode normalization, this means that
|
||||
* c has a canonical decomposition to a+b
|
||||
* and c does not have the Full_Composition_Exclusion property.
|
||||
*
|
||||
* This function is independent of the mode of the Normalizer2.
|
||||
* The default implementation returns a negative value.
|
||||
* @param a A (normalization starter) code point.
|
||||
* @param b Another code point.
|
||||
* @return The non-negative composite code point if there is one; otherwise a negative value.
|
||||
* @stable ICU 49
|
||||
*/
|
||||
virtual UChar32
|
||||
composePair(UChar32 a, UChar32 b) const;
|
||||
|
||||
/**
|
||||
* Gets the combining class of c.
|
||||
* The default implementation returns 0
|
||||
* but all standard implementations return the Unicode Canonical_Combining_Class value.
|
||||
* @param c code point
|
||||
* @return c's combining class
|
||||
* @stable ICU 49
|
||||
*/
|
||||
virtual uint8_t
|
||||
getCombiningClass(UChar32 c) const;
|
||||
|
||||
/**
|
||||
* Tests if the string is normalized.
|
||||
* Internally, in cases where the quickCheck() method would return "maybe"
|
||||
* (which is only possible for the two COMPOSE modes) this method
|
||||
* resolves to "yes" or "no" to provide a definitive result,
|
||||
* at the cost of doing more work in those cases.
|
||||
* @param s input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return true if s is normalized
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
|
||||
/**
|
||||
* Tests if the UTF-8 string is normalized.
|
||||
* Internally, in cases where the quickCheck() method would return "maybe"
|
||||
* (which is only possible for the two COMPOSE modes) this method
|
||||
* resolves to "yes" or "no" to provide a definitive result,
|
||||
* at the cost of doing more work in those cases.
|
||||
*
|
||||
* This works for all normalization modes.
|
||||
* It is optimized for UTF-8 for all built-in modes except for FCD.
|
||||
* The base class implementation converts to UTF-16 and calls isNormalized().
|
||||
*
|
||||
* @param s UTF-8 input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return true if s is normalized
|
||||
* @stable ICU 60
|
||||
*/
|
||||
virtual UBool
|
||||
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
|
||||
|
||||
|
||||
/**
|
||||
* Tests if the string is normalized.
|
||||
* For the two COMPOSE modes, the result could be "maybe" in cases that
|
||||
* would take a little more work to resolve definitively.
|
||||
* Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
|
||||
* combination of quick check + normalization, to avoid
|
||||
* re-checking the "yes" prefix.
|
||||
* @param s input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return UNormalizationCheckResult
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
|
||||
|
||||
/**
|
||||
* Returns the end of the normalized substring of the input string.
|
||||
* In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
|
||||
* the substring <code>UnicodeString(s, 0, end)</code>
|
||||
* will pass the quick check with a "yes" result.
|
||||
*
|
||||
* The returned end index is usually one or more characters before the
|
||||
* "no" or "maybe" character: The end index is at a normalization boundary.
|
||||
* (See the class documentation for more about normalization boundaries.)
|
||||
*
|
||||
* When the goal is a normalized string and most input strings are expected
|
||||
* to be normalized already, then call this method,
|
||||
* and if it returns a prefix shorter than the input string,
|
||||
* copy that prefix and use normalizeSecondAndAppend() for the remainder.
|
||||
* @param s input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return "yes" span end index
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual int32_t
|
||||
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
|
||||
|
||||
/**
|
||||
* Tests if the character always has a normalization boundary before it,
|
||||
* regardless of context.
|
||||
* If true, then the character does not normalization-interact with
|
||||
* preceding characters.
|
||||
* In other words, a string containing this character can be normalized
|
||||
* by processing portions before this character and starting from this
|
||||
* character independently.
|
||||
* This is used for iterative normalization. See the class documentation for details.
|
||||
* @param c character to test
|
||||
* @return true if c has a normalization boundary before it
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
|
||||
|
||||
/**
|
||||
* Tests if the character always has a normalization boundary after it,
|
||||
* regardless of context.
|
||||
* If true, then the character does not normalization-interact with
|
||||
* following characters.
|
||||
* In other words, a string containing this character can be normalized
|
||||
* by processing portions up to this character and after this
|
||||
* character independently.
|
||||
* This is used for iterative normalization. See the class documentation for details.
|
||||
* Note that this operation may be significantly slower than hasBoundaryBefore().
|
||||
* @param c character to test
|
||||
* @return true if c has a normalization boundary after it
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
|
||||
|
||||
/**
|
||||
* Tests if the character is normalization-inert.
|
||||
* If true, then the character does not change, nor normalization-interact with
|
||||
* preceding or following characters.
|
||||
* In other words, a string containing this character can be normalized
|
||||
* by processing portions before this character and after this
|
||||
* character independently.
|
||||
* This is used for iterative normalization. See the class documentation for details.
|
||||
* Note that this operation may be significantly slower than hasBoundaryBefore().
|
||||
* @param c character to test
|
||||
* @return true if c is normalization-inert
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UBool isInert(UChar32 c) const = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* Normalization filtered by a UnicodeSet.
|
||||
* Normalizes portions of the text contained in the filter set and leaves
|
||||
* portions not contained in the filter set unchanged.
|
||||
* Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
|
||||
* Not-in-the-filter text is treated as "is normalized" and "quick check yes".
|
||||
* This class implements all of (and only) the Normalizer2 API.
|
||||
* An instance of this class is unmodifiable/immutable but is constructed and
|
||||
* must be destructed by the owner.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
|
||||
public:
|
||||
/**
|
||||
* Constructs a filtered normalizer wrapping any Normalizer2 instance
|
||||
* and a filter set.
|
||||
* Both are aliased and must not be modified or deleted while this object
|
||||
* is used.
|
||||
* The filter set should be frozen; otherwise the performance will suffer greatly.
|
||||
* @param n2 wrapped Normalizer2 instance
|
||||
* @param filterSet UnicodeSet which determines the characters to be normalized
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
|
||||
norm2(n2), set(filterSet) {}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~FilteredNormalizer2();
|
||||
|
||||
/**
|
||||
* Writes the normalized form of the source string to the destination string
|
||||
* (replacing its contents) and returns the destination string.
|
||||
* The source and destination strings must be different objects.
|
||||
* @param src source string
|
||||
* @param dest destination string; its contents are replaced with normalized src
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return dest
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UnicodeString &
|
||||
normalize(const UnicodeString &src,
|
||||
UnicodeString &dest,
|
||||
UErrorCode &errorCode) const override;
|
||||
|
||||
/**
|
||||
* Normalizes a UTF-8 string and optionally records how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
*
|
||||
* Implemented completely for most built-in modes except for FCD.
|
||||
* The base class implementation converts to & from UTF-16 and does not support edits.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src Source UTF-8 string.
|
||||
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
|
||||
* sink.Flush() is called at the end.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* The contents of the Edits object are undefined if any error occurs.
|
||||
* This function calls edits->reset() first unless
|
||||
* options includes U_EDITS_NO_RESET. edits can be nullptr.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @stable ICU 60
|
||||
*/
|
||||
virtual void
|
||||
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
|
||||
Edits *edits, UErrorCode &errorCode) const override;
|
||||
|
||||
/**
|
||||
* Appends the normalized form of the second string to the first string
|
||||
* (merging them at the boundary) and returns the first string.
|
||||
* The result is normalized if the first string was normalized.
|
||||
* The first and second strings must be different objects.
|
||||
* @param first string, should be normalized
|
||||
* @param second string, will be normalized
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return first
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const override;
|
||||
/**
|
||||
* Appends the second string to the first string
|
||||
* (merging them at the boundary) and returns the first string.
|
||||
* The result is normalized if both the strings were normalized.
|
||||
* The first and second strings must be different objects.
|
||||
* @param first string, should be normalized
|
||||
* @param second string, should be normalized
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return first
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UnicodeString &
|
||||
append(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const override;
|
||||
|
||||
/**
|
||||
* Gets the decomposition mapping of c.
|
||||
* For details see the base class documentation.
|
||||
*
|
||||
* This function is independent of the mode of the Normalizer2.
|
||||
* @param c code point
|
||||
* @param decomposition String object which will be set to c's
|
||||
* decomposition mapping, if there is one.
|
||||
* @return true if c has a decomposition, otherwise false
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual UBool
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const override;
|
||||
|
||||
/**
|
||||
* Gets the raw decomposition mapping of c.
|
||||
* For details see the base class documentation.
|
||||
*
|
||||
* This function is independent of the mode of the Normalizer2.
|
||||
* @param c code point
|
||||
* @param decomposition String object which will be set to c's
|
||||
* raw decomposition mapping, if there is one.
|
||||
* @return true if c has a decomposition, otherwise false
|
||||
* @stable ICU 49
|
||||
*/
|
||||
virtual UBool
|
||||
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override;
|
||||
|
||||
/**
|
||||
* Performs pairwise composition of a & b and returns the composite if there is one.
|
||||
* For details see the base class documentation.
|
||||
*
|
||||
* This function is independent of the mode of the Normalizer2.
|
||||
* @param a A (normalization starter) code point.
|
||||
* @param b Another code point.
|
||||
* @return The non-negative composite code point if there is one; otherwise a negative value.
|
||||
* @stable ICU 49
|
||||
*/
|
||||
virtual UChar32
|
||||
composePair(UChar32 a, UChar32 b) const override;
|
||||
|
||||
/**
|
||||
* Gets the combining class of c.
|
||||
* The default implementation returns 0
|
||||
* but all standard implementations return the Unicode Canonical_Combining_Class value.
|
||||
* @param c code point
|
||||
* @return c's combining class
|
||||
* @stable ICU 49
|
||||
*/
|
||||
virtual uint8_t
|
||||
getCombiningClass(UChar32 c) const override;
|
||||
|
||||
/**
|
||||
* Tests if the string is normalized.
|
||||
* For details see the Normalizer2 base class documentation.
|
||||
* @param s input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return true if s is normalized
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override;
|
||||
/**
|
||||
* Tests if the UTF-8 string is normalized.
|
||||
* Internally, in cases where the quickCheck() method would return "maybe"
|
||||
* (which is only possible for the two COMPOSE modes) this method
|
||||
* resolves to "yes" or "no" to provide a definitive result,
|
||||
* at the cost of doing more work in those cases.
|
||||
*
|
||||
* This works for all normalization modes.
|
||||
* It is optimized for UTF-8 for all built-in modes except for FCD.
|
||||
* The base class implementation converts to UTF-16 and calls isNormalized().
|
||||
*
|
||||
* @param s UTF-8 input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return true if s is normalized
|
||||
* @stable ICU 60
|
||||
*/
|
||||
virtual UBool
|
||||
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const override;
|
||||
/**
|
||||
* Tests if the string is normalized.
|
||||
* For details see the Normalizer2 base class documentation.
|
||||
* @param s input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return UNormalizationCheckResult
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override;
|
||||
/**
|
||||
* Returns the end of the normalized substring of the input string.
|
||||
* For details see the Normalizer2 base class documentation.
|
||||
* @param s input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return "yes" span end index
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual int32_t
|
||||
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override;
|
||||
|
||||
/**
|
||||
* Tests if the character always has a normalization boundary before it,
|
||||
* regardless of context.
|
||||
* For details see the Normalizer2 base class documentation.
|
||||
* @param c character to test
|
||||
* @return true if c has a normalization boundary before it
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const override;
|
||||
|
||||
/**
|
||||
* Tests if the character always has a normalization boundary after it,
|
||||
* regardless of context.
|
||||
* For details see the Normalizer2 base class documentation.
|
||||
* @param c character to test
|
||||
* @return true if c has a normalization boundary after it
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const override;
|
||||
|
||||
/**
|
||||
* Tests if the character is normalization-inert.
|
||||
* For details see the Normalizer2 base class documentation.
|
||||
* @param c character to test
|
||||
* @return true if c is normalization-inert
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
virtual UBool isInert(UChar32 c) const override;
|
||||
private:
|
||||
UnicodeString &
|
||||
normalize(const UnicodeString &src,
|
||||
UnicodeString &dest,
|
||||
USetSpanCondition spanCondition,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
void
|
||||
normalizeUTF8(uint32_t options, const char *src, int32_t length,
|
||||
ByteSink &sink, Edits *edits,
|
||||
USetSpanCondition spanCondition,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UBool doNormalize,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
const Normalizer2 &norm2;
|
||||
const UnicodeSet &set;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __NORMALIZER2_H__
|
||||
816
thirdparty/icu4c/common/unicode/normlzr.h
vendored
Normal file
816
thirdparty/icu4c/common/unicode/normlzr.h
vendored
Normal file
@@ -0,0 +1,816 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1996-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef NORMLZR_H
|
||||
#define NORMLZR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Unicode Normalization
|
||||
*/
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/**
|
||||
* Old Unicode normalization API.
|
||||
*
|
||||
* This API has been replaced by the Normalizer2 class and is only available
|
||||
* for backward compatibility. This class simply delegates to the Normalizer2 class.
|
||||
* There is one exception: The new API does not provide a replacement for Normalizer::compare().
|
||||
*
|
||||
* The Normalizer class supports the standard normalization forms described in
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
|
||||
* Unicode Standard Annex #15: Unicode Normalization Forms</a>.
|
||||
*
|
||||
* The Normalizer class consists of two parts:
|
||||
* - static functions that normalize strings or test if strings are normalized
|
||||
* - a Normalizer object is an iterator that takes any kind of text and
|
||||
* provides iteration over its normalized form
|
||||
*
|
||||
* The Normalizer class is not suitable for subclassing.
|
||||
*
|
||||
* For basic information about normalization forms and details about the C API
|
||||
* please see the documentation in unorm.h.
|
||||
*
|
||||
* The iterator API with the Normalizer constructors and the non-static functions
|
||||
* use a CharacterIterator as input. It is possible to pass a string which
|
||||
* is then internally wrapped in a CharacterIterator.
|
||||
* The input text is not normalized all at once, but incrementally where needed
|
||||
* (providing efficient random access).
|
||||
* This allows passing in a large text while spending only a small amount of time
|
||||
* normalizing a small part of that text.
|
||||
* However, if the entire text is normalized, then the iterator will be
|
||||
* slower than normalizing the entire text at once and iterating over the result.
|
||||
* A possible use of the Normalizer iterator is also to report an index into the
|
||||
* original text that is close to where the normalized characters come from.
|
||||
*
|
||||
* <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
|
||||
* The earlier implementation reported the getIndex() inconsistently,
|
||||
* and previous() could not be used after setIndex(), next(), first(), and current().
|
||||
*
|
||||
* Normalizer allows normalization to start from anywhere in the input text by
|
||||
* calling setIndexOnly(), first(), or last().
|
||||
* Without calling any of these, the iterator will start at the beginning of the text.
|
||||
*
|
||||
* At any time, next() returns the next normalized code point (UChar32),
|
||||
* with post-increment semantics (like CharacterIterator::next32PostInc()).
|
||||
* previous() returns the previous normalized code point (UChar32),
|
||||
* with pre-decrement semantics (like CharacterIterator::previous32()).
|
||||
*
|
||||
* current() returns the current code point
|
||||
* (respectively the one at the newly set index) without moving
|
||||
* the getIndex(). Note that if the text at the current position
|
||||
* needs to be normalized, then these functions will do that.
|
||||
* (This is why current() is not const.)
|
||||
* It is more efficient to call setIndexOnly() instead, which does not
|
||||
* normalize.
|
||||
*
|
||||
* getIndex() always refers to the position in the input text where the normalized
|
||||
* code points are returned from. It does not always change with each returned
|
||||
* code point.
|
||||
* The code point that is returned from any of the functions
|
||||
* corresponds to text at or after getIndex(), according to the
|
||||
* function's iteration semantics (post-increment or pre-decrement).
|
||||
*
|
||||
* next() returns a code point from at or after the getIndex()
|
||||
* from before the next() call. After the next() call, the getIndex()
|
||||
* might have moved to where the next code point will be returned from
|
||||
* (from a next() or current() call).
|
||||
* This is semantically equivalent to array access with array[index++]
|
||||
* (post-increment semantics).
|
||||
*
|
||||
* previous() returns a code point from at or after the getIndex()
|
||||
* from after the previous() call.
|
||||
* This is semantically equivalent to array access with array[--index]
|
||||
* (pre-decrement semantics).
|
||||
*
|
||||
* Internally, the Normalizer iterator normalizes a small piece of text
|
||||
* starting at the getIndex() and ending at a following "safe" index.
|
||||
* The normalized result is stored in an internal string buffer, and
|
||||
* the code points are iterated from there.
|
||||
* With multiple iteration calls, this is repeated until the next piece
|
||||
* of text needs to be normalized, and the getIndex() needs to be moved.
|
||||
*
|
||||
* The following "safe" index, the internal buffer, and the secondary
|
||||
* iteration index into that buffer are not exposed on the API.
|
||||
* This also means that it is currently not practical to return to
|
||||
* a particular, arbitrary position in the text because one would need to
|
||||
* know, and be able to set, in addition to the getIndex(), at least also the
|
||||
* current index into the internal buffer.
|
||||
* It is currently only possible to observe when getIndex() changes
|
||||
* (with careful consideration of the iteration semantics),
|
||||
* at which time the internal index will be 0.
|
||||
* For example, if getIndex() is different after next() than before it,
|
||||
* then the internal index is 0 and one can return to this getIndex()
|
||||
* later with setIndexOnly().
|
||||
*
|
||||
* Note: While the setIndex() and getIndex() refer to indices in the
|
||||
* underlying Unicode input text, the next() and previous() methods
|
||||
* iterate through characters in the normalized output.
|
||||
* This means that there is not necessarily a one-to-one correspondence
|
||||
* between characters returned by next() and previous() and the indices
|
||||
* passed to and returned from setIndex() and getIndex().
|
||||
* It is for this reason that Normalizer does not implement the CharacterIterator interface.
|
||||
*
|
||||
* @author Laura Werner, Mark Davis, Markus Scherer
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API Normalizer : public UObject {
|
||||
public:
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* If DONE is returned from an iteration function that returns a code point,
|
||||
* then there are no more normalization results available.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
enum {
|
||||
DONE=0xffff
|
||||
};
|
||||
|
||||
// Constructors
|
||||
|
||||
/**
|
||||
* Creates a new <code>Normalizer</code> object for iterating over the
|
||||
* normalized form of a given string.
|
||||
* <p>
|
||||
* @param str The string to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param mode The normalization mode.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
Normalizer(const UnicodeString& str, UNormalizationMode mode);
|
||||
|
||||
/**
|
||||
* Creates a new <code>Normalizer</code> object for iterating over the
|
||||
* normalized form of a given string.
|
||||
* <p>
|
||||
* @param str The string to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param length Length of the string, or -1 if NUL-terminated.
|
||||
* @param mode The normalization mode.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
|
||||
|
||||
/**
|
||||
* Creates a new <code>Normalizer</code> object for iterating over the
|
||||
* normalized form of the given text.
|
||||
* <p>
|
||||
* @param iter The input text to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param mode The normalization mode.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#ifndef U_FORCE_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @param copy The object to be copied.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
Normalizer(const Normalizer& copy);
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
virtual ~Normalizer();
|
||||
#endif // U_FORCE_HIDE_DEPRECATED_API
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Static utility methods
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
|
||||
* This is a wrapper for unorm_normalize(), using UnicodeString's.
|
||||
*
|
||||
* The <code>options</code> parameter specifies which optional
|
||||
* <code>Normalizer</code> features are to be enabled for this operation.
|
||||
*
|
||||
* @param source the input string to be normalized.
|
||||
* @param mode the normalization mode
|
||||
* @param options the optional features to be enabled (0 for no options)
|
||||
* @param result The normalized string (on output).
|
||||
* @param status The error code.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
static void U_EXPORT2 normalize(const UnicodeString& source,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Compose a <code>UnicodeString</code>.
|
||||
* This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
|
||||
* This is a wrapper for unorm_normalize(), using UnicodeString's.
|
||||
*
|
||||
* The <code>options</code> parameter specifies which optional
|
||||
* <code>Normalizer</code> features are to be enabled for this operation.
|
||||
*
|
||||
* @param source the string to be composed.
|
||||
* @param compat Perform compatibility decomposition before composition.
|
||||
* If this argument is <code>false</code>, only canonical
|
||||
* decomposition will be performed.
|
||||
* @param options the optional features to be enabled (0 for no options)
|
||||
* @param result The composed string (on output).
|
||||
* @param status The error code.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
static void U_EXPORT2 compose(const UnicodeString& source,
|
||||
UBool compat, int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Static method to decompose a <code>UnicodeString</code>.
|
||||
* This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
|
||||
* This is a wrapper for unorm_normalize(), using UnicodeString's.
|
||||
*
|
||||
* The <code>options</code> parameter specifies which optional
|
||||
* <code>Normalizer</code> features are to be enabled for this operation.
|
||||
*
|
||||
* @param source the string to be decomposed.
|
||||
* @param compat Perform compatibility decomposition.
|
||||
* If this argument is <code>false</code>, only canonical
|
||||
* decomposition will be performed.
|
||||
* @param options the optional features to be enabled (0 for no options)
|
||||
* @param result The decomposed string (on output).
|
||||
* @param status The error code.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
static void U_EXPORT2 decompose(const UnicodeString& source,
|
||||
UBool compat, int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Performing quick check on a string, to quickly determine if the string is
|
||||
* in a particular normalization format.
|
||||
* This is a wrapper for unorm_quickCheck(), using a UnicodeString.
|
||||
*
|
||||
* Three types of result can be returned: UNORM_YES, UNORM_NO or
|
||||
* UNORM_MAYBE. Result UNORM_YES indicates that the argument
|
||||
* string is in the desired normalized format, UNORM_NO determines that
|
||||
* argument string is not in the desired normalized format. A
|
||||
* UNORM_MAYBE result indicates that a more thorough check is required,
|
||||
* the user may have to put the string in its normalized form and compare the
|
||||
* results.
|
||||
* @param source string for determining if it is in a normalized format
|
||||
* @param mode normalization format
|
||||
* @param status A reference to a UErrorCode to receive any errors
|
||||
* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
|
||||
*
|
||||
* @see isNormalized
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
static inline UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Performing quick check on a string; same as the other version of quickCheck
|
||||
* but takes an extra options parameter like most normalization functions.
|
||||
*
|
||||
* @param source string for determining if it is in a normalized format
|
||||
* @param mode normalization format
|
||||
* @param options the optional features to be enabled (0 for no options)
|
||||
* @param status A reference to a UErrorCode to receive any errors
|
||||
* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
|
||||
*
|
||||
* @see isNormalized
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
static UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Test if a string is in a given normalization form.
|
||||
* This is semantically equivalent to source.equals(normalize(source, mode)) .
|
||||
*
|
||||
* Unlike unorm_quickCheck(), this function returns a definitive result,
|
||||
* never a "maybe".
|
||||
* For NFD, NFKD, and FCD, both functions work exactly the same.
|
||||
* For NFC and NFKC where quickCheck may return "maybe", this function will
|
||||
* perform further tests to arrive at a true/false result.
|
||||
*
|
||||
* @param src String that is to be tested if it is in a normalization format.
|
||||
* @param mode Which normalization form to test for.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Boolean value indicating whether the source string is in the
|
||||
* "mode" normalization form.
|
||||
*
|
||||
* @see quickCheck
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
static inline UBool
|
||||
isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Test if a string is in a given normalization form; same as the other version of isNormalized
|
||||
* but takes an extra options parameter like most normalization functions.
|
||||
*
|
||||
* @param src String that is to be tested if it is in a normalization format.
|
||||
* @param mode Which normalization form to test for.
|
||||
* @param options the optional features to be enabled (0 for no options)
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Boolean value indicating whether the source string is in the
|
||||
* "mode" normalization form.
|
||||
*
|
||||
* @see quickCheck
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
static UBool
|
||||
isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Concatenate normalized strings, making sure that the result is normalized as well.
|
||||
*
|
||||
* If both the left and the right strings are in
|
||||
* the normalization form according to "mode/options",
|
||||
* then the result will be
|
||||
*
|
||||
* \code
|
||||
* dest=normalize(left+right, mode, options)
|
||||
* \endcode
|
||||
*
|
||||
* For details see unorm_concatenate in unorm.h.
|
||||
*
|
||||
* @param left Left source string.
|
||||
* @param right Right source string.
|
||||
* @param result The output string.
|
||||
* @param mode The normalization mode.
|
||||
* @param options A bit set of normalization options.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return result
|
||||
*
|
||||
* @see unorm_concatenate
|
||||
* @see normalize
|
||||
* @see unorm_next
|
||||
* @see unorm_previous
|
||||
*
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
static UnicodeString &
|
||||
U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
|
||||
UnicodeString &result,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UErrorCode &errorCode);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Compare two strings for canonical equivalence.
|
||||
* Further options include case-insensitive comparison and
|
||||
* code point order (as opposed to code unit order).
|
||||
*
|
||||
* Canonical equivalence between two strings is defined as their normalized
|
||||
* forms (NFD or NFC) being identical.
|
||||
* This function compares strings incrementally instead of normalizing
|
||||
* (and optionally case-folding) both strings entirely,
|
||||
* improving performance significantly.
|
||||
*
|
||||
* Bulk normalization is only necessary if the strings do not fulfill the FCD
|
||||
* conditions. Only in this case, and only if the strings are relatively long,
|
||||
* is memory allocated temporarily.
|
||||
* For FCD strings and short non-FCD strings there is no memory allocation.
|
||||
*
|
||||
* Semantically, this is equivalent to
|
||||
* strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
|
||||
* where code point order and foldCase are all optional.
|
||||
*
|
||||
* UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
|
||||
* the case folding must be performed first, then the normalization.
|
||||
*
|
||||
* @param s1 First source string.
|
||||
* @param s2 Second source string.
|
||||
*
|
||||
* @param options A bit set of options:
|
||||
* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
|
||||
* Case-sensitive comparison in code unit order, and the input strings
|
||||
* are quick-checked for FCD.
|
||||
*
|
||||
* - UNORM_INPUT_IS_FCD
|
||||
* Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
|
||||
* If not set, the function will quickCheck for FCD
|
||||
* and normalize if necessary.
|
||||
*
|
||||
* - U_COMPARE_CODE_POINT_ORDER
|
||||
* Set to choose code point order instead of code unit order
|
||||
* (see u_strCompare for details).
|
||||
*
|
||||
* - U_COMPARE_IGNORE_CASE
|
||||
* Set to compare strings case-insensitively using case folding,
|
||||
* instead of case-sensitively.
|
||||
* If set, then the following case folding options are used.
|
||||
*
|
||||
* - Options as used with case-insensitive comparisons, currently:
|
||||
*
|
||||
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* (see u_strCaseCompare for details)
|
||||
*
|
||||
* - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
|
||||
*
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return <0 or 0 or >0 as usual for string comparisons
|
||||
*
|
||||
* @see unorm_compare
|
||||
* @see normalize
|
||||
* @see UNORM_FCD
|
||||
* @see u_strCompare
|
||||
* @see u_strCaseCompare
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
static inline int32_t
|
||||
compare(const UnicodeString &s1, const UnicodeString &s2,
|
||||
uint32_t options,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
//-------------------------------------------------------------------------
|
||||
// Iteration API
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Return the current character in the normalized text.
|
||||
* current() may need to normalize some text at getIndex().
|
||||
* The getIndex() is not changed.
|
||||
*
|
||||
* @return the current normalized code point
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
UChar32 current();
|
||||
|
||||
/**
|
||||
* Return the first character in the normalized text.
|
||||
* This is equivalent to setIndexOnly(startIndex()) followed by next().
|
||||
* (Post-increment semantics.)
|
||||
*
|
||||
* @return the first normalized code point
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
UChar32 first();
|
||||
|
||||
/**
|
||||
* Return the last character in the normalized text.
|
||||
* This is equivalent to setIndexOnly(endIndex()) followed by previous().
|
||||
* (Pre-decrement semantics.)
|
||||
*
|
||||
* @return the last normalized code point
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
UChar32 last();
|
||||
|
||||
/**
|
||||
* Return the next character in the normalized text.
|
||||
* (Post-increment semantics.)
|
||||
* If the end of the text has already been reached, DONE is returned.
|
||||
* The DONE value could be confused with a U+FFFF non-character code point
|
||||
* in the text. If this is possible, you can test getIndex()<endIndex()
|
||||
* before calling next(), or (getIndex()<endIndex() || last()!=DONE)
|
||||
* after calling next(). (Calling last() will change the iterator state!)
|
||||
*
|
||||
* The C API unorm_next() is more efficient and does not have this ambiguity.
|
||||
*
|
||||
* @return the next normalized code point
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
UChar32 next();
|
||||
|
||||
/**
|
||||
* Return the previous character in the normalized text and decrement.
|
||||
* (Pre-decrement semantics.)
|
||||
* If the beginning of the text has already been reached, DONE is returned.
|
||||
* The DONE value could be confused with a U+FFFF non-character code point
|
||||
* in the text. If this is possible, you can test
|
||||
* (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
|
||||
* the iterator state!)
|
||||
*
|
||||
* The C API unorm_previous() is more efficient and does not have this ambiguity.
|
||||
*
|
||||
* @return the previous normalized code point
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
UChar32 previous();
|
||||
|
||||
/**
|
||||
* Set the iteration position in the input text that is being normalized,
|
||||
* without any immediate normalization.
|
||||
* After setIndexOnly(), getIndex() will return the same index that is
|
||||
* specified here.
|
||||
*
|
||||
* @param index the desired index in the input text.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
void setIndexOnly(int32_t index);
|
||||
|
||||
/**
|
||||
* Reset the index to the beginning of the text.
|
||||
* This is equivalent to setIndexOnly(startIndex()).
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
void reset();
|
||||
|
||||
/**
|
||||
* Retrieve the current iteration position in the input text that is
|
||||
* being normalized.
|
||||
*
|
||||
* A following call to next() will return a normalized code point from
|
||||
* the input text at or after this index.
|
||||
*
|
||||
* After a call to previous(), getIndex() will point at or before the
|
||||
* position in the input text where the normalized code point
|
||||
* was returned from with previous().
|
||||
*
|
||||
* @return the current index in the input text
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
int32_t getIndex() const;
|
||||
|
||||
/**
|
||||
* Retrieve the index of the start of the input text. This is the begin index
|
||||
* of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
|
||||
* over which this <code>Normalizer</code> is iterating.
|
||||
*
|
||||
* @return the smallest index in the input text where the Normalizer operates
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
int32_t startIndex() const;
|
||||
|
||||
/**
|
||||
* Retrieve the index of the end of the input text. This is the end index
|
||||
* of the <code>CharacterIterator</code> or the length of the string
|
||||
* over which this <code>Normalizer</code> is iterating.
|
||||
* This end index is exclusive, i.e., the Normalizer operates only on characters
|
||||
* before this index.
|
||||
*
|
||||
* @return the first index in the input text where the Normalizer does not operate
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
int32_t endIndex() const;
|
||||
|
||||
/**
|
||||
* Returns true when both iterators refer to the same character in the same
|
||||
* input text.
|
||||
*
|
||||
* @param that a Normalizer object to compare this one to
|
||||
* @return comparison result
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
bool operator==(const Normalizer& that) const;
|
||||
|
||||
/**
|
||||
* Returns false when both iterators refer to the same character in the same
|
||||
* input text.
|
||||
*
|
||||
* @param that a Normalizer object to compare this one to
|
||||
* @return comparison result
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
inline bool operator!=(const Normalizer& that) const;
|
||||
|
||||
/**
|
||||
* Returns a pointer to a new Normalizer that is a clone of this one.
|
||||
* The caller is responsible for deleting the new clone.
|
||||
* @return a pointer to a new Normalizer
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
Normalizer* clone() const;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator.
|
||||
*
|
||||
* @return the hash code
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
int32_t hashCode() const;
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Property access methods
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Set the normalization mode for this object.
|
||||
* <p>
|
||||
* <b>Note:</b> If the normalization mode is changed while iterating
|
||||
* over a string, calls to {@link #next() } and {@link #previous() } may
|
||||
* return previously buffered characters in the old normalization mode
|
||||
* until the iteration is able to re-sync at the next base character.
|
||||
* It is safest to call {@link #setIndexOnly }, {@link #reset() },
|
||||
* {@link #setText }, {@link #first() },
|
||||
* {@link #last() }, etc. after calling <code>setMode</code>.
|
||||
* <p>
|
||||
* @param newMode the new mode for this <code>Normalizer</code>.
|
||||
* @see #getUMode
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
void setMode(UNormalizationMode newMode);
|
||||
|
||||
/**
|
||||
* Return the normalization mode for this object.
|
||||
*
|
||||
* This is an unusual name because there used to be a getMode() that
|
||||
* returned a different type.
|
||||
*
|
||||
* @return the mode for this <code>Normalizer</code>
|
||||
* @see #setMode
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
UNormalizationMode getUMode() const;
|
||||
|
||||
/**
|
||||
* Set options that affect this <code>Normalizer</code>'s operation.
|
||||
* Options do not change the basic composition or decomposition operation
|
||||
* that is being performed, but they control whether
|
||||
* certain optional portions of the operation are done.
|
||||
* Currently the only available option is obsolete.
|
||||
*
|
||||
* It is possible to specify multiple options that are all turned on or off.
|
||||
*
|
||||
* @param option the option(s) whose value is/are to be set.
|
||||
* @param value the new setting for the option. Use <code>true</code> to
|
||||
* turn the option(s) on and <code>false</code> to turn it/them off.
|
||||
*
|
||||
* @see #getOption
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
void setOption(int32_t option,
|
||||
UBool value);
|
||||
|
||||
/**
|
||||
* Determine whether an option is turned on or off.
|
||||
* If multiple options are specified, then the result is true if any
|
||||
* of them are set.
|
||||
* <p>
|
||||
* @param option the option(s) that are to be checked
|
||||
* @return true if any of the option(s) are set
|
||||
* @see #setOption
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
UBool getOption(int32_t option) const;
|
||||
|
||||
/**
|
||||
* Set the input text over which this <code>Normalizer</code> will iterate.
|
||||
* The iteration position is set to the beginning.
|
||||
*
|
||||
* @param newText a string that replaces the current input text
|
||||
* @param status a UErrorCode
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
void setText(const UnicodeString& newText,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Set the input text over which this <code>Normalizer</code> will iterate.
|
||||
* The iteration position is set to the beginning.
|
||||
*
|
||||
* @param newText a CharacterIterator object that replaces the current input text
|
||||
* @param status a UErrorCode
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
void setText(const CharacterIterator& newText,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Set the input text over which this <code>Normalizer</code> will iterate.
|
||||
* The iteration position is set to the beginning.
|
||||
*
|
||||
* @param newText a string that replaces the current input text
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @param status a UErrorCode
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
void setText(ConstChar16Ptr newText,
|
||||
int32_t length,
|
||||
UErrorCode &status);
|
||||
/**
|
||||
* Copies the input text into the UnicodeString argument.
|
||||
*
|
||||
* @param result Receives a copy of the text under iteration.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
void getText(UnicodeString& result);
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
* @returns a UClassID for this class.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#ifndef U_FORCE_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
||||
* @return a UClassID for the actual class.
|
||||
* @deprecated ICU 56 Use Normalizer2 instead.
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const override;
|
||||
#endif // U_FORCE_HIDE_DEPRECATED_API
|
||||
|
||||
private:
|
||||
//-------------------------------------------------------------------------
|
||||
// Private functions
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
Normalizer() = delete; // default constructor not implemented
|
||||
Normalizer &operator=(const Normalizer &that) = delete; // assignment operator not implemented
|
||||
|
||||
// Private utility methods for iteration
|
||||
// For documentation, see the source code
|
||||
UBool nextNormalize();
|
||||
UBool previousNormalize();
|
||||
|
||||
void init();
|
||||
void clearBuffer();
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Private data
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
FilteredNormalizer2*fFilteredNorm2; // owned if not nullptr
|
||||
const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
|
||||
UNormalizationMode fUMode; // deprecated
|
||||
int32_t fOptions;
|
||||
|
||||
// The input text and our position in it
|
||||
CharacterIterator *text;
|
||||
|
||||
// The normalization buffer is the result of normalization
|
||||
// of the source in [currentIndex..nextIndex[ .
|
||||
int32_t currentIndex, nextIndex;
|
||||
|
||||
// A buffer for holding intermediate results
|
||||
UnicodeString buffer;
|
||||
int32_t bufferPos;
|
||||
};
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Inline implementations
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
inline bool
|
||||
Normalizer::operator!= (const Normalizer& other) const
|
||||
{ return ! operator==(other); }
|
||||
|
||||
/**
 * Three-argument convenience overload: forwards to the overload that also
 * takes an options argument, passing 0 for the options.
 */
inline UNormalizationCheckResult
Normalizer::quickCheck(const UnicodeString& source,
                       UNormalizationMode mode,
                       UErrorCode &status)
{
    return Normalizer::quickCheck(source, mode, 0, status);
}
|
||||
|
||||
/**
 * Three-argument convenience overload: forwards to the overload that also
 * takes an options argument, passing 0 for the options.
 */
inline UBool
Normalizer::isNormalized(const UnicodeString& source,
                         UNormalizationMode mode,
                         UErrorCode &status)
{
    return Normalizer::isNormalized(source, mode, 0, status);
}
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
inline int32_t
|
||||
Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
|
||||
uint32_t options,
|
||||
UErrorCode &errorCode) {
|
||||
// all argument checking is done in unorm_compare
|
||||
return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),
|
||||
toUCharPtr(s2.getBuffer()), s2.length(),
|
||||
options,
|
||||
&errorCode);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
||||
#endif // NORMLZR_H
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
94
thirdparty/icu4c/common/unicode/parseerr.h
vendored
Normal file
94
thirdparty/icu4c/common/unicode/parseerr.h
vendored
Normal file
@@ -0,0 +1,94 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 03/14/00 aliu Creation.
|
||||
* 06/27/00 aliu Change from C++ class to C struct
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef PARSEERR_H
|
||||
#define PARSEERR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Parse Error Information
|
||||
*/
|
||||
/**
|
||||
* The capacity of the context strings in UParseError.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
enum { U_PARSE_CONTEXT_LEN = 16 };
|
||||
|
||||
/**
|
||||
 * A UParseError struct is used to return detailed information about
|
||||
* parsing errors. It is used by ICU parsing engines that parse long
|
||||
* rules, patterns, or programs, where the text being parsed is long
|
||||
* enough that more information than a UErrorCode is needed to
|
||||
* localize the error.
|
||||
*
|
||||
* <p>The line, offset, and context fields are optional; parsing
|
||||
 * engines may choose not to use them.
|
||||
*
|
||||
* <p>The preContext and postContext strings include some part of the
|
||||
* context surrounding the error. If the source text is "let for=7"
|
||||
* and "for" is the error (e.g., because it is a reserved word), then
|
||||
* some examples of what a parser might produce are the following:
|
||||
*
|
||||
* <pre>
|
||||
* preContext postContext
|
||||
* "" "" The parser does not support context
|
||||
* "let " "=7" Pre- and post-context only
|
||||
* "let " "for=7" Pre- and post-context and error text
|
||||
* "" "for" Error text only
|
||||
* </pre>
|
||||
*
|
||||
* <p>Examples of engines which use UParseError (or may use it in the
|
||||
* future) are Transliterator, RuleBasedBreakIterator, and
|
||||
* RegexPattern.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UParseError {
|
||||
|
||||
/**
|
||||
* The line on which the error occurred. If the parser uses this
|
||||
* field, it sets it to the line number of the source text line on
|
||||
* which the error appears, which will be a value >= 1. If the
|
||||
* parse does not support line numbers, the value will be <= 0.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t line;
|
||||
|
||||
/**
|
||||
* The character offset to the error. If the line field is >= 1,
|
||||
* then this is the offset from the start of the line. Otherwise,
|
||||
* this is the offset from the start of the text. If the parser
|
||||
* does not support this field, it will have a value < 0.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t offset;
|
||||
|
||||
/**
|
||||
* Textual context before the error. Null-terminated. The empty
|
||||
* string if not supported by parser.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UChar preContext[U_PARSE_CONTEXT_LEN];
|
||||
|
||||
/**
|
||||
* The error itself and/or textual context after the error.
|
||||
* Null-terminated. The empty string if not supported by parser.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UChar postContext[U_PARSE_CONTEXT_LEN];
|
||||
|
||||
} UParseError;
|
||||
|
||||
#endif
|
||||
237
thirdparty/icu4c/common/unicode/parsepos.h
vendored
Normal file
237
thirdparty/icu4c/common/unicode/parsepos.h
vendored
Normal file
@@ -0,0 +1,237 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
* Copyright (C) 1997-2005, International Business Machines Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
* File PARSEPOS.H
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 07/09/97 helena Converted from java.
|
||||
* 07/17/98 stephen Added errorIndex support.
|
||||
* 05/11/99 stephen Cleaned up.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef PARSEPOS_H
|
||||
#define PARSEPOS_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \file
|
||||
 * \brief C++ API: ParsePosition
|
||||
*/
|
||||
/**
|
||||
* <code>ParsePosition</code> is a simple class used by <code>Format</code>
|
||||
* and its subclasses to keep track of the current position during parsing.
|
||||
* The <code>parseObject</code> method in the various <code>Format</code>
|
||||
* classes requires a <code>ParsePosition</code> object as an argument.
|
||||
*
|
||||
* <p>
|
||||
* By design, as you parse through a string with different formats,
|
||||
* you can use the same <code>ParsePosition</code>, since the index parameter
|
||||
* records the current position.
|
||||
*
|
||||
* The ParsePosition class is not suitable for subclassing.
|
||||
*
|
||||
* @version 1.3 10/30/97
|
||||
* @author Mark Davis, Helena Shih
|
||||
* @see java.text.Format
|
||||
*/
|
||||
|
||||
class U_COMMON_API ParsePosition : public UObject {
public:
    /**
     * Default constructor: the index starts at 0 and the error index
     * at -1.
     * @stable ICU 2.0
     */
    ParsePosition() : UObject(), index(0), errorIndex(-1) {}

    /**
     * Creates a ParsePosition that starts at the given index; the error
     * index starts at -1.
     * @param newIndex the new text offset.
     * @stable ICU 2.0
     */
    ParsePosition(int32_t newIndex) : UObject(), index(newIndex), errorIndex(-1) {}

    /**
     * Copy constructor: takes over both the index and the error index.
     * @param copy the object to be copied from.
     * @stable ICU 2.0
     */
    ParsePosition(const ParsePosition& copy)
        : UObject(copy), index(copy.index), errorIndex(copy.errorIndex) {}

    /**
     * Destructor
     * @stable ICU 2.0
     */
    virtual ~ParsePosition();

    /**
     * Assignment operator: copies the index and the error index.
     * @stable ICU 2.0
     */
    inline ParsePosition& operator=(const ParsePosition& copy);

    /**
     * Equality operator.
     * @return true if both the index and the error index of the two
     *         parse positions match, false otherwise.
     * @stable ICU 2.0
     */
    inline bool operator==(const ParsePosition& that) const;

    /**
     * Inequality operator.
     * @return true if the two parse positions are not equal, false otherwise.
     * @stable ICU 2.0
     */
    inline bool operator!=(const ParsePosition& that) const;

    /**
     * Clone this object.
     * Clones can be used concurrently in multiple threads.
     * If an error occurs, then nullptr is returned.
     * The caller must delete the clone.
     *
     * @return a clone of this object
     *
     * @see getDynamicClassID
     * @stable ICU 2.8
     */
    ParsePosition *clone() const;

    /**
     * Returns the current parse position. Before a parse method runs this
     * is the index of the character at which parsing will begin; after it
     * returns, it is the index of the character following the last
     * character parsed.
     * @return the current index.
     * @stable ICU 2.0
     */
    inline int32_t getIndex() const;

    /**
     * Sets the current parse position.
     * @param index the new index.
     * @stable ICU 2.0
     */
    inline void setIndex(int32_t index);

    /**
     * Records the index at which a parse error occurred. Formatters
     * should set this before returning an error code from their
     * parseObject method. If it is never set, the value stays at the
     * default of -1.
     * @stable ICU 2.0
     */
    inline void setErrorIndex(int32_t ei);

    /**
     * Returns the index at which an error occurred, or -1 if the
     * error index has not been set.
     * @stable ICU 2.0
     */
    inline int32_t getErrorIndex() const;

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @stable ICU 2.2
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @stable ICU 2.2
     */
    virtual UClassID getDynamicClassID() const override;

private:
    /**
     * Input: the place where parsing starts.
     * <br>Output: the position where the parse stopped.
     * Intended for serial use, where each call leaves index set up
     * for the next one.
     */
    int32_t index;

    /**
     * The index at which a parse error occurred.
     */
    int32_t errorIndex;

};
|
||||
|
||||
/** Copies the index and the error index from the given position. */
inline ParsePosition&
ParsePosition::operator=(const ParsePosition& copy)
{
    this->index = copy.index;
    this->errorIndex = copy.errorIndex;
    return *this;
}
|
||||
|
||||
inline bool
|
||||
ParsePosition::operator==(const ParsePosition& copy) const
|
||||
{
|
||||
if(index != copy.index || errorIndex != copy.errorIndex)
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool
|
||||
ParsePosition::operator!=(const ParsePosition& copy) const
|
||||
{
|
||||
return !operator==(copy);
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
ParsePosition::getIndex() const
|
||||
{
|
||||
return index;
|
||||
}
|
||||
|
||||
inline void
|
||||
ParsePosition::setIndex(int32_t offset)
|
||||
{
|
||||
this->index = offset;
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
ParsePosition::getErrorIndex() const
|
||||
{
|
||||
return errorIndex;
|
||||
}
|
||||
|
||||
inline void
|
||||
ParsePosition::setErrorIndex(int32_t ei)
|
||||
{
|
||||
this->errorIndex = ei;
|
||||
}
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
861
thirdparty/icu4c/common/unicode/platform.h
vendored
Normal file
861
thirdparty/icu4c/common/unicode/platform.h
vendored
Normal file
@@ -0,0 +1,861 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : platform.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _PLATFORM_H
|
||||
#define _PLATFORM_H
|
||||
|
||||
#include "unicode/uconfig.h"
|
||||
#include "unicode/uvernum.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Basic types for the platform.
|
||||
*
|
||||
* This file used to be generated by autoconf/configure.
|
||||
* Starting with ICU 49, platform.h is a normal source file,
|
||||
* to simplify cross-compiling and working with non-autoconf/make build systems.
|
||||
*
|
||||
* When a value in this file does not work on a platform, then please
|
||||
* try to derive it from the U_PLATFORM value
|
||||
* (for which we might need a new value constant in rare cases)
|
||||
* and/or from other macros that are predefined by the compiler
|
||||
* or defined in standard (POSIX or platform or compiler) headers.
|
||||
*
|
||||
* As a temporary workaround, you can add an explicit \#define for some macros
|
||||
* before it is first tested, or add an equivalent -D macro definition
|
||||
* to the compiler's command line.
|
||||
*
|
||||
* Note: Some compilers provide ways to show the predefined macros.
|
||||
* For example, with gcc you can compile an empty .c file and have the compiler
|
||||
* print the predefined macros with
|
||||
* \code
|
||||
* gcc -E -dM -x c /dev/null | sort
|
||||
* \endcode
|
||||
* (You can provide an actual empty .c file rather than /dev/null.
|
||||
* <code>-x c++</code> is for C++.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Define some things so that they can be documented.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_IN_DOXYGEN
|
||||
/*
|
||||
* Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
|
||||
* Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented.
|
||||
*/
|
||||
|
||||
/* None for now. */
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM
|
||||
* The U_PLATFORM macro defines the platform we're on.
|
||||
*
|
||||
* We used to define one different, value-less macro per platform.
|
||||
* That made it hard to know the set of relevant platforms and macros,
|
||||
* and hard to deal with variants of platforms.
|
||||
*
|
||||
* Starting with ICU 49, we define platforms as numeric macros,
|
||||
* with ranges of values for related platforms and their variants.
|
||||
* The U_PLATFORM macro is set to one of these values.
|
||||
*
|
||||
* Historical note from the Solaris Wikipedia article:
|
||||
* AT&T and Sun collaborated on a project to merge the most popular Unix variants
|
||||
* on the market at that time: BSD, System V, and Xenix.
|
||||
* This became Unix System V Release 4 (SVR4).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
/** Unknown platform. @internal */
|
||||
#define U_PF_UNKNOWN 0
|
||||
/** Windows @internal */
|
||||
#define U_PF_WINDOWS 1000
|
||||
/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */
|
||||
#define U_PF_MINGW 1800
|
||||
/**
|
||||
* Cygwin. Windows, calls to cygwin1.dll for Posix functions,
|
||||
* using MSVC or GNU gcc and binutils.
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_CYGWIN 1900
|
||||
/* Reserve 2000 for U_PF_UNIX? */
|
||||
/** HP-UX is based on UNIX System V. @internal */
|
||||
#define U_PF_HPUX 2100
|
||||
/** Solaris is a Unix operating system based on SVR4. @internal */
|
||||
#define U_PF_SOLARIS 2600
|
||||
/** BSD is a UNIX operating system derivative. @internal */
|
||||
#define U_PF_BSD 3000
|
||||
/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */
|
||||
#define U_PF_AIX 3100
|
||||
/** IRIX is based on UNIX System V with BSD extensions. @internal */
|
||||
#define U_PF_IRIX 3200
|
||||
/**
|
||||
* Darwin is a POSIX-compliant operating system, composed of code developed by Apple,
|
||||
* as well as code derived from NeXTSTEP, BSD, and other projects,
|
||||
* built around the Mach kernel.
|
||||
* Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based.
|
||||
 * (Original description modified from Wikipedia.)
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_DARWIN 3500
|
||||
/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */
|
||||
#define U_PF_IPHONE 3550
|
||||
/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */
|
||||
#define U_PF_QNX 3700
|
||||
/** Linux is a Unix-like operating system. @internal */
|
||||
#define U_PF_LINUX 4000
|
||||
/**
|
||||
* Native Client is pretty close to Linux.
|
||||
* See https://developer.chrome.com/native-client and
|
||||
* http://www.chromium.org/nativeclient
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_BROWSER_NATIVE_CLIENT 4020
|
||||
/** Android is based on Linux. @internal */
|
||||
#define U_PF_ANDROID 4050
|
||||
/** Haiku is a POSIX-ish platform. @internal */
|
||||
#define U_PF_HAIKU 4080
|
||||
/** Fuchsia is a POSIX-ish platform. @internal */
|
||||
#define U_PF_FUCHSIA 4100
|
||||
/* Maximum value for Linux-based platform is 4499 */
|
||||
/**
|
||||
* Emscripten is a C++ transpiler for the Web that can target asm.js or
|
||||
* WebAssembly. It provides some POSIX-compatible wrappers and stubs and
|
||||
* some Linux-like functionality, but is not fully compatible with
|
||||
* either.
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_EMSCRIPTEN 5010
|
||||
/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
|
||||
#define U_PF_OS390 9000
|
||||
/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */
|
||||
#define U_PF_OS400 9400
|
||||
|
||||
/*
 * Select U_PLATFORM from compiler-predefined macros unless the build has
 * already set it. The first matching branch wins, so the order of the
 * tests below is significant.
 */
#if defined(U_PLATFORM)
    /* Keep the value supplied by the build. */
#elif defined(__MINGW32__)
#  define U_PLATFORM U_PF_MINGW
#elif defined(__CYGWIN__)
#  define U_PLATFORM U_PF_CYGWIN
    /* Cygwin has no uchar.h before Cygwin 3.5. */
#  include <cygwin/version.h>
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#  define U_PLATFORM U_PF_WINDOWS
#elif defined(__ANDROID__)
#  define U_PLATFORM U_PF_ANDROID
    /* wchar_t support on Android depends on the API level. */
#  include <android/api-level.h>
#elif defined(__pnacl__) || defined(__native_client__)
#  define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
#elif defined(__Fuchsia__)
#  define U_PLATFORM U_PF_FUCHSIA
#elif defined(linux) || defined(__linux__) || defined(__linux)
#  define U_PLATFORM U_PF_LINUX
#elif defined(__APPLE__) && defined(__MACH__)
#  include <TargetConditionals.h>
    /* TARGET_OS_IPHONE is a variant of TARGET_OS_MAC; Mac Catalyst counts as Darwin. */
#  if (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && (defined(TARGET_OS_MACCATALYST) && !TARGET_OS_MACCATALYST)
#    define U_PLATFORM U_PF_IPHONE
#  else
#    define U_PLATFORM U_PF_DARWIN
#  endif
#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__)
#  if defined(__FreeBSD__)
#    include <sys/endian.h>
#  endif
#  define U_PLATFORM U_PF_BSD
#elif defined(sun) || defined(__sun)
    /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */
#  define U_PLATFORM U_PF_SOLARIS
#  if defined(__GNUC__)
    /*
     * Solaris/GCC needs this header to get the proper endianness. Normally
     * stddef.h pulls it in, but on Solaris/GCC the GCC version of stddef.h
     * is used, and that one does not include it.
     */
#    include <sys/isa_defs.h>
#  endif
#elif defined(_AIX) || defined(__TOS_AIX__)
#  define U_PLATFORM U_PF_AIX
#elif defined(_hpux) || defined(hpux) || defined(__hpux)
#  define U_PLATFORM U_PF_HPUX
#elif defined(sgi) || defined(__sgi)
#  define U_PLATFORM U_PF_IRIX
#elif defined(__QNX__) || defined(__QNXNTO__)
#  define U_PLATFORM U_PF_QNX
#elif defined(__TOS_MVS__)
#  define U_PLATFORM U_PF_OS390
#elif defined(__OS400__) || defined(__TOS_OS400__)
#  define U_PLATFORM U_PF_OS400
#elif defined(__HAIKU__)
#  define U_PLATFORM U_PF_HAIKU
#elif defined(__EMSCRIPTEN__)
#  define U_PLATFORM U_PF_EMSCRIPTEN
#else
#  define U_PLATFORM U_PF_UNKNOWN
#endif
|
||||
|
||||
/**
|
||||
* \def U_REAL_MSVC
|
||||
* Defined if the compiler is the real MSVC compiler (and not something like
|
||||
* Clang setting _MSC_VER in order to compile Windows code that requires it).
|
||||
* Otherwise undefined.
|
||||
* @internal
|
||||
*/
|
||||
#if defined(U_IN_DOXYGEN)
    /* Always defined for the documentation build. */
#  define U_REAL_MSVC
#elif defined(_MSC_VER) && (!defined(__clang__) || !__clang__)
    /* _MSC_VER is set and the compiler is not Clang. */
#  define U_REAL_MSVC
#endif
|
||||
|
||||
/**
|
||||
* \def CYGWINMSVC
|
||||
* Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
|
||||
* Otherwise undefined.
|
||||
* @internal
|
||||
*/
|
||||
/* Commented out because this is already set in mh-cygwin-msvc
|
||||
#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER)
|
||||
# define CYGWINMSVC
|
||||
#endif
|
||||
*/
|
||||
#ifdef U_IN_DOXYGEN
|
||||
# define CYGWINMSVC
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_USES_ONLY_WIN32_API
|
||||
* Defines whether the platform uses only the Win32 API.
|
||||
* Set to 1 for Windows/MSVC, ClangCL and MinGW but not Cygwin.
|
||||
* @internal
|
||||
*/
|
||||
/* A value supplied by the build takes precedence. */
#ifndef U_PLATFORM_USES_ONLY_WIN32_API
#  if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC)
#    define U_PLATFORM_USES_ONLY_WIN32_API 1
#  else
    /* Cygwin implements POSIX. */
#    define U_PLATFORM_USES_ONLY_WIN32_API 0
#  endif
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_HAS_WIN32_API
|
||||
* Defines whether the Win32 API is available on the platform.
|
||||
* Set to 1 for Windows/MSVC, ClangCL, MinGW and Cygwin.
|
||||
* @internal
|
||||
*/
|
||||
/* A value supplied by the build takes precedence. */
#ifndef U_PLATFORM_HAS_WIN32_API
#  if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
#    define U_PLATFORM_HAS_WIN32_API 1
#  else
#    define U_PLATFORM_HAS_WIN32_API 0
#  endif
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_HAS_WINUWP_API
|
||||
* Defines whether target is intended for Universal Windows Platform API
|
||||
* Set to 1 for Windows10 Release Solution Configuration
|
||||
* @internal
|
||||
*/
|
||||
/* Defaults to 0 unless the build has already supplied a value. */
#ifndef U_PLATFORM_HAS_WINUWP_API
#  define U_PLATFORM_HAS_WINUWP_API 0
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_IMPLEMENTS_POSIX
|
||||
* Defines whether the platform implements (most of) the POSIX API.
|
||||
* Set to 1 for Cygwin and most other platforms.
|
||||
* @internal
|
||||
*/
|
||||
/* A value supplied by the build takes precedence. */
#ifndef U_PLATFORM_IMPLEMENTS_POSIX
#  if U_PLATFORM_USES_ONLY_WIN32_API
#    define U_PLATFORM_IMPLEMENTS_POSIX 0
#  else
#    define U_PLATFORM_IMPLEMENTS_POSIX 1
#  endif
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_IS_LINUX_BASED
|
||||
* Defines whether the platform is Linux or one of its derivatives.
|
||||
* @internal
|
||||
*/
|
||||
/* A value supplied by the build takes precedence. */
#ifndef U_PLATFORM_IS_LINUX_BASED
    /* Linux-based platform values run from U_PF_LINUX up to 4499. */
#  if U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= 4499
#    define U_PLATFORM_IS_LINUX_BASED 1
#  else
#    define U_PLATFORM_IS_LINUX_BASED 0
#  endif
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_IS_DARWIN_BASED
|
||||
* Defines whether the platform is Darwin or one of its derivatives.
|
||||
* @internal
|
||||
*/
|
||||
/* A value supplied by the build takes precedence. */
#ifndef U_PLATFORM_IS_DARWIN_BASED
#  if U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE
#    define U_PLATFORM_IS_DARWIN_BASED 1
#  else
#    define U_PLATFORM_IS_DARWIN_BASED 0
#  endif
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Compiler and environment features */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_GCC_MAJOR_MINOR
|
||||
* Indicates whether the compiler is gcc (test for != 0),
|
||||
* and if so, contains its major (times 100) and minor version numbers.
|
||||
* If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0.
|
||||
*
|
||||
* For example, for testing for whether we have gcc, and whether it's 4.6 or higher,
|
||||
* use "#if U_GCC_MAJOR_MINOR >= 406".
|
||||
* @internal
|
||||
*/
|
||||
#if !defined(__GNUC__)
    /* Not gcc. */
#  define U_GCC_MAJOR_MINOR 0
#else
    /* Major version times 100, plus the minor version. */
#  define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__)
#endif
|
||||
|
||||
/**
|
||||
* \def U_IS_BIG_ENDIAN
|
||||
* Determines the endianness of the platform.
|
||||
* @internal
|
||||
*/
|
||||
#if defined(U_IS_BIG_ENDIAN)
    /* Keep the value supplied by the build. */
#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN)
#  define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
    /* gcc */
#  define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
#  define U_IS_BIG_ENDIAN 1
#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN)
#  define U_IS_BIG_ENDIAN 0
#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__) || \
      defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0) || \
      defined(sparc) || defined(__sparc) || defined(__sparc__)
    /*
     * Big-endian targets that do not appear to predefine any endianness
     * macros: z/OS, IBM i and s390; HPPA; and some sparc-based systems
     * (e.g. Linux).
     */
#  define U_IS_BIG_ENDIAN 1
#else
#  define U_IS_BIG_ENDIAN 0
#endif
|
||||
|
||||
/**
|
||||
* \def U_HAVE_PLACEMENT_NEW
|
||||
* Determines whether to override placement new and delete for STL.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
/* A value supplied by the build takes precedence. */
#ifndef U_HAVE_PLACEMENT_NEW
#  if defined(__BORLANDC__)
#    define U_HAVE_PLACEMENT_NEW 0
#  else
#    define U_HAVE_PLACEMENT_NEW 1
#  endif
#endif
|
||||
|
||||
/**
|
||||
* \def U_HAVE_DEBUG_LOCATION_NEW
|
||||
* Define this to define the MFC debug version of the operator new.
|
||||
*
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
/* A value supplied by the build takes precedence. */
#ifndef U_HAVE_DEBUG_LOCATION_NEW
#  if defined(_MSC_VER)
#    define U_HAVE_DEBUG_LOCATION_NEW 1
#  else
#    define U_HAVE_DEBUG_LOCATION_NEW 0
#  endif
#endif
|
||||
|
||||
/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */
|
||||
/*
 * Each UPRV_HAS_*(x) wrapper forwards to the matching __has_*(x) query when
 * the compiler provides it and otherwise evaluates to 0.
 */
#if defined(__has_attribute)
#  define UPRV_HAS_ATTRIBUTE(x) __has_attribute(x)
#else
#  define UPRV_HAS_ATTRIBUTE(x) 0
#endif

#if defined(__has_cpp_attribute)
#  define UPRV_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
#  define UPRV_HAS_CPP_ATTRIBUTE(x) 0
#endif

#if defined(__has_declspec_attribute)
#  define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) __has_declspec_attribute(x)
#else
#  define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) 0
#endif

#if defined(__has_builtin)
#  define UPRV_HAS_BUILTIN(x) __has_builtin(x)
#else
#  define UPRV_HAS_BUILTIN(x) 0
#endif

#if defined(__has_feature)
#  define UPRV_HAS_FEATURE(x) __has_feature(x)
#else
#  define UPRV_HAS_FEATURE(x) 0
#endif

#if defined(__has_extension)
#  define UPRV_HAS_EXTENSION(x) __has_extension(x)
#else
#  define UPRV_HAS_EXTENSION(x) 0
#endif

#if defined(__has_warning)
#  define UPRV_HAS_WARNING(x) __has_warning(x)
#else
#  define UPRV_HAS_WARNING(x) 0
#endif
|
||||
|
||||
|
||||
/* Expands to a no_sanitize("undefined") attribute under Clang, and to nothing elsewhere. */
#if !defined(__clang__)
#  define UPRV_NO_SANITIZE_UNDEFINED
#else
#  define UPRV_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined")))
#endif
|
||||
|
||||
/**
|
||||
* \def U_MALLOC_ATTR
|
||||
* Attribute to mark functions as malloc-like
|
||||
* @internal
|
||||
*/
|
||||
/* Only gcc 3 or later gets the attribute; elsewhere the macro is empty. */
#if !defined(__GNUC__) || __GNUC__ < 3
#  define U_MALLOC_ATTR
#else
#  define U_MALLOC_ATTR __attribute__ ((__malloc__))
#endif
|
||||
|
||||
/**
|
||||
* \def U_ALLOC_SIZE_ATTR
|
||||
* Attribute to specify the size of the allocated buffer for malloc-like functions
|
||||
* @internal
|
||||
*/
|
||||
#if (defined(__GNUC__) && \
|
||||
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || \
|
||||
UPRV_HAS_ATTRIBUTE(alloc_size)
|
||||
# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X)))
|
||||
# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y)))
|
||||
#else
|
||||
# define U_ALLOC_SIZE_ATTR(X)
|
||||
# define U_ALLOC_SIZE_ATTR2(X,Y)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CPLUSPLUS_VERSION
|
||||
* 0 if no C++; 1, 11, 14, ... if C++.
|
||||
* Support for specific features cannot always be determined by the C++ version alone.
|
||||
* @internal
|
||||
*/
|
||||
#if defined(U_CPLUSPLUS_VERSION)
    /* A predefined non-zero value is forced back to 0 when compiling as C. */
#  if !defined(__cplusplus) && U_CPLUSPLUS_VERSION != 0
#    undef U_CPLUSPLUS_VERSION
#    define U_CPLUSPLUS_VERSION 0
#  endif
    /* Otherwise keep the predefined value. */
#elif !defined(__cplusplus)
#  define U_CPLUSPLUS_VERSION 0
#elif __cplusplus < 201103L && !(defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
    /* C++98 or C++03 */
#  define U_CPLUSPLUS_VERSION 1
#elif __cplusplus < 201402L && !(defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
#  define U_CPLUSPLUS_VERSION 11
#elif __cplusplus < 201703L && !(defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
#  define U_CPLUSPLUS_VERSION 14
#else
    /* C++17 or later; either __cplusplus or _MSVC_LANG may report it. */
#  define U_CPLUSPLUS_VERSION 17
#endif
|
||||
|
||||
/**
|
||||
* \def U_FALLTHROUGH
|
||||
* Annotate intentional fall-through between switch labels.
|
||||
* http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
|
||||
* @internal
|
||||
*/
|
||||
#ifndef __cplusplus
|
||||
// Not for C.
|
||||
#elif defined(U_FALLTHROUGH)
|
||||
// Use the predefined value.
|
||||
#elif defined(__clang__)
|
||||
// Test for compiler vs. feature separately.
|
||||
// Other compilers might choke on the feature test.
|
||||
# if UPRV_HAS_CPP_ATTRIBUTE(clang::fallthrough) || \
|
||||
(UPRV_HAS_FEATURE(cxx_attributes) && \
|
||||
UPRV_HAS_WARNING("-Wimplicit-fallthrough"))
|
||||
# define U_FALLTHROUGH [[clang::fallthrough]]
|
||||
# endif
|
||||
#elif defined(__GNUC__) && (__GNUC__ >= 7)
|
||||
# define U_FALLTHROUGH __attribute__((fallthrough))
|
||||
#endif
|
||||
|
||||
#ifndef U_FALLTHROUGH
|
||||
# define U_FALLTHROUGH
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Character data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_ASCII_FAMILY 0
|
||||
|
||||
/**
|
||||
* U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_EBCDIC_FAMILY 1
|
||||
|
||||
/**
|
||||
* \def U_CHARSET_FAMILY
|
||||
*
|
||||
* <p>These definitions make it possible to specify the encoding of text
|
||||
* in the char data type as defined by the platform and the compiler.
|
||||
* It is enough to determine the code point values of "invariant characters",
|
||||
* which are the ones shared by all encodings that are in use
|
||||
* on a given platform.</p>
|
||||
*
|
||||
* <p>Those "invariant characters" should be all the uppercase and lowercase
|
||||
* latin letters, the digits, the space, and "basic punctuation".
|
||||
* Also, '\\n', '\\r', '\\t' should be available.</p>
|
||||
*
|
||||
* <p>The list of "invariant characters" is:<br>
|
||||
* \code
|
||||
* A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? _
|
||||
* \endcode
|
||||
* <br>
|
||||
* (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
|
||||
*
|
||||
* <p>This matches the IBM Syntactic Character Set (CS 640).</p>
|
||||
*
|
||||
* <p>In other words, all the graphic characters in 7-bit ASCII should
|
||||
* be safely accessible except the following:</p>
|
||||
*
|
||||
* \code
|
||||
* '\' <backslash>
|
||||
* '[' <left bracket>
|
||||
* ']' <right bracket>
|
||||
* '{' <left brace>
|
||||
* '}' <right brace>
|
||||
* '^' <circumflex>
|
||||
* '~' <tilde>
|
||||
* '!' <exclamation mark>
|
||||
* '#' <number sign>
|
||||
* '|' <vertical line>
|
||||
* '$' <dollar sign>
|
||||
* '@' <commercial at>
|
||||
* '`' <grave accent>
|
||||
* \endcode
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef U_CHARSET_FAMILY
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB)
|
||||
# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
|
||||
#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__)
|
||||
# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
|
||||
#else
|
||||
# define U_CHARSET_FAMILY U_ASCII_FAMILY
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CHARSET_IS_UTF8
|
||||
*
|
||||
* Hardcode the default charset to UTF-8.
|
||||
*
|
||||
* If this is set to 1, then
|
||||
* - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
|
||||
* contain UTF-8 text, regardless of what the system API uses
|
||||
* - some ICU code will use fast functions like u_strFromUTF8()
|
||||
* rather than the more general and more heavy-weight conversion API (ucnv.h)
|
||||
* - ucnv_getDefaultName() always returns "UTF-8"
|
||||
* - ucnv_setDefaultName() is disabled and will not change the default charset
|
||||
* - static builds of ICU are smaller
|
||||
* - more functionality is available with the UCONFIG_NO_CONVERSION build-time
|
||||
* configuration option (see unicode/uconfig.h)
|
||||
* - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
* @see UCONFIG_NO_CONVERSION
|
||||
*/
|
||||
#ifdef U_CHARSET_IS_UTF8
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED || \
|
||||
U_PLATFORM == U_PF_EMSCRIPTEN
|
||||
# define U_CHARSET_IS_UTF8 1
|
||||
#else
|
||||
# define U_CHARSET_IS_UTF8 0
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Information about wchar support */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_HAVE_WCHAR_H
|
||||
* Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef U_HAVE_WCHAR_H
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9
|
||||
/*
|
||||
* Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t.
|
||||
* The type and header existed, but the library functions did not work as expected.
|
||||
* The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway.
|
||||
*/
|
||||
# define U_HAVE_WCHAR_H 0
|
||||
#else
|
||||
# define U_HAVE_WCHAR_H 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_SIZEOF_WCHAR_T
|
||||
* U_SIZEOF_WCHAR_T==sizeof(wchar_t)
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef U_SIZEOF_WCHAR_T
|
||||
/* Use the predefined value. */
|
||||
#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9)
|
||||
/*
|
||||
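* Android before API level 9: sizeof(wchar_t) was 1 (see the U_HAVE_WCHAR_H notes above).
|
||||
* NOTE(review): the earlier text here described Classic Mac OS / Mac OS X before 10.3 (no wchar_t or wstring; newer Mac OS X has size 4), which does not match this Android-only condition.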
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 1
|
||||
#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
#elif U_PLATFORM == U_PF_AIX
|
||||
/*
|
||||
* AIX 6.1 information, section "Wide character data representation":
|
||||
* "... the wchar_t datatype is 32-bit in the 64-bit environment and
|
||||
* 16-bit in the 32-bit environment."
|
||||
* and
|
||||
* "All locales use Unicode for their wide character code values (process code),
|
||||
* except the IBM-eucTW codeset."
|
||||
*/
|
||||
# ifdef __64BIT__
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
# else
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# endif
|
||||
#elif U_PLATFORM == U_PF_OS390
|
||||
/*
|
||||
* z/OS V1R11 information center, section "LP64 | ILP32":
|
||||
* "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes.
|
||||
* Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes."
|
||||
*/
|
||||
# ifdef _LP64
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
# else
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# endif
|
||||
#elif U_PLATFORM == U_PF_OS400
|
||||
# if defined(__UTF32__)
|
||||
/*
|
||||
* LOCALETYPE(*LOCALEUTF) is specified.
|
||||
* Wide-character strings are in UTF-32,
|
||||
* narrow-character strings are in UTF-8.
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
# elif defined(__UCS2__)
|
||||
/*
|
||||
* LOCALETYPE(*LOCALEUCS2) is specified.
|
||||
* Wide-character strings are in UCS-2,
|
||||
* narrow-character strings are in EBCDIC.
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# else
|
||||
/*
|
||||
* LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified.
|
||||
* Wide-character strings are in 16-bit EBCDIC,
|
||||
* narrow-character strings are in EBCDIC.
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# endif
|
||||
#else
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
#endif
|
||||
|
||||
/*
 * U_HAVE_WCSCPY: unless predefined by the build, takes the same value as
 * U_HAVE_WCHAR_H.
 */
#ifndef U_HAVE_WCSCPY
|
||||
#define U_HAVE_WCSCPY U_HAVE_WCHAR_H
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* \def U_HAVE_CHAR16_T
|
||||
* Defines whether the char16_t type is available for UTF-16
|
||||
* and u"abc" UTF-16 string literals are supported.
|
||||
* This is a new standard type and standard string literal syntax in C++11
|
||||
* but has been available in some compilers before.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_CHAR16_T
|
||||
/* Use the predefined value. */
|
||||
#else
|
||||
/*
|
||||
* Notes:
|
||||
* C++11 and C11 require support for UTF-16 literals
|
||||
* Doesn't work on Mac C11 (see workaround in ptypes.h)
|
||||
* or Cygwin less than 3.5.
|
||||
*/
|
||||
# if defined(__cplusplus)
|
||||
# define U_HAVE_CHAR16_T 1
|
||||
# elif U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && CYGWIN_VERSION_DLL_MAJOR < 3005)
|
||||
# define U_HAVE_CHAR16_T 0
|
||||
# else
|
||||
// conformant C11
|
||||
# define U_HAVE_CHAR16_T 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @{
|
||||
* \def U_DECLARE_UTF16
|
||||
* Do not use this macro because it is not defined on all platforms.
|
||||
* In C++, use std::u16string_view literals, see the UNICODE_STRING docs.
|
||||
* In C, use u"UTF-16 literals".
|
||||
* See also the public U_STRING_DECL macro.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_DECLARE_UTF16
|
||||
/* Use the predefined value. */
|
||||
#elif U_HAVE_CHAR16_T \
|
||||
|| (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
|
||||
|| (defined(__HP_aCC) && __HP_aCC >= 035000) \
|
||||
|| (defined(__HP_cc) && __HP_cc >= 111106) \
|
||||
|| (defined(U_IN_DOXYGEN))
|
||||
# define U_DECLARE_UTF16(string) u ## string
|
||||
#elif U_SIZEOF_WCHAR_T == 2 \
|
||||
&& (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
|
||||
# define U_DECLARE_UTF16(string) L ## string
|
||||
#else
|
||||
/* Leave U_DECLARE_UTF16 undefined. See unistr.h. */
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Symbol import-export control */
|
||||
/*===========================================================================*/
|
||||
|
||||
/*
 * U_EXPORT: decoration placed on symbols that the library exports.
 * Selection order:
 *  1. a value predefined by the build is kept unchanged;
 *  2. empty when U_STATIC_IMPLEMENTATION is defined;
 *  3. __declspec(dllexport) for MSVC, or for any compiler that reports both
 *     the dllexport and dllimport __declspec attributes;
 *  4. __attribute__((visibility("default"))) for GCC-compatible compilers
 *     and Open XL;
 *  5. __global for Sun compilers with version >= 0x550;
 *  6. empty otherwise.
 * The HP aCC/cc branch below is commented out and therefore inactive.
 */
#ifdef U_EXPORT
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_STATIC_IMPLEMENTATION)
|
||||
# define U_EXPORT
|
||||
#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
|
||||
UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__))
|
||||
# define U_EXPORT __declspec(dllexport)
|
||||
#elif defined(__GNUC__) || defined(__open_xl__)
|
||||
# define U_EXPORT __attribute__((visibility("default")))
|
||||
#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
|
||||
|| (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
|
||||
# define U_EXPORT __global
|
||||
/*#elif defined(__HP_aCC) || defined(__HP_cc)
|
||||
# define U_EXPORT __declspec(dllexport)*/
|
||||
#else
|
||||
# define U_EXPORT
|
||||
#endif
|
||||
|
||||
/* U_CALLCONV is related to U_EXPORT2 */
|
||||
/*
 * U_EXPORT2: qualifier written between the return type and the function name
 * of exported functions. Unless predefined, it is __cdecl for MSVC and empty
 * for every other compiler. U_CALLCONV falls back to this value.
 */
#ifdef U_EXPORT2
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_MSC_VER)
|
||||
# define U_EXPORT2 __cdecl
|
||||
#else
|
||||
# define U_EXPORT2
|
||||
#endif
|
||||
|
||||
/*
 * U_IMPORT: decoration placed on symbols imported from another library.
 * Unless predefined, it is __declspec(dllimport) under the same compiler
 * test that selects __declspec(dllexport) for U_EXPORT, and empty otherwise.
 */
#ifdef U_IMPORT
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
|
||||
UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__))
|
||||
/* Windows needs to export/import data. */
|
||||
# define U_IMPORT __declspec(dllimport)
|
||||
#else
|
||||
# define U_IMPORT
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_HIDDEN
|
||||
* This is used to mark internal structs declared within external classes,
|
||||
* to prevent the internal structs from having the same visibility as the
|
||||
* class within which they are declared.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HIDDEN
|
||||
/* Use the predefined value. */
|
||||
#elif defined(__GNUC__) || defined(__open_xl__)
|
||||
# define U_HIDDEN __attribute__((visibility("hidden")))
|
||||
#else
|
||||
# define U_HIDDEN
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CALLCONV
|
||||
* Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
|
||||
* in callback function typedefs to make sure that the calling convention
|
||||
* is compatible.
|
||||
*
|
||||
* This is only used for non-ICU-API functions.
|
||||
* When a function is a public ICU API,
|
||||
* you must use the U_CAPI and U_EXPORT2 qualifiers.
|
||||
*
|
||||
* Please note, you need to use U_CALLCONV after the *.
|
||||
*
|
||||
* NO : "static const char U_CALLCONV *func( . . . )"
|
||||
* YES: "static const char* U_CALLCONV func( . . . )"
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
|
||||
# define U_CALLCONV __cdecl
|
||||
#else
|
||||
# define U_CALLCONV U_EXPORT2
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CALLCONV_FPTR
|
||||
* Similar to U_CALLCONV, but only used on function pointers.
|
||||
* @internal
|
||||
*/
|
||||
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
|
||||
# define U_CALLCONV_FPTR U_CALLCONV
|
||||
#else
|
||||
# define U_CALLCONV_FPTR
|
||||
#endif
|
||||
/** @} */
|
||||
|
||||
#endif // _PLATFORM_H
|
||||
66
thirdparty/icu4c/common/unicode/ptypes.h
vendored
Normal file
66
thirdparty/icu4c/common/unicode/ptypes.h
vendored
Normal file
@@ -0,0 +1,66 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : ptypes.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
* 09/18/08 srl Moved basic types back to ptypes.h from platform.h
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Definitions of integer types of various widths
|
||||
*/
|
||||
|
||||
#ifndef _PTYPES_H
|
||||
#define _PTYPES_H
|
||||
|
||||
/**
|
||||
* \def __STDC_LIMIT_MACROS
|
||||
* According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations
|
||||
* macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested.
|
||||
* We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code
|
||||
* that uses such limit macros.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef __STDC_LIMIT_MACROS
|
||||
#define __STDC_LIMIT_MACROS
|
||||
#endif
|
||||
|
||||
/* NULL, size_t, wchar_t */
|
||||
#include <stddef.h>
|
||||
|
||||
/* More platform-specific definitions. */
|
||||
#include "unicode/platform.h"
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Generic data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// C++11 and C11 both specify that the data type char16_t should exist, C++11
|
||||
// as a keyword and C11 as a typedef in the uchar.h header file, but not all
|
||||
// implementations (looking at you, Apple, spring 2024) actually do this, so
|
||||
// ICU4C must detect and deal with that.
|
||||
#if !defined(__cplusplus) && !defined(U_IN_DOXYGEN)
|
||||
# if U_HAVE_CHAR16_T
|
||||
# include <uchar.h>
|
||||
# else
|
||||
typedef uint16_t char16_t;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* _PTYPES_H */
|
||||
183
thirdparty/icu4c/common/unicode/putil.h
vendored
Normal file
183
thirdparty/icu4c/common/unicode/putil.h
vendored
Normal file
@@ -0,0 +1,183 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : putil.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/14/98 nos Creation (content moved here from utypes.h).
|
||||
* 06/17/99 erm Added IEEE_754
|
||||
* 07/22/98 stephen Added IEEEremainder, max, min, trunc
|
||||
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
|
||||
* 08/24/98 stephen Added longBitsFromDouble
|
||||
* 03/02/99 stephen Removed openFile(). Added AS400 support.
|
||||
* 04/15/99 stephen Converted to C
|
||||
* 11/15/99 helena Integrated S/390 changes for IEEE support.
|
||||
* 01/11/00 helena Added u_getVersion.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef PUTIL_H
|
||||
#define PUTIL_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Platform Utilities
|
||||
*/
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Platform utilities */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* Platform utilities isolate the platform dependencies of the
|
||||
* library. For each platform which this code is ported to, these
|
||||
* functions may have to be re-implemented.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Return the ICU data directory.
|
||||
* The data directory is where common format ICU data files (.dat files)
|
||||
* are loaded from. Note that normal use of the built-in ICU
|
||||
* facilities does not require loading of an external data file;
|
||||
* unless you are adding custom data to ICU, the data directory
|
||||
* does not need to be set.
|
||||
*
|
||||
* The data directory is determined as follows:
|
||||
* If u_setDataDirectory() has been called, that is it, otherwise
|
||||
* if the ICU_DATA environment variable is set, use that, otherwise
|
||||
* If a data directory was specified at ICU build time
|
||||
* <code>
|
||||
* \code
|
||||
* #define ICU_DATA_DIR "path"
|
||||
* \endcode
|
||||
* </code> use that,
|
||||
* otherwise no data directory is available.
|
||||
*
|
||||
* @return the data directory, or an empty string ("") if no data directory has
|
||||
* been specified.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
|
||||
|
||||
|
||||
/**
|
||||
* Set the ICU data directory.
|
||||
* The data directory is where common format ICU data files (.dat files)
|
||||
* are loaded from. Note that normal use of the built-in ICU
|
||||
* facilities does not require loading of an external data file;
|
||||
* unless you are adding custom data to ICU, the data directory
|
||||
* does not need to be set.
|
||||
*
|
||||
* This function should be called at most once in a process, before the
|
||||
* first ICU operation (e.g., u_init()) that will require the loading of an
|
||||
* ICU data file.
|
||||
* This function is not thread-safe. Use it before calling ICU APIs from
|
||||
* multiple threads.
|
||||
*
|
||||
* @param directory The directory to be set.
|
||||
*
|
||||
* @see u_init
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Return the time zone files override directory, or an empty string if
|
||||
* no directory was specified. Certain time zone resources will be preferentially
|
||||
* loaded from individual files in this directory.
|
||||
*
|
||||
* @return the time zone data override directory.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Set the time zone files override directory.
|
||||
* This function is not thread safe; it must not be called concurrently with
|
||||
* u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions.
|
||||
* This function should only be called before using any ICU service that
|
||||
* will access the time zone data.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
|
||||
/**
|
||||
* @{
|
||||
* Filesystem file and path separator characters.
|
||||
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if U_PLATFORM_USES_ONLY_WIN32_API
|
||||
# define U_FILE_SEP_CHAR '\\'
|
||||
# define U_FILE_ALT_SEP_CHAR '/'
|
||||
# define U_PATH_SEP_CHAR ';'
|
||||
# define U_FILE_SEP_STRING "\\"
|
||||
# define U_FILE_ALT_SEP_STRING "/"
|
||||
# define U_PATH_SEP_STRING ";"
|
||||
#else
|
||||
# define U_FILE_SEP_CHAR '/'
|
||||
# define U_FILE_ALT_SEP_CHAR '/'
|
||||
# define U_PATH_SEP_CHAR ':'
|
||||
# define U_FILE_SEP_STRING "/"
|
||||
# define U_FILE_ALT_SEP_STRING "/"
|
||||
# define U_PATH_SEP_STRING ":"
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Convert char characters to UChar characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that are encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see U_CHARSET_FAMILY.
|
||||
*
|
||||
* @param cs Input string, points to <code>length</code>
|
||||
* character bytes from a subset of the platform encoding.
|
||||
* @param us Output string, points to memory for <code>length</code>
|
||||
* Unicode characters.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*
|
||||
* @see U_CHARSET_FAMILY
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_charsToUChars(const char *cs, UChar *us, int32_t length);
|
||||
|
||||
/**
|
||||
* Convert UChar characters to char characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that can be encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see U_CHARSET_FAMILY.
|
||||
*
|
||||
* @param us Input string, points to <code>length</code>
|
||||
* Unicode characters that can be encoded with the
|
||||
* codepage-invariant subset of the platform encoding.
|
||||
* @param cs Output string, points to memory for <code>length</code>
|
||||
* character bytes.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*
|
||||
* @see U_CHARSET_FAMILY
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_UCharsToChars(const UChar *us, char *cs, int32_t length);
|
||||
|
||||
#endif
|
||||
823
thirdparty/icu4c/common/unicode/rbbi.h
vendored
Normal file
823
thirdparty/icu4c/common/unicode/rbbi.h
vendored
Normal file
@@ -0,0 +1,823 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 1999-2016 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
***************************************************************************
|
||||
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 10/22/99 alan Creation.
|
||||
* 11/11/99 rgillam Complete port from Java.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef RBBI_H
|
||||
#define RBBI_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Rule Based Break Iterator
|
||||
*/
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/schriter.h"
|
||||
|
||||
struct UCPTrie;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/** @internal */
|
||||
class LanguageBreakEngine;
|
||||
struct RBBIDataHeader;
|
||||
class RBBIDataWrapper;
|
||||
class UnhandledEngine;
|
||||
class UStack;
|
||||
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* The ExternalBreakEngine class defines an abstract interface that lets the host environment
|
||||
* provide a low-level facility for breaking Unicode text in scripts whose text boundaries
|
||||
* cannot be handled by the upper-level rule-based logic, for example Chinese and Japanese
|
||||
* word breaking, and Thai, Khmer, Burmese, Lao and other Southeast Asian scripts.
|
||||
* The host environment implements one or more subclasses of ExternalBreakEngine and
|
||||
* registers them at initialization time by calling
|
||||
* RuleBasedBreakIterator::registerExternalBreakEngine(). ICU adopts and owns the engine and will
|
||||
* delete the registered external engine at the proper time during the clean up
|
||||
* event.
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
class ExternalBreakEngine : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Destructor. Virtual so that subclasses are destroyed correctly through a base-class pointer.
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
virtual ~ExternalBreakEngine() {}
|
||||
|
||||
/**
|
||||
* <p>Indicate whether this engine handles a particular character when
|
||||
* the RuleBasedBreakIterator is used for a particular locale. This method is used
|
||||
* by the RuleBasedBreakIterator to find a break engine.</p>
|
||||
* @param c A character which begins a run that the engine might handle.
|
||||
* @param locale The locale.
|
||||
* @return true if this engine handles the particular character for that locale.
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
virtual bool isFor(UChar32 c, const char* locale) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Indicate whether this engine handles a particular character. This method is
|
||||
* used by the RuleBasedBreakIterator after it has already found a break engine, to see which
|
||||
* characters after the first one can be handled by this break engine.</p>
|
||||
* @param c A character that the engine might handle.
|
||||
* @return true if this engine handles the particular character.
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
virtual bool handles(UChar32 c) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Divide up a range of text handled by this break engine.</p>
|
||||
*
|
||||
* @param text A UText representing the text
|
||||
* @param start The start of the range of known characters
|
||||
* @param end The end of the range of known characters
|
||||
* @param foundBreaks Output: C array of int32_t break positions, or
|
||||
* nullptr
|
||||
* @param foundBreaksCapacity The capacity of foundBreaks
|
||||
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
virtual int32_t fillBreaks(UText* text, int32_t start, int32_t end,
|
||||
int32_t* foundBreaks, int32_t foundBreaksCapacity,
|
||||
UErrorCode& status) const = 0;
|
||||
};
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* A subclass of BreakIterator whose behavior is specified using a list of rules.
|
||||
* <p>Instances of this class are most commonly created by the factory methods of
|
||||
* BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
|
||||
* and then used via the abstract API in class BreakIterator</p>
|
||||
*
|
||||
* <p>See the ICU User Guide for information on Break Iterator Rules.</p>
|
||||
*
|
||||
* <p>This class is not intended to be subclassed.</p>
|
||||
*/
|
||||
class U_COMMON_API RuleBasedBreakIterator /*final*/ : public BreakIterator {
|
||||
|
||||
private:
|
||||
/**
|
||||
* The UText through which this BreakIterator accesses the text
|
||||
* @internal (private)
|
||||
*/
|
||||
UText fText = UTEXT_INITIALIZER;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
public:
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
/**
|
||||
* The rule data for this BreakIterator instance.
|
||||
* Not for general use; Public only for testing purposes.
|
||||
* @internal
|
||||
*/
|
||||
RBBIDataWrapper *fData = nullptr;
|
||||
|
||||
private:
|
||||
/**
|
||||
* The saved error code associated with this break iterator.
|
||||
* This is the value to be returned by copyErrorTo().
|
||||
*/
|
||||
UErrorCode fErrorCode = U_ZERO_ERROR;
|
||||
|
||||
/**
|
||||
* The current position of the iterator. Pinned, 0 <= fPosition <= text.length.
|
||||
* Never has the value UBRK_DONE (-1).
|
||||
*/
|
||||
int32_t fPosition = 0;
|
||||
|
||||
/**
|
||||
* TODO:
|
||||
*/
|
||||
int32_t fRuleStatusIndex = 0;
|
||||
|
||||
/**
|
||||
* Cache of previously determined boundary positions.
|
||||
*/
|
||||
class BreakCache;
|
||||
BreakCache *fBreakCache = nullptr;
|
||||
|
||||
/**
|
||||
* Cache of boundary positions within a region of text that has been
|
||||
* sub-divided by dictionary based breaking.
|
||||
*/
|
||||
class DictionaryCache;
|
||||
DictionaryCache *fDictionaryCache = nullptr;
|
||||
|
||||
/**
|
||||
*
|
||||
* If present, UStack of LanguageBreakEngine objects that might handle
|
||||
* dictionary characters. Searched from top to bottom to find an object to
|
||||
* handle a given character.
|
||||
* @internal (private)
|
||||
*/
|
||||
UStack *fLanguageBreakEngines = nullptr;
|
||||
|
||||
/**
|
||||
*
|
||||
* If present, the special LanguageBreakEngine used for handling
|
||||
* characters that are in the dictionary set, but not handled by any
|
||||
* LanguageBreakEngine.
|
||||
* @internal (private)
|
||||
*/
|
||||
UnhandledEngine *fUnhandledBreakEngine = nullptr;
|
||||
|
||||
/**
|
||||
* Counter for the number of characters encountered with the "dictionary"
|
||||
* flag set.
|
||||
* @internal (private)
|
||||
*/
|
||||
uint32_t fDictionaryCharCount = 0;
|
||||
|
||||
/**
|
||||
* A character iterator that refers to the same text as the UText, above.
|
||||
* Only included for compatibility with old API, which was based on CharacterIterators.
|
||||
* Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
|
||||
*/
|
||||
CharacterIterator *fCharIter = &fSCharIter;
|
||||
|
||||
/**
|
||||
* When the input text is provided by a UnicodeString, this will point to
|
||||
* a characterIterator that wraps that data. Needed only for the
|
||||
* implementation of getText(), a backwards compatibility issue.
|
||||
*/
|
||||
UCharCharacterIterator fSCharIter {u"", 0};
|
||||
|
||||
/**
|
||||
* True when iteration has run off the end, and iterator functions should return UBRK_DONE.
|
||||
*/
|
||||
bool fDone = false;
|
||||
|
||||
/**
|
||||
* Array of look-ahead tentative results.
|
||||
*/
|
||||
int32_t *fLookAheadMatches = nullptr;
|
||||
|
||||
/**
|
||||
* A flag to indicate if phrase based breaking is enabled.
|
||||
*/
|
||||
UBool fIsPhraseBreaking = false;
|
||||
|
||||
//=======================================================================
|
||||
// constructors
|
||||
//=======================================================================
|
||||
|
||||
/**
|
||||
* Constructor from a flattened set of RBBI data in malloced memory.
|
||||
* RuleBasedBreakIterators built from a custom set of rules
|
||||
* are created via this constructor; the rules are compiled
|
||||
* into memory, then the break iterator is constructed here.
|
||||
*
|
||||
* The break iterator adopts the memory, and will
|
||||
* free it when done.
|
||||
* @internal (private)
|
||||
*/
|
||||
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* This constructor uses the udata interface to create a BreakIterator
|
||||
* whose internal tables live in a memory-mapped file. "image" is an
|
||||
* ICU UDataMemory handle for the pre-compiled break iterator tables.
|
||||
* @param image handle to the memory image for the break iterator data.
|
||||
* Ownership of the UDataMemory handle passes to the Break Iterator,
|
||||
* which will be responsible for closing it when it is no longer needed.
|
||||
* @param status Information on any errors encountered.
|
||||
* @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
|
||||
* @see udata_open
|
||||
* @see #getBinaryRules
|
||||
* @internal (private)
|
||||
*/
|
||||
RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
|
||||
|
||||
/** @internal */
|
||||
friend class RBBIRuleBuilder;
|
||||
/** @internal */
|
||||
friend class BreakIterator;
|
||||
|
||||
/**
|
||||
* Default constructor with an error code parameter.
|
||||
* Aside from error handling, identical to the default constructor.
|
||||
* Internally, handles common initialization for other constructors.
|
||||
* @internal (private)
|
||||
*/
|
||||
RuleBasedBreakIterator(UErrorCode *status);
|
||||
|
||||
public:
|
||||
|
||||
/** Default constructor. Creates an empty shell of an iterator, with no
|
||||
* rules or text to iterate over. Object can subsequently be assigned to,
|
||||
* but is otherwise unusable.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
RuleBasedBreakIterator();
|
||||
|
||||
/**
|
||||
* Copy constructor. Will produce a break iterator with the same behavior,
|
||||
* and which iterates over the same text, as the one passed in.
|
||||
* @param that The RuleBasedBreakIterator passed to be copied
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
|
||||
|
||||
/**
|
||||
* Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
|
||||
* @param rules The break rules to be used.
|
||||
* @param parseError In the event of a syntax error in the rules, provides the location
|
||||
* within the rules of the problem.
|
||||
* @param status Information on any errors encountered.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
RuleBasedBreakIterator( const UnicodeString &rules,
|
||||
UParseError &parseError,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
|
||||
* Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules().
|
||||
* Construction of a break iterator in this way is substantially faster than
|
||||
* construction from source rules.
|
||||
*
|
||||
* Ownership of the storage containing the compiled rules remains with the
|
||||
* caller of this function. The compiled rules must not be modified or
|
||||
* deleted during the life of the break iterator.
|
||||
*
|
||||
* The compiled rules are not compatible across different major versions of ICU.
|
||||
* The compiled rules are compatible only between machines with the same
|
||||
* byte ordering (little or big endian) and the same base character set family
|
||||
* (ASCII or EBCDIC).
|
||||
*
|
||||
* @see #getBinaryRules
|
||||
* @param compiledRules A pointer to the compiled break rules to be used.
|
||||
* @param ruleLength The length of the compiled break rules, in bytes. This
|
||||
* corresponds to the length value produced by getBinaryRules().
|
||||
* @param status Information on any errors encountered, including invalid
|
||||
* binary rules.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
RuleBasedBreakIterator(const uint8_t *compiledRules,
|
||||
uint32_t ruleLength,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* This constructor uses the udata interface to create a BreakIterator
|
||||
* whose internal tables live in a memory-mapped file. "image" is an
|
||||
* ICU UDataMemory handle for the pre-compiled break iterator tables.
|
||||
* @param image handle to the memory image for the break iterator data.
|
||||
* Ownership of the UDataMemory handle passes to the Break Iterator,
|
||||
* which will be responsible for closing it when it is no longer needed.
|
||||
* @param status Information on any errors encountered.
|
||||
* @see udata_open
|
||||
* @see #getBinaryRules
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~RuleBasedBreakIterator();
|
||||
|
||||
/**
|
||||
* Assignment operator. Sets this iterator to have the same behavior,
|
||||
* and iterate over the same text, as the one passed in.
|
||||
* @param that The RuleBasedBreakIterator passed in
|
||||
* @return a reference to this RuleBasedBreakIterator
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
|
||||
|
||||
/**
|
||||
* Equality operator. Returns true if both BreakIterators are of the
|
||||
* same class, have the same behavior, and iterate over the same text.
|
||||
* @param that The BreakIterator to be compared for equality
|
||||
* @return true if both BreakIterators are of the
|
||||
* same class, have the same behavior, and iterate over the same text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual bool operator==(const BreakIterator& that) const override;
|
||||
|
||||
/**
|
||||
* Not-equal operator. Returns the logical negation of operator==(that):
|
||||
* false when operator== returns true, and vice versa.
|
||||
* @param that The BreakIterator to be compared for inequality
|
||||
* @return true if operator== reports the two BreakIterators as not equal.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
inline bool operator!=(const BreakIterator& that) const {
|
||||
return !operator==(that);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a newly-constructed RuleBasedBreakIterator with the same
|
||||
* behavior, and iterating over the same text, as this one.
|
||||
* Differs from the copy constructor in that it is polymorphic, and
|
||||
* will correctly clone (copy) a derived class.
|
||||
* clone() is thread safe. Multiple threads may simultaneously
|
||||
* clone the same source break iterator.
|
||||
* @return a newly-constructed RuleBasedBreakIterator
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual RuleBasedBreakIterator* clone() const override;
|
||||
|
||||
/**
|
||||
* Compute a hash code for this BreakIterator
|
||||
* @return A hash code
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t hashCode() const;
|
||||
|
||||
/**
|
||||
* Returns the description used to create this iterator
|
||||
* @return the description used to create this iterator
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual const UnicodeString& getRules() const;
|
||||
|
||||
//=======================================================================
|
||||
// BreakIterator overrides
|
||||
//=======================================================================
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Return a CharacterIterator over the text being analyzed.
|
||||
* The returned character iterator is owned by the break iterator, and must
|
||||
* not be deleted by the caller. Repeated calls to this function may
|
||||
* return the same CharacterIterator.
|
||||
* </p>
|
||||
* <p>
|
||||
* The returned character iterator must not be used concurrently with
|
||||
* the break iterator. If concurrent operation is needed, clone the
|
||||
* returned character iterator first and operate on the clone.
|
||||
* </p>
|
||||
* <p>
|
||||
* When the break iterator is operating on text supplied via a UText,
|
||||
* this function will fail, returning a CharacterIterator containing no text.
|
||||
* The function getUText() provides similar functionality,
|
||||
* is reliable, and is more efficient.
|
||||
* </p>
|
||||
*
|
||||
* TODO: deprecate this function?
|
||||
*
|
||||
* @return An iterator over the text being analyzed.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual CharacterIterator& getText() const override;
|
||||
|
||||
/**
|
||||
* Get a UText for the text being analyzed.
|
||||
* The returned UText is a shallow clone of the UText used internally
|
||||
* by the break iterator implementation. It can safely be used to
|
||||
* access the text without impacting any break iterator operations,
|
||||
* but the underlying text itself must not be altered.
|
||||
*
|
||||
* @param fillIn A UText to be filled in. If nullptr, a new UText will be
|
||||
* allocated to hold the result.
|
||||
* @param status receives any error codes.
|
||||
* @return The current UText for this break iterator. If an input
|
||||
* UText was provided, it will always be returned.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
virtual UText *getUText(UText *fillIn, UErrorCode &status) const override;
|
||||
|
||||
/**
|
||||
* Set the iterator to analyze a new piece of text. This function resets
|
||||
* the current iteration position to the beginning of the text.
|
||||
* @param newText An iterator over the text to analyze. The BreakIterator
|
||||
* takes ownership of the character iterator. The caller MUST NOT delete it!
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void adoptText(CharacterIterator* newText) override;
|
||||
|
||||
/**
|
||||
* Set the iterator to analyze a new piece of text. This function resets
|
||||
* the current iteration position to the beginning of the text.
|
||||
*
|
||||
* The BreakIterator will retain a reference to the supplied string.
|
||||
* The caller must not modify or delete the text while the BreakIterator
|
||||
* retains the reference.
|
||||
*
|
||||
* @param newText The text to analyze.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void setText(const UnicodeString& newText) override;
|
||||
|
||||
/**
|
||||
* Reset the break iterator to operate over the text represented by
|
||||
* the UText. The iterator position is reset to the start.
|
||||
*
|
||||
* This function makes a shallow clone of the supplied UText. This means
|
||||
* that the caller is free to immediately close or otherwise reuse the
|
||||
* UText that was passed as a parameter, but that the underlying text itself
|
||||
* must not be altered while being referenced by the break iterator.
|
||||
*
|
||||
* @param text The UText used to change the text.
|
||||
* @param status Receives any error codes.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
virtual void setText(UText *text, UErrorCode &status) override;
|
||||
|
||||
/**
|
||||
* Sets the current iteration position to the beginning of the text, position zero.
|
||||
* @return The offset of the beginning of the text, zero.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t first() override;
|
||||
|
||||
/**
|
||||
* Sets the current iteration position to the end of the text.
|
||||
* @return The text's past-the-end offset.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t last() override;
|
||||
|
||||
/**
|
||||
* Advances the iterator either forward or backward the specified number of steps.
|
||||
* Negative values move backward, and positive values move forward. This is
|
||||
* equivalent to repeatedly calling next() or previous().
|
||||
* @param n The number of steps to move. The sign indicates the direction
|
||||
* (negative is backwards, and positive is forwards).
|
||||
* @return The character offset of the boundary position n boundaries away from
|
||||
* the current one.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t next(int32_t n) override;
|
||||
|
||||
/**
|
||||
* Advances the iterator to the next boundary position.
|
||||
* @return The position of the first boundary after this one.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t next() override;
|
||||
|
||||
/**
|
||||
* Moves the iterator backwards, to the last boundary preceding this one.
|
||||
* @return The position of the last boundary position preceding this one.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t previous() override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first boundary position following
|
||||
* the specified position.
|
||||
* @param offset The position from which to begin searching for a break position.
|
||||
* @return The position of the first break after the specified position.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t following(int32_t offset) override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last boundary position before the
|
||||
* specified position.
|
||||
* @param offset The position to begin searching for a break from.
|
||||
* @return The position of the last boundary before the starting position.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t preceding(int32_t offset) override;
|
||||
|
||||
/**
|
||||
* Returns true if the specified position is a boundary position. As a side
|
||||
* effect, leaves the iterator pointing to the first boundary position at
|
||||
* or after "offset".
|
||||
* @param offset the offset to check.
|
||||
* @return True if "offset" is a boundary position.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UBool isBoundary(int32_t offset) override;
|
||||
|
||||
/**
|
||||
* Returns the current iteration position. Note that UBRK_DONE is never
|
||||
* returned from this function; if iteration has run to the end of a
|
||||
* string, current() will return the length of the string while
|
||||
* next() will return UBRK_DONE.
|
||||
* @return The current iteration position.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t current() const override;
|
||||
|
||||
/**
|
||||
* Return the status tag from the break rule that determined the boundary at
|
||||
* the current iteration position. For break rules that do not specify a
|
||||
* status, a default value of 0 is returned. If more than one break rule
|
||||
* would cause a boundary to be located at some position in the text,
|
||||
* the numerically largest of the applicable status values is returned.
|
||||
* <p>
|
||||
* Of the standard types of ICU break iterators, only word break and
|
||||
* line break provide status values. The values are defined in
|
||||
* the header file ubrk.h. For Word breaks, the status allows distinguishing between words
|
||||
* that contain alphabetic letters, "words" that appear to be numbers,
|
||||
* punctuation and spaces, words containing ideographic characters, and
|
||||
* more. For Line Break, the status distinguishes between hard (mandatory) breaks
|
||||
* and soft (potential) break positions.
|
||||
* <p>
|
||||
* <code>getRuleStatus()</code> can be called after obtaining a boundary
|
||||
* position from <code>next()</code>, <code>previous()</code>, or
|
||||
* any other break iterator function that returns a boundary position.
|
||||
* <p>
|
||||
* Note that <code>getRuleStatus()</code> returns the value corresponding to
|
||||
* <code>current()</code> index even after <code>next()</code> has returned DONE.
|
||||
* <p>
|
||||
* When creating custom break rules, one is free to define whatever
|
||||
* status values may be convenient for the application.
|
||||
* <p>
|
||||
* @return the status from the break rule that determined the boundary
|
||||
* at the current iteration position.
|
||||
*
|
||||
* @see UWordBreak
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual int32_t getRuleStatus() const override;
|
||||
|
||||
/**
|
||||
* Get the status (tag) values from the break rule(s) that determined the boundary
|
||||
* at the current iteration position.
|
||||
* <p>
|
||||
* The returned status value(s) are stored into an array provided by the caller.
|
||||
* The values are stored in sorted (ascending) order.
|
||||
* If the capacity of the output array is insufficient to hold the data,
|
||||
* the output will be truncated to the available length, and a
|
||||
* U_BUFFER_OVERFLOW_ERROR will be signaled.
|
||||
*
|
||||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from the rules that determined
|
||||
* the boundary at the current iteration position.
|
||||
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
|
||||
* is the total number of status values that were available,
|
||||
* not the reduced number that were actually returned.
|
||||
* @see getRuleStatus
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override;
|
||||
|
||||
/**
|
||||
* Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
|
||||
* This method is to implement a simple version of RTTI, since not all
|
||||
* C++ compilers support genuine RTTI. Polymorphic operator==() and
|
||||
* clone() methods call this method.
|
||||
*
|
||||
* @return The class ID for this object. All objects of a
|
||||
* given class have the same class ID. Objects of
|
||||
* other classes have different class IDs.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const override;
|
||||
|
||||
/**
|
||||
* Returns the class ID for this class. This is useful only for
|
||||
* comparing to a return value from getDynamicClassID(). For example:
|
||||
*
|
||||
* Base* polymorphic_pointer = createPolymorphicObject();
|
||||
* if (polymorphic_pointer->getDynamicClassID() ==
|
||||
* Derived::getStaticClassID()) ...
|
||||
*
|
||||
* @return The class ID for all objects of this class.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
#ifndef U_FORCE_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Deprecated functionality. Use clone() instead.
|
||||
*
|
||||
* Create a clone (copy) of this break iterator in memory provided
|
||||
* by the caller. The idea is to increase performance by avoiding
|
||||
* a storage allocation. Use of this function is NOT RECOMMENDED.
|
||||
* Performance gains are minimal, and correct buffer management is
|
||||
* tricky. Use clone() instead.
|
||||
*
|
||||
* @param stackBuffer The pointer to the memory into which the cloned object
|
||||
* should be placed. If nullptr, allocate heap memory
|
||||
* for the cloned object.
|
||||
* @param BufferSize The size of the buffer. If zero, return the required
|
||||
* buffer size, but do not clone the object. If the
|
||||
* size was too small (but not zero), allocate heap
|
||||
* storage for the cloned object.
|
||||
*
|
||||
* @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
|
||||
* returned if the provided buffer was too small, and
|
||||
* the clone was therefore put on the heap.
|
||||
*
|
||||
* @return Pointer to the clone object. This may differ from the stackBuffer
|
||||
* address if the byte alignment of the stack buffer was not suitable
|
||||
* or if the stackBuffer was too small to hold the clone.
|
||||
* @deprecated ICU 52. Use clone() instead.
|
||||
*/
|
||||
virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer,
|
||||
int32_t &BufferSize,
|
||||
UErrorCode &status) override;
|
||||
#endif // U_FORCE_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Return the binary form of compiled break rules,
|
||||
* which can then be used to create a new break iterator at some
|
||||
* time in the future. Creating a break iterator from pre-compiled rules
|
||||
* is much faster than building one from the source form of the
|
||||
* break rules.
|
||||
*
|
||||
* The binary data can only be used with the same version of ICU
|
||||
* and on the same platform type (processor endian-ness).
|
||||
*
|
||||
* @param length Returns the length of the binary data. (Out parameter.)
|
||||
*
|
||||
* @return A pointer to the binary (compiled) rule data. The storage
|
||||
* belongs to the RuleBasedBreakIterator object, not the
|
||||
* caller, and must not be modified or deleted.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual const uint8_t *getBinaryRules(uint32_t &length);
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the break iterator is operating
|
||||
* without changing any other aspect of the matching state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the break iterator implementation never copies the underlying text
|
||||
* of a string being processed, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized,
|
||||
* system-level code. One example use case is with garbage collection that moves
|
||||
* the text in memory.
|
||||
*
|
||||
* @param input The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
* @return *this
|
||||
*
|
||||
* @stable ICU 49
|
||||
*/
|
||||
virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status) override;
|
||||
|
||||
|
||||
private:
|
||||
//=======================================================================
|
||||
// implementation
|
||||
//=======================================================================
|
||||
/**
|
||||
* Iterate backwards from an arbitrary position in the input text using the
|
||||
* synthesized Safe Reverse rules.
|
||||
* This locates a "Safe Position" from which the forward break rules
|
||||
* will operate correctly. A Safe Position is not necessarily a boundary itself.
|
||||
*
|
||||
* @param fromPosition the position in the input text to begin the iteration.
|
||||
* @internal (private)
|
||||
*/
|
||||
int32_t handleSafePrevious(int32_t fromPosition);
|
||||
|
||||
/**
|
||||
* Find a rule-based boundary by running the state machine.
|
||||
* Input
|
||||
* fPosition, the position in the text to begin from.
|
||||
* Output
|
||||
* fPosition: the boundary following the starting position.
|
||||
* fDictionaryCharCount the number of dictionary characters encountered.
|
||||
* If > 0, the segment will be further subdivided
|
||||
* fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
|
||||
*
|
||||
* @internal (private)
|
||||
*/
|
||||
int32_t handleNext();
|
||||
|
||||
/*
|
||||
* Templatized version of handleNext() and handleSafePrevious().
|
||||
*
|
||||
* There will be exactly four instantiations, two each for 8 and 16 bit tables,
|
||||
* two each for 8 and 16 bit trie.
|
||||
* Having separate instantiations for the table types keeps conditional tests of
|
||||
* the table type out of the inner loops, at the expense of replicated code.
|
||||
*
|
||||
* The template parameter for the Trie access function is a value, not a type.
|
||||
* Doing it this way, the compiler will inline the Trie function in the
|
||||
* expanded functions. (Both the 8 and 16 bit access functions have the same type
|
||||
* signature)
|
||||
*/
|
||||
|
||||
typedef uint16_t (*PTrieFunc)(const UCPTrie *, UChar32);
|
||||
|
||||
template<typename RowType, PTrieFunc trieFunc>
|
||||
int32_t handleSafePrevious(int32_t fromPosition);
|
||||
|
||||
template<typename RowType, PTrieFunc trieFunc>
|
||||
int32_t handleNext();
|
||||
|
||||
|
||||
/**
|
||||
* This function returns the appropriate LanguageBreakEngine for a
|
||||
* given character c.
|
||||
* @param c A character in the dictionary set
|
||||
* @param locale The locale.
|
||||
* @internal (private)
|
||||
*/
|
||||
const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c, const char* locale);
|
||||
|
||||
public:
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Debugging function only.
|
||||
* @internal
|
||||
*/
|
||||
void dumpCache();
|
||||
|
||||
/**
|
||||
* Debugging function only.
|
||||
* @internal
|
||||
*/
|
||||
void dumpTables();
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Register a new external break engine. The external break engine will be adopted.
|
||||
* Because ICU may choose to cache break engines internally, this must
|
||||
* be called at application startup, prior to any calls to
|
||||
* object methods of RuleBasedBreakIterator to avoid undefined behavior.
|
||||
* @param toAdopt the ExternalBreakEngine instance to be adopted
|
||||
* @param status the in/out status code, no special meanings are assigned
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
static void U_EXPORT2 registerExternalBreakEngine(
|
||||
ExternalBreakEngine* toAdopt, UErrorCode& status);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
};
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
266
thirdparty/icu4c/common/unicode/rep.h
vendored
Normal file
266
thirdparty/icu4c/common/unicode/rep.h
vendored
Normal file
@@ -0,0 +1,266 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**************************************************************************
|
||||
* Copyright (C) 1999-2012, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
**************************************************************************
|
||||
* Date Name Description
|
||||
* 11/17/99 aliu Creation. Ported from java. Modified to
|
||||
* match current UnicodeString API. Forced
|
||||
* to use name "handleReplaceBetween" because
|
||||
* of existing methods in UnicodeString.
|
||||
**************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef REP_H
|
||||
#define REP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Replaceable String
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeString;
|
||||
|
||||
/**
|
||||
* <code>Replaceable</code> is an abstract base class representing a
|
||||
* string of characters that supports the replacement of a range of
|
||||
* itself with a new string of characters. It is used by APIs that
|
||||
* change a piece of text while retaining metadata. Metadata is data
|
||||
* other than the Unicode characters returned by char32At(). One
|
||||
* example of metadata is style attributes; another is an edit
|
||||
* history, marking each character with an author and revision number.
|
||||
*
|
||||
* <p>An implicit aspect of the <code>Replaceable</code> API is that
|
||||
* during a replace operation, new characters take on the metadata of
|
||||
* the old characters. For example, if the string "the <b>bold</b>
|
||||
* font" has range (4, 8) replaced with "strong", then it becomes "the
|
||||
* <b>strong</b> font".
|
||||
*
|
||||
* <p><code>Replaceable</code> specifies ranges using a start
|
||||
* offset and a limit offset. The range of characters thus specified
|
||||
* includes the characters at offset start..limit-1. That is, the
|
||||
* start offset is inclusive, and the limit offset is exclusive.
|
||||
*
|
||||
* <p><code>Replaceable</code> also includes API to access characters
|
||||
* in the string: <code>length()</code>, <code>charAt()</code>,
|
||||
* <code>char32At()</code>, and <code>extractBetween()</code>.
|
||||
*
|
||||
* <p>For a subclass to support metadata, typical behavior of
|
||||
* <code>replace()</code> is the following:
|
||||
* <ul>
|
||||
* <li>Set the metadata of the new text to the metadata of the first
|
||||
* character replaced</li>
|
||||
* <li>If no characters are replaced, use the metadata of the
|
||||
* previous character</li>
|
||||
* <li>If there is no previous character (i.e. start == 0), use the
|
||||
* following character</li>
|
||||
* <li>If there is no following character (i.e. the replaceable was
|
||||
* empty), use default metadata.<br>
|
||||
* <li>If the code point U+FFFF is seen, it should be interpreted as
|
||||
* a special marker having no metadata</li>
|
||||
* </li>
|
||||
* </ul>
|
||||
* If this is not the behavior, the subclass should document any differences.
|
||||
* @author Alan Liu
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API Replaceable : public UObject {
|
||||
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~Replaceable();
|
||||
|
||||
/**
|
||||
* Returns the number of 16-bit code units in the text.
|
||||
* @return number of 16-bit code units in text
|
||||
* @stable ICU 1.8
|
||||
*/
|
||||
inline int32_t length() const;
|
||||
|
||||
/**
|
||||
* Returns the 16-bit code unit at the given offset into the text.
|
||||
* @param offset an integer between 0 and <code>length()</code>-1
|
||||
* inclusive
|
||||
* @return 16-bit code unit of text at given offset
|
||||
* @stable ICU 1.8
|
||||
*/
|
||||
inline char16_t charAt(int32_t offset) const;
|
||||
|
||||
/**
|
||||
* Returns the 32-bit code point at the given 16-bit offset into
|
||||
* the text. This assumes the text is stored as 16-bit code units
|
||||
* with surrogate pairs intermixed. If the offset of a leading or
|
||||
* trailing code unit of a surrogate pair is given, return the
|
||||
* code point of the surrogate pair.
|
||||
*
|
||||
* @param offset an integer between 0 and <code>length()</code>-1
|
||||
* inclusive
|
||||
* @return 32-bit code point of text at given offset
|
||||
* @stable ICU 1.8
|
||||
*/
|
||||
inline UChar32 char32At(int32_t offset) const;
|
||||
|
||||
/**
|
||||
* Copies characters in the range [<tt>start</tt>, <tt>limit</tt>)
|
||||
* into the UnicodeString <tt>target</tt>.
|
||||
* @param start offset of first character which will be copied
|
||||
* @param limit offset immediately following the last character to
|
||||
* be copied
|
||||
* @param target UnicodeString into which to copy characters.
|
||||
* The result is stored in <tt>target</tt>; this function returns nothing.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
virtual void extractBetween(int32_t start,
|
||||
int32_t limit,
|
||||
UnicodeString& target) const = 0;
|
||||
|
||||
/**
|
||||
* Replaces a substring of this object with the given text. If the
|
||||
* characters being replaced have metadata, the new characters
|
||||
* that replace them should be given the same metadata.
|
||||
*
|
||||
* <p>Subclasses must ensure that if the text between start and
|
||||
* limit is equal to the replacement text, that replace has no
|
||||
* effect. That is, any metadata
|
||||
* should be unaffected. In addition, subclasses are encouraged to
|
||||
* check for initial and trailing identical characters, and make a
|
||||
* smaller replacement if possible. This will preserve as much
|
||||
* metadata as possible.
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= length()</code>.
|
||||
* @param text the text to replace characters <code>start</code>
|
||||
* to <code>limit - 1</code>
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void handleReplaceBetween(int32_t start,
|
||||
int32_t limit,
|
||||
const UnicodeString& text) = 0;
|
||||
// Note: All other methods in this class take the names of
|
||||
// existing UnicodeString methods. This method is the exception.
|
||||
// It is named differently because all replace methods of
|
||||
// UnicodeString return a UnicodeString&. The 'between' is
|
||||
// required in order to conform to the UnicodeString naming
|
||||
// convention; API taking start/length are named <operation>, and
|
||||
// those taking start/limit are named <operationBetween>. The
|
||||
// 'handle' is added because 'replaceBetween' and
|
||||
// 'doReplaceBetween' are already taken.
|
||||
|
||||
/**
|
||||
* Copies a substring of this object, retaining metadata.
|
||||
* This method is used to duplicate or reorder substrings.
|
||||
* The destination index must not overlap the source range.
|
||||
*
|
||||
* @param start the beginning index, inclusive; <code>0 <= start <=
|
||||
* limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit <=
|
||||
* length()</code>.
|
||||
* @param dest the destination index. The characters from
|
||||
* <code>start..limit-1</code> will be copied to <code>dest</code>.
|
||||
* Implementations of this method may assume that <code>dest <= start ||
|
||||
* dest >= limit</code>.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
|
||||
|
||||
/**
|
||||
* Returns true if this object contains metadata. If a
|
||||
* Replaceable object has metadata, calls to the Replaceable API
|
||||
* must be made so as to preserve metadata. If it does not, calls
|
||||
* to the Replaceable API may be optimized to improve performance.
|
||||
* The default implementation returns true.
|
||||
* @return true if this object contains metadata
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual UBool hasMetaData() const;
|
||||
|
||||
/**
|
||||
* Clone this object, an instance of a subclass of Replaceable.
|
||||
* Clones can be used concurrently in multiple threads.
|
||||
* If a subclass does not implement clone(), or if an error occurs,
|
||||
* then nullptr is returned.
|
||||
* The caller must delete the clone.
|
||||
*
|
||||
* @return a clone of this object
|
||||
*
|
||||
* @see getDynamicClassID
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
virtual Replaceable *clone() const;
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Default constructor.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
inline Replaceable();
|
||||
|
||||
/*
|
||||
* Assignment operator not declared. The compiler will provide one
|
||||
* which does nothing since this class does not contain any data members.
|
||||
* API/code coverage may show the assignment operator as present and
|
||||
* untested - ignore.
|
||||
* Subclasses need this assignment operator if they use compiler-provided
|
||||
* assignment operators of their own. An alternative to not declaring one
|
||||
* here would be to declare and empty-implement a protected or public one.
|
||||
Replaceable &Replaceable::operator=(const Replaceable &);
|
||||
*/
|
||||
|
||||
/**
|
||||
* Virtual version of length().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual int32_t getLength() const = 0;
|
||||
|
||||
/**
|
||||
* Virtual version of charAt().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual char16_t getCharAt(int32_t offset) const = 0;
|
||||
|
||||
/**
|
||||
* Virtual version of char32At().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UChar32 getChar32At(int32_t offset) const = 0;
|
||||
};
|
||||
|
||||
// Out-of-line definition of the default constructor declared "inline Replaceable();" in the class; the body is empty.
inline Replaceable::Replaceable() {}
|
||||
|
||||
/**
 * Public length() accessor: forwards to the pure virtual getLength()
 * and returns its result unchanged.
 */
inline int32_t Replaceable::length() const { return this->getLength(); }
|
||||
|
||||
inline char16_t
|
||||
Replaceable::charAt(int32_t offset) const {
|
||||
return getCharAt(offset);
|
||||
}
|
||||
|
||||
/**
 * Public char32At() accessor: forwards the offset to the pure virtual
 * getChar32At() and returns its result unchanged.
 */
inline UChar32 Replaceable::char32At(int32_t offset) const { return this->getChar32At(offset); }
|
||||
|
||||
// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
489
thirdparty/icu4c/common/unicode/resbund.h
vendored
Normal file
489
thirdparty/icu4c/common/unicode/resbund.h
vendored
Normal file
@@ -0,0 +1,489 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-2013, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File resbund.h
|
||||
*
|
||||
* CREATED BY
|
||||
* Richard Gillam
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 2/5/97 aliu Added scanForLocaleInFile. Added
|
||||
* constructor which attempts to read resource bundle
|
||||
* from a specific file, without searching other files.
|
||||
* 2/11/97 aliu Added UErrorCode return values to constructors. Fixed
|
||||
* infinite loops in scanForFile and scanForLocale.
|
||||
* Modified getRawResourceData to not delete storage
|
||||
* in localeData and resourceData which it doesn't own.
|
||||
* Added Mac compatibility #ifdefs for tellp() and
|
||||
* ios::nocreate.
|
||||
* 2/18/97 helena Updated with 100% documentation coverage.
|
||||
* 3/13/97 aliu Rewrote to load in entire resource bundle and store
|
||||
* it as a Hashtable of ResourceBundleData objects.
|
||||
* Added state table to govern parsing of files.
|
||||
* Modified to load locale index out of new file
|
||||
* distinct from default.txt.
|
||||
* 3/25/97 aliu Modified to support 2-d arrays, needed for timezone
|
||||
* data. Added support for custom file suffixes. Again,
|
||||
* needed to support timezone data.
|
||||
* 4/7/97 aliu Cleaned up.
|
||||
* 03/02/99 stephen Removed dependency on FILE*.
|
||||
* 03/29/99 helena Merged Bertrand and Stephen's changes.
|
||||
* 06/11/99 stephen Removed parsing of .txt files.
|
||||
* Reworked to use new binary format.
|
||||
* Cleaned up.
|
||||
* 06/14/99 stephen Removed methods taking a filename suffix.
|
||||
* 11/09/99 weiv Added getLocale(), fRealLocale, removed fRealLocaleID
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef RESBUND_H
|
||||
#define RESBUND_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/locid.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Resource Bundle
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A class representing a collection of resource information pertaining to a given
|
||||
* locale. A resource bundle provides a way of accessing locale-specific information in
|
||||
* a data file. You create a resource bundle that manages the resources for a given
|
||||
* locale and then ask it for individual resources.
|
||||
* <P>
|
||||
* Resource bundles in ICU4C are currently defined using text files which conform to the following
|
||||
* <a href="https://github.com/unicode-org/icu-docs/blob/main/design/bnf_rb.txt">BNF definition</a>.
|
||||
* More on resource bundle concepts and syntax can be found in the
|
||||
* <a href="https://unicode-org.github.io/icu/userguide/locale/resources">Users Guide</a>.
|
||||
* <P>
|
||||
*
|
||||
* The ResourceBundle class is not suitable for subclassing.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API ResourceBundle : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @param packageName The packageName and locale together point to an ICU udata object,
|
||||
* as defined by <code> udata_open( packageName, "res", locale, err) </code>
|
||||
* or equivalent. Typically, packageName will refer to a (.dat) file, or to
|
||||
* a package registered with udata_setAppData(). Using a full file or directory
|
||||
* pathname for packageName is deprecated.
|
||||
* @param locale This is the locale this resource bundle is for. To get resources
|
||||
* for the French locale, for example, you would create a
|
||||
* ResourceBundle passing Locale::FRENCH for the "locale" parameter,
|
||||
* and all subsequent calls to that resource bundle will return
|
||||
* resources that pertain to the French locale. If the caller doesn't
|
||||
* pass a locale parameter, the default locale for the system (as
|
||||
* returned by Locale::getDefault()) will be used.
|
||||
* @param err The Error Code.
|
||||
* The UErrorCode& err parameter is used to return status information to the user. To
|
||||
* check whether the construction succeeded or not, you should check the value of
|
||||
* U_SUCCESS(err). If you wish more detailed information, you can check for
|
||||
* informational error results which still indicate success. U_USING_FALLBACK_WARNING
|
||||
* indicates that a fall back locale was used. For example, 'de_CH' was requested,
|
||||
* but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
|
||||
* the default locale data was used; neither the requested locale nor any of its
|
||||
* fall back locales could be found.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
ResourceBundle(const UnicodeString& packageName,
|
||||
const Locale& locale,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Construct a resource bundle for the default bundle in the specified package.
|
||||
*
|
||||
* @param packageName The packageName and locale together point to an ICU udata object,
|
||||
* as defined by <code> udata_open( packageName, "res", locale, err) </code>
|
||||
* or equivalent. Typically, packageName will refer to a (.dat) file, or to
|
||||
* a package registered with udata_setAppData(). Using a full file or directory
|
||||
* pathname for packageName is deprecated.
|
||||
* @param err A UErrorCode value
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
ResourceBundle(const UnicodeString& packageName,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Construct a resource bundle for the ICU default bundle.
|
||||
*
|
||||
* @param err A UErrorCode value
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
ResourceBundle(UErrorCode &err);
|
||||
|
||||
/**
|
||||
* Standard constructor, constructs a resource bundle for the locale-specific
|
||||
* bundle in the specified package.
|
||||
*
|
||||
* @param packageName The packageName and locale together point to an ICU udata object,
|
||||
* as defined by <code> udata_open( packageName, "res", locale, err) </code>
|
||||
* or equivalent. Typically, packageName will refer to a (.dat) file, or to
|
||||
* a package registered with udata_setAppData(). Using a full file or directory
|
||||
* pathname for packageName is deprecated.
|
||||
* nullptr is used to refer to ICU data.
|
||||
* @param locale The locale for which to open a resource bundle.
|
||||
* @param err A UErrorCode value
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
ResourceBundle(const char* packageName,
|
||||
const Locale& locale,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*
|
||||
* @param original The resource bundle to copy.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
ResourceBundle(const ResourceBundle &original);
|
||||
|
||||
/**
|
||||
* Constructor from a C UResourceBundle. The resource bundle is
|
||||
* copied and not adopted. ures_close will still need to be used on the
|
||||
* original resource bundle.
|
||||
*
|
||||
* @param res A pointer to the C resource bundle.
|
||||
* @param status A UErrorCode value.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
ResourceBundle(UResourceBundle *res,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*
|
||||
* @param other The resource bundle to copy.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
ResourceBundle&
|
||||
operator=(const ResourceBundle& other);
|
||||
|
||||
/** Destructor.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~ResourceBundle();
|
||||
|
||||
/**
|
||||
* Clone this object.
|
||||
* Clones can be used concurrently in multiple threads.
|
||||
* If an error occurs, then nullptr is returned.
|
||||
* The caller must delete the clone.
|
||||
*
|
||||
* @return a clone of this object
|
||||
*
|
||||
* @see getDynamicClassID
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
ResourceBundle *clone() const;
|
||||
|
||||
/**
|
||||
* Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is
|
||||
* the number of child resources.
|
||||
* @warning Integer array is treated as a scalar type. There are no
|
||||
* APIs to access individual members of an integer array. It
|
||||
* is always returned as a whole.
|
||||
*
|
||||
* @return number of resources in a given resource.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t getSize() const;
|
||||
|
||||
/**
|
||||
* returns a string from a string resource type
|
||||
*
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a warning
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return a UnicodeString object holding the string (returned by value, not as a raw pointer).
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeString
|
||||
getString(UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* returns binary data from a resource. Can be used on most primitive resource types (binaries,
|
||||
* strings, ints)
|
||||
*
|
||||
* @param len fills in the length of resulting byte chunk
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a warning
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
const uint8_t*
|
||||
getBinary(int32_t& len, UErrorCode& status) const;
|
||||
|
||||
|
||||
/**
|
||||
* returns an integer vector from a resource.
|
||||
*
|
||||
* @param len fills in the length of resulting integer vector
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a warning
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return a pointer to a vector of integers that lives in a memory mapped/DLL file.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
const int32_t*
|
||||
getIntVector(int32_t& len, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* returns an unsigned integer from a resource.
|
||||
* This integer is originally 28 bits.
|
||||
*
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a warning
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return an unsigned integer value
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
uint32_t
|
||||
getUInt(UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* returns a signed integer from a resource.
|
||||
* This integer is originally 28 bit and the sign gets propagated.
|
||||
*
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a warning
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return a signed integer value
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t
|
||||
getInt(UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Checks whether the resource has another element to iterate over.
|
||||
*
|
||||
* @return true if there are more elements, false if there are no more elements
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UBool hasNext() const;
|
||||
|
||||
/**
|
||||
* Resets the internal context of a resource so that iteration starts from the first element.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
void resetIterator();
|
||||
|
||||
/**
|
||||
* Returns the key associated with this resource. Not all the resources have a key - only
|
||||
* those that are members of a table.
|
||||
*
|
||||
* @return a key associated to this resource, or nullptr if it doesn't have a key
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
const char* getKey() const;
|
||||
|
||||
/**
|
||||
* Gets the locale ID of the resource bundle as a string.
|
||||
* Same as getLocale().getName() .
|
||||
*
|
||||
* @return the locale ID of the resource bundle as a string
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
const char* getName() const;
|
||||
|
||||
/**
|
||||
* Returns the type of a resource. Available types are defined in enum UResType
|
||||
*
|
||||
* @return type of the given resource.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UResType getType() const;
|
||||
|
||||
/**
|
||||
* Returns the next resource in a given resource. The result is returned by value, so it cannot be nullptr.
|
||||
*
|
||||
* @param status fills in the outgoing error code
|
||||
* @return ResourceBundle object. NOTE(review): what is returned when no resources remain is not visible in this header - check status and confirm against the implementation.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
ResourceBundle
|
||||
getNext(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Returns the next string in a resource. The result is returned by value, so it cannot be nullptr.
|
||||
* NOTE(review): presumably running out of resources to iterate over is reported through status - confirm.
|
||||
*
|
||||
* @param status fills in the outgoing error code
|
||||
* @return a UnicodeString object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeString
|
||||
getNextString(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Returns the next string in a resource. The result is returned by value, so it cannot be nullptr.
|
||||
* NOTE(review): presumably running out of resources to iterate over is reported through status - confirm.
|
||||
*
|
||||
* @param key fill in for key associated with this string
|
||||
* @param status fills in the outgoing error code
|
||||
* @return a UnicodeString object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeString
|
||||
getNextString(const char ** key,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Returns the resource in a resource at the specified index.
|
||||
*
|
||||
* @param index an index to the wanted resource.
|
||||
* @param status fills in the outgoing error code
|
||||
* @return ResourceBundle object. If there is an error, resource is invalid.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
ResourceBundle
|
||||
get(int32_t index,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Returns the string in a given resource at the specified index.
|
||||
*
|
||||
* @param index an index to the wanted string.
|
||||
* @param status fills in the outgoing error code
|
||||
* @return a UnicodeString object. If there is an error, string is bogus
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeString
|
||||
getStringEx(int32_t index,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Returns a resource in a resource that has a given key. This procedure works only with table
|
||||
* resources.
|
||||
*
|
||||
* @param key a key associated with the wanted resource
|
||||
* @param status fills in the outgoing error code.
|
||||
* @return ResourceBundle object. If there is an error, resource is invalid.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
ResourceBundle
|
||||
get(const char* key,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Returns a string in a resource that has a given key. This procedure works only with table
|
||||
* resources.
|
||||
*
|
||||
* @param key a key associated with the wanted string
|
||||
* @param status fills in the outgoing error code
|
||||
* @return a UnicodeString object. If there is an error, string is bogus
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeString
|
||||
getStringEx(const char* key,
|
||||
UErrorCode& status) const;
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Return the version number associated with this ResourceBundle as a string. Please
|
||||
* use getVersion, as this method is deprecated.
|
||||
*
|
||||
* @return A version number string as specified in the resource bundle or its parent.
|
||||
* The caller does not own this string.
|
||||
* @see getVersion
|
||||
* @deprecated ICU 2.8 Use getVersion instead.
|
||||
*/
|
||||
const char* getVersionNumber() const;
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Return the version number associated with this ResourceBundle as a UVersionInfo array.
|
||||
*
|
||||
* @param versionInfo A UVersionInfo array that is filled with the version number
|
||||
* as specified in the resource bundle or its parent.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
void
|
||||
getVersion(UVersionInfo versionInfo) const;
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Return the Locale associated with this ResourceBundle.
|
||||
*
|
||||
* @return a Locale object
|
||||
* @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead.
|
||||
*/
|
||||
const Locale& getLocale() const;
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Return the Locale associated with this ResourceBundle.
|
||||
* @param type You can choose between requested, valid and actual
|
||||
* locale. For description see the definition of
|
||||
* ULocDataLocaleType in uloc.h
|
||||
* @param status just for catching illegal arguments
|
||||
*
|
||||
* @return a Locale object
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
Locale
|
||||
getLocale(ULocDataLocaleType type, UErrorCode &status) const;
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* This API implements multilevel fallback
|
||||
* @internal
|
||||
*/
|
||||
ResourceBundle
|
||||
getWithFallback(const char* key, UErrorCode& status);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const override;
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
private:
|
||||
ResourceBundle() = delete; // default construction is explicitly deleted
|
||||
|
||||
UResourceBundle *fResource; // pointer to a C UResourceBundle; NOTE(review): presumably the bundle this object wraps and owns - confirm
|
||||
void constructForLocale(const UnicodeString& path, const Locale& locale, UErrorCode& error); // NOTE(review): presumably the shared helper behind the locale-taking constructors - confirm
|
||||
Locale *fLocale; // NOTE(review): presumably backs the deprecated getLocale() that returns const Locale& - confirm
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
187
thirdparty/icu4c/common/unicode/schriter.h
vendored
Normal file
187
thirdparty/icu4c/common/unicode/schriter.h
vendored
Normal file
@@ -0,0 +1,187 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File schriter.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/05/99 stephen Cleaned up.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef SCHRITER_H
|
||||
#define SCHRITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/uchriter.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: String Character Iterator
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/**
|
||||
* A concrete subclass of CharacterIterator that iterates over the
|
||||
* characters (code units or code points) in a UnicodeString.
|
||||
* It's possible not only to create an
|
||||
* iterator that iterates over an entire UnicodeString, but also to
|
||||
* create one that iterates over only a subrange of a UnicodeString
|
||||
* (iterators over different subranges of the same UnicodeString don't
|
||||
* compare equal).
|
||||
* @see CharacterIterator
|
||||
* @see ForwardCharacterIterator
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator {
|
||||
public:
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "textStr".
|
||||
* The UnicodeString object is copied.
|
||||
* The iteration range is the whole string, and the starting position is 0.
|
||||
* @param textStr The unicode string used to create an iterator
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
StringCharacterIterator(const UnicodeString& textStr);
|
||||
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "textStr".
|
||||
* The iteration range is the whole string, and the starting
|
||||
* position is specified by "textPos". If "textPos" is outside the valid
|
||||
* iteration range, the behavior of this object is undefined.
|
||||
* @param textStr The unicode string used to create an iterator
|
||||
* @param textPos The starting position of the iteration
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
StringCharacterIterator(const UnicodeString& textStr,
|
||||
int32_t textPos);
|
||||
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "textStr".
|
||||
* The UnicodeString object is copied.
|
||||
* The iteration range begins with the code unit specified by
|
||||
* "textBegin" and ends with the code unit BEFORE the code unit specified
|
||||
* by "textEnd". The starting position is specified by "textPos". If
|
||||
* "textBegin" and "textEnd" don't form a valid range on "textStr" (i.e.,
|
||||
* textBegin >= textEnd or either is negative or greater than the length of textStr),
|
||||
* or "textPos" is outside the range defined by "textBegin" and "textEnd",
|
||||
* the behavior of this iterator is undefined.
|
||||
* @param textStr The unicode string used to create the StringCharacterIterator
|
||||
* @param textBegin The begin position of the iteration range
|
||||
* @param textEnd The end position of the iteration range
|
||||
* @param textPos The starting position of the iteration
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
StringCharacterIterator(const UnicodeString& textStr,
|
||||
int32_t textBegin,
|
||||
int32_t textEnd,
|
||||
int32_t textPos);
|
||||
|
||||
/**
|
||||
* Copy constructor. The new iterator iterates over the same range
|
||||
* of the same string as "that", and its initial position is the
|
||||
* same as "that"'s current position.
|
||||
* The UnicodeString object in "that" is copied.
|
||||
* @param that The StringCharacterIterator to be copied
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
StringCharacterIterator(const StringCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~StringCharacterIterator();
|
||||
|
||||
/**
|
||||
* Assignment operator. *this is altered to iterate over the same
|
||||
* range of the same string as "that", and refers to the same
|
||||
* character within that string as "that" does.
|
||||
* @param that The object to be copied.
|
||||
* @return a reference to a StringCharacterIterator (presumably *this, per assignment convention - confirm).
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
StringCharacterIterator&
|
||||
operator=(const StringCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Returns true if the iterators iterate over the same range of the
|
||||
* same string and are pointing at the same character.
|
||||
* @param that The ForwardCharacterIterator to be compared for equality
|
||||
* @return true if the iterators iterate over the same range of the
|
||||
* same string and are pointing at the same character.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual bool operator==(const ForwardCharacterIterator& that) const override;
|
||||
|
||||
/**
|
||||
* Returns a new StringCharacterIterator referring to the same
|
||||
* character in the same range of the same string as this one. The
|
||||
* caller must delete the new iterator.
|
||||
* @return the newly cloned object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual StringCharacterIterator* clone() const override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to iterate over the provided string.
|
||||
* @param newText The string to be iterated over
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
void setText(const UnicodeString& newText);
|
||||
|
||||
/**
|
||||
* Copies the UnicodeString under iteration into the UnicodeString
|
||||
* referred to by "result". Even if this iterator iterates across
|
||||
* only a part of this string, the whole string is copied.
|
||||
* @param result Receives a copy of the text under iteration.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void getText(UnicodeString& result) override;
|
||||
|
||||
/**
|
||||
* Return a class ID for this object (not really public)
|
||||
* @return a class ID for this object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const override;
|
||||
|
||||
/**
|
||||
* Return a class ID for this class (not really public)
|
||||
* @return a class ID for this class
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Default constructor, iteration over empty string.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
StringCharacterIterator();
|
||||
|
||||
/**
|
||||
* Copy of the iterated string object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeString text;
|
||||
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
339
thirdparty/icu4c/common/unicode/simpleformatter.h
vendored
Normal file
339
thirdparty/icu4c/common/unicode/simpleformatter.h
vendored
Normal file
@@ -0,0 +1,339 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 2014-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
* simpleformatter.h
|
||||
*/
|
||||
|
||||
#ifndef __SIMPLEFORMATTER_H__
|
||||
#define __SIMPLEFORMATTER_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Simple formatter, minimal subset of MessageFormat.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Forward declaration:
|
||||
namespace number::impl {
|
||||
class SimpleModifier;
|
||||
}
|
||||
|
||||
/**
|
||||
* Formats simple patterns like "{1} was born in {0}".
|
||||
* Minimal subset of MessageFormat; fast, simple, minimal dependencies.
|
||||
* Supports only numbered arguments with no type nor style parameters,
|
||||
* and formats only string values.
|
||||
* Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior.
|
||||
*
|
||||
* Factory methods set error codes for syntax errors
|
||||
* and for too few or too many arguments/placeholders.
|
||||
*
|
||||
* SimpleFormatter objects are thread-safe except for assignment and applying new patterns.
|
||||
*
|
||||
* Example:
|
||||
* <pre>
|
||||
* UErrorCode errorCode = U_ZERO_ERROR;
|
||||
* SimpleFormatter fmt("{1} '{born}' in {0}", errorCode);
|
||||
* UnicodeString result;
|
||||
*
|
||||
* // Output: "paul {born} in england"
|
||||
* fmt.format("england", "paul", result, errorCode);
|
||||
* </pre>
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
*
|
||||
* @see MessageFormat
|
||||
* @see UMessagePatternApostropheMode
|
||||
* @stable ICU 57
|
||||
*/
|
||||
class U_COMMON_API SimpleFormatter final : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Default constructor.
|
||||
* @stable ICU 57
|
||||
*/
|
||||
// Seeds compiledPattern from a single char16_t of value 0. Per the layout
// documented on compiledPattern (index 0 holds the argument limit), this
// presumably encodes "zero arguments, no literal text".
// NOTE(review): relies on UnicodeString's single-code-unit constructor; confirm in unistr.h.
SimpleFormatter() : compiledPattern(static_cast<char16_t>(0)) {}
|
||||
|
||||
/**
|
||||
* Constructs a formatter from the pattern string.
|
||||
*
|
||||
* @param pattern The pattern string.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
|
||||
* @stable ICU 57
|
||||
*/
|
||||
SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
|
||||
applyPattern(pattern, errorCode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a formatter from the pattern string.
|
||||
* The number of arguments checked against the given limits is the
|
||||
* highest argument number plus one, not the number of occurrences of arguments.
|
||||
*
|
||||
* @param pattern The pattern string.
|
||||
* @param min The pattern must have at least this many arguments.
|
||||
* @param max The pattern must have at most this many arguments.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
|
||||
* too few or too many arguments.
|
||||
* @stable ICU 57
|
||||
*/
|
||||
SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
|
||||
UErrorCode &errorCode) {
|
||||
// Forwards to the pattern-applying member. Its UBool result is ignored here;
// callers of this constructor learn about failures through errorCode.
applyPatternMinMaxArguments(pattern, min, max, errorCode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @stable ICU 57
|
||||
*/
|
||||
// Copies the other formatter's compiled pattern, which is the only
// non-static data member declared in this class.
SimpleFormatter(const SimpleFormatter& other)
|
||||
: compiledPattern(other.compiledPattern) {}
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
* @stable ICU 57
|
||||
*/
|
||||
SimpleFormatter &operator=(const SimpleFormatter& other);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 57
|
||||
*/
|
||||
~SimpleFormatter();
|
||||
|
||||
/**
|
||||
* Changes this object according to the new pattern.
|
||||
*
|
||||
* @param pattern The pattern string.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
|
||||
* @return true if U_SUCCESS(errorCode).
|
||||
* @stable ICU 57
|
||||
*/
|
||||
UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) {
    // No restriction on the argument count: anything from 0 to INT32_MAX is accepted.
    const int32_t minArguments = 0;
    const int32_t maxArguments = INT32_MAX;
    return applyPatternMinMaxArguments(pattern, minArguments, maxArguments, errorCode);
}
|
||||
|
||||
/**
|
||||
* Changes this object according to the new pattern.
|
||||
* The number of arguments checked against the given limits is the
|
||||
* highest argument number plus one, not the number of occurrences of arguments.
|
||||
*
|
||||
* @param pattern The pattern string.
|
||||
* @param min The pattern must have at least this many arguments.
|
||||
* @param max The pattern must have at most this many arguments.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
|
||||
* too few or too many arguments.
|
||||
* @return true if U_SUCCESS(errorCode).
|
||||
* @stable ICU 57
|
||||
*/
|
||||
UBool applyPatternMinMaxArguments(const UnicodeString &pattern,
|
||||
int32_t min, int32_t max, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return The max argument number + 1.
|
||||
* @stable ICU 57
|
||||
*/
|
||||
int32_t getArgumentLimit() const {
|
||||
return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Formats the given value, appending to the appendTo builder.
|
||||
* The argument value must not be the same object as appendTo.
|
||||
* getArgumentLimit() must be at most 1.
|
||||
*
|
||||
* @param value0 Value for argument {0}.
|
||||
* @param appendTo Gets the formatted pattern and value appended.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return appendTo
|
||||
* @stable ICU 57
|
||||
*/
|
||||
UnicodeString &format(
|
||||
const UnicodeString &value0,
|
||||
UnicodeString &appendTo, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Formats the given values, appending to the appendTo builder.
|
||||
* An argument value must not be the same object as appendTo.
|
||||
* getArgumentLimit() must be at most 2.
|
||||
*
|
||||
* @param value0 Value for argument {0}.
|
||||
* @param value1 Value for argument {1}.
|
||||
* @param appendTo Gets the formatted pattern and values appended.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return appendTo
|
||||
* @stable ICU 57
|
||||
*/
|
||||
UnicodeString &format(
|
||||
const UnicodeString &value0,
|
||||
const UnicodeString &value1,
|
||||
UnicodeString &appendTo, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Formats the given values, appending to the appendTo builder.
|
||||
* An argument value must not be the same object as appendTo.
|
||||
* getArgumentLimit() must be at most 3.
|
||||
*
|
||||
* @param value0 Value for argument {0}.
|
||||
* @param value1 Value for argument {1}.
|
||||
* @param value2 Value for argument {2}.
|
||||
* @param appendTo Gets the formatted pattern and values appended.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return appendTo
|
||||
* @stable ICU 57
|
||||
*/
|
||||
UnicodeString &format(
|
||||
const UnicodeString &value0,
|
||||
const UnicodeString &value1,
|
||||
const UnicodeString &value2,
|
||||
UnicodeString &appendTo, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Formats the given values, appending to the appendTo string.
|
||||
*
|
||||
* @param values The argument values.
|
||||
* An argument value must not be the same object as appendTo.
|
||||
* Can be nullptr if valuesLength==getArgumentLimit()==0.
|
||||
* @param valuesLength The length of the values array.
|
||||
* Must be at least getArgumentLimit().
|
||||
* @param appendTo Gets the formatted pattern and values appended.
|
||||
* @param offsets offsets[i] receives the offset of where
|
||||
* values[i] replaced pattern argument {i}.
|
||||
* Can be shorter or longer than values. Can be nullptr if offsetsLength==0.
|
||||
* If there is no {i} in the pattern, then offsets[i] is set to -1.
|
||||
* @param offsetsLength The length of the offsets array.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return appendTo
|
||||
* @stable ICU 57
|
||||
*/
|
||||
UnicodeString &formatAndAppend(
|
||||
const UnicodeString *const *values, int32_t valuesLength,
|
||||
UnicodeString &appendTo,
|
||||
int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Formats the given values, replacing the contents of the result string.
|
||||
* May optimize by actually appending to the result if it is the same object
|
||||
* as the value corresponding to the initial argument in the pattern.
|
||||
*
|
||||
* @param values The argument values.
|
||||
* An argument value may be the same object as result.
|
||||
* Can be nullptr if valuesLength==getArgumentLimit()==0.
|
||||
* @param valuesLength The length of the values array.
|
||||
* Must be at least getArgumentLimit().
|
||||
* @param result Gets its contents replaced by the formatted pattern and values.
|
||||
* @param offsets offsets[i] receives the offset of where
|
||||
* values[i] replaced pattern argument {i}.
|
||||
* Can be shorter or longer than values. Can be nullptr if offsetsLength==0.
|
||||
* If there is no {i} in the pattern, then offsets[i] is set to -1.
|
||||
* @param offsetsLength The length of the offsets array.
|
||||
* @param errorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return result
|
||||
* @stable ICU 57
|
||||
*/
|
||||
UnicodeString &formatAndReplace(
|
||||
const UnicodeString *const *values, int32_t valuesLength,
|
||||
UnicodeString &result,
|
||||
int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Returns the pattern text with none of the arguments.
|
||||
* Like formatting with all-empty string values.
|
||||
* @stable ICU 57
|
||||
*/
|
||||
UnicodeString getTextWithNoArguments() const {
    // Delegate to the static worker without requesting any argument offsets
    // (null offsets array of length 0).
    const char16_t *patternUnits = compiledPattern.getBuffer();
    const int32_t patternLength = compiledPattern.length();
    return getTextWithNoArguments(patternUnits, patternLength, nullptr, 0);
}
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Returns the pattern text with none of the arguments.
|
||||
* Like formatting with all-empty string values.
|
||||
*
|
||||
* TODO(ICU-20406): Replace this with an Iterator interface.
|
||||
*
|
||||
* @param offsets offsets[i] receives the offset of where {i} was located
|
||||
* before it was replaced by an empty string.
|
||||
* For example, "a{0}b{1}" produces offset 1 for i=0 and 2 for i=1.
|
||||
* Can be nullptr if offsetsLength==0.
|
||||
* If there is no {i} in the pattern, then offsets[i] is set to -1.
|
||||
* @param offsetsLength The length of the offsets array.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
UnicodeString getTextWithNoArguments(int32_t *offsets, int32_t offsetsLength) const {
    // Delegate to the static worker, passing the caller's offsets array through unchanged.
    const char16_t *patternUnits = compiledPattern.getBuffer();
    const int32_t patternLength = compiledPattern.length();
    return getTextWithNoArguments(patternUnits, patternLength, offsets, offsetsLength);
}
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
|
||||
private:
|
||||
/**
|
||||
* Binary representation of the compiled pattern.
|
||||
* Index 0: One more than the highest argument number.
|
||||
* Followed by zero or more arguments or literal-text segments.
|
||||
*
|
||||
* An argument is stored as its number, less than ARG_NUM_LIMIT.
|
||||
* A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
|
||||
* followed by that many chars.
|
||||
*/
|
||||
UnicodeString compiledPattern;
|
||||
|
||||
static inline int32_t getArgumentLimit(const char16_t *compiledPattern,
                                       int32_t compiledPatternLength) {
    // A zero-length compiled pattern has no header unit to read, so report no arguments.
    if (compiledPatternLength == 0) {
        return 0;
    }
    // Index 0 stores one more than the highest argument number.
    return compiledPattern[0];
}
|
||||
|
||||
static UnicodeString getTextWithNoArguments(
|
||||
const char16_t *compiledPattern,
|
||||
int32_t compiledPatternLength,
|
||||
int32_t *offsets,
|
||||
int32_t offsetsLength);
|
||||
|
||||
static UnicodeString &format(
|
||||
const char16_t *compiledPattern, int32_t compiledPatternLength,
|
||||
const UnicodeString *const *values,
|
||||
UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
|
||||
int32_t *offsets, int32_t offsetsLength,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
// Give access to internals to SimpleModifier for number formatting
|
||||
friend class number::impl::SimpleModifier;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __SIMPLEFORMATTER_H__
|
||||
41
thirdparty/icu4c/common/unicode/std_string.h
vendored
Normal file
41
thirdparty/icu4c/common/unicode/std_string.h
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: std_string.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009feb19
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __STD_STRING_H__
|
||||
#define __STD_STRING_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Central ICU header for including the C++ standard <string>
|
||||
* header and for related definitions.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
// Workaround for a libstdc++ bug before libstdc++4.6 (2011).
|
||||
// https://bugs.llvm.org/show_bug.cgi?id=13364
|
||||
#if defined(__GLIBCXX__)
|
||||
namespace std { class type_info; }
|
||||
#endif
|
||||
#include <string>
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __STD_STRING_H__
|
||||
281
thirdparty/icu4c/common/unicode/strenum.h
vendored
Normal file
281
thirdparty/icu4c/common/unicode/strenum.h
vendored
Normal file
@@ -0,0 +1,281 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef STRENUM_H
|
||||
#define STRENUM_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: String Enumeration
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Base class for 'pure' C++ implementations of uenum api. Adds a
|
||||
* method that returns the next UnicodeString since in C++ this can
|
||||
* be a common storage format for strings.
|
||||
*
|
||||
* <p>The model is that the enumeration is over strings maintained by
|
||||
* a 'service.' At any point, the service might change, invalidating
|
||||
* the enumerator (though this is expected to be rare). The iterator
|
||||
* returns an error if this has occurred. Lack of the error is no
|
||||
* guarantee that the service didn't change immediately after the
|
||||
* call, so the returned string still might not be 'valid' on
|
||||
* subsequent use.</p>
|
||||
*
|
||||
* <p>Strings may take the form of const char*, const char16_t*, or const
|
||||
* UnicodeString*. The type you get is determined by the variant of
|
||||
* 'next' that you call. In general the StringEnumeration is
|
||||
* optimized for one of these types, but all StringEnumerations can
|
||||
* return all types. Returned strings are each terminated with a NUL.
|
||||
* Depending on the service data, they might also include embedded NUL
|
||||
* characters, so API is provided to optionally return the true
|
||||
* length, counting the embedded NULs but not counting the terminating
|
||||
* NUL.</p>
|
||||
*
|
||||
* <p>The pointers returned by next, unext, and snext become invalid
|
||||
* upon any subsequent call to the enumeration's destructor, next,
|
||||
* unext, snext, or reset.</p>
|
||||
*
|
||||
* ICU 2.8 adds some default implementations and helper functions
|
||||
* for subclasses.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
class U_COMMON_API StringEnumeration : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual ~StringEnumeration();
|
||||
|
||||
/**
|
||||
* Clone this object, an instance of a subclass of StringEnumeration.
|
||||
* Clones can be used concurrently in multiple threads.
|
||||
* If a subclass does not implement clone(), or if an error occurs,
|
||||
* then nullptr is returned.
|
||||
* The caller must delete the clone.
|
||||
*
|
||||
* @return a clone of this object
|
||||
*
|
||||
* @see getDynamicClassID
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
virtual StringEnumeration *clone() const;
|
||||
|
||||
/**
|
||||
* <p>Return the number of elements that the iterator traverses. If
|
||||
* the iterator is out of sync with its service, status is set to
|
||||
* U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
|
||||
*
|
||||
* <p>The return value will not change except possibly as a result of
|
||||
* a subsequent call to reset, or if the iterator becomes out of sync.</p>
|
||||
*
|
||||
* <p>This is a convenience function. It can end up being very
|
||||
* expensive as all the items might have to be pre-fetched
|
||||
* (depending on the storage format of the data being
|
||||
* traversed).</p>
|
||||
*
|
||||
* @param status the error code.
|
||||
* @return number of elements in the iterator.
|
||||
*
|
||||
* @stable ICU 2.4 */
|
||||
virtual int32_t count(UErrorCode& status) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Returns the next element as a NUL-terminated char*. If there
|
||||
* are no more elements, returns nullptr. If the resultLength pointer
|
||||
* is not nullptr, the length of the string (not counting the
|
||||
* terminating NUL) is returned at that address. If an error
|
||||
* status is returned, the value at resultLength is undefined.</p>
|
||||
*
|
||||
* <p>The returned pointer is owned by this iterator and must not be
|
||||
* deleted by the caller. The pointer is valid until the next call
|
||||
* to next, unext, snext, reset, or the enumerator's destructor.</p>
|
||||
*
|
||||
* <p>If the iterator is out of sync with its service, status is set
|
||||
* to U_ENUM_OUT_OF_SYNC_ERROR and nullptr is returned.</p>
|
||||
*
|
||||
* <p>If the native service string is a char16_t* string, it is
|
||||
* converted to char* with the invariant converter. If the
|
||||
* conversion fails (because a character cannot be converted) then
|
||||
* status is set to U_INVARIANT_CONVERSION_ERROR and the return
|
||||
* value is undefined (though not nullptr).</p>
|
||||
*
|
||||
* Starting with ICU 2.8, the default implementation calls snext()
|
||||
* and handles the conversion.
|
||||
* Either next() or snext() must be implemented differently by a subclass.
|
||||
*
|
||||
* @param status the error code.
|
||||
* @param resultLength a pointer to receive the length, can be nullptr.
|
||||
* @return a pointer to the string, or nullptr.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual const char* next(int32_t *resultLength, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Returns the next element as a NUL-terminated char16_t*. If there
|
||||
* are no more elements, returns nullptr. If the resultLength pointer
|
||||
* is not nullptr, the length of the string (not counting the
|
||||
* terminating NUL) is returned at that address. If an error
|
||||
* status is returned, the value at resultLength is undefined.</p>
|
||||
*
|
||||
* <p>The returned pointer is owned by this iterator and must not be
|
||||
* deleted by the caller. The pointer is valid until the next call
|
||||
* to next, unext, snext, reset, or the enumerator's destructor.</p>
|
||||
*
|
||||
* <p>If the iterator is out of sync with its service, status is set
|
||||
* to U_ENUM_OUT_OF_SYNC_ERROR and nullptr is returned.</p>
|
||||
*
|
||||
* Starting with ICU 2.8, the default implementation calls snext()
|
||||
* and handles the conversion.
|
||||
*
|
||||
* @param status the error code.
|
||||
* @param resultLength a pointer to receive the length, can be nullptr.
|
||||
* @return a pointer to the string, or nullptr.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Returns the next element as a UnicodeString*. If there are no
|
||||
* more elements, returns nullptr.</p>
|
||||
*
|
||||
* <p>The returned pointer is owned by this iterator and must not be
|
||||
* deleted by the caller. The pointer is valid until the next call
|
||||
* to next, unext, snext, reset, or the enumerator's destructor.</p>
|
||||
*
|
||||
* <p>If the iterator is out of sync with its service, status is set
|
||||
* to U_ENUM_OUT_OF_SYNC_ERROR and nullptr is returned.</p>
|
||||
*
|
||||
* Starting with ICU 2.8, the default implementation calls next()
|
||||
* and handles the conversion.
|
||||
* Either next() or snext() must be implemented differently by a subclass.
|
||||
*
|
||||
* @param status the error code.
|
||||
* @return a pointer to the string, or nullptr.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual const UnicodeString* snext(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Resets the iterator. This re-establishes sync with the
|
||||
* service and rewinds the iterator to start at the first
|
||||
* element.</p>
|
||||
*
|
||||
* <p>Previous pointers returned by next, unext, or snext become
|
||||
* invalid, and the value returned by count might change.</p>
|
||||
*
|
||||
* @param status the error code.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual void reset(UErrorCode& status) = 0;
|
||||
|
||||
/**
|
||||
* Compares this enumeration to other to check if both are equal
|
||||
*
|
||||
* @param that The other string enumeration to compare this object to
|
||||
* @return true if the enumerations are equal. false if not.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
virtual bool operator==(const StringEnumeration& that)const;
|
||||
/**
|
||||
* Compares this enumeration to other to check if both are not equal
|
||||
*
|
||||
* @param that The other string enumeration to compare this object to
|
||||
* @return true if the enumerations are not equal. false if they are equal.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
virtual bool operator!=(const StringEnumeration& that)const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* UnicodeString field for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
UnicodeString unistr;
|
||||
/**
|
||||
* char * default buffer for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
char charsBuffer[32];
|
||||
/**
|
||||
* char * buffer for use with default implementations and subclasses.
|
||||
* Allocated in constructor and in ensureCharsCapacity().
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
char *chars;
|
||||
/**
|
||||
* Capacity of chars, for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
int32_t charsCapacity;
|
||||
|
||||
/**
|
||||
* Default constructor for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
StringEnumeration();
|
||||
|
||||
/**
|
||||
* Ensures that chars is at least as large as the requested capacity.
|
||||
* For use with default implementations and subclasses.
|
||||
*
|
||||
* @param capacity Requested capacity.
|
||||
* @param status ICU in/out error code.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Converts s to Unicode and sets unistr to the result.
|
||||
* For use with default implementations and subclasses,
|
||||
* especially for implementations of snext() in terms of next().
|
||||
* This is provided with a helper function instead of a default implementation
|
||||
* of snext() to avoid potential infinite loops between next() and snext().
|
||||
*
|
||||
* For example:
|
||||
* \code
|
||||
* const UnicodeString* snext(UErrorCode& status) {
|
||||
* int32_t resultLength=0;
|
||||
* const char *s=next(&resultLength, status);
|
||||
* return setChars(s, resultLength, status);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* @param s String to be converted to Unicode.
|
||||
* @param length Length of the string.
|
||||
* @param status ICU in/out error code.
|
||||
* @return A pointer to unistr.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
/* STRENUM_H */
|
||||
#endif
|
||||
190
thirdparty/icu4c/common/unicode/stringoptions.h
vendored
Normal file
190
thirdparty/icu4c/common/unicode/stringoptions.h
vendored
Normal file
@@ -0,0 +1,190 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// stringoptions.h
|
||||
// created: 2017jun08 Markus W. Scherer
|
||||
|
||||
#ifndef __STRINGOPTIONS_H__
|
||||
#define __STRINGOPTIONS_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Bit set option bit constants for various string and character processing functions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Option value for case folding: Use default mappings defined in CaseFolding.txt.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_FOLD_CASE_DEFAULT 0
|
||||
|
||||
/**
|
||||
* Option value for case folding:
|
||||
*
|
||||
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
|
||||
* and dotless i appropriately for Turkic languages (tr, az).
|
||||
*
|
||||
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
|
||||
* are to be included for default mappings and
|
||||
* excluded for the Turkic-specific mappings.
|
||||
*
|
||||
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
|
||||
* are to be excluded for default mappings and
|
||||
* included for the Turkic-specific mappings.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
|
||||
|
||||
/**
|
||||
* Titlecase the string as a whole rather than each word.
|
||||
* (Titlecase only the character at index 0, possibly adjusted.)
|
||||
* Option bits value for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* It is an error to specify multiple titlecasing iterator options together,
|
||||
* including both an options bit and an explicit BreakIterator.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_WHOLE_STRING 0x20
|
||||
|
||||
/**
|
||||
* Titlecase sentences rather than words.
|
||||
* (Titlecase only the first character of each sentence, possibly adjusted.)
|
||||
* Option bits value for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* It is an error to specify multiple titlecasing iterator options together,
|
||||
* including both an options bit and an explicit BreakIterator.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_SENTENCES 0x40
|
||||
|
||||
/**
|
||||
* Do not lowercase non-initial parts of words when titlecasing.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will titlecase the character at each
|
||||
* (possibly adjusted) BreakIterator index and
|
||||
* lowercase all other characters up to the next iterator index.
|
||||
* With this option, the other characters will not be modified.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @see UnicodeString::toTitle
|
||||
* @see CaseMap::toTitle
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_LOWERCASE 0x100
|
||||
|
||||
/**
|
||||
* Do not adjust the titlecasing BreakIterator indexes;
|
||||
* titlecase exactly the characters at breaks from the iterator.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will take each break iterator index,
|
||||
* adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
|
||||
* and titlecase that one.
|
||||
*
|
||||
* Other characters are lowercased.
|
||||
*
|
||||
* It is an error to specify multiple titlecasing adjustment options together.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @see UnicodeString::toTitle
|
||||
* @see CaseMap::toTitle
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
|
||||
|
||||
/**
|
||||
* Adjust each titlecasing BreakIterator index to the next cased character.
|
||||
* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* This used to be the default index adjustment in ICU.
|
||||
* Since ICU 60, the default index adjustment is to the next character that is
|
||||
* a letter, number, symbol, or private use code point.
|
||||
* (Uncased modifier letters are skipped.)
|
||||
* The difference in behavior is small for word titlecasing,
|
||||
* but the new adjustment is much better for whole-string and sentence titlecasing:
|
||||
* It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
|
||||
*
|
||||
* It is an error to specify multiple titlecasing adjustment options together.
|
||||
*
|
||||
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_ADJUST_TO_CASED 0x400
|
||||
|
||||
/**
|
||||
* Option for string transformation functions to not first reset the Edits object.
|
||||
* Used for example in some case-mapping and normalization functions.
|
||||
*
|
||||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @see Normalizer2
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_EDITS_NO_RESET 0x2000
|
||||
|
||||
/**
|
||||
* Omit unchanged text when recording how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
* Used for example in some case-mapping and normalization functions.
|
||||
*
|
||||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @see Normalizer2
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_OMIT_UNCHANGED_TEXT 0x4000
|
||||
|
||||
/**
|
||||
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
|
||||
* Compare strings in code point order instead of code unit order.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_CODE_POINT_ORDER 0x8000
|
||||
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Perform case-insensitive comparison.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_IGNORE_CASE 0x10000
|
||||
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Both input strings are assumed to fulfill FCD conditions.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define UNORM_INPUT_IS_FCD 0x20000
|
||||
|
||||
// Related definitions elsewhere.
|
||||
// Options that are not meaningful in the same functions
|
||||
// can share the same bits.
|
||||
//
|
||||
// Public:
|
||||
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
|
||||
//
|
||||
// Internal: (may change or be removed)
|
||||
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
|
||||
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
|
||||
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
|
||||
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
|
||||
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
|
||||
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
|
||||
|
||||
#endif // __STRINGOPTIONS_H__
|
||||
354
thirdparty/icu4c/common/unicode/stringpiece.h
vendored
Normal file
354
thirdparty/icu4c/common/unicode/stringpiece.h
vendored
Normal file
@@ -0,0 +1,354 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
// Copyright (C) 2009-2013, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Copyright 2001 and onwards Google Inc.
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
// This code is a contribution of Google code, and the style used here is
|
||||
// a compromise between the original Google code and the ICU coding guidelines.
|
||||
// For example, data types are ICU-ified (size_t,int->int32_t),
|
||||
// and API comments doxygen-ified, but function names and behavior are
|
||||
// as in the original, if possible.
|
||||
// Assertion-style error handling, not available in ICU, was changed to
|
||||
// parameter "pinning" similar to UnicodeString.
|
||||
//
|
||||
// In addition, this is only a partial port of the original Google code,
|
||||
// limited to what was needed so far. The (nearly) complete original code
|
||||
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
|
||||
// (see ICU ticket 6765, r25517).
|
||||
|
||||
#ifndef __STRINGPIECE_H__
|
||||
#define __STRINGPIECE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: StringPiece: Read-only byte string wrapper class.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <cstddef>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/std_string.h"
|
||||
|
||||
// Arghh! I wish C++ literals were "string".
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A string-like object that points to a sized piece of memory.
|
||||
*
|
||||
* We provide non-explicit singleton constructors so users can pass
|
||||
* in a "const char*" or a "string" wherever a "StringPiece" is
|
||||
* expected.
|
||||
*
|
||||
* Functions or methods may use StringPiece parameters to accept either a
|
||||
* "const char*" or a "string" value that will be implicitly converted to a
|
||||
* StringPiece.
|
||||
*
|
||||
* Systematic usage of StringPiece is encouraged as it will reduce unnecessary
|
||||
* conversions from "const char*" to "string" and back again.
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
class U_COMMON_API StringPiece : public UMemory {
|
||||
private:
|
||||
const char* ptr_;
|
||||
int32_t length_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Default constructor, creates an empty StringPiece.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece() : ptr_(nullptr), length_(0) { }
|
||||
|
||||
/**
|
||||
* Constructs from a NUL-terminated const char * pointer.
|
||||
* @param str a NUL-terminated const char * pointer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const char* str);
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a NUL-terminated const char8_t * pointer.
|
||||
* @param str a NUL-terminated const char8_t * pointer
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
|
||||
#endif
|
||||
/**
|
||||
* Constructs an empty StringPiece.
|
||||
* Needed for type disambiguation from multiple other overloads.
|
||||
* @param p nullptr
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
|
||||
|
||||
/**
|
||||
* Constructs from a std::string.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const std::string& str)
|
||||
: ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
|
||||
#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a std::u8string.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const std::u8string& str)
|
||||
: ptr_(reinterpret_cast<const char*>(str.data())),
|
||||
length_(static_cast<int32_t>(str.size())) { }
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Constructs from some other implementation of a string piece class, from any
|
||||
* C++ record type that has these two methods:
|
||||
*
|
||||
* \code{.cpp}
|
||||
*
|
||||
* struct OtherStringPieceClass {
|
||||
* const char* data(); // or const char8_t*
|
||||
* size_t size();
|
||||
* };
|
||||
*
|
||||
* \endcode
|
||||
*
|
||||
* The other string piece class will typically be std::string_view from C++17
|
||||
* or absl::string_view from Abseil.
|
||||
*
|
||||
* Starting with C++20, data() may also return a const char8_t* pointer,
|
||||
* as from std::u8string_view.
|
||||
*
|
||||
* @param str the other string piece
|
||||
* @stable ICU 65
|
||||
*/
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<
|
||||
(std::is_same_v<decltype(T().data()), const char*>
|
||||
#if defined(__cpp_char8_t)
|
||||
|| std::is_same_v<decltype(T().data()), const char8_t*>
|
||||
#endif
|
||||
) &&
|
||||
std::is_same_v<decltype(T().size()), size_t>>>
|
||||
StringPiece(T str)
|
||||
: ptr_(reinterpret_cast<const char*>(str.data())),
|
||||
length_(static_cast<int32_t>(str.size())) {}
|
||||
|
||||
/**
|
||||
* Constructs from a const char * pointer and a specified length.
|
||||
* @param offset a const char * pointer (need not be terminated)
|
||||
* @param len the length of the string; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a const char8_t * pointer and a specified length.
|
||||
* @param str a const char8_t * pointer (need not be terminated)
|
||||
* @param len the length of the string; must be non-negative
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const char8_t* str, int32_t len) :
|
||||
StringPiece(reinterpret_cast<const char*>(str), len) {}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Substring of another StringPiece.
|
||||
* @param x the other StringPiece
|
||||
* @param pos start position in x; must be non-negative and <= x.length().
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const StringPiece& x, int32_t pos);
|
||||
/**
|
||||
* Substring of another StringPiece.
|
||||
* @param x the other StringPiece
|
||||
* @param pos start position in x; must be non-negative and <= x.length().
|
||||
* @param len length of the substring;
|
||||
* must be non-negative and will be pinned to at most x.length() - pos.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const StringPiece& x, int32_t pos, int32_t len);
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Converts to a std::string_view.
|
||||
* @internal
|
||||
*/
|
||||
inline operator std::string_view() const {
    // View over the same bytes: same pointer, length widened to the view's size type.
    const auto viewLength = static_cast<std::string_view::size_type>(size());
    return std::string_view(data(), viewLength);
}
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
|
||||
/**
|
||||
* Returns the string pointer. May be nullptr if it is empty.
|
||||
*
|
||||
* data() may return a pointer to a buffer with embedded NULs, and the
|
||||
* returned buffer may or may not be null terminated. Therefore it is
|
||||
* typically a mistake to pass data() to a routine that expects a NUL
|
||||
* terminated string.
|
||||
* @return the string pointer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
const char* data() const { return ptr_; }
|
||||
/**
|
||||
* Returns the string length. Same as length().
|
||||
* @return the string length
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
int32_t size() const { return length_; }
|
||||
/**
|
||||
* Returns the string length. Same as size().
|
||||
* @return the string length
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
int32_t length() const { return length_; }
|
||||
/**
|
||||
* Returns whether the string is empty.
|
||||
* @return true if the string is empty
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
UBool empty() const { return length_ == 0; }
|
||||
|
||||
/**
|
||||
* Sets to an empty string.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
void clear() { ptr_ = nullptr; length_ = 0; }
|
||||
|
||||
/**
|
||||
* Reset the stringpiece to refer to new data.
|
||||
* @param xdata pointer to the new string data. Need not be NUL-terminated.
|
||||
* @param len the length of the new data
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
|
||||
|
||||
/**
|
||||
* Reset the stringpiece to refer to new data.
|
||||
* @param str a pointer to a NUL-terminated string.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
void set(const char* str);
|
||||
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Resets the stringpiece to refer to new data.
|
||||
* @param xdata pointer to the new string data. Need not be NUL-terminated.
|
||||
* @param len the length of the new data
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void set(const char8_t* xdata, int32_t len) {
|
||||
set(reinterpret_cast<const char*>(xdata), len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the stringpiece to refer to new data.
|
||||
* @param str a pointer to a NUL-terminated string.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void set(const char8_t* str) {
|
||||
set(reinterpret_cast<const char*>(str));
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Removes the first n string units.
|
||||
* @param n prefix length, must be non-negative and <=length()
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
void remove_prefix(int32_t n) {
|
||||
if (n >= 0) {
|
||||
if (n > length_) {
|
||||
n = length_;
|
||||
}
|
||||
ptr_ += n;
|
||||
length_ -= n;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the last n string units.
|
||||
* @param n suffix length, must be non-negative and <=length()
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
void remove_suffix(int32_t n) {
|
||||
if (n >= 0) {
|
||||
if (n <= length_) {
|
||||
length_ -= n;
|
||||
} else {
|
||||
length_ = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches the StringPiece for the given search string (needle).
|
||||
* @param needle The string for which to search.
|
||||
* @param offset Where to start searching within this string (haystack).
|
||||
* @return The offset of needle in haystack, or -1 if not found.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
int32_t find(StringPiece needle, int32_t offset);
|
||||
|
||||
/**
|
||||
* Compares this StringPiece with the other StringPiece, with semantics
|
||||
* similar to std::string::compare().
|
||||
* @param other The string to compare to.
|
||||
* @return below zero if this < other; above zero if this > other; 0 if this == other.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
int32_t compare(StringPiece other);
|
||||
|
||||
/**
|
||||
* Maximum integer, used as a default value for substring methods.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
static const int32_t npos; // = 0x7fffffff;
|
||||
|
||||
/**
|
||||
* Returns a substring of this StringPiece.
|
||||
* @param pos start position; must be non-negative and <= length().
|
||||
* @param len length of the substring;
|
||||
* must be non-negative and will be pinned to at most length() - pos.
|
||||
* @return the substring StringPiece
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece substr(int32_t pos, int32_t len = npos) const {
|
||||
return StringPiece(*this, pos, len);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Global operator == for StringPiece
|
||||
* @param x The first StringPiece to compare.
|
||||
* @param y The second StringPiece to compare.
|
||||
* @return true if the string data is equal
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
U_EXPORT UBool U_EXPORT2
|
||||
operator==(const StringPiece& x, const StringPiece& y);
|
||||
|
||||
/**
|
||||
* Global operator != for StringPiece
|
||||
* @param x The first StringPiece to compare.
|
||||
* @param y The second StringPiece to compare.
|
||||
* @return true if the string data is not equal
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline bool operator!=(const StringPiece& x, const StringPiece& y) {
    // Defined as the negation of the exported operator== so the two can never disagree.
    const bool sameData = (x == y);
    return !sameData;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __STRINGPIECE_H__
|
||||
426
thirdparty/icu4c/common/unicode/stringtriebuilder.h
vendored
Normal file
426
thirdparty/icu4c/common/unicode/stringtriebuilder.h
vendored
Normal file
@@ -0,0 +1,426 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2012,2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: stringtriebuilder.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010dec24
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __STRINGTRIEBUILDER_H__
|
||||
#define __STRINGTRIEBUILDER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Builder API for trie builders
|
||||
*/
|
||||
|
||||
// Forward declaration.
|
||||
/// \cond
|
||||
struct UHashtable;
|
||||
typedef struct UHashtable UHashtable;
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* Build options for BytesTrieBuilder and CharsTrieBuilder.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
enum UStringTrieBuildOption {
|
||||
/**
|
||||
* Builds a trie quickly.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
USTRINGTRIE_BUILD_FAST,
|
||||
/**
|
||||
* Builds a trie more slowly, attempting to generate
|
||||
* a shorter but equivalent serialization.
|
||||
* This build option also uses more memory.
|
||||
*
|
||||
* This option can be effective when many integer values are the same
|
||||
* and string/byte sequence suffixes can be shared.
|
||||
* Runtime speed is not expected to improve.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
USTRINGTRIE_BUILD_SMALL
|
||||
};
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Base class for string trie builder classes.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API StringTrieBuilder : public UObject {
|
||||
public:
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** @internal */
|
||||
static int32_t hashNode(const void *node);
|
||||
/** @internal */
|
||||
static UBool equalNodes(const void *left, const void *right);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
protected:
|
||||
// Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
|
||||
// or else the compiler will create a public default constructor.
|
||||
/** @internal */
|
||||
StringTrieBuilder();
|
||||
/** @internal */
|
||||
virtual ~StringTrieBuilder();
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** @internal */
|
||||
void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
|
||||
/** @internal */
|
||||
void deleteCompactBuilder();
|
||||
|
||||
/** @internal */
|
||||
void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
|
||||
|
||||
/** @internal */
|
||||
int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
|
||||
/** @internal */
|
||||
int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
class Node;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** @internal */
|
||||
Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
|
||||
/** @internal */
|
||||
Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
|
||||
int32_t length, UErrorCode &errorCode);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/** @internal */
|
||||
virtual int32_t getElementStringLength(int32_t i) const = 0;
|
||||
/** @internal */
|
||||
virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;
|
||||
/** @internal */
|
||||
virtual int32_t getElementValue(int32_t i) const = 0;
|
||||
|
||||
// Finds the first unit index after this one where
|
||||
// the first and last element have different units again.
|
||||
/** @internal */
|
||||
virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
|
||||
|
||||
// Number of different units at unitIndex.
|
||||
/** @internal */
|
||||
virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
|
||||
/** @internal */
|
||||
virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
|
||||
/** @internal */
|
||||
virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;
|
||||
|
||||
/** @internal */
|
||||
virtual UBool matchNodesCanHaveValues() const = 0;
|
||||
|
||||
/** @internal */
|
||||
virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
|
||||
/** @internal */
|
||||
virtual int32_t getMinLinearMatch() const = 0;
|
||||
/** @internal */
|
||||
virtual int32_t getMaxLinearMatchLength() const = 0;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
// max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
|
||||
/** @internal */
|
||||
static const int32_t kMaxBranchLinearSubNodeLength=5;
|
||||
|
||||
// Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units.
|
||||
// log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
|
||||
/** @internal */
|
||||
static const int32_t kMaxSplitBranchLevels=14;
|
||||
|
||||
/**
|
||||
* Makes sure that there is only one unique node registered that is
|
||||
* equivalent to newNode.
|
||||
* @param newNode Input node. The builder takes ownership.
|
||||
* @param errorCode ICU in/out UErrorCode.
|
||||
Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==nullptr.
|
||||
* @return newNode if it is the first of its kind, or
|
||||
* an equivalent node if newNode is a duplicate.
|
||||
* @internal
|
||||
*/
|
||||
Node *registerNode(Node *newNode, UErrorCode &errorCode);
|
||||
/**
|
||||
* Makes sure that there is only one unique FinalValueNode registered
|
||||
* with this value.
|
||||
* Avoids creating a node if the value is a duplicate.
|
||||
* @param value A final value.
|
||||
* @param errorCode ICU in/out UErrorCode.
|
||||
Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==nullptr.
|
||||
* @return A FinalValueNode with the given value.
|
||||
* @internal
|
||||
*/
|
||||
Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/*
|
||||
* C++ note:
|
||||
* registerNode() and registerFinalValue() take ownership of their input nodes,
|
||||
* and only return owned nodes.
|
||||
* If they see a failure UErrorCode, they will delete the input node.
|
||||
* If they get a null pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
|
||||
* If there is a failure, they return nullptr.
|
||||
*
|
||||
* nullptr Node pointers can be safely passed into other Nodes because
|
||||
* they call the static Node::hashCode() which checks for a nullptr pointer first.
|
||||
*
|
||||
* Therefore, as long as builder functions register a new node,
|
||||
* they need to check for failures only before explicitly dereferencing
|
||||
* a Node pointer, or before setting a new UErrorCode.
|
||||
*/
|
||||
|
||||
// Hash set of nodes, maps from nodes to integer 1.
|
||||
/** @internal */
|
||||
UHashtable *nodes;
|
||||
|
||||
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
|
||||
// it is needed for layout of other objects.
|
||||
/**
|
||||
* @internal
|
||||
* \cond
|
||||
*/
|
||||
class Node : public UObject {
|
||||
public:
|
||||
Node(int32_t initialHash) : hash(initialHash), offset(0) {}
|
||||
inline int32_t hashCode() const { return hash; }
|
||||
// Handles node==nullptr.
|
||||
static inline int32_t hashCode(const Node *node) { return node==nullptr ? 0 : node->hashCode(); }
|
||||
// Base class operator==() compares the actual class types.
|
||||
virtual bool operator==(const Node &other) const;
|
||||
inline bool operator!=(const Node &other) const { return !operator==(other); }
|
||||
/**
|
||||
* Traverses the Node graph and numbers branch edges, with rightmost edges first.
|
||||
* This is to avoid writing a duplicate node twice.
|
||||
*
|
||||
* Branch nodes in this trie data structure are not symmetric.
|
||||
* Most branch edges "jump" to other nodes but the rightmost branch edges
|
||||
* just continue without a jump.
|
||||
* Therefore, write() must write the rightmost branch edge last
|
||||
* (trie units are written backwards), and must write it at that point even if
|
||||
* it is a duplicate of a node previously written elsewhere.
|
||||
*
|
||||
* This function visits and marks right branch edges first.
|
||||
* Edges are numbered with increasingly negative values because we share the
|
||||
* offset field which gets positive values when nodes are written.
|
||||
* A branch edge also remembers the first number for any of its edges.
|
||||
*
|
||||
* When a further-left branch edge has a number in the range of the rightmost
|
||||
* edge's numbers, then it will be written as part of the required right edge
|
||||
* and we can avoid writing it first.
|
||||
*
|
||||
* After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
|
||||
* edge numbers.
|
||||
*
|
||||
* @param edgeNumber The first edge number for this node and its sub-nodes.
|
||||
* @return An edge number that is at least the maximum-negative
|
||||
* of the input edge number and the numbers of this node and all of its sub-nodes.
|
||||
*/
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
|
||||
// write() must set the offset to a positive value.
|
||||
virtual void write(StringTrieBuilder &builder) = 0;
|
||||
// See markRightEdgesFirst.
|
||||
inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
|
||||
StringTrieBuilder &builder) {
|
||||
// Note: Edge numbers are negative, lastRight<=firstRight.
|
||||
// If offset>0 then this node and its sub-nodes have been written already
|
||||
// and we need not write them again.
|
||||
// If this node is part of the unwritten right branch edge,
|
||||
// then we wait until that is written.
|
||||
if(offset<0 && (offset<lastRight || firstRight<offset)) {
|
||||
write(builder);
|
||||
}
|
||||
}
|
||||
inline int32_t getOffset() const { return offset; }
|
||||
protected:
|
||||
int32_t hash;
|
||||
int32_t offset;
|
||||
};
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
// This class should not be overridden because
|
||||
// registerFinalValue() compares a stack-allocated FinalValueNode
|
||||
// (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
|
||||
// with the input node, and the
|
||||
// Node::operator==(other) used inside FinalValueNode::operator==(other)
|
||||
// will be false if the typeid's are different.
|
||||
/** @internal */
|
||||
class FinalValueNode : public Node {
|
||||
public:
|
||||
FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
|
||||
virtual bool operator==(const Node &other) const override;
|
||||
virtual void write(StringTrieBuilder &builder) override;
|
||||
protected:
|
||||
int32_t value;
|
||||
};
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
|
||||
// it is needed for layout of other objects.
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
class ValueNode : public Node {
|
||||
public:
|
||||
ValueNode(int32_t initialHash) : Node(initialHash), hasValue(false), value(0) {}
|
||||
virtual bool operator==(const Node &other) const override;
|
||||
void setValue(int32_t v) {
|
||||
hasValue=true;
|
||||
value=v;
|
||||
hash=hash*37u+v;
|
||||
}
|
||||
protected:
|
||||
UBool hasValue;
|
||||
int32_t value;
|
||||
};
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
class IntermediateValueNode : public ValueNode {
|
||||
public:
|
||||
IntermediateValueNode(int32_t v, Node *nextNode)
|
||||
: ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
|
||||
virtual bool operator==(const Node &other) const override;
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override;
|
||||
virtual void write(StringTrieBuilder &builder) override;
|
||||
protected:
|
||||
Node *next;
|
||||
};
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
|
||||
// it is needed for layout of other objects.
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
class LinearMatchNode : public ValueNode {
|
||||
public:
|
||||
LinearMatchNode(int32_t len, Node *nextNode)
|
||||
: ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
|
||||
length(len), next(nextNode) {}
|
||||
virtual bool operator==(const Node &other) const override;
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override;
|
||||
protected:
|
||||
int32_t length;
|
||||
Node *next;
|
||||
};
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
class BranchNode : public Node {
|
||||
public:
|
||||
BranchNode(int32_t initialHash) : Node(initialHash) {}
|
||||
protected:
|
||||
int32_t firstEdgeNumber;
|
||||
};
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
class ListBranchNode : public BranchNode {
|
||||
public:
|
||||
ListBranchNode() : BranchNode(0x444444), length(0) {}
|
||||
virtual bool operator==(const Node &other) const override;
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override;
|
||||
virtual void write(StringTrieBuilder &builder) override;
|
||||
// Adds a unit with a final value.
|
||||
void add(int32_t c, int32_t value) {
|
||||
units[length] = static_cast<char16_t>(c);
|
||||
equal[length]=nullptr;
|
||||
values[length]=value;
|
||||
++length;
|
||||
hash=(hash*37u+c)*37u+value;
|
||||
}
|
||||
// Adds a unit which leads to another match node.
|
||||
void add(int32_t c, Node *node) {
|
||||
units[length] = static_cast<char16_t>(c);
|
||||
equal[length]=node;
|
||||
values[length]=0;
|
||||
++length;
|
||||
hash=(hash*37u+c)*37u+hashCode(node);
|
||||
}
|
||||
protected:
|
||||
Node *equal[kMaxBranchLinearSubNodeLength]; // nullptr means "has final value".
|
||||
int32_t length;
|
||||
int32_t values[kMaxBranchLinearSubNodeLength];
|
||||
char16_t units[kMaxBranchLinearSubNodeLength];
|
||||
};
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
class SplitBranchNode : public BranchNode {
|
||||
public:
|
||||
SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
|
||||
: BranchNode(((0x555555u*37u+middleUnit)*37u+
|
||||
hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
|
||||
unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
|
||||
virtual bool operator==(const Node &other) const override;
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override;
|
||||
virtual void write(StringTrieBuilder &builder) override;
|
||||
protected:
|
||||
char16_t unit;
|
||||
Node *lessThan;
|
||||
Node *greaterOrEqual;
|
||||
};
|
||||
|
||||
// Branch head node, for writing the actual node lead unit.
|
||||
/** @internal */
|
||||
class BranchHeadNode : public ValueNode {
|
||||
public:
|
||||
BranchHeadNode(int32_t len, Node *subNode)
|
||||
: ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
|
||||
length(len), next(subNode) {}
|
||||
virtual bool operator==(const Node &other) const override;
|
||||
virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override;
|
||||
virtual void write(StringTrieBuilder &builder) override;
|
||||
protected:
|
||||
int32_t length;
|
||||
Node *next; // A branch sub-node.
|
||||
};
|
||||
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
/// \endcond
|
||||
|
||||
/** @internal */
|
||||
virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
|
||||
Node *nextNode) const = 0;
|
||||
|
||||
/** @internal */
|
||||
virtual int32_t write(int32_t unit) = 0;
|
||||
/** @internal */
|
||||
virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
|
||||
/** @internal */
|
||||
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
|
||||
/** @internal */
|
||||
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
|
||||
/** @internal */
|
||||
virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __STRINGTRIEBUILDER_H__
|
||||
119
thirdparty/icu4c/common/unicode/symtable.h
vendored
Normal file
119
thirdparty/icu4c/common/unicode/symtable.h
vendored
Normal file
@@ -0,0 +1,119 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2000-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 02/04/00 aliu Creation.
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef SYMTABLE_H
|
||||
#define SYMTABLE_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: An interface that defines both lookup protocol and parsing of
|
||||
* symbolic names.
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ParsePosition;
|
||||
class UnicodeFunctor;
|
||||
class UnicodeSet;
|
||||
class UnicodeString;
|
||||
|
||||
/**
|
||||
* An interface that defines both lookup protocol and parsing of
|
||||
* symbolic names.
|
||||
*
|
||||
* <p>A symbol table maintains two kinds of mappings. The first is
|
||||
* between symbolic names and their values. For example, if the
|
||||
* variable with the name "start" is set to the value "alpha"
|
||||
* (perhaps, though not necessarily, through an expression such as
|
||||
* "$start=alpha"), then the call lookup("start") will return the
|
||||
* char[] array ['a', 'l', 'p', 'h', 'a'].
|
||||
*
|
||||
* <p>The second kind of mapping is between character values and
|
||||
* UnicodeMatcher objects. This is used by RuleBasedTransliterator,
|
||||
* which uses characters in the private use area to represent objects
|
||||
* such as UnicodeSets. If U+E015 is mapped to the UnicodeSet [a-z],
|
||||
* then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
|
||||
*
|
||||
* <p>Finally, a symbol table defines parsing behavior for symbolic
|
||||
* names. All symbolic names start with the SYMBOL_REF character.
|
||||
* When a parser encounters this character, it calls parseReference()
|
||||
* with the position immediately following the SYMBOL_REF. The symbol
|
||||
* table parses the name, if there is one, and returns it.
|
||||
*
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
|
||||
public:
|
||||
|
||||
/**
|
||||
* The character preceding a symbol reference name.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
enum { SYMBOL_REF = 0x0024 /*$*/ };
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
virtual ~SymbolTable();
|
||||
|
||||
/**
|
||||
* Look up the characters associated with this string and return them.
|
||||
* Return <tt>nullptr</tt> if no such name exists. The resultant
|
||||
* string may have length zero.
|
||||
* @param s the symbolic name to lookup
|
||||
* @return a string containing the name's value, or <tt>nullptr</tt> if
|
||||
* there is no mapping for s.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
|
||||
|
||||
/**
|
||||
* Lookup the UnicodeMatcher associated with the given character, and
|
||||
* return it. Return <tt>nullptr</tt> if not found.
|
||||
* @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
|
||||
* @return the UnicodeMatcher object represented by the given
|
||||
* character, or nullptr if there is no mapping for ch.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
|
||||
|
||||
/**
|
||||
* Parse a symbol reference name from the given string, starting
|
||||
* at the given position. If no valid symbol reference name is
|
||||
* found, return the empty string and leave pos unchanged. That is, if the
|
||||
* character at pos cannot start a name, or if pos is at or after
|
||||
* text.length(), then return an empty string. This indicates an
|
||||
* isolated SYMBOL_REF character.
|
||||
* @param text the text to parse for the name
|
||||
* @param pos on entry, the index of the first character to parse.
|
||||
* This is the character following the SYMBOL_REF character. On
|
||||
* exit, the index after the last parsed character. If the parse
|
||||
* failed, pos is unchanged on exit.
|
||||
* @param limit the index after the last character to be parsed.
|
||||
* @return the parsed name, or an empty string if there is no
|
||||
* valid symbolic name at the given position.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
virtual UnicodeString parseReference(const UnicodeString& text,
|
||||
ParsePosition& pos, int32_t limit) const = 0;
|
||||
};
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
2211
thirdparty/icu4c/common/unicode/ubidi.h
vendored
Normal file
2211
thirdparty/icu4c/common/unicode/ubidi.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
326
thirdparty/icu4c/common/unicode/ubiditransform.h
vendored
Normal file
326
thirdparty/icu4c/common/unicode/ubiditransform.h
vendored
Normal file
@@ -0,0 +1,326 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* © 2016 and later: Unicode, Inc. and others.
|
||||
* License & terms of use: http://www.unicode.org/copyright.html
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: ubiditransform.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2016jul24
|
||||
* created by: Lina Kemmel
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef UBIDITRANSFORM_H
|
||||
#define UBIDITRANSFORM_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ubidi.h"
|
||||
#include "unicode/uchar.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Bidi Transformations
|
||||
*/
|
||||
|
||||
/**
|
||||
* `UBiDiOrder` indicates the order of text.
|
||||
*
|
||||
* This bidi transformation engine supports all possible combinations (4 in
|
||||
* total) of input and output text order:
|
||||
*
|
||||
* - <logical input, visual output>: unless the output direction is RTL, this
|
||||
* corresponds to a normal operation of the Bidi algorithm as described in the
|
||||
* Unicode Technical Report and implemented by `UBiDi` when the
|
||||
* reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL
|
||||
* mode is not supported by `UBiDi` and is accomplished through
|
||||
* reversing a visual LTR string,
|
||||
*
|
||||
* - <visual input, logical output>: unless the input direction is RTL, this
|
||||
* corresponds to an "inverse bidi algorithm" in `UBiDi` with the
|
||||
* reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`.
|
||||
* Visual RTL mode is not supported by `UBiDi` and is
|
||||
* accomplished through reversing a visual LTR string,
|
||||
*
|
||||
* - <logical input, logical output>: if the input and output base directions
|
||||
* mismatch, this corresponds to the `UBiDi` implementation with the
|
||||
* reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the
|
||||
* input and output base directions are identical, the transformation engine
|
||||
* will only handle character mirroring and Arabic shaping operations without
|
||||
* reordering,
|
||||
*
|
||||
* - <visual input, visual output>: this reordering mode is not supported by
|
||||
* the `UBiDi` engine; it implies character mirroring, Arabic
|
||||
* shaping, and - if the input/output base directions mismatch - string
|
||||
* reverse operations.
|
||||
* @see ubidi_setInverse
|
||||
* @see ubidi_setReorderingMode
|
||||
* @see UBIDI_REORDER_DEFAULT
|
||||
* @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
|
||||
* @see UBIDI_REORDER_RUNS_ONLY
|
||||
* @stable ICU 58
|
||||
*/
|
||||
typedef enum {
|
||||
/** 0: Constant indicating a logical order.
|
||||
* This is the default for input text.
|
||||
* @stable ICU 58
|
||||
*/
|
||||
UBIDI_LOGICAL = 0,
|
||||
/** 1: Constant indicating a visual order.
|
||||
* This is the default for output text.
* The value 1 is implicit: the enumerator has no initializer and
* directly follows UBIDI_LOGICAL (0).
|
||||
* @stable ICU 58
|
||||
*/
|
||||
UBIDI_VISUAL
|
||||
} UBiDiOrder;
|
||||
|
||||
/**
|
||||
* <code>UBiDiMirroring</code> indicates whether or not characters with the
|
||||
* "mirrored" property in RTL runs should be replaced with their mirror-image
|
||||
* counterparts.
|
||||
* @see UBIDI_DO_MIRRORING
|
||||
* @see ubidi_setReorderingOptions
|
||||
* @see ubidi_writeReordered
|
||||
* @see ubidi_writeReverse
|
||||
* @stable ICU 58
|
||||
*/
|
||||
typedef enum {
|
||||
/** 0: Constant indicating that character mirroring should not be
|
||||
* performed.
|
||||
* This is the default.
|
||||
* @stable ICU 58
|
||||
*/
|
||||
UBIDI_MIRRORING_OFF = 0,
|
||||
/** 1: Constant indicating that character mirroring should be performed.
|
||||
* This corresponds to calling <code>ubidi_writeReordered</code> or
|
||||
* <code>ubidi_writeReverse</code> with the
|
||||
* <code>UBIDI_DO_MIRRORING</code> option bit set.
* The value 1 is implicit: the enumerator has no initializer and
* directly follows UBIDI_MIRRORING_OFF (0).
|
||||
* @stable ICU 58
|
||||
*/
|
||||
UBIDI_MIRRORING_ON
|
||||
} UBiDiMirroring;
|
||||
|
||||
/**
|
||||
* Forward declaration of the <code>UBiDiTransform</code> structure that stores
|
||||
* information used by the layout transformation engine.
|
||||
* @stable ICU 58
|
||||
*/
|
||||
typedef struct UBiDiTransform UBiDiTransform;
|
||||
|
||||
/**
|
||||
* Performs transformation of text from the bidi layout defined by the input
|
||||
* ordering scheme to the bidi layout defined by the output ordering scheme,
|
||||
* and applies character mirroring and Arabic shaping operations.<p>
|
||||
* In terms of <code>UBiDi</code>, such a transformation implies:
|
||||
* <ul>
|
||||
* <li>calling <code>ubidi_setReorderingMode</code> as needed (when the
|
||||
* reordering mode is other than normal),</li>
|
||||
* <li>calling <code>ubidi_setInverse</code> as needed (when text should be
|
||||
* transformed from a visual to a logical form),</li>
|
||||
* <li>resolving embedding levels of each character in the input text by
|
||||
* calling <code>ubidi_setPara</code>,</li>
|
||||
* <li>reordering the characters based on the computed embedding levels, also
|
||||
* performing character mirroring as needed, and streaming the result to the
|
||||
* output, by calling <code>ubidi_writeReordered</code>,</li>
|
||||
* <li>performing Arabic digit and letter shaping on the output text by calling
|
||||
* <code>u_shapeArabic</code>.</li>
|
||||
* </ul>
|
||||
* An "ordering scheme" encompasses the base direction and the order of text,
|
||||
* and these characteristics must be defined by the caller for both input and
|
||||
* output explicitly.<p>
|
||||
* There are 36 possible combinations of <input, output> ordering schemes,
|
||||
* which are partially supported by <code>UBiDi</code> already. Examples of the
|
||||
* currently supported combinations:
|
||||
* <ul>
|
||||
* <li><Logical LTR, Visual LTR>: this is equivalent to calling
|
||||
* <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
|
||||
* <li><Logical RTL, Visual LTR>: this is equivalent to calling
|
||||
* <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li>
|
||||
* <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to
|
||||
* calling <code>ubidi_setPara</code> with
|
||||
* <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li>
|
||||
* <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to
|
||||
* calling <code>ubidi_setPara</code> with
|
||||
* <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li>
|
||||
* <li><Visual LTR, Logical LTR>: this is equivalent to
|
||||
* calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
|
||||
* <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
|
||||
* <li><Visual LTR, Logical RTL>: this is equivalent to
|
||||
* calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
|
||||
* <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li>
|
||||
* </ul>
|
||||
* All combinations that involve the Visual RTL scheme are unsupported by
|
||||
* <code>UBiDi</code>, for instance:
|
||||
* <ul>
|
||||
* <li><Logical LTR, Visual RTL>,</li>
|
||||
* <li><Visual RTL, Logical RTL>.</li>
|
||||
* </ul>
|
||||
* <p>Example of usage of the transformation engine:<br>
|
||||
* <pre>
|
||||
* \code
|
||||
* UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
|
||||
* UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
|
||||
* UErrorCode errorCode = U_ZERO_ERROR;
|
||||
* // Run a transformation.
|
||||
* ubiditransform_transform(pBidiTransform,
|
||||
* text1, -1, text2, -1,
|
||||
* UBIDI_LTR, UBIDI_VISUAL,
|
||||
* UBIDI_RTL, UBIDI_LOGICAL,
|
||||
* UBIDI_MIRRORING_OFF,
|
||||
* U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
|
||||
* &errorCode);
|
||||
* // Do something with text2.
|
||||
* text2[4] = '2';
|
||||
* // Run a reverse transformation.
|
||||
* ubiditransform_transform(pBidiTransform,
|
||||
* text2, -1, text1, -1,
|
||||
* UBIDI_RTL, UBIDI_LOGICAL,
|
||||
* UBIDI_LTR, UBIDI_VISUAL,
|
||||
* UBIDI_MIRRORING_OFF,
|
||||
* U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
|
||||
* &errorCode);
|
||||
*\endcode
|
||||
* </pre>
|
||||
* </p>
|
||||
*
|
||||
* @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object
|
||||
* allocated with <code>ubiditransform_open()</code> or
|
||||
* <code>NULL</code>.<p>
|
||||
* This object serves for one-time setup to amortize initialization
|
||||
* overheads. Use of this object is not thread-safe. All other threads
|
||||
* should allocate a new <code>UBiDiTransform</code> object by calling
|
||||
* <code>ubiditransform_open()</code> before using it. Alternatively,
|
||||
* a caller can set this parameter to <code>NULL</code>, in which case
|
||||
* the object will be allocated by the engine on the fly.</p>
|
||||
* @param src A pointer to the text that the Bidi layout transformations will
|
||||
* be performed on.
|
||||
* <p><strong>Note:</strong> the text must be (at least)
|
||||
* <code>srcLength</code> long.</p>
|
||||
* @param srcLength The length of the text, in number of UChars. If
|
||||
* <code>srcLength == -1</code> then the text must be zero-terminated.
|
||||
* @param dest A pointer to where the processed text is to be copied.
|
||||
* @param destSize The size of the <code>dest</code> buffer, in number of
|
||||
* UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set,
|
||||
* then the destination length could be as large as
|
||||
* <code>srcLength * 2</code>. Otherwise, the destination length will
|
||||
* not exceed <code>srcLength</code>. If the caller reserves the last
|
||||
* position for zero-termination, it should be excluded from
|
||||
* <code>destSize</code>.
|
||||
* <p><code>destSize == -1</code> is allowed and makes sense when
|
||||
* <code>dest</code> already holds some meaningful value, e.g. that of
|
||||
* <code>src</code>. In this case <code>dest</code> must be
|
||||
* zero-terminated.</p>
|
||||
* @param inParaLevel A base embedding level of the input as defined in
|
||||
* <code>ubidi_setPara</code> documentation for the
|
||||
* <code>paraLevel</code> parameter.
|
||||
* @param inOrder An order of the input, which can be one of the
|
||||
* <code>UBiDiOrder</code> values.
|
||||
* @param outParaLevel A base embedding level of the output as defined in
|
||||
* <code>ubidi_setPara</code> documentation for the
|
||||
* <code>paraLevel</code> parameter.
|
||||
* @param outOrder An order of the output, which can be one of the
|
||||
* <code>UBiDiOrder</code> values.
|
||||
* @param doMirroring Indicates whether or not to perform character mirroring,
|
||||
* and can accept one of the <code>UBiDiMirroring</code> values.
|
||||
* @param shapingOptions Arabic digit and letter shaping options defined in the
|
||||
* ushape.h documentation.
|
||||
* <p><strong>Note:</strong> Direction indicator options are computed by
|
||||
* the transformation engine based on the effective ordering schemes, so
|
||||
* user-defined direction indicators will be ignored.</p>
|
||||
* @param pErrorCode A pointer to an error code value.
|
||||
*
|
||||
* @return The destination length, i.e. the number of UChars written to
|
||||
* <code>dest</code>. If the transformation fails, the return value
|
||||
* will be 0 (and the error code will be written to
|
||||
* <code>pErrorCode</code>).
|
||||
*
|
||||
* @see UBiDiLevel
|
||||
* @see UBiDiOrder
|
||||
* @see UBiDiMirroring
|
||||
* @see ubidi_setPara
|
||||
* @see u_shapeArabic
|
||||
* @stable ICU 58
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ubiditransform_transform(UBiDiTransform *pBiDiTransform,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destSize,
|
||||
UBiDiLevel inParaLevel, UBiDiOrder inOrder,
|
||||
UBiDiLevel outParaLevel, UBiDiOrder outOrder,
|
||||
UBiDiMirroring doMirroring, uint32_t shapingOptions,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Allocates a <code>UBiDiTransform</code> object. This object can be reused,
|
||||
* e.g. with different ordering schemes, mirroring or shaping options.<p>
|
||||
* <strong>Note:</strong> The object can only be reused in the same thread.
|
||||
* All other threads should allocate a new <code>UBiDiTransform</code> object
|
||||
* before using it.<p>
|
||||
* Example of usage:<p>
|
||||
* <pre>
|
||||
* \code
|
||||
* UErrorCode errorCode = U_ZERO_ERROR;
|
||||
* // Open a new UBiDiTransform.
|
||||
* UBiDiTransform* transform = ubiditransform_open(&errorCode);
|
||||
* // Run a transformation.
|
||||
* ubiditransform_transform(transform,
|
||||
* text1, -1, text2, -1,
|
||||
* UBIDI_RTL, UBIDI_LOGICAL,
|
||||
* UBIDI_LTR, UBIDI_VISUAL,
|
||||
* UBIDI_MIRRORING_ON,
|
||||
* U_SHAPE_DIGITS_EN2AN,
|
||||
* &errorCode);
|
||||
* // Do something with the output text and invoke another transformation using
|
||||
* // that text as input.
|
||||
* ubiditransform_transform(transform,
|
||||
* text2, -1, text3, -1,
|
||||
* UBIDI_LTR, UBIDI_VISUAL,
|
||||
* UBIDI_RTL, UBIDI_VISUAL,
|
||||
* UBIDI_MIRRORING_ON,
|
||||
* 0, &errorCode);
|
||||
*\endcode
|
||||
* </pre>
|
||||
* <p>
|
||||
* The <code>UBiDiTransform</code> object must be deallocated by calling
|
||||
* <code>ubiditransform_close()</code>.
|
||||
*
|
||||
* @return An empty <code>UBiDiTransform</code> object.
|
||||
* @stable ICU 58
|
||||
*/
|
||||
U_CAPI UBiDiTransform* U_EXPORT2
|
||||
ubiditransform_open(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Deallocates the given <code>UBiDiTransform</code> object.
|
||||
* @stable ICU 58
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubiditransform_close(UBiDiTransform *pBidiTransform);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUBiDiTransformPointer
|
||||
* "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 58
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
647
thirdparty/icu4c/common/unicode/ubrk.h
vendored
Normal file
647
thirdparty/icu4c/common/unicode/ubrk.h
vendored
Normal file
@@ -0,0 +1,647 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1996-2015, International Business Machines Corporation and others.
|
||||
* All Rights Reserved.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UBRK_H
|
||||
#define UBRK_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/utext.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* A text-break iterator.
|
||||
* For usage in C programs.
|
||||
*/
|
||||
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
|
||||
# define UBRK_TYPEDEF_UBREAK_ITERATOR
|
||||
/**
|
||||
* Opaque type representing an ICU Break iterator object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UBreakIterator UBreakIterator;
|
||||
#endif
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: BreakIterator
|
||||
*
|
||||
* <h2> BreakIterator C API </h2>
|
||||
*
|
||||
* The BreakIterator C API defines methods for finding the location
|
||||
* of boundaries in text. UBreakIterator objects maintain a
|
||||
* current position and scan over text returning the index of characters
|
||||
* where boundaries occur.
|
||||
* <p>
|
||||
* Line boundary analysis determines where a text string can be broken
|
||||
* when line-wrapping. The mechanism correctly handles punctuation and
|
||||
* hyphenated words.
|
||||
* <p>
|
||||
* Note: The locale keyword "lb" can be used to modify line break
|
||||
* behavior according to the CSS level 3 line-break options, see
|
||||
* <http://dev.w3.org/csswg/css-text/#line-breaking>. For example:
|
||||
* "ja@lb=strict", "zh@lb=loose".
|
||||
* <p>
|
||||
* Sentence boundary analysis allows selection with correct
|
||||
* interpretation of periods within numbers and abbreviations, and
|
||||
* trailing punctuation marks such as quotation marks and parentheses.
|
||||
* <p>
|
||||
* Note: The locale keyword "ss" can be used to enable use of
|
||||
* segmentation suppression data (preventing breaks in English after
|
||||
* abbreviations such as "Mr." or "Est.", for example), as follows:
|
||||
* "en@ss=standard".
|
||||
* <p>
|
||||
* Word boundary analysis is used by search and replace functions, as
|
||||
* well as within text editing applications that allow the user to
|
||||
* select words with a double click. Word selection provides correct
|
||||
* interpretation of punctuation marks within and following
|
||||
* words. Characters that are not part of a word, such as symbols or
|
||||
* punctuation marks, have word-breaks on both sides.
|
||||
* <p>
|
||||
* Character boundary analysis identifies the boundaries of
|
||||
* "Extended Grapheme Clusters", which are groupings of codepoints
|
||||
* that should be treated as character-like units for many text operations.
|
||||
* Please see Unicode Standard Annex #29, Unicode Text Segmentation,
|
||||
* http://www.unicode.org/reports/tr29/ for additional information
|
||||
* on grapheme clusters and guidelines on their use.
|
||||
* <p>
|
||||
* Title boundary analysis locates all positions,
|
||||
* typically starts of words, that should be set to Title Case
|
||||
* when title casing the text.
|
||||
* <p>
|
||||
* The text boundary positions are found according to the rules
|
||||
* described in Unicode Standard Annex #29, Text Boundaries, and
|
||||
* Unicode Standard Annex #14, Line Breaking Properties. These
|
||||
* are available at http://www.unicode.org/reports/tr14/ and
|
||||
* http://www.unicode.org/reports/tr29/.
|
||||
* <p>
|
||||
* In addition to the plain C API defined in this header file, an
|
||||
* object oriented C++ API with equivalent functionality is defined in the
|
||||
* file brkiter.h.
|
||||
* <p>
|
||||
* Code snippets illustrating the use of the Break Iterator APIs
|
||||
* are available in the ICU User Guide,
|
||||
* https://unicode-org.github.io/icu/userguide/boundaryanalysis/
|
||||
* and in the sample program icu/source/samples/break/break.cpp
|
||||
*/
|
||||
|
||||
/** The possible types of text boundaries. @stable ICU 2.0 */
|
||||
typedef enum UBreakIteratorType {
|
||||
/** Character breaks: boundaries of character-like units
* ("Extended Grapheme Clusters"). @stable ICU 2.0 */
|
||||
UBRK_CHARACTER = 0,
|
||||
/** Word breaks @stable ICU 2.0 */
|
||||
UBRK_WORD = 1,
|
||||
/** Line breaks: positions where text can be broken when
* line-wrapping. @stable ICU 2.0 */
|
||||
UBRK_LINE = 2,
|
||||
/** Sentence breaks @stable ICU 2.0 */
|
||||
UBRK_SENTENCE = 3,
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Title Case breaks
|
||||
* The iterator created using this type locates title boundaries as described for
|
||||
* Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
|
||||
* please use Word Boundary iterator.
|
||||
*
* This enumerator is only declared when U_HIDE_DEPRECATED_API is not defined.
*
|
||||
* @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
|
||||
*/
|
||||
UBRK_TITLE = 4,
|
||||
/**
|
||||
* One more than the highest normal UBreakIteratorType value.
* This enumerator is only declared when U_HIDE_DEPRECATED_API is not defined.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UBRK_COUNT = 5
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
} UBreakIteratorType;
|
||||
|
||||
/** Value indicating all text boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UBRK_DONE ((int32_t) -1)
|
||||
|
||||
|
||||
/**
|
||||
* Enum constants for the word break tags returned by
|
||||
* getRuleStatus(). A range of values is defined for each category of
|
||||
* word, to allow for further subdivisions of a category in future releases.
|
||||
* Applications should check for tag values falling within the range, rather
|
||||
* than for single individual values.
|
||||
*
|
||||
* The numeric values of all of these constants are stable (will not change).
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
typedef enum UWordBreak {
|
||||
/** Tag value for "words" that do not fit into any of the other categories.
|
||||
* Includes spaces and most punctuation. */
|
||||
UBRK_WORD_NONE = 0,
|
||||
/** Upper bound for tags for uncategorized words.
* Same value as UBRK_WORD_NUMBER, so the bound itself is not part of this category. */
|
||||
UBRK_WORD_NONE_LIMIT = 100,
|
||||
/** Tag value for words that appear to be numbers, lower limit. */
|
||||
UBRK_WORD_NUMBER = 100,
|
||||
/** Tag value for words that appear to be numbers, upper limit.
* Same value as UBRK_WORD_LETTER, so the limit itself is not part of this category. */
|
||||
UBRK_WORD_NUMBER_LIMIT = 200,
|
||||
/** Tag value for words that contain letters, excluding
|
||||
* hiragana, katakana or ideographic characters, lower limit. */
|
||||
UBRK_WORD_LETTER = 200,
|
||||
/** Tag value for words containing letters, upper limit.
* Same value as UBRK_WORD_KANA, so the limit itself is not part of this category. */
|
||||
UBRK_WORD_LETTER_LIMIT = 300,
|
||||
/** Tag value for words containing kana characters, lower limit */
|
||||
UBRK_WORD_KANA = 300,
|
||||
/** Tag value for words containing kana characters, upper limit.
* Same value as UBRK_WORD_IDEO, so the limit itself is not part of this category. */
|
||||
UBRK_WORD_KANA_LIMIT = 400,
|
||||
/** Tag value for words containing ideographic characters, lower limit */
|
||||
UBRK_WORD_IDEO = 400,
|
||||
/** Tag value for words containing ideographic characters, upper limit
* (presumably exclusive, like the other *_LIMIT values). */
|
||||
UBRK_WORD_IDEO_LIMIT = 500
|
||||
} UWordBreak;
|
||||
|
||||
/**
|
||||
* Enum constants for the line break tags returned by getRuleStatus().
|
||||
* A range of values is defined for each category of
|
||||
* line break, to allow for further subdivisions of a category in future releases.
|
||||
* Applications should check for tag values falling within the range, rather
|
||||
* than for single individual values.
|
||||
*
|
||||
* The numeric values of all of these constants are stable (will not change).
|
||||
*
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef enum ULineBreakTag {
|
||||
/** Tag value for soft line breaks, positions at which a line break
|
||||
* is acceptable but not required */
|
||||
UBRK_LINE_SOFT = 0,
|
||||
/** Upper bound for soft line breaks.
* Same value as UBRK_LINE_HARD, so the bound itself is not part of this category. */
|
||||
UBRK_LINE_SOFT_LIMIT = 100,
|
||||
/** Tag value for a hard, or mandatory line break */
|
||||
UBRK_LINE_HARD = 100,
|
||||
/** Upper bound for hard line breaks
* (presumably exclusive, like UBRK_LINE_SOFT_LIMIT). */
|
||||
UBRK_LINE_HARD_LIMIT = 200
|
||||
} ULineBreakTag;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Enum constants for the sentence break tags returned by getRuleStatus().
|
||||
* A range of values is defined for each category of
|
||||
* sentence, to allow for further subdivisions of a category in future releases.
|
||||
* Applications should check for tag values falling within the range, rather
|
||||
* than for single individual values.
|
||||
*
|
||||
* The numeric values of all of these constants are stable (will not change).
|
||||
*
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef enum USentenceBreakTag {
|
||||
/** Tag value for sentences ending with a sentence terminator
|
||||
* ('.', '?', '!', etc.) character, possibly followed by a
|
||||
* hard separator (CR, LF, PS, etc.)
|
||||
*/
|
||||
UBRK_SENTENCE_TERM = 0,
|
||||
/** Upper bound for tags for sentences ended by sentence terminators.
* Same value as UBRK_SENTENCE_SEP, so the bound itself is not part of this category. */
|
||||
UBRK_SENTENCE_TERM_LIMIT = 100,
|
||||
/** Tag value for sentences that do not contain an ending
|
||||
* sentence terminator ('.', '?', '!', etc.) character, but
|
||||
* are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
|
||||
*/
|
||||
UBRK_SENTENCE_SEP = 100,
|
||||
/** Upper bound for tags for sentences ended by a separator
* (presumably exclusive, like UBRK_SENTENCE_TERM_LIMIT). */
|
||||
UBRK_SENTENCE_SEP_LIMIT = 200
|
||||
|
||||
} USentenceBreakTag;
|
||||
|
||||
|
||||
/**
|
||||
* Open a new UBreakIterator for locating text boundaries for a specified locale.
|
||||
* A UBreakIterator may be used for detecting character, line, word,
|
||||
* and sentence breaks in text.
|
||||
* @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
|
||||
* UBRK_LINE, UBRK_SENTENCE
|
||||
* @param locale The locale specifying the text-breaking conventions. Note that
|
||||
* locale keys such as "lb" and "ss" may be used to modify text break behavior,
|
||||
* see general discussion of BreakIterator C API.
|
||||
* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
|
||||
* used to specify the text to be iterated.
|
||||
* @param textLength The number of characters in text, or -1 if null-terminated.
|
||||
* @param status A UErrorCode to receive any errors.
|
||||
* @return A UBreakIterator for the specified locale.
|
||||
* @see ubrk_openRules
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_open(UBreakIteratorType type,
|
||||
const char *locale,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Open a new UBreakIterator for locating text boundaries using specified breaking rules.
|
||||
* The rule syntax is ... (TBD)
|
||||
* @param rules A set of rules specifying the text breaking conventions.
|
||||
* @param rulesLength The number of characters in rules, or -1 if null-terminated.
|
||||
* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
|
||||
* used to specify the text to be iterated.
|
||||
* @param textLength The number of characters in text, or -1 if null-terminated.
|
||||
* @param parseErr Receives position and context information for any syntax errors
|
||||
* detected while parsing the rules.
|
||||
* @param status A UErrorCode to receive any errors.
|
||||
* @return A UBreakIterator for the specified rules.
|
||||
* @see ubrk_open
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_openRules(const UChar *rules,
|
||||
int32_t rulesLength,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UParseError *parseErr,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
|
||||
* Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
|
||||
* Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not
|
||||
* compatible across different major versions of ICU, nor across platforms of different
|
||||
* endianness or different base character set family (ASCII vs EBCDIC).
|
||||
* @param binaryRules A set of compiled binary rules specifying the text breaking
|
||||
* conventions. Ownership of the storage containing the compiled
|
||||
* rules remains with the caller of this function. The compiled
|
||||
* rules must not be modified or deleted during the life of the
|
||||
* break iterator.
|
||||
* @param rulesLength The length of binaryRules in bytes; must be >= 0.
|
||||
* @param text The text to be iterated over. May be null, in which case
|
||||
* ubrk_setText() is used to specify the text to be iterated.
|
||||
* @param textLength The number of characters in text, or -1 if null-terminated.
|
||||
* @param status Pointer to UErrorCode to receive any errors.
|
||||
* @return UBreakIterator for the specified rules.
|
||||
* @see ubrk_getBinaryRules
|
||||
* @stable ICU 59
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
|
||||
const UChar * text, int32_t textLength,
|
||||
UErrorCode * status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation
|
||||
* @param bi iterator to be cloned
|
||||
* @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
|
||||
* user allocated space for the new clone. If NULL new memory will be allocated.
|
||||
* If buffer is not large enough, new memory will be allocated.
|
||||
* Clients can use the U_BRK_SAFECLONE_BUFFERSIZE.
|
||||
* @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
|
||||
* pointer to size of allocated space.
|
||||
* If *pBufferSize == 0, a sufficient size for use in cloning will
|
||||
* be returned ('pre-flighting')
|
||||
* If *pBufferSize is not enough for a stack-based safe clone,
|
||||
* new memory will be allocated.
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used
|
||||
* if pBufferSize != NULL and any allocations were necessary
|
||||
* @return pointer to the new clone
|
||||
* @deprecated ICU 69 Use ubrk_clone() instead.
|
||||
*/
|
||||
U_DEPRECATED UBreakIterator * U_EXPORT2
|
||||
ubrk_safeClone(
|
||||
const UBreakIterator *bi,
|
||||
void *stackBuffer,
|
||||
int32_t *pBufferSize,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation.
|
||||
* @param bi iterator to be cloned
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* @return pointer to the new clone
|
||||
* @stable ICU 69
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_clone(const UBreakIterator *bi,
|
||||
UErrorCode *status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone().
|
||||
* @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer.
|
||||
*/
|
||||
#define U_BRK_SAFECLONE_BUFFERSIZE 1
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Close a UBreakIterator.
|
||||
* Once closed, a UBreakIterator may no longer be used.
|
||||
* @param bi The break iterator to close.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_close(UBreakIterator *bi);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUBreakIteratorPointer
|
||||
* "Smart pointer" class, closes a UBreakIterator via ubrk_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Sets an existing iterator to point to a new piece of text.
|
||||
* The break iterator retains a pointer to the supplied text.
|
||||
* The caller must not modify or delete the text while the BreakIterator
|
||||
* retains the reference.
|
||||
*
|
||||
* @param bi The iterator to use
|
||||
* @param text The text to be set
|
||||
* @param textLength The length of the text
|
||||
* @param status The error code
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_setText(UBreakIterator* bi,
|
||||
const UChar* text,
|
||||
int32_t textLength,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
/**
|
||||
* Sets an existing iterator to point to a new piece of text.
|
||||
*
|
||||
* All index positions returned by break iterator functions are
|
||||
* native indices from the UText. For example, when breaking UTF-8
|
||||
* encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc.
|
||||
* will be UTF-8 string indices, not UTF-16 positions.
|
||||
*
|
||||
* @param bi The iterator to use
|
||||
* @param text The text to be set.
|
||||
* This function makes a shallow clone of the supplied UText. This means
|
||||
* that the caller is free to immediately close or otherwise reuse the
|
||||
* UText that was passed as a parameter, but that the underlying text itself
|
||||
* must not be altered while being referenced by the break iterator.
|
||||
* @param status The error code
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_setUText(UBreakIterator* bi,
|
||||
UText* text,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Determine the most recently-returned text boundary.
|
||||
*
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
|
||||
* \ref ubrk_first, or \ref ubrk_last.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_current(const UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Advance the iterator to the boundary following the current boundary.
|
||||
*
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character index of the next text boundary, or UBRK_DONE
|
||||
* if all text boundaries have been returned.
|
||||
* @see ubrk_previous
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_next(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to the boundary preceding the current boundary.
|
||||
*
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character index of the preceding text boundary, or UBRK_DONE
|
||||
* if all text boundaries have been returned.
|
||||
* @see ubrk_next
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_previous(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to zero, the start of the text being scanned.
|
||||
* @param bi The break iterator to use.
|
||||
* @return The new iterator position (zero).
|
||||
* @see ubrk_last
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_first(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
|
||||
* This is not the same as the last character.
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character offset immediately <EM>beyond</EM> the last character in the
|
||||
* text being scanned.
|
||||
* @see ubrk_first
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_last(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to the first boundary preceding the specified offset.
|
||||
* The new position is always smaller than offset, or UBRK_DONE.
|
||||
* @param bi The break iterator to use.
|
||||
* @param offset The offset to begin scanning.
|
||||
* @return The text boundary preceding offset, or UBRK_DONE.
|
||||
* @see ubrk_following
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_preceding(UBreakIterator *bi,
|
||||
int32_t offset);
|
||||
|
||||
/**
|
||||
* Advance the iterator to the first boundary following the specified offset.
|
||||
* The value returned is always greater than offset, or UBRK_DONE.
|
||||
* @param bi The break iterator to use.
|
||||
* @param offset The offset to begin scanning.
|
||||
* @return The text boundary following offset, or UBRK_DONE.
|
||||
* @see ubrk_preceding
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_following(UBreakIterator *bi,
|
||||
int32_t offset);
|
||||
|
||||
/**
|
||||
* Get a locale for which text breaking information is available.
|
||||
* A UBreakIterator in a locale returned by this function will perform the correct
|
||||
* text breaking for the locale.
|
||||
* @param index The index of the desired locale.
|
||||
* @return A locale for which text breaking information is available, or 0 if none.
|
||||
* @see ubrk_countAvailable
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ubrk_getAvailable(int32_t index);
|
||||
|
||||
/**
|
||||
* Determine how many locales have text breaking information available.
|
||||
* This function is most useful for determining the loop ending condition for
|
||||
* calls to \ref ubrk_getAvailable.
|
||||
* @return The number of locales for which text breaking information is available.
|
||||
* @see ubrk_getAvailable
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_countAvailable(void);
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if the specified position is a boundary position. As a side
|
||||
* effect, leaves the iterator pointing to the first boundary position at
|
||||
* or after "offset".
|
||||
* @param bi The break iterator to use.
|
||||
* @param offset the offset to check.
|
||||
* @return True if "offset" is a boundary position.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
|
||||
|
||||
/**
|
||||
* Return the status from the break rule that determined the most recently
|
||||
* returned break position. The values appear in the rule source
|
||||
* within brackets, {123}, for example. For rules that do not specify a
|
||||
* status, a default value of 0 is returned.
|
||||
* <p>
|
||||
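* For word break iterators, the possible values are defined in enum UWordBreak.
* @param bi The break iterator to use.
* @return The status from the break rule that determined the most recently
* returned break position, or 0 if that rule does not specify a status.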
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getRuleStatus(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Get the statuses from the break rules that determined the most recently
|
||||
* returned break position. The values appear in the rule source
|
||||
* within brackets, {123}, for example. The default status value for rules
|
||||
* that do not explicitly provide one is zero.
|
||||
* <p>
|
||||
* For word break iterators, the possible values are defined in enum UWordBreak.
|
||||
* @param bi The break iterator to use
|
||||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the most recent boundary returned by the break iterator.
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Return the locale of the break iterator. You can choose between the valid and
|
||||
* the actual locale.
|
||||
* @param bi break iterator
|
||||
* @param type locale type (valid or actual)
|
||||
* @param status error code
|
||||
* @return locale string
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the break iterator is operating
|
||||
* without changing any other aspect of the state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the break iterator never copies the underlying text
|
||||
* of a string being processed, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized
|
||||
* system-level code. One example use case is with garbage collection
|
||||
* that moves the text in memory.
|
||||
*
|
||||
* @param bi The break iterator.
|
||||
* @param text The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
*
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_refreshUText(UBreakIterator *bi,
|
||||
UText *text,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
|
||||
* The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
|
||||
* more quickly than using ubrk_openRules. The compiled rules are not compatible across
|
||||
* different major versions of ICU, nor across platforms of different endianness or
|
||||
* different base character set family (ASCII vs EBCDIC). Supports preflighting (with
|
||||
* binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to
|
||||
* the binaryRules buffer. However, whether preflighting or not, if the actual length
|
||||
* is greater than INT32_MAX, then the function returns 0 and sets *status to
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR.
|
||||
|
||||
* @param bi The break iterator to use.
|
||||
* @param binaryRules Buffer to receive the compiled binary rules; set to NULL for
|
||||
* preflighting.
|
||||
* @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for
|
||||
* preflighting. Must be >= 0.
|
||||
* @param status Pointer to UErrorCode to receive any errors, such as
|
||||
* U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
* @return The actual byte length of the binary rules, if <= INT32_MAX;
|
||||
* otherwise 0. If not preflighting and this is larger than
|
||||
* rulesCapacity, *status will be set to an error.
|
||||
* @see ubrk_openBinaryRules
|
||||
* @stable ICU 59
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getBinaryRules(UBreakIterator *bi,
|
||||
uint8_t * binaryRules, int32_t rulesCapacity,
|
||||
UErrorCode * status);
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif
|
||||
388
thirdparty/icu4c/common/unicode/ucasemap.h
vendored
Normal file
388
thirdparty/icu4c/common/unicode/ucasemap.h
vendored
Normal file
@@ -0,0 +1,388 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2005-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: ucasemap.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2005may06
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Case mapping service object and functions using it.
|
||||
*/
|
||||
|
||||
#ifndef __UCASEMAP_H__
|
||||
#define __UCASEMAP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Unicode case mapping functions using a UCaseMap service object.
|
||||
*
|
||||
* The service object takes care of memory allocations, data loading, and setup
|
||||
* for the attributes, as usual.
|
||||
*
|
||||
* Currently, the functionality provided here does not overlap with uchar.h
|
||||
* and ustring.h, except for ucasemap_toTitle().
|
||||
*
|
||||
* ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
|
||||
*/
|
||||
|
||||
/**
|
||||
* UCaseMap is an opaque service object for newer ICU case mapping functions.
|
||||
* Older functions did not use a service object.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
struct UCaseMap;
|
||||
typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
|
||||
|
||||
/**
|
||||
* Open a UCaseMap service object for a locale and a set of options.
|
||||
* The locale ID and options are preprocessed so that functions using the
|
||||
* service object need not process them in each call.
|
||||
*
|
||||
* @param locale ICU locale ID, used for language-dependent
|
||||
* upper-/lower-/title-casing according to the Unicode standard.
|
||||
* Usual semantics: ""=root, NULL=default locale, etc.
|
||||
* @param options Options bit set, used for case folding and string comparisons.
|
||||
* Same flags as for u_foldCase(), u_strFoldCase(),
|
||||
* u_strCaseCompare(), etc.
|
||||
* Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return Pointer to a UCaseMap service object, if successful.
|
||||
*
|
||||
* @see U_FOLD_CASE_DEFAULT
|
||||
* @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI UCaseMap * U_EXPORT2
|
||||
ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Close a UCaseMap service object.
|
||||
* @param csm Object to be closed.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucasemap_close(UCaseMap *csm);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUCaseMapPointer
|
||||
* "Smart pointer" class, closes a UCaseMap via ucasemap_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Get the locale ID that is used for language-dependent case mappings.
|
||||
* @param csm UCaseMap service object.
|
||||
* @return locale ID
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
ucasemap_getLocale(const UCaseMap *csm);
|
||||
|
||||
/**
|
||||
* Get the options bit set that is used for case folding and string comparisons.
|
||||
* @param csm UCaseMap service object.
|
||||
* @return options bit set
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ucasemap_getOptions(const UCaseMap *csm);
|
||||
|
||||
/**
|
||||
* Set the locale ID that is used for language-dependent case mappings.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param locale Locale ID, see ucasemap_open().
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_open
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Set the options bit set that is used for case folding and string comparisons.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param options Options bit set, see ucasemap_open().
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_open
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Get the break iterator that is used for titlecasing.
|
||||
* Do not modify the returned break iterator.
|
||||
* @param csm UCaseMap service object.
|
||||
* @return titlecasing break iterator
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
U_CAPI const UBreakIterator * U_EXPORT2
|
||||
ucasemap_getBreakIterator(const UCaseMap *csm);
|
||||
|
||||
/**
|
||||
* Set the break iterator that is used for titlecasing.
|
||||
* The UCaseMap service object releases a previously set break iterator
|
||||
* and "adopts" this new one, taking ownership of it.
|
||||
* It will be released in a subsequent call to ucasemap_setBreakIterator()
|
||||
* or ucasemap_close().
|
||||
*
|
||||
* Break iterator operations are not thread-safe. Therefore, titlecasing
|
||||
* functions use non-const UCaseMap objects. It is not possible to titlecase
|
||||
* strings concurrently using the same UCaseMap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param iterToAdopt Break iterator to be adopted for titlecasing.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
|
||||
* except that it takes ucasemap_setOptions() into account and has performance
|
||||
* advantages from being able to use a UCaseMap object for multiple case mapping
|
||||
* operations, saving setup time.
|
||||
*
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with ucasemap_setOptions().)
|
||||
*
|
||||
* Note: This function takes a non-const UCaseMap pointer because it will
|
||||
* open a default break iterator if no break iterator was set yet,
|
||||
* and effectively call ucasemap_setBreakIterator();
|
||||
* also because the break iterator is stateful and will be modified during
|
||||
* the iteration.
|
||||
*
|
||||
* The titlecase break iterator can be provided to customize for arbitrary
|
||||
* styles, using rules and dictionaries beyond the standard iterators.
|
||||
* If the break iterator passed in is null, the default Unicode algorithm
|
||||
* will be used to determine the titlecase positions.
|
||||
*
|
||||
* This function uses only the setText(), first() and next() methods of the
|
||||
* provided break iterator.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object. This pointer is non-const!
|
||||
* See the note above for details.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToTitle
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_toTitle(UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif // UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Lowercase the characters in a UTF-8 string.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToLower
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_utf8ToLower(const UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Uppercase the characters in a UTF-8 string.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToUpper
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_utf8ToUpper(const UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Titlecase a UTF-8 string.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with ucasemap_setOptions().)
|
||||
*
|
||||
* Note: This function takes a non-const UCaseMap pointer because it will
|
||||
* open a default break iterator if no break iterator was set yet,
|
||||
* and effectively call ucasemap_setBreakIterator();
|
||||
* also because the break iterator is stateful and will be modified during
|
||||
* the iteration.
|
||||
*
|
||||
* The titlecase break iterator can be provided to customize for arbitrary
|
||||
* styles, using rules and dictionaries beyond the standard iterators.
|
||||
* If the break iterator passed in is null, the default Unicode algorithm
|
||||
* will be used to determine the titlecase positions.
|
||||
*
|
||||
* This function uses only the setUText(), first(), next() and close() methods of the
|
||||
* provided break iterator.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object. This pointer is non-const!
|
||||
* See the note above for details.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToTitle
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_utf8ToTitle(UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Case-folds the characters in a UTF-8 string.
|
||||
*
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strFoldCase
|
||||
* @see ucasemap_setOptions
|
||||
* @see U_FOLD_CASE_DEFAULT
|
||||
* @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_utf8FoldCase(const UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
160
thirdparty/icu4c/common/unicode/ucat.h
vendored
Normal file
160
thirdparty/icu4c/common/unicode/ucat.h
vendored
Normal file
@@ -0,0 +1,160 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2003-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: March 19 2003
|
||||
* Since: ICU 2.6
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef UCAT_H
|
||||
#define UCAT_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ures.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Message Catalog Wrappers
|
||||
*
|
||||
* This C API provides look-alike functions that deliberately resemble
|
||||
* the POSIX catopen, catclose, and catgets functions. The underlying
|
||||
* implementation is in terms of ICU resource bundles, rather than
|
||||
* POSIX message catalogs.
|
||||
*
|
||||
* The ICU resource bundles obey standard ICU inheritance policies.
|
||||
* To facilitate this, sets and messages are flattened into one tier.
|
||||
* This is done by creating resource bundle keys of the form
|
||||
* <set_num>%<msg_num> where set_num is the set number and msg_num is
|
||||
* the message number, formatted as decimal strings.
|
||||
*
|
||||
* Example: Consider a message catalog containing two sets:
|
||||
*
|
||||
* Set 1: Message 4 = "Good morning."
|
||||
* Message 5 = "Good afternoon."
|
||||
* Message 7 = "Good evening."
|
||||
* Message 8 = "Good night."
|
||||
* Set 4: Message 14 = "Please "
|
||||
* Message 19 = "Thank you."
|
||||
* Message 20 = "Sincerely,"
|
||||
*
|
||||
* The ICU resource bundle source file, assuming it is named
|
||||
* "greet.txt", would look like this:
|
||||
*
|
||||
* greet
|
||||
* {
|
||||
* 1%4 { "Good morning." }
|
||||
* 1%5 { "Good afternoon." }
|
||||
* 1%7 { "Good evening." }
|
||||
* 1%8 { "Good night." }
|
||||
*
|
||||
* 4%14 { "Please " }
|
||||
* 4%19 { "Thank you." }
|
||||
* 4%20 { "Sincerely," }
|
||||
* }
|
||||
*
|
||||
* The catgets function is commonly used in combination with functions
|
||||
* like printf and strftime. ICU components like message format can
|
||||
* be used instead, although they use a different format syntax.
|
||||
* There is an ICU package, icuio, that provides some of
|
||||
* the POSIX-style formatting API.
|
||||
*/
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* An ICU message catalog descriptor, analogous to nl_catd.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
typedef UResourceBundle* u_nl_catd;
|
||||
|
||||
/**
|
||||
* Open and return an ICU message catalog descriptor. The descriptor
|
||||
* may be passed to u_catgets() to retrieve localized strings.
|
||||
*
|
||||
* @param name string containing the full path pointing to the
|
||||
* directory where the resources reside followed by the package name
|
||||
* e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system.
|
||||
* If NULL, ICU default data files will be used.
|
||||
*
|
||||
* Unlike POSIX, environment variables are not interpolated within the
|
||||
* name.
|
||||
*
|
||||
* @param locale the locale for which we want to open the resource. If
|
||||
* NULL, the default ICU locale will be used (see uloc_getDefault). If
|
||||
* strlen(locale) == 0, the root locale will be used.
|
||||
*
|
||||
* @param ec input/output error code. Upon output,
|
||||
* U_USING_FALLBACK_WARNING indicates that a fallback locale was
|
||||
* used. For example, 'de_CH' was requested, but nothing was found
|
||||
* there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that the
|
||||
* default locale data or root locale data was used; neither the
|
||||
* requested locale nor any of its fallback locales were found.
|
||||
*
|
||||
* @return a message catalog descriptor that may be passed to
|
||||
* u_catgets(). If the ec parameter indicates success, then the caller
|
||||
* is responsible for calling u_catclose() to close the message
|
||||
* catalog. If the ec parameter indicates failure, then NULL will be
|
||||
* returned.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI u_nl_catd U_EXPORT2
|
||||
u_catopen(const char* name, const char* locale, UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Close an ICU message catalog, given its descriptor.
|
||||
*
|
||||
* @param catd a message catalog descriptor to be closed. May be NULL,
|
||||
* in which case no action is taken.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_catclose(u_nl_catd catd);
|
||||
|
||||
/**
|
||||
* Retrieve a localized string from an ICU message catalog.
|
||||
*
|
||||
* @param catd a message catalog descriptor returned by u_catopen.
|
||||
*
|
||||
* @param set_num the message catalog set number. Sets need not be
|
||||
* numbered consecutively.
|
||||
*
|
||||
* @param msg_num the message catalog message number within the
|
||||
* set. Messages need not be numbered consecutively.
|
||||
*
|
||||
* @param s the default string. This is returned if the string
|
||||
* specified by the set_num and msg_num is not found. It must be
|
||||
* zero-terminated.
|
||||
*
|
||||
* @param len fill-in parameter to receive the length of the result.
|
||||
* May be NULL, in which case it is ignored.
|
||||
*
|
||||
* @param ec input/output error code. May be U_USING_FALLBACK_WARNING
|
||||
* or U_USING_DEFAULT_WARNING. U_MISSING_RESOURCE_ERROR indicates that
|
||||
* the set_num/msg_num tuple does not specify a valid message string
|
||||
* in this catalog.
|
||||
*
|
||||
* @return a pointer to a zero-terminated UChar array which lives in
|
||||
* an internal buffer area, typically a memory mapped/DLL file. The
|
||||
* caller must NOT delete this pointer. If the call is unsuccessful
|
||||
* for any reason, then s is returned. This includes the situation in
|
||||
* which ec indicates a failing error code upon entry to this
|
||||
* function.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num,
|
||||
const UChar* s,
|
||||
int32_t* len, UErrorCode* ec);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif /*UCAT_H*/
|
||||
/*eof*/
|
||||
4404
thirdparty/icu4c/common/unicode/uchar.h
vendored
Normal file
4404
thirdparty/icu4c/common/unicode/uchar.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
623
thirdparty/icu4c/common/unicode/ucharstrie.h
vendored
Normal file
623
thirdparty/icu4c/common/unicode/ucharstrie.h
vendored
Normal file
@@ -0,0 +1,623 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: ucharstrie.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010nov14
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UCHARSTRIE_H__
|
||||
#define __UCHARSTRIE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences)
|
||||
* to integer values.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ustringtrie.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class Appendable;
|
||||
class UCharsTrieBuilder;
|
||||
class UVector32;
|
||||
|
||||
/**
|
||||
* Light-weight, non-const reader class for a UCharsTrie.
|
||||
* Traverses a char16_t-serialized data structure with minimal state,
|
||||
* for mapping strings (16-bit-unit sequences) to non-negative integer values.
|
||||
*
|
||||
* This class owns the serialized trie data only if it was constructed by
|
||||
* the builder's build() method.
|
||||
* The public constructor and the copy constructor only alias the data (only copy the pointer).
|
||||
* There is no assignment operator.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API UCharsTrie : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs a UCharsTrie reader instance.
|
||||
*
|
||||
* The trieUChars must contain a copy of a char16_t sequence from the UCharsTrieBuilder,
|
||||
* starting with the first char16_t of that sequence.
|
||||
* The UCharsTrie object will not read more char16_ts than
|
||||
* the UCharsTrieBuilder generated in the corresponding build() call.
|
||||
*
|
||||
* The array is not copied/cloned and must not be modified while
|
||||
* the UCharsTrie object is in use.
|
||||
*
|
||||
* @param trieUChars The char16_t array that contains the serialized trie.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UCharsTrie(ConstChar16Ptr trieUChars)
        : ownedArray_(nullptr),        // aliasing reader: this object owns no array
          uchars_(trieUChars),         // start of the serialized trie
          pos_(uchars_),               // reading begins at the first unit
          remainingMatchLength_(-1) {  // not inside a linear-match node
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
~UCharsTrie();
|
||||
|
||||
/**
|
||||
* Copy constructor, copies the other trie reader object and its state,
|
||||
* but not the char16_t array which will be shared. (Shallow copy.)
|
||||
* @param other Another UCharsTrie object.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UCharsTrie(const UCharsTrie &other)
|
||||
: ownedArray_(nullptr), uchars_(other.uchars_),
|
||||
pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
|
||||
|
||||
/**
|
||||
* Resets this trie to its initial state.
|
||||
* @return *this
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UCharsTrie &reset() {
    // Forget any pending linear-match node, then go back to the first trie unit.
    remainingMatchLength_ = -1;
    pos_ = uchars_;
    return *this;
}
|
||||
|
||||
/**
|
||||
* Returns the state of this trie as a 64-bit integer.
|
||||
* The state value is never 0.
|
||||
*
|
||||
* @return opaque state value
|
||||
* @see resetToState64
|
||||
* @stable ICU 65
|
||||
*/
|
||||
uint64_t getState64() const {
|
||||
return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
|
||||
static_cast<uint64_t>(pos_ - uchars_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this trie to the saved state.
|
||||
* Unlike resetToState(State), the 64-bit state value
|
||||
* must be from getState64() from the same trie object or
|
||||
* from one initialized the exact same way.
|
||||
* Because of no validation, this method is faster.
|
||||
*
|
||||
* @param state The opaque trie state value from getState64().
|
||||
* @return *this
|
||||
* @see getState64
|
||||
* @see resetToState
|
||||
* @see reset
|
||||
* @stable ICU 65
|
||||
*/
|
||||
UCharsTrie &resetToState64(uint64_t state) {
    // Split the packed value: position offset in the low bits,
    // biased remaining match length in the bits above kState64RemainingShift.
    const uint64_t offset = state & kState64PosMask;
    const int32_t biasedRemaining = static_cast<int32_t>(state >> kState64RemainingShift);
    pos_ = uchars_ + offset;
    remainingMatchLength_ = biasedRemaining - 2;  // undo the +2 from getState64()
    return *this;
}
|
||||
|
||||
/**
|
||||
* UCharsTrie state object, for saving a trie's current state
|
||||
* and resetting the trie back to this state later.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class State : public UMemory {
public:
    /**
     * Constructs an empty State.
     * Every field receives a defined value so that an empty State can be
     * copied without reading indeterminate members; a null uchars pointer
     * marks the State as holding no saved trie state.
     * @stable ICU 4.8
     */
    State() : uchars(nullptr), pos(nullptr), remainingMatchLength(-1) {}
private:
    friend class UCharsTrie;

    // Start of the trie array the state was saved from; nullptr while empty.
    const char16_t *uchars;
    // Saved read position (copied from UCharsTrie::pos_ by saveState()).
    const char16_t *pos;
    // Saved UCharsTrie::remainingMatchLength_.
    int32_t remainingMatchLength;
};
|
||||
|
||||
/**
|
||||
* Saves the state of this trie.
|
||||
* @param state The State object to hold the trie's state.
|
||||
* @return *this
|
||||
* @see resetToState
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
const UCharsTrie &saveState(State &state) const {
    // Remember which trie array the snapshot belongs to; resetToState()
    // compares this pointer before restoring anything.
    state.uchars = uchars_;
    state.remainingMatchLength = remainingMatchLength_;
    state.pos = pos_;
    return *this;
}
|
||||
|
||||
/**
|
||||
* Resets this trie to the saved state.
|
||||
* If the state object contains no state, or the state of a different trie,
|
||||
* then this trie remains unchanged.
|
||||
* @param state The State object which holds a saved trie state.
|
||||
* @return *this
|
||||
* @see saveState
|
||||
* @see reset
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UCharsTrie &resetToState(const State &state) {
    // Leave this trie untouched when it has no array, or when the state is
    // empty or was saved from a different array.
    if(uchars_ == nullptr || uchars_ != state.uchars) {
        return *this;
    }
    remainingMatchLength_ = state.remainingMatchLength;
    pos_ = state.pos;
    return *this;
}
|
||||
|
||||
/**
|
||||
* Determines whether the string so far matches, whether it has a value,
|
||||
* and whether another input char16_t can continue a matching string.
|
||||
* @return The match/value Result.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UStringTrieResult current() const;
|
||||
|
||||
/**
|
||||
* Traverses the trie from the initial state for this input char16_t.
|
||||
* Equivalent to reset().next(uchar).
|
||||
* @param uchar Input char value. Values below 0 and above 0xffff will never match.
|
||||
* @return The match/value Result.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline UStringTrieResult first(int32_t uchar) {
    // Starting over: clear any pending linear-match state before matching
    // the input unit against the first node of the trie.
    remainingMatchLength_ = -1;
    const char16_t *root = uchars_;
    return nextImpl(root, uchar);
}
|
||||
|
||||
/**
|
||||
* Traverses the trie from the initial state for the
|
||||
* one or two UTF-16 code units for this input code point.
|
||||
* Equivalent to reset().nextForCodePoint(cp).
|
||||
* @param cp A Unicode code point 0..0x10ffff.
|
||||
* @return The match/value Result.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UStringTrieResult firstForCodePoint(UChar32 cp);
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this input char16_t.
|
||||
* @param uchar Input char value. Values below 0 and above 0xffff will never match.
|
||||
* @return The match/value Result.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UStringTrieResult next(int32_t uchar);
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for the
|
||||
* one or two UTF-16 code units for this input code point.
|
||||
* @param cp A Unicode code point 0..0x10ffff.
|
||||
* @return The match/value Result.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UStringTrieResult nextForCodePoint(UChar32 cp);
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this string.
|
||||
* Equivalent to
|
||||
* \code
|
||||
* Result result=current();
|
||||
* for(each c in s)
|
||||
* if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
|
||||
* result=next(c);
|
||||
* return result;
|
||||
* \endcode
|
||||
* @param s A string. Can be nullptr if length is 0.
|
||||
* @param length The length of the string. Can be -1 if NUL-terminated.
|
||||
* @return The match/value Result.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UStringTrieResult next(ConstChar16Ptr s, int32_t length);
|
||||
|
||||
/**
|
||||
* Returns a matching string's value if called immediately after
|
||||
* current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
|
||||
* getValue() can be called multiple times.
|
||||
*
|
||||
* Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
|
||||
* @return The value for the string so far.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline int32_t getValue() const {
|
||||
const char16_t *pos=pos_;
|
||||
int32_t leadUnit=*pos++;
|
||||
// U_ASSERT(leadUnit>=kMinValueLead);
|
||||
return leadUnit&kValueIsFinal ?
|
||||
readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether all strings reachable from the current state
|
||||
* map to the same value.
|
||||
* @param uniqueValue Receives the unique value, if this function returns true.
|
||||
* (output-only)
|
||||
* @return true if all strings reachable from the current state
|
||||
* map to the same value.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline UBool hasUniqueValue(int32_t &uniqueValue) const {
    // A null position means there is nothing reachable from here.
    const bool stopped = (pos_ == nullptr);
    // Skip the rest of a pending linear-match node before searching.
    return !stopped && findUniqueValue(pos_ + remainingMatchLength_ + 1, false, uniqueValue);
}
|
||||
|
||||
/**
|
||||
* Finds each char16_t which continues the string from the current state.
|
||||
* That is, each char16_t c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now.
|
||||
* @param out Each next char16_t is appended to this object.
|
||||
* @return the number of char16_ts which continue the string from here
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t getNextUChars(Appendable &out) const;
|
||||
|
||||
/**
|
||||
* Iterator for all of the (string, value) pairs in a UCharsTrie.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API Iterator : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Iterates from the root of a char16_t-serialized UCharsTrie.
|
||||
* @param trieUChars The trie char16_ts.
|
||||
* @param maxStringLength If 0, the iterator returns full strings.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Iterates from the current state of the specified UCharsTrie.
|
||||
* @param trie The trie whose state will be copied for iteration.
|
||||
* @param maxStringLength If 0, the iterator returns full strings.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
~Iterator();
|
||||
|
||||
/**
|
||||
* Resets this iterator to its initial state.
|
||||
* @return *this
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
Iterator &reset();
|
||||
|
||||
/**
|
||||
* @return true if there are more elements.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UBool hasNext() const;
|
||||
|
||||
/**
|
||||
* Finds the next (string, value) pair if there is one.
|
||||
*
|
||||
* If the string is truncated to the maximum length and does not
|
||||
* have a real value, then the value is set to -1.
|
||||
* In this case, this "not a real value" is indistinguishable from
|
||||
* a real value of -1.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return true if there is another element.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return The string for the last successful next().
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
const UnicodeString &getString() const {
    return str_;  // reference to the iterator's own string member
}
|
||||
/**
|
||||
* @return The value for the last successful next().
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
int32_t getValue() const {
    return value_;  // the iterator's stored value member
}
|
||||
|
||||
private:
|
||||
UBool truncateAndStop() {
    value_ = -1;     // no real value for str
    pos_ = nullptr;  // clear the iterator's read position
    return true;
}
|
||||
|
||||
const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode);
|
||||
|
||||
const char16_t *uchars_;
|
||||
const char16_t *pos_;
|
||||
const char16_t *initialPos_;
|
||||
int32_t remainingMatchLength_;
|
||||
int32_t initialRemainingMatchLength_;
|
||||
UBool skipValue_; // Skip intermediate value which was already delivered.
|
||||
|
||||
UnicodeString str_;
|
||||
int32_t maxLength_;
|
||||
int32_t value_;
|
||||
|
||||
// The stack stores pairs of integers for backtracking to another
|
||||
// outbound edge of a branch node.
|
||||
// The first integer is an offset from uchars_.
|
||||
// The second integer has the str_.length() from before the node in bits 15..0,
|
||||
// and the remaining branch length in bits 31..16.
|
||||
// (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit,
|
||||
// but the code looks more confusing that way.)
|
||||
UVector32 *stack_;
|
||||
};
|
||||
|
||||
private:
|
||||
friend class UCharsTrieBuilder;
|
||||
|
||||
/**
|
||||
* Constructs a UCharsTrie reader instance.
|
||||
* Unlike the public constructor which just aliases an array,
|
||||
* this constructor adopts the builder's array.
|
||||
* This constructor is only called by the builder.
|
||||
*/
|
||||
UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars)
|
||||
: ownedArray_(adoptUChars), uchars_(trieUChars),
|
||||
pos_(uchars_), remainingMatchLength_(-1) {}
|
||||
|
||||
// No assignment operator.
|
||||
UCharsTrie &operator=(const UCharsTrie &other) = delete;
|
||||
|
||||
inline void stop() { pos_ = nullptr; }  // a null position means "no more matches"
|
||||
|
||||
// Reads a compact 32-bit integer.
|
||||
// pos is already after the leadUnit, and the lead unit has bit 15 reset.
|
||||
static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) {
    if(leadUnit < kMinTwoUnitValueLead) {
        // One unit: the lead unit is the value.
        return leadUnit;
    }
    if(leadUnit < kThreeUnitValueLead) {
        // Two units: high bits from the lead unit, low 16 bits from the next unit.
        return ((leadUnit - kMinTwoUnitValueLead) << 16) | *pos;
    }
    // Three units: the two units after the lead hold the full 32-bit value.
    return (pos[0] << 16) | pos[1];
}
|
||||
static inline const char16_t *skipValue(const char16_t *pos, int32_t leadUnit) {
|
||||
if(leadUnit>=kMinTwoUnitValueLead) {
|
||||
if(leadUnit<kThreeUnitValueLead) {
|
||||
++pos;
|
||||
} else {
|
||||
pos+=2;
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
static inline const char16_t *skipValue(const char16_t *pos) {
|
||||
int32_t leadUnit=*pos++;
|
||||
return skipValue(pos, leadUnit&0x7fff);
|
||||
}
|
||||
|
||||
static inline int32_t readNodeValue(const char16_t *pos, int32_t leadUnit) {
    // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
    if(leadUnit < kMinTwoUnitNodeValueLead) {
        // One unit: the value sits above the low 6 bits, stored plus one.
        return (leadUnit >> 6) - 1;
    }
    if(leadUnit < kThreeUnitNodeValueLead) {
        // Two units: high bits from the lead unit, low 16 bits from the next unit.
        return (((leadUnit & 0x7fc0) - kMinTwoUnitNodeValueLead) << 10) | *pos;
    }
    // Three units: the two units after the lead hold the full 32-bit value.
    return (pos[0] << 16) | pos[1];
}
|
||||
static inline const char16_t *skipNodeValue(const char16_t *pos, int32_t leadUnit) {
|
||||
// U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
|
||||
if(leadUnit>=kMinTwoUnitNodeValueLead) {
|
||||
if(leadUnit<kThreeUnitNodeValueLead) {
|
||||
++pos;
|
||||
} else {
|
||||
pos+=2;
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
static inline const char16_t *jumpByDelta(const char16_t *pos) {
|
||||
int32_t delta=*pos++;
|
||||
if(delta>=kMinTwoUnitDeltaLead) {
|
||||
if(delta==kThreeUnitDeltaLead) {
|
||||
delta=(pos[0]<<16)|pos[1];
|
||||
pos+=2;
|
||||
} else {
|
||||
delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
|
||||
}
|
||||
}
|
||||
return pos+delta;
|
||||
}
|
||||
|
||||
static const char16_t *skipDelta(const char16_t *pos) {
|
||||
int32_t delta=*pos++;
|
||||
if(delta>=kMinTwoUnitDeltaLead) {
|
||||
if(delta==kThreeUnitDeltaLead) {
|
||||
pos+=2;
|
||||
} else {
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
static inline UStringTrieResult valueResult(int32_t node) {
    // Bits 15 and up of the node; kValueIsFinal (0x8000) is bit 15.
    // NOTE(review): presumably node is a 16-bit lead unit, so this is 0 or 1 -- confirm.
    const int32_t finalBit = node >> 15;
    return static_cast<UStringTrieResult>(USTRINGTRIE_INTERMEDIATE_VALUE - finalBit);
}
|
||||
|
||||
// Handles a branch node for both next(uchar) and next(string).
|
||||
UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar);
|
||||
|
||||
// Requires remainingMatchLength_<0.
|
||||
UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar);
|
||||
|
||||
// Helper functions for hasUniqueValue().
|
||||
// Recursively finds a unique value (or whether there is not a unique one)
|
||||
// from a branch.
|
||||
static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length,
|
||||
UBool haveUniqueValue, int32_t &uniqueValue);
|
||||
// Recursively finds a unique value (or whether there is not a unique one)
|
||||
// starting from a position on a node lead unit.
|
||||
static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
|
||||
|
||||
// Helper functions for getNextUChars().
|
||||
// getNextUChars() when pos is on a branch node.
|
||||
static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out);
|
||||
|
||||
// UCharsTrie data structure
|
||||
//
|
||||
// The trie consists of a series of char16_t-serialized nodes for incremental
|
||||
// Unicode string/char16_t sequence matching. (char16_t=16-bit unsigned integer)
|
||||
// The root node is at the beginning of the trie data.
|
||||
//
|
||||
// Types of nodes are distinguished by their node lead unit ranges.
|
||||
// After each node, except a final-value node, another node follows to
|
||||
// encode match values or continue matching further units.
|
||||
//
|
||||
// Node types:
|
||||
// - Final-value node: Stores a 32-bit integer in a compact, variable-length format.
|
||||
// The value is for the string/char16_t sequence so far.
|
||||
// - Match node, optionally with an intermediate value in a different compact format.
|
||||
// The value, if present, is for the string/char16_t sequence so far.
|
||||
//
|
||||
// Aside from the value, which uses the node lead unit's high bits:
|
||||
//
|
||||
// - Linear-match node: Matches a number of units.
|
||||
// - Branch node: Branches to other nodes according to the current input unit.
|
||||
// The node unit is the length of the branch (number of units to select from)
|
||||
// minus 1. It is followed by a sub-node:
|
||||
// - If the length is at most kMaxBranchLinearSubNodeLength, then
|
||||
// there are length-1 (key, value) pairs and then one more comparison unit.
|
||||
// If one of the key units matches, then the value is either a final value for
|
||||
// the string so far, or a "jump" delta to the next node.
|
||||
// If the last unit matches, then matching continues with the next node.
|
||||
// (Values have the same encoding as final-value nodes.)
|
||||
// - If the length is greater than kMaxBranchLinearSubNodeLength, then
|
||||
// there is one unit and one "jump" delta.
|
||||
// If the input unit is less than the sub-node unit, then "jump" by delta to
|
||||
// the next sub-node which will have a length of length/2.
|
||||
// (The delta has its own compact encoding.)
|
||||
// Otherwise, skip the "jump" delta to the next sub-node
|
||||
// which will have a length of length-length/2.
|
||||
|
||||
// Match-node lead unit values, after masking off intermediate-value bits:
|
||||
|
||||
// 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise
|
||||
// the length is one more than the next unit.
|
||||
|
||||
// For a branch sub-node with at most this many entries, we drop down
|
||||
// to a linear search.
|
||||
static const int32_t kMaxBranchLinearSubNodeLength=5;
|
||||
|
||||
// 0030..003f: Linear-match node, match 1..16 units and continue reading the next node.
|
||||
static const int32_t kMinLinearMatch=0x30;
|
||||
static const int32_t kMaxLinearMatchLength=0x10;
|
||||
|
||||
// Match-node lead unit bits 14..6 for the optional intermediate value.
|
||||
// If these bits are 0, then there is no intermediate value.
|
||||
// Otherwise, see the *NodeValue* constants below.
|
||||
static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x0040
|
||||
static const int32_t kNodeTypeMask=kMinValueLead-1; // 0x003f
|
||||
|
||||
// A final-value node has bit 15 set.
|
||||
static const int32_t kValueIsFinal=0x8000;
|
||||
|
||||
// Compact value: After testing and masking off bit 15, use the following thresholds.
|
||||
static const int32_t kMaxOneUnitValue=0x3fff;
|
||||
|
||||
static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1; // 0x4000
|
||||
static const int32_t kThreeUnitValueLead=0x7fff;
|
||||
|
||||
static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1; // 0x3ffeffff
|
||||
|
||||
// Compact intermediate-value integer, lead unit shared with a branch or linear-match node.
|
||||
static const int32_t kMaxOneUnitNodeValue=0xff;
|
||||
static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6); // 0x4040
|
||||
static const int32_t kThreeUnitNodeValueLead=0x7fc0;
|
||||
|
||||
static const int32_t kMaxTwoUnitNodeValue=
|
||||
((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1; // 0xfdffff
|
||||
|
||||
// Compact delta integers.
|
||||
static const int32_t kMaxOneUnitDelta=0xfbff;
|
||||
static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1; // 0xfc00
|
||||
static const int32_t kThreeUnitDeltaLead=0xffff;
|
||||
|
||||
static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff
|
||||
|
||||
// For getState64():
|
||||
// The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2
|
||||
// so we need at least 5 bits for that.
|
||||
// We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength.
|
||||
static constexpr int32_t kState64RemainingShift = 59;
|
||||
static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
|
||||
|
||||
char16_t *ownedArray_;
|
||||
|
||||
// Fixed value referencing the UCharsTrie words.
|
||||
const char16_t *uchars_;
|
||||
|
||||
// Iterator variables.
|
||||
|
||||
// Pointer to next trie unit to read. nullptr if no more matches.
|
||||
const char16_t *pos_;
|
||||
// Remaining length of a linear-match node, minus 1. Negative if not in such a node.
|
||||
int32_t remainingMatchLength_;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __UCHARSTRIE_H__
|
||||
193
thirdparty/icu4c/common/unicode/ucharstriebuilder.h
vendored
Normal file
193
thirdparty/icu4c/common/unicode/ucharstriebuilder.h
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: ucharstriebuilder.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010nov14
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UCHARSTRIEBUILDER_H__
|
||||
#define __UCHARSTRIEBUILDER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/stringtriebuilder.h"
|
||||
#include "unicode/ucharstrie.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Builder for icu::UCharsTrie
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UCharsTrieElement;
|
||||
|
||||
/**
|
||||
* Builder class for UCharsTrie.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API UCharsTrieBuilder : public StringTrieBuilder {
|
||||
public:
|
||||
/**
|
||||
* Constructs an empty builder.
|
||||
* @param errorCode Standard ICU error code.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UCharsTrieBuilder(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
virtual ~UCharsTrieBuilder();
|
||||
|
||||
/**
|
||||
* Adds a (string, value) pair.
|
||||
* The string must be unique.
|
||||
* The string contents will be copied; the builder does not keep
|
||||
* a reference to the input UnicodeString or its buffer.
|
||||
* @param s The input string.
|
||||
* @param value The value associated with this string.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Builds a UCharsTrie for the add()ed data.
|
||||
* Once built, no further data can be add()ed until clear() is called.
|
||||
*
|
||||
* A UCharsTrie cannot be empty. At least one (string, value) pair
|
||||
* must have been add()ed.
|
||||
*
|
||||
* This method passes ownership of the builder's internal result array to the new trie object.
|
||||
* Another call to any build() variant will re-serialize the trie.
|
||||
* After clear() has been called, a new array will be used as well.
|
||||
* @param buildOption Build option, see UStringTrieBuildOption.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return A new UCharsTrie for the add()ed data.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Builds a UCharsTrie for the add()ed data and char16_t-serializes it.
|
||||
* Once built, no further data can be add()ed until clear() is called.
|
||||
*
|
||||
* A UCharsTrie cannot be empty. At least one (string, value) pair
|
||||
* must have been add()ed.
|
||||
*
|
||||
* Multiple calls to buildUnicodeString() set the UnicodeStrings to the
|
||||
* builder's same char16_t array, without rebuilding.
|
||||
* If buildUnicodeString() is called after build(), the trie will be
|
||||
* re-serialized into a new array (because build() passes on ownership).
|
||||
* If build() is called after buildUnicodeString(), the trie object returned
|
||||
* by build() will become the owner of the underlying data for the
|
||||
* previously returned UnicodeString.
|
||||
* After clear() has been called, a new array will be used as well.
|
||||
* @param buildOption Build option, see UStringTrieBuildOption.
|
||||
* @param result A UnicodeString which will be set to the char16_t-serialized
|
||||
* UCharsTrie for the add()ed data.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return result
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UnicodeString &buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Removes all (string, value) pairs.
|
||||
* New data can then be add()ed and a new trie can be built.
|
||||
* @return *this
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
UCharsTrieBuilder &clear() {
    // Reset both length counters and empty the strings member.
    ucharsLength = 0;
    elementsLength = 0;
    strings.remove();
    return *this;
}
|
||||
|
||||
private:
|
||||
UCharsTrieBuilder(const UCharsTrieBuilder &other) = delete; // no copy constructor
|
||||
UCharsTrieBuilder &operator=(const UCharsTrieBuilder &other) = delete; // no assignment operator
|
||||
|
||||
void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
|
||||
|
||||
virtual int32_t getElementStringLength(int32_t i) const override;
|
||||
virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const override;
|
||||
virtual int32_t getElementValue(int32_t i) const override;
|
||||
|
||||
virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const override;
|
||||
|
||||
virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const override;
|
||||
virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const override;
|
||||
virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const override;
|
||||
|
||||
virtual UBool matchNodesCanHaveValues() const override {
    return true;  // this builder always answers yes
}
|
||||
|
||||
virtual int32_t getMaxBranchLinearSubNodeLength() const override {
    // Forward the reader's constant so builder and reader agree.
    return UCharsTrie::kMaxBranchLinearSubNodeLength;
}
|
||||
virtual int32_t getMinLinearMatch() const override {
    // Forward the reader's constant so builder and reader agree.
    return UCharsTrie::kMinLinearMatch;
}
|
||||
virtual int32_t getMaxLinearMatchLength() const override {
    // Forward the reader's constant so builder and reader agree.
    return UCharsTrie::kMaxLinearMatchLength;
}
|
||||
|
||||
class UCTLinearMatchNode : public LinearMatchNode {
|
||||
public:
|
||||
UCTLinearMatchNode(const char16_t *units, int32_t len, Node *nextNode);
|
||||
virtual bool operator==(const Node &other) const override;
|
||||
virtual void write(StringTrieBuilder &builder) override;
|
||||
private:
|
||||
const char16_t *s;
|
||||
};
|
||||
|
||||
virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
|
||||
Node *nextNode) const override;
|
||||
|
||||
UBool ensureCapacity(int32_t length);
|
||||
virtual int32_t write(int32_t unit) override;
|
||||
int32_t write(const char16_t *s, int32_t length);
|
||||
virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) override;
|
||||
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) override;
|
||||
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) override;
|
||||
virtual int32_t writeDeltaTo(int32_t jumpTarget) override;
|
||||
|
||||
UnicodeString strings;
|
||||
UCharsTrieElement *elements;
|
||||
int32_t elementsCapacity;
|
||||
int32_t elementsLength;
|
||||
|
||||
// char16_t serialization of the trie.
|
||||
// Grows from the back: ucharsLength measures from the end of the buffer!
|
||||
char16_t *uchars;
|
||||
int32_t ucharsCapacity;
|
||||
int32_t ucharsLength;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __UCHARSTRIEBUILDER_H__
|
||||
393
thirdparty/icu4c/common/unicode/uchriter.h
vendored
Normal file
393
thirdparty/icu4c/common/unicode/uchriter.h
vendored
Normal file
@@ -0,0 +1,393 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1998-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UCHRITER_H
|
||||
#define UCHRITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/chariter.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: char16_t Character Iterator
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A concrete subclass of CharacterIterator that iterates over the
|
||||
* characters (code units or code points) in a char16_t array.
|
||||
* It's possible not only to create an
|
||||
* iterator that iterates over an entire char16_t array, but also to
|
||||
* create one that iterates over only a subrange of a char16_t array
|
||||
* (iterators over different subranges of the same char16_t array don't
|
||||
* compare equal).
|
||||
* @see CharacterIterator
|
||||
* @see ForwardCharacterIterator
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
|
||||
public:
|
||||
/**
|
||||
* Create an iterator over the char16_t array referred to by "textPtr".
|
||||
* The iteration range is 0 to <code>length-1</code>.
|
||||
* text is only aliased, not adopted (the
|
||||
* destructor will not delete it).
|
||||
* @param textPtr The char16_t array to be iterated over
|
||||
* @param length The length of the char16_t array
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length);
|
||||
|
||||
/**
|
||||
* Create an iterator over the char16_t array referred to by "textPtr".
|
||||
* The iteration range is 0 to <code>length-1</code>.
|
||||
* text is only aliased, not adopted (the
|
||||
* destructor will not delete it).
|
||||
* The starting
|
||||
* position is specified by "position". If "position" is outside the valid
|
||||
* iteration range, the behavior of this object is undefined.
|
||||
* @param textPtr The char16_t array to be iterated over
|
||||
* @param length The length of the char16_t array
|
||||
* @param position The starting position of the iteration
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length,
|
||||
int32_t position);
|
||||
|
||||
/**
|
||||
* Create an iterator over the char16_t array referred to by "textPtr".
|
||||
* The iteration range is 0 to <code>end-1</code>.
|
||||
* text is only aliased, not adopted (the
|
||||
* destructor will not delete it).
|
||||
* The starting
|
||||
* position is specified by "position". If begin and end do not
|
||||
* form a valid iteration range or "position" is outside the valid
|
||||
* iteration range, the behavior of this object is undefined.
|
||||
* @param textPtr The char16_t array to be iterated over
|
||||
* @param length The length of the char16_t array
|
||||
* @param textBegin The begin position of the iteration range
|
||||
* @param textEnd The end position of the iteration range
|
||||
* @param position The starting position of the iteration
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length,
|
||||
int32_t textBegin,
|
||||
int32_t textEnd,
|
||||
int32_t position);
|
||||
|
||||
/**
|
||||
* Copy constructor. The new iterator iterates over the same range
|
||||
* of the same string as "that", and its initial position is the
|
||||
* same as "that"'s current position.
|
||||
* @param that The UCharCharacterIterator to be copied
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UCharCharacterIterator(const UCharCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~UCharCharacterIterator();
|
||||
|
||||
/**
|
||||
* Assignment operator. *this is altered to iterate over the same
|
||||
* range of the same string as "that", and refers to the same
|
||||
* character within that string as "that" does.
|
||||
* @param that The object to be copied
|
||||
* @return a reference to this iterator
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UCharCharacterIterator&
|
||||
operator=(const UCharCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Returns true if the iterators iterate over the same range of the
|
||||
* same string and are pointing at the same character.
|
||||
* @param that The ForwardCharacterIterator to be compared for equality
|
||||
* @return true if the iterators iterate over the same range of the
|
||||
* same string and are pointing at the same character.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual bool operator==(const ForwardCharacterIterator& that) const override;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator.
|
||||
* @return the hash code.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t hashCode() const override;
|
||||
|
||||
/**
|
||||
* Returns a new UCharCharacterIterator referring to the same
|
||||
* character in the same range of the same string as this one. The
|
||||
* caller must delete the new iterator.
|
||||
* @return the CharacterIterator newly created
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UCharCharacterIterator* clone() const override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first code unit in its
|
||||
* iteration range, and returns that code unit.
|
||||
* This can be used to begin an iteration with next().
|
||||
* @return the first code unit in its iteration range.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual char16_t first() override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first code unit in its
|
||||
* iteration range, returns that code unit, and moves the position
|
||||
* to the second code unit. This is an alternative to setToStart()
|
||||
* for forward iteration with nextPostInc().
|
||||
* @return the first code unit in its iteration range
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual char16_t firstPostInc() override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first code point in its
|
||||
* iteration range, and returns that code point.
|
||||
* This can be used to begin an iteration with next32().
|
||||
* Note that an iteration with next32PostInc(), beginning with,
|
||||
* e.g., setToStart() or firstPostInc(), is more efficient.
|
||||
* @return the first code point in its iteration range
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UChar32 first32() override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first code point in its
|
||||
* iteration range, returns that code point, and moves the position
|
||||
* to the second code point. This is an alternative to setToStart()
|
||||
* for forward iteration with next32PostInc().
|
||||
* @return the first code point in its iteration range.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UChar32 first32PostInc() override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last code unit in its
|
||||
* iteration range, and returns that code unit.
|
||||
* This can be used to begin an iteration with previous().
|
||||
* @return the last code unit in its iteration range.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual char16_t last() override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last code point in its
|
||||
* iteration range, and returns that code point.
|
||||
* This can be used to begin an iteration with previous32().
|
||||
* @return the last code point in its iteration range.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UChar32 last32() override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th code unit
|
||||
* in the text-storage object the iterator refers to, and
|
||||
* returns that code unit.
|
||||
* @param position the position within the text-storage object
|
||||
* @return the code unit
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual char16_t setIndex(int32_t position) override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the beginning of the code point
|
||||
* that contains the "position"-th code unit
|
||||
* in the text-storage object the iterator refers to, and
|
||||
* returns that code point.
|
||||
* The current position is adjusted to the beginning of the code point
|
||||
* (its first code unit).
|
||||
* @param position the position within the text-storage object
|
||||
* @return the code point
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UChar32 setIndex32(int32_t position) override;
|
||||
|
||||
/**
|
||||
* Returns the code unit the iterator currently refers to.
|
||||
* @return the code unit the iterator currently refers to.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual char16_t current() const override;
|
||||
|
||||
/**
|
||||
* Returns the code point the iterator currently refers to.
|
||||
* @return the code point the iterator currently refers to.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UChar32 current32() const override;
|
||||
|
||||
/**
|
||||
* Advances to the next code unit in the iteration range (toward
|
||||
* endIndex()), and returns that code unit. If there are no more
|
||||
* code units to return, returns DONE.
|
||||
* @return the next code unit in the iteration range.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual char16_t next() override;
|
||||
|
||||
/**
|
||||
* Gets the current code unit for returning and advances to the next code unit
|
||||
* in the iteration range
|
||||
* (toward endIndex()). If there are
|
||||
* no more code units to return, returns DONE.
|
||||
* @return the current code unit.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual char16_t nextPostInc() override;
|
||||
|
||||
/**
|
||||
* Advances to the next code point in the iteration range (toward
|
||||
* endIndex()), and returns that code point. If there are no more
|
||||
* code points to return, returns DONE.
|
||||
* Note that iteration with "pre-increment" semantics is less
|
||||
* efficient than iteration with "post-increment" semantics
|
||||
* that is provided by next32PostInc().
|
||||
* @return the next code point in the iteration range.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UChar32 next32() override;
|
||||
|
||||
/**
|
||||
* Gets the current code point for returning and advances to the next code point
|
||||
* in the iteration range
|
||||
* (toward endIndex()). If there are
|
||||
* no more code points to return, returns DONE.
|
||||
* @return the current code point.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UChar32 next32PostInc() override;
|
||||
|
||||
/**
|
||||
* Returns false if there are no more code units or code points
|
||||
* at or after the current position in the iteration range.
|
||||
* This is used with nextPostInc() or next32PostInc() in forward
|
||||
* iteration.
|
||||
* @return false if there are no more code units or code points
|
||||
* at or after the current position in the iteration range.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UBool hasNext() override;
|
||||
|
||||
/**
|
||||
* Advances to the previous code unit in the iteration range (toward
|
||||
* startIndex()), and returns that code unit. If there are no more
|
||||
* code units to return, returns DONE.
|
||||
* @return the previous code unit in the iteration range.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual char16_t previous() override;
|
||||
|
||||
/**
|
||||
* Advances to the previous code point in the iteration range (toward
|
||||
* startIndex()), and returns that code point. If there are no more
|
||||
* code points to return, returns DONE.
|
||||
* @return the previous code point in the iteration range.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UChar32 previous32() override;
|
||||
|
||||
/**
|
||||
* Returns false if there are no more code units or code points
|
||||
* before the current position in the iteration range.
|
||||
* This is used with previous() or previous32() in backward
|
||||
* iteration.
|
||||
* @return false if there are no more code units or code points
|
||||
* before the current position in the iteration range.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UBool hasPrevious() override;
|
||||
|
||||
/**
|
||||
* Moves the current position relative to the start or end of the
|
||||
* iteration range, or relative to the current position itself.
|
||||
* The movement is expressed in numbers of code units forward
|
||||
* or backward by specifying a positive or negative delta.
|
||||
* @param delta the position relative to origin. A positive delta means forward;
|
||||
* a negative delta means backward.
|
||||
* @param origin Origin enumeration {kStart, kCurrent, kEnd}
|
||||
* @return the new position
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t move(int32_t delta, EOrigin origin) override;
|
||||
|
||||
/**
|
||||
* Moves the current position relative to the start or end of the
|
||||
* iteration range, or relative to the current position itself.
|
||||
* The movement is expressed in numbers of code points forward
|
||||
* or backward by specifying a positive or negative delta.
|
||||
* @param delta the position relative to origin. A positive delta means forward;
|
||||
* a negative delta means backward.
|
||||
* @param origin Origin enumeration {kStart, kCurrent, kEnd}
|
||||
* @return the new position
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef move32
|
||||
// One of the system headers right now is sometimes defining a conflicting macro we don't use
|
||||
#undef move32
|
||||
#endif
|
||||
virtual int32_t move32(int32_t delta, EOrigin origin) override;
|
||||
|
||||
/**
|
||||
* Sets the iterator to iterate over a new range of text
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
void setText(ConstChar16Ptr newText, int32_t newTextLength);
|
||||
|
||||
/**
|
||||
* Copies the char16_t array under iteration into the UnicodeString
|
||||
* referred to by "result". Even if this iterator iterates across
|
||||
* only a part of this string, the whole string is copied.
|
||||
* @param result Receives a copy of the text under iteration.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void getText(UnicodeString& result) override;
|
||||
|
||||
/**
|
||||
* Return a class ID for this class (not really public)
|
||||
* @return a class ID for this class
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
/**
|
||||
* Return a class ID for this object (not really public)
|
||||
* @return a class ID for this object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const override;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Protected constructor
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UCharCharacterIterator();
|
||||
/**
|
||||
* Protected member text
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
const char16_t* text;
|
||||
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
262
thirdparty/icu4c/common/unicode/uclean.h
vendored
Normal file
262
thirdparty/icu4c/common/unicode/uclean.h
vendored
Normal file
@@ -0,0 +1,262 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 2001-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
* file name: uclean.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2001July05
|
||||
* created by: George Rhoten
|
||||
*/
|
||||
|
||||
#ifndef __UCLEAN_H__
|
||||
#define __UCLEAN_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Initialize and clean up ICU
|
||||
*/
|
||||
|
||||
/**
|
||||
* Initialize ICU.
|
||||
*
|
||||
* Use of this function is optional. It is OK to simply use ICU
|
||||
* services and functions without first having initialized
|
||||
* ICU by calling u_init().
|
||||
*
|
||||
* u_init() will attempt to load some part of ICU's data, and is
|
||||
* useful as a test for configuration or installation problems that
|
||||
* leave the ICU data inaccessible. A successful invocation of u_init()
|
||||
* does not, however, guarantee that all ICU data is accessible.
|
||||
*
|
||||
* Multiple calls to u_init() cause no harm, aside from the small amount
|
||||
* of time required.
|
||||
*
|
||||
* In old versions of ICU, u_init() was required in multi-threaded applications
|
||||
* to ensure the thread safety of ICU. u_init() is no longer needed for this purpose.
|
||||
*
|
||||
* @param status An ICU UErrorCode parameter. It must not be <code>NULL</code>.
|
||||
* An Error will be returned if some required part of ICU data can not
|
||||
* be loaded or initialized.
|
||||
* The function returns immediately if the input error code indicates a
|
||||
* failure, as usual.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_init(UErrorCode *status);
|
||||
|
||||
#ifndef U_HIDE_SYSTEM_API
|
||||
/**
|
||||
* Clean up the system resources, such as allocated memory or open files,
|
||||
* used in all ICU libraries. This will free/delete all memory owned by the
|
||||
* ICU libraries, and return them to their original load state. All open ICU
|
||||
* items (collators, resource bundles, converters, etc.) must be closed before
|
||||
* calling this function, otherwise ICU may not free its allocated memory
|
||||
* (e.g. close your converters and resource bundles before calling this
|
||||
* function). Generally, this function should be called once just before
|
||||
* an application exits. For applications that dynamically load and unload
|
||||
* the ICU libraries (relatively uncommon), u_cleanup() should be called
|
||||
* just before the library unload.
|
||||
* <p>
|
||||
* u_cleanup() also clears any ICU heap functions, mutex functions or
|
||||
* trace functions that may have been set for the process.
|
||||
* This has the effect of restoring ICU to its initial condition, before
|
||||
* any of these override functions were installed. Refer to
|
||||
* u_setMemoryFunctions(), u_setMutexFunctions() and
|
||||
* utrace_setFunctions(). If ICU is to be reinitialized after
|
||||
* calling u_cleanup(), these runtime override functions will need to
|
||||
* be set up again if they are still required.
|
||||
* <p>
|
||||
* u_cleanup() is not thread safe. All other threads should stop using ICU
|
||||
* before calling this function.
|
||||
* <p>
|
||||
* Any open ICU items will be left in an undefined state by u_cleanup(),
|
||||
* and any subsequent attempt to use such an item will give unpredictable
|
||||
* results.
|
||||
* <p>
|
||||
* After calling u_cleanup(), an application may continue to use ICU by
|
||||
* calling u_init(). An application must invoke u_init() first from one single
|
||||
* thread before allowing other threads to call u_init(). All threads existing
|
||||
* at the time of the first thread's call to u_init() must also call
|
||||
* u_init() themselves before continuing with other ICU operations.
|
||||
* <p>
|
||||
* The use of u_cleanup() just before an application terminates is optional,
|
||||
* but it should be called only once for performance reasons. The primary
|
||||
* benefit is to eliminate reports of memory or resource leaks originating
|
||||
* in ICU code from the results generated by heap analysis tools.
|
||||
* <p>
|
||||
* <strong>Use this function with great care!</strong>
|
||||
* </p>
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
* @system
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_cleanup(void);
|
||||
|
||||
U_CDECL_BEGIN
|
||||
/**
|
||||
* Pointer type for a user supplied memory allocation function.
|
||||
* @param context user supplied value, obtained from u_setMemoryFunctions().
|
||||
* @param size The number of bytes to be allocated
|
||||
* @return Pointer to the newly allocated memory, or NULL if the allocation failed.
|
||||
* @stable ICU 2.8
|
||||
* @system
|
||||
*/
|
||||
typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
|
||||
/**
|
||||
* Pointer type for a user supplied memory re-allocation function.
|
||||
* @param context user supplied value, obtained from u_setMemoryFunctions().
|
||||
* @param mem Pointer to the memory block to be resized.
|
||||
* @param size The new size for the block.
|
||||
* @return Pointer to the resized memory block, or NULL if the resizing failed.
|
||||
* @stable ICU 2.8
|
||||
* @system
|
||||
*/
|
||||
typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size);
|
||||
/**
|
||||
* Pointer type for a user supplied memory free function. Behavior should be
|
||||
* similar to the standard C library free().
|
||||
* @param context user supplied value, obtained from u_setMemoryFunctions().
|
||||
* @param mem Pointer to the memory block to be freed.
|
||||
* @return None (the function returns void).
|
||||
* @stable ICU 2.8
|
||||
* @system
|
||||
*/
|
||||
typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem);
|
||||
|
||||
/**
|
||||
* Set the functions that ICU will use for memory allocation.
|
||||
* Use of this function is optional; by default (without this function), ICU will
|
||||
* use the standard C library malloc() and free() functions.
|
||||
* This function can only be used when ICU is in an initial, unused state, before
|
||||
* u_init() has been called.
|
||||
* @param context This pointer value will be saved, and then (later) passed as
|
||||
* a parameter to the memory functions each time they
|
||||
* are called.
|
||||
* @param a Pointer to a user-supplied malloc function.
|
||||
* @param r Pointer to a user-supplied realloc function.
|
||||
* @param f Pointer to a user-supplied free function.
|
||||
* @param status Receives error values.
|
||||
* @stable ICU 2.8
|
||||
* @system
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f,
|
||||
UErrorCode *status);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/*********************************************************************************
|
||||
*
|
||||
* Deprecated Functions
|
||||
*
|
||||
* The following functions for user supplied mutexes are no longer supported.
|
||||
* Any attempt to use them will return a U_UNSUPPORTED_ERROR.
|
||||
*
|
||||
**********************************************************************************/
|
||||
|
||||
/**
|
||||
* An opaque pointer type that represents an ICU mutex.
|
||||
* For user-implemented mutexes, the value will typically point to a
|
||||
* struct or object that implements the mutex.
|
||||
* @deprecated ICU 52. This type is no longer supported.
|
||||
* @system
|
||||
*/
|
||||
typedef void *UMTX;
|
||||
|
||||
U_CDECL_BEGIN
|
||||
/**
|
||||
* Function Pointer type for a user supplied mutex initialization function.
|
||||
* The user-supplied function will be called by ICU whenever ICU needs to create a
|
||||
* new mutex. The function implementation should create a mutex, and store a pointer
|
||||
* to something that uniquely identifies the mutex into the UMTX that is supplied
|
||||
* as a parameter.
|
||||
* @param context user supplied value, obtained from u_setMutexFunctions().
|
||||
* @param mutex Receives a pointer that identifies the new mutex.
|
||||
* The mutex init function must set the UMTX to a non-null value.
|
||||
* Subsequent calls by ICU to lock, unlock, or destroy a mutex will
|
||||
* identify the mutex by the UMTX value.
|
||||
* @param status Error status. Report errors back to ICU by setting this variable
|
||||
* with an error code.
|
||||
* @deprecated ICU 52. This function is no longer supported.
|
||||
* @system
|
||||
*/
|
||||
typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCode* status);
|
||||
|
||||
|
||||
/**
|
||||
* Function Pointer type for user supplied mutex functions.
|
||||
* One of the user-supplied functions with this signature will be called by ICU
|
||||
* whenever ICU needs to lock, unlock, or destroy a mutex.
|
||||
* @param context user supplied value, obtained from u_setMutexFunctions().
|
||||
* @param mutex specify the mutex on which to operate.
|
||||
* @deprecated ICU 52. This function is no longer supported.
|
||||
* @system
|
||||
*/
|
||||
typedef void U_CALLCONV UMtxFn (const void *context, UMTX *mutex);
|
||||
U_CDECL_END
|
||||
|
||||
/**
|
||||
* Set the functions that ICU will use for mutex operations
|
||||
* Use of this function is optional; by default (without this function), ICU will
|
||||
* directly access system functions for mutex operations
|
||||
* This function can only be used when ICU is in an initial, unused state, before
|
||||
* u_init() has been called.
|
||||
* @param context This pointer value will be saved, and then (later) passed as
|
||||
* a parameter to the user-supplied mutex functions each time they
|
||||
* are called.
|
||||
* @param init Pointer to a mutex initialization function. Must be non-null.
|
||||
* @param destroy Pointer to the mutex destroy function. Must be non-null.
|
||||
* @param lock pointer to the mutex lock function. Must be non-null.
|
||||
* @param unlock Pointer to the mutex unlock function. Must be non-null.
|
||||
* @param status Receives error values.
|
||||
* @deprecated ICU 52. This function is no longer supported.
|
||||
* @system
|
||||
*/
|
||||
U_DEPRECATED void U_EXPORT2
|
||||
u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Pointer type for a user supplied atomic increment or decrement function.
|
||||
* @param context user supplied value, obtained from u_setAtomicIncDecFunctions().
|
||||
* @param p Pointer to a 32 bit int to be incremented or decremented
|
||||
* @return The value of the variable after the inc or dec operation.
|
||||
* @deprecated ICU 52. This function is no longer supported.
|
||||
* @system
|
||||
*/
|
||||
typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p);
|
||||
|
||||
/**
|
||||
* Set the functions that ICU will use for atomic increment and decrement of int32_t values.
|
||||
* Use of this function is optional; by default (without this function), ICU will
|
||||
* use its own internal implementation of atomic increment/decrement.
|
||||
* This function can only be used when ICU is in an initial, unused state, before
|
||||
* u_init() has been called.
|
||||
* @param context This pointer value will be saved, and then (later) passed as
|
||||
* a parameter to the increment and decrement functions each time they
|
||||
* are called.
|
||||
* @param inc Pointer to a function to do an atomic increment operation. Must be non-null.
|
||||
* @param dec Pointer to a function to do an atomic decrement operation. Must be non-null.
|
||||
* @param status Receives error values.
|
||||
* @deprecated ICU 52. This function is no longer supported.
|
||||
* @system
|
||||
*/
|
||||
U_DEPRECATED void U_EXPORT2
|
||||
u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
#endif /* U_HIDE_SYSTEM_API */
|
||||
|
||||
#endif
|
||||
2053
thirdparty/icu4c/common/unicode/ucnv.h
vendored
Normal file
2053
thirdparty/icu4c/common/unicode/ucnv.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
164
thirdparty/icu4c/common/unicode/ucnv_cb.h
vendored
Normal file
164
thirdparty/icu4c/common/unicode/ucnv_cb.h
vendored
Normal file
@@ -0,0 +1,164 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* ucnv_cb.h:
|
||||
* External APIs for the ICU's codeset conversion library
|
||||
* Helena Shih
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: UConverter functions to aid the writers of callbacks
|
||||
*
|
||||
* <h2> Callback API for UConverter </h2>
|
||||
*
|
||||
* These functions are provided here for the convenience of the callback
|
||||
* writer. If you are just looking for callback functions to use, please
|
||||
* see ucnv_err.h. DO NOT call these functions directly when you are
|
||||
* working with converters, unless your code has been called as a callback
|
||||
* via ucnv_setFromUCallback or ucnv_setToUCallback !!
|
||||
*
|
||||
* A note about error codes and overflow. Unlike other ICU functions,
|
||||
* these functions do not expect the error status to be U_ZERO_ERROR.
|
||||
* Callbacks must be much more careful about their error codes.
|
||||
* The error codes used here are in/out parameters, which should be passed
|
||||
* back in the callback's error parameter.
|
||||
*
|
||||
* For example, if you call ucnv_cbFromUWriteBytes to write data out
|
||||
* to the output codepage, it may return U_BUFFER_OVERFLOW_ERROR if
|
||||
* the data did not fit in the target. But this isn't a failing error,
|
||||
* in fact, ucnv_cbFromUWriteBytes may be called AGAIN with the error
|
||||
* status still U_BUFFER_OVERFLOW_ERROR to attempt to write further bytes,
|
||||
* which will also go into the internal overflow buffers.
|
||||
*
|
||||
* Concerning offsets, the 'offset' parameters here are relative to the start
|
||||
* of SOURCE. For example, suppose the string "ABCD" was being converted
|
||||
* from Unicode into a codepage which doesn't have a mapping for 'B'.
|
||||
* 'A' will be written out correctly, but
|
||||
* the FromU Callback will be called on an unassigned character for 'B'.
|
||||
* At this point, this is the state of the world:
|
||||
* Target: A [..] [points after A]
|
||||
* Source: A B [C] D [points to C - B has been consumed]
|
||||
* 0 1 2 3
|
||||
* codePoint = "B" [the unassigned codepoint]
|
||||
*
|
||||
* Now, suppose a callback wants to write the substitution character '?' to
|
||||
* the target. It calls ucnv_cbFromUWriteBytes() to write the ?.
|
||||
* It should pass ZERO as the offset, because the offset as far as the
|
||||
* callback is concerned is relative to the SOURCE pointer [which points
|
||||
* before 'C'.] If the callback goes into the args and consumes 'C' also,
|
||||
* it would call FromUWriteBytes with an offset of 1 (and advance the source
|
||||
* pointer).
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef UCNV_CB_H
|
||||
#define UCNV_CB_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
|
||||
/**
|
||||
* ONLY used by FromU callback functions.
|
||||
* Writes out the specified output bytes to the target byte buffer or to the converter's internal buffers.
|
||||
*
|
||||
* @param args callback fromUnicode arguments
|
||||
* @param source source bytes to write
|
||||
* @param length length of bytes to write
|
||||
* @param offsetIndex the relative offset index from callback.
|
||||
* @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG>
|
||||
* be returned to the user, because it means that not all data could be written into the target buffer, and some is
|
||||
* in the converter error buffer.
|
||||
* @see ucnv_cbFromUWriteSub
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
|
||||
const char* source,
|
||||
int32_t length,
|
||||
int32_t offsetIndex,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* ONLY used by FromU callback functions.
|
||||
* This function will write out the correct substitution character sequence
|
||||
* to the target.
|
||||
*
|
||||
* @param args callback fromUnicode arguments
|
||||
* @param offsetIndex the relative offset index from the current source pointer to be used
|
||||
* @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG>
|
||||
* be returned to the user, because it means that not all data could be written into the target buffer, and some is
|
||||
* in the converter error buffer.
|
||||
* @see ucnv_cbFromUWriteBytes
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
|
||||
int32_t offsetIndex,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* ONLY used by fromU callback functions.
|
||||
* This function will write out the error character(s) to the target UChar buffer.
|
||||
*
|
||||
* @param args callback fromUnicode arguments
|
||||
* @param source pointer to pointer to first UChar to write [on exit: 1 after last UChar processed]
|
||||
* @param sourceLimit pointer after last UChar to write
|
||||
* @param offsetIndex the relative offset index from callback which will be set
|
||||
* @param err error status <TT>U_BUFFER_OVERFLOW</TT>
|
||||
* @see ucnv_cbFromUWriteSub
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
|
||||
const UChar** source,
|
||||
const UChar* sourceLimit,
|
||||
int32_t offsetIndex,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* ONLY used by ToU callback functions.
|
||||
* This function will write out the specified characters to the target
|
||||
* UChar buffer.
|
||||
*
|
||||
* @param args callback toUnicode arguments
|
||||
* @param source source string to write
|
||||
* @param length the length of source string
|
||||
* @param offsetIndex the relative offset index which will be written.
|
||||
* @param err error status <TT>U_BUFFER_OVERFLOW</TT>
|
||||
* @see ucnv_cbToUWriteSub
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
|
||||
const UChar* source,
|
||||
int32_t length,
|
||||
int32_t offsetIndex,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* ONLY used by ToU callback functions.
|
||||
* This function will write out the Unicode substitution character (U+FFFD).
|
||||
*
|
||||
* @param args callback toUnicode arguments
|
||||
* @param offsetIndex the relative offset index from callback.
|
||||
* @param err error status <TT>U_BUFFER_OVERFLOW</TT>
|
||||
* @see ucnv_cbToUWriteUChars
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
|
||||
int32_t offsetIndex,
|
||||
UErrorCode * err);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
465
thirdparty/icu4c/common/unicode/ucnv_err.h
vendored
Normal file
465
thirdparty/icu4c/common/unicode/ucnv_err.h
vendored
Normal file
@@ -0,0 +1,465 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
*
|
||||
* ucnv_err.h:
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: UConverter predefined error callbacks
|
||||
*
|
||||
* <h2>Error Behaviour Functions</h2>
|
||||
* Defines some error behaviour functions called by ucnv_{from,to}Unicode
|
||||
* These are provided as part of ICU and many are stable, but they
|
||||
* can also be considered only as an example of what can be done with
|
||||
* callbacks. You may of course write your own.
|
||||
*
|
||||
* If you want to write your own, you may also find the functions from
|
||||
* ucnv_cb.h useful when writing your own callbacks.
|
||||
*
|
||||
* These functions, although public, should NEVER be called directly.
|
||||
* They should be used as parameters to the ucnv_setFromUCallBack
|
||||
* and ucnv_setToUCallBack functions, to set the behaviour of a converter
|
||||
* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
|
||||
*
|
||||
* usage example: 'STOP' doesn't need any context, but newContext
|
||||
* could be set to something other than 'NULL' if needed. The available
|
||||
* contexts in this header can modify the default behavior of the callback.
|
||||
*
|
||||
* \code
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* UConverter *myConverter = ucnv_open("ibm-949", &err);
|
||||
* const void *oldContext;
|
||||
* UConverterFromUCallback oldAction;
|
||||
*
|
||||
*
|
||||
* if (U_SUCCESS(err))
|
||||
* {
|
||||
* ucnv_setFromUCallBack(myConverter,
|
||||
* UCNV_FROM_U_CALLBACK_STOP,
|
||||
* NULL,
|
||||
* &oldAction,
|
||||
* &oldContext,
|
||||
* &err);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* The code above tells "myConverter" to stop when it encounters
|
||||
* ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
|
||||
* Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
|
||||
* and ucnv_setToUCallBack would need to be called in order to change
|
||||
* that behavior too.
|
||||
*
|
||||
* Here is an example with a context:
|
||||
*
|
||||
* \code
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* UConverter *myConverter = ucnv_open("ibm-949", &err);
|
||||
* const void *oldContext;
|
||||
* UConverterToUCallback oldAction;
|
||||
*
|
||||
*
|
||||
* if (U_SUCCESS(err))
|
||||
* {
|
||||
* ucnv_setToUCallBack(myConverter,
|
||||
* UCNV_TO_U_CALLBACK_SUBSTITUTE,
|
||||
* UCNV_SUB_STOP_ON_ILLEGAL,
|
||||
* &oldAction,
|
||||
* &oldContext,
|
||||
* &err);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* The code above tells "myConverter" to stop when it encounters
|
||||
* ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
|
||||
* Codepage -> Unicode. Any unmapped and legal characters will be
|
||||
* substituted to be the default substitution character.
|
||||
*/
|
||||
|
||||
#ifndef UCNV_ERR_H
|
||||
#define UCNV_ERR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
|
||||
/** Forward declaring the UConverter structure. @stable ICU 2.0 */
|
||||
struct UConverter;
|
||||
|
||||
/** @stable ICU 2.0 */
|
||||
typedef struct UConverter UConverter;
|
||||
|
||||
/**
|
||||
* FROM_U, TO_U context options for sub callback
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_SUB_STOP_ON_ILLEGAL "i"
|
||||
|
||||
/**
|
||||
* FROM_U, TO_U context options for skip callback
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
|
||||
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_ICU NULL
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_JAVA "J"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
|
||||
* TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_C "C"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_XML_DEC "D"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_XML_HEX "X"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_UNICODE "U"
|
||||
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
|
||||
* a backslash, 1..6 hex digits, and a space)
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_CSS2 "S"
|
||||
|
||||
/**
|
||||
* The process condition code to be used with the callbacks.
|
||||
* Codes which are greater than UCNV_IRREGULAR should be
|
||||
* passed on to any chained callbacks.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef enum {
|
||||
UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
|
||||
The error code U_INVALID_CHAR_FOUND will be set. */
|
||||
UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
|
||||
\\x81\\x2E is illegal in SJIS because \\x2E
|
||||
is not a valid trail byte for the \\x81
|
||||
lead byte.
|
||||
Also, starting with Unicode 3.0.1, non-shortest byte sequences
|
||||
in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
|
||||
are also illegal, not just irregular.
|
||||
The error code U_ILLEGAL_CHAR_FOUND will be set. */
|
||||
UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
|
||||
the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
|
||||
are irregular UTF-8 byte sequences for single surrogate
|
||||
code points.
|
||||
The error code U_INVALID_CHAR_FOUND will be set. */
|
||||
UCNV_RESET = 3, /**< The callback is called with this reason when a
|
||||
'reset' has occurred. Callback should reset all
|
||||
state. */
|
||||
UCNV_CLOSE = 4, /**< Called when the converter is closed. The
|
||||
callback should release any allocated memory.*/
|
||||
UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
|
||||
converter. The pointer available as the
|
||||
'context' is an alias to the original converter's
|
||||
context pointer. If the context must be owned
|
||||
by the new converter, the callback must clone
|
||||
the data and call ucnv_setFromUCallBack
|
||||
(or ucnv_setToUCallBack) with the correct pointer.
|
||||
@stable ICU 2.2
|
||||
*/
|
||||
} UConverterCallbackReason;
|
||||
|
||||
|
||||
/**
|
||||
* The structure for the fromUnicode callback function parameter.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct {
|
||||
uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
|
||||
UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
|
||||
UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
|
||||
const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
|
||||
const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
|
||||
char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
|
||||
const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
|
||||
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
|
||||
} UConverterFromUnicodeArgs;
|
||||
|
||||
|
||||
/**
|
||||
* The structure for the toUnicode callback function parameter.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct {
|
||||
uint16_t size; /**< The size of this struct @stable ICU 2.0 */
|
||||
UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
|
||||
UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
|
||||
const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
|
||||
const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
|
||||
UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
|
||||
const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
|
||||
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
|
||||
} UConverterToUnicodeArgs;
|
||||
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
*
|
||||
* @param context Pointer to the callback's private data
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Size (in UChars) of the concerned Unicode sequence
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err This should always be set to a failure status prior to calling.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
*
|
||||
* @param context Pointer to the callback's private data
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err This should always be set to a failure status prior to calling.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback skips any ILLEGAL_SEQUENCE, or
|
||||
* skips only UNASSIGNED_SEQUENCE depending on the context parameter
|
||||
* simply ignoring those characters.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Skips any ILLEGAL_SEQUENCE
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Size (in UChars) of the concerned Unicode sequence
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
|
||||
* UNASSIGNED_SEQUENCE depending on context parameter, with the
|
||||
* current substitution string for the converter. This is the default
|
||||
* callback.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Substitutes any ILLEGAL_SEQUENCE
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Size (in UChars) of the concerned Unicode sequence
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @see ucnv_setSubstChars
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
|
||||
* hexadecimal representation of the illegal codepoints
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* <ul>
|
||||
* <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE").
|
||||
* In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* %UD84D%UDC56</li>
|
||||
* <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
|
||||
* In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* \\uD84D\\uDC56</li>
|
||||
* <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
|
||||
* In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* \\U00023456</li>
|
||||
* <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
|
||||
* representation in the format \htmlonly&#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;")\endhtmlonly.
|
||||
* In the Event the converter doesn't support the characters {&,#}[0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* &#144470; and Zero padding is ignored.</li>
|
||||
* <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format \htmlonly&#xXXXX; e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly.
|
||||
* In the Event the converter doesn't support the characters {&,#,x}[0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* \htmlonly&#x23456;\endhtmlonly</li>
|
||||
* </ul>
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Size (in UChars) of the concerned Unicode sequence
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback skips any ILLEGAL_SEQUENCE, or
|
||||
* skips only UNASSIGNED_SEQUENCE depending on the context parameter
|
||||
* simply ignoring those characters.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Skips any ILLEGAL_SEQUENCE
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
|
||||
* UNASSIGNED_SEQUENCE depending on context parameter, with the
|
||||
* Unicode substitution character, U+FFFD.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Substitutes any ILLEGAL_SEQUENCE
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
|
||||
* hexadecimal representation of the illegal bytes
|
||||
* (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
|
||||
*
|
||||
* @param context This function currently recognizes the callback options:
|
||||
* UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
|
||||
* UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*UCNV_ERR_H*/
|
||||
193
thirdparty/icu4c/common/unicode/ucnvsel.h
vendored
Normal file
193
thirdparty/icu4c/common/unicode/ucnvsel.h
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2008-2011, International Business Machines
|
||||
* Corporation, Google and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
/*
|
||||
* Author : eldawy@google.com (Mohamed Eldawy)
|
||||
* ucnvsel.h
|
||||
*
|
||||
* Purpose: To generate a list of encodings capable of handling
|
||||
* a given Unicode text
|
||||
*
|
||||
* Started 09-April-2008
|
||||
*/
|
||||
|
||||
#ifndef __ICU_UCNV_SEL_H__
|
||||
#define __ICU_UCNV_SEL_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "unicode/uenum.h"
|
||||
#include "unicode/ucnv.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Encoding/charset encoding selector
|
||||
*
|
||||
* A converter selector is built with a set of encoding/charset names
|
||||
* and given an input string returns the set of names of the
|
||||
* corresponding converters which can convert the string.
|
||||
*
|
||||
* A converter selector can be serialized into a buffer and reopened
|
||||
* from the serialized form.
|
||||
*/
|
||||
|
||||
struct UConverterSelector;
|
||||
/**
|
||||
* @{
|
||||
* Typedef for selector data structure.
|
||||
*/
|
||||
typedef struct UConverterSelector UConverterSelector;
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Open a selector.
|
||||
* If converterListSize is 0, build for all available converters.
|
||||
* If excludedCodePoints is NULL, don't exclude any code points.
|
||||
*
|
||||
* @param converterList a pointer to encoding names needed to be involved.
|
||||
* Can be NULL if converterListSize==0.
|
||||
* The list and the names will be cloned, and the caller
|
||||
* retains ownership of the original.
|
||||
* @param converterListSize number of encodings in above list.
|
||||
* If 0, builds a selector for all available converters.
|
||||
* @param excludedCodePoints a set of code points to be excluded from consideration.
|
||||
* That is, excluded code points in a string do not change
|
||||
* the selection result. (They might be handled by a callback.)
|
||||
* Use NULL to exclude nothing.
|
||||
* @param whichSet what converter set to use? Use this to determine whether
|
||||
* to consider only roundtrip mappings or also fallbacks.
|
||||
* @param status an in/out ICU UErrorCode
|
||||
* @return the new selector
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI UConverterSelector* U_EXPORT2
|
||||
ucnvsel_open(const char* const* converterList, int32_t converterListSize,
|
||||
const USet* excludedCodePoints,
|
||||
const UConverterUnicodeSet whichSet, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Closes a selector.
|
||||
* If any Enumerations were returned by ucnvsel_select*, they become invalid.
|
||||
* They can be closed before or after calling ucnvsel_close,
|
||||
* but should never be used after the selector is closed.
|
||||
*
|
||||
* @see ucnvsel_selectForString
|
||||
* @see ucnvsel_selectForUTF8
|
||||
*
|
||||
* @param sel selector to close
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnvsel_close(UConverterSelector *sel);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUConverterSelectorPointer
|
||||
* "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Open a selector from its serialized form.
|
||||
* The buffer must remain valid and unchanged for the lifetime of the selector.
|
||||
* This is much faster than creating a selector from scratch.
|
||||
* Using a serialized form from a different machine (endianness/charset) is supported.
|
||||
*
|
||||
* @param buffer pointer to the serialized form of a converter selector;
|
||||
* must be 32-bit-aligned
|
||||
* @param length the capacity of this buffer (can be equal to or larger than
|
||||
* the actual data length)
|
||||
* @param status an in/out ICU UErrorCode
|
||||
* @return the new selector
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI UConverterSelector* U_EXPORT2
|
||||
ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Serialize a selector into a linear buffer.
|
||||
* The serialized form is portable to different machines.
|
||||
*
|
||||
* @param sel selector to consider
|
||||
* @param buffer pointer to 32-bit-aligned memory to be filled with the
|
||||
* serialized form of this converter selector
|
||||
* @param bufferCapacity the capacity of this buffer
|
||||
* @param status an in/out ICU UErrorCode
|
||||
* @return the required buffer capacity to hold the serialized data (even if the call fails
|
||||
* with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucnvsel_serialize(const UConverterSelector* sel,
|
||||
void* buffer, int32_t bufferCapacity, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Select converters that can map all characters in a UTF-16 string,
|
||||
* ignoring the excluded code points.
|
||||
*
|
||||
* @param sel a selector
|
||||
* @param s UTF-16 string
|
||||
* @param length length of the string, or -1 if NUL-terminated
|
||||
* @param status an in/out ICU UErrorCode
|
||||
* @return an enumeration containing encoding names.
|
||||
* The returned encoding names and their order will be the same as
|
||||
* supplied when building the selector.
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI UEnumeration * U_EXPORT2
|
||||
ucnvsel_selectForString(const UConverterSelector* sel,
|
||||
const UChar *s, int32_t length, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Select converters that can map all characters in a UTF-8 string,
|
||||
* ignoring the excluded code points.
|
||||
*
|
||||
* @param sel a selector
|
||||
* @param s UTF-8 string
|
||||
* @param length length of the string, or -1 if NUL-terminated
|
||||
* @param status an in/out ICU UErrorCode
|
||||
* @return an enumeration containing encoding names.
|
||||
* The returned encoding names and their order will be the same as
|
||||
* supplied when building the selector.
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI UEnumeration * U_EXPORT2
|
||||
ucnvsel_selectForUTF8(const UConverterSelector* sel,
|
||||
const char *s, int32_t length, UErrorCode *status);
|
||||
|
||||
#endif /* !UCONFIG_NO_CONVERSION */
|
||||
|
||||
#endif /* __ICU_UCNV_SEL_H__ */
|
||||
477
thirdparty/icu4c/common/unicode/uconfig.h
vendored
Normal file
477
thirdparty/icu4c/common/unicode/uconfig.h
vendored
Normal file
@@ -0,0 +1,477 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: uconfig.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002sep19
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UCONFIG_H__
|
||||
#define __UCONFIG_H__
|
||||
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief User-configurable settings
|
||||
*
|
||||
* Miscellaneous switches:
|
||||
*
|
||||
* A number of macros affect a variety of minor aspects of ICU.
|
||||
* Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
|
||||
* and moved here to make them easier to find.
|
||||
*
|
||||
* Switches for excluding parts of ICU library code modules:
|
||||
*
|
||||
* Changing these macros allows building partial, smaller libraries for special purposes.
|
||||
* By default, all modules are built.
|
||||
* The switches are fairly coarse, controlling large modules.
|
||||
* Basic services cannot be turned off.
|
||||
*
|
||||
* Building with any of these options does not guarantee that the
|
||||
* ICU build process will completely work. It is recommended that
|
||||
* the ICU libraries and data be built using the normal build.
|
||||
* At that time you should remove the data used by those services.
|
||||
* After building the ICU data library, you should rebuild the ICU
|
||||
* libraries with these switches customized to your needs.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
|
||||
* prior to determining default settings for uconfig variables.
|
||||
*
|
||||
* @internal ICU 4.0
|
||||
*/
|
||||
#if defined(UCONFIG_USE_LOCAL)
|
||||
#include "uconfig_local.h"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_DEBUG
|
||||
* Determines whether to include debugging code.
|
||||
* Automatically set on Windows, but most compilers do not have
|
||||
* related predefined macros.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_DEBUG
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_DEBUG)
|
||||
/*
|
||||
* _DEBUG is defined by Visual Studio debug compilation.
|
||||
* Do *not* test for its NDEBUG macro: It is an orthogonal macro
|
||||
* which disables assert().
|
||||
*/
|
||||
# define U_DEBUG 1
|
||||
# else
|
||||
# define U_DEBUG 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Determines whether to enable auto cleanup of libraries.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCLN_NO_AUTO_CLEANUP
|
||||
#define UCLN_NO_AUTO_CLEANUP 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_DISABLE_RENAMING
|
||||
* Determines whether to disable renaming or not.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DISABLE_RENAMING
|
||||
#define U_DISABLE_RENAMING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
* Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
|
||||
* utypes.h includes those headers if this macro is defined to 0.
|
||||
* Otherwise, each of those headers must be included explicitly when using one of their macros.
|
||||
* Defaults to 0 for backward compatibility, except inside ICU.
|
||||
* @stable ICU 49
|
||||
*/
|
||||
#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
|
||||
defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
|
||||
defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
|
||||
#else
|
||||
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_OVERRIDE_CXX_ALLOCATION
|
||||
* Determines whether to override new and delete.
|
||||
* ICU is normally built such that all of its C++ classes, via their UMemory base,
|
||||
* override operators new and delete to use its internal, customizable,
|
||||
* non-exception-throwing memory allocation functions. (Default value 1 for this macro.)
|
||||
*
|
||||
* This is especially important when the application and its libraries use multiple heaps.
|
||||
* For example, on Windows, this allows the ICU DLL to be used by
|
||||
* applications that statically link the C Runtime library.
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#ifndef U_OVERRIDE_CXX_ALLOCATION
|
||||
#define U_OVERRIDE_CXX_ALLOCATION 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_ENABLE_TRACING
|
||||
* Determines whether to enable tracing.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_ENABLE_TRACING
|
||||
#define U_ENABLE_TRACING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_ENABLE_PLUGINS
|
||||
* Determines whether to enable ICU plugins.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_ENABLE_PLUGINS
|
||||
#define UCONFIG_ENABLE_PLUGINS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_ENABLE_DYLOAD
|
||||
* Whether to enable Dynamic loading in ICU.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_ENABLE_DYLOAD
|
||||
#define U_ENABLE_DYLOAD 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CHECK_DYLOAD
|
||||
* Whether to test Dynamic loading as an OS capability.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_CHECK_DYLOAD
|
||||
#define U_CHECK_DYLOAD 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_DEFAULT_SHOW_DRAFT
|
||||
* Do we allow ICU users to use the draft APIs by default?
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DEFAULT_SHOW_DRAFT
|
||||
#define U_DEFAULT_SHOW_DRAFT 1
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Custom icu entry point renaming */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_HAVE_LIB_SUFFIX
|
||||
* 1 if a custom library suffix is set.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_LIB_SUFFIX
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN)
|
||||
# define U_HAVE_LIB_SUFFIX 1
|
||||
#endif /* No #else branch: if neither U_LIB_SUFFIX_C_NAME nor U_IN_DOXYGEN is defined, the macro stays undefined. */
|
||||
|
||||
/**
 * \def U_LIB_SUFFIX_C_NAME_STRING
 * Defines the library suffix as a string with C syntax.
 *
 * A predefined value is used as is. Otherwise the string is derived from
 * U_LIB_SUFFIX_C_NAME when that macro is defined, and is the empty string
 * when it is not.
 * @internal
 */
#ifdef U_LIB_SUFFIX_C_NAME_STRING
    /* Use the predefined value. */
#elif defined(U_LIB_SUFFIX_C_NAME)
    /*
     * Stringify in two steps: the # operator does not macro-expand its operand,
     * so applying CONVERT_TO_STRING directly to U_LIB_SUFFIX_C_NAME would yield
     * the literal text "U_LIB_SUFFIX_C_NAME". The extra macro level lets the
     * suffix be expanded to its value before it is turned into a string.
     */
#   define CONVERT_TO_STRING(s) #s
#   define U_LIB_SUFFIX_EXPAND_TO_STRING(s) CONVERT_TO_STRING(s)
#   define U_LIB_SUFFIX_C_NAME_STRING U_LIB_SUFFIX_EXPAND_TO_STRING(U_LIB_SUFFIX_C_NAME)
#else
#   define U_LIB_SUFFIX_C_NAME_STRING ""
#endif
|
||||
|
||||
/* common/i18n library switches --------------------------------------------- */
|
||||
|
||||
/**
|
||||
* \def UCONFIG_ONLY_COLLATION
|
||||
* This switch turns off modules that are not needed for collation.
|
||||
*
|
||||
* It does not turn off legacy conversion because that is necessary
|
||||
* for ICU to work on EBCDIC platforms (for the default converter).
|
||||
* If you want "only collation" and do not build for EBCDIC,
|
||||
* then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_ONLY_COLLATION
|
||||
# define UCONFIG_ONLY_COLLATION 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_ONLY_COLLATION
|
||||
/* common library */
|
||||
# define UCONFIG_NO_BREAK_ITERATION 1
|
||||
# define UCONFIG_NO_IDNA 1
|
||||
|
||||
/* i18n library */
|
||||
# if UCONFIG_NO_COLLATION
|
||||
# error Contradictory collation switches in uconfig.h.
|
||||
# endif
|
||||
# define UCONFIG_NO_FORMATTING 1
|
||||
# define UCONFIG_NO_TRANSLITERATION 1
|
||||
# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
|
||||
#endif
|
||||
|
||||
/* common library switches -------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_FILE_IO
|
||||
* This switch turns off all file access in the common library
|
||||
* where file access is only used for data loading.
|
||||
* ICU data must then be provided in the form of a data DLL (or with an
|
||||
* equivalent way to link to the data residing in an executable,
|
||||
* as in building a combined library with both the common library's code and
|
||||
* the data), or via udata_setCommonData().
|
||||
* Application data must be provided via udata_setAppData() or by using
|
||||
* "open" functions that take pointers to data, for example ucol_openBinary().
|
||||
*
|
||||
* File access is not used at all in the i18n library.
|
||||
*
|
||||
* File access cannot be turned off for the icuio library or for the ICU
|
||||
* test suites and ICU tools.
|
||||
*
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
#ifndef UCONFIG_NO_FILE_IO
|
||||
# define UCONFIG_NO_FILE_IO 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR) /* both settings together are rejected at compile time */
|
||||
# error Contradictory file io switches in uconfig.h.
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_CONVERSION
|
||||
* ICU will not completely build (compiling the tools fails) with this
|
||||
* switch turned on.
|
||||
* This switch turns off all converters.
|
||||
*
|
||||
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
|
||||
* in utypes.h if char* strings in your environment are always in UTF-8.
|
||||
*
|
||||
* @stable ICU 3.2
|
||||
* @see U_CHARSET_IS_UTF8
|
||||
*/
|
||||
#ifndef UCONFIG_NO_CONVERSION
|
||||
# define UCONFIG_NO_CONVERSION 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_CONVERSION /* disabling all conversion also sets the legacy-conversion switch */
|
||||
# define UCONFIG_NO_LEGACY_CONVERSION 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_ONLY_HTML_CONVERSION
|
||||
* This switch turns off all of the converters NOT listed in
|
||||
* the HTML encoding standard:
|
||||
* http://www.w3.org/TR/encoding/#names-and-labels
|
||||
*
|
||||
* This is not possible on EBCDIC platforms
|
||||
* because they need ibm-37 or ibm-1047 default converters.
|
||||
*
|
||||
* @stable ICU 55
|
||||
*/
|
||||
#ifndef UCONFIG_ONLY_HTML_CONVERSION
|
||||
# define UCONFIG_ONLY_HTML_CONVERSION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_LEGACY_CONVERSION
|
||||
* This switch turns off all converters except for
|
||||
* - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
|
||||
* - US-ASCII
|
||||
* - ISO-8859-1
|
||||
*
|
||||
* Turning off legacy conversion is not possible on EBCDIC platforms
|
||||
* because they need ibm-37 or ibm-1047 default converters.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_LEGACY_CONVERSION
|
||||
# define UCONFIG_NO_LEGACY_CONVERSION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_NORMALIZATION
|
||||
* This switch turns off normalization.
|
||||
* It implies turning off several other services as well, for example
|
||||
* collation and IDNA.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#ifndef UCONFIG_NO_NORMALIZATION
|
||||
# define UCONFIG_NO_NORMALIZATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_USE_ML_PHRASE_BREAKING
|
||||
* This switch turns on BudouX ML phrase-based line breaking, rather than using the dictionary.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_USE_ML_PHRASE_BREAKING
|
||||
# define UCONFIG_USE_ML_PHRASE_BREAKING 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_NORMALIZATION
|
||||
/* common library */
|
||||
/* ICU 50 CJK dictionary BreakIterator uses normalization */
|
||||
# define UCONFIG_NO_BREAK_ITERATION 1
|
||||
/* IDNA (UTS #46) is implemented via normalization */
|
||||
# define UCONFIG_NO_IDNA 1
|
||||
|
||||
/* i18n library */
|
||||
# if UCONFIG_ONLY_COLLATION
|
||||
# error Contradictory collation switches in uconfig.h.
|
||||
# endif
|
||||
# define UCONFIG_NO_COLLATION 1
|
||||
# define UCONFIG_NO_TRANSLITERATION 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_BREAK_ITERATION
|
||||
* This switch turns off break iteration.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_BREAK_ITERATION
|
||||
# define UCONFIG_NO_BREAK_ITERATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_IDNA
|
||||
* This switch turns off IDNA.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#ifndef UCONFIG_NO_IDNA
|
||||
# define UCONFIG_NO_IDNA 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
|
||||
* Determines the default UMessagePatternApostropheMode.
|
||||
* See the documentation for that enum.
|
||||
*
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
|
||||
# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_USE_WINDOWS_LCID_MAPPING_API
|
||||
* On platforms where U_PLATFORM_HAS_WIN32_API is true, this switch determines
|
||||
* if the Windows platform APIs are used for LCID<->Locale Name conversions.
|
||||
* Otherwise, only the built-in ICU tables are used.
|
||||
*
|
||||
* @internal ICU 64
|
||||
*/
|
||||
#ifndef UCONFIG_USE_WINDOWS_LCID_MAPPING_API
|
||||
# define UCONFIG_USE_WINDOWS_LCID_MAPPING_API 1
|
||||
#endif
|
||||
|
||||
/* i18n library switches ---------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_COLLATION
|
||||
* This switch turns off collation and collation-based string search.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_COLLATION
|
||||
# define UCONFIG_NO_COLLATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_FORMATTING
|
||||
* This switch turns off formatting and calendar/timezone services.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_FORMATTING
|
||||
# define UCONFIG_NO_FORMATTING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_MF2
|
||||
* This switch turns off the experimental MessageFormat 2.0 API.
|
||||
*
|
||||
* @internal ICU 75 technology preview
|
||||
* @deprecated This API is for technology preview only.
|
||||
*/
|
||||
#ifndef UCONFIG_NO_MF2
|
||||
# define UCONFIG_NO_MF2 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_TRANSLITERATION
|
||||
* This switch turns off transliteration.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_TRANSLITERATION
|
||||
# define UCONFIG_NO_TRANSLITERATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
* This switch turns off regular expressions.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_SERVICE
|
||||
* This switch turns off service registration.
|
||||
*
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
#ifndef UCONFIG_NO_SERVICE
|
||||
# define UCONFIG_NO_SERVICE 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_HAVE_PARSEALLINPUT
|
||||
* This switch turns on the "parse all input" attribute. Binary incompatible.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_HAVE_PARSEALLINPUT
|
||||
# define UCONFIG_HAVE_PARSEALLINPUT 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
* This switch turns off filtered break iteration code.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0
|
||||
#endif
|
||||
|
||||
#endif // __UCONFIG_H__
|
||||
158
thirdparty/icu4c/common/unicode/ucpmap.h
vendored
Normal file
158
thirdparty/icu4c/common/unicode/ucpmap.h
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// ucpmap.h
|
||||
// created: 2018sep03 Markus W. Scherer
|
||||
|
||||
#ifndef __UCPMAP_H__
|
||||
#define __UCPMAP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: This file defines an abstract map from Unicode code points to integer values.
|
||||
*
|
||||
* @see UCPMap
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
*/
|
||||
|
||||
/**
|
||||
* Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
|
||||
*
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
* @stable ICU 63
|
||||
*/
|
||||
typedef struct UCPMap UCPMap;
|
||||
|
||||
/**
|
||||
* Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
|
||||
* Most users should use UCPMAP_RANGE_NORMAL.
|
||||
*
|
||||
* @see ucpmap_getRange
|
||||
* @see ucptrie_getRange
|
||||
* @see umutablecptrie_getRange
|
||||
* @stable ICU 63
|
||||
*/
|
||||
enum UCPMapRangeOption {
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map.
|
||||
* Most users should use this option.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_NORMAL,
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
|
||||
* except that lead surrogates (U+D800..U+DBFF) are treated as having the
|
||||
* surrogateValue, which is passed to getRange() as a separate parameter.
|
||||
* The surrogateValue is not transformed via filter().
|
||||
* See U_IS_LEAD(c).
|
||||
*
|
||||
* Most users should use UCPMAP_RANGE_NORMAL instead.
|
||||
*
|
||||
* This option is useful for maps that map surrogate code *units* to
|
||||
* special values optimized for UTF-16 string processing
|
||||
* or for special error behavior for unpaired surrogates,
|
||||
* but those values are not to be associated with the lead surrogate code *points*.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
|
||||
* except that all surrogates (U+D800..U+DFFF) are treated as having the
|
||||
* surrogateValue, which is passed to getRange() as a separate parameter.
|
||||
* The surrogateValue is not transformed via filter().
|
||||
* See U_IS_SURROGATE(c).
|
||||
*
|
||||
* Most users should use UCPMAP_RANGE_NORMAL instead.
|
||||
*
|
||||
* This option is useful for maps that map surrogate code *units* to
|
||||
* special values optimized for UTF-16 string processing
|
||||
* or for special error behavior for unpaired surrogates,
|
||||
* but those values are not to be associated with the surrogate code *points*.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_FIXED_ALL_SURROGATES
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum UCPMapRangeOption UCPMapRangeOption;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the value for a code point as stored in the map, with range checking.
|
||||
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
|
||||
*
|
||||
* @param map the map
|
||||
* @param c the code point
|
||||
* @return the map value,
|
||||
* or an implementation-defined error value if the code point is not in the range 0..U+10FFFF
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ucpmap_get(const UCPMap *map, UChar32 c);
|
||||
|
||||
/**
|
||||
* Callback function type: Modifies a map value.
|
||||
* Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
|
||||
* The modified value will be returned by the getRange function.
|
||||
*
|
||||
* Can be used to ignore some of the value bits,
|
||||
* make a filter for one of several values,
|
||||
* return a value index computed from the map value, etc.
|
||||
*
|
||||
* @param context an opaque pointer, as passed into the getRange function
|
||||
* @param value a value from the map
|
||||
* @return the modified value
|
||||
* @stable ICU 63
|
||||
*/
|
||||
typedef uint32_t U_CALLCONV
|
||||
UCPMapValueFilter(const void *context, uint32_t value);
|
||||
|
||||
/**
|
||||
* Returns the last code point such that all those from start to there have the same value.
|
||||
* Can be used to efficiently iterate over all same-value ranges in a map.
|
||||
* (This is normally faster than iterating over code points and get()ting each value,
|
||||
* but much slower than a data structure that stores ranges directly.)
|
||||
*
|
||||
* If the UCPMapValueFilter function pointer is not NULL, then
|
||||
* the value to be delivered is passed through that function, and the return value is the end
|
||||
* of the range where all values are modified to the same actual value.
|
||||
* The value is unchanged if that function pointer is NULL.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* UChar32 start = 0, end;
|
||||
* uint32_t value;
|
||||
* while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
|
||||
* NULL, NULL, &value)) >= 0) {
|
||||
* // Work with the range start..end and its value.
|
||||
* start = end + 1;
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* @param map the map
|
||||
* @param start range start
|
||||
* @param option defines whether surrogates are treated normally,
|
||||
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
|
||||
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
|
||||
* @param filter a pointer to a function that may modify the map data value,
|
||||
* or NULL if the values from the map are to be used unmodified
|
||||
* @param context an opaque pointer that is passed on to the filter function
|
||||
* @param pValue if not NULL, receives the value that every code point start..end has;
|
||||
* may have been modified by filter(context, map value)
|
||||
* if that function pointer is not NULL
|
||||
* @return the range end code point, or -1 if start is not a valid code point
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ucpmap_getRange(const UCPMap *map, UChar32 start,
|
||||
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
645
thirdparty/icu4c/common/unicode/ucptrie.h
vendored
Normal file
645
thirdparty/icu4c/common/unicode/ucptrie.h
vendored
Normal file
@@ -0,0 +1,645 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// ucptrie.h (modified from utrie2.h)
|
||||
// created: 2017dec29 Markus W. Scherer
|
||||
|
||||
#ifndef __UCPTRIE_H__
|
||||
#define __UCPTRIE_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucpmap.h"
|
||||
#include "unicode/utf8.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: This file defines an immutable Unicode code point trie.
|
||||
*
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
*/
|
||||
|
||||
#ifndef U_IN_DOXYGEN
|
||||
/** @internal */
|
||||
typedef union UCPTrieData {
|
||||
/** @internal */
|
||||
const void *ptr0;
|
||||
/** @internal */
|
||||
const uint16_t *ptr16;
|
||||
/** @internal */
|
||||
const uint32_t *ptr32;
|
||||
/** @internal */
|
||||
const uint8_t *ptr8;
|
||||
} UCPTrieData;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Immutable Unicode code point trie structure.
|
||||
* Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
|
||||
* For details see https://icu.unicode.org/design/struct/utrie
|
||||
*
|
||||
* Do not access UCPTrie fields directly; use public functions and macros.
|
||||
* Functions are easy to use: They support all trie types and value widths.
|
||||
*
|
||||
* When performance is really important, macros provide faster access.
|
||||
* Most macros are specific to either "fast" or "small" tries, see UCPTrieType.
|
||||
* There are "fast" macros for special optimized use cases.
|
||||
*
|
||||
* The macros will return bogus values, or may crash, if used on the wrong type or value width.
|
||||
*
|
||||
* @see UMutableCPTrie
|
||||
* @stable ICU 63
|
||||
*/
|
||||
struct UCPTrie {
|
||||
#ifndef U_IN_DOXYGEN
|
||||
/** @internal */
|
||||
const uint16_t *index;
|
||||
/** @internal */
|
||||
UCPTrieData data;
|
||||
|
||||
/** @internal */
|
||||
int32_t indexLength;
|
||||
/** @internal */
|
||||
int32_t dataLength;
|
||||
/** Start of the last range which ends at U+10FFFF. @internal */
|
||||
UChar32 highStart;
|
||||
/** highStart>>12 @internal */
|
||||
uint16_t shifted12HighStart;
|
||||
|
||||
/** @internal */
|
||||
int8_t type; // UCPTrieType
|
||||
/** @internal */
|
||||
int8_t valueWidth; // UCPTrieValueWidth
|
||||
|
||||
/** padding/reserved @internal */
|
||||
uint32_t reserved32;
|
||||
/** padding/reserved @internal */
|
||||
uint16_t reserved16;
|
||||
|
||||
/**
|
||||
* Internal index-3 null block offset.
|
||||
* Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.
|
||||
* @internal
|
||||
*/
|
||||
uint16_t index3NullOffset;
|
||||
/**
|
||||
* Internal data null block offset, not shifted.
|
||||
* Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
|
||||
* @internal
|
||||
*/
|
||||
int32_t dataNullOffset;
|
||||
/** @internal */
|
||||
uint32_t nullValue;
|
||||
|
||||
#ifdef UCPTRIE_DEBUG
|
||||
/** @internal */
|
||||
const char *name;
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef struct UCPTrie UCPTrie;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Selectors for the type of a UCPTrie.
|
||||
* Different trade-offs for size vs. speed.
|
||||
*
|
||||
* @see umutablecptrie_buildImmutable
|
||||
* @see ucptrie_openFromBinary
|
||||
* @see ucptrie_getType
|
||||
* @stable ICU 63
|
||||
*/
|
||||
enum UCPTrieType {
|
||||
/**
|
||||
* For ucptrie_openFromBinary() to accept any type.
|
||||
* ucptrie_getType() will return the actual type.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPTRIE_TYPE_ANY = -1,
|
||||
/**
|
||||
* Fast/simple/larger BMP data structure. Use functions and "fast" macros.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPTRIE_TYPE_FAST,
|
||||
/**
|
||||
* Small/slower BMP data structure. Use functions and "small" macros.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPTRIE_TYPE_SMALL
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum UCPTrieType UCPTrieType;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Selectors for the number of bits in a UCPTrie data value.
|
||||
*
|
||||
* @see umutablecptrie_buildImmutable
|
||||
* @see ucptrie_openFromBinary
|
||||
* @see ucptrie_getValueWidth
|
||||
* @stable ICU 63
|
||||
*/
|
||||
enum UCPTrieValueWidth {
|
||||
/**
|
||||
* For ucptrie_openFromBinary() to accept any data value width.
|
||||
* ucptrie_getValueWidth() will return the actual data value width.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPTRIE_VALUE_BITS_ANY = -1,
|
||||
/**
|
||||
* The trie stores 16 bits per data value.
|
||||
* It returns them as unsigned values 0..0xffff=65535.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPTRIE_VALUE_BITS_16,
|
||||
/**
|
||||
* The trie stores 32 bits per data value.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPTRIE_VALUE_BITS_32,
|
||||
/**
|
||||
* The trie stores 8 bits per data value.
|
||||
* It returns them as unsigned values 0..0xff=255.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPTRIE_VALUE_BITS_8
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum UCPTrieValueWidth UCPTrieValueWidth;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Opens a trie from its binary form, stored in 32-bit-aligned memory.
|
||||
* Inverse of ucptrie_toBinary().
|
||||
*
|
||||
* The memory must remain valid and unchanged as long as the trie is used.
|
||||
* You must ucptrie_close() the trie once you are done using it.
|
||||
*
|
||||
* @param type selects the trie type; results in an
|
||||
* U_INVALID_FORMAT_ERROR if it does not match the binary data;
|
||||
* use UCPTRIE_TYPE_ANY to accept any type
|
||||
* @param valueWidth selects the number of bits in a data value; results in an
|
||||
* U_INVALID_FORMAT_ERROR if it does not match the binary data;
|
||||
* use UCPTRIE_VALUE_BITS_ANY to accept any data value width
|
||||
* @param data a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie
|
||||
* @param length the number of bytes available at data;
|
||||
* can be more than necessary
|
||||
* @param pActualLength receives the actual number of bytes at data taken up by the trie data;
|
||||
* can be NULL
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
* @return the trie
|
||||
*
|
||||
* @see umutablecptrie_open
|
||||
* @see umutablecptrie_buildImmutable
|
||||
* @see ucptrie_toBinary
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UCPTrie * U_EXPORT2
|
||||
ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
|
||||
const void *data, int32_t length, int32_t *pActualLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Closes a trie and releases associated memory.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucptrie_close(UCPTrie *trie);
|
||||
|
||||
/**
|
||||
* Returns the trie type.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @return the trie type
|
||||
* @see ucptrie_openFromBinary
|
||||
* @see UCPTRIE_TYPE_ANY
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UCPTrieType U_EXPORT2
|
||||
ucptrie_getType(const UCPTrie *trie);
|
||||
|
||||
/**
|
||||
* Returns the number of bits in a trie data value.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @return the number of bits in a trie data value
|
||||
* @see ucptrie_openFromBinary
|
||||
* @see UCPTRIE_VALUE_BITS_ANY
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UCPTrieValueWidth U_EXPORT2
|
||||
ucptrie_getValueWidth(const UCPTrie *trie);
|
||||
|
||||
/**
|
||||
* Returns the value for a code point as stored in the trie, with range checking.
|
||||
* Returns the trie error value if c is not in the range 0..U+10FFFF.
|
||||
*
|
||||
* Easier to use than UCPTRIE_FAST_GET() and similar macros but slower.
|
||||
* Easier to use because, unlike the macros, this function works on all UCPTrie
|
||||
* objects, for all types and value widths.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @param c the code point
|
||||
* @return the trie value,
|
||||
* or the trie error value if the code point is not in the range 0..U+10FFFF
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ucptrie_get(const UCPTrie *trie, UChar32 c);
|
||||
|
||||
/**
|
||||
* Returns the last code point such that all those from start to there have the same value.
|
||||
* Can be used to efficiently iterate over all same-value ranges in a trie.
|
||||
* (This is normally faster than iterating over code points and get()ting each value,
|
||||
* but much slower than a data structure that stores ranges directly.)
|
||||
*
|
||||
* If the UCPMapValueFilter function pointer is not NULL, then
|
||||
* the value to be delivered is passed through that function, and the return value is the end
|
||||
* of the range where all values are modified to the same actual value.
|
||||
* The value is unchanged if that function pointer is NULL.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* UChar32 start = 0, end;
|
||||
* uint32_t value;
|
||||
* while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
|
||||
* NULL, NULL, &value)) >= 0) {
|
||||
* // Work with the range start..end and its value.
|
||||
* start = end + 1;
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* @param trie the trie
|
||||
* @param start range start
|
||||
* @param option defines whether surrogates are treated normally,
|
||||
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
|
||||
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
|
||||
* @param filter a pointer to a function that may modify the trie data value,
|
||||
* or NULL if the values from the trie are to be used unmodified
|
||||
* @param context an opaque pointer that is passed on to the filter function
|
||||
* @param pValue if not NULL, receives the value that every code point start..end has;
|
||||
* may have been modified by filter(context, trie value)
|
||||
* if that function pointer is not NULL
|
||||
* @return the range end code point, or -1 if start is not a valid code point
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ucptrie_getRange(const UCPTrie *trie, UChar32 start,
|
||||
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||
|
||||
/**
|
||||
* Writes a memory-mappable form of the trie into 32-bit aligned memory.
|
||||
* Inverse of ucptrie_openFromBinary().
|
||||
*
|
||||
* @param trie the trie
|
||||
* @param data a pointer to 32-bit-aligned memory to be filled with the trie data;
|
||||
* can be NULL if capacity==0
|
||||
* @param capacity the number of bytes available at data, or 0 for pure preflighting
|
||||
* @param pErrorCode an in/out ICU UErrorCode;
|
||||
* U_BUFFER_OVERFLOW_ERROR if the capacity is too small
|
||||
* @return the number of bytes written or (if buffer overflow) needed for the trie
|
||||
*
|
||||
* @see ucptrie_openFromBinary()
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Macro parameter value for a trie with 16-bit data values.
|
||||
* Use the name of this macro as a "dataAccess" parameter in other macros.
|
||||
* Do not use this macro in any other way.
|
||||
*
|
||||
* @see UCPTRIE_VALUE_BITS_16
|
||||
* @stable ICU 63
|
||||
*/
|
||||
#define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i])
|
||||
|
||||
/**
|
||||
* Macro parameter value for a trie with 32-bit data values.
|
||||
* Use the name of this macro as a "dataAccess" parameter in other macros.
|
||||
* Do not use this macro in any other way.
|
||||
*
|
||||
* @see UCPTRIE_VALUE_BITS_32
|
||||
* @stable ICU 63
|
||||
*/
|
||||
#define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i])
|
||||
|
||||
/**
|
||||
* Macro parameter value for a trie with 8-bit data values.
|
||||
* Use the name of this macro as a "dataAccess" parameter in other macros.
|
||||
* Do not use this macro in any other way.
|
||||
*
|
||||
* @see UCPTRIE_VALUE_BITS_8
|
||||
* @stable ICU 63
|
||||
*/
|
||||
#define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i])
|
||||
|
||||
/**
|
||||
* Returns a trie value for a code point, with range checking.
|
||||
* Returns the trie error value if c is not in the range 0..U+10FFFF.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param c (UChar32, in) the input code point
|
||||
* @return The code point's trie value.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
#define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c))
|
||||
|
||||
/**
 * Returns a trie value for a code point, with range checking.
 * Returns the trie error value if c is not in the range U+0000..U+10FFFF.
 *
 * The value width is selected by the dataAccess parameter, not by this macro.
 * Code points up to UCPTRIE_SMALL_MAX use the fast index path; higher ones
 * go through the small-index path.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param c (UChar32, in) the input code point
 * @return The code point's trie value.
 * @stable ICU 63
 */
#define UCPTRIE_SMALL_GET(trie, dataAccess, c) \
    dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c))
|
||||
|
||||
/**
 * UTF-16: Reads the next code point (UChar32 c, out), post-increments src,
 * and gets a value from the trie.
 * Sets the trie error value if c is an unpaired surrogate.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param src (const UChar *, in/out) the source text pointer
 * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
 * @param c (UChar32, out) variable for the code point
 * @param result (out) variable for the trie lookup result
 * @stable ICU 63
 */
#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = *(src)++; \
    int32_t __index; \
    if (!U16_IS_SURROGATE(c)) { \
        /* Non-surrogate BMP code unit: direct fast-index lookup. */ \
        __index = _UCPTRIE_FAST_INDEX(trie, c); \
    } else { \
        uint16_t __c2; \
        if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \
            /* Well-formed pair: consume the trail unit and look up the supplementary code point. */ \
            ++(src); \
            (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
            __index = _UCPTRIE_SMALL_INDEX(trie, c); \
        } else { \
            /* Unpaired surrogate: c keeps the surrogate, result is the error value. */ \
            __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
        } \
    } \
    (result) = dataAccess(trie, __index); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src,
 * and gets a value from the trie.
 * Sets the trie error value if c is an unpaired surrogate.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param start (const UChar *, in) the start pointer for the text
 * @param src (const UChar *, in/out) the source text pointer
 * @param c (UChar32, out) variable for the code point
 * @param result (out) variable for the trie lookup result
 * @stable ICU 63
 */
#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = *--(src); \
    int32_t __index; \
    if (!U16_IS_SURROGATE(c)) { \
        /* Non-surrogate BMP code unit: direct fast-index lookup. */ \
        __index = _UCPTRIE_FAST_INDEX(trie, c); \
    } else { \
        uint16_t __c2; \
        if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \
            /* Well-formed pair: step back over the lead unit and look up the supplementary code point. */ \
            --(src); \
            (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
            __index = _UCPTRIE_SMALL_INDEX(trie, c); \
        } else { \
            /* Unpaired surrogate: c keeps the surrogate, result is the error value. */ \
            __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
        } \
    } \
    (result) = dataAccess(trie, __index); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * UTF-8: Post-increments src and gets a value from the trie.
 * Sets the trie error value for an ill-formed byte sequence.
 *
 * Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point
 * because it would be more work to do so and is often not needed.
 * If the trie value differs from the error value, then the byte sequence is well-formed,
 * and the code point can be assembled without revalidation.
 *
 * Implementation note: the local __lead first holds the lead byte and is then
 * overwritten with the data index that is finally passed to dataAccess.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param src (const char *, in/out) the source text pointer
 * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
 * @param result (out) variable for the trie lookup result
 * @stable ICU 63
 */
#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __lead = (uint8_t)*(src)++; \
    if (!U8_IS_SINGLE(__lead)) { \
        uint8_t __t1, __t2, __t3; \
        if ((src) != (limit) && \
            (__lead >= 0xe0 ? \
                __lead < 0xf0 ?  /* U+0800..U+FFFF except surrogates */ \
                    U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \
                    ++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \
                    (__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \
                :  /* U+10000..U+10FFFF */ \
                    (__lead -= 0xf0) <= 4 && \
                    U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \
                    (__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \
                    (__t2 = *(src) - 0x80) <= 0x3f && \
                    ++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \
                    (__lead = __lead >= (trie)->shifted12HighStart ? \
                        (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
                        ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \
            :  /* U+0080..U+07FF */ \
                __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \
                (__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \
            ++(src); \
        } else { \
            __lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET;  /* ill-formed*/ \
        } \
    } \
    (result) = dataAccess(trie, __lead); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * UTF-8: Pre-decrements src and gets a value from the trie.
 * Sets the trie error value for an ill-formed byte sequence.
 *
 * Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point
 * because it would be more work to do so and is often not needed.
 * If the trie value differs from the error value, then the byte sequence is well-formed,
 * and the code point can be assembled without revalidation.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param start (const char *, in) the start pointer for the text
 * @param src (const char *, in/out) the source text pointer
 * @param result (out) variable for the trie lookup result
 * @stable ICU 63
 */
#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __index = (uint8_t)*--(src); \
    if (!U8_IS_SINGLE(__index)) { \
        __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \
                                              (const uint8_t *)(src)); \
        /* The helper's result is packed: the low 3 bits are subtracted from src, */ \
        /* the remaining upper bits are the data index. */ \
        (src) -= __index & 7; \
        __index >>= 3; \
    } \
    (result) = dataAccess(trie, __index); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Returns a trie value for an ASCII code point, without range checking.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie (of either fast or small type)
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param c (UChar32, in) the input code point; must be U+0000..U+007F
|
||||
* @return The ASCII code point's trie value.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
#define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c)
|
||||
|
||||
/**
|
||||
* Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
|
||||
* Can be used to look up a value for a UTF-16 code unit if other parts of
|
||||
* the string processing check for surrogates.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param c (UChar32, in) the input code point, must be U+0000..U+FFFF
|
||||
* @return The BMP code point's trie value.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
#define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c))
|
||||
|
||||
/**
|
||||
* Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
|
||||
* without range checking.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF
|
||||
* @return The supplementary code point's trie value.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
#define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c))
|
||||
|
||||
/* Internal definitions ----------------------------------------------------- */
|
||||
|
||||
#ifndef U_IN_DOXYGEN
|
||||
|
||||
/**
 * Internal implementation constants.
 * These are needed for the API macros, but users should not use these directly.
 * @internal
 */
enum {
    /**
     * Number of low code point bits that form the offset inside a fast data block;
     * the remaining upper bits select the index entry.
     * @internal
     */
    UCPTRIE_FAST_SHIFT = 6,

    /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
    UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT,

    /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
    UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1,

    /**
     * Highest code point that UCPTRIE_SMALL_GET() resolves through the fast index path.
     * @internal
     */
    UCPTRIE_SMALL_MAX = 0xfff,

    /**
     * Offset from dataLength (to be subtracted) for fetching the
     * value returned for out-of-range code points and ill-formed UTF-8/16.
     * @internal
     */
    UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1,
    /**
     * Offset from dataLength (to be subtracted) for fetching the
     * value returned for code points highStart..U+10FFFF.
     * @internal
     */
    UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2
};
|
||||
|
||||
/* Internal functions and macros -------------------------------------------- */
|
||||
// Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API
|
||||
|
||||
/** @internal */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c);
|
||||
|
||||
/** @internal */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3);
|
||||
|
||||
/**
|
||||
* Internal function for part of the UCPTRIE_FAST_U8_PREV() macro implementation.
|
||||
* Do not call directly.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
|
||||
const uint8_t *start, const uint8_t *src);
|
||||
|
||||
/**
 * Internal trie getter for a code point below the fast limit. Returns the data index.
 * The index entry selected by the upper bits of c is added to the low
 * UCPTRIE_FAST_SHIFT bits of c. No range checking is done on c.
 * @internal
 */
#define _UCPTRIE_FAST_INDEX(trie, c) \
    ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK))
|
||||
|
||||
/**
 * Internal trie getter for a code point at or above the fast limit. Returns the data index.
 * Code points at or above the trie's highStart all map to the single "high value" slot
 * near the end of the data array; lower ones are resolved by ucptrie_internalSmallIndex().
 * @internal
 */
#define _UCPTRIE_SMALL_INDEX(trie, c) \
    ((c) >= (trie)->highStart ? \
        (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
        ucptrie_internalSmallIndex(trie, c))
|
||||
|
||||
/**
 * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF.
 * Returns the data index.
 *
 * c is compared as an unsigned value, so negative inputs fail both range tests
 * and select the error-value slot, just like values above U+10FFFF.
 *
 * @param trie the trie
 * @param fastMax highest code point that is looked up via _UCPTRIE_FAST_INDEX()
 * @param c the code point
 * @internal
 */
#define _UCPTRIE_CP_INDEX(trie, fastMax, c) \
    ((uint32_t)(c) <= (uint32_t)(fastMax) ? \
        _UCPTRIE_FAST_INDEX(trie, c) : \
        (uint32_t)(c) <= 0x10ffff ? \
            _UCPTRIE_SMALL_INDEX(trie, c) : \
            (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET)
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif // U_IN_DOXYGEN
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUCPTriePointer
|
||||
* "Smart pointer" class, closes a UCPTrie via ucptrie_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#endif
|
||||
466
thirdparty/icu4c/common/unicode/ucurr.h
vendored
Normal file
466
thirdparty/icu4c/common/unicode/ucurr.h
vendored
Normal file
@@ -0,0 +1,466 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef _UCURR_H_
|
||||
#define _UCURR_H_
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uenum.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Encapsulates information about a currency.
|
||||
*
|
||||
* The ucurr API encapsulates information about a currency, as defined by
|
||||
* ISO 4217. A currency is represented by a 3-character string
|
||||
* containing its ISO 4217 code. This API can return various data
|
||||
* necessary for the proper display of a currency:
|
||||
*
|
||||
* <ul><li>A display symbol, for a specific locale
|
||||
* <li>The number of fraction digits to display
|
||||
* <li>A rounding increment
|
||||
* </ul>
|
||||
*
|
||||
* The <tt>DecimalFormat</tt> class uses these data to display
|
||||
* currencies.
|
||||
* @author Alan Liu
|
||||
* @since ICU 2.2
|
||||
*/
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
/**
 * Currency Usage used for Decimal Format
 * @stable ICU 54
 */
enum UCurrencyUsage {
    /**
     * a setting to specify currency usage which determines currency digit
     * and rounding for standard usage, for example: "50.00 NT$"
     * used as DEFAULT value
     * @stable ICU 54
     */
    UCURR_USAGE_STANDARD=0,
    /**
     * a setting to specify currency usage which determines currency digit
     * and rounding for cash usage, for example: "50 NT$"
     * @stable ICU 54
     */
    UCURR_USAGE_CASH=1,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One higher than the last enum UCurrencyUsage constant.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCURR_USAGE_COUNT=2
#endif  // U_HIDE_DEPRECATED_API
};
|
||||
/** Currency Usage used for Decimal Format */
|
||||
typedef enum UCurrencyUsage UCurrencyUsage;
|
||||
|
||||
/**
|
||||
* Finds a currency code for the given locale.
|
||||
* @param locale the locale for which to retrieve a currency code.
|
||||
* Currency can be specified by the "currency" keyword
|
||||
* in which case it overrides the default currency code
|
||||
* @param buff fill in buffer. Can be NULL for preflighting.
|
||||
* @param buffCapacity capacity of the fill in buffer. Can be 0 for
|
||||
* preflighting. If it is non-zero, the buff parameter
|
||||
* must not be NULL.
|
||||
* @param ec error code
|
||||
* @return length of the currency string. It should always be 3. If 0,
|
||||
* currency couldn't be found or the input values are
|
||||
* invalid.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucurr_forLocale(const char* locale,
|
||||
UChar* buff,
|
||||
int32_t buffCapacity,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
 * Selector constants for ucurr_getName().
 *
 * @see ucurr_getName
 * @stable ICU 2.6
 */
typedef enum UCurrNameStyle {
    /**
     * Selector for ucurr_getName indicating a symbolic name for a
     * currency, such as "$" for USD.
     * @stable ICU 2.6
     */
    UCURR_SYMBOL_NAME,

    /**
     * Selector for ucurr_getName indicating the long name for a
     * currency, such as "US Dollar" for USD.
     * @stable ICU 2.6
     */
    UCURR_LONG_NAME,

    /**
     * Selector for ucurr_getName indicating the narrow currency symbol.
     * The narrow currency symbol is similar to the regular currency
     * symbol, but it always takes the shortest form: for example,
     * "$" instead of "US$" for USD in en-CA.
     *
     * @stable ICU 61
     */
    UCURR_NARROW_SYMBOL_NAME,

    /**
     * Selector for ucurr_getName indicating the formal currency symbol.
     * The formal currency symbol is similar to the regular currency
     * symbol, but it always takes the form used in formal settings
     * such as banking; for example, "NT$" instead of "$" for TWD in zh-TW.
     *
     * @stable ICU 68
     */
    UCURR_FORMAL_SYMBOL_NAME,

    /**
     * Selector for ucurr_getName indicating the variant currency symbol.
     * The variant symbol for a currency is an alternative symbol
     * that is not necessarily as widely used as the regular symbol.
     *
     * @stable ICU 68
     */
    UCURR_VARIANT_SYMBOL_NAME

} UCurrNameStyle;
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
/**
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
typedef const void* UCurrRegistryKey;
|
||||
|
||||
/**
|
||||
* Register an (existing) ISO 4217 currency code for the given locale.
|
||||
* Only the country code and the two variants EURO and PRE_EURO are
|
||||
* recognized.
|
||||
* @param isoCode the three-letter ISO 4217 currency code
|
||||
* @param locale the locale for which to register this currency code
|
||||
* @param status the in/out status code
|
||||
* @return a registry key that can be used to unregister this currency code, or NULL
|
||||
* if there was an error.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI UCurrRegistryKey U_EXPORT2
|
||||
ucurr_register(const UChar* isoCode,
|
||||
const char* locale,
|
||||
UErrorCode* status);
|
||||
/**
|
||||
* Unregister the previously-registered currency definitions using the
|
||||
* UCurrRegistryKey returned from ucurr_register. Key becomes invalid after
|
||||
* a successful call and should not be used again. Any currency
|
||||
* that might have been hidden by the original ucurr_register call is
|
||||
* restored.
|
||||
* @param key the registry key returned by a previous call to ucurr_register
|
||||
* @param status the in/out status code, no special meanings are assigned
|
||||
* @return true if the currency for this key was successfully unregistered
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ucurr_unregister(UCurrRegistryKey key, UErrorCode* status);
|
||||
#endif /* UCONFIG_NO_SERVICE */
|
||||
|
||||
/**
|
||||
* Returns the display name for the given currency in the
|
||||
* given locale. For example, the display name for the USD
|
||||
* currency object in the en_US locale is "$".
|
||||
* @param currency null-terminated 3-letter ISO 4217 code
|
||||
* @param locale locale in which to display currency
|
||||
* @param nameStyle selector for which kind of name to return
|
||||
* @param isChoiceFormat always set to false, or can be NULL;
|
||||
* display names are static strings;
|
||||
* since ICU 4.4, ChoiceFormat patterns are no longer supported
|
||||
* @param len fill-in parameter to receive length of result
|
||||
* @param ec error code
|
||||
* @return pointer to display string of 'len' UChars. If the resource
|
||||
* data contains no entry for 'currency', then 'currency' itself is
|
||||
* returned.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
ucurr_getName(const UChar* currency,
|
||||
const char* locale,
|
||||
UCurrNameStyle nameStyle,
|
||||
UBool* isChoiceFormat,
|
||||
int32_t* len,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Returns the plural name for the given currency in the
|
||||
* given locale. For example, the plural name for the USD
|
||||
* currency object in the en_US locale is "US dollar" or "US dollars".
|
||||
* @param currency null-terminated 3-letter ISO 4217 code
|
||||
* @param locale locale in which to display currency
|
||||
* @param isChoiceFormat always set to false, or can be NULL;
|
||||
* display names are static strings;
|
||||
* since ICU 4.4, ChoiceFormat patterns are no longer supported
|
||||
* @param pluralCount plural count
|
||||
* @param len fill-in parameter to receive length of result
|
||||
* @param ec error code
|
||||
* @return pointer to display string of 'len' UChars. If the resource
|
||||
* data contains no entry for 'currency', then 'currency' itself is
|
||||
* returned.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
ucurr_getPluralName(const UChar* currency,
|
||||
const char* locale,
|
||||
UBool* isChoiceFormat,
|
||||
const char* pluralCount,
|
||||
int32_t* len,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Returns the number of fraction digits that should
|
||||
* be displayed for the given currency.
|
||||
* This is equivalent to ucurr_getDefaultFractionDigitsForUsage(currency,UCURR_USAGE_STANDARD,ec);
|
||||
*
|
||||
* Important: The number of fraction digits for a given currency is NOT
|
||||
* guaranteed to be constant across versions of ICU or CLDR. For example,
|
||||
* do NOT use this value as a mechanism for deciding the magnitude used
|
||||
* to store currency values in a database. You should use this value for
|
||||
* display purposes only.
|
||||
*
|
||||
* @param currency null-terminated 3-letter ISO 4217 code
|
||||
* @param ec input-output error code
|
||||
* @return a non-negative number of fraction digits to be
|
||||
* displayed, or 0 if there is an error
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucurr_getDefaultFractionDigits(const UChar* currency,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Returns the number of fraction digits that should
|
||||
* be displayed for the given currency with usage.
|
||||
*
|
||||
* Important: The number of fraction digits for a given currency is NOT
|
||||
* guaranteed to be constant across versions of ICU or CLDR. For example,
|
||||
* do NOT use this value as a mechanism for deciding the magnitude used
|
||||
* to store currency values in a database. You should use this value for
|
||||
* display purposes only.
|
||||
*
|
||||
* @param currency null-terminated 3-letter ISO 4217 code
|
||||
* @param usage enum usage for the currency
|
||||
* @param ec input-output error code
|
||||
* @return a non-negative number of fraction digits to be
|
||||
* displayed, or 0 if there is an error
|
||||
* @stable ICU 54
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucurr_getDefaultFractionDigitsForUsage(const UChar* currency,
|
||||
const UCurrencyUsage usage,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Returns the rounding increment for the given currency, or 0.0 if no
|
||||
* rounding is done by the currency.
|
||||
* This is equivalent to ucurr_getRoundingIncrementForUsage(currency,UCURR_USAGE_STANDARD,ec);
|
||||
* @param currency null-terminated 3-letter ISO 4217 code
|
||||
* @param ec input-output error code
|
||||
* @return the non-negative rounding increment, or 0.0 if none,
|
||||
* or 0.0 if there is an error
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
U_CAPI double U_EXPORT2
|
||||
ucurr_getRoundingIncrement(const UChar* currency,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Returns the rounding increment for the given currency, or 0.0 if no
|
||||
* rounding is done by the currency given usage.
|
||||
* @param currency null-terminated 3-letter ISO 4217 code
|
||||
* @param usage enum usage for the currency
|
||||
* @param ec input-output error code
|
||||
* @return the non-negative rounding increment, or 0.0 if none,
|
||||
* or 0.0 if there is an error
|
||||
* @stable ICU 54
|
||||
*/
|
||||
U_CAPI double U_EXPORT2
|
||||
ucurr_getRoundingIncrementForUsage(const UChar* currency,
|
||||
const UCurrencyUsage usage,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Selector constants for ucurr_openISOCurrencies().
|
||||
*
|
||||
* @see ucurr_openISOCurrencies
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
typedef enum UCurrCurrencyType {
|
||||
/**
|
||||
* Select all ISO-4217 currency codes.
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
UCURR_ALL = INT32_MAX,
|
||||
/**
|
||||
* Select only ISO-4217 commonly used currency codes.
|
||||
* These currencies can be found in common use, and they usually have
|
||||
* bank notes or coins associated with the currency code.
|
||||
* This does not include fund codes, precious metals and other
|
||||
* various ISO-4217 codes limited to special financial products.
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
UCURR_COMMON = 1,
|
||||
/**
|
||||
* Select ISO-4217 uncommon currency codes.
|
||||
* These codes represent fund codes, precious metals and other
|
||||
* various ISO-4217 codes limited to special financial products.
|
||||
* A fund code is a monetary resource associated with a currency.
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
UCURR_UNCOMMON = 2,
|
||||
/**
|
||||
* Select only deprecated ISO-4217 codes.
|
||||
* These codes are no longer in general public use.
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
UCURR_DEPRECATED = 4,
|
||||
/**
|
||||
* Select only non-deprecated ISO-4217 codes.
|
||||
* These codes are in general public use.
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
UCURR_NON_DEPRECATED = 8
|
||||
} UCurrCurrencyType;
|
||||
|
||||
/**
|
||||
* Provides a UEnumeration object for listing ISO-4217 codes.
|
||||
* @param currType You can use one of several UCurrCurrencyType values for this
|
||||
* variable. You can also | (or) them together to get a specific list of
|
||||
* currencies. Most people will want to use the (UCURR_COMMON|UCURR_NON_DEPRECATED) value to
|
||||
* get a list of current currencies.
|
||||
* @param pErrorCode Error code
|
||||
* @return a UEnumeration over the ISO-4217 codes selected by currType
* @stable ICU 3.2
|
||||
*/
|
||||
U_CAPI UEnumeration * U_EXPORT2
|
||||
ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Queries if the given ISO 4217 3-letter code is available on the specified date range.
|
||||
*
|
||||
* Note: For checking availability of a currency on a specific date, specify the date on both 'from' and 'to'
|
||||
*
|
||||
* When 'from' is U_DATE_MIN and 'to' is U_DATE_MAX, this method checks if the specified currency is available any time.
|
||||
* If 'from' and 'to' are same UDate value, this method checks if the specified currency is available on that date.
|
||||
*
|
||||
* @param isoCode
|
||||
* The ISO 4217 3-letter code.
|
||||
*
|
||||
* @param from
|
||||
* The lower bound of the date range, inclusive. When 'from' is U_DATE_MIN, check the availability
|
||||
* of the currency on any date before 'to'
|
||||
*
|
||||
* @param to
|
||||
* The upper bound of the date range, inclusive. When 'to' is U_DATE_MAX, check the availability of
|
||||
* the currency on any date after 'from'
|
||||
*
|
||||
* @param errorCode
|
||||
* ICU error code
|
||||
*
|
||||
* @return true if the given ISO 4217 3-letter code is supported on the specified date range.
|
||||
*
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ucurr_isAvailable(const UChar* isoCode,
|
||||
UDate from,
|
||||
UDate to,
|
||||
UErrorCode* errorCode);
|
||||
|
||||
/**
|
||||
* Finds the number of valid currency codes for the
|
||||
* given locale and date.
|
||||
* @param locale the locale for which to retrieve the
|
||||
* currency count.
|
||||
* @param date the date for which to retrieve the
|
||||
* currency count for the given locale.
|
||||
* @param ec error code
|
||||
* @return the number of currency codes for the
|
||||
* given locale and date. If 0, currency
|
||||
* codes couldn't be found or the input
|
||||
* values are invalid.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucurr_countCurrencies(const char* locale,
|
||||
UDate date,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Finds a currency code for the given locale and date
|
||||
* @param locale the locale for which to retrieve a currency code.
|
||||
* Currency can be specified by the "currency" keyword
|
||||
* in which case it overrides the default currency code
|
||||
* @param date the date for which to retrieve a currency code for
|
||||
* the given locale.
|
||||
* @param index the index within the available list of currency codes
|
||||
* for the given locale on the given date.
|
||||
* @param buff fill in buffer. Can be NULL for preflighting.
|
||||
* @param buffCapacity capacity of the fill in buffer. Can be 0 for
|
||||
* preflighting. If it is non-zero, the buff parameter
|
||||
* must not be NULL.
|
||||
* @param ec error code
|
||||
* @return length of the currency string. It should always be 3.
|
||||
* If 0, currency couldn't be found or the input values are
|
||||
* invalid.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucurr_forLocaleAndDate(const char* locale,
|
||||
UDate date,
|
||||
int32_t index,
|
||||
UChar* buff,
|
||||
int32_t buffCapacity,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Given a key and a locale, returns an array of string values in a preferred
|
||||
* order that would make a difference. These are all and only those values where
|
||||
* the open (creation) of the service with the locale formed from the input locale
|
||||
* plus input keyword and that value has different behavior than creation with the
|
||||
* input locale alone.
|
||||
* @param key one of the keys supported by this service. For now, only
|
||||
* "currency" is supported.
|
||||
* @param locale the locale
|
||||
* @param commonlyUsed if set to true it will return only commonly used values
|
||||
* with the given locale in preferred order. Otherwise,
|
||||
* it will return all the available values for the locale.
|
||||
* @param status error status
|
||||
* @return a string enumeration over keyword values for the given key and the locale.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
ucurr_getKeywordValuesForLocale(const char* key,
|
||||
const char* locale,
|
||||
UBool commonlyUsed,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns the ISO 4217 numeric code for the currency.
|
||||
* <p>Note: If the ISO 4217 numeric code is not assigned for the currency or
|
||||
* the currency is unknown, this function returns 0.
|
||||
*
|
||||
* @param currency null-terminated 3-letter ISO 4217 code
|
||||
* @return The ISO 4217 numeric code of the currency
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucurr_getNumericCode(const UChar* currency);
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif
|
||||
440
thirdparty/icu4c/common/unicode/udata.h
vendored
Normal file
440
thirdparty/icu4c/common/unicode/udata.h
vendored
Normal file
@@ -0,0 +1,440 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: udata.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999oct25
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UDATA_H__
|
||||
#define __UDATA_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Data loading interface
|
||||
*
|
||||
* <h2>Information about data loading interface</h2>
|
||||
*
|
||||
* This API is used to find and efficiently load data for ICU and applications
|
||||
* using ICU. It provides an abstract interface that specifies a data type and
|
||||
* name to find and load the data. Normally this API is used by other ICU APIs
|
||||
* to load required data out of the ICU data library, but it can be used to
|
||||
* load data out of other places.
|
||||
*
|
||||
* See the User Guide Data Management chapter.
|
||||
*/
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Character used to separate package names from tree names
|
||||
* @internal ICU 3.0
|
||||
*/
|
||||
#define U_TREE_SEPARATOR '-'
|
||||
|
||||
/**
|
||||
* String used to separate package names from tree names
|
||||
* @internal ICU 3.0
|
||||
*/
|
||||
#define U_TREE_SEPARATOR_STRING "-"
|
||||
|
||||
/**
|
||||
* Character used to separate parts of entry names
|
||||
* @internal ICU 3.0
|
||||
*/
|
||||
#define U_TREE_ENTRY_SEP_CHAR '/'
|
||||
|
||||
/**
|
||||
* String used to separate parts of entry names
|
||||
* @internal ICU 3.0
|
||||
*/
|
||||
#define U_TREE_ENTRY_SEP_STRING "/"
|
||||
|
||||
/**
|
||||
* Alias for standard ICU data
|
||||
* @internal ICU 3.0
|
||||
*/
|
||||
#define U_ICUDATA_ALIAS "ICUDATA"
|
||||
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* UDataInfo contains the properties about the requested data.
|
||||
* This is meta data.
|
||||
*
|
||||
* <p>This structure may grow in the future, indicated by the
|
||||
* <code>size</code> field.</p>
|
||||
*
|
||||
* <p>ICU data must be at least 8-aligned, and should be 16-aligned.
|
||||
* The UDataInfo struct begins 4 bytes after the start of the data item,
|
||||
* so it is 4-aligned.</p>
|
||||
*
|
||||
* <p>The platform data property fields help determine if a data
|
||||
* file can be efficiently used on a given machine.
|
||||
* The particular fields are of importance only if the data
|
||||
* is affected by the properties - if there is integer data
|
||||
* with word sizes > 1 byte, char* text, or UChar* text.</p>
|
||||
*
|
||||
* <p>The implementation for the <code>udata_open[Choice]()</code>
|
||||
* functions may reject data based on the value in <code>isBigEndian</code>.
|
||||
* No other field is used by the <code>udata</code> API implementation.</p>
|
||||
*
|
||||
* <p>The <code>dataFormat</code> may be used to identify
|
||||
* the kind of data, e.g. a converter table.</p>
|
||||
*
|
||||
* <p>The <code>formatVersion</code> field should be used to
|
||||
* make sure that the format can be interpreted.
|
||||
* It may be a good idea to check only for the one or two highest
|
||||
* of the version elements to allow the data memory to
|
||||
* get more or somewhat rearranged contents, for as long
|
||||
* as the using code can still interpret the older contents.</p>
|
||||
*
|
||||
* <p>The <code>dataVersion</code> field is intended to be a
|
||||
* common place to store the source version of the data;
|
||||
* for data from the Unicode character database, this could
|
||||
* reflect the Unicode version.</p>
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct {
|
||||
/** sizeof(UDataInfo)
|
||||
* @stable ICU 2.0 */
|
||||
uint16_t size;
|
||||
|
||||
/** unused, set to 0
|
||||
* @stable ICU 2.0*/
|
||||
uint16_t reservedWord;
|
||||
|
||||
/* platform data properties */
|
||||
/** 0 for little-endian machine, 1 for big-endian
|
||||
* @stable ICU 2.0 */
|
||||
uint8_t isBigEndian;
|
||||
|
||||
/** see U_CHARSET_FAMILY values in utypes.h
|
||||
* @stable ICU 2.0*/
|
||||
uint8_t charsetFamily;
|
||||
|
||||
/** sizeof(UChar), one of { 1, 2, 4 }
|
||||
* @stable ICU 2.0*/
|
||||
uint8_t sizeofUChar;
|
||||
|
||||
/** unused, set to 0
|
||||
* @stable ICU 2.0*/
|
||||
uint8_t reservedByte;
|
||||
|
||||
/** data format identifier
|
||||
* @stable ICU 2.0*/
|
||||
uint8_t dataFormat[4];
|
||||
|
||||
/** versions: [0] major [1] minor [2] milli [3] micro
|
||||
* @stable ICU 2.0*/
|
||||
uint8_t formatVersion[4];
|
||||
|
||||
/** versions: [0] major [1] minor [2] milli [3] micro
|
||||
* @stable ICU 2.0*/
|
||||
uint8_t dataVersion[4];
|
||||
} UDataInfo;
|
||||
|
||||
/* API for reading data -----------------------------------------------------*/
|
||||
|
||||
/**
|
||||
* Forward declaration of the data memory type.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UDataMemory UDataMemory;
|
||||
|
||||
/**
|
||||
* Callback function for udata_openChoice().
|
||||
* @param context parameter passed into <code>udata_openChoice()</code>.
|
||||
* @param type The type of the data as passed into <code>udata_openChoice()</code>.
|
||||
* It may be <code>NULL</code>.
|
||||
* @param name The name of the data as passed into <code>udata_openChoice()</code>.
|
||||
* @param pInfo A pointer to the <code>UDataInfo</code> structure
|
||||
* of data that has been loaded and will be returned
|
||||
* by <code>udata_openChoice()</code> if this function
|
||||
* returns <code>true</code>.
|
||||
* @return true if the current data memory is acceptable
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef UBool U_CALLCONV
|
||||
UDataMemoryIsAcceptable(void *context,
|
||||
const char *type, const char *name,
|
||||
const UDataInfo *pInfo);
|
||||
|
||||
|
||||
/**
|
||||
* Convenience function.
|
||||
* This function works the same as <code>udata_openChoice</code>
|
||||
* except that any data that matches the type and name
|
||||
* is assumed to be acceptable.
|
||||
* @param path Specifies an absolute path and/or a basename for the
|
||||
* finding of the data in the file system.
|
||||
* <code>NULL</code> for ICU data.
|
||||
* @param type A string that specifies the type of data to be loaded.
|
||||
* For example, resource bundles are loaded with type "res",
|
||||
* conversion tables with type "cnv".
|
||||
* This may be <code>NULL</code> or empty.
|
||||
* @param name A string that specifies the name of the data.
|
||||
* @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
|
||||
* @return A pointer (handle) to a data memory object, or <code>NULL</code>
|
||||
* if an error occurs. Call <code>udata_getMemory()</code>
|
||||
* to get a pointer to the actual data.
|
||||
*
|
||||
* @see udata_openChoice
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UDataMemory * U_EXPORT2
|
||||
udata_open(const char *path, const char *type, const char *name,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Data loading function.
|
||||
* This function is used to find and efficiently load data for
|
||||
* ICU and applications using ICU.
|
||||
* It provides an abstract interface that allows specifying a data
|
||||
* type and name to find and load the data.
|
||||
*
|
||||
* <p>The implementation depends on platform properties and user preferences
|
||||
* and may involve loading shared libraries (DLLs), mapping
|
||||
* files into memory, or fopen()/fread() files.
|
||||
* It may also involve using static memory or database queries etc.
|
||||
* Several or all data items may be combined into one entity
|
||||
* (DLL, memory-mappable file).</p>
|
||||
*
|
||||
* <p>The data is always preceded by a header that includes
|
||||
* a <code>UDataInfo</code> structure.
|
||||
* The caller's <code>isAcceptable()</code> function is called to make
|
||||
* sure that the data is useful. It may be called several times if it
|
||||
* rejects the data and there is more than one location with data
|
||||
* matching the type and name.</p>
|
||||
*
|
||||
* <p>If <code>path==NULL</code>, then ICU data is loaded.
|
||||
* Otherwise, it is separated into a basename and a basename-less directory string.
|
||||
* The basename is used as the data package name, and the directory is
|
||||
* logically prepended to the ICU data directory string.</p>
|
||||
*
|
||||
* <p>For details about ICU data loading see the User Guide
|
||||
* Data Management chapter. (https://unicode-org.github.io/icu/userguide/icu_data/)</p>
|
||||
*
|
||||
* @param path Specifies an absolute path and/or a basename for the
|
||||
* finding of the data in the file system.
|
||||
* <code>NULL</code> for ICU data.
|
||||
* @param type A string that specifies the type of data to be loaded.
|
||||
* For example, resource bundles are loaded with type "res",
|
||||
* conversion tables with type "cnv".
|
||||
* This may be <code>NULL</code> or empty.
|
||||
* @param name A string that specifies the name of the data.
|
||||
* @param isAcceptable This function is called to verify that loaded data
|
||||
* is useful for the client code. If it returns false
|
||||
* for all data items, then <code>udata_openChoice()</code>
|
||||
* will return with an error.
|
||||
* @param context Arbitrary parameter to be passed into isAcceptable.
|
||||
* @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
|
||||
* @return A pointer (handle) to a data memory object, or <code>NULL</code>
|
||||
* if an error occurs. Call <code>udata_getMemory()</code>
|
||||
* to get a pointer to the actual data.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UDataMemory * U_EXPORT2
|
||||
udata_openChoice(const char *path, const char *type, const char *name,
|
||||
UDataMemoryIsAcceptable *isAcceptable, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Close the data memory.
|
||||
* This function must be called to allow the system to
|
||||
* release resources associated with this data memory.
|
||||
* @param pData The pointer to data memory object
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
udata_close(UDataMemory *pData);
|
||||
|
||||
/**
|
||||
* Get the pointer to the actual data inside the data memory.
|
||||
* The data is read-only.
|
||||
*
|
||||
* ICU data must be at least 8-aligned, and should be 16-aligned.
|
||||
*
|
||||
* @param pData The pointer to data memory object
|
||||
* @return pointer to the read-only data inside the data memory
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const void * U_EXPORT2
|
||||
udata_getMemory(UDataMemory *pData);
|
||||
|
||||
/**
|
||||
* Get the information from the data memory header.
|
||||
* This allows access to the header containing
|
||||
* platform data properties etc. which is not part of
|
||||
* the data itself and can therefore not be accessed
|
||||
* via the pointer that <code>udata_getMemory()</code> returns.
|
||||
*
|
||||
* @param pData pointer to the data memory object
|
||||
* @param pInfo pointer to a UDataInfo object;
|
||||
* its <code>size</code> field must be set correctly,
|
||||
* typically to <code>sizeof(UDataInfo)</code>.
|
||||
*
|
||||
* <code>*pInfo</code> will be filled with the UDataInfo structure
|
||||
* in the data memory object. If this structure is smaller than
|
||||
* <code>pInfo->size</code>, then the <code>size</code> will be
|
||||
* adjusted and only part of the structure will be filled.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
|
||||
|
||||
/**
|
||||
* This function bypasses the normal ICU data loading process and
|
||||
* allows you to force ICU's system data to come out of a user-specified
|
||||
* area in memory.
|
||||
*
|
||||
* ICU data must be at least 8-aligned, and should be 16-aligned.
|
||||
* See https://unicode-org.github.io/icu/userguide/icu_data
|
||||
*
|
||||
* The format of this data is that of the icu common data file, as is
|
||||
* generated by the pkgdata tool with mode=common or mode=dll.
|
||||
* You can read in a whole common mode file and pass the address to the start of the
|
||||
* data, or (with the appropriate link options) pass in the pointer to
|
||||
* the data that has been loaded from a dll by the operating system,
|
||||
* as shown in this code:
|
||||
*
|
||||
* extern const char U_IMPORT U_ICUDATA_ENTRY_POINT [];
|
||||
* // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
|
||||
* UErrorCode status = U_ZERO_ERROR;
|
||||
*
|
||||
* udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
|
||||
*
|
||||
* It is important that the declaration be as above. The entry point
|
||||
* must not be declared as an extern void*.
|
||||
*
|
||||
* Starting with ICU 4.4, it is possible to set several data packages,
|
||||
* one per call to this function.
|
||||
* udata_open() will look for data in the multiple data packages in the order
|
||||
* in which they were set.
|
||||
* The position of the linked-in or default-name ICU .data package in the
|
||||
* search list depends on when the first data item is loaded that is not contained
|
||||
* in the already explicitly set packages.
|
||||
* If data was loaded implicitly before the first call to this function
|
||||
* (for example, via opening a converter, constructing a UnicodeString
|
||||
* from default-codepage data, using formatting or collation APIs, etc.),
|
||||
* then the default data will be first in the list.
|
||||
*
|
||||
* This function has no effect on application (non ICU) data. See udata_setAppData()
|
||||
* for similar functionality for application data.
|
||||
*
|
||||
* @param data pointer to ICU common data
|
||||
* @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
udata_setCommonData(const void *data, UErrorCode *err);
|
||||
|
||||
|
||||
/**
|
||||
* This function bypasses the normal ICU data loading process for application-specific
|
||||
* data and allows you to force it to come out of a user-specified
|
||||
* pointer.
|
||||
*
|
||||
* ICU data must be at least 8-aligned, and should be 16-aligned.
|
||||
* See https://unicode-org.github.io/icu/userguide/icu_data
|
||||
*
|
||||
* The format of this data is that of the icu common data file, like 'icudt26l.dat'
|
||||
* or the corresponding shared library (DLL) file.
|
||||
* The application must read in or otherwise construct an image of the data and then
|
||||
* pass the address of it to this function.
|
||||
*
|
||||
*
|
||||
* Warning: setAppData will set a U_USING_DEFAULT_WARNING code if
|
||||
* data with the specified path has already been opened, or
|
||||
* if setAppData with the same path has already been called.
|
||||
* Any such calls to setAppData will have no effect.
|
||||
*
|
||||
*
|
||||
* @param packageName the package name by which the application will refer
|
||||
* to (open) this data
|
||||
* @param data pointer to the data
|
||||
* @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
|
||||
* @see udata_setCommonData
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Possible settings for udata_setFileAccess()
|
||||
* @see udata_setFileAccess
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
typedef enum UDataFileAccess {
|
||||
/** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */
|
||||
UDATA_FILES_FIRST,
|
||||
/** An alias for the default access mode. @stable ICU 3.4 */
|
||||
UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
|
||||
/** ICU only loads data from packages, not from single files. @stable ICU 3.4 */
|
||||
UDATA_ONLY_PACKAGES,
|
||||
/** ICU loads data from packages first, and only from single files
|
||||
if the data cannot be found in a package. @stable ICU 3.4 */
|
||||
UDATA_PACKAGES_FIRST,
|
||||
/** ICU does not access the file system for data loading. @stable ICU 3.4 */
|
||||
UDATA_NO_FILES,
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Number of real UDataFileAccess values.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UDATA_FILE_ACCESS_COUNT
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
} UDataFileAccess;
|
||||
|
||||
/**
|
||||
* This function may be called to control how ICU loads data. It must be called
|
||||
* before any ICU data is loaded, including application data loaded with
|
||||
* ures/ResourceBundle or udata APIs. This function is not multithread safe.
|
||||
* The results of calling it while other threads are loading data are undefined.
|
||||
* @param access The type of file access to be used
|
||||
* @param status Error code.
|
||||
* @see UDataFileAccess
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUDataMemoryPointer
|
||||
* "Smart pointer" class, closes a UDataMemory via udata_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#endif
|
||||
173
thirdparty/icu4c/common/unicode/udisplaycontext.h
vendored
Normal file
173
thirdparty/icu4c/common/unicode/udisplaycontext.h
vendored
Normal file
@@ -0,0 +1,173 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*****************************************************************************************
|
||||
* Copyright (C) 2014-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UDISPLAYCONTEXT_H
|
||||
#define UDISPLAYCONTEXT_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Display context types (enum values)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Display context types, for getting values of a particular setting.
|
||||
* Note, the specific numeric values are internal and may change.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
enum UDisplayContextType {
|
||||
/**
|
||||
* Type to retrieve the dialect handling setting, e.g.
|
||||
* UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
UDISPCTX_TYPE_DIALECT_HANDLING = 0,
|
||||
/**
|
||||
* Type to retrieve the capitalization context setting, e.g.
|
||||
* UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,
|
||||
* UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, etc.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
UDISPCTX_TYPE_CAPITALIZATION = 1,
|
||||
/**
|
||||
* Type to retrieve the display length setting, e.g.
|
||||
* UDISPCTX_LENGTH_FULL, UDISPCTX_LENGTH_SHORT.
|
||||
* @stable ICU 54
|
||||
*/
|
||||
UDISPCTX_TYPE_DISPLAY_LENGTH = 2,
|
||||
/**
|
||||
* Type to retrieve the substitute handling setting, e.g.
|
||||
* UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.
|
||||
* @stable ICU 58
|
||||
*/
|
||||
UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3
|
||||
};
|
||||
/**
|
||||
* @stable ICU 51
|
||||
*/
|
||||
typedef enum UDisplayContextType UDisplayContextType;
|
||||
|
||||
/**
|
||||
* Display context settings.
|
||||
* Note, the specific numeric values are internal and may change.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
enum UDisplayContext {
|
||||
/**
|
||||
* ================================
|
||||
* DIALECT_HANDLING can be set to one of UDISPCTX_STANDARD_NAMES or
|
||||
* UDISPCTX_DIALECT_NAMES. Use UDisplayContextType UDISPCTX_TYPE_DIALECT_HANDLING
|
||||
* to get the value.
|
||||
*/
|
||||
/**
|
||||
* A possible setting for DIALECT_HANDLING:
|
||||
* use standard names when generating a locale name,
|
||||
* e.g. en_GB displays as 'English (United Kingdom)'.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
UDISPCTX_STANDARD_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 0,
|
||||
/**
|
||||
* A possible setting for DIALECT_HANDLING:
|
||||
* use dialect names, when generating a locale name,
|
||||
* e.g. en_GB displays as 'British English'.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
UDISPCTX_DIALECT_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 1,
|
||||
/**
|
||||
* ================================
|
||||
* CAPITALIZATION can be set to one of UDISPCTX_CAPITALIZATION_NONE,
|
||||
* UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,
|
||||
* UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE,
|
||||
* UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, or
|
||||
* UDISPCTX_CAPITALIZATION_FOR_STANDALONE.
|
||||
* Use UDisplayContextType UDISPCTX_TYPE_CAPITALIZATION to get the value.
|
||||
*/
|
||||
/**
|
||||
* The capitalization context to be used is unknown (this is the default value).
|
||||
* @stable ICU 51
|
||||
*/
|
||||
UDISPCTX_CAPITALIZATION_NONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 0,
|
||||
/**
|
||||
* The capitalization context if a date, date symbol or display name is to be
|
||||
* formatted with capitalization appropriate for the middle of a sentence.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 1,
|
||||
/**
|
||||
* The capitalization context if a date, date symbol or display name is to be
|
||||
* formatted with capitalization appropriate for the beginning of a sentence.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 2,
|
||||
/**
|
||||
* The capitalization context if a date, date symbol or display name is to be
|
||||
* formatted with capitalization appropriate for a user-interface list or menu item.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 3,
|
||||
/**
|
||||
* The capitalization context if a date, date symbol or display name is to be
|
||||
* formatted with capitalization appropriate for stand-alone usage such as an
|
||||
* isolated name on a calendar page.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
UDISPCTX_CAPITALIZATION_FOR_STANDALONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 4,
|
||||
/**
|
||||
* ================================
|
||||
* DISPLAY_LENGTH can be set to one of UDISPCTX_LENGTH_FULL or
|
||||
* UDISPCTX_LENGTH_SHORT. Use UDisplayContextType UDISPCTX_TYPE_DISPLAY_LENGTH
|
||||
* to get the value.
|
||||
*/
|
||||
/**
|
||||
* A possible setting for DISPLAY_LENGTH:
|
||||
* use full names when generating a locale name,
|
||||
* e.g. "United States" for US.
|
||||
* @stable ICU 54
|
||||
*/
|
||||
UDISPCTX_LENGTH_FULL = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 0,
|
||||
/**
|
||||
* A possible setting for DISPLAY_LENGTH:
|
||||
* use short names when generating a locale name,
|
||||
* e.g. "U.S." for US.
|
||||
* @stable ICU 54
|
||||
*/
|
||||
UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1,
|
||||
/**
|
||||
* ================================
|
||||
* SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or
|
||||
* UDISPCTX_NO_SUBSTITUTE. Use UDisplayContextType UDISPCTX_TYPE_SUBSTITUTE_HANDLING
|
||||
* to get the value.
|
||||
*/
|
||||
/**
|
||||
* A possible setting for SUBSTITUTE_HANDLING:
|
||||
* Returns a fallback value (e.g., the input code) when no data is available.
|
||||
* This is the default value.
|
||||
* @stable ICU 58
|
||||
*/
|
||||
UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0,
|
||||
/**
|
||||
* A possible setting for SUBSTITUTE_HANDLING:
|
||||
* Returns a null value with error code set to U_ILLEGAL_ARGUMENT_ERROR when no
|
||||
* data is available.
|
||||
* @stable ICU 58
|
||||
*/
|
||||
UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1
|
||||
|
||||
};
|
||||
/**
|
||||
* @stable ICU 51
|
||||
*/
|
||||
typedef enum UDisplayContext UDisplayContext;
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif
|
||||
209
thirdparty/icu4c/common/unicode/uenum.h
vendored
Normal file
209
thirdparty/icu4c/common/unicode/uenum.h
vendored
Normal file
@@ -0,0 +1,209 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uenum.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:2
|
||||
*
|
||||
* created on: 2002jul08
|
||||
* created by: Vladimir Weinstein
|
||||
*/
|
||||
|
||||
#ifndef __UENUM_H
|
||||
#define __UENUM_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
#include "unicode/localpointer.h"

U_NAMESPACE_BEGIN
/* Forward declaration only; the class is named in the signature of uenum_openFromStringEnumeration(). */
class StringEnumeration;
U_NAMESPACE_END
#endif   // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: String Enumeration
|
||||
*/
|
||||
|
||||
/**
 * An enumeration object.
 * For usage in C programs.
 * Only declared here (incomplete type); it is handled through pointers.
 * @stable ICU 2.2
 */
struct UEnumeration;
/** structure representing an enumeration object instance @stable ICU 2.2 */
typedef struct UEnumeration UEnumeration;
|
||||
|
||||
/**
|
||||
* Disposes of resources in use by the iterator. If en is NULL,
|
||||
* does nothing. After this call, any char* or UChar* pointer
|
||||
* returned by uenum_unext() or uenum_next() is invalid.
|
||||
* @param en UEnumeration structure pointer
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uenum_close(UEnumeration* en);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUEnumerationPointer
 * "Smart pointer" class, closes a UEnumeration via uenum_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close);

U_NAMESPACE_END

#endif   // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* Returns the number of elements that the iterator traverses. If
|
||||
* the iterator is out-of-sync with its service, status is set to
|
||||
* U_ENUM_OUT_OF_SYNC_ERROR.
|
||||
* This is a convenience function. It can end up being very
|
||||
* expensive as all the items might have to be pre-fetched (depending
|
||||
* on the type of data being traversed). Use with caution and only
|
||||
* when necessary.
|
||||
* @param en UEnumeration structure pointer
|
||||
* @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
|
||||
* iterator is out of sync.
|
||||
* @return number of elements in the iterator
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uenum_count(UEnumeration* en, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns the next element in the iterator's list. If there are
|
||||
* no more elements, returns NULL. If the iterator is out-of-sync
|
||||
* with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
|
||||
* NULL is returned. If the native service string is a char* string,
|
||||
* it is converted to UChar* with the invariant converter.
|
||||
* The result is terminated by (UChar)0.
|
||||
* @param en the iterator object
|
||||
* @param resultLength pointer to receive the length of the result
|
||||
* (not including the terminating \\0).
|
||||
* If the pointer is NULL it is ignored.
|
||||
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
|
||||
* the iterator is out of sync with its service.
|
||||
* @return a pointer to the string. The string will be
|
||||
* zero-terminated. The return pointer is owned by this iterator
|
||||
* and must not be deleted by the caller. The pointer is valid
|
||||
* until the next call to any uenum_... method, including
|
||||
* uenum_next() or uenum_unext(). When all strings have been
|
||||
* traversed, returns NULL.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
uenum_unext(UEnumeration* en,
|
||||
int32_t* resultLength,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns the next element in the iterator's list. If there are
|
||||
* no more elements, returns NULL. If the iterator is out-of-sync
|
||||
* with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
|
||||
* NULL is returned. If the native service string is a UChar*
|
||||
* string, it is converted to char* with the invariant converter.
|
||||
* The result is terminated by (char)0. If the conversion fails
|
||||
* (because a character cannot be converted) then status is set to
|
||||
* U_INVARIANT_CONVERSION_ERROR and the return value is undefined
|
||||
* (but non-NULL).
|
||||
* @param en the iterator object
|
||||
* @param resultLength pointer to receive the length of the result
|
||||
* (not including the terminating \\0).
|
||||
* If the pointer is NULL it is ignored.
|
||||
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
|
||||
* the iterator is out of sync with its service. Set to
|
||||
* U_INVARIANT_CONVERSION_ERROR if the underlying native string is
|
||||
* UChar* and conversion to char* with the invariant converter
|
||||
* fails. This error pertains only to current string, so iteration
|
||||
* might be able to continue successfully.
|
||||
* @return a pointer to the string. The string will be
|
||||
* zero-terminated. The return pointer is owned by this iterator
|
||||
* and must not be deleted by the caller. The pointer is valid
|
||||
* until the next call to any uenum_... method, including
|
||||
* uenum_next() or uenum_unext(). When all strings have been
|
||||
* traversed, returns NULL.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uenum_next(UEnumeration* en,
|
||||
int32_t* resultLength,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Resets the iterator to the current list of service IDs. This
|
||||
* re-establishes sync with the service and rewinds the iterator
|
||||
* to start at the first element.
|
||||
* @param en the iterator object
|
||||
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
|
||||
* the iterator is out of sync with its service.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uenum_reset(UEnumeration* en, UErrorCode* status);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API

/**
 * Given a StringEnumeration, wrap it in a UEnumeration.  The
 * StringEnumeration is adopted; after this call, the caller must not
 * delete it (regardless of error status).
 * @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration.
 * @param ec the error code.
 * @return a UEnumeration wrapping the adopted StringEnumeration.
 * @stable ICU 4.2
 */
U_CAPI UEnumeration* U_EXPORT2
uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec);

#endif   // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null.
|
||||
* Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
|
||||
* \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
|
||||
* @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller.
|
||||
* @param count length of the array
|
||||
* @param ec error code
|
||||
* @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
|
||||
* @see uenum_close
|
||||
* @stable ICU 50
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null.
|
||||
* Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
|
||||
* \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
|
||||
* @param strings array of char* strings (each null terminated). All storage is owned by the caller.
|
||||
* @param count length of the array
|
||||
* @param ec error code
|
||||
* @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory
|
||||
* @see uenum_close
|
||||
* @stable ICU 50
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
|
||||
UErrorCode* ec);
|
||||
|
||||
#endif
|
||||
794
thirdparty/icu4c/common/unicode/uidna.h
vendored
Normal file
794
thirdparty/icu4c/common/unicode/uidna.h
vendored
Normal file
@@ -0,0 +1,794 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uidna.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef __UIDNA_H__
|
||||
#define __UIDNA_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Internationalizing Domain Names in Applications (IDNA)
|
||||
*
|
||||
* IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
|
||||
*
|
||||
* The C API functions which do take a UIDNA * service object pointer
|
||||
* implement UTS #46 and IDNA2008.
|
||||
*
|
||||
* IDNA2003 is obsolete.
|
||||
* The C API functions which do not take a service object pointer
|
||||
* implement IDNA2003. They are all deprecated.
|
||||
*/
|
||||
|
||||
/*
 * IDNA option bit set values.
 */
enum {
    /**
     * Default options value: UTS #46 nontransitional processing.
     * For use in static worker and factory methods.
     *
     * Since ICU 76, this is the same as
     * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE,
     * corresponding to Unicode 15.1 UTS #46 deprecating transitional processing.
     * (These options are ignored by the IDNA2003 implementation.)
     *
     * Before ICU 76, this constant did not set any of the options.
     *
     * @stable ICU 2.6
     */
    UIDNA_DEFAULT=0x30,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * Option to allow unassigned code points in domain names and labels.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the UTS46 implementation.
     * (UTS #46 disallows unassigned code points.)
     * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
     */
    UIDNA_ALLOW_UNASSIGNED=1,
#endif  /* U_HIDE_DEPRECATED_API */
    /**
     * Option to check whether the input conforms to the STD3 ASCII rules,
     * for example the restriction of labels to LDH characters
     * (ASCII Letters, Digits and Hyphen-Minus).
     * For use in static worker and factory methods.
     * @stable ICU 2.6
     */
    UIDNA_USE_STD3_RULES=2,
    /**
     * IDNA option to check for whether the input conforms to the BiDi rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (IDNA2003 always performs a BiDi check.)
     * @stable ICU 4.6
     */
    UIDNA_CHECK_BIDI=4,
    /**
     * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (The CONTEXTJ check is new in IDNA2008.)
     * @stable ICU 4.6
     */
    UIDNA_CHECK_CONTEXTJ=8,
    /**
     * IDNA option for nontransitional processing in ToASCII().
     * For use in static worker and factory methods.
     *
     * <p>By default, ToASCII() uses transitional processing.
     * Unicode 15.1 UTS #46 deprecated transitional processing.
     *
     * <p>This option is ignored by the IDNA2003 implementation.
     * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     * @stable ICU 4.6
     * @see UIDNA_DEFAULT
     */
    UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
    /**
     * IDNA option for nontransitional processing in ToUnicode().
     * For use in static worker and factory methods.
     *
     * <p>By default, ToUnicode() uses transitional processing.
     * Unicode 15.1 UTS #46 deprecated transitional processing.
     *
     * <p>This option is ignored by the IDNA2003 implementation.
     * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     * @stable ICU 4.6
     * @see UIDNA_DEFAULT
     */
    UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
    /**
     * IDNA option to check for whether the input conforms to the CONTEXTO rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (The CONTEXTO check is new in IDNA2008.)
     * <p>This is for use by registries for IDNA2008 conformance.
     * UTS #46 does not require the CONTEXTO check.
     * @stable ICU 49
     */
    UIDNA_CHECK_CONTEXTO=0x40
};
|
||||
|
||||
/**
 * Opaque C service object type for the new IDNA API.
 * @stable ICU 4.6
 */
struct UIDNA;
typedef struct UIDNA UIDNA;  /**< C typedef for struct UIDNA. @stable ICU 4.6 */
|
||||
|
||||
/**
|
||||
* Returns a UIDNA instance which implements UTS #46.
|
||||
* Returns an unmodifiable instance, owned by the caller.
|
||||
* Cache it for multiple operations, and uidna_close() it when done.
|
||||
* The instance is thread-safe, that is, it can be used concurrently.
|
||||
*
|
||||
* For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
|
||||
*
|
||||
* @param options Bit set to modify the processing and error checking.
|
||||
* These should include UIDNA_DEFAULT, or
|
||||
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
|
||||
* See option bit set values in uidna.h.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the UTS #46 UIDNA instance, if successful
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI UIDNA * U_EXPORT2
|
||||
uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Closes a UIDNA instance.
|
||||
* @param idna UIDNA instance to be closed
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uidna_close(UIDNA *idna);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUIDNAPointer
 * "Smart pointer" class, closes a UIDNA via uidna_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.6
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);

U_NAMESPACE_END

#endif   // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* Output container for IDNA processing errors.
|
||||
* Initialize with UIDNA_INFO_INITIALIZER:
|
||||
* \code
|
||||
* UIDNAInfo info = UIDNA_INFO_INITIALIZER;
|
||||
* int32_t length = uidna_nameToASCII(..., &info, &errorCode);
|
||||
* if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
|
||||
* \endcode
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
typedef struct UIDNAInfo {
|
||||
/** sizeof(UIDNAInfo) @stable ICU 4.6 */
|
||||
int16_t size;
|
||||
/**
|
||||
* Set to true if transitional and nontransitional processing produce different results.
|
||||
* For details see C++ IDNAInfo::isTransitionalDifferent().
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
UBool isTransitionalDifferent;
|
||||
UBool reservedB3; /**< Reserved field, do not use. @internal */
|
||||
/**
|
||||
* Bit set indicating IDNA processing errors. 0 if no errors.
|
||||
* See UIDNA_ERROR_... constants.
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
uint32_t errors;
|
||||
int32_t reservedI2; /**< Reserved field, do not use. @internal */
|
||||
int32_t reservedI3; /**< Reserved field, do not use. @internal */
|
||||
} UIDNAInfo;
|
||||
|
||||
/**
 * Static initializer for a UIDNAInfo struct.
 * The values are listed in member declaration order:
 * size, isTransitionalDifferent, reservedB3, errors, reservedI2, reservedI3.
 * @stable ICU 4.6
 */
#define UIDNA_INFO_INITIALIZER { \
    (int16_t)sizeof(UIDNAInfo), \
    false, false, \
    0, 0, 0 }
|
||||
|
||||
/**
|
||||
* Converts a single domain name label into its ASCII form for DNS lookup.
|
||||
* If any processing step fails, then pInfo->errors will be non-zero and
|
||||
* the result might not be an ASCII string.
|
||||
* The label might be modified according to the types of errors.
|
||||
* Labels with severe errors will be left in (or turned into) their Unicode form.
|
||||
*
|
||||
* The UErrorCode indicates an error only in exceptional cases,
|
||||
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||||
*
|
||||
* @param idna UIDNA instance
|
||||
* @param label Input domain name label
|
||||
* @param length Label length, or -1 if NUL-terminated
|
||||
* @param dest Destination string buffer
|
||||
* @param capacity Destination buffer capacity
|
||||
* @param pInfo Output container of IDNA processing details.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return destination string length
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_labelToASCII(const UIDNA *idna,
|
||||
const UChar *label, int32_t length,
|
||||
UChar *dest, int32_t capacity,
|
||||
UIDNAInfo *pInfo, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Converts a single domain name label into its Unicode form for human-readable display.
|
||||
* If any processing step fails, then pInfo->errors will be non-zero.
|
||||
* The label might be modified according to the types of errors.
|
||||
*
|
||||
* The UErrorCode indicates an error only in exceptional cases,
|
||||
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||||
*
|
||||
* @param idna UIDNA instance
|
||||
* @param label Input domain name label
|
||||
* @param length Label length, or -1 if NUL-terminated
|
||||
* @param dest Destination string buffer
|
||||
* @param capacity Destination buffer capacity
|
||||
* @param pInfo Output container of IDNA processing details.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return destination string length
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_labelToUnicode(const UIDNA *idna,
|
||||
const UChar *label, int32_t length,
|
||||
UChar *dest, int32_t capacity,
|
||||
UIDNAInfo *pInfo, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Converts a whole domain name into its ASCII form for DNS lookup.
|
||||
* If any processing step fails, then pInfo->errors will be non-zero and
|
||||
* the result might not be an ASCII string.
|
||||
* The domain name might be modified according to the types of errors.
|
||||
* Labels with severe errors will be left in (or turned into) their Unicode form.
|
||||
*
|
||||
* The UErrorCode indicates an error only in exceptional cases,
|
||||
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||||
*
|
||||
* @param idna UIDNA instance
|
||||
* @param name Input domain name
|
||||
* @param length Domain name length, or -1 if NUL-terminated
|
||||
* @param dest Destination string buffer
|
||||
* @param capacity Destination buffer capacity
|
||||
* @param pInfo Output container of IDNA processing details.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return destination string length
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_nameToASCII(const UIDNA *idna,
|
||||
const UChar *name, int32_t length,
|
||||
UChar *dest, int32_t capacity,
|
||||
UIDNAInfo *pInfo, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Converts a whole domain name into its Unicode form for human-readable display.
|
||||
* If any processing step fails, then pInfo->errors will be non-zero.
|
||||
* The domain name might be modified according to the types of errors.
|
||||
*
|
||||
* The UErrorCode indicates an error only in exceptional cases,
|
||||
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||||
*
|
||||
* @param idna UIDNA instance
|
||||
* @param name Input domain name
|
||||
* @param length Domain name length, or -1 if NUL-terminated
|
||||
* @param dest Destination string buffer
|
||||
* @param capacity Destination buffer capacity
|
||||
* @param pInfo Output container of IDNA processing details.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return destination string length
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_nameToUnicode(const UIDNA *idna,
|
||||
const UChar *name, int32_t length,
|
||||
UChar *dest, int32_t capacity,
|
||||
UIDNAInfo *pInfo, UErrorCode *pErrorCode);
|
||||
|
||||
/* UTF-8 versions of the processing methods --------------------------------- */
|
||||
|
||||
/**
|
||||
* Converts a single domain name label into its ASCII form for DNS lookup.
|
||||
* UTF-8 version of uidna_labelToASCII(), same behavior.
|
||||
*
|
||||
* @param idna UIDNA instance
|
||||
* @param label Input domain name label
|
||||
* @param length Label length, or -1 if NUL-terminated
|
||||
* @param dest Destination string buffer
|
||||
* @param capacity Destination buffer capacity
|
||||
* @param pInfo Output container of IDNA processing details.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return destination string length
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_labelToASCII_UTF8(const UIDNA *idna,
|
||||
const char *label, int32_t length,
|
||||
char *dest, int32_t capacity,
|
||||
UIDNAInfo *pInfo, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Converts a single domain name label into its Unicode form for human-readable display.
|
||||
* UTF-8 version of uidna_labelToUnicode(), same behavior.
|
||||
*
|
||||
* @param idna UIDNA instance
|
||||
* @param label Input domain name label
|
||||
* @param length Label length, or -1 if NUL-terminated
|
||||
* @param dest Destination string buffer
|
||||
* @param capacity Destination buffer capacity
|
||||
* @param pInfo Output container of IDNA processing details.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return destination string length
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_labelToUnicodeUTF8(const UIDNA *idna,
|
||||
const char *label, int32_t length,
|
||||
char *dest, int32_t capacity,
|
||||
UIDNAInfo *pInfo, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Converts a whole domain name into its ASCII form for DNS lookup.
|
||||
* UTF-8 version of uidna_nameToASCII(), same behavior.
|
||||
*
|
||||
* @param idna UIDNA instance
|
||||
* @param name Input domain name
|
||||
* @param length Domain name length, or -1 if NUL-terminated
|
||||
* @param dest Destination string buffer
|
||||
* @param capacity Destination buffer capacity
|
||||
* @param pInfo Output container of IDNA processing details.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return destination string length
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_nameToASCII_UTF8(const UIDNA *idna,
|
||||
const char *name, int32_t length,
|
||||
char *dest, int32_t capacity,
|
||||
UIDNAInfo *pInfo, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Converts a whole domain name into its Unicode form for human-readable display.
|
||||
* UTF-8 version of uidna_nameToUnicode(), same behavior.
|
||||
*
|
||||
* @param idna UIDNA instance
|
||||
* @param name Input domain name
|
||||
* @param length Domain name length, or -1 if NUL-terminated
|
||||
* @param dest Destination string buffer
|
||||
* @param capacity Destination buffer capacity
|
||||
* @param pInfo Output container of IDNA processing details.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return destination string length
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_nameToUnicodeUTF8(const UIDNA *idna,
|
||||
const char *name, int32_t length,
|
||||
char *dest, int32_t capacity,
|
||||
UIDNAInfo *pInfo, UErrorCode *pErrorCode);
|
||||
|
||||
/*
 * IDNA error bit set values.
 * When a domain name or label fails a processing step or does not meet the
 * validity criteria, then one or more of these error bits are set.
 */
enum {
    /**
     * A non-final domain name label (or the whole domain name) is empty.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_EMPTY_LABEL=1,
    /**
     * A domain name label is longer than 63 bytes.
     * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
     * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LABEL_TOO_LONG=2,
    /**
     * A domain name is longer than 255 bytes in its storage form.
     * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
     * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
    /**
     * A label starts with a hyphen-minus ('-').
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LEADING_HYPHEN=8,
    /**
     * A label ends with a hyphen-minus ('-').
     * @stable ICU 4.6
     */
    UIDNA_ERROR_TRAILING_HYPHEN=0x10,
    /**
     * A label contains hyphen-minus ('-') in the third and fourth positions.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_HYPHEN_3_4=0x20,
    /**
     * A label starts with a combining mark.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
    /**
     * A label or domain name contains disallowed characters.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_DISALLOWED=0x80,
    /**
     * A label starts with "xn--" but does not contain valid Punycode.
     * That is, an xn-- label failed Punycode decoding.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_PUNYCODE=0x100,
    /**
     * A label contains a dot=full stop.
     * This can occur in an input string for a single-label function.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LABEL_HAS_DOT=0x200,
    /**
     * An ACE label does not contain a valid label string.
     * The label was successfully ACE (Punycode) decoded but the resulting
     * string had severe validation errors. For example,
     * it might contain characters that are not allowed in ACE labels,
     * or it might not be normalized.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
    /**
     * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
     * @stable ICU 4.6
     */
    UIDNA_ERROR_BIDI=0x800,
    /**
     * A label does not meet the IDNA CONTEXTJ requirements.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_CONTEXTJ=0x1000,
    /**
     * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
     * Some punctuation characters "Would otherwise have been DISALLOWED"
     * but are allowed in certain contexts. (RFC 5892)
     * @stable ICU 49
     */
    UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
    /**
     * A label does not meet the IDNA CONTEXTO requirements for digits.
     * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
     * @stable ICU 49
     */
    UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
};
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/* IDNA2003 API ------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* ASCII names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
|
||||
*
|
||||
* IDNA2003 API Overview:
|
||||
*
|
||||
* The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
|
||||
* (http://www.ietf.org/rfc/rfc3490.txt).
|
||||
* The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
|
||||
* containing non-ASCII code points are processed by the
|
||||
* ToASCII operation before passing it to resolver libraries. Domain names
|
||||
* that are obtained from resolver libraries are processed by the
|
||||
* ToUnicode operation before displaying the domain name to the user.
|
||||
* IDNA requires that implementations process input strings with Nameprep
|
||||
* (http://www.ietf.org/rfc/rfc3491.txt),
|
||||
* which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
|
||||
* and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
|
||||
* Implementations of IDNA MUST fully implement Nameprep and Punycode;
|
||||
* neither Nameprep nor Punycode are optional.
|
||||
* The input and output of ToASCII and ToUnicode operations are Unicode
|
||||
* and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
|
||||
* multiple times to an input string will yield the same result as applying the operation
|
||||
* once.
|
||||
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
|
||||
* ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
|
||||
*
|
||||
* @param src Input UChar array containing label in Unicode.
|
||||
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
|
||||
* @param dest Output UChar array with ASCII (ACE encoded) label.
|
||||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_IDNA_UNASSIGNED_ERROR error code.
|
||||
*
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
* @param status ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
* @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
|
||||
*/
|
||||
U_DEPRECATED int32_t U_EXPORT2
uidna_toASCII(
    const UChar *src,
    int32_t srcLength,
    UChar *dest,
    int32_t destCapacity,
    int32_t options,
    UParseError *parseError,
    UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* Unicode names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
|
||||
*
|
||||
* @param src Input UChar array containing ASCII (ACE encoded) label.
|
||||
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
|
||||
* @param dest Output Converted UChar array containing Unicode equivalent of label.
|
||||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_IDNA_UNASSIGNED_ERROR error code.
|
||||
*
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points. <b> Note: </b> This option is
|
||||
* required on toUnicode operation because the RFC mandates
|
||||
* verification of decoded ACE input by applying toASCII and comparing
|
||||
* its output with source
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
* @param status ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
* @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
|
||||
*/
|
||||
U_DEPRECATED int32_t U_EXPORT2
uidna_toUnicode(
    const UChar *src,
    int32_t srcLength,
    UChar *dest,
    int32_t destCapacity,
    int32_t options,
    UParseError *parseError,
    UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* domain name cannot be used as an Internationalized Domain Name and the application
|
||||
* should have methods defined to deal with the failure.
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src Input UChar array containing IDN in Unicode.
|
||||
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
|
||||
* @param dest Output UChar array with ASCII (ACE encoded) IDN.
|
||||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_IDNA_UNASSIGNED_ERROR error code.
|
||||
*
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
* @param status ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
* @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
|
||||
*/
|
||||
U_DEPRECATED int32_t U_EXPORT2
uidna_IDNToASCII(
    const UChar *src,
    int32_t srcLength,
    UChar *dest,
    int32_t destCapacity,
    int32_t options,
    UParseError *parseError,
    UErrorCode *status);
|
||||
|
||||
/**
|
||||
* IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
|
||||
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
|
||||
* @param dest Output UChar array containing Unicode equivalent of source IDN.
|
||||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_IDNA_UNASSIGNED_ERROR error code.
|
||||
*
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
* @param status ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
* @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
|
||||
*/
|
||||
U_DEPRECATED int32_t U_EXPORT2
uidna_IDNToUnicode(
    const UChar *src,
    int32_t srcLength,
    UChar *dest,
    int32_t destCapacity,
    int32_t options,
    UParseError *parseError,
    UErrorCode *status);
|
||||
|
||||
/**
|
||||
* IDNA2003: Compare two IDN strings for equivalence.
|
||||
* This function splits the domain names into labels and compares them.
|
||||
* According to IDN RFC, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* applying toASCII) match using a case-insensitive ASCII comparison.
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* match regardless of whether label separators match.
|
||||
*
|
||||
* @param s1 First source string.
|
||||
* @param length1 Length of first source string, or -1 if NUL-terminated.
|
||||
*
|
||||
* @param s2 Second source string.
|
||||
* @param length2 Length of second source string, or -1 if NUL-terminated.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_IDNA_UNASSIGNED_ERROR error code.
|
||||
*
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return <0 or 0 or >0 as usual for string comparisons
|
||||
* @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
|
||||
*/
|
||||
U_DEPRECATED int32_t U_EXPORT2
uidna_compare(
    const UChar *s1,
    int32_t length1,
    const UChar *s2,
    int32_t length2,
    int32_t options,
    UErrorCode *status);
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
#endif
|
||||
709
thirdparty/icu4c/common/unicode/uiter.h
vendored
Normal file
709
thirdparty/icu4c/common/unicode/uiter.h
vendored
Normal file
@@ -0,0 +1,709 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2011 International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uiter.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002jan18
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UITER_H__
|
||||
#define __UITER_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Unicode Character Iteration
|
||||
*
|
||||
* @see UCharIterator
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class CharacterIterator;
|
||||
class Replaceable;
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
struct UCharIterator;

/** C typedef for struct UCharIterator. @stable ICU 2.1 */
typedef struct UCharIterator UCharIterator;
|
||||
|
||||
/**
|
||||
* Origin constants for UCharIterator.getIndex() and UCharIterator.move().
|
||||
* @see UCharIteratorMove
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef enum UCharIteratorOrigin {
    UITER_START,
    UITER_CURRENT,
    UITER_LIMIT,
    UITER_ZERO,
    UITER_LENGTH
} UCharIteratorOrigin;
|
||||
|
||||
/** Constants for UCharIterator. @stable ICU 2.6 */
|
||||
enum {
    /**
     * Value that UCharIteratorMove may return to signal that the move
     * succeeded but the resulting UTF-16 index is not known.
     * This can happen when moving relative to the limit or length, or
     * when moving relative to the current index after a setState()
     * while the current UTF-16 index is not known.
     *
     * Counting from the beginning of the text just to report the
     * current/limit/length index after such a move would be very inefficient.
     * Call getIndex(UITER_CURRENT) to obtain the actual index; it will
     * count the UChars if necessary.
     *
     * @stable ICU 2.6
     */
    UITER_UNKNOWN_INDEX = -2
};
|
||||
|
||||
|
||||
/**
|
||||
* Constant for UCharIterator getState() indicating an error or
|
||||
* an unknown state.
|
||||
* Returned by uiter_getState()/UCharIteratorGetState
|
||||
* when an error occurs.
|
||||
* Also, some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position. This will be clearly documented
|
||||
* for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define UITER_NO_STATE ((uint32_t)0xFFFFFFFFu)
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.getIndex().
|
||||
*
|
||||
* Gets the current position, or the start or limit of the
|
||||
* iteration range.
|
||||
*
|
||||
* This function may perform slowly for UITER_CURRENT after setState() was called,
|
||||
* or for UITER_LENGTH, because an iterator implementation may have to count
|
||||
* UChars if the underlying storage is not UTF-16.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param origin get the 0, start, limit, length, or current index
|
||||
* @return the requested index, or U_SENTINEL in an error condition
|
||||
*
|
||||
* @see UCharIteratorOrigin
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.move().
|
||||
*
|
||||
* Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
|
||||
*
|
||||
* Moves the current position relative to the start or limit of the
|
||||
* iteration range, or relative to the current position itself.
|
||||
* The movement is expressed in numbers of code units forward
|
||||
* or backward by specifying a positive or negative delta.
|
||||
* Out of bounds movement will be pinned to the start or limit.
|
||||
*
|
||||
* This function may perform slowly for moving relative to UITER_LENGTH
|
||||
* because an iterator implementation may have to count the rest of the
|
||||
* UChars if the native storage is not UTF-16.
|
||||
*
|
||||
* When moving relative to the limit or length, or
|
||||
* relative to the current position after setState() was called,
|
||||
* move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
|
||||
* determination of the actual UTF-16 index.
|
||||
* The actual index can be determined with getIndex(UITER_CURRENT)
|
||||
* which will count the UChars if necessary.
|
||||
* See UITER_UNKNOWN_INDEX for details.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param delta can be positive, zero, or negative
|
||||
* @param origin move relative to the 0, start, limit, length, or current index
|
||||
* @return the new index, or U_SENTINEL on an error condition,
|
||||
* or UITER_UNKNOWN_INDEX when the index is not known.
|
||||
*
|
||||
* @see UCharIteratorOrigin
|
||||
* @see UCharIterator
|
||||
* @see UITER_UNKNOWN_INDEX
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.hasNext().
|
||||
*
|
||||
* Check if current() and next() can still
|
||||
* return another code unit.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return boolean value for whether current() and next() can still return another code unit
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UBool U_CALLCONV UCharIteratorHasNext(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.hasPrevious().
|
||||
*
|
||||
* Check if previous() can still return another code unit.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return boolean value for whether previous() can still return another code unit
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UBool U_CALLCONV UCharIteratorHasPrevious(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.current().
|
||||
*
|
||||
* Return the code unit at the current position,
|
||||
* or U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code unit
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UChar32 U_CALLCONV UCharIteratorCurrent(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.next().
|
||||
*
|
||||
* Return the code unit at the current index and increment
|
||||
* the index (post-increment, like s[i++]),
|
||||
* or return U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code unit (and post-increment the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UChar32 U_CALLCONV UCharIteratorNext(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.previous().
|
||||
*
|
||||
* Decrement the index and return the code unit from there
|
||||
* (pre-decrement, like s[--i]),
|
||||
* or return U_SENTINEL if there is none (index is at the start).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the previous code unit (after pre-decrementing the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UChar32 U_CALLCONV UCharIteratorPrevious(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.reservedFn().
|
||||
* Reserved for future use.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param something some integer argument
|
||||
* @return some integer
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UCharIteratorReserved(UCharIterator *iter, int32_t something);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.getState().
|
||||
*
|
||||
* Get the "state" of the iterator in the form of a single 32-bit word.
|
||||
* It is recommended that the state value be calculated to be as small as
|
||||
* is feasible. For strings with limited lengths, fewer than 32 bits may
|
||||
* be sufficient.
|
||||
*
|
||||
* This is used together with setState()/UCharIteratorSetState
|
||||
* to save and restore the iterator position more efficiently than with
|
||||
* getIndex()/move().
|
||||
*
|
||||
* The iterator state is defined as a uint32_t value because it is designed
|
||||
* for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
|
||||
* of the character iterator.
|
||||
*
|
||||
* With some UCharIterator implementations (e.g., UTF-8),
|
||||
* getting and setting the UTF-16 index with existing functions
|
||||
* (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
|
||||
* relatively slow because the iterator has to "walk" from a known index
|
||||
* to the requested one.
|
||||
* This takes more time the farther it needs to go.
|
||||
*
|
||||
* An opaque state value allows an iterator implementation to provide
|
||||
* an internal index (UTF-8: the source byte array index) for
|
||||
* fast, constant-time restoration.
|
||||
*
|
||||
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
|
||||
* the UTF-16 index may not be restored as well, but the iterator can deliver
|
||||
* the correct text contents and move relative to the current position
|
||||
* without performance degradation.
|
||||
*
|
||||
* Some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position, in which case they return UITER_NO_STATE instead.
|
||||
* This will be clearly documented for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the state word
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorSetState
|
||||
* @see UITER_NO_STATE
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
typedef uint32_t U_CALLCONV
|
||||
UCharIteratorGetState(const UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.setState().
|
||||
*
|
||||
* Restore the "state" of the iterator using a state word from a getState() call.
|
||||
* The iterator object need not be the same one as for which getState() was called,
|
||||
* but it must be of the same type (set up using the same uiter_setXYZ function)
|
||||
* and it must iterate over the same string
|
||||
* (binary identical regardless of memory address).
|
||||
* For more about the state word see UCharIteratorGetState.
|
||||
*
|
||||
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
|
||||
* the UTF-16 index may not be restored as well, but the iterator can deliver
|
||||
* the correct text contents and move relative to the current position
|
||||
* without performance degradation.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param state the state word from a getState() call
|
||||
* on a same-type, same-string iterator
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorGetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
typedef void U_CALLCONV
|
||||
UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
|
||||
|
||||
|
||||
/**
|
||||
* C API for code unit iteration.
|
||||
* This can be used as a C wrapper around
|
||||
* CharacterIterator, Replaceable, or implemented using simple strings, etc.
|
||||
*
|
||||
* There are two roles for using UCharIterator:
|
||||
*
|
||||
* A "provider" sets the necessary function pointers and controls the "protected"
|
||||
* fields of the UCharIterator structure. A "provider" passes a UCharIterator
|
||||
* into C APIs that need a UCharIterator as an abstract, flexible string interface.
|
||||
*
|
||||
* Implementations of such C APIs are "callers" of UCharIterator functions;
|
||||
* they only use the "public" function pointers and never access the "protected"
|
||||
* fields directly.
|
||||
*
|
||||
* The current() and next() functions only check the current index against the
|
||||
* limit, and previous() only checks the current index against the start,
|
||||
* to see if the iterator already reached the end of the iteration range.
|
||||
*
|
||||
* The assumption - in all iterators - is that the index is moved via the API,
|
||||
* which means it won't go out of bounds, or the index is modified by
|
||||
* user code that knows enough about the iterator implementation to set valid
|
||||
* index values.
|
||||
*
|
||||
* UCharIterator functions return code unit values 0..0xffff,
|
||||
* or U_SENTINEL if the iteration bounds are reached.
|
||||
*
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
struct UCharIterator {
|
||||
/**
|
||||
* (protected) Pointer to string or wrapped object or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
const void *context;
|
||||
|
||||
/**
|
||||
* (protected) Length of string or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t length;
|
||||
|
||||
/**
|
||||
* (protected) Start index or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t start;
|
||||
|
||||
/**
|
||||
* (protected) Current index or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t index;
|
||||
|
||||
/**
|
||||
* (protected) Limit index or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t limit;
|
||||
|
||||
/**
|
||||
* (protected) Used by UTF-8 iterators and possibly others.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t reservedField;
|
||||
|
||||
/**
|
||||
* (public) Returns the current position or the
|
||||
* start or limit index of the iteration range.
|
||||
*
|
||||
* @see UCharIteratorGetIndex
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorGetIndex *getIndex;
|
||||
|
||||
/**
|
||||
* (public) Moves the current position relative to the start or limit of the
|
||||
* iteration range, or relative to the current position itself.
|
||||
* The movement is expressed in numbers of code units forward
|
||||
* or backward by specifying a positive or negative delta.
|
||||
*
|
||||
* @see UCharIteratorMove
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorMove *move;
|
||||
|
||||
/**
|
||||
* (public) Check if current() and next() can still
|
||||
* return another code unit.
|
||||
*
|
||||
* @see UCharIteratorHasNext
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorHasNext *hasNext;
|
||||
|
||||
/**
|
||||
* (public) Check if previous() can still return another code unit.
|
||||
*
|
||||
* @see UCharIteratorHasPrevious
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorHasPrevious *hasPrevious;
|
||||
|
||||
/**
|
||||
* (public) Return the code unit at the current position,
|
||||
* or U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @see UCharIteratorCurrent
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorCurrent *current;
|
||||
|
||||
/**
|
||||
* (public) Return the code unit at the current index and increment
|
||||
* the index (post-increment, like s[i++]),
|
||||
* or return U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @see UCharIteratorNext
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorNext *next;
|
||||
|
||||
/**
|
||||
* (public) Decrement the index and return the code unit from there
|
||||
* (pre-decrement, like s[--i]),
|
||||
* or return U_SENTINEL if there is none (index is at the start).
|
||||
*
|
||||
* @see UCharIteratorPrevious
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorPrevious *previous;
|
||||
|
||||
/**
|
||||
* (public) Reserved for future use. Currently NULL.
|
||||
*
|
||||
* @see UCharIteratorReserved
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorReserved *reservedFn;
|
||||
|
||||
/**
|
||||
* (public) Return the state of the iterator, to be restored later with setState().
|
||||
* This function pointer is NULL if the iterator does not implement it.
|
||||
*
|
||||
* @see UCharIteratorGetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
UCharIteratorGetState *getState;
|
||||
|
||||
/**
|
||||
* (public) Restore the iterator state from the state word from a call
|
||||
* to getState().
|
||||
* This function pointer is NULL if the iterator does not implement it.
|
||||
*
|
||||
* @see UCharIteratorSetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
UCharIteratorSetState *setState;
|
||||
};
|
||||
|
||||
/**
|
||||
* Helper function for UCharIterator to get the code point
|
||||
* at the current index.
|
||||
*
|
||||
* Return the code point that includes the code unit at the current position,
|
||||
* or U_SENTINEL if there is none (index is at the limit).
|
||||
* If the current code unit is a lead or trail surrogate,
|
||||
* then the following or preceding surrogate is used to form
|
||||
* the code point value.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code point
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see U16_GET
|
||||
* @see UnicodeString::char32At()
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
uiter_current32(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Helper function for UCharIterator to get the next code point.
|
||||
*
|
||||
* Return the code point at the current index and increment
|
||||
* the index (post-increment, like s[i++]),
|
||||
* or return U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code point (and post-increment the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see U16_NEXT
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
uiter_next32(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Helper function for UCharIterator to get the previous code point.
|
||||
*
|
||||
* Decrement the index and return the code point from there
|
||||
* (pre-decrement, like s[--i]),
|
||||
* or return U_SENTINEL if there is none (index is at the start).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the previous code point (after pre-decrementing the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see U16_PREV
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
uiter_previous32(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Get the "state" of the iterator in the form of a single 32-bit word.
|
||||
* This is a convenience function that calls iter->getState(iter)
|
||||
* if iter->getState is not NULL;
|
||||
* if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
|
||||
*
|
||||
* Some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position, in which case they return UITER_NO_STATE instead.
|
||||
* This will be clearly documented for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the state word
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorGetState
|
||||
* @see UITER_NO_STATE
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
uiter_getState(const UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Restore the "state" of the iterator using a state word from a getState() call.
|
||||
* This is a convenience function that calls iter->setState(iter, state, pErrorCode)
|
||||
* if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param state the state word from a getState() call
|
||||
* on a same-type, same-string iterator
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorSetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a string.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration over the string s
|
||||
* with iteration boundaries start=index=0 and length=limit=string length.
|
||||
* The "provider" may set the start, index, and limit values at any time
|
||||
* within the range 0..length.
|
||||
* The length field will be ignored.
|
||||
*
|
||||
* The string pointer s is set into UCharIterator.context without copying
|
||||
* or reallocating the string contents.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param s String to iterate over
|
||||
* @param length Length of s, or -1 if NUL-terminated
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a UTF-16BE string
|
||||
* (byte vector with a big-endian pair of bytes per UChar).
|
||||
*
|
||||
* Everything works just like with a normal UChar iterator (uiter_setString),
|
||||
* except that UChars are assembled from byte pairs,
|
||||
* and that the length argument here indicates an even number of bytes.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param s UTF-16BE string to iterate over
|
||||
* @param length Length of s as an even number of bytes, or -1 if NUL-terminated
|
||||
* (NUL means pair of 0 bytes at even index from s)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see uiter_setString
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a UTF-8 string.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration over the UTF-8 string s
|
||||
* with UTF-8 iteration boundaries 0 and length.
|
||||
* The implementation counts the UTF-16 index on the fly and
|
||||
* lazily evaluates the UTF-16 length of the text.
|
||||
*
|
||||
* The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
|
||||
* When the reservedField is not 0, then it contains a supplementary code point
|
||||
* and the UTF-16 index is between the two corresponding surrogates.
|
||||
* At that point, the UTF-8 index is behind that code point.
|
||||
*
|
||||
* The UTF-8 string pointer s is set into UCharIterator.context without copying
|
||||
* or reallocating the string contents.
|
||||
*
|
||||
* getState() returns a state value consisting of
|
||||
* - the current UTF-8 source byte index (bits 31..1)
|
||||
* - a flag (bit 0) that indicates whether the UChar position is in the middle
|
||||
* of a surrogate pair
|
||||
* (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
|
||||
*
|
||||
* getState() cannot also encode the UTF-16 index in the state value.
|
||||
* move(relative to limit or length), or
|
||||
* move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param s UTF-8 string to iterate over
|
||||
* @param length Length of s in bytes, or -1 if NUL-terminated
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to wrap around a C++ CharacterIterator.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration using the
|
||||
* CharacterIterator charIter.
|
||||
*
|
||||
* The CharacterIterator pointer charIter is set into UCharIterator.context
|
||||
* without copying or cloning the CharacterIterator object.
|
||||
* The other "protected" UCharIterator fields are set to 0 and will be ignored.
|
||||
* The iteration index and boundaries are controlled by the CharacterIterator.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param charIter CharacterIterator to wrap
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a C++ Replaceable.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration over the
|
||||
* Replaceable rep with iteration boundaries start=index=0 and
|
||||
* length=limit=rep->length().
|
||||
* The "provider" may set the start, index, and limit values at any time
|
||||
* within the range 0..length=rep->length().
|
||||
* The length field will be ignored.
|
||||
*
|
||||
* The Replaceable pointer rep is set into UCharIterator.context without copying
|
||||
* or cloning/reallocating the Replaceable object.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param rep Replaceable to iterate over
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
|
||||
|
||||
#endif
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
307
thirdparty/icu4c/common/unicode/uldnames.h
vendored
Normal file
307
thirdparty/icu4c/common/unicode/uldnames.h
vendored
Normal file
@@ -0,0 +1,307 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2016, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef __ULDNAMES_H__
|
||||
#define __ULDNAMES_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Provides display names of Locale ids and their components.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/udisplaycontext.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* Enum used in LocaleDisplayNames::createInstance.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Use standard names when generating a locale name,
|
||||
* e.g. en_GB displays as 'English (United Kingdom)'.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
ULDN_STANDARD_NAMES = 0,
|
||||
/**
|
||||
* Use dialect names when generating a locale name,
|
||||
* e.g. en_GB displays as 'British English'.
|
||||
|
||||
* Has no explicit initializer, so its value is 1 (one more than ULDN_STANDARD_NAMES).
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
ULDN_DIALECT_NAMES
|
||||
} UDialectHandling;
|
||||
|
||||
/**
|
||||
* Opaque C service object type for the locale display names API
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
struct ULocaleDisplayNames;
|
||||
|
||||
/**
|
||||
* C typedef for struct ULocaleDisplayNames.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
typedef struct ULocaleDisplayNames ULocaleDisplayNames;
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
/**
|
||||
* Returns an instance of LocaleDisplayNames that returns names
|
||||
* formatted for the provided locale, using the provided
|
||||
* dialectHandling. The usual value for dialectHandling is
|
||||
* ULDN_STANDARD_NAMES.
|
||||
*
|
||||
* @param locale the display locale
|
||||
* @param dialectHandling how to select names for locales
|
||||
* @return a ULocaleDisplayNames instance
|
||||
* @param pErrorCode the status code
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI ULocaleDisplayNames * U_EXPORT2
|
||||
uldn_open(const char * locale,
|
||||
UDialectHandling dialectHandling,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Closes a ULocaleDisplayNames instance obtained from uldn_open().
|
||||
* @param ldn the ULocaleDisplayNames instance to be closed
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uldn_close(ULocaleDisplayNames *ldn);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalULocaleDisplayNamesPointer
|
||||
* "Smart pointer" class, closes a ULocaleDisplayNames via uldn_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleDisplayNamesPointer, ULocaleDisplayNames, uldn_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/* getters for state */
|
||||
|
||||
/**
|
||||
* Returns the locale used to determine the display names. This is
|
||||
* not necessarily the same locale passed to {@link #uldn_open}.
|
||||
* @param ldn the LocaleDisplayNames instance
|
||||
* @return the display locale
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
uldn_getLocale(const ULocaleDisplayNames *ldn);
|
||||
|
||||
/**
|
||||
* Returns the dialect handling used in the display names.
|
||||
* @param ldn the LocaleDisplayNames instance
|
||||
* @return the dialect handling enum
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI UDialectHandling U_EXPORT2
|
||||
uldn_getDialectHandling(const ULocaleDisplayNames *ldn);
|
||||
|
||||
/* names for entire locales */
|
||||
|
||||
/**
|
||||
* Returns the display name of the provided locale.
|
||||
* @param ldn the LocaleDisplayNames instance
|
||||
* @param locale the locale whose display name to return
|
||||
* @param result receives the display name
|
||||
* @param maxResultSize the size of the result buffer
|
||||
* @param pErrorCode the status code
|
||||
* @return the actual buffer size needed for the display name. If it's
|
||||
* greater than maxResultSize, the returned name will be truncated.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uldn_localeDisplayName(const ULocaleDisplayNames *ldn,
|
||||
const char *locale,
|
||||
UChar *result,
|
||||
int32_t maxResultSize,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/* names for components of a locale */
|
||||
|
||||
/**
|
||||
* Returns the display name of the provided language code.
|
||||
* @param ldn the LocaleDisplayNames instance
|
||||
* @param lang the language code whose display name to return
|
||||
* @param result receives the display name
|
||||
* @param maxResultSize the size of the result buffer
|
||||
* @param pErrorCode the status code
|
||||
* @return the actual buffer size needed for the display name. If it's
|
||||
* greater than maxResultSize, the returned name will be truncated.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uldn_languageDisplayName(const ULocaleDisplayNames *ldn,
|
||||
const char *lang,
|
||||
UChar *result,
|
||||
int32_t maxResultSize,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns the display name of the provided script.
|
||||
* @param ldn the LocaleDisplayNames instance
|
||||
* @param script the script whose display name to return
|
||||
* @param result receives the display name
|
||||
* @param maxResultSize the size of the result buffer
|
||||
* @param pErrorCode the status code
|
||||
* @return the actual buffer size needed for the display name. If it's
|
||||
* greater than maxResultSize, the returned name will be truncated.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uldn_scriptDisplayName(const ULocaleDisplayNames *ldn,
|
||||
const char *script,
|
||||
UChar *result,
|
||||
int32_t maxResultSize,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns the display name of the provided script code.
|
||||
* @param ldn the LocaleDisplayNames instance
|
||||
* @param scriptCode the script code whose display name to return
|
||||
* @param result receives the display name
|
||||
* @param maxResultSize the size of the result buffer
|
||||
* @param pErrorCode the status code
|
||||
* @return the actual buffer size needed for the display name. If it's
|
||||
* greater than maxResultSize, the returned name will be truncated.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn,
|
||||
UScriptCode scriptCode,
|
||||
UChar *result,
|
||||
int32_t maxResultSize,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns the display name of the provided region code.
|
||||
* @param ldn the LocaleDisplayNames instance
|
||||
* @param region the region code whose display name to return
|
||||
* @param result receives the display name
|
||||
* @param maxResultSize the size of the result buffer
|
||||
* @param pErrorCode the status code
|
||||
* @return the actual buffer size needed for the display name. If it's
|
||||
* greater than maxResultSize, the returned name will be truncated.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uldn_regionDisplayName(const ULocaleDisplayNames *ldn,
|
||||
const char *region,
|
||||
UChar *result,
|
||||
int32_t maxResultSize,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns the display name of the provided variant.
|
||||
* @param ldn the LocaleDisplayNames instance
|
||||
* @param variant the variant whose display name to return
|
||||
* @param result receives the display name
|
||||
* @param maxResultSize the size of the result buffer
|
||||
* @param pErrorCode the status code
|
||||
* @return the actual buffer size needed for the display name. If it's
|
||||
* greater than maxResultSize, the returned name will be truncated.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uldn_variantDisplayName(const ULocaleDisplayNames *ldn,
|
||||
const char *variant,
|
||||
UChar *result,
|
||||
int32_t maxResultSize,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns the display name of the provided locale key.
|
||||
* @param ldn the LocaleDisplayNames instance
|
||||
* @param key the locale key whose display name to return
|
||||
* @param result receives the display name
|
||||
* @param maxResultSize the size of the result buffer
|
||||
* @param pErrorCode the status code
|
||||
* @return the actual buffer size needed for the display name. If it's
|
||||
* greater than maxResultSize, the returned name will be truncated.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uldn_keyDisplayName(const ULocaleDisplayNames *ldn,
|
||||
const char *key,
|
||||
UChar *result,
|
||||
int32_t maxResultSize,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns the display name of the provided value (used with the provided key).
|
||||
* @param ldn the LocaleDisplayNames instance
|
||||
* @param key the locale key
|
||||
* @param value the locale key's value
|
||||
* @param result receives the display name
|
||||
* @param maxResultSize the size of the result buffer
|
||||
* @param pErrorCode the status code
|
||||
* @return the actual buffer size needed for the display name. If it's
|
||||
* greater than maxResultSize, the returned name will be truncated.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn,
|
||||
const char *key,
|
||||
const char *value,
|
||||
UChar *result,
|
||||
int32_t maxResultSize,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns an instance of LocaleDisplayNames that returns names formatted
|
||||
* for the provided locale, using the provided UDisplayContext settings.
|
||||
*
|
||||
* @param locale The display locale
|
||||
* @param contexts List of one or more context settings (e.g. for dialect
|
||||
* handling, capitalization, etc.).
|
||||
* @param length Number of items in the contexts list
|
||||
* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates
|
||||
* a failure status, the function will do nothing; otherwise this will be
|
||||
* updated with any new status from the function.
|
||||
* @return a ULocaleDisplayNames instance
|
||||
* @stable ICU 51
|
||||
*/
|
||||
U_CAPI ULocaleDisplayNames * U_EXPORT2
|
||||
uldn_openForContext(const char * locale, UDisplayContext *contexts,
|
||||
int32_t length, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns the UDisplayContext value for the specified UDisplayContextType.
|
||||
* @param ldn the ULocaleDisplayNames instance
|
||||
* @param type the UDisplayContextType whose value to return
|
||||
* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates
|
||||
* a failure status, the function will do nothing; otherwise this will be
|
||||
* updated with any new status from the function.
|
||||
* @return the UDisplayContextValue for the specified type.
|
||||
* @stable ICU 51
|
||||
*/
|
||||
U_CAPI UDisplayContext U_EXPORT2
|
||||
uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif /* !UCONFIG_NO_FORMATTING */
|
||||
#endif /* __ULDNAMES_H__ */
|
||||
1410
thirdparty/icu4c/common/unicode/uloc.h
vendored
Normal file
1410
thirdparty/icu4c/common/unicode/uloc.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
226
thirdparty/icu4c/common/unicode/ulocale.h
vendored
Normal file
226
thirdparty/icu4c/common/unicode/ulocale.h
vendored
Normal file
@@ -0,0 +1,226 @@
|
||||
// © 2023 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#ifndef ULOCALE_H
|
||||
#define ULOCALE_H
|
||||
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/uenum.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Locale ID functionality similar to C++ class Locale
|
||||
*/
|
||||
|
||||
/**
|
||||
* Opaque C service object type for the locale API
|
||||
* @stable ICU 74
|
||||
*/
|
||||
struct ULocale;
|
||||
|
||||
/**
|
||||
* C typedef for struct ULocale.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
typedef struct ULocale ULocale;
|
||||
|
||||
/**
|
||||
* Constructs a ULocale from the locale ID.
|
||||
* The created ULocale should be destroyed by calling
|
||||
* ulocale_close();
|
||||
* @param localeID the locale, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the locale; if negative, then the locale needs to be
|
||||
* null terminated.
|
||||
* @param err the error code
|
||||
* @return the locale.
|
||||
*
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI ULocale* U_EXPORT2
|
||||
ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Constructs a ULocale from the provided IETF BCP 47 language tag.
|
||||
* The created ULocale should be destroyed by calling
|
||||
* ulocale_close();
|
||||
* @param tag the language tag, defined as IETF BCP 47 language tag, const
|
||||
* char* pointer (need not be terminated when the length is non-negative)
|
||||
* @param length the length of the tag; if negative, then the tag needs to be
|
||||
* null terminated.
|
||||
* @param err the error code
|
||||
* @return the locale.
|
||||
*
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI ULocale* U_EXPORT2
|
||||
ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Close the locale and destroy its internal state.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocale_close(ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the locale's ISO-639 language code.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return the language code of the locale.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getLanguage(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the locale's ISO-15924 abbreviation script code.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return A pointer to the script.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getScript(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the locale's ISO-3166 region code.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return A pointer to the region.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getRegion(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the locale's variant code.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return A pointer to the variant.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getVariant(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the programmatic name of the entire locale, with the language,
|
||||
* country and variant separated by underbars. If a field is missing, up
|
||||
* to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN",
|
||||
* "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO"
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return A pointer to "name".
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getLocaleID(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the programmatic name of the entire locale as ulocale_getLocaleID()
|
||||
* would return, but without keywords.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return A pointer to "base name".
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getBaseName(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Gets the bogus state. A locale object can be bogus if it doesn't exist.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return false if it is a real locale, true if it is a bogus locale
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI bool U_EXPORT2
|
||||
ulocale_isBogus(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Gets the list of keywords for the specified locale.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @param err the error code
|
||||
* @return pointer to UEnumeration, or nullptr if there are no keywords.
|
||||
* Client must call uenum_close() to dispose the returned value.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
ulocale_getKeywords(const ULocale* locale, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Gets the list of unicode keywords for the specified locale.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @param err the error code
|
||||
* @return pointer to UEnumeration, or nullptr if there are no keywords.
|
||||
* Client must call uenum_close() to dispose the returned value.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
ulocale_getUnicodeKeywords(const ULocale* locale, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Gets the value for a keyword.
|
||||
*
|
||||
* This uses legacy keyword=value pairs, like "collation=phonebook".
|
||||
*
|
||||
* @param locale the locale
|
||||
* @param keyword the keyword, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param keywordLength the length of the keyword; if negative, then the
|
||||
* keyword needs to be null terminated.
|
||||
* @param valueBuffer The buffer to receive the value.
|
||||
* @param valueBufferCapacity The capacity of receiving valueBuffer.
|
||||
* @param err the error code
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ulocale_getKeywordValue(
|
||||
const ULocale* locale, const char* keyword, int32_t keywordLength,
|
||||
char* valueBuffer, int32_t valueBufferCapacity, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Gets the Unicode value for a Unicode keyword.
|
||||
*
|
||||
* This uses Unicode key-value pairs, like "co-phonebk".
|
||||
*
|
||||
* @param locale the locale
|
||||
* @param keyword the Unicode keyword, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param keywordLength the length of the Unicode keyword; if negative,
|
||||
* then the keyword needs to be null terminated.
|
||||
* @param valueBuffer The buffer to receive the Unicode value.
|
||||
* @param valueBufferCapacity The capacity of receiving valueBuffer.
|
||||
* @param err the error code
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ulocale_getUnicodeKeywordValue(
|
||||
const ULocale* locale, const char* keyword, int32_t keywordLength,
|
||||
char* valueBuffer, int32_t valueBufferCapacity, UErrorCode *err);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalULocalePointer
|
||||
* "Smart pointer" class, closes a ULocale via ulocale_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalULocalePointer, ULocale, ulocale_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif /*_ULOCALE */
|
||||
437
thirdparty/icu4c/common/unicode/ulocbuilder.h
vendored
Normal file
437
thirdparty/icu4c/common/unicode/ulocbuilder.h
vendored
Normal file
@@ -0,0 +1,437 @@
|
||||
// © 2023 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
#ifndef __ULOCBUILDER_H__
|
||||
#define __ULOCBUILDER_H__
|
||||
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/ulocale.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Builder API for Locale
|
||||
*/
|
||||
|
||||
/**
|
||||
* Opaque C service object type for the locale builder API
|
||||
* @stable ICU 74
|
||||
*/
|
||||
struct ULocaleBuilder;
|
||||
|
||||
/**
|
||||
* C typedef for struct ULocaleBuilder.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
typedef struct ULocaleBuilder ULocaleBuilder;
|
||||
|
||||
/**
|
||||
* <code>ULocaleBuilder</code> is used to build valid <code>locale</code> id
|
||||
* string or IETF BCP 47 language tag from values configured by the setters.
|
||||
* The <code>ULocaleBuilder</code> checks if a value configured by a
|
||||
* setter satisfies the syntax requirements defined by the <code>Locale</code>
|
||||
* class. A string of Locale created by a <code>ULocaleBuilder</code> is
|
||||
* well-formed and can be transformed to a well-formed IETF BCP 47 language tag
|
||||
* without losing information.
|
||||
*
|
||||
* <p>The following example shows how to create a <code>locale</code> string
|
||||
* with the <code>ULocaleBuilder</code>.
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* char buffer[ULOC_FULLNAME_CAPACITY];
|
||||
* ULocaleBuilder* builder = ulocbld_open();
|
||||
* ulocbld_setLanguage(builder, "sr", -1);
|
||||
* ulocbld_setScript(builder, "Latn", -1);
|
||||
* ulocbld_setRegion(builder, "RS", -1);
|
||||
* int32_t length = ulocbld_buildLocaleID(
|
||||
* builder, buffer, ULOC_FULLNAME_CAPACITY, &err);
|
||||
* ulocbld_close(builder);
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>ULocaleBuilders can be reused; <code>ulocbld_clear()</code> resets all
|
||||
* fields to their default values.
|
||||
*
|
||||
* <p>ULocaleBuilder tracks errors in an internal UErrorCode. For all setters,
|
||||
* except ulocbld_setLanguageTag and ulocbld_setLocale, ULocaleBuilder will return immediately
|
||||
* if the internal UErrorCode is in error state.
|
||||
* To reset the internal state and error code, call ulocbld_clear().
|
||||
* The ulocbld_setLanguageTag and ulocbld_setLocale methods will first clear the internal
|
||||
* UErrorCode, then track the error of the validation of the input parameter
|
||||
* into the internal UErrorCode.
|
||||
*
|
||||
* @stable ICU 74
|
||||
*/
|
||||
|
||||
/**
|
||||
* Constructs an empty ULocaleBuilder. The default value of all
|
||||
* fields, extensions, and private use information is the
|
||||
* empty string. The created builder should be destroyed by calling
|
||||
* ulocbld_close();
|
||||
*
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI ULocaleBuilder* U_EXPORT2
|
||||
ulocbld_open(void);
|
||||
|
||||
/**
|
||||
* Close the builder and destroy its internal states.
|
||||
* @param builder the builder
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_close(ULocaleBuilder* builder);
|
||||
|
||||
/**
|
||||
* Resets the <code>ULocaleBuilder</code> to match the provided
|
||||
* <code>locale</code>. Existing state is discarded.
|
||||
*
|
||||
* <p>All fields of the locale must be well-formed.
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param locale the locale, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the locale; if negative, then the locale needs to be
|
||||
* null terminated,
|
||||
*
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setLocale(ULocaleBuilder* builder, const char* locale, int32_t length);
|
||||
|
||||
/**
|
||||
* Resets the <code>ULocaleBuilder</code> to match the provided
|
||||
* <code>ULocale</code>. Existing state is discarded.
|
||||
*
|
||||
* <p>The locale must not be bogus.
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @param builder the builder.
|
||||
* @param locale the locale, a ULocale* pointer. The builder adopts the locale
|
||||
* after the call and the client must not delete it.
|
||||
*
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_adoptULocale(ULocaleBuilder* builder, ULocale* locale);
|
||||
|
||||
/**
|
||||
* Resets the ULocaleBuilder to match the provided IETF BCP 47 language tag.
|
||||
* Discards the existing state.
|
||||
* The empty string causes the builder to be reset, like {@link #ulocbld_clear}.
|
||||
* Legacy language tags (marked as “Type: grandfathered” in BCP 47)
|
||||
* are converted to their canonical form before being processed.
|
||||
* Otherwise, the <code>language tag</code> must be well-formed,
|
||||
* or else the ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods
|
||||
* will later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param tag the language tag, defined as IETF BCP 47 language tag, a
|
||||
* const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the tag; if negative, then the tag needs to be
|
||||
* null terminated,
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setLanguageTag(ULocaleBuilder* builder, const char* tag, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the language. If <code>language</code> is the empty string, the
|
||||
* language in this <code>ULocaleBuilder</code> is removed. Otherwise, the
|
||||
* <code>language</code> must be well-formed, or else the ulocbld_buildLocaleID()
|
||||
* and ulocbld_buildLanguageTag() methods will
|
||||
* later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>The syntax of language value is defined as
|
||||
* [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag).
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param language the language, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the language; if negative, then the language needs to be
|
||||
* null terminated,
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setLanguage(ULocaleBuilder* builder, const char* language, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the script. If <code>script</code> is the empty string, the script in
|
||||
* this <code>ULocaleBuilder</code> is removed.
|
||||
* Otherwise, the <code>script</code> must be well-formed, or else the
|
||||
* ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods will later
|
||||
* report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>The script value is a four-letter script code as
|
||||
* [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag)
|
||||
* defined by ISO 15924
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param script the script, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the script; if negative, then the script needs to be
|
||||
* null terminated,
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setScript(ULocaleBuilder* builder, const char* script, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the region. If region is the empty string, the region in this
|
||||
* <code>ULocaleBuilder</code> is removed. Otherwise, the <code>region</code>
|
||||
* must be well-formed, or else the ulocbld_buildLocaleID() and
|
||||
* ulocbld_buildLanguageTag() methods will later report an
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>The region value is defined by
|
||||
* [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag)
|
||||
* as a two-letter ISO 3166 code or a three-digit UN M.49 area code.
|
||||
*
|
||||
* <p>The region value in the <code>Locale</code> created by the
|
||||
* <code>ULocaleBuilder</code> is always normalized to upper case.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param region the region, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the region; if negative, then the region needs to be
|
||||
* null terminated,
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setRegion(ULocaleBuilder* builder, const char* region, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the variant. If variant is the empty string, the variant in this
|
||||
* <code>ULocaleBuilder</code> is removed. Otherwise, the <code>variant</code>
|
||||
* must be well-formed, or else the ulocbld_buildLocaleID() and
|
||||
* ulocbld_buildLanguageTag() methods will later report an
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p><b>Note:</b> This method checks if <code>variant</code>
|
||||
* satisfies the
|
||||
* [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag)
|
||||
* syntax requirements, and normalizes the value to lowercase letters. However,
|
||||
* the <code>Locale</code> class does not impose any syntactic
|
||||
* restriction on variant. To set an ill-formed variant, use a Locale constructor.
|
||||
* If there are multiple unicode_variant_subtag, the caller must concatenate
|
||||
* them with '-' as separator (ex: "foobar-fibar").
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param variant the variant, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the variant; if negative, then the variant needs to be
|
||||
* null terminated,
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setVariant(ULocaleBuilder* builder, const char* variant, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the extension for the given key. If the value is the empty string,
|
||||
* the extension is removed. Otherwise, the <code>key</code> and
|
||||
* <code>value</code> must be well-formed, or else the ulocbld_buildLocaleID()
|
||||
* and ulocbld_buildLanguageTag() methods will
|
||||
* later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p><b>Note:</b> The key ('u') is used for the Unicode locale extension.
|
||||
* Setting a value for this key replaces any existing Unicode locale key/type
|
||||
* pairs with those defined in the extension.
|
||||
*
|
||||
* <p><b>Note:</b> The key ('x') is used for the private use code. To be
|
||||
* well-formed, the value for this key needs only to have subtags of one to
|
||||
* eight alphanumeric characters, not two to eight as in the general case.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param key the extension key
|
||||
* @param value the value, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the value; if negative, then the value needs to be
|
||||
* null terminated,
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setExtension(ULocaleBuilder* builder, char key, const char* value, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the Unicode locale keyword type for the given key. If the type
|
||||
* StringPiece is constructed with a nullptr, the keyword is removed.
|
||||
* If the type is the empty string, the keyword is set without type subtags.
|
||||
* Otherwise, the key and type must be well-formed, or else the
|
||||
* ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods will later
|
||||
* report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>Keys and types are converted to lower case.
|
||||
*
|
||||
* <p><b>Note</b>:Setting the 'u' extension via {@link #ulocbld_setExtension}
|
||||
* replaces all Unicode locale keywords with those defined in the
|
||||
* extension.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param key the Unicode locale key, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param keyLength the length of the key; if negative, then the key needs to be
|
||||
* null terminated,
|
||||
* @param type the Unicode locale type, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param typeLength the length of the type; if negative, then the type needs to
|
||||
* be null terminated,
|
||||
*
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setUnicodeLocaleKeyword(ULocaleBuilder* builder,
|
||||
const char* key, int32_t keyLength, const char* type, int32_t typeLength);
|
||||
|
||||
/**
|
||||
* Adds a unicode locale attribute, if not already present, otherwise
|
||||
* has no effect. The attribute must not be empty string and must be
|
||||
* well-formed or U_ILLEGAL_ARGUMENT_ERROR will be set to status
|
||||
* during the ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() calls.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param attribute the attribute, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param length the length of the attribute; if negative, then the attribute
|
||||
* needs to be null terminated.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_addUnicodeLocaleAttribute(
|
||||
ULocaleBuilder* builder, const char* attribute, int32_t length);
|
||||
|
||||
/**
|
||||
* Removes a unicode locale attribute, if present, otherwise has no
|
||||
* effect. The attribute must not be empty string and must be well-formed
|
||||
* or U_ILLEGAL_ARGUMENT_ERROR will be set to status during the ulocbld_buildLocaleID()
|
||||
* and ulocbld_buildLanguageTag() calls.
|
||||
*
|
||||
* <p>Attribute comparison for removal is case-insensitive.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param attribute the attribute, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param length the length of the attribute; if negative, then the attribute
|
||||
* needs to be null terminated.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_removeUnicodeLocaleAttribute(
|
||||
ULocaleBuilder* builder, const char* attribute, int32_t length);
|
||||
|
||||
/**
|
||||
* Resets the builder to its initial, empty state.
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_clear(ULocaleBuilder* builder);
|
||||
|
||||
/**
|
||||
* Resets the extensions to their initial, empty state.
|
||||
* Language, script, region and variant are unchanged.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_clearExtensions(ULocaleBuilder* builder);
|
||||
|
||||
/**
|
||||
* Build the LocaleID string from the fields set on this builder.
|
||||
* If any set methods or during the ulocbld_buildLocaleID() call require memory
|
||||
* allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status.
|
||||
* If any of the fields set by the setters are not well-formed, the status
|
||||
* will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
|
||||
* not change after the ulocbld_buildLocaleID() call and the caller is
|
||||
* free to keep using the same builder to build more locales.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param locale the locale id
|
||||
* @param localeCapacity the size of the locale buffer to store the locale id
|
||||
* @param err the error code
|
||||
* @return the length of the locale id in buffer
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ulocbld_buildLocaleID(ULocaleBuilder* builder, char* locale,
|
||||
int32_t localeCapacity, UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Build the ULocale object from the fields set on this builder.
|
||||
* If any set methods or during the ulocbld_buildULocale() call require memory
|
||||
* allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status.
|
||||
* If any of the fields set by the setters are not well-formed, the status
|
||||
* will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
|
||||
* not change after the ulocbld_buildULocale() call and the caller is
|
||||
* free to keep using the same builder to build more locales.
|
||||
*
|
||||
* @param builder the builder.
|
||||
* @param err the error code.
|
||||
* @return the locale, a ULocale* pointer. The created ULocale must be
|
||||
* destroyed by calling {@link ulocale_close}.
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI ULocale* U_EXPORT2
|
||||
ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Build the IETF BCP 47 language tag string from the fields set on this builder.
|
||||
* If any set methods or during the ulocbld_buildLanguageTag() call require memory
|
||||
* allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status.
|
||||
* If any of the fields set by the setters are not well-formed, the status
|
||||
* will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
|
||||
* not change after the ulocbld_buildLanguageTag() call and the caller is free
|
||||
* to keep using the same builder to build more locales.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param language the language tag
|
||||
* @param languageCapacity the size of the language buffer to store the language
|
||||
* tag
|
||||
* @param err the error code
|
||||
* @return the length of the language tag in buffer
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ulocbld_buildLanguageTag(ULocaleBuilder* builder, char* language,
|
||||
int32_t languageCapacity, UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Sets the UErrorCode if an error occurred while recording sets.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param outErrorCode Set to an error code that occurred while setting subtags.
|
||||
* Unchanged if there is no such error or if outErrorCode
|
||||
* already contained an error.
|
||||
* @return true if U_FAILURE(*outErrorCode)
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ulocbld_copyErrorTo(const ULocaleBuilder* builder, UErrorCode *outErrorCode);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalULocaleBuilderPointer
|
||||
* "Smart pointer" class, closes a ULocaleBuilder via ulocbld_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleBuilderPointer, ULocaleBuilder, ulocbld_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __ULOCBUILDER_H__
|
||||
451
thirdparty/icu4c/common/unicode/umachine.h
vendored
Normal file
451
thirdparty/icu4c/common/unicode/umachine.h
vendored
Normal file
@@ -0,0 +1,451 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: umachine.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep13
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* This file defines basic types and constants for ICU to be
|
||||
* platform-independent. umachine.h and utf.h are included into
|
||||
* utypes.h to provide all the general definitions for ICU.
|
||||
* All of these definitions used to be in utypes.h before
|
||||
* the UTF-handling macros made this unmaintainable.
|
||||
*/
|
||||
|
||||
#ifndef __UMACHINE_H__
|
||||
#define __UMACHINE_H__
|
||||
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Basic types and constants for UTF
|
||||
*
|
||||
* <h2> Basic types and constants for UTF </h2>
|
||||
* This file defines basic types and constants for utf.h to be
|
||||
* platform-independent. umachine.h and utf.h are included into
|
||||
* utypes.h to provide all the general definitions for ICU.
|
||||
* All of these definitions used to be in utypes.h before
|
||||
* the UTF-handling macros made this unmaintainable.
|
||||
*
|
||||
*/
|
||||
/*==========================================================================*/
|
||||
/* Include platform-dependent definitions */
|
||||
/* which are contained in the platform-specific file platform.h */
|
||||
/*==========================================================================*/
|
||||
|
||||
#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
|
||||
|
||||
/*
|
||||
* ANSI C headers:
|
||||
* stddef.h defines wchar_t
|
||||
*/
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/*==========================================================================*/
|
||||
/* For C wrappers, we use the symbol U_CAPI. */
|
||||
/* This works properly if the includer is C or C++. */
|
||||
/* Functions are declared U_CAPI return-type U_EXPORT2 function-name()... */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_CFUNC
|
||||
* This is used in a declaration of a library private ICU C function.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_CDECL_BEGIN
|
||||
* This is used to begin a declaration of a library private ICU C API.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_CDECL_END
|
||||
* This is used to end a declaration of a library private ICU C API
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
# define U_CFUNC extern "C"
|
||||
# define U_CDECL_BEGIN extern "C" {
|
||||
# define U_CDECL_END }
|
||||
#else
|
||||
# define U_CFUNC extern
|
||||
# define U_CDECL_BEGIN
|
||||
# define U_CDECL_END
|
||||
#endif
|
||||
|
||||
#ifndef U_ATTRIBUTE_DEPRECATED
|
||||
/**
|
||||
* \def U_ATTRIBUTE_DEPRECATED
|
||||
* This is used for GCC specific attributes
|
||||
* @internal
|
||||
*/
|
||||
#if U_GCC_MAJOR_MINOR >= 302
|
||||
# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
|
||||
/**
|
||||
* \def U_ATTRIBUTE_DEPRECATED
|
||||
* This is used for Visual C++ specific attributes
|
||||
* @internal
|
||||
*/
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||
# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
|
||||
#else
|
||||
# define U_ATTRIBUTE_DEPRECATED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
|
||||
#define U_CAPI U_CFUNC U_EXPORT
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as a stable public ICU C API*/
|
||||
#define U_STABLE U_CAPI
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as a draft public ICU C API */
|
||||
#define U_DRAFT U_CAPI
|
||||
/** This is used to declare a function as a deprecated public ICU C API */
|
||||
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as an obsolete public ICU C API */
|
||||
#define U_OBSOLETE U_CAPI
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
|
||||
#define U_INTERNAL U_CAPI
|
||||
|
||||
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
|
||||
// series of statements wrapped in { } blocks and the caller could choose to
|
||||
// either treat them as if they were actual functions and end the invocation
|
||||
// with a trailing ; creating an empty statement after the block or else omit
|
||||
// this trailing ; using the knowledge that the macro would expand to { }.
|
||||
//
|
||||
// But doing so doesn't work well with macros that look like functions and
|
||||
// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
|
||||
// switches to the standard solution of wrapping such macros in do { } while.
|
||||
//
|
||||
// This will however break existing code that depends on being able to invoke
|
||||
// these macros without a trailing ; so to be able to remain compatible with
|
||||
// such code the wrapper is itself defined as macros so that it's possible to
|
||||
// build ICU 65 and later with the old macro behaviour, like this:
|
||||
//
|
||||
// export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
|
||||
// runConfigureICU ...
|
||||
//
|
||||
|
||||
/**
|
||||
* \def UPRV_BLOCK_MACRO_BEGIN
|
||||
* Defined as the "do" keyword by default.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UPRV_BLOCK_MACRO_BEGIN
|
||||
#define UPRV_BLOCK_MACRO_BEGIN do
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UPRV_BLOCK_MACRO_END
|
||||
* Defined as "while (false)" by default.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UPRV_BLOCK_MACRO_END
|
||||
#define UPRV_BLOCK_MACRO_END while (false)
|
||||
#endif
|
||||
|
||||
/*==========================================================================*/
|
||||
/* limits for int32_t etc., like in POSIX inttypes.h */
|
||||
/*==========================================================================*/
|
||||
|
||||
#ifndef INT8_MIN
|
||||
/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT8_MIN ((int8_t)(-128))
|
||||
#endif
|
||||
#ifndef INT16_MIN
|
||||
/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT16_MIN ((int16_t)(-32767-1))
|
||||
#endif
|
||||
#ifndef INT32_MIN
|
||||
/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT32_MIN ((int32_t)(-2147483647-1))
|
||||
#endif
|
||||
|
||||
#ifndef INT8_MAX
|
||||
/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT8_MAX ((int8_t)(127))
|
||||
#endif
|
||||
#ifndef INT16_MAX
|
||||
/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT16_MAX ((int16_t)(32767))
|
||||
#endif
|
||||
#ifndef INT32_MAX
|
||||
/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT32_MAX ((int32_t)(2147483647))
|
||||
#endif
|
||||
|
||||
#ifndef UINT8_MAX
|
||||
/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT8_MAX ((uint8_t)(255U))
|
||||
#endif
|
||||
#ifndef UINT16_MAX
|
||||
/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT16_MAX ((uint16_t)(65535U))
|
||||
#endif
|
||||
#ifndef UINT32_MAX
|
||||
/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT32_MAX ((uint32_t)(4294967295U))
|
||||
#endif
|
||||
|
||||
#if defined(U_INT64_T_UNAVAILABLE)
|
||||
# error int64_t is required for decimal format and rule-based number format.
|
||||
#else
|
||||
# ifndef INT64_C
|
||||
/**
|
||||
* Provides a platform independent way to specify a signed 64-bit integer constant.
|
||||
* note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
# define INT64_C(c) c ## LL
|
||||
# endif
|
||||
# ifndef UINT64_C
|
||||
/**
|
||||
* Provides a platform independent way to specify an unsigned 64-bit integer constant.
|
||||
* note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
# define UINT64_C(c) c ## ULL
|
||||
# endif
|
||||
# ifndef U_INT64_MIN
|
||||
/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
|
||||
# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
|
||||
# endif
|
||||
# ifndef U_INT64_MAX
|
||||
/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
|
||||
# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
|
||||
# endif
|
||||
# ifndef U_UINT64_MAX
|
||||
/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
|
||||
# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Boolean data type */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* The ICU boolean type, a signed-byte integer.
|
||||
* ICU-specific for historical reasons: The C and C++ standards used to not define type bool.
|
||||
* Also provides a fixed type definition, as opposed to
|
||||
* type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef int8_t UBool;
|
||||
|
||||
/**
|
||||
* \def U_DEFINE_FALSE_AND_TRUE
|
||||
* Normally turns off defining macros FALSE=0 & TRUE=1 in public ICU headers.
|
||||
* These obsolete macros sometimes break compilation of other code that
|
||||
* defines enum constants or similar with these names.
|
||||
* C++ has long defined bool/false/true.
|
||||
* C99 also added definitions for these, although as macros; see stdbool.h.
|
||||
*
|
||||
* You may transitionally define U_DEFINE_FALSE_AND_TRUE=1 if you need time to migrate code.
|
||||
*
|
||||
* @internal ICU 68
|
||||
*/
|
||||
#ifdef U_DEFINE_FALSE_AND_TRUE
|
||||
// Use the predefined value.
|
||||
#else
|
||||
// Default to avoiding collision with non-macro definitions of FALSE & TRUE.
|
||||
# define U_DEFINE_FALSE_AND_TRUE 0
|
||||
#endif
|
||||
|
||||
#if U_DEFINE_FALSE_AND_TRUE || defined(U_IN_DOXYGEN)
|
||||
#ifndef TRUE
|
||||
/**
|
||||
* The TRUE value of a UBool.
|
||||
*
|
||||
* @deprecated ICU 68 Use standard "true" instead.
|
||||
*/
|
||||
# define TRUE 1
|
||||
#endif
|
||||
#ifndef FALSE
|
||||
/**
|
||||
* The FALSE value of a UBool.
|
||||
*
|
||||
* @deprecated ICU 68 Use standard "false" instead.
|
||||
*/
|
||||
# define FALSE 0
|
||||
#endif
|
||||
#endif // U_DEFINE_FALSE_AND_TRUE
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Unicode data types */
|
||||
/*==========================================================================*/
|
||||
|
||||
/* wchar_t-related definitions -------------------------------------------- */
|
||||
|
||||
/*
|
||||
* \def U_WCHAR_IS_UTF16
|
||||
* Defined if wchar_t uses UTF-16.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
/*
|
||||
* \def U_WCHAR_IS_UTF32
|
||||
* Defined if wchar_t uses UTF-32.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
|
||||
# ifdef __STDC_ISO_10646__
|
||||
# if (U_SIZEOF_WCHAR_T==2)
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# elif (U_SIZEOF_WCHAR_T==4)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# endif
|
||||
# elif defined __UCS2__
|
||||
# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# endif
|
||||
# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
|
||||
# if (U_SIZEOF_WCHAR_T==4)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# endif
|
||||
# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# elif U_PLATFORM_HAS_WIN32_API
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* UChar and UChar32 definitions -------------------------------------------- */
|
||||
|
||||
/** Number of bytes in a UChar (always 2). @stable ICU 2.0 */
|
||||
#define U_SIZEOF_UCHAR 2
|
||||
|
||||
/**
|
||||
* \def U_CHAR16_IS_TYPEDEF
|
||||
* If 1, then char16_t is a typedef and not a real type (yet)
|
||||
* @internal
|
||||
*/
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
|
||||
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
|
||||
# define U_CHAR16_IS_TYPEDEF 1
|
||||
#else
|
||||
# define U_CHAR16_IS_TYPEDEF 0
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* \var UChar
|
||||
*
|
||||
* The base type for UTF-16 code units and pointers.
|
||||
* Unsigned 16-bit integer.
|
||||
* Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
|
||||
*
|
||||
* UChar is configurable by defining the macro UCHAR_TYPE
|
||||
* on the preprocessor or compiler command line:
|
||||
* -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
|
||||
* (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
|
||||
* This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
|
||||
*
|
||||
* The default is UChar=char16_t.
|
||||
*
|
||||
* C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
|
||||
*
|
||||
* In C, char16_t is a simple typedef of uint_least16_t.
|
||||
* ICU requires uint_least16_t=uint16_t for data memory mapping.
|
||||
* On macOS, char16_t is not available because the uchar.h standard header is missing.
|
||||
*
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
|
||||
#if 1
|
||||
// #if 1 is normal. UChar defaults to char16_t in C++.
|
||||
// For configuration testing of UChar=uint16_t temporarily change this to #if 0.
|
||||
#else
|
||||
# define UCHAR_TYPE uint16_t
|
||||
#endif
|
||||
|
||||
#if defined(U_ALL_IMPLEMENTATION) || !defined(UCHAR_TYPE)
|
||||
typedef char16_t UChar;
|
||||
#else
|
||||
typedef UCHAR_TYPE UChar;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \var OldUChar
|
||||
* Default ICU 58 definition of UChar.
|
||||
* A base type for UTF-16 code units and pointers.
|
||||
* Unsigned 16-bit integer.
|
||||
*
|
||||
* Define OldUChar to be wchar_t if that is 16 bits wide.
|
||||
* If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ if the compiler provides it, or else uint16_t.
|
||||
*
|
||||
* This makes the definition of OldUChar platform-dependent
|
||||
* but allows direct string type compatibility with platforms with
|
||||
* 16-bit wchar_t types.
|
||||
*
|
||||
* This is how UChar was defined in ICU 58, for transition convenience.
|
||||
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
|
||||
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
|
||||
*
|
||||
* @stable ICU 59
|
||||
*/
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
typedef wchar_t OldUChar;
|
||||
#elif defined(__CHAR16_TYPE__)
|
||||
typedef __CHAR16_TYPE__ OldUChar;
|
||||
#else
|
||||
typedef uint16_t OldUChar;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Define UChar32 as a type for single Unicode code points.
|
||||
* UChar32 is a signed 32-bit integer (same as int32_t).
|
||||
*
|
||||
* The Unicode code point range is 0..0x10ffff.
|
||||
* All other values (negative or >=0x110000) are illegal as Unicode code points.
|
||||
* They may be used as sentinel values to indicate "done", "error"
|
||||
* or similar non-code point conditions.
|
||||
*
|
||||
* Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
|
||||
* to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
|
||||
* or else to be uint32_t.
|
||||
* That is, the definition of UChar32 was platform-dependent.
|
||||
*
|
||||
* @see U_SENTINEL
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
typedef int32_t UChar32;
|
||||
|
||||
/**
|
||||
* This value is intended for sentinel values for APIs that
|
||||
* (take or) return single code points (UChar32).
|
||||
* It is outside of the Unicode code point range 0..0x10ffff.
|
||||
*
|
||||
* For example, a "done" or "error" value in a new API
|
||||
* could be indicated with U_SENTINEL.
|
||||
*
|
||||
* ICU APIs designed before ICU 2.4 usually define service-specific "done"
|
||||
* values, mostly 0xffff.
|
||||
* Those may need to be distinguished from
|
||||
* actual U+ffff text contents by calling functions like
|
||||
* CharacterIterator::hasNext() or UnicodeString::length().
|
||||
*
|
||||
* @return -1
|
||||
* @see UChar32
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_SENTINEL (-1)
|
||||
|
||||
#include "unicode/urename.h"
|
||||
|
||||
#endif
|
||||
62
thirdparty/icu4c/common/unicode/umisc.h
vendored
Normal file
62
thirdparty/icu4c/common/unicode/umisc.h
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: umisc.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999oct15
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef UMISC_H
|
||||
#define UMISC_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Miscellaneous definitions
|
||||
*
|
||||
* This file contains miscellaneous definitions for the C APIs.
|
||||
*/
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/** A struct representing a range of text containing a specific field
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UFieldPosition {
|
||||
/**
|
||||
* The field
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t field;
|
||||
/**
|
||||
* The start of the text range containing field
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t beginIndex;
|
||||
/**
|
||||
* The limit of the text range containing field
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t endIndex;
|
||||
} UFieldPosition;
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
/**
|
||||
* Opaque type returned by registerInstance, registerFactory and unregister for service registration.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
typedef const void* URegistryKey;
|
||||
#endif
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
240
thirdparty/icu4c/common/unicode/umutablecptrie.h
vendored
Normal file
240
thirdparty/icu4c/common/unicode/umutablecptrie.h
vendored
Normal file
@@ -0,0 +1,240 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// umutablecptrie.h (split out of ucptrie.h)
|
||||
// created: 2018jan24 Markus W. Scherer
|
||||
|
||||
#ifndef __UMUTABLECPTRIE_H__
|
||||
#define __UMUTABLECPTRIE_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#include "unicode/ucpmap.h"
|
||||
#include "unicode/ucptrie.h"
|
||||
#include "unicode/utf8.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: This file defines a mutable Unicode code point trie.
|
||||
*
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
*/
|
||||
|
||||
/**
|
||||
* Mutable Unicode code point trie.
|
||||
* Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit integer values.
|
||||
* For details see https://icu.unicode.org/design/struct/utrie
|
||||
*
|
||||
* Setting values (especially ranges) and lookup is fast.
|
||||
* The mutable trie is only somewhat space-efficient.
|
||||
* It builds a compacted, immutable UCPTrie.
|
||||
*
|
||||
* This trie can be modified while iterating over its contents.
|
||||
* For example, it is possible to merge its values with those from another
|
||||
* set of ranges (e.g., another mutable or immutable trie):
|
||||
* Iterate over those source ranges; for each of them iterate over this trie;
|
||||
* add the source value into the value of each trie range.
|
||||
*
|
||||
* @see UCPTrie
|
||||
* @see umutablecptrie_buildImmutable
|
||||
* @stable ICU 63
|
||||
*/
|
||||
typedef struct UMutableCPTrie UMutableCPTrie;
|
||||
|
||||
/**
|
||||
* Creates a mutable trie that initially maps each Unicode code point to the same value.
|
||||
* It uses 32-bit data values until umutablecptrie_buildImmutable() is called.
|
||||
* umutablecptrie_buildImmutable() takes a valueWidth parameter which
|
||||
* determines the number of bits in the data value in the resulting UCPTrie.
|
||||
* You must umutablecptrie_close() the trie once you are done using it.
|
||||
*
|
||||
* @param initialValue the initial value that is set for all code points
|
||||
* @param errorValue the value for out-of-range code points and ill-formed UTF-8/16
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
* @return the trie
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UMutableCPTrie * U_EXPORT2
|
||||
umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Clones a mutable trie.
|
||||
* You must umutablecptrie_close() the clone once you are done using it.
|
||||
*
|
||||
* @param other the trie to clone
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
* @return the trie clone
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UMutableCPTrie * U_EXPORT2
|
||||
umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Closes a mutable trie and releases associated memory.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
umutablecptrie_close(UMutableCPTrie *trie);
|
||||
|
||||
/**
|
||||
* Creates a mutable trie with the same contents as the UCPMap.
|
||||
* You must umutablecptrie_close() the mutable trie once you are done using it.
|
||||
*
|
||||
* @param map the source map
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
* @return the mutable trie
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UMutableCPTrie * U_EXPORT2
|
||||
umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Creates a mutable trie with the same contents as the immutable one.
|
||||
* You must umutablecptrie_close() the mutable trie once you are done using it.
|
||||
*
|
||||
* @param trie the immutable trie
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
* @return the mutable trie
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UMutableCPTrie * U_EXPORT2
|
||||
umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns the value for a code point as stored in the trie.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @param c the code point
|
||||
* @return the value
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c);
|
||||
|
||||
/**
|
||||
* Returns the last code point such that all those from start to there have the same value.
|
||||
* Can be used to efficiently iterate over all same-value ranges in a trie.
|
||||
* (This is normally faster than iterating over code points and get()ting each value,
|
||||
* but much slower than a data structure that stores ranges directly.)
|
||||
*
|
||||
* The trie can be modified between calls to this function.
|
||||
*
|
||||
* If the UCPMapValueFilter function pointer is not NULL, then
|
||||
* the value to be delivered is passed through that function, and the return value is the end
|
||||
* of the range where all values are modified to the same actual value.
|
||||
* The value is unchanged if that function pointer is NULL.
|
||||
*
|
||||
* See the same-signature ucptrie_getRange() for a code sample.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @param start range start
|
||||
* @param option defines whether surrogates are treated normally,
|
||||
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
|
||||
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
|
||||
* @param filter a pointer to a function that may modify the trie data value,
|
||||
* or NULL if the values from the trie are to be used unmodified
|
||||
* @param context an opaque pointer that is passed on to the filter function
|
||||
* @param pValue if not NULL, receives the value that every code point start..end has;
|
||||
* may have been modified by filter(context, trie value)
|
||||
* if that function pointer is not NULL
|
||||
* @return the range end code point, or -1 if start is not a valid code point
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
|
||||
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||
|
||||
/**
|
||||
* Sets a value for a code point.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @param c the code point
|
||||
* @param value the value
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Sets a value for each code point [start..end].
|
||||
* Faster and more space-efficient than setting the value for each code point separately.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @param start the first code point to be assigned the value
|
||||
* @param end the last code point to be assigned the value (inclusive)
|
||||
* @param value the value
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
umutablecptrie_setRange(UMutableCPTrie *trie,
|
||||
UChar32 start, UChar32 end,
|
||||
uint32_t value, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Compacts the data and builds an immutable UCPTrie according to the parameters.
|
||||
* After this, the mutable trie will be empty.
|
||||
*
|
||||
* The mutable trie stores 32-bit values until buildImmutable() is called.
|
||||
* If values shorter than 32 bits are to be stored in the immutable trie,
|
||||
* then the upper bits are discarded.
|
||||
* For example, when the mutable trie contains values 0x81, -0x7f, and 0xa581,
|
||||
* and the value width is 8 bits, then each of these is stored as 0x81
|
||||
* and the immutable trie will return that as an unsigned value.
|
||||
* (Some implementations may want to make productive temporary use of the upper bits
|
||||
* until buildImmutable() discards them.)
|
||||
*
|
||||
* Not every possible set of mappings can be built into a UCPTrie,
|
||||
* because of limitations resulting from speed and space optimizations.
|
||||
* Every Unicode assigned character can be mapped to a unique value.
|
||||
* Typical data yields data structures far smaller than the limitations.
|
||||
*
|
||||
* It is possible to construct extremely unusual mappings that exceed the data structure limits.
|
||||
* In such a case this function will fail with a U_INDEX_OUTOFBOUNDS_ERROR.
|
||||
*
|
||||
* @param trie the mutable trie
|
||||
* @param type selects the trie type
|
||||
* @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits,
|
||||
* then the values stored in the trie will be truncated first
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
*
|
||||
* @see umutablecptrie_fromUCPTrie
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UCPTrie * U_EXPORT2
|
||||
umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUMutableCPTriePointer
|
||||
* "Smart pointer" class, closes a UMutableCPTrie via umutablecptrie_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
136
thirdparty/icu4c/common/unicode/unifilt.h
vendored
Normal file
136
thirdparty/icu4c/common/unicode/unifilt.h
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2010, International Business Machines Corporation and others.
|
||||
* All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 11/17/99 aliu Creation.
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef UNIFILT_H
|
||||
#define UNIFILT_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/unifunct.h"
|
||||
#include "unicode/unimatch.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Unicode Filter
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* U_ETHER is used to represent character values for positions outside
|
||||
* a range. For example, transliterator uses this to represent
|
||||
* characters outside the range contextStart..contextLimit-1. This
|
||||
* allows explicit matching by rules and UnicodeSets of text outside a
|
||||
* defined range.
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
#define U_ETHER ((char16_t)0xFFFF)
|
||||
|
||||
/**
|
||||
*
|
||||
* <code>UnicodeFilter</code> defines a protocol for selecting a
|
||||
* subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
|
||||
* Currently, filters are used in conjunction with classes like
|
||||
* {@link Transliterator} to only process selected characters through a
|
||||
* transformation.
|
||||
*
|
||||
* <p>Note: UnicodeFilter currently stubs out two pure virtual methods
|
||||
* of its base class, UnicodeMatcher. These methods are toPattern()
|
||||
* and matchesIndexValue(). This is done so that filter classes that
|
||||
* are not actually used as matchers -- specifically, those in the
|
||||
* UnicodeFilterLogic component, and those in tests -- can continue to
|
||||
* work without defining these methods. As long as a filter is not
|
||||
* used in an RBT during real transliteration, these methods will not
|
||||
* be called. However, this breaks the UnicodeMatcher base class
|
||||
* protocol, and it is not a correct solution.
|
||||
*
|
||||
* <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
|
||||
* hierarchy and either redesign it, or simply remove the stubs in
|
||||
* UnicodeFilter and force subclasses to implement the full
|
||||
* UnicodeMatcher protocol.
|
||||
*
|
||||
* @see UnicodeFilterLogic
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
|
||||
|
||||
public:
|
||||
/**
|
||||
* Destructor
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~UnicodeFilter();
|
||||
|
||||
/**
|
||||
* Clones this object polymorphically.
|
||||
* The caller owns the result and should delete it when done.
|
||||
* @return clone, or nullptr if an error occurred
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UnicodeFilter* clone() const override = 0;
|
||||
|
||||
/**
|
||||
* Returns <tt>true</tt> for characters that are in the selected
|
||||
* subset. In other words, if a character is <b>to be
|
||||
* filtered</b>, then <tt>contains()</tt> returns
|
||||
* <b><tt>false</tt></b>.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UBool contains(UChar32 c) const = 0;
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
|
||||
* and return the pointer.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UnicodeMatcher* toMatcher() const override;
|
||||
|
||||
/**
|
||||
* Implement UnicodeMatcher API.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UMatchDegree matches(const Replaceable& text,
|
||||
int32_t& offset,
|
||||
int32_t limit,
|
||||
UBool incremental) override;
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API. Nothing to do.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual void setData(const TransliterationRuleData*) override;
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
protected:
|
||||
|
||||
/*
|
||||
* Since this class has pure virtual functions,
|
||||
* a constructor can't be used.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
/* UnicodeFilter();*/
|
||||
};
|
||||
|
||||
/*inline UnicodeFilter::UnicodeFilter() {}*/
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
132
thirdparty/icu4c/common/unicode/unifunct.h
vendored
Normal file
132
thirdparty/icu4c/common/unicode/unifunct.h
vendored
Normal file
@@ -0,0 +1,132 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002-2005, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 01/14/2002 aliu Creation.
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef UNIFUNCT_H
|
||||
#define UNIFUNCT_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Unicode Functor
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeMatcher;
|
||||
class UnicodeReplacer;
|
||||
class TransliterationRuleData;
|
||||
|
||||
/**
|
||||
* <code>UnicodeFunctor</code> is an abstract base class for objects
|
||||
* that perform match and/or replace operations on Unicode strings.
|
||||
* @author Alan Liu
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
class U_COMMON_API UnicodeFunctor : public UObject {
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual ~UnicodeFunctor();
|
||||
|
||||
/**
|
||||
* Return a copy of this object. All UnicodeFunctor objects
|
||||
* have to support cloning in order to allow classes using
|
||||
* UnicodeFunctor to implement cloning.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UnicodeFunctor* clone() const = 0;
|
||||
|
||||
/**
|
||||
* Cast 'this' to a UnicodeMatcher* pointer and return the
|
||||
* pointer, or null if this is not a UnicodeMatcher*. Subclasses
|
||||
* that mix in UnicodeMatcher as a base class must override this.
|
||||
* This protocol is required because a pointer to a UnicodeFunctor
|
||||
* cannot be cast to a pointer to a UnicodeMatcher, since
|
||||
* UnicodeMatcher is a mixin that does not derive from
|
||||
* UnicodeFunctor.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UnicodeMatcher* toMatcher() const;
|
||||
|
||||
/**
|
||||
* Cast 'this' to a UnicodeReplacer* pointer and return the
|
||||
* pointer, or null if this is not a UnicodeReplacer*. Subclasses
|
||||
* that mix in UnicodeReplacer as a base class must override this.
|
||||
* This protocol is required because a pointer to a UnicodeFunctor
|
||||
* cannot be cast to a pointer to a UnicodeReplacer, since
|
||||
* UnicodeReplacer is a mixin that does not derive from
|
||||
* UnicodeFunctor.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UnicodeReplacer* toReplacer() const;
|
||||
|
||||
/**
|
||||
* Return the class ID for this class. This is useful only for
|
||||
* comparing to a return value from getDynamicClassID().
|
||||
* @return The class ID for all objects of this class.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
/**
|
||||
* Returns a unique class ID <b>polymorphically</b>. This method
|
||||
* is to implement a simple version of RTTI, since not all C++
|
||||
* compilers support genuine RTTI. Polymorphic operator==() and
|
||||
* clone() methods call this method.
|
||||
*
|
||||
* <p>Concrete subclasses of UnicodeFunctor should use the macro
|
||||
* UOBJECT_DEFINE_RTTI_IMPLEMENTATION from uobject.h to
|
||||
* provide definitions getStaticClassID and getDynamicClassID.
|
||||
*
|
||||
* @return The class ID for this object. All objects of a given
|
||||
* class have the same class ID. Objects of other classes have
|
||||
* different class IDs.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const override = 0;
|
||||
|
||||
/**
|
||||
* Set the data object associated with this functor. The data
|
||||
* object provides context for functor-to-standin mapping. This
|
||||
* method is required when assigning a functor to a different data
|
||||
* object. This function MAY GO AWAY later if the architecture is
|
||||
* changed to pass data object pointers through the API.
|
||||
* @internal ICU 2.1
|
||||
*/
|
||||
virtual void setData(const TransliterationRuleData*) = 0;
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Since this class has pure virtual functions,
|
||||
* a constructor can't be used.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
/*UnicodeFunctor();*/
|
||||
|
||||
};
|
||||
|
||||
/*inline UnicodeFunctor::UnicodeFunctor() {}*/
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
168
thirdparty/icu4c/common/unicode/unimatch.h
vendored
Normal file
168
thirdparty/icu4c/common/unicode/unimatch.h
vendored
Normal file
@@ -0,0 +1,168 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 07/18/01 aliu Creation.
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef UNIMATCH_H
|
||||
#define UNIMATCH_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Unicode Matcher
|
||||
*/
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class Replaceable;
|
||||
class UnicodeString;
|
||||
class UnicodeSet;
|
||||
|
||||
/**
|
||||
* Constants returned by <code>UnicodeMatcher::matches()</code>
|
||||
* indicating the degree of match.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
enum UMatchDegree {
|
||||
/**
|
||||
* Constant returned by <code>matches()</code> indicating a
|
||||
* mismatch between the text and this matcher. The text contains
|
||||
* a character which does not match, or the text does not contain
|
||||
* all desired characters for a non-incremental match.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_MISMATCH,
|
||||
|
||||
/**
|
||||
* Constant returned by <code>matches()</code> indicating a
|
||||
* partial match between the text and this matcher. This value is
|
||||
* only returned for incremental match operations. All characters
|
||||
* of the text match, but more characters are required for a
|
||||
* complete match. Alternatively, for variable-length matchers,
|
||||
* all characters of the text match, and if more characters were
|
||||
* supplied at limit, they might also match.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_PARTIAL_MATCH,
|
||||
|
||||
/**
|
||||
* Constant returned by <code>matches()</code> indicating a
|
||||
* complete match between the text and this matcher. For an
|
||||
* incremental variable-length match, this value is returned if
|
||||
* the given text matches, and it is known that additional
|
||||
* characters would not alter the extent of the match.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_MATCH
|
||||
};
|
||||
|
||||
/**
|
||||
* <code>UnicodeMatcher</code> defines a protocol for objects that can
|
||||
* match a range of characters in a Replaceable string.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
|
||||
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual ~UnicodeMatcher();
|
||||
|
||||
/**
|
||||
* Return a UMatchDegree value indicating the degree of match for
|
||||
* the given text at the given offset. Zero, one, or more
|
||||
* characters may be matched.
|
||||
*
|
||||
* Matching in the forward direction is indicated by limit >
|
||||
* offset. Characters from offset forwards to limit-1 will be
|
||||
* considered for matching.
|
||||
*
|
||||
* Matching in the reverse direction is indicated by limit <
|
||||
* offset. Characters from offset backwards to limit+1 will be
|
||||
* considered for matching.
|
||||
*
|
||||
* If limit == offset then the only match possible is a zero
|
||||
* character match (which subclasses may implement if desired).
|
||||
*
|
||||
* As a side effect, advance the offset parameter to the limit of
|
||||
* the matched substring. In the forward direction, this will be
|
||||
* the index of the last matched character plus one. In the
|
||||
* reverse direction, this will be the index of the last matched
|
||||
* character minus one.
|
||||
*
|
||||
* <p>Note: This method is not const because some classes may
|
||||
* modify their state as the result of a match.
|
||||
*
|
||||
* @param text the text to be matched
|
||||
* @param offset on input, the index into text at which to begin
|
||||
* matching. On output, the limit of the matched text. The
|
||||
* number of matched characters is the output value of offset
|
||||
* minus the input value. Offset should always point to the
|
||||
* HIGH SURROGATE (leading code unit) of a pair of surrogates,
|
||||
* both on entry and upon return.
|
||||
* @param limit the limit index of text to be matched. Greater
|
||||
* than offset for a forward direction match, less than offset for
|
||||
* a backward direction match. The last character to be
|
||||
* considered for matching will be text.charAt(limit-1) in the
|
||||
* forward direction or text.charAt(limit+1) in the backward
|
||||
* direction.
|
||||
* @param incremental if true, then assume further characters may
|
||||
* be inserted at limit and check for partial matching. Otherwise
|
||||
* assume the text as given is complete.
|
||||
* @return a match degree value indicating a full match, a partial
|
||||
* match, or a mismatch. If incremental is false then
|
||||
* U_PARTIAL_MATCH should never be returned.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UMatchDegree matches(const Replaceable& text,
|
||||
int32_t& offset,
|
||||
int32_t limit,
|
||||
UBool incremental) = 0;
|
||||
|
||||
/**
|
||||
* Returns a string representation of this matcher. If the result of
|
||||
* calling this function is passed to the appropriate parser, it
|
||||
* will produce another matcher that is equal to this one.
|
||||
* @param result the string to receive the pattern. Previous
|
||||
* contents will be deleted.
|
||||
* @param escapeUnprintable if true then convert unprintable
|
||||
* characters to their hex escape representations, \\uxxxx or
|
||||
* \\Uxxxxxxxx. Unprintable characters are those other than
|
||||
* U+000A, U+0020..U+007E.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UnicodeString& toPattern(UnicodeString& result,
|
||||
UBool escapeUnprintable = false) const = 0;
|
||||
|
||||
/**
|
||||
* Returns true if this matcher will match a character c, where
|
||||
* (c & 0xFF) == v, at offset, in the forward direction (with limit >
|
||||
* offset). This is used by <tt>RuleBasedTransliterator</tt> for
|
||||
* indexing.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UBool matchesIndexValue(uint8_t v) const = 0;
|
||||
|
||||
/**
|
||||
* Union the set of all characters that may be matched by this object
|
||||
* into the given set.
|
||||
* @param toUnionTo the set into which to union the source characters
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
1908
thirdparty/icu4c/common/unicode/uniset.h
vendored
Normal file
1908
thirdparty/icu4c/common/unicode/uniset.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
5095
thirdparty/icu4c/common/unicode/unistr.h
vendored
Normal file
5095
thirdparty/icu4c/common/unicode/unistr.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
476
thirdparty/icu4c/common/unicode/unorm.h
vendored
Normal file
476
thirdparty/icu4c/common/unicode/unorm.h
vendored
Normal file
@@ -0,0 +1,476 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (c) 1996-2016, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* File unorm.h
|
||||
*
|
||||
* Created by: Vladimir Weinstein 12052000
|
||||
*
|
||||
* Modification history :
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/01/01 synwee Added normalization quickcheck enum and method.
|
||||
*/
|
||||
#ifndef UNORM_H
|
||||
#define UNORM_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/uiter.h"
|
||||
#include "unicode/unorm2.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Unicode Normalization
|
||||
*
|
||||
* Old Unicode normalization API.
|
||||
*
|
||||
* This API has been replaced by the unorm2.h API and is only available
|
||||
* for backward compatibility. The functions here simply delegate to the
|
||||
* unorm2.h functions, for example unorm2_getInstance() and unorm2_normalize().
|
||||
* There is one exception: The new API does not provide a replacement for unorm_compare().
|
||||
* Its declaration has been moved to unorm2.h.
|
||||
*
|
||||
* <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or
|
||||
* decomposed form, allowing for easier sorting and searching of text.
|
||||
* <code>unorm_normalize</code> supports the standard normalization forms described in
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
|
||||
* Unicode Standard Annex #15: Unicode Normalization Forms</a>.
|
||||
*
|
||||
* Characters with accents or other adornments can be encoded in
|
||||
* several different ways in Unicode. For example, take the character A-acute.
|
||||
* In Unicode, this can be encoded as a single character (the
|
||||
* "composed" form):
|
||||
*
|
||||
* \code
|
||||
* 00C1 LATIN CAPITAL LETTER A WITH ACUTE
|
||||
* \endcode
|
||||
*
|
||||
* or as two separate characters (the "decomposed" form):
|
||||
*
|
||||
* \code
|
||||
* 0041 LATIN CAPITAL LETTER A
|
||||
* 0301 COMBINING ACUTE ACCENT
|
||||
* \endcode
|
||||
*
|
||||
* To a user of your program, however, both of these sequences should be
|
||||
* treated as the same "user-level" character "A with acute accent". When you are searching or
|
||||
* comparing text, you must ensure that these two sequences are treated
|
||||
* equivalently. In addition, you must handle characters with more than one
|
||||
* accent. Sometimes the order of a character's combining accents is
|
||||
* significant, while in other cases accent sequences in different orders are
|
||||
* really equivalent.
|
||||
*
|
||||
* Similarly, the string "ffi" can be encoded as three separate letters:
|
||||
*
|
||||
* \code
|
||||
* 0066 LATIN SMALL LETTER F
|
||||
* 0066 LATIN SMALL LETTER F
|
||||
* 0069 LATIN SMALL LETTER I
|
||||
* \endcode
|
||||
*
|
||||
* or as the single character
|
||||
*
|
||||
* \code
|
||||
* FB03 LATIN SMALL LIGATURE FFI
|
||||
* \endcode
|
||||
*
|
||||
* The ffi ligature is not a distinct semantic character, and strictly speaking
|
||||
* it shouldn't be in Unicode at all, but it was included for compatibility
|
||||
* with existing character sets that already provided it. The Unicode standard
|
||||
* identifies such characters by giving them "compatibility" decompositions
|
||||
* into the corresponding semantic characters. When sorting and searching, you
|
||||
* will often want to use these mappings.
|
||||
*
|
||||
* <code>unorm_normalize</code> helps solve these problems by transforming text into the
|
||||
* canonical composed and decomposed forms as shown in the first example above.
|
||||
* In addition, you can have it perform compatibility decompositions so that
|
||||
* you can treat compatibility characters the same as their equivalents.
|
||||
* Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical
|
||||
* order, so that you do not have to worry about accent rearrangement on your
|
||||
* own.
|
||||
*
|
||||
* Form FCD, "Fast C or D", is also designed for collation.
|
||||
* It allows working on strings that are not necessarily normalized
|
||||
* with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed
|
||||
* characters and their decomposed equivalents the same.
|
||||
*
|
||||
* It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings
|
||||
* may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical
|
||||
* themselves.
|
||||
*
|
||||
* The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character,
|
||||
* results in a string that is canonically ordered. This means that precomposed characters are allowed for as long
|
||||
* as their decompositions do not need canonical reordering.
|
||||
*
|
||||
* Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts -
|
||||
* already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will
|
||||
* return UNORM_YES for most strings in practice.
|
||||
*
|
||||
* unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD.
|
||||
*
|
||||
* For more details on FCD see the collation design document:
|
||||
* https://htmlpreview.github.io/?https://github.com/unicode-org/icu-docs/blob/main/design/collation/ICU_collation_design.htm
|
||||
*
|
||||
* ICU collation performs either NFD or FCD normalization automatically if normalization
|
||||
* is turned on for the collator object.
|
||||
* Beyond collation and string search, normalized strings may be useful for string equivalence comparisons,
|
||||
* transliteration/transcription, unique representations, etc.
|
||||
*
|
||||
* The W3C generally recommends exchanging texts in NFC.
|
||||
* Note also that most legacy character encodings use only precomposed forms and often do not
|
||||
* encode any combining marks by themselves. For conversion to such character encodings the
|
||||
* Unicode text needs to be normalized to NFC.
|
||||
* For more usage examples, see the Unicode Standard Annex.
|
||||
*/
|
||||
|
||||
// Do not conditionalize the following enum with #ifndef U_HIDE_DEPRECATED_API,
|
||||
// it is needed for layout of Normalizer object.
|
||||
#ifndef U_FORCE_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Constants for normalization modes.
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
typedef enum {
|
||||
/** No decomposition/composition. @deprecated ICU 56 Use unorm2.h instead. */
|
||||
UNORM_NONE = 1,
|
||||
/** Canonical decomposition. @deprecated ICU 56 Use unorm2.h instead. */
|
||||
UNORM_NFD = 2,
|
||||
/** Compatibility decomposition. @deprecated ICU 56 Use unorm2.h instead. */
|
||||
UNORM_NFKD = 3,
|
||||
/** Canonical decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */
|
||||
UNORM_NFC = 4,
|
||||
/** Default normalization; an alias for UNORM_NFC (same value, 4). @deprecated ICU 56 Use unorm2.h instead. */
|
||||
UNORM_DEFAULT = UNORM_NFC,
|
||||
/** Compatibility decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */
|
||||
UNORM_NFKC =5,
|
||||
/** "Fast C or D" form. @deprecated ICU 56 Use unorm2.h instead. */
|
||||
UNORM_FCD = 6,
|
||||
|
||||
/** One more than the highest normalization mode constant (evaluates to 7, i.e. UNORM_FCD + 1). @deprecated ICU 56 Use unorm2.h instead. */
|
||||
UNORM_MODE_COUNT
|
||||
} UNormalizationMode;
|
||||
|
||||
#endif // U_FORCE_HIDE_DEPRECATED_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Constants for options flags for normalization.
|
||||
* Use 0 for default options,
|
||||
* including normalization according to the Unicode version
|
||||
* that is currently supported by ICU (see u_getUnicodeVersion).
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
enum {
|
||||
/**
|
||||
* Options bit set value to select Unicode 3.2 normalization
|
||||
* (except NormalizationCorrections).
|
||||
* At most one Unicode version can be selected at a time.
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
UNORM_UNICODE_3_2=0x20
|
||||
};
|
||||
|
||||
/**
|
||||
* Lowest-order bit number of unorm_compare() options bits corresponding to
|
||||
* normalization options bits.
|
||||
*
|
||||
* The options parameter for unorm_compare() uses most bits for
|
||||
* itself and for various comparison and folding flags.
|
||||
* The most significant bits, however, are shifted down and passed on
|
||||
* to the normalization implementation.
|
||||
* (That is, from unorm_compare(..., options, ...),
|
||||
* options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
|
||||
* internal normalization functions.)
|
||||
*
|
||||
* @see unorm_compare
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
|
||||
|
||||
/**
|
||||
* Normalize a string.
|
||||
* The string will be normalized according to the specified normalization mode
|
||||
* and options.
|
||||
* The source and result buffers must not be the same, nor overlap.
|
||||
*
|
||||
* @param source The string to normalize.
|
||||
* @param sourceLength The length of source, or -1 if NUL-terminated.
|
||||
* @param mode The normalization mode; one of UNORM_NONE,
|
||||
* UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT.
|
||||
* @param options The normalization options, ORed together (0 for no options).
|
||||
* @param result A pointer to a buffer to receive the result string.
|
||||
* The result string is NUL-terminated if possible.
|
||||
* @param resultLength The maximum size of result.
|
||||
* @param status A pointer to a UErrorCode to receive any errors.
|
||||
* @return The total buffer size needed; if greater than resultLength,
|
||||
* the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
U_DEPRECATED int32_t U_EXPORT2
|
||||
unorm_normalize(const UChar *source, int32_t sourceLength,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UChar *result, int32_t resultLength,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Performs a quick check on a string to quickly determine if the string is
|
||||
* in a particular normalization format.
|
||||
* Three types of result can be returned: UNORM_YES, UNORM_NO or
|
||||
* UNORM_MAYBE. Result UNORM_YES indicates that the argument
|
||||
* string is in the desired normalized format; UNORM_NO indicates that the
|
||||
* argument string is not in the desired normalized format. A
|
||||
* UNORM_MAYBE result indicates that a more thorough check is required;
|
||||
* the user may have to put the string in its normalized form and compare the
|
||||
* results.
|
||||
*
|
||||
* @param source string for determining if it is in a normalized format
|
||||
* @param sourcelength length of source to test, or -1 if NUL-terminated
|
||||
* @param mode which normalization form to test for
|
||||
* @param status a pointer to a UErrorCode to receive any errors
|
||||
* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
|
||||
*
|
||||
* @see unorm_isNormalized
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
U_DEPRECATED UNormalizationCheckResult U_EXPORT2
|
||||
unorm_quickCheck(const UChar *source, int32_t sourcelength,
|
||||
UNormalizationMode mode,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Performs a quick check on a string; same as unorm_quickCheck but
|
||||
* takes an extra options parameter like most normalization functions.
|
||||
*
|
||||
* @param src String that is to be tested if it is in a normalization format.
|
||||
* @param srcLength Length of source to test, or -1 if NUL-terminated.
|
||||
* @param mode Which normalization form to test for.
|
||||
* @param options The normalization options, ORed together (0 for no options).
|
||||
* @param pErrorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
|
||||
*
|
||||
* @see unorm_quickCheck
|
||||
* @see unorm_isNormalized
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
U_DEPRECATED UNormalizationCheckResult U_EXPORT2
|
||||
unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Test if a string is in a given normalization form.
|
||||
* This is semantically equivalent to source.equals(normalize(source, mode)).
|
||||
*
|
||||
* Unlike unorm_quickCheck(), this function returns a definitive result,
|
||||
* never a "maybe".
|
||||
* For NFD, NFKD, and FCD, both functions work exactly the same.
|
||||
* For NFC and NFKC where quickCheck may return "maybe", this function will
|
||||
* perform further tests to arrive at a true/false result.
|
||||
*
|
||||
* @param src String that is to be tested if it is in a normalization format.
|
||||
* @param srcLength Length of source to test, or -1 if NUL-terminated.
|
||||
* @param mode Which normalization form to test for.
|
||||
* @param pErrorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Boolean value indicating whether the source string is in the
|
||||
* "mode" normalization form.
|
||||
*
|
||||
* @see unorm_quickCheck
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
U_DEPRECATED UBool U_EXPORT2
|
||||
unorm_isNormalized(const UChar *src, int32_t srcLength,
|
||||
UNormalizationMode mode,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Test if a string is in a given normalization form; same as unorm_isNormalized but
|
||||
* takes an extra options parameter like most normalization functions.
|
||||
*
|
||||
* @param src String that is to be tested if it is in a normalization format.
|
||||
* @param srcLength Length of source to test, or -1 if NUL-terminated.
|
||||
* @param mode Which normalization form to test for.
|
||||
* @param options The normalization options, ORed together (0 for no options).
|
||||
* @param pErrorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Boolean value indicating whether the source string is in the
|
||||
* "mode/options" normalization form.
|
||||
*
|
||||
* @see unorm_quickCheck
|
||||
* @see unorm_isNormalized
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
U_DEPRECATED UBool U_EXPORT2
|
||||
unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Iterative normalization forward.
|
||||
* This function (together with unorm_previous) is somewhat
|
||||
* similar to the C++ Normalizer class (see its non-static functions).
|
||||
*
|
||||
* Iterative normalization is useful when only a small portion of a longer
|
||||
* string/text needs to be processed.
|
||||
*
|
||||
* For example, the likelihood may be high that processing the first 10% of some
|
||||
* text will be sufficient to find certain data.
|
||||
* Another example: When one wants to concatenate two normalized strings and get a
|
||||
* normalized result, it is much more efficient to normalize just a small part of
|
||||
* the result around the concatenation place instead of re-normalizing everything.
|
||||
*
|
||||
* The input text is an instance of the C character iteration API UCharIterator.
|
||||
* It may wrap around a simple string, a CharacterIterator, a Replaceable, or any
|
||||
* other kind of text object.
|
||||
*
|
||||
* If a buffer overflow occurs, then the caller needs to reset the iterator to the
|
||||
* old index and call the function again with a larger buffer - if the caller cares
|
||||
* for the actual output.
|
||||
* Regardless of the output buffer, the iterator will always be moved to the next
|
||||
* normalization boundary.
|
||||
*
|
||||
* This function (like unorm_previous) serves two purposes:
|
||||
*
|
||||
* 1) To find the next boundary so that the normalization of the part of the text
|
||||
* from the current position to that boundary does not affect and is not affected
|
||||
* by the part of the text beyond that boundary.
|
||||
*
|
||||
* 2) To normalize the text up to the boundary.
|
||||
*
|
||||
* The second step is optional, per the doNormalize parameter.
|
||||
* It is omitted for operations like string concatenation, where the two adjacent
|
||||
* string ends need to be normalized together.
|
||||
* In such a case, the output buffer will just contain a copy of the text up to the
|
||||
* boundary.
|
||||
*
|
||||
* pNeededToNormalize is an output-only parameter. Its output value is only defined
|
||||
* if normalization was requested (doNormalize) and successful (especially, no
|
||||
* buffer overflow).
|
||||
* It is useful for operations like a normalizing transliterator, where one would
|
||||
* not want to replace a piece of text if it is not modified.
|
||||
*
|
||||
* If doNormalize==true and pNeededToNormalize!=NULL then *pNeededToNormalize is set to true
|
||||
* if the normalization was necessary.
|
||||
*
|
||||
* If doNormalize==false then *pNeededToNormalize will be set to false.
|
||||
*
|
||||
* If the buffer overflows, then *pNeededToNormalize will be undefined;
|
||||
* essentially, whenever U_FAILURE is true (like in buffer overflows), this result
|
||||
* will be undefined.
|
||||
*
|
||||
* @param src The input text in the form of a C character iterator.
|
||||
* @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
|
||||
* @param destCapacity The number of UChars that fit into dest.
|
||||
* @param mode The normalization mode.
|
||||
* @param options The normalization options, ORed together (0 for no options).
|
||||
* @param doNormalize Indicates if the source text up to the next boundary
|
||||
* is to be normalized (true) or just copied (false).
|
||||
* @param pNeededToNormalize Output flag indicating if the normalization resulted in
|
||||
* different text from the input.
|
||||
* Not defined if an error occurs including buffer overflow.
|
||||
* Always false if !doNormalize.
|
||||
* @param pErrorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Length of output (number of UChars) when successful or buffer overflow.
|
||||
*
|
||||
* @see unorm_previous
|
||||
* @see unorm_normalize
|
||||
*
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
U_DEPRECATED int32_t U_EXPORT2
|
||||
unorm_next(UCharIterator *src,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UBool doNormalize, UBool *pNeededToNormalize,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Iterative normalization backward.
|
||||
* This function (together with unorm_next) is somewhat
|
||||
* similar to the C++ Normalizer class (see its non-static functions).
|
||||
* For all details see unorm_next.
|
||||
*
|
||||
* @param src The input text in the form of a C character iterator.
|
||||
* @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
|
||||
* @param destCapacity The number of UChars that fit into dest.
|
||||
* @param mode The normalization mode.
|
||||
* @param options The normalization options, ORed together (0 for no options).
|
||||
* @param doNormalize Indicates if the source text up to the next boundary
|
||||
* is to be normalized (true) or just copied (false).
|
||||
* @param pNeededToNormalize Output flag indicating if the normalization resulted in
|
||||
* different text from the input.
|
||||
* Not defined if an error occurs including buffer overflow.
|
||||
* Always false if !doNormalize.
|
||||
* @param pErrorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Length of output (number of UChars) when successful or buffer overflow.
|
||||
*
|
||||
* @see unorm_next
|
||||
* @see unorm_normalize
|
||||
*
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
U_DEPRECATED int32_t U_EXPORT2
|
||||
unorm_previous(UCharIterator *src,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UBool doNormalize, UBool *pNeededToNormalize,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Concatenate normalized strings, making sure that the result is normalized as well.
|
||||
*
|
||||
* If both the left and the right strings are in
|
||||
* the normalization form according to "mode/options",
|
||||
* then the result will be
|
||||
*
|
||||
* \code
|
||||
* dest=normalize(left+right, mode, options)
|
||||
* \endcode
|
||||
*
|
||||
* With the input strings already being normalized,
|
||||
* this function will use unorm_next() and unorm_previous()
|
||||
* to find the adjacent end pieces of the input strings.
|
||||
* Only the concatenation of these end pieces will be normalized and
|
||||
* then concatenated with the remaining parts of the input strings.
|
||||
*
|
||||
* It is allowed to have dest==left to avoid copying the entire left string.
|
||||
*
|
||||
* @param left Left source string, may be same as dest.
|
||||
* @param leftLength Length of left source string, or -1 if NUL-terminated.
|
||||
* @param right Right source string. Must not be the same as dest, nor overlap.
|
||||
* @param rightLength Length of right source string, or -1 if NUL-terminated.
|
||||
* @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
|
||||
* @param destCapacity The number of UChars that fit into dest.
|
||||
* @param mode The normalization mode.
|
||||
* @param options The normalization options, ORed together (0 for no options).
|
||||
* @param pErrorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Length of output (number of UChars) when successful or buffer overflow.
|
||||
*
|
||||
* @see unorm_normalize
|
||||
* @see unorm_next
|
||||
* @see unorm_previous
|
||||
*
|
||||
* @deprecated ICU 56 Use unorm2.h instead.
|
||||
*/
|
||||
U_DEPRECATED int32_t U_EXPORT2
|
||||
unorm_concatenate(const UChar *left, int32_t leftLength,
|
||||
const UChar *right, int32_t rightLength,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
#endif
|
||||
626
thirdparty/icu4c/common/unicode/unorm2.h
vendored
Normal file
626
thirdparty/icu4c/common/unicode/unorm2.h
vendored
Normal file
@@ -0,0 +1,626 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: unorm2.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009dec15
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UNORM2_H__
|
||||
#define __UNORM2_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: New API for Unicode Normalization.
|
||||
*
|
||||
* Unicode normalization functionality for standard Unicode normalization or
|
||||
* for using custom mapping tables.
|
||||
* All instances of UNormalizer2 are unmodifiable/immutable.
|
||||
* Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
|
||||
* For more details see the Normalizer2 C++ class.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/uset.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* Constants for normalization modes.
|
||||
* For details about standard Unicode normalization forms
|
||||
* and about the algorithms which are also used with custom mapping tables
|
||||
* see http://www.unicode.org/unicode/reports/tr15/
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Decomposition followed by composition.
|
||||
* Same as standard NFC when using an "nfc" instance.
|
||||
* Same as standard NFKC when using an "nfkc" instance.
|
||||
* For details about standard Unicode normalization forms
|
||||
* see http://www.unicode.org/unicode/reports/tr15/
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UNORM2_COMPOSE,
|
||||
/**
|
||||
* Map, and reorder canonically.
|
||||
* Same as standard NFD when using an "nfc" instance.
|
||||
* Same as standard NFKD when using an "nfkc" instance.
|
||||
* For details about standard Unicode normalization forms
|
||||
* see http://www.unicode.org/unicode/reports/tr15/
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UNORM2_DECOMPOSE,
|
||||
/**
|
||||
* "Fast C or D" form.
|
||||
* If a string is in this form, then further decomposition <i>without reordering</i>
|
||||
* would yield the same form as DECOMPOSE.
|
||||
* Text in "Fast C or D" form can be processed efficiently with data tables
|
||||
* that are "canonically closed", that is, that provide equivalent data for
|
||||
* equivalent text, without having to be fully normalized.
|
||||
* Not a standard Unicode normalization form.
|
||||
* Not a unique form: Different FCD strings can be canonically equivalent.
|
||||
* For details see http://www.unicode.org/notes/tn5/#FCD
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UNORM2_FCD,
|
||||
/**
|
||||
* Compose only contiguously.
|
||||
* Also known as "FCC" or "Fast C Contiguous".
|
||||
* The result will often but not always be in NFC.
|
||||
* The result will conform to FCD which is useful for processing.
|
||||
* Not a standard Unicode normalization form.
|
||||
* For details see http://www.unicode.org/notes/tn5/#FCC
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UNORM2_COMPOSE_CONTIGUOUS
|
||||
} UNormalization2Mode;
|
||||
|
||||
/**
|
||||
* Result values for normalization quick check functions.
|
||||
* For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef enum UNormalizationCheckResult {
|
||||
/**
|
||||
* The input string is not in the normalization form (implicit enumerator value 0).
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UNORM_NO,
|
||||
/**
|
||||
* The input string is in the normalization form (implicit enumerator value 1).
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UNORM_YES,
|
||||
/**
|
||||
* The input string may or may not be in the normalization form (implicit enumerator value 2).
|
||||
* This value is only returned for composition forms like NFC and FCC,
|
||||
* when a backward-combining character is found for which the surrounding text
|
||||
* would have to be analyzed further.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UNORM_MAYBE
|
||||
} UNormalizationCheckResult;
|
||||
|
||||
/**
|
||||
* Opaque C service object type for the new normalization API.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
struct UNormalizer2;
|
||||
typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode NFC normalization.
|
||||
* Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFCInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode NFD normalization.
|
||||
* Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFDInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode NFKC normalization.
|
||||
* Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFKCInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode NFKD normalization.
|
||||
* Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFKDInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization
|
||||
* which is equivalent to applying the NFKC_Casefold mappings and then NFC.
|
||||
* See https://www.unicode.org/reports/tr44/#NFKC_Casefold
|
||||
*
|
||||
* Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
|
||||
* which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
|
||||
* See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold
|
||||
*
|
||||
* Same as unorm2_getInstance(NULL, "nfkc_scf", UNORM2_COMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @stable ICU 74
|
||||
*/
|
||||
U_CAPI const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance which uses the specified data file
|
||||
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
|
||||
* and which composes or decomposes text according to the specified mode.
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
*
|
||||
* Use packageName=NULL for data files that are part of ICU's own data.
|
||||
* Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
|
||||
* Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
|
||||
* Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
|
||||
*
|
||||
* @param packageName NULL for ICU built-in data, otherwise application data package name
|
||||
* @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file
|
||||
* @param mode normalization mode (compose or decompose etc.)
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested UNormalizer2, if successful
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getInstance(const char *packageName,
|
||||
const char *name,
|
||||
UNormalization2Mode mode,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Constructs a filtered normalizer wrapping any UNormalizer2 instance
|
||||
* and a filter set.
|
||||
* Both are aliased and must not be modified or deleted while this object
|
||||
* is used.
|
||||
* The filter set should be frozen; otherwise the performance will suffer greatly.
|
||||
* @param norm2 wrapped UNormalizer2 instance
|
||||
* @param filterSet USet which determines the characters to be normalized
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested UNormalizer2, if successful
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI UNormalizer2 * U_EXPORT2
|
||||
unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Closes a UNormalizer2 instance from unorm2_openFiltered().
|
||||
* Do not close instances from unorm2_getInstance()!
|
||||
* @param norm2 UNormalizer2 instance to be closed
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
unorm2_close(UNormalizer2 *norm2);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUNormalizer2Pointer
|
||||
* "Smart pointer" class, closes a UNormalizer2 via unorm2_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Writes the normalized form of the source string to the destination string
|
||||
* (replacing its contents) and returns the length of the destination string.
|
||||
* The source and destination strings must be different buffers.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param src source string
|
||||
* @param length length of the source string, or -1 if NUL-terminated
|
||||
* @param dest destination string; its contents are replaced with normalized src
|
||||
* @param capacity number of UChars that can be written to dest
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the length of the destination string
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm2_normalize(const UNormalizer2 *norm2,
|
||||
const UChar *src, int32_t length,
|
||||
UChar *dest, int32_t capacity,
|
||||
UErrorCode *pErrorCode);
|
||||
/**
|
||||
* Appends the normalized form of the second string to the first string
|
||||
* (merging them at the boundary) and returns the length of the first string.
|
||||
* The result is normalized if the first string was normalized.
|
||||
* The first and second strings must be different buffers.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param first string, should be normalized
|
||||
* @param firstLength length of the first string, or -1 if NUL-terminated
|
||||
* @param firstCapacity number of UChars that can be written to first
|
||||
* @param second string, will be normalized
|
||||
* @param secondLength length of the second string, or -1 if NUL-terminated
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the length of the first string
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
|
||||
UChar *first, int32_t firstLength, int32_t firstCapacity,
|
||||
const UChar *second, int32_t secondLength,
|
||||
UErrorCode *pErrorCode);
|
||||
/**
|
||||
* Appends the second string to the first string
|
||||
* (merging them at the boundary) and returns the length of the first string.
|
||||
* The result is normalized if both the strings were normalized.
|
||||
* The first and second strings must be different buffers.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param first string, should be normalized
|
||||
* @param firstLength length of the first string, or -1 if NUL-terminated
|
||||
* @param firstCapacity number of UChars that can be written to first
|
||||
* @param second string, should be normalized
|
||||
* @param secondLength length of the second string, or -1 if NUL-terminated
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the length of the first string
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm2_append(const UNormalizer2 *norm2,
|
||||
UChar *first, int32_t firstLength, int32_t firstCapacity,
|
||||
const UChar *second, int32_t secondLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Gets the decomposition mapping of c.
|
||||
* Roughly equivalent to normalizing the String form of c
|
||||
* on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
|
||||
* returns a negative value and does not write a string
|
||||
* if c does not have a decomposition mapping in this instance's data.
|
||||
* This function is independent of the mode of the UNormalizer2.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param c code point
|
||||
* @param decomposition String buffer which will be set to c's
|
||||
* decomposition mapping, if there is one.
|
||||
* @param capacity number of UChars that can be written to decomposition
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm2_getDecomposition(const UNormalizer2 *norm2,
|
||||
UChar32 c, UChar *decomposition, int32_t capacity,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Gets the raw decomposition mapping of c.
|
||||
*
|
||||
* This is similar to the unorm2_getDecomposition() function but returns the
|
||||
* raw decomposition mapping as specified in UnicodeData.txt or
|
||||
* (for custom data) in the mapping files processed by the gennorm2 tool.
|
||||
* By contrast, unorm2_getDecomposition() returns the processed,
|
||||
* recursively-decomposed version of this mapping.
|
||||
*
|
||||
* When used on a standard NFKC Normalizer2 instance,
|
||||
* unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
|
||||
*
|
||||
* When used on a standard NFC Normalizer2 instance,
|
||||
* it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
|
||||
* in this case, the result contains either one or two code points (=1..4 UChars).
|
||||
*
|
||||
* This function is independent of the mode of the UNormalizer2.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param c code point
|
||||
* @param decomposition String buffer which will be set to c's
|
||||
* raw decomposition mapping, if there is one.
|
||||
* @param capacity number of UChars that can be written to decomposition
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm2_getRawDecomposition(const UNormalizer2 *norm2,
|
||||
UChar32 c, UChar *decomposition, int32_t capacity,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Performs pairwise composition of a & b and returns the composite if there is one.
|
||||
*
|
||||
* Returns a composite code point c only if c has a two-way mapping to a+b.
|
||||
* In standard Unicode normalization, this means that
|
||||
* c has a canonical decomposition to a+b
|
||||
* and c does not have the Full_Composition_Exclusion property.
|
||||
*
|
||||
* This function is independent of the mode of the UNormalizer2.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param a A (normalization starter) code point.
|
||||
* @param b Another code point.
|
||||
* @return The non-negative composite code point if there is one; otherwise a negative value.
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
|
||||
|
||||
/**
|
||||
* Gets the combining class of c.
|
||||
* The default implementation returns 0
|
||||
* but all standard implementations return the Unicode Canonical_Combining_Class value.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param c code point
|
||||
* @return c's combining class
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI uint8_t U_EXPORT2
|
||||
unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
|
||||
|
||||
/**
|
||||
* Tests if the string is normalized.
|
||||
* Internally, in cases where the quickCheck() method would return "maybe"
|
||||
* (which is only possible for the two COMPOSE modes) this method
|
||||
* resolves to "yes" or "no" to provide a definitive result,
|
||||
* at the cost of doing more work in those cases.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param s input string
|
||||
* @param length length of the string, or -1 if NUL-terminated
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return true if s is normalized
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
unorm2_isNormalized(const UNormalizer2 *norm2,
|
||||
const UChar *s, int32_t length,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Tests if the string is normalized.
|
||||
* For the two COMPOSE modes, the result could be "maybe" in cases that
|
||||
* would take a little more work to resolve definitively.
|
||||
* Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend() for a faster
|
||||
* combination of quick check + normalization, to avoid
|
||||
* re-checking the "yes" prefix.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param s input string
|
||||
* @param length length of the string, or -1 if NUL-terminated
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return UNormalizationCheckResult
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI UNormalizationCheckResult U_EXPORT2
|
||||
unorm2_quickCheck(const UNormalizer2 *norm2,
|
||||
const UChar *s, int32_t length,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns the end of the normalized substring of the input string.
|
||||
* In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
|
||||
* the substring <code>UnicodeString(s, 0, end)</code>
|
||||
* will pass the quick check with a "yes" result.
|
||||
*
|
||||
* The returned end index is usually one or more characters before the
|
||||
* "no" or "maybe" character: The end index is at a normalization boundary.
|
||||
* (See the class documentation for more about normalization boundaries.)
|
||||
*
|
||||
* When the goal is a normalized string and most input strings are expected
|
||||
* to be normalized already, then call this method,
|
||||
* and if it returns a prefix shorter than the input string,
|
||||
* copy that prefix and use unorm2_normalizeSecondAndAppend() for the remainder.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param s input string
|
||||
* @param length length of the string, or -1 if NUL-terminated
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return "yes" span end index
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
|
||||
const UChar *s, int32_t length,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Tests if the character always has a normalization boundary before it,
|
||||
* regardless of context.
|
||||
* For details see the Normalizer2 base class documentation.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param c character to test
|
||||
* @return true if c has a normalization boundary before it
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
|
||||
|
||||
/**
|
||||
* Tests if the character always has a normalization boundary after it,
|
||||
* regardless of context.
|
||||
* For details see the Normalizer2 base class documentation.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param c character to test
|
||||
* @return true if c has a normalization boundary after it
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
|
||||
|
||||
/**
|
||||
* Tests if the character is normalization-inert.
|
||||
* For details see the Normalizer2 base class documentation.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param c character to test
|
||||
* @return true if c is normalization-inert
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
|
||||
|
||||
/**
|
||||
* Compares two strings for canonical equivalence.
|
||||
* Further options include case-insensitive comparison and
|
||||
* code point order (as opposed to code unit order).
|
||||
*
|
||||
* Canonical equivalence between two strings is defined as their normalized
|
||||
* forms (NFD or NFC) being identical.
|
||||
* This function compares strings incrementally instead of normalizing
|
||||
* (and optionally case-folding) both strings entirely,
|
||||
* improving performance significantly.
|
||||
*
|
||||
* Bulk normalization is only necessary if the strings do not fulfill the FCD
|
||||
* conditions. Only in this case, and only if the strings are relatively long,
|
||||
* is memory allocated temporarily.
|
||||
* For FCD strings and short non-FCD strings there is no memory allocation.
|
||||
*
|
||||
* Semantically, this is equivalent to
|
||||
* strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
|
||||
* where code point order and foldCase are all optional.
|
||||
*
|
||||
* UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
|
||||
* the case folding must be performed first, then the normalization.
|
||||
*
|
||||
* @param s1 First source string.
|
||||
* @param length1 Length of first source string, or -1 if NUL-terminated.
|
||||
*
|
||||
* @param s2 Second source string.
|
||||
* @param length2 Length of second source string, or -1 if NUL-terminated.
|
||||
*
|
||||
* @param options A bit set of options:
|
||||
* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
|
||||
* Case-sensitive comparison in code unit order, and the input strings
|
||||
* are quick-checked for FCD.
|
||||
*
|
||||
* - UNORM_INPUT_IS_FCD
|
||||
* Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
|
||||
* If not set, the function will quickCheck for FCD
|
||||
* and normalize if necessary.
|
||||
*
|
||||
* - U_COMPARE_CODE_POINT_ORDER
|
||||
* Set to choose code point order instead of code unit order
|
||||
* (see u_strCompare for details).
|
||||
*
|
||||
* - U_COMPARE_IGNORE_CASE
|
||||
* Set to compare strings case-insensitively using case folding,
|
||||
* instead of case-sensitively.
|
||||
* If set, then the following case folding options are used.
|
||||
*
|
||||
* - Options as used with case-insensitive comparisons, currently:
|
||||
*
|
||||
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* (see u_strCaseCompare for details)
|
||||
*
|
||||
* - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
|
||||
*
|
||||
* @param pErrorCode ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return <0 or 0 or >0 as usual for string comparisons
|
||||
*
|
||||
* @see unorm_normalize
|
||||
* @see UNORM_FCD
|
||||
* @see u_strCompare
|
||||
* @see u_strCaseCompare
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm_compare(const UChar *s1, int32_t length1,
|
||||
const UChar *s2, int32_t length2,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif /* !UCONFIG_NO_NORMALIZATION */
|
||||
#endif /* __UNORM2_H__ */
|
||||
324
thirdparty/icu4c/common/unicode/uobject.h
vendored
Normal file
324
thirdparty/icu4c/common/unicode/uobject.h
vendored
Normal file
@@ -0,0 +1,324 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: uobject.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002jun26
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UOBJECT_H__
|
||||
#define __UOBJECT_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/platform.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Common ICU base class UObject.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_NO_THROW
|
||||
* Since ICU 64, use noexcept instead.
|
||||
*
|
||||
* Previously, define this to define the throw() specification so
|
||||
* certain functions do not throw any exceptions
|
||||
*
|
||||
* UMemory operator new methods should have the throw() specification
|
||||
* appended to them, so that the compiler adds the additional nullptr check
|
||||
* before calling constructors. Without, if <code>operator new</code> returns nullptr the
|
||||
* constructor is still called, and if the constructor references member
|
||||
* data, (which it typically does), the result is a segmentation violation.
|
||||
*
|
||||
* @stable ICU 4.2. Since ICU 64, Use noexcept instead. See ICU-20422.
|
||||
*/
|
||||
#ifndef U_NO_THROW
|
||||
#define U_NO_THROW noexcept
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* UClassID-based RTTI */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* UClassID is used to identify classes without using the compiler's RTTI.
|
||||
* This was used before C++ compilers consistently supported RTTI.
|
||||
* ICU 4.6 requires compiler RTTI to be turned on.
|
||||
*
|
||||
* Each class hierarchy which needs
|
||||
* to implement polymorphic clone() or operator==() defines two methods,
|
||||
* described in detail below. UClassID values can be compared using
|
||||
* operator==(). Nothing else should be done with them.
|
||||
*
|
||||
* \par
|
||||
* In class hierarchies that implement "poor man's RTTI",
|
||||
* each concrete subclass implements getDynamicClassID() in the same way:
|
||||
*
|
||||
* \code
|
||||
* class Derived {
|
||||
* public:
|
||||
* virtual UClassID getDynamicClassID() const
|
||||
* { return Derived::getStaticClassID(); }
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* Each concrete class implements getStaticClassID() as well, which allows
|
||||
* clients to test for a specific type.
|
||||
*
|
||||
* \code
|
||||
* class Derived {
|
||||
* public:
|
||||
* static UClassID U_EXPORT2 getStaticClassID();
|
||||
* private:
|
||||
* static char fgClassID;
|
||||
* }
|
||||
*
|
||||
* // In Derived.cpp:
|
||||
* UClassID Derived::getStaticClassID()
|
||||
* { return (UClassID)&Derived::fgClassID; }
|
||||
* char Derived::fgClassID = 0; // Value is irrelevant
|
||||
* \endcode
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef void* UClassID;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* UMemory is the common ICU base class.
|
||||
* All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
|
||||
*
|
||||
* This is primarily to make it possible and simple to override the
|
||||
* C++ memory management by adding new/delete operators to this base class.
|
||||
*
|
||||
* To override ALL ICU memory management, including that from plain C code,
|
||||
* replace the allocation functions declared in cmemory.h
|
||||
*
|
||||
* UMemory does not contain any virtual functions.
|
||||
* Common "boilerplate" functions are defined in UObject.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
class U_COMMON_API UMemory {
|
||||
public:
|
||||
|
||||
/* test versions for debugging shaper heap memory problems */
|
||||
#ifdef SHAPER_MEMORY_DEBUG
|
||||
static void * NewArray(int size, int count);
|
||||
static void * GrowArray(void * array, int newSize );
|
||||
static void FreeArray(void * array );
|
||||
#endif
|
||||
|
||||
#if U_OVERRIDE_CXX_ALLOCATION
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* simple, non-class types are allocated using the macros in common/cmemory.h
|
||||
* (uprv_malloc(), uprv_free(), uprv_realloc());
|
||||
* they or something else could be used here to implement C++ new/delete
|
||||
* for ICU4C C++ classes
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void * U_EXPORT2 operator new(size_t size) noexcept;
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* See new().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void * U_EXPORT2 operator new[](size_t size) noexcept;
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* simple, non-class types are allocated using the macros in common/cmemory.h
|
||||
* (uprv_malloc(), uprv_free(), uprv_realloc());
|
||||
* they or something else could be used here to implement C++ new/delete
|
||||
* for ICU4C C++ classes
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void U_EXPORT2 operator delete(void *p) noexcept;
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* See delete().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void U_EXPORT2 operator delete[](void *p) noexcept;
|
||||
|
||||
#if U_HAVE_PLACEMENT_NEW
|
||||
/**
|
||||
* Override for ICU4C C++ memory management for STL.
|
||||
* See new().
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
static inline void * U_EXPORT2 operator new(size_t, void *ptr) noexcept { return ptr; }
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management for STL.
|
||||
* See delete().
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
static inline void U_EXPORT2 operator delete(void *, void *) noexcept {}
|
||||
#endif /* U_HAVE_PLACEMENT_NEW */
|
||||
#if U_HAVE_DEBUG_LOCATION_NEW
|
||||
/**
|
||||
* This method overrides the MFC debug version of the operator new
|
||||
*
|
||||
* @param size The requested memory size
|
||||
* @param file The file where the allocation was requested
|
||||
* @param line The line where the allocation was requested
|
||||
*/
|
||||
static void * U_EXPORT2 operator new(size_t size, const char* file, int line) noexcept;
|
||||
/**
|
||||
* This method provides a matching delete for the MFC debug new
|
||||
*
|
||||
* @param p The pointer to the allocated memory
|
||||
* @param file The file where the allocation was requested
|
||||
* @param line The line where the allocation was requested
|
||||
*/
|
||||
static void U_EXPORT2 operator delete(void* p, const char* file, int line) noexcept;
|
||||
#endif /* U_HAVE_DEBUG_LOCATION_NEW */
|
||||
#endif /* U_OVERRIDE_CXX_ALLOCATION */
|
||||
|
||||
/*
|
||||
* Assignment operator not declared. The compiler will provide one
|
||||
* which does nothing since this class does not contain any data members.
|
||||
* API/code coverage may show the assignment operator as present and
|
||||
* untested - ignore.
|
||||
* Subclasses need this assignment operator if they use compiler-provided
|
||||
* assignment operators of their own. An alternative to not declaring one
|
||||
* here would be to declare and empty-implement a protected or public one.
|
||||
UMemory &UMemory::operator=(const UMemory &);
|
||||
*/
|
||||
};
|
||||
|
||||
/**
|
||||
* UObject is the common ICU "boilerplate" class.
|
||||
* UObject inherits UMemory (starting with ICU 2.4),
|
||||
* and all other public ICU C++ classes
|
||||
* are derived from UObject (starting with ICU 2.2).
|
||||
*
|
||||
* UObject contains common virtual functions, in particular a virtual destructor.
|
||||
*
|
||||
* The clone() function is not available in UObject because it is not
|
||||
* implemented by all ICU classes.
|
||||
* Many ICU services provide a clone() function for their class trees,
|
||||
* defined on the service's C++ base class
|
||||
* (which itself is a subclass of UObject).
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
class U_COMMON_API UObject : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual ~UObject();
|
||||
|
||||
/**
|
||||
* ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
|
||||
* The base class implementation returns a dummy value.
|
||||
*
|
||||
* Use compiler RTTI rather than ICU's "poor man's RTTI".
|
||||
* Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI".
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
protected:
|
||||
// the following functions are protected to prevent instantiation and
|
||||
// direct use of UObject itself
|
||||
|
||||
// default constructor
|
||||
// inline UObject() {}
|
||||
|
||||
// copy constructor
|
||||
// inline UObject(const UObject &other) {}
|
||||
|
||||
#if 0
|
||||
// TODO Sometime in the future. Implement operator==().
|
||||
// (This comment inserted in 2.2)
|
||||
// some or all of the following "boilerplate" functions may be made public
|
||||
// in a future ICU4C release when all subclasses implement them
|
||||
|
||||
// assignment operator
|
||||
// (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
|
||||
// commented out because the implementation is the same as a compiler's default
|
||||
// UObject &operator=(const UObject &other) { return *this; }
|
||||
|
||||
// comparison operators
|
||||
virtual inline bool operator==(const UObject &other) const { return this==&other; }
|
||||
inline bool operator!=(const UObject &other) const { return !operator==(other); }
|
||||
|
||||
// clone() commented out from the base class:
|
||||
// some compilers do not support co-variant return types
|
||||
// (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
|
||||
// see also UObject class documentation.
|
||||
// virtual UObject *clone() const;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Assignment operator not declared. The compiler will provide one
|
||||
* which does nothing since this class does not contain any data members.
|
||||
* API/code coverage may show the assignment operator as present and
|
||||
* untested - ignore.
|
||||
* Subclasses need this assignment operator if they use compiler-provided
|
||||
* assignment operators of their own. An alternative to not declaring one
|
||||
* here would be to declare and empty-implement a protected or public one.
|
||||
UObject &UObject::operator=(const UObject &);
|
||||
*/
|
||||
};
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* This is a simple macro to add ICU RTTI to an ICU object implementation.
|
||||
* This does not go into the header. This should only be used in *.cpp files.
|
||||
*
|
||||
* The expansion defines two member functions of myClass:
|
||||
* getStaticClassID(), which returns the address of a function-local static char
|
||||
* cast to UClassID (only the address is returned; the char's value of 0 is not read),
|
||||
* and getDynamicClassID(), which forwards to myClass::getStaticClassID().
|
||||
*
|
||||
* @param myClass The name of the class that needs RTTI defined.
|
||||
* @internal
|
||||
*/
|
||||
#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
|
||||
UClassID U_EXPORT2 myClass::getStaticClassID() { \
|
||||
static char classID = 0; \
|
||||
return (UClassID)&classID; \
|
||||
} \
|
||||
UClassID myClass::getDynamicClassID() const \
|
||||
{ return myClass::getStaticClassID(); }
|
||||
|
||||
|
||||
/**
|
||||
* This macro adds ICU RTTI to an ICU abstract class implementation.
|
||||
* This macro should be invoked in *.cpp files. The corresponding
|
||||
* header should declare getStaticClassID.
|
||||
*
|
||||
* The expansion defines only myClass::getStaticClassID(), which returns the
|
||||
* address of a function-local static char cast to UClassID.
|
||||
* It does not define getDynamicClassID().
|
||||
*
|
||||
* @param myClass The name of the class that needs RTTI defined.
|
||||
* @internal
|
||||
*/
|
||||
#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
|
||||
UClassID U_EXPORT2 myClass::getStaticClassID() { \
|
||||
static char classID = 0; \
|
||||
return (UClassID)&classID; \
|
||||
}
|
||||
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
2043
thirdparty/icu4c/common/unicode/urename.h
vendored
Normal file
2043
thirdparty/icu4c/common/unicode/urename.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
157
thirdparty/icu4c/common/unicode/urep.h
vendored
Normal file
157
thirdparty/icu4c/common/unicode/urep.h
vendored
Normal file
@@ -0,0 +1,157 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1997-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
* Date Name Description
|
||||
* 06/23/00 aliu Creation.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef __UREP_H
|
||||
#define __UREP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/********************************************************************
|
||||
* General Notes
|
||||
********************************************************************
|
||||
* TODO
|
||||
* Add usage scenario
|
||||
* Add test code
|
||||
* Talk about pinning
|
||||
* Talk about "can truncate result if out of memory"
|
||||
*/
|
||||
|
||||
/********************************************************************
|
||||
* Data Structures
|
||||
********************************************************************/
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Callbacks for UReplaceable
|
||||
*/
|
||||
/**
|
||||
* An opaque replaceable text object. This will be manipulated only
|
||||
* through the caller-supplied UReplaceableFunctor struct. Related
|
||||
* to the C++ class Replaceable.
|
||||
* This is currently only used in the Transliterator C API, see utrans.h .
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef void* UReplaceable;
|
||||
|
||||
/**
|
||||
* A set of function pointers that transliterators use to manipulate a
|
||||
* UReplaceable. The caller should supply the required functions to
|
||||
* manipulate their text appropriately. Related to the C++ class
|
||||
* Replaceable.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UReplaceableCallbacks {
|
||||
|
||||
/**
|
||||
* Function pointer that returns the number of UChar code units in
|
||||
* this text.
|
||||
*
|
||||
* @param rep A pointer to "this" UReplaceable object.
|
||||
* @return The length of the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t (*length)(const UReplaceable* rep);
|
||||
|
||||
/**
|
||||
* Function pointer that returns a UChar code units at the given
|
||||
* offset into this text; 0 <= offset < n, where n is the value
|
||||
* returned by (*length)(rep). See unistr.h for a description of
|
||||
* charAt() vs. char32At().
|
||||
*
|
||||
* @param rep A pointer to "this" UReplaceable object.
|
||||
* @param offset The index at which to fetch the UChar (code unit).
|
||||
* @return The UChar (code unit) at offset, or U+FFFF if the offset is out of bounds.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UChar (*charAt)(const UReplaceable* rep,
|
||||
int32_t offset);
|
||||
|
||||
/**
|
||||
* Function pointer that returns a UChar32 code point at the given
|
||||
* offset into this text. See unistr.h for a description of
|
||||
* charAt() vs. char32At().
|
||||
*
|
||||
* @param rep A pointer to "this" UReplaceable object.
|
||||
* @param offset The index at which to fetch the UChar32 (code point).
|
||||
* @return The UChar32 (code point) at offset, or U+FFFF if the offset is out of bounds.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UChar32 (*char32At)(const UReplaceable* rep,
|
||||
int32_t offset);
|
||||
|
||||
/**
|
||||
* Function pointer that replaces text between start and limit in
|
||||
* this text with the given text. Attributes (out of band info)
|
||||
* should be retained.
|
||||
*
|
||||
* @param rep A pointer to "this" UReplaceable object.
|
||||
* @param start the starting index of the text to be replaced,
|
||||
* inclusive.
|
||||
* @param limit the ending index of the text to be replaced,
|
||||
* exclusive.
|
||||
* @param text the new text to replace the UChars from
|
||||
* start..limit-1.
|
||||
* @param textLength the number of UChars at text, or -1 if text
|
||||
* is null-terminated.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
void (*replace)(UReplaceable* rep,
|
||||
int32_t start,
|
||||
int32_t limit,
|
||||
const UChar* text,
|
||||
int32_t textLength);
|
||||
|
||||
/**
|
||||
* Function pointer that copies the characters in the range
|
||||
* [<tt>start</tt>, <tt>limit</tt>) into the array <tt>dst</tt>.
|
||||
*
|
||||
* @param rep A pointer to "this" UReplaceable object.
|
||||
* @param start offset of first character which will be copied
|
||||
* into the array
|
||||
* @param limit offset immediately following the last character to
|
||||
* be copied
|
||||
* @param dst array in which to copy characters. The length of
|
||||
* <tt>dst</tt> must be at least <tt>(limit - start)</tt>.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
void (*extract)(UReplaceable* rep,
|
||||
int32_t start,
|
||||
int32_t limit,
|
||||
UChar* dst);
|
||||
|
||||
/**
|
||||
* Function pointer that copies text between start and limit in
|
||||
* this text to another index in the text. Attributes (out of
|
||||
* band info) should be retained. After this call, there will be
|
||||
* (at least) two copies of the characters originally located at
|
||||
* start..limit-1.
|
||||
*
|
||||
* @param rep A pointer to "this" UReplaceable object.
|
||||
* @param start the starting index of the text to be copied,
|
||||
* inclusive.
|
||||
* @param limit the ending index of the text to be copied,
|
||||
* exclusive.
|
||||
* @param dest the index at which the copy of the UChars should be
|
||||
* inserted.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
void (*copy)(UReplaceable* rep,
|
||||
int32_t start,
|
||||
int32_t limit,
|
||||
int32_t dest);
|
||||
|
||||
} UReplaceableCallbacks;
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
912
thirdparty/icu4c/common/unicode/ures.h
vendored
Normal file
912
thirdparty/icu4c/common/unicode/ures.h
vendored
Normal file
@@ -0,0 +1,912 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* File URES.H (formerly CRESBUND.H)
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 04/01/97 aliu Creation.
|
||||
* 02/22/99 damiba overhaul.
|
||||
* 04/04/99 helena Fixed internal header inclusion.
|
||||
* 04/15/99 Madhu Updated Javadoc
|
||||
* 06/14/99 stephen Removed functions taking a filename suffix.
|
||||
* 07/20/99 stephen Language-independent typedef to void*
|
||||
* 11/09/99 weiv Added ures_getLocale()
|
||||
* 06/24/02 weiv Added support for resource sharing
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef URES_H
|
||||
#define URES_H
|
||||
|
||||
#include "unicode/char16ptr.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uloc.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Resource Bundle
|
||||
*
|
||||
* <h2>C API: Resource Bundle</h2>
|
||||
*
|
||||
* C API representing a collection of resource information pertaining to a given
|
||||
* locale. A resource bundle provides a way of accessing locale-specific information in
|
||||
* a data file. You create a resource bundle that manages the resources for a given
|
||||
* locale and then ask it for individual resources.
|
||||
* <P>
|
||||
* Resource bundles in ICU4C are currently defined using text files which conform to the following
|
||||
* <a href="https://github.com/unicode-org/icu-docs/blob/main/design/bnf_rb.txt">BNF definition</a>.
|
||||
* More on resource bundle concepts and syntax can be found in the
|
||||
* <a href="https://unicode-org.github.io/icu/userguide/locale/resources">Users Guide</a>.
|
||||
* <P>
|
||||
*/
|
||||
|
||||
/**
|
||||
* UResourceBundle is an opaque type for handles for resource bundles in C APIs.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
struct UResourceBundle;
|
||||
|
||||
/**
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UResourceBundle UResourceBundle;
|
||||
|
||||
/**
|
||||
* Numeric constants for types of resource items.
|
||||
* @see ures_getType
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef enum {
    /** Resource type constant for "no resource". @stable ICU 2.6 */
    URES_NONE=-1,

    /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
    URES_STRING=0,

    /** Resource type constant for binary data. @stable ICU 2.6 */
    URES_BINARY=1,

    /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */
    URES_TABLE=2,

    /**
     * Resource type constant for aliases;
     * internally stores a string which identifies the actual resource
     * storing the data (can be in a different resource bundle).
     * Resolved internally before delivering the actual resource through the API.
     * @stable ICU 2.6
     */
    URES_ALIAS=3,

    /**
     * Resource type constant for a single 28-bit integer, interpreted as
     * signed or unsigned by the ures_getInt() or ures_getUInt() function.
     * @see ures_getInt
     * @see ures_getUInt
     * @stable ICU 2.6
     */
    URES_INT=7,

    /** Resource type constant for arrays of resources. @stable ICU 2.6 */
    URES_ARRAY=8,

    /**
     * Resource type constant for vectors of 32-bit integers.
     * @see ures_getIntVector
     * @stable ICU 2.6
     */
    URES_INT_VECTOR = 14,
#ifndef U_HIDE_DEPRECATED_API
    /* Deprecated RES_ aliases: each one repeats the value of its URES_ counterpart. */
    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
    RES_NONE=URES_NONE,
    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
    RES_STRING=URES_STRING,
    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
    RES_BINARY=URES_BINARY,
    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
    RES_TABLE=URES_TABLE,
    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
    RES_ALIAS=URES_ALIAS,
    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
    RES_INT=URES_INT,
    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
    RES_ARRAY=URES_ARRAY,
    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
    RES_INT_VECTOR=URES_INT_VECTOR,
    /** @deprecated ICU 2.6 Not used. */
    RES_RESERVED=15,

    /**
     * One more than the highest normal UResType value.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    URES_LIMIT = 16
#endif  // U_HIDE_DEPRECATED_API
} UResType;
|
||||
|
||||
/*
|
||||
* Functions to create and destroy resource bundles.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Opens a UResourceBundle, from which users can extract strings by using
|
||||
* their corresponding keys.
|
||||
* Note that the caller is responsible for calling <TT>ures_close</TT> on each successfully
|
||||
* opened resource bundle.
|
||||
* @param packageName The packageName and locale together point to an ICU udata object,
|
||||
* as defined by <code> udata_open( packageName, "res", locale, err) </code>
|
||||
* or equivalent. Typically, packageName will refer to a (.dat) file, or to
|
||||
* a package registered with udata_setAppData(). Using a full file or directory
|
||||
* pathname for packageName is deprecated. If NULL, ICU data will be used.
|
||||
* @param locale specifies the locale for which we want to open the resource
|
||||
* if NULL, the default locale will be used. If strlen(locale) == 0
|
||||
* root locale will be used.
|
||||
*
|
||||
* @param status fills in the outgoing error code.
|
||||
* The UErrorCode err parameter is used to return status information to the user. To
|
||||
* check whether the construction succeeded or not, you should check the value of
|
||||
* U_SUCCESS(err). If you wish more detailed information, you can check for
|
||||
* informational status results which still indicate success. U_USING_FALLBACK_WARNING
|
||||
* indicates that a fall back locale was used. For example, 'de_CH' was requested,
|
||||
* but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
|
||||
* the default locale data or root locale data was used; neither the requested locale
|
||||
* nor any of its fall back locales could be found. Please see the users guide for more
|
||||
* information on this topic.
|
||||
* @return a newly allocated resource bundle.
|
||||
* @see ures_close
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UResourceBundle* U_EXPORT2
|
||||
ures_open(const char* packageName,
|
||||
const char* locale,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
/** This function does not care what kind of localeID is passed in. It simply opens a bundle with
|
||||
* that name. Fallback mechanism is disabled for the new bundle. If the requested bundle contains
|
||||
* an %%ALIAS directive, the results are undefined.
|
||||
* @param packageName The packageName and locale together point to an ICU udata object,
|
||||
* as defined by <code> udata_open( packageName, "res", locale, err) </code>
|
||||
* or equivalent. Typically, packageName will refer to a (.dat) file, or to
|
||||
* a package registered with udata_setAppData(). Using a full file or directory
|
||||
* pathname for packageName is deprecated. If NULL, ICU data will be used.
|
||||
* @param locale specifies the locale for which we want to open the resource
|
||||
* if NULL, the default locale will be used. If strlen(locale) == 0
|
||||
* root locale will be used.
|
||||
*
|
||||
* @param status fills in the outgoing error code. Either U_ZERO_ERROR or U_MISSING_RESOURCE_ERROR
|
||||
* @return a newly allocated resource bundle or NULL if it doesn't exist.
|
||||
* @see ures_close
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UResourceBundle* U_EXPORT2
|
||||
ures_openDirect(const char* packageName,
|
||||
const char* locale,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Same as ures_open() but takes a const UChar *path.
|
||||
* This path will be converted to char * using the default converter,
|
||||
* then ures_open() is called.
|
||||
*
|
||||
* @param packageName The packageName and locale together point to an ICU udata object,
|
||||
* as defined by <code> udata_open( packageName, "res", locale, err) </code>
|
||||
* or equivalent. Typically, packageName will refer to a (.dat) file, or to
|
||||
* a package registered with udata_setAppData(). Using a full file or directory
|
||||
* pathname for packageName is deprecated. If NULL, ICU data will be used.
|
||||
* @param locale specifies the locale for which we want to open the resource
|
||||
* if NULL, the default locale will be used. If strlen(locale) == 0
|
||||
* root locale will be used.
|
||||
* @param status fills in the outgoing error code.
|
||||
* @return a newly allocated resource bundle.
|
||||
* @see ures_open
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UResourceBundle* U_EXPORT2
|
||||
ures_openU(const UChar* packageName,
|
||||
const char* locale,
|
||||
UErrorCode* status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Returns the number of strings/arrays in resource bundles.
|
||||
* Prefer ures_getSize; this function is deprecated.
|
||||
*
|
||||
*@param resourceBundle resource bundle containing the desired strings
|
||||
*@param resourceKey key tagging the resource
|
||||
*@param err fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a non-failing error
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
*@return: for <STRONG>Arrays</STRONG>: returns the number of resources in the array
|
||||
* <STRONG>Tables</STRONG>: returns the number of resources in the table
|
||||
* <STRONG>single string</STRONG>: returns 1
|
||||
*@see ures_getSize
|
||||
* @deprecated ICU 2.8 Use ures_getSize instead
|
||||
*/
|
||||
U_DEPRECATED int32_t U_EXPORT2
|
||||
ures_countArrayItems(const UResourceBundle* resourceBundle,
|
||||
const char* resourceKey,
|
||||
UErrorCode* err);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Close a resource bundle, all pointers returned from the various ures_getXXX calls
|
||||
* on this particular bundle should be considered invalid henceforth.
|
||||
*
|
||||
* @param resourceBundle a pointer to a resourceBundle struct. Can be NULL.
|
||||
* @see ures_open
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ures_close(UResourceBundle* resourceBundle);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUResourceBundlePointer
|
||||
* "Smart pointer" class, closes a UResourceBundle via ures_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUResourceBundlePointer, UResourceBundle, ures_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Return the version number associated with this ResourceBundle as a string. Please
|
||||
* use ures_getVersion instead, as this function is deprecated.
|
||||
*
|
||||
* @param resourceBundle The resource bundle for which the version is checked.
|
||||
* @return A version number string as specified in the resource bundle or its parent.
|
||||
* The caller does not own this string.
|
||||
* @see ures_getVersion
|
||||
* @deprecated ICU 2.8 Use ures_getVersion instead.
|
||||
*/
|
||||
U_DEPRECATED const char* U_EXPORT2
|
||||
ures_getVersionNumber(const UResourceBundle* resourceBundle);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Return the version number associated with this ResourceBundle as an
|
||||
* UVersionInfo array.
|
||||
*
|
||||
* @param resB The resource bundle for which the version is checked.
|
||||
* @param versionInfo A UVersionInfo array that is filled with the version number
|
||||
* as specified in the resource bundle or its parent.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ures_getVersion(const UResourceBundle* resB,
|
||||
UVersionInfo versionInfo);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Return the name of the Locale associated with this ResourceBundle. This API allows
|
||||
* you to query for the real locale of the resource. For example, if you requested
|
||||
* "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned.
|
||||
* For subresources, the locale where this resource comes from will be returned.
|
||||
* If fallback has occurred, getLocale will reflect this.
|
||||
*
|
||||
* @param resourceBundle resource bundle in question
|
||||
* @param status just for catching illegal arguments
|
||||
* @return A Locale name
|
||||
* @deprecated ICU 2.8 Use ures_getLocaleByType instead.
|
||||
*/
|
||||
U_DEPRECATED const char* U_EXPORT2
|
||||
ures_getLocale(const UResourceBundle* resourceBundle,
|
||||
UErrorCode* status);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Return the name of the Locale associated with this ResourceBundle.
|
||||
* You can choose between requested, valid and real locale.
|
||||
*
|
||||
* @param resourceBundle resource bundle in question
|
||||
* @param type You can choose between requested, valid and actual
|
||||
* locale. For description see the definition of
|
||||
* ULocDataLocaleType in uloc.h
|
||||
* @param status just for catching illegal arguments
|
||||
* @return A Locale name
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ures_getLocaleByType(const UResourceBundle* resourceBundle,
|
||||
ULocDataLocaleType type,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Same as ures_open() but uses the fill-in parameter instead of allocating a new bundle.
|
||||
*
|
||||
* TODO need to revisit usefulness of this function
|
||||
* and usage model for fillIn parameters without knowing sizeof(UResourceBundle)
|
||||
* @param r The existing UResourceBundle to fill in. If NULL then status will be
|
||||
* set to U_ILLEGAL_ARGUMENT_ERROR.
|
||||
* @param packageName The packageName and locale together point to an ICU udata object,
|
||||
* as defined by <code> udata_open( packageName, "res", locale, err) </code>
|
||||
* or equivalent. Typically, packageName will refer to a (.dat) file, or to
|
||||
* a package registered with udata_setAppData(). Using a full file or directory
|
||||
* pathname for packageName is deprecated. If NULL, ICU data will be used.
|
||||
* @param localeID specifies the locale for which we want to open the resource
|
||||
* @param status The error code.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ures_openFillIn(UResourceBundle *r,
|
||||
const char* packageName,
|
||||
const char* localeID,
|
||||
UErrorCode* status);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* Returns a string from a string resource type
|
||||
*
|
||||
* @param resourceBundle a string resource
|
||||
* @param len fills in the length of resulting string
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* Always check the value of status. Don't count on returning NULL.
|
||||
* could be a non-failing error
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
|
||||
* @see ures_getBinary
|
||||
* @see ures_getIntVector
|
||||
* @see ures_getInt
|
||||
* @see ures_getUInt
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
ures_getString(const UResourceBundle* resourceBundle,
|
||||
int32_t* len,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns a UTF-8 string from a string resource.
|
||||
* The UTF-8 string may be returnable directly as a pointer, or
|
||||
* it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
|
||||
* or equivalent.
|
||||
*
|
||||
* If forceCopy==true, then the string is always written to the dest buffer
|
||||
* and dest is returned.
|
||||
*
|
||||
* If forceCopy==false, then the string is returned as a pointer if possible,
|
||||
* without needing a dest buffer (it can be NULL). If the string needs to be
|
||||
* copied or transformed, then it may be placed into dest at an arbitrary offset.
|
||||
*
|
||||
* If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
|
||||
* U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
|
||||
*
|
||||
* If the string is transformed from UTF-16, then a conversion error may occur
|
||||
* if an unpaired surrogate is encountered. If the function is successful, then
|
||||
* the output UTF-8 string is always well-formed.
|
||||
*
|
||||
* @param resB Resource bundle.
|
||||
* @param dest Destination buffer. Can be NULL only if capacity=*length==0.
|
||||
* @param length Input: Capacity of destination buffer.
|
||||
* Output: Actual length of the UTF-8 string, not counting the
|
||||
* terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
|
||||
* Can be NULL, meaning capacity=0 and the string length is not
|
||||
* returned to the caller.
|
||||
* @param forceCopy If true, then the output string will always be written to
|
||||
* dest, with U_BUFFER_OVERFLOW_ERROR and
|
||||
* U_STRING_NOT_TERMINATED_WARNING set if appropriate.
|
||||
* If false, then the dest buffer may or may not contain a
|
||||
* copy of the string. dest may or may not be modified.
|
||||
* If a copy needs to be written, then the UErrorCode parameter
|
||||
* indicates overflow etc. as usual.
|
||||
* @param status Pointer to a standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return The pointer to the UTF-8 string. It may be dest, or at some offset
|
||||
* from dest (only if !forceCopy), or in unrelated memory.
|
||||
* Always NUL-terminated unless the string was written to dest and
|
||||
* length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
|
||||
*
|
||||
* @see ures_getString
|
||||
* @see u_strToUTF8
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
ures_getUTF8String(const UResourceBundle *resB,
|
||||
char *dest, int32_t *length,
|
||||
UBool forceCopy,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns binary data from a binary resource.
|
||||
*
|
||||
* @param resourceBundle a binary resource
|
||||
* @param len fills in the length of resulting byte chunk
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* Always check the value of status. Don't count on returning NULL.
|
||||
* could be a non-failing error
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
|
||||
* @see ures_getString
|
||||
* @see ures_getIntVector
|
||||
* @see ures_getInt
|
||||
* @see ures_getUInt
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const uint8_t* U_EXPORT2
|
||||
ures_getBinary(const UResourceBundle* resourceBundle,
|
||||
int32_t* len,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns a 32 bit integer array from a resource.
|
||||
*
|
||||
* @param resourceBundle an int vector resource
|
||||
* @param len fills in the length of the resulting integer vector
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* Always check the value of status. Don't count on returning NULL.
|
||||
* could be a non-failing error
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return a pointer to a chunk of integers which live in a memory mapped/DLL file.
|
||||
* @see ures_getBinary
|
||||
* @see ures_getString
|
||||
* @see ures_getInt
|
||||
* @see ures_getUInt
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const int32_t* U_EXPORT2
|
||||
ures_getIntVector(const UResourceBundle* resourceBundle,
|
||||
int32_t* len,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns an unsigned integer from a resource.
|
||||
* This integer is originally 28 bits.
|
||||
*
|
||||
* @param resourceBundle an integer resource
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a non-failing error
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return an integer value
|
||||
* @see ures_getInt
|
||||
* @see ures_getIntVector
|
||||
* @see ures_getBinary
|
||||
* @see ures_getString
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ures_getUInt(const UResourceBundle* resourceBundle,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns a signed integer from a resource.
|
||||
* This integer is originally 28 bit and the sign gets propagated.
|
||||
*
|
||||
* @param resourceBundle an integer resource
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a non-failing error
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return an integer value
|
||||
* @see ures_getUInt
|
||||
* @see ures_getIntVector
|
||||
* @see ures_getBinary
|
||||
* @see ures_getString
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ures_getInt(const UResourceBundle* resourceBundle,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns the size of a resource. Size for scalar types is always 1,
|
||||
* and for vector/table types is the number of child resources.
|
||||
* @warning Integer array is treated as a scalar type. There are no
|
||||
* APIs to access individual members of an integer array. It
|
||||
* is always returned as a whole.
|
||||
* @param resourceBundle a resource
|
||||
* @return number of resources in a given resource.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ures_getSize(const UResourceBundle *resourceBundle);
|
||||
|
||||
/**
|
||||
* Returns the type of a resource. Available types are defined in enum UResType
|
||||
*
|
||||
* @param resourceBundle a resource
|
||||
* @return type of the given resource.
|
||||
* @see UResType
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UResType U_EXPORT2
|
||||
ures_getType(const UResourceBundle *resourceBundle);
|
||||
|
||||
/**
|
||||
* Returns the key associated with a given resource. Not all the resources have a key - only
|
||||
* those that are members of a table.
|
||||
*
|
||||
* @param resourceBundle a resource
|
||||
* @return a key associated to this resource, or NULL if it doesn't have a key
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
ures_getKey(const UResourceBundle *resourceBundle);
|
||||
|
||||
/* ITERATION API
|
||||
This API provides means for iterating through a resource
|
||||
*/
|
||||
|
||||
/**
|
||||
* Resets the internal context of a resource so that iteration starts from the first element.
|
||||
*
|
||||
* @param resourceBundle a resource
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ures_resetIterator(UResourceBundle *resourceBundle);
|
||||
|
||||
/**
|
||||
* Checks whether the given resource has another element to iterate over.
|
||||
*
|
||||
* @param resourceBundle a resource
|
||||
* @return true if there are more elements, false if there are no more elements
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ures_hasNext(const UResourceBundle *resourceBundle);
|
||||
|
||||
/**
|
||||
* Returns the next resource in a given resource or NULL if there are no more resources
|
||||
* to iterate over. Features a fill-in parameter.
|
||||
*
|
||||
* @param resourceBundle a resource
|
||||
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
|
||||
* Alternatively, you can supply a struct to be filled by this function.
|
||||
* @param status fills in the outgoing error code. You may still get a non NULL result even if an
|
||||
* error occurred. Check status instead.
|
||||
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UResourceBundle* U_EXPORT2
|
||||
ures_getNextResource(UResourceBundle *resourceBundle,
|
||||
UResourceBundle *fillIn,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns the next string in a given resource or NULL if there are no more resources
|
||||
* to iterate over.
|
||||
*
|
||||
* @param resourceBundle a resource
|
||||
* @param len fill in length of the string
|
||||
* @param key fill in for key associated with this string. NULL if no key
|
||||
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
|
||||
* count on it. Check status instead!
|
||||
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
ures_getNextString(UResourceBundle *resourceBundle,
|
||||
int32_t* len,
|
||||
const char ** key,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns the resource in a given resource at the specified index. Features a fill-in parameter.
|
||||
*
|
||||
* @param resourceBundle the resource bundle from which to get a sub-resource
|
||||
* @param indexR an index to the wanted resource.
|
||||
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
|
||||
* Alternatively, you can supply a struct to be filled by this function.
|
||||
* @param status fills in the outgoing error code. Don't count on NULL being returned if an error has
|
||||
* occurred. Check status instead.
|
||||
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UResourceBundle* U_EXPORT2
|
||||
ures_getByIndex(const UResourceBundle *resourceBundle,
|
||||
int32_t indexR,
|
||||
UResourceBundle *fillIn,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns the string in a given resource at the specified index.
|
||||
*
|
||||
* @param resourceBundle a resource
|
||||
* @param indexS an index to the wanted string.
|
||||
* @param len fill in length of the string
|
||||
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
|
||||
* count on it. Check status instead!
|
||||
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
ures_getStringByIndex(const UResourceBundle *resourceBundle,
|
||||
int32_t indexS,
|
||||
int32_t* len,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns a UTF-8 string from a resource at the specified index.
|
||||
* The UTF-8 string may be returnable directly as a pointer, or
|
||||
* it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
|
||||
* or equivalent.
|
||||
*
|
||||
* If forceCopy==true, then the string is always written to the dest buffer
|
||||
* and dest is returned.
|
||||
*
|
||||
* If forceCopy==false, then the string is returned as a pointer if possible,
|
||||
* without needing a dest buffer (it can be NULL). If the string needs to be
|
||||
* copied or transformed, then it may be placed into dest at an arbitrary offset.
|
||||
*
|
||||
* If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
|
||||
* U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
|
||||
*
|
||||
* If the string is transformed from UTF-16, then a conversion error may occur
|
||||
* if an unpaired surrogate is encountered. If the function is successful, then
|
||||
* the output UTF-8 string is always well-formed.
|
||||
*
|
||||
* @param resB Resource bundle.
|
||||
* @param stringIndex An index to the wanted string.
|
||||
* @param dest Destination buffer. Can be NULL only if capacity=*length==0.
|
||||
* @param pLength Input: Capacity of destination buffer.
|
||||
* Output: Actual length of the UTF-8 string, not counting the
|
||||
* terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
|
||||
* Can be NULL, meaning capacity=0 and the string length is not
|
||||
* returned to the caller.
|
||||
* @param forceCopy If true, then the output string will always be written to
|
||||
* dest, with U_BUFFER_OVERFLOW_ERROR and
|
||||
* U_STRING_NOT_TERMINATED_WARNING set if appropriate.
|
||||
* If false, then the dest buffer may or may not contain a
|
||||
* copy of the string. dest may or may not be modified.
|
||||
* If a copy needs to be written, then the UErrorCode parameter
|
||||
* indicates overflow etc. as usual.
|
||||
* @param status Pointer to a standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return The pointer to the UTF-8 string. It may be dest, or at some offset
|
||||
* from dest (only if !forceCopy), or in unrelated memory.
|
||||
* Always NUL-terminated unless the string was written to dest and
|
||||
* length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
|
||||
*
|
||||
* @see ures_getStringByIndex
|
||||
* @see u_strToUTF8
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
ures_getUTF8StringByIndex(const UResourceBundle *resB,
|
||||
int32_t stringIndex,
|
||||
char *dest, int32_t *pLength,
|
||||
UBool forceCopy,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns a resource in a given resource that has a given key. This procedure works only with table
|
||||
* resources. Features a fill-in parameter.
|
||||
*
|
||||
* @param resourceBundle a resource
|
||||
* @param key a key associated with the wanted resource
|
||||
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
|
||||
* Alternatively, you can supply a struct to be filled by this function.
|
||||
* @param status fills in the outgoing error code.
|
||||
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UResourceBundle* U_EXPORT2
|
||||
ures_getByKey(const UResourceBundle *resourceBundle,
|
||||
const char* key,
|
||||
UResourceBundle *fillIn,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns a string in a given resource that has a given key. This procedure works only with table
|
||||
* resources.
|
||||
*
|
||||
* @param resB a resource
|
||||
* @param key a key associated with the wanted string
|
||||
* @param len fill in length of the string
|
||||
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
|
||||
* count on it. Check status instead!
|
||||
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
ures_getStringByKey(const UResourceBundle *resB,
|
||||
const char* key,
|
||||
int32_t* len,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns a UTF-8 string from a resource and a key.
|
||||
* This function works only with table resources.
|
||||
*
|
||||
* The UTF-8 string may be returnable directly as a pointer, or
|
||||
* it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
|
||||
* or equivalent.
|
||||
*
|
||||
* If forceCopy==true, then the string is always written to the dest buffer
|
||||
* and dest is returned.
|
||||
*
|
||||
* If forceCopy==false, then the string is returned as a pointer if possible,
|
||||
* without needing a dest buffer (it can be NULL). If the string needs to be
|
||||
* copied or transformed, then it may be placed into dest at an arbitrary offset.
|
||||
*
|
||||
* If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
|
||||
* U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
|
||||
*
|
||||
* If the string is transformed from UTF-16, then a conversion error may occur
|
||||
* if an unpaired surrogate is encountered. If the function is successful, then
|
||||
* the output UTF-8 string is always well-formed.
|
||||
*
|
||||
* @param resB Resource bundle.
|
||||
* @param key A key associated with the wanted resource
|
||||
* @param dest Destination buffer. Can be NULL only if capacity=*length==0.
|
||||
* @param pLength Input: Capacity of destination buffer.
|
||||
* Output: Actual length of the UTF-8 string, not counting the
|
||||
* terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
|
||||
* Can be NULL, meaning capacity=0 and the string length is not
|
||||
* returned to the caller.
|
||||
* @param forceCopy If true, then the output string will always be written to
|
||||
* dest, with U_BUFFER_OVERFLOW_ERROR and
|
||||
* U_STRING_NOT_TERMINATED_WARNING set if appropriate.
|
||||
* If false, then the dest buffer may or may not contain a
|
||||
* copy of the string. dest may or may not be modified.
|
||||
* If a copy needs to be written, then the UErrorCode parameter
|
||||
* indicates overflow etc. as usual.
|
||||
* @param status Pointer to a standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return The pointer to the UTF-8 string. It may be dest, or at some offset
|
||||
* from dest (only if !forceCopy), or in unrelated memory.
|
||||
* Always NUL-terminated unless the string was written to dest and
|
||||
* length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
|
||||
*
|
||||
* @see ures_getStringByKey
|
||||
* @see u_strToUTF8
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
ures_getUTF8StringByKey(const UResourceBundle *resB,
|
||||
const char *key,
|
||||
char *dest, int32_t *pLength,
|
||||
UBool forceCopy,
|
||||
UErrorCode *status);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/**
|
||||
* Returns the string value from a string resource bundle.
|
||||
*
|
||||
* @param resB a resource, should have type URES_STRING
|
||||
* @param status fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a non-failing error
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return The string value, or a bogus string if there is a failure UErrorCode.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
inline UnicodeString
|
||||
ures_getUnicodeString(const UResourceBundle *resB, UErrorCode* status) {
|
||||
UnicodeString result;
|
||||
int32_t len = 0;
|
||||
const char16_t *r = ConstChar16Ptr(ures_getString(resB, &len, status));
|
||||
if(U_SUCCESS(*status)) {
|
||||
result.setTo(true, r, len);
|
||||
} else {
|
||||
result.setToBogus();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next string in a resource, or an empty string if there are no more resources
|
||||
* to iterate over.
|
||||
* Use ures_getNextString() instead to distinguish between
|
||||
* the end of the iteration and a real empty string value.
|
||||
*
|
||||
* @param resB a resource
|
||||
* @param key fill in for key associated with this string
|
||||
* @param status fills in the outgoing error code
|
||||
* @return The string value, or a bogus string if there is a failure UErrorCode.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
inline UnicodeString
|
||||
ures_getNextUnicodeString(UResourceBundle *resB, const char ** key, UErrorCode* status) {
|
||||
UnicodeString result;
|
||||
int32_t len = 0;
|
||||
const char16_t* r = ConstChar16Ptr(ures_getNextString(resB, &len, key, status));
|
||||
if(U_SUCCESS(*status)) {
|
||||
result.setTo(true, r, len);
|
||||
} else {
|
||||
result.setToBogus();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the string in a given resource array or table at the specified index.
|
||||
*
|
||||
* @param resB a resource
|
||||
* @param indexS an index to the wanted string.
|
||||
* @param status fills in the outgoing error code
|
||||
* @return The string value, or a bogus string if there is a failure UErrorCode.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
inline UnicodeString
|
||||
ures_getUnicodeStringByIndex(const UResourceBundle *resB, int32_t indexS, UErrorCode* status) {
|
||||
UnicodeString result;
|
||||
int32_t len = 0;
|
||||
const char16_t* r = ConstChar16Ptr(ures_getStringByIndex(resB, indexS, &len, status));
|
||||
if(U_SUCCESS(*status)) {
|
||||
result.setTo(true, r, len);
|
||||
} else {
|
||||
result.setToBogus();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string in a resource that has a given key.
|
||||
* This procedure works only with table resources.
|
||||
*
|
||||
* @param resB a resource
|
||||
* @param key a key associated with the wanted string
|
||||
* @param status fills in the outgoing error code
|
||||
* @return The string value, or a bogus string if there is a failure UErrorCode.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
inline UnicodeString
|
||||
ures_getUnicodeStringByKey(const UResourceBundle *resB, const char* key, UErrorCode* status) {
|
||||
UnicodeString result;
|
||||
int32_t len = 0;
|
||||
const char16_t* r = ConstChar16Ptr(ures_getStringByKey(resB, key, &len, status));
|
||||
if(U_SUCCESS(*status)) {
|
||||
result.setTo(true, r, len);
|
||||
} else {
|
||||
result.setToBogus();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Create a string enumerator, owned by the caller, of all locales located within
|
||||
* the specified resource tree.
|
||||
* @param packageName name of the tree, such as (NULL) or U_ICUDATA_ALIAS or "ICUDATA-coll"
|
||||
* This call is similar to uloc_getAvailable().
|
||||
* @param status error code
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
ures_openAvailableLocales(const char *packageName, UErrorCode *status);
|
||||
|
||||
|
||||
#endif /*_URES*/
|
||||
/*eof*/
|
||||
742
thirdparty/icu4c/common/unicode/uscript.h
vendored
Normal file
742
thirdparty/icu4c/common/unicode/uscript.h
vendored
Normal file
@@ -0,0 +1,742 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* File USCRIPT.H
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 07/06/2001 Ram Creation.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef USCRIPT_H
|
||||
#define USCRIPT_H
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Unicode Script Information
|
||||
*/
|
||||
|
||||
/**
|
||||
* Constants for ISO 15924 script codes.
|
||||
*
|
||||
* The current set of script code constants supports at least all scripts
|
||||
* that are encoded in the version of Unicode which ICU currently supports.
|
||||
* The names of the constants are usually derived from the
|
||||
* Unicode script property value aliases.
|
||||
* See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
|
||||
* and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
|
||||
*
|
||||
* In addition, constants for many ISO 15924 script codes
|
||||
* are included, for use with language tags, CLDR data, and similar.
|
||||
* Some of those codes are not used in the Unicode Character Database (UCD).
|
||||
* For example, there are no characters that have a UCD script property value of
|
||||
* Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
|
||||
*
|
||||
* Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
|
||||
*
|
||||
* Starting with ICU 55, script codes are only added when their scripts
|
||||
* have been or will certainly be encoded in Unicode,
|
||||
* and have been assigned Unicode script property value aliases,
|
||||
* to ensure that their script names are stable and match the names of the constants.
|
||||
* Script codes like Latf and Aran that are not subject to separate encoding
|
||||
* may be added at any time.
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
typedef enum UScriptCode {
|
||||
/*
|
||||
* Note: UScriptCode constants and their ISO script code comments
|
||||
* are parsed by preparseucd.py.
|
||||
* It matches lines like
|
||||
* USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * /
|
||||
*/
|
||||
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_INVALID_CODE = -1,
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_COMMON = 0, /* Zyyy */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_ARABIC = 2, /* Arab */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_ARMENIAN = 3, /* Armn */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_BENGALI = 4, /* Beng */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_BOPOMOFO = 5, /* Bopo */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_CHEROKEE = 6, /* Cher */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_COPTIC = 7, /* Copt */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_CYRILLIC = 8, /* Cyrl */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_DESERET = 9, /* Dsrt */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_DEVANAGARI = 10, /* Deva */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_ETHIOPIC = 11, /* Ethi */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_GEORGIAN = 12, /* Geor */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_GOTHIC = 13, /* Goth */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_GREEK = 14, /* Grek */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_GUJARATI = 15, /* Gujr */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_GURMUKHI = 16, /* Guru */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_HAN = 17, /* Hani */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_HANGUL = 18, /* Hang */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_HEBREW = 19, /* Hebr */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_HIRAGANA = 20, /* Hira */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_KANNADA = 21, /* Knda */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_KATAKANA = 22, /* Kana */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_KHMER = 23, /* Khmr */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_LAO = 24, /* Laoo */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_LATIN = 25, /* Latn */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_MALAYALAM = 26, /* Mlym */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_MONGOLIAN = 27, /* Mong */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_MYANMAR = 28, /* Mymr */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_OGHAM = 29, /* Ogam */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_OLD_ITALIC = 30, /* Ital */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_ORIYA = 31, /* Orya */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_RUNIC = 32, /* Runr */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_SINHALA = 33, /* Sinh */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_SYRIAC = 34, /* Syrc */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_TAMIL = 35, /* Taml */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_TELUGU = 36, /* Telu */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_THAANA = 37, /* Thaa */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_THAI = 38, /* Thai */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_TIBETAN = 39, /* Tibt */
|
||||
/** Canadian_Aboriginal script. @stable ICU 2.6 */
|
||||
USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
|
||||
/** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
|
||||
USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_YI = 41, /* Yiii */
|
||||
/* New scripts in Unicode 3.2 */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_TAGALOG = 42, /* Tglg */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_HANUNOO = 43, /* Hano */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_BUHID = 44, /* Buhd */
|
||||
/** @stable ICU 2.2 */
|
||||
USCRIPT_TAGBANWA = 45, /* Tagb */
|
||||
|
||||
/* New scripts in Unicode 4 */
|
||||
/** @stable ICU 2.6 */
|
||||
USCRIPT_BRAILLE = 46, /* Brai */
|
||||
/** @stable ICU 2.6 */
|
||||
USCRIPT_CYPRIOT = 47, /* Cprt */
|
||||
/** @stable ICU 2.6 */
|
||||
USCRIPT_LIMBU = 48, /* Limb */
|
||||
/** @stable ICU 2.6 */
|
||||
USCRIPT_LINEAR_B = 49, /* Linb */
|
||||
/** @stable ICU 2.6 */
|
||||
USCRIPT_OSMANYA = 50, /* Osma */
|
||||
/** @stable ICU 2.6 */
|
||||
USCRIPT_SHAVIAN = 51, /* Shaw */
|
||||
/** @stable ICU 2.6 */
|
||||
USCRIPT_TAI_LE = 52, /* Tale */
|
||||
/** @stable ICU 2.6 */
|
||||
USCRIPT_UGARITIC = 53, /* Ugar */
|
||||
|
||||
/** New script code in Unicode 4.0.1 @stable ICU 3.0 */
|
||||
USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
|
||||
|
||||
/* New scripts in Unicode 4.1 */
|
||||
/** @stable ICU 3.4 */
|
||||
USCRIPT_BUGINESE = 55, /* Bugi */
|
||||
/** @stable ICU 3.4 */
|
||||
USCRIPT_GLAGOLITIC = 56, /* Glag */
|
||||
/** @stable ICU 3.4 */
|
||||
USCRIPT_KHAROSHTHI = 57, /* Khar */
|
||||
/** @stable ICU 3.4 */
|
||||
USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
|
||||
/** @stable ICU 3.4 */
|
||||
USCRIPT_NEW_TAI_LUE = 59, /* Talu */
|
||||
/** @stable ICU 3.4 */
|
||||
USCRIPT_TIFINAGH = 60, /* Tfng */
|
||||
/** @stable ICU 3.4 */
|
||||
USCRIPT_OLD_PERSIAN = 61, /* Xpeo */
|
||||
|
||||
/* New script codes from Unicode and ISO 15924 */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_BALINESE = 62, /* Bali */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_BATAK = 63, /* Batk */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_BLISSYMBOLS = 64, /* Blis */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_BRAHMI = 65, /* Brah */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_CHAM = 66, /* Cham */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_CIRTH = 67, /* Cirt */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_KHUTSURI = 72, /* Geok */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_JAVANESE = 78, /* Java */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_KAYAH_LI = 79, /* Kali */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_LATIN_GAELIC = 81, /* Latg */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_LEPCHA = 82, /* Lepc */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_LINEAR_A = 83, /* Lina */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_MANDAIC = 84, /* Mand */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_NKO = 87, /* Nkoo */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_ORKHON = 88, /* Orkh */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_OLD_PERMIC = 89, /* Perm */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_PHAGS_PA = 90, /* Phag */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_PHOENICIAN = 91, /* Phnx */
|
||||
/** @stable ICU 52 */
|
||||
USCRIPT_MIAO = 92, /* Plrd */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_RONGORONGO = 93, /* Roro */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_SARATI = 94, /* Sara */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_TENGWAR = 98, /* Teng */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_VAI = 99, /* Vaii */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_VISIBLE_SPEECH = 100,/* Visp */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_CUNEIFORM = 101,/* Xsux */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
|
||||
/** @stable ICU 3.6 */
|
||||
USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
|
||||
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_CARIAN = 104,/* Cari */
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_JAPANESE = 105,/* Jpan */
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_LANNA = 106,/* Lana */
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_LYCIAN = 107,/* Lyci */
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_LYDIAN = 108,/* Lydi */
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_OL_CHIKI = 109,/* Olck */
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_REJANG = 110,/* Rjng */
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_SAURASHTRA = 111,/* Saur */
|
||||
/** Sutton SignWriting @stable ICU 3.8 */
|
||||
USCRIPT_SIGN_WRITING = 112,/* Sgnw */
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_SUNDANESE = 113,/* Sund */
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_MOON = 114,/* Moon */
|
||||
/** @stable ICU 3.8 */
|
||||
USCRIPT_MEITEI_MAYEK = 115,/* Mtei */
|
||||
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_AVESTAN = 117,/* Avst */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_CHAKMA = 118,/* Cakm */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_KOREAN = 119,/* Kore */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_KAITHI = 120,/* Kthi */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_MANICHAEAN = 121,/* Mani */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_SAMARITAN = 126,/* Samr */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_TAI_VIET = 127,/* Tavt */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
|
||||
/** @stable ICU 4.0 */
|
||||
USCRIPT_SYMBOLS = 129,/* Zsym */
|
||||
|
||||
/** @stable ICU 4.4 */
|
||||
USCRIPT_BAMUM = 130,/* Bamu */
|
||||
/** @stable ICU 4.4 */
|
||||
USCRIPT_LISU = 131,/* Lisu */
|
||||
/** @stable ICU 4.4 */
|
||||
USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
|
||||
/** @stable ICU 4.4 */
|
||||
USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */
|
||||
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_BASSA_VAH = 134,/* Bass */
|
||||
/** @stable ICU 54 */
|
||||
USCRIPT_DUPLOYAN = 135,/* Dupl */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/** @deprecated ICU 54 Typo, use USCRIPT_DUPLOYAN */
|
||||
USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN,
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_ELBASAN = 136,/* Elba */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_GRANTHA = 137,/* Gran */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_KPELLE = 138,/* Kpel */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_LOMA = 139,/* Loma */
|
||||
/** Mende Kikakui @stable ICU 4.6 */
|
||||
USCRIPT_MENDE = 140,/* Mend */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_NABATAEAN = 143,/* Nbat */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_PALMYRENE = 144,/* Palm */
|
||||
/** @stable ICU 54 */
|
||||
USCRIPT_KHUDAWADI = 145,/* Sind */
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_SINDHI = USCRIPT_KHUDAWADI,
|
||||
/** @stable ICU 4.6 */
|
||||
USCRIPT_WARANG_CITI = 146,/* Wara */
|
||||
|
||||
/** @stable ICU 4.8 */
|
||||
USCRIPT_AFAKA = 147,/* Afak */
|
||||
/** @stable ICU 4.8 */
|
||||
USCRIPT_JURCHEN = 148,/* Jurc */
|
||||
/** @stable ICU 4.8 */
|
||||
USCRIPT_MRO = 149,/* Mroo */
|
||||
/** @stable ICU 4.8 */
|
||||
USCRIPT_NUSHU = 150,/* Nshu */
|
||||
/** @stable ICU 4.8 */
|
||||
USCRIPT_SHARADA = 151,/* Shrd */
|
||||
/** @stable ICU 4.8 */
|
||||
USCRIPT_SORA_SOMPENG = 152,/* Sora */
|
||||
/** @stable ICU 4.8 */
|
||||
USCRIPT_TAKRI = 153,/* Takr */
|
||||
/** @stable ICU 4.8 */
|
||||
USCRIPT_TANGUT = 154,/* Tang */
|
||||
/** @stable ICU 4.8 */
|
||||
USCRIPT_WOLEAI = 155,/* Wole */
|
||||
|
||||
/** @stable ICU 49 */
|
||||
USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */
|
||||
/** @stable ICU 49 */
|
||||
USCRIPT_KHOJKI = 157,/* Khoj */
|
||||
/** @stable ICU 49 */
|
||||
USCRIPT_TIRHUTA = 158,/* Tirh */
|
||||
|
||||
/** @stable ICU 52 */
|
||||
USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */
|
||||
/** @stable ICU 52 */
|
||||
USCRIPT_MAHAJANI = 160,/* Mahj */
|
||||
|
||||
/** @stable ICU 54 */
|
||||
USCRIPT_AHOM = 161,/* Ahom */
|
||||
/** @stable ICU 54 */
|
||||
USCRIPT_HATRAN = 162,/* Hatr */
|
||||
/** @stable ICU 54 */
|
||||
USCRIPT_MODI = 163,/* Modi */
|
||||
/** @stable ICU 54 */
|
||||
USCRIPT_MULTANI = 164,/* Mult */
|
||||
/** @stable ICU 54 */
|
||||
USCRIPT_PAU_CIN_HAU = 165,/* Pauc */
|
||||
/** @stable ICU 54 */
|
||||
USCRIPT_SIDDHAM = 166,/* Sidd */
|
||||
|
||||
/** @stable ICU 58 */
|
||||
USCRIPT_ADLAM = 167,/* Adlm */
|
||||
/** @stable ICU 58 */
|
||||
USCRIPT_BHAIKSUKI = 168,/* Bhks */
|
||||
/** @stable ICU 58 */
|
||||
USCRIPT_MARCHEN = 169,/* Marc */
|
||||
/** @stable ICU 58 */
|
||||
USCRIPT_NEWA = 170,/* Newa */
|
||||
/** @stable ICU 58 */
|
||||
USCRIPT_OSAGE = 171,/* Osge */
|
||||
|
||||
/** @stable ICU 58 */
|
||||
USCRIPT_HAN_WITH_BOPOMOFO = 172,/* Hanb */
|
||||
/** @stable ICU 58 */
|
||||
USCRIPT_JAMO = 173,/* Jamo */
|
||||
/** @stable ICU 58 */
|
||||
USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */
|
||||
|
||||
/** @stable ICU 60 */
|
||||
USCRIPT_MASARAM_GONDI = 175,/* Gonm */
|
||||
/** @stable ICU 60 */
|
||||
USCRIPT_SOYOMBO = 176,/* Soyo */
|
||||
/** @stable ICU 60 */
|
||||
USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */
|
||||
|
||||
/** @stable ICU 62 */
|
||||
USCRIPT_DOGRA = 178,/* Dogr */
|
||||
/** @stable ICU 62 */
|
||||
USCRIPT_GUNJALA_GONDI = 179,/* Gong */
|
||||
/** @stable ICU 62 */
|
||||
USCRIPT_MAKASAR = 180,/* Maka */
|
||||
/** @stable ICU 62 */
|
||||
USCRIPT_MEDEFAIDRIN = 181,/* Medf */
|
||||
/** @stable ICU 62 */
|
||||
USCRIPT_HANIFI_ROHINGYA = 182,/* Rohg */
|
||||
/** @stable ICU 62 */
|
||||
USCRIPT_SOGDIAN = 183,/* Sogd */
|
||||
/** @stable ICU 62 */
|
||||
USCRIPT_OLD_SOGDIAN = 184,/* Sogo */
|
||||
|
||||
/** @stable ICU 64 */
|
||||
USCRIPT_ELYMAIC = 185,/* Elym */
|
||||
/** @stable ICU 64 */
|
||||
USCRIPT_NYIAKENG_PUACHUE_HMONG = 186,/* Hmnp */
|
||||
/** @stable ICU 64 */
|
||||
USCRIPT_NANDINAGARI = 187,/* Nand */
|
||||
/** @stable ICU 64 */
|
||||
USCRIPT_WANCHO = 188,/* Wcho */
|
||||
|
||||
/** @stable ICU 66 */
|
||||
USCRIPT_CHORASMIAN = 189,/* Chrs */
|
||||
/** @stable ICU 66 */
|
||||
USCRIPT_DIVES_AKURU = 190,/* Diak */
|
||||
/** @stable ICU 66 */
|
||||
USCRIPT_KHITAN_SMALL_SCRIPT = 191,/* Kits */
|
||||
/** @stable ICU 66 */
|
||||
USCRIPT_YEZIDI = 192,/* Yezi */
|
||||
|
||||
/** @stable ICU 70 */
|
||||
USCRIPT_CYPRO_MINOAN = 193,/* Cpmn */
|
||||
/** @stable ICU 70 */
|
||||
USCRIPT_OLD_UYGHUR = 194,/* Ougr */
|
||||
/** @stable ICU 70 */
|
||||
USCRIPT_TANGSA = 195,/* Tnsa */
|
||||
/** @stable ICU 70 */
|
||||
USCRIPT_TOTO = 196,/* Toto */
|
||||
/** @stable ICU 70 */
|
||||
USCRIPT_VITHKUQI = 197,/* Vith */
|
||||
|
||||
/** @stable ICU 72 */
|
||||
USCRIPT_KAWI = 198,/* Kawi */
|
||||
/** @stable ICU 72 */
|
||||
USCRIPT_NAG_MUNDARI = 199,/* Nagm */
|
||||
|
||||
/** @stable ICU 75 */
|
||||
USCRIPT_ARABIC_NASTALIQ = 200, /* Aran */
|
||||
|
||||
/** @stable ICU 76 */
|
||||
USCRIPT_GARAY = 201, /* Gara */
|
||||
/** @stable ICU 76 */
|
||||
USCRIPT_GURUNG_KHEMA = 202, /* Gukh */
|
||||
/** @stable ICU 76 */
|
||||
USCRIPT_KIRAT_RAI = 203, /* Krai */
|
||||
/** @stable ICU 76 */
|
||||
USCRIPT_OL_ONAL = 204, /* Onao */
|
||||
/** @stable ICU 76 */
|
||||
USCRIPT_SUNUWAR = 205, /* Sunu */
|
||||
/** @stable ICU 76 */
|
||||
USCRIPT_TODHRI = 206, /* Todr */
|
||||
/** @stable ICU 76 */
|
||||
USCRIPT_TULU_TIGALARI = 207, /* Tutg */
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal UScriptCode value.
|
||||
* The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).
|
||||
*
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
USCRIPT_CODE_LIMIT = 208
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
} UScriptCode;
|
||||
|
||||
/**
|
||||
* Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
|
||||
* Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
|
||||
* Fills in USCRIPT_LATIN given "en" OR "en_US"
|
||||
* If the required capacity is greater than the capacity of the destination buffer,
|
||||
* then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
|
||||
*
|
||||
* <p>Note: To search by short or long script alias only, use
|
||||
* u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does
|
||||
* a fast lookup with no access of the locale data.
|
||||
*
|
||||
* @param nameOrAbbrOrLocale name of the script, as given in
|
||||
* PropertyValueAliases.txt, or ISO 15924 code or locale
|
||||
* @param fillIn the UScriptCode buffer to fill in the script code
|
||||
* @param capacity the capacity (size) of UScriptCode buffer passed in.
|
||||
* @param err the error status code.
|
||||
* @return The number of script codes filled in the buffer passed in
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Returns the long Unicode script name, if there is one.
|
||||
* Otherwise returns the 4-letter ISO 15924 script code.
|
||||
* Returns "Malayalam" given USCRIPT_MALAYALAM.
|
||||
*
|
||||
* @param scriptCode UScriptCode enum
|
||||
* @return long script name as given in PropertyValueAliases.txt, or the 4-letter code,
|
||||
* or NULL if scriptCode is invalid
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uscript_getName(UScriptCode scriptCode);
|
||||
|
||||
/**
|
||||
* Returns the 4-letter ISO 15924 script code,
|
||||
* which is the same as the short Unicode script name if Unicode has names for the script.
|
||||
* Returns "Mlym" given USCRIPT_MALAYALAM.
|
||||
*
|
||||
* @param scriptCode UScriptCode enum
|
||||
* @return short script name (4-letter code), or NULL if scriptCode is invalid
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uscript_getShortName(UScriptCode scriptCode);
|
||||
|
||||
/**
|
||||
* Gets the script code associated with the given codepoint.
|
||||
* Returns USCRIPT_MALAYALAM given 0x0D02
|
||||
* @param codepoint UChar32 codepoint
|
||||
* @param err the error status code.
|
||||
* @return The UScriptCode, or 0 if codepoint is invalid
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_CAPI UScriptCode U_EXPORT2
|
||||
uscript_getScript(UChar32 codepoint, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Do the Script_Extensions of code point c contain script sc?
|
||||
* If c does not have explicit Script_Extensions, then this tests whether
|
||||
* c has the Script property value sc.
|
||||
*
|
||||
* Some characters are commonly used in multiple scripts.
|
||||
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
* @param c code point
|
||||
* @param sc script code
|
||||
* @return true if sc is in Script_Extensions(c)
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uscript_hasScript(UChar32 c, UScriptCode sc);
|
||||
|
||||
/**
|
||||
* Writes code point c's Script_Extensions as a list of UScriptCode values
|
||||
* to the output scripts array and returns the number of script codes.
|
||||
* - If c does have Script_Extensions, then the Script property value
|
||||
* (normally Common or Inherited) is not included.
|
||||
* - If c does not have Script_Extensions, then the one Script code is written to the output array.
|
||||
* - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
|
||||
* In other words, if the return value is 1,
|
||||
* then the output array contains exactly c's single Script code.
|
||||
* If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
|
||||
*
|
||||
* Some characters are commonly used in multiple scripts.
|
||||
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
*
|
||||
* If there are more than capacity script codes to be written, then
|
||||
* U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
|
||||
* (Usual ICU buffer handling behavior.)
|
||||
*
|
||||
* @param c code point
|
||||
* @param scripts output script code array
|
||||
* @param capacity capacity of the scripts array
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
|
||||
* written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uscript_getScriptExtensions(UChar32 c,
|
||||
UScriptCode *scripts, int32_t capacity,
|
||||
UErrorCode *errorCode);
|
||||
|
||||
/**
|
||||
* Script usage constants.
|
||||
* See UAX #31 Unicode Identifier and Pattern Syntax.
|
||||
* http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
|
||||
*
|
||||
* @stable ICU 51
|
||||
*/
|
||||
typedef enum UScriptUsage {
|
||||
/** Not encoded in Unicode. @stable ICU 51 */
|
||||
USCRIPT_USAGE_NOT_ENCODED,
|
||||
/** Unknown script usage. @stable ICU 51 */
|
||||
USCRIPT_USAGE_UNKNOWN,
|
||||
/** Candidate for Exclusion from Identifiers. @stable ICU 51 */
|
||||
USCRIPT_USAGE_EXCLUDED,
|
||||
/** Limited Use script. @stable ICU 51 */
|
||||
USCRIPT_USAGE_LIMITED_USE,
|
||||
/** Aspirational Use script. @stable ICU 51 */
|
||||
USCRIPT_USAGE_ASPIRATIONAL,
|
||||
/** Recommended script. @stable ICU 51 */
|
||||
USCRIPT_USAGE_RECOMMENDED
|
||||
} UScriptUsage;
|
||||
|
||||
/**
|
||||
* Writes the script sample character string.
|
||||
* This string normally consists of one code point but might be longer.
|
||||
* The string is empty if the script is not encoded.
|
||||
*
|
||||
* @param script script code
|
||||
* @param dest output string array
|
||||
* @param capacity number of UChars in the dest array
|
||||
* @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
|
||||
* @return the string length, even if U_BUFFER_OVERFLOW_ERROR
|
||||
* @stable ICU 51
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
class UnicodeString;
|
||||
U_NAMESPACE_END
|
||||
|
||||
/**
|
||||
* Returns the script sample character string.
|
||||
* This string normally consists of one code point but might be longer.
|
||||
* The string is empty if the script is not encoded.
|
||||
*
|
||||
* @param script script code
|
||||
* @return the sample character string
|
||||
* @stable ICU 51
|
||||
*/
|
||||
U_COMMON_API icu::UnicodeString U_EXPORT2
|
||||
uscript_getSampleUnicodeString(UScriptCode script);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
|
||||
* Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
|
||||
*
|
||||
* @param script script code
|
||||
* @return script usage
|
||||
* @see UScriptUsage
|
||||
* @stable ICU 51
|
||||
*/
|
||||
U_CAPI UScriptUsage U_EXPORT2
|
||||
uscript_getUsage(UScriptCode script);
|
||||
|
||||
/**
|
||||
* Returns true if the script is written right-to-left.
|
||||
* For example, Arab and Hebr.
|
||||
*
|
||||
* @param script script code
|
||||
* @return true if the script is right-to-left
|
||||
* @stable ICU 51
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uscript_isRightToLeft(UScriptCode script);
|
||||
|
||||
/**
|
||||
* Returns true if the script allows line breaks between letters (excluding hyphenation).
|
||||
* Such a script typically requires dictionary-based line breaking.
|
||||
* For example, Hani and Thai.
|
||||
*
|
||||
* @param script script code
|
||||
* @return true if the script allows line breaks between letters
|
||||
* @stable ICU 51
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uscript_breaksBetweenLetters(UScriptCode script);
|
||||
|
||||
/**
|
||||
* Returns true if in modern (or most recent) usage of the script case distinctions are customary.
|
||||
* For example, Latn and Cyrl.
|
||||
*
|
||||
* @param script script code
|
||||
* @return true if the script is cased
|
||||
* @stable ICU 51
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uscript_isCased(UScriptCode script);
|
||||
|
||||
#endif
|
||||
1908
thirdparty/icu4c/common/unicode/uset.h
vendored
Normal file
1908
thirdparty/icu4c/common/unicode/uset.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
323
thirdparty/icu4c/common/unicode/usetiter.h
vendored
Normal file
323
thirdparty/icu4c/common/unicode/usetiter.h
vendored
Normal file
@@ -0,0 +1,323 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef USETITER_H
|
||||
#define USETITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeSet;
|
||||
class UnicodeString;
|
||||
|
||||
/**
|
||||
*
|
||||
* UnicodeSetIterator iterates over the contents of a UnicodeSet. It
|
||||
* iterates over either code points or code point ranges. After all
|
||||
* code points or ranges have been returned, it returns the
|
||||
* multicharacter strings of the UnicodeSet, if any.
|
||||
*
|
||||
* This class is not intended for public subclassing.
|
||||
*
|
||||
* <p>To iterate over code points and strings, use a loop like this:
|
||||
* <pre>
|
||||
* UnicodeSetIterator it(set);
|
||||
* while (it.next()) {
|
||||
* processItem(it.getString());
|
||||
* }
|
||||
* </pre>
|
||||
* <p>Each item in the set is accessed as a string. Set elements
|
||||
* consisting of single code points are returned as strings containing
|
||||
* just the one code point.
|
||||
*
|
||||
* <p>To iterate over code point ranges, instead of individual code points,
|
||||
* use a loop like this:
|
||||
* <pre>
|
||||
* UnicodeSetIterator it(set);
|
||||
* while (it.nextRange()) {
|
||||
* if (it.isString()) {
|
||||
* processString(it.getString());
|
||||
* } else {
|
||||
* processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
|
||||
* }
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* To iterate over only the strings, start with <code>skipToStrings()</code>.
|
||||
*
|
||||
* @author M. Davis
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
class U_COMMON_API UnicodeSetIterator final : public UObject {
|
||||
/**
|
||||
* Value of <tt>codepoint</tt> if the iterator points to a string.
|
||||
* If <tt>codepoint == IS_STRING</tt>, then examine
|
||||
* <tt>string</tt> for the current iteration result.
|
||||
*/
|
||||
enum { IS_STRING = -1 };
|
||||
|
||||
/**
|
||||
* Current code point, or the special value <tt>IS_STRING</tt>, if
|
||||
* the iterator points to a string.
|
||||
*/
|
||||
UChar32 codepoint;
|
||||
|
||||
/**
|
||||
* When iterating over ranges using <tt>nextRange()</tt>,
|
||||
* <tt>codepointEnd</tt> contains the inclusive end of the
|
||||
* iteration range, if <tt>codepoint != IS_STRING</tt>. If
|
||||
* iterating over code points using <tt>next()</tt>, or if
|
||||
* <tt>codepoint == IS_STRING</tt>, then the value of
|
||||
* <tt>codepointEnd</tt> is undefined.
|
||||
*/
|
||||
UChar32 codepointEnd;
|
||||
|
||||
/**
|
||||
* If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
|
||||
* to the current string. If <tt>codepoint != IS_STRING</tt>, the
|
||||
* value of <tt>string</tt> is undefined.
|
||||
*/
|
||||
const UnicodeString* string;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Create an iterator over the given set. The iterator is valid
|
||||
* only so long as <tt>set</tt> is valid.
|
||||
* @param set set to iterate over
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
UnicodeSetIterator(const UnicodeSet& set);
|
||||
|
||||
/**
|
||||
* Create an iterator over nothing. <tt>next()</tt> and
|
||||
* <tt>nextRange()</tt> return false. This is a convenience
|
||||
* constructor allowing the target to be set later.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
UnicodeSetIterator();
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual ~UnicodeSetIterator();
|
||||
|
||||
/**
|
||||
* Returns true if the current element is a string. If so, the
|
||||
* caller can retrieve it with <tt>getString()</tt>. If this
|
||||
* method returns false, the current element is a code point or
|
||||
* code point range, depending on whether <tt>next()</tt> or
|
||||
* <tt>nextRange()</tt> was called.
|
||||
* Elements of types string and codepoint can both be retrieved
|
||||
* with the function <tt>getString()</tt>.
|
||||
* Elements of type codepoint can also be retrieved with
|
||||
* <tt>getCodepoint()</tt>.
|
||||
* For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
|
||||
* of the range, and <tt>getCodepointEnd()</tt> returns the end
|
||||
* of the range.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
inline UBool isString() const;
|
||||
|
||||
/**
|
||||
* Returns the current code point, if <tt>isString()</tt> returned
|
||||
* false. Otherwise returns an undefined result.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
inline UChar32 getCodepoint() const;
|
||||
|
||||
/**
|
||||
* Returns the end of the current code point range, if
|
||||
* <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
|
||||
* called. Otherwise returns an undefined result.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
inline UChar32 getCodepointEnd() const;
|
||||
|
||||
/**
|
||||
* Returns the current string, if <tt>isString()</tt> returned
|
||||
* true. If the current iteration item is a code point, a UnicodeString
|
||||
* containing that single code point is returned.
|
||||
*
|
||||
* Ownership of the returned string remains with the iterator.
|
||||
* The string is guaranteed to remain valid only until the iterator is
|
||||
* advanced to the next item, or until the iterator is deleted.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
const UnicodeString& getString();
|
||||
|
||||
/**
|
||||
* Skips over the remaining code points/ranges, if any.
|
||||
* A following call to next() or nextRange() will yield a string, if there is one.
|
||||
* No-op if next() would return false, or if it would yield a string anyway.
|
||||
*
|
||||
* @return *this
|
||||
* @stable ICU 70
|
||||
* @see UnicodeSet#strings()
|
||||
*/
|
||||
inline UnicodeSetIterator &skipToStrings() {
|
||||
// Finish code point/range iteration.
|
||||
range = endRange;
|
||||
endElement = -1;
|
||||
nextElement = 0;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances the iteration position to the next element in the set,
|
||||
* which can be either a single code point or a string.
|
||||
* If there are no more elements in the set, return false.
|
||||
*
|
||||
* <p>
|
||||
* If <tt>isString() == true</tt>, the value is a
|
||||
* string, otherwise the value is a
|
||||
* single code point. Elements of either type can be retrieved
|
||||
* with the function <tt>getString()</tt>, while elements
|
||||
* consisting of a single code point can be retrieved with
|
||||
* <tt>getCodepoint()</tt>
|
||||
*
|
||||
* <p>The order of iteration is all code points in sorted order,
|
||||
* followed by all strings in sorted order. Do not mix
|
||||
* calls to <tt>next()</tt> and <tt>nextRange()</tt> without
|
||||
* calling <tt>reset()</tt> between them. The results of doing so
|
||||
* are undefined.
|
||||
*
|
||||
* @return true if there was another element in the set.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
UBool next();
|
||||
|
||||
/**
|
||||
* Returns the next element in the set, either a code point range
|
||||
* or a string. If there are no more elements in the set, return
|
||||
* false. If <tt>isString() == true</tt>, the value is a
|
||||
* string and can be accessed with <tt>getString()</tt>. Otherwise the value is a
|
||||
* range of one or more code points from <tt>getCodepoint()</tt> to
|
||||
* <tt>getCodepointEnd()</tt> inclusive.
|
||||
*
|
||||
* <p>The order of iteration is all code point ranges in sorted
|
||||
* order, followed by all strings in sorted order. Ranges are
|
||||
* disjoint and non-contiguous. The value returned from <tt>getString()</tt>
|
||||
* is undefined unless <tt>isString() == true</tt>. Do not mix calls to
|
||||
* <tt>next()</tt> and <tt>nextRange()</tt> without calling
|
||||
* <tt>reset()</tt> between them. The results of doing so are
|
||||
* undefined.
|
||||
*
|
||||
* @return true if there was another element in the set.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
UBool nextRange();
|
||||
|
||||
/**
|
||||
* Sets this iterator to visit the elements of the given set and
|
||||
* resets it to the start of that set. The iterator is valid only
|
||||
* so long as <tt>set</tt> is valid.
|
||||
* @param set the set to iterate over.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
void reset(const UnicodeSet& set);
|
||||
|
||||
/**
|
||||
* Resets this iterator to the start of the set.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
void reset();
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const override;
|
||||
|
||||
// ======================= PRIVATES ===========================
|
||||
|
||||
private:
|
||||
|
||||
// endElement and nextElement are really UChar32's, but we keep
|
||||
// them as signed int32_t's so we can do comparisons with
|
||||
// endElement set to -1. Leave them as int32_t's.
|
||||
/** The set
|
||||
*/
|
||||
const UnicodeSet* set;
|
||||
/** End range
|
||||
*/
|
||||
int32_t endRange;
|
||||
/** Range
|
||||
*/
|
||||
int32_t range;
|
||||
/** End element
|
||||
*/
|
||||
int32_t endElement;
|
||||
/** Next element
|
||||
*/
|
||||
int32_t nextElement;
|
||||
/** Next string
|
||||
*/
|
||||
int32_t nextString;
|
||||
/** String count
|
||||
*/
|
||||
int32_t stringCount;
|
||||
|
||||
/**
|
||||
* Points to the string to use when the caller asks for a
|
||||
* string and the current iteration item is a code point, not a string.
|
||||
*/
|
||||
UnicodeString *cpString;
|
||||
|
||||
/** Copy constructor. Disallowed.
|
||||
*/
|
||||
UnicodeSetIterator(const UnicodeSetIterator&) = delete;
|
||||
|
||||
/** Assignment operator. Disallowed.
|
||||
*/
|
||||
UnicodeSetIterator& operator=(const UnicodeSetIterator&) = delete;
|
||||
|
||||
/** Load range
|
||||
*/
|
||||
void loadRange(int32_t range);
|
||||
};
|
||||
|
||||
// Reports whether the current iteration element is a string rather than a code point or range.
inline UBool UnicodeSetIterator::isString() const {
|
||||
return codepoint < 0; // IS_STRING is -1, so a negative codepoint marks a string element
|
||||
}
|
||||
|
||||
// Returns the stored current code point (the range start when iterating by ranges).
inline UChar32 UnicodeSetIterator::getCodepoint() const {
|
||||
return codepoint; // holds IS_STRING (-1) when the current element is a string
|
||||
}
|
||||
|
||||
// Returns the stored inclusive end of the current code point range.
inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
|
||||
return codepointEnd; // documented as undefined after next() or when the element is a string
|
||||
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
476
thirdparty/icu4c/common/unicode/ushape.h
vendored
Normal file
476
thirdparty/icu4c/common/unicode/ushape.h
vendored
Normal file
@@ -0,0 +1,476 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: ushape.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2000jun29
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __USHAPE_H__
|
||||
#define __USHAPE_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Arabic shaping
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* Shape Arabic text on a character basis.
|
||||
*
|
||||
* <p>This function performs basic operations for "shaping" Arabic text. It is most
|
||||
* useful for use with legacy data formats and legacy display technology
|
||||
* (simple terminals). All operations are performed on Unicode characters.</p>
|
||||
*
|
||||
* <p>Text-based shaping means that some character code points in the text are
|
||||
* replaced by others depending on the context. It transforms one kind of text
|
||||
* into another. In comparison, modern displays for Arabic text select
|
||||
* appropriate, context-dependent font glyphs for each text element, which means
|
||||
* that they transform text into a glyph vector.</p>
|
||||
*
|
||||
* <p>Text transformations are necessary when modern display technology is not
|
||||
* available or when text needs to be transformed to or from legacy formats that
|
||||
* use "shaped" characters. Since the Arabic script is cursive, connecting
|
||||
* adjacent letters to each other, computers select images for each letter based
|
||||
* on the surrounding letters. This usually results in four images per Arabic
|
||||
* letter: initial, middle, final, and isolated forms. In Unicode, on the other
|
||||
* hand, letters are normally stored abstract, and a display system is expected
|
||||
* to select the necessary glyphs. (This makes searching and other text
|
||||
* processing easier because the same letter has only one code.) It is possible
|
||||
* to mimic this with text transformations because there are characters in
|
||||
* Unicode that are rendered as letters with a specific shape
|
||||
* (or cursive connectivity). They were included for interoperability with
|
||||
* legacy systems and codepages, and for unsophisticated display systems.</p>
|
||||
*
|
||||
* <p>A second kind of text transformations is supported for Arabic digits:
|
||||
* For compatibility with legacy codepages that only include European digits,
|
||||
* it is possible to replace one set of digits by another, changing the
|
||||
* character code points. These operations can be performed for either
|
||||
* Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
|
||||
* digits (U+06f0...U+06f9).</p>
|
||||
*
|
||||
* <p>Some replacements may result in more or fewer characters (code points).
|
||||
* By default, this means that the destination buffer may receive text with a
|
||||
* length different from the source length. Some legacy systems rely on the
|
||||
* length of the text to be constant. They expect extra spaces to be added
|
||||
* or consumed either next to the affected character or at the end of the
|
||||
* text.</p>
|
||||
*
|
||||
* <p>For details about the available operations, see the description of the
|
||||
* <code>U_SHAPE_...</code> options.</p>
|
||||
*
|
||||
* @param source The input text.
|
||||
*
|
||||
* @param sourceLength The number of UChars in <code>source</code>.
|
||||
*
|
||||
* @param dest The destination buffer that will receive the results of the
|
||||
* requested operations. It may be <code>NULL</code> only if
|
||||
* <code>destSize</code> is 0. The source and destination must not
|
||||
* overlap.
|
||||
*
|
||||
* @param destSize The size (capacity) of the destination buffer in UChars.
|
||||
* If <code>destSize</code> is 0, then no output is produced,
|
||||
* but the necessary buffer size is returned ("preflighting").
|
||||
*
|
||||
* @param options This is a 32-bit set of flags that specify the operations
|
||||
* that are performed on the input text. If no error occurs,
|
||||
* then the result will always be written to the destination
|
||||
* buffer.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return The number of UChars written to the destination buffer.
|
||||
* If an error occurred, then no output was written, or it may be
|
||||
* incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
|
||||
* the return value indicates the necessary destination buffer size.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_shapeArabic(const UChar *source, int32_t sourceLength,
|
||||
UChar *dest, int32_t destSize,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Memory option: allow the result to have a different length than the source.
|
||||
* Affects: LamAlef options
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_SHAPE_LENGTH_GROW_SHRINK 0
|
||||
|
||||
/**
|
||||
* Memory option: allow the result to have a different length than the source.
|
||||
* Affects: LamAlef options
|
||||
* This option is an alias to U_SHAPE_LENGTH_GROW_SHRINK
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_LAMALEF_RESIZE 0
|
||||
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* If more room is necessary, then try to consume spaces next to modified characters.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR 1
|
||||
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* If more room is necessary, then try to consume spaces next to modified characters.
|
||||
* Affects: LamAlef options
|
||||
* This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_NEAR
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_LAMALEF_NEAR 1
|
||||
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* If more room is necessary, then try to consume spaces at the end of the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END 2
|
||||
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* If more room is necessary, then try to consume spaces at the end of the text.
|
||||
* Affects: LamAlef options
|
||||
* This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_END
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_LAMALEF_END 2
|
||||
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* If more room is necessary, then try to consume spaces at the beginning of the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3
|
||||
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* If more room is necessary, then try to consume spaces at the beginning of the text.
|
||||
* Affects: LamAlef options
|
||||
* This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_LAMALEF_BEGIN 3
|
||||
|
||||
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* Shaping Mode: For each LAMALEF character found, expand LAMALEF using space at end.
|
||||
* If there is no space at end, use spaces at beginning of the buffer. If there
|
||||
* is no space at beginning of the buffer, use spaces at the near (i.e. the space
|
||||
* after the LAMALEF character).
|
||||
* If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
|
||||
* will be set in pErrorCode
|
||||
*
|
||||
* Deshaping Mode: Performs the same function as when the flag equals U_SHAPE_LAMALEF_END.
|
||||
* Affects: LamAlef options
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_LAMALEF_AUTO 0x10000
|
||||
|
||||
/** Bit mask for memory options. @stable ICU 2.0 */
|
||||
#define U_SHAPE_LENGTH_MASK 0x10003 /* Changed old value 3 */
|
||||
|
||||
|
||||
/**
|
||||
* Bit mask for LamAlef memory options.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_LAMALEF_MASK 0x10003 /* updated */
|
||||
|
||||
/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */
|
||||
#define U_SHAPE_TEXT_DIRECTION_LOGICAL 0
|
||||
|
||||
/**
|
||||
* Direction indicator:
|
||||
* the source is in visual RTL order,
|
||||
* the rightmost displayed character stored first.
|
||||
* This option is an alias to U_SHAPE_TEXT_DIRECTION_LOGICAL
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_TEXT_DIRECTION_VISUAL_RTL 0
|
||||
|
||||
/**
|
||||
* Direction indicator:
|
||||
* the source is in visual LTR order,
|
||||
* the leftmost displayed character stored first.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 4
|
||||
|
||||
/** Bit mask for direction indicators. @stable ICU 2.0 */
|
||||
#define U_SHAPE_TEXT_DIRECTION_MASK 4
|
||||
|
||||
|
||||
/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */
|
||||
#define U_SHAPE_LETTERS_NOOP 0
|
||||
|
||||
/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */
|
||||
#define U_SHAPE_LETTERS_SHAPE 8
|
||||
|
||||
/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */
|
||||
#define U_SHAPE_LETTERS_UNSHAPE 0x10
|
||||
|
||||
/**
|
||||
* Letter shaping option: replace abstract letter characters by "shaped" ones.
|
||||
* The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters
|
||||
* are always "shaped" into the isolated form instead of the medial form
|
||||
* (selecting code points from the Arabic Presentation Forms-B block).
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18
|
||||
|
||||
|
||||
/** Bit mask for letter shaping options. @stable ICU 2.0 */
|
||||
#define U_SHAPE_LETTERS_MASK 0x18
|
||||
|
||||
|
||||
/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */
|
||||
#define U_SHAPE_DIGITS_NOOP 0
|
||||
|
||||
/**
|
||||
* Digit shaping option:
|
||||
* Replace European digits (U+0030...) by Arabic-Indic digits.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_SHAPE_DIGITS_EN2AN 0x20
|
||||
|
||||
/**
|
||||
* Digit shaping option:
|
||||
* Replace Arabic-Indic digits by European digits (U+0030...).
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_SHAPE_DIGITS_AN2EN 0x40
|
||||
|
||||
/**
|
||||
* Digit shaping option:
|
||||
* Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
|
||||
* strongly directional character is an Arabic letter
|
||||
* (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
|
||||
* The direction of "preceding" depends on the direction indicator option.
|
||||
* For the first characters, the preceding strongly directional character
|
||||
* (initial state) is assumed to be not an Arabic letter
|
||||
* (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]).
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR 0x60
|
||||
|
||||
/**
|
||||
* Digit shaping option:
|
||||
* Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
|
||||
* strongly directional character is an Arabic letter
|
||||
* (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
|
||||
* The direction of "preceding" depends on the direction indicator option.
|
||||
* For the first characters, the preceding strongly directional character
|
||||
* (initial state) is assumed to be an Arabic letter.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL 0x80
|
||||
|
||||
/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
|
||||
#define U_SHAPE_DIGITS_RESERVED 0xa0
|
||||
|
||||
/** Bit mask for digit shaping options. @stable ICU 2.0 */
|
||||
#define U_SHAPE_DIGITS_MASK 0xe0
|
||||
|
||||
|
||||
/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */
|
||||
#define U_SHAPE_DIGIT_TYPE_AN 0
|
||||
|
||||
/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */
|
||||
#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED 0x100
|
||||
|
||||
/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
|
||||
#define U_SHAPE_DIGIT_TYPE_RESERVED 0x200
|
||||
|
||||
/** Bit mask for digit type options. @stable ICU 2.0 */
|
||||
#define U_SHAPE_DIGIT_TYPE_MASK 0x300 /* I need to change this from 0x3f00 to 0x300 */
|
||||
|
||||
/**
|
||||
* Tashkeel aggregation option:
|
||||
* Replaces any combination of U+0651 with one of
|
||||
* U+064C, U+064D, U+064E, U+064F, U+0650 with
|
||||
* U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 consecutively.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
#define U_SHAPE_AGGREGATE_TASHKEEL 0x4000
|
||||
/** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */
|
||||
#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP 0
|
||||
/** Bit mask for tashkeel aggregation. @stable ICU 3.6 */
|
||||
#define U_SHAPE_AGGREGATE_TASHKEEL_MASK 0x4000
|
||||
|
||||
/**
|
||||
* Presentation form option:
|
||||
* Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B
|
||||
* characters with U+06xx characters, before shaping.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
#define U_SHAPE_PRESERVE_PRESENTATION 0x8000
|
||||
/** Presentation form option:
|
||||
* Replace Arabic Presentation Forms-A and Arabic Presentation Forms-B with
|
||||
* their unshaped correspondents in range U+06xx, before shaping.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0
|
||||
/** Bit mask for preserve presentation form. @stable ICU 3.6 */
|
||||
#define U_SHAPE_PRESERVE_PRESENTATION_MASK 0x8000
|
||||
|
||||
/* Seen Tail option */
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* Shaping mode: The SEEN family character will expand into two characters using space near
|
||||
* the SEEN family character (i.e. the space after the character).
|
||||
* If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
|
||||
* will be set in pErrorCode
|
||||
*
|
||||
* De-shaping mode: Any Seen character followed by Tail character will be
|
||||
* replaced by one cell Seen and a space will replace the Tail.
|
||||
* Affects: Seen options
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_SEEN_TWOCELL_NEAR 0x200000
|
||||
|
||||
/**
|
||||
* Bit mask for Seen memory options.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_SEEN_MASK 0x700000
|
||||
|
||||
/* YehHamza option */
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* Shaping mode: The YEHHAMZA character will expand into two characters using space near it
|
||||
* (i.e. the space after the character).
|
||||
* If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
|
||||
* will be set in pErrorCode
|
||||
*
|
||||
* De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be
|
||||
* replaced by one cell YehHamza and space will replace the Hamza.
|
||||
* Affects: YehHamza options
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_YEHHAMZA_TWOCELL_NEAR 0x1000000
|
||||
|
||||
|
||||
/**
|
||||
* Bit mask for YehHamza memory options.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_YEHHAMZA_MASK 0x3800000
|
||||
|
||||
/* New Tashkeel options */
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* Shaping mode: Tashkeel characters will be replaced by spaces.
|
||||
* Spaces will be placed at beginning of the buffer
|
||||
*
|
||||
* De-shaping mode: N/A
|
||||
* Affects: Tashkeel options
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_TASHKEEL_BEGIN 0x40000
|
||||
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* Shaping mode: Tashkeel characters will be replaced by spaces.
|
||||
* Spaces will be placed at end of the buffer
|
||||
*
|
||||
* De-shaping mode: N/A
|
||||
* Affects: Tashkeel options
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_TASHKEEL_END 0x60000
|
||||
|
||||
/**
|
||||
* Memory option: allow the result to have a different length than the source.
|
||||
* Shaping mode: Tashkeel characters will be removed, buffer length will shrink.
|
||||
* De-shaping mode: N/A
|
||||
*
|
||||
* Affects: Tashkeel options
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_TASHKEEL_RESIZE 0x80000
|
||||
|
||||
/**
|
||||
* Memory option: the result must have the same length as the source.
|
||||
* Shaping mode: Tashkeel characters will be replaced by Tatweel if it is connected to adjacent
|
||||
* characters (i.e. shaped on Tatweel) or replaced by space if it is not connected.
|
||||
*
|
||||
* De-shaping mode: N/A
|
||||
* Affects: Tashkeel options
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL 0xC0000
|
||||
|
||||
/**
|
||||
* Bit mask for Tashkeel replacement with Space or Tatweel memory options.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_TASHKEEL_MASK 0xE0000
|
||||
|
||||
|
||||
/* Space location Control options */
|
||||
/**
|
||||
* This option affects the meaning of BEGIN and END options. If this option is not used, the default
|
||||
* for BEGIN and END will be as following:
|
||||
* The Default (for both Visual LTR, Visual RTL and Logical Text)
|
||||
* 1. BEGIN always refers to the start address of physical memory.
|
||||
* 2. END always refers to the end address of physical memory.
|
||||
*
|
||||
* If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text.
|
||||
*
|
||||
* The effect on BEGIN and END Memory Options will be as following:
|
||||
* A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text(
|
||||
* corresponding to the physical memory address end for Visual LTR text, Same as END in
|
||||
* default behavior)
|
||||
* B. BEGIN For Logical text: Same as BEGIN in default behavior.
|
||||
* C. END For Visual LTR text: This will be the end (left side) of the visual text (corresponding
|
||||
* to the physical memory address beginning for Visual LTR text, Same as BEGIN in default behavior)
|
||||
* D. END For Logical text: Same as END in default behavior.
|
||||
* Affects: All LamAlef BEGIN, END and AUTO options.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 0x4000000
|
||||
|
||||
/**
|
||||
* Bit mask for swapping BEGIN and END for Visual LTR text
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK 0x4000000
|
||||
|
||||
/**
|
||||
* If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73).
|
||||
* If this option is not specified (Default), old unofficial Unicode TAIL code point is used (i.e. 0x200B)
|
||||
* De-shaping will not use this option as it will always search for both the new Unicode code point for the
|
||||
* TAIL (i.e. 0xFE73) and the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the
|
||||
* Seen-Family letter accordingly.
|
||||
*
|
||||
* Shaping Mode: Only shaping.
|
||||
* De-shaping Mode: N/A.
|
||||
* Affects: All Seen options
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define U_SHAPE_TAIL_NEW_UNICODE 0x8000000
|
||||
|
||||
/**
|
||||
* Bit mask for new Unicode Tail option
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define U_SHAPE_TAIL_TYPE_MASK 0x8000000
|
||||
|
||||
#endif
|
||||
274
thirdparty/icu4c/common/unicode/usprep.h
vendored
Normal file
274
thirdparty/icu4c/common/unicode/usprep.h
vendored
Normal file
@@ -0,0 +1,274 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: usprep.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003jul2
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef __USPREP_H__
|
||||
#define __USPREP_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Implements the StringPrep algorithm.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
*
|
||||
* StringPrep API implements the StringPrep framework as described by RFC 3454.
|
||||
* StringPrep prepares Unicode strings for use in network protocols.
|
||||
* Profiles of StringPrep are sets of rules and data according to which the
|
||||
* Unicode Strings are prepared. Each profile contains tables which describe
|
||||
* how a code point should be treated. The tables are broadly classified into
|
||||
* <ul>
|
||||
* <li> Unassigned Table: Contains code points that are unassigned
|
||||
* in the Unicode Version supported by StringPrep. Currently
|
||||
* RFC 3454 supports Unicode 3.2. </li>
|
||||
* <li> Prohibited Table: Contains code points that are prohibited from
|
||||
* the output of the StringPrep processing function. </li>
|
||||
* <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
|
||||
* </ul>
|
||||
*
|
||||
* The procedure for preparing Unicode strings:
|
||||
* <ol>
|
||||
* <li> Map: For each character in the input, check if it has a mapping
|
||||
* and, if so, replace it with its mapping. </li>
|
||||
* <li> Normalize: Possibly normalize the result of step 1 using Unicode
|
||||
* normalization. </li>
|
||||
* <li> Prohibit: Check for any characters that are not allowed in the
|
||||
* output. If any are found, return an error.</li>
|
||||
* <li> Check bidi: Possibly check for right-to-left characters, and if
|
||||
* any are found, make sure that the whole string satisfies the
|
||||
* requirements for bidirectional strings. If the string does not
|
||||
* satisfy the requirements for bidirectional strings, return an
|
||||
* error. </li>
|
||||
* </ol>
|
||||
* @author Ram Viswanadha
|
||||
*/
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
/**
|
||||
* The StringPrep profile
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef struct UStringPrepProfile UStringPrepProfile;
|
||||
|
||||
|
||||
/**
|
||||
* Option to prohibit processing of unassigned code points in the input
|
||||
*
|
||||
* @see usprep_prepare
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
#define USPREP_DEFAULT 0x0000
|
||||
|
||||
/**
|
||||
* Option to allow processing of unassigned code points in the input
|
||||
*
|
||||
* @see usprep_prepare
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
#define USPREP_ALLOW_UNASSIGNED 0x0001
|
||||
|
||||
/**
|
||||
* enums for the standard stringprep profile types
|
||||
* supported by usprep_openByType.
|
||||
* @see usprep_openByType
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
typedef enum UStringPrepProfileType {
|
||||
/**
|
||||
* RFC3491 Nameprep
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC3491_NAMEPREP,
|
||||
/**
|
||||
* RFC3530 nfs4_cs_prep
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC3530_NFS4_CS_PREP,
|
||||
/**
|
||||
* RFC3530 nfs4_cs_prep with case insensitive option
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC3530_NFS4_CS_PREP_CI,
|
||||
/**
|
||||
* RFC3530 nfs4_cis_prep
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC3530_NFS4_CIS_PREP,
|
||||
/**
|
||||
* RFC3530 nfs4_mixed_prep for prefix
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX,
|
||||
/**
|
||||
* RFC3530 nfs4_mixed_prep for suffix
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX,
|
||||
/**
|
||||
* RFC3722 iSCSI
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC3722_ISCSI,
|
||||
/**
|
||||
* RFC3920 XMPP Nodeprep
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC3920_NODEPREP,
|
||||
/**
|
||||
* RFC3920 XMPP Resourceprep
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC3920_RESOURCEPREP,
|
||||
/**
|
||||
* RFC4011 Policy MIB Stringprep
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC4011_MIB,
|
||||
/**
|
||||
* RFC4013 SASLprep
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC4013_SASLPREP,
|
||||
/**
|
||||
* RFC4505 trace
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC4505_TRACE,
|
||||
/**
|
||||
* RFC4518 LDAP
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC4518_LDAP,
|
||||
/**
|
||||
* RFC4518 LDAP for case ignore, numeric and stored prefix
|
||||
* matching rules
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
USPREP_RFC4518_LDAP_CI
|
||||
} UStringPrepProfileType;
|
||||
|
||||
/**
|
||||
* Creates a StringPrep profile from the data file.
|
||||
*
|
||||
* @param path string containing the full path pointing to the directory
|
||||
* where the profile resides followed by the package name
|
||||
* e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
|
||||
* if NULL, ICU default data files will be used.
|
||||
* @param fileName name of the profile file to be opened
|
||||
* @param status ICU error code in/out parameter. Must not be NULL.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Pointer to UStringPrepProfile that is opened. Should be closed by
|
||||
* calling usprep_close()
|
||||
* @see usprep_close()
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI UStringPrepProfile* U_EXPORT2
|
||||
usprep_open(const char* path,
|
||||
const char* fileName,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Creates a StringPrep profile for the specified profile type.
|
||||
*
|
||||
* @param type The profile type
|
||||
* @param status ICU error code in/out parameter. Must not be NULL.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Pointer to UStringPrepProfile that is opened. Should be closed by
|
||||
* calling usprep_close()
|
||||
* @see usprep_close()
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI UStringPrepProfile* U_EXPORT2
|
||||
usprep_openByType(UStringPrepProfileType type,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Closes the profile
|
||||
* @param profile The profile to close
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
usprep_close(UStringPrepProfile* profile);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUStringPrepProfilePointer
|
||||
* "Smart pointer" class, closes a UStringPrepProfile via usprep_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringPrepProfilePointer, UStringPrepProfile, usprep_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes (NFKC),
|
||||
* checks for prohibited and BiDi characters in the order defined by RFC 3454
|
||||
* depending on the options specified in the profile.
|
||||
*
|
||||
* @param prep The profile to use
|
||||
* @param src Pointer to UChar buffer containing the string to prepare
|
||||
* @param srcLength Number of characters in the source string
|
||||
* @param dest Pointer to the destination buffer to receive the output
|
||||
* @param destCapacity The capacity of destination array
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - USPREP_DEFAULT Prohibit processing of unassigned code points in the input
|
||||
*
|
||||
* - USPREP_ALLOW_UNASSIGNED Treat the unassigned code points in the input
|
||||
* as normal Unicode code points.
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
* @param status ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
|
||||
* @return The number of UChars in the destination buffer
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
usprep_prepare( const UStringPrepProfile* prep,
|
||||
const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status );
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
#endif
|
||||
1685
thirdparty/icu4c/common/unicode/ustring.h
vendored
Normal file
1685
thirdparty/icu4c/common/unicode/ustring.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
97
thirdparty/icu4c/common/unicode/ustringtrie.h
vendored
Normal file
97
thirdparty/icu4c/common/unicode/ustringtrie.h
vendored
Normal file
@@ -0,0 +1,97 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: ustringtrie.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010dec17
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __USTRINGTRIE_H__
|
||||
#define __USTRINGTRIE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Helper definitions for dictionary trie APIs.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
/**
|
||||
* Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
|
||||
* @see USTRINGTRIE_MATCHES
|
||||
* @see USTRINGTRIE_HAS_VALUE
|
||||
* @see USTRINGTRIE_HAS_NEXT
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
enum UStringTrieResult {
|
||||
/**
|
||||
* The input unit(s) did not continue a matching string.
|
||||
* Once current()/next() return USTRINGTRIE_NO_MATCH,
|
||||
* all further calls to current()/next() will also return USTRINGTRIE_NO_MATCH,
|
||||
* until the trie is reset to its original state or to a saved state.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
USTRINGTRIE_NO_MATCH,
|
||||
/**
|
||||
* The input unit(s) continued a matching string
|
||||
* but there is no value for the string so far.
|
||||
* (It is a prefix of a longer string.)
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
USTRINGTRIE_NO_VALUE,
|
||||
/**
|
||||
* The input unit(s) continued a matching string
|
||||
* and there is a value for the string so far.
|
||||
* This value will be returned by getValue().
|
||||
* No further input byte/unit can continue a matching string.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
USTRINGTRIE_FINAL_VALUE,
|
||||
/**
|
||||
* The input unit(s) continued a matching string
|
||||
* and there is a value for the string so far.
|
||||
* This value will be returned by getValue().
|
||||
* Another input byte/unit can continue a matching string.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
USTRINGTRIE_INTERMEDIATE_VALUE
|
||||
};
|
||||
|
||||
/**
|
||||
* Same as (result!=USTRINGTRIE_NO_MATCH).
|
||||
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
|
||||
* @return true if the input bytes/units so far are part of a matching string/byte sequence.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH)
|
||||
|
||||
/**
|
||||
* Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but
|
||||
* this macro evaluates result exactly once.
|
||||
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
|
||||
* @return true if there is a value for the input bytes/units so far.
|
||||
* @see BytesTrie::getValue
|
||||
* @see UCharsTrie::getValue
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE)
|
||||
|
||||
/**
|
||||
* Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but
|
||||
* this macro evaluates result exactly once.
|
||||
* @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
|
||||
* @return true if another input byte/unit can continue a matching string.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define USTRINGTRIE_HAS_NEXT(result) ((result)&1)
|
||||
|
||||
#endif /* __USTRINGTRIE_H__ */
|
||||
1603
thirdparty/icu4c/common/unicode/utext.h
vendored
Normal file
1603
thirdparty/icu4c/common/unicode/utext.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
225
thirdparty/icu4c/common/unicode/utf.h
vendored
Normal file
225
thirdparty/icu4c/common/unicode/utf.h
vendored
Normal file
@@ -0,0 +1,225 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep09
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Code point macros
|
||||
*
|
||||
* This file defines macros for checking whether a code point is
|
||||
* a surrogate or a non-character etc.
|
||||
*
|
||||
* If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h
|
||||
* and itself includes utf8.h and utf16.h after some
|
||||
* common definitions.
|
||||
* If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be
|
||||
* included explicitly if their definitions are used.
|
||||
*
|
||||
* utf8.h and utf16.h define macros for efficiently getting code points
|
||||
* in and out of UTF-8/16 strings.
|
||||
* utf16.h macros have "U16_" prefixes.
|
||||
* utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
|
||||
*
|
||||
* ICU mostly processes 16-bit Unicode strings.
|
||||
* Most of the time, such strings are well-formed UTF-16.
|
||||
* Single, unpaired surrogates must be handled as well, and are treated in ICU
|
||||
* like regular code points where possible.
|
||||
* (Pairs of surrogate code points are indistinguishable from supplementary
|
||||
* code points encoded as pairs of surrogate code units.)
|
||||
*
|
||||
* In fact, almost all Unicode code points in normal text (>99%)
|
||||
* are on the BMP (<=U+ffff) and even <=U+d7ff.
|
||||
* ICU functions handle supplementary code points (U+10000..U+10ffff)
|
||||
* but are optimized for the much more frequently occurring BMP code points.
|
||||
*
|
||||
* umachine.h defines UChar to be an unsigned 16-bit integer.
|
||||
* Since ICU 59, ICU uses char16_t in C++, UChar only in C,
|
||||
* and defines UChar=char16_t by default. See the UChar API docs for details.
|
||||
*
|
||||
* UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
|
||||
* Unicode code point (Unicode scalar value, 0..0x10ffff) and U_SENTINEL (-1).
|
||||
* Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
|
||||
* the definition of UChar. For details see the documentation for UChar32 itself.
|
||||
*
|
||||
* utf.h defines a small number of C macros for single Unicode code points.
|
||||
* These are simple checks for surrogates and non-characters.
|
||||
* For actual Unicode character properties see uchar.h.
|
||||
*
|
||||
* By default, string operations must be done with error checking in case
|
||||
* a string is not well-formed UTF-16 or UTF-8.
|
||||
*
|
||||
* The U16_ macros detect if a surrogate code unit is unpaired
|
||||
* (lead unit without trail unit or vice versa) and just return the unit itself
|
||||
* as the code point.
|
||||
*
|
||||
* The U8_ macros detect illegal byte sequences and return a negative value.
|
||||
* Starting with ICU 60, the observable length of a single illegal byte sequence
|
||||
* skipped by one of these macros follows the Unicode 6+ recommendation
|
||||
* which is consistent with the W3C Encoding Standard.
|
||||
*
|
||||
* There are ..._OR_FFFD versions of both U16_ and U8_ macros
|
||||
* that return U+FFFD for illegal code unit sequences.
|
||||
*
|
||||
* The regular "safe" macros require that the initial, passed-in string index
|
||||
* is within bounds. They only check the index when they read more than one
|
||||
* code unit. This is usually done with code similar to the following loop:
|
||||
* <pre>while(i<length) {
|
||||
* U16_NEXT(s, i, length, c);
|
||||
* // use c
|
||||
* }</pre>
|
||||
*
|
||||
* When it is safe to assume that text is well-formed UTF-16
|
||||
* (does not contain single, unpaired surrogates), then one can use
|
||||
* U16_..._UNSAFE macros.
|
||||
* These do not check for proper code unit sequences or truncated text and may
|
||||
* yield wrong results or even cause a crash if they are used with "malformed"
|
||||
* text.
|
||||
* In practice, U16_..._UNSAFE macros will produce slightly less code but
|
||||
* should not be faster because the processing is only different when a
|
||||
* surrogate code unit is detected, which will be rare.
|
||||
*
|
||||
* Similarly for UTF-8, there are "safe" macros without a suffix,
|
||||
* and U8_..._UNSAFE versions.
|
||||
* The performance differences are much larger here because UTF-8 provides so
|
||||
* many opportunities for malformed sequences.
|
||||
* The unsafe UTF-8 macros are entirely implemented inside the macro definitions
|
||||
* and are fast, while the safe UTF-8 macros call functions for some complicated cases.
|
||||
*
|
||||
* Unlike with UTF-16, malformed sequences cannot be expressed with distinct
|
||||
* code point values (0..U+10ffff). They are indicated with negative values instead.
|
||||
*
|
||||
* For more information see the ICU User Guide Strings chapter
|
||||
* (https://unicode-org.github.io/icu/userguide/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
* Compound statements (curly braces {}) must be used for if-else-while...
|
||||
* bodies and all macro statements should be terminated with semicolon.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
#ifndef __UTF_H__
|
||||
#define __UTF_H__
|
||||
|
||||
#include "unicode/umachine.h"
|
||||
/* include the utfXX.h after the following definitions */
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Is this code point a Unicode noncharacter?
|
||||
* @param c 32-bit code point
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_IS_UNICODE_NONCHAR(c) \
|
||||
((c)>=0xfdd0 && \
|
||||
((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff)
|
||||
|
||||
/**
|
||||
* Is c a Unicode code point value (0..U+10ffff)
|
||||
* that can be assigned a character?
|
||||
*
|
||||
* Code points that are not characters include:
|
||||
* - single surrogate code points (U+d800..U+dfff, 2048 code points)
|
||||
* - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
|
||||
* - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
|
||||
* - the highest Unicode code point value is U+10ffff
|
||||
*
|
||||
* This means that all code points below U+d800 are character code points,
|
||||
* and that boundary is tested first for performance.
|
||||
*
|
||||
* @param c 32-bit code point
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_IS_UNICODE_CHAR(c) \
|
||||
((uint32_t)(c)<0xd800 || \
|
||||
(0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c)))
|
||||
|
||||
/**
|
||||
* Is this code point a BMP code point (U+0000..U+ffff)?
|
||||
* @param c 32-bit code point
|
||||
* @return true or false
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
|
||||
|
||||
/**
|
||||
* Is this code point a supplementary code point (U+10000..U+10ffff)?
|
||||
* @param c 32-bit code point
|
||||
* @return true or false
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
|
||||
|
||||
/**
|
||||
* Is this code point a lead surrogate (U+d800..U+dbff)?
|
||||
* @param c 32-bit code point
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
|
||||
|
||||
/**
|
||||
* Is this code point a trail surrogate (U+dc00..U+dfff)?
|
||||
* @param c 32-bit code point
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
|
||||
|
||||
/**
|
||||
* Is this code point a surrogate (U+d800..U+dfff)?
|
||||
* @param c 32-bit code point
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
|
||||
|
||||
/**
|
||||
* Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
|
||||
* is it a lead surrogate?
|
||||
* @param c 32-bit code point
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
|
||||
|
||||
/**
|
||||
* Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
|
||||
* is it a trail surrogate?
|
||||
* @param c 32-bit code point
|
||||
* @return true or false
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
|
||||
|
||||
/* include the utfXX.h ------------------------------------------------------ */
|
||||
|
||||
#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
|
||||
#include "unicode/utf_old.h"
|
||||
|
||||
#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */
|
||||
|
||||
#endif /* __UTF_H__ */
|
||||
734
thirdparty/icu4c/common/unicode/utf16.h
vendored
Normal file
734
thirdparty/icu4c/common/unicode/utf16.h
vendored
Normal file
@@ -0,0 +1,734 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf16.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep09
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: 16-bit Unicode handling macros
|
||||
*
|
||||
* This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
|
||||
*
|
||||
* For more information see utf.h and the ICU User Guide Strings chapter
|
||||
* (https://unicode-org.github.io/icu/userguide/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
* Compound statements (curly braces {}) must be used for if-else-while...
|
||||
* bodies and all macro statements should be terminated with semicolon.
|
||||
*/
|
||||
|
||||
#ifndef __UTF16_H__
|
||||
#define __UTF16_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "unicode/umachine.h"
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Does this code unit alone encode a code point (BMP, not a surrogate)?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* Negation of U_IS_SURROGATE(c); wrapped in parentheses so the expansion is a single primary expression. */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
|
||||
|
||||
/**
|
||||
* Is this code unit a lead surrogate (U+d800..U+dbff)?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* Clearing the low 10 bits leaves exactly 0xd800 only for 0xd800..0xdbff. */
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
|
||||
|
||||
/**
|
||||
* Is this code unit a trail surrogate (U+dc00..U+dfff)?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* Clearing the low 10 bits leaves exactly 0xdc00 only for 0xdc00..0xdfff. */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
|
||||
|
||||
/**
|
||||
* Is this code unit a surrogate (U+d800..U+dfff)?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* Forwards directly to U_IS_SURROGATE(c); no UTF-16-specific logic is added here. */
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
|
||||
|
||||
/**
|
||||
* Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
|
||||
* is it a lead surrogate?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* Only bit 10 is inspected; the caller must already know c is a surrogate. */
#define U16_IS_SURROGATE_LEAD(c) (0==((c)&0x400))
|
||||
|
||||
/**
|
||||
* Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
|
||||
* is it a trail surrogate?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
/* Only bit 10 is inspected; the caller must already know c is a surrogate. */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
|
||||
|
||||
/**
|
||||
* Helper constant for U16_GET_SUPPLEMENTARY.
|
||||
* @internal
|
||||
*/
|
||||
/*
 * Evaluates to 0x35fdc00. It is the amount by which (lead<<10)+trail exceeds the
 * code point, chosen so that U16_GET_SUPPLEMENTARY(0xd800, 0xdc00) yields 0x10000.
 */
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
|
||||
|
||||
/**
|
||||
* Get a supplementary code point value (U+10000..U+10ffff)
|
||||
* from its lead and trail surrogates.
|
||||
* The result is undefined if the input values are not
|
||||
* lead and trail surrogates.
|
||||
*
|
||||
* @param lead lead surrogate (U+d800..U+dbff)
|
||||
* @param trail trail surrogate (U+dc00..U+dfff)
|
||||
* @return supplementary code point (U+10000..U+10ffff)
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/*
 * Both surrogates are widened to UChar32 before the shift and add, then the
 * combined surrogate bias (U16_SURROGATE_OFFSET) is removed in one subtraction.
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
|
||||
|
||||
|
||||
/**
|
||||
* Get the lead surrogate (0xd800..0xdbff) for a
|
||||
* supplementary code point (0x10000..0x10ffff).
|
||||
* @param supplementary 32-bit code point (U+10000..U+10ffff)
|
||||
* @return lead surrogate (U+d800..U+dbff) for supplementary
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
|
||||
|
||||
/**
|
||||
* Get the trail surrogate (0xdc00..0xdfff) for a
|
||||
* supplementary code point (0x10000..0x10ffff).
|
||||
* @param supplementary 32-bit code point (U+10000..U+10ffff)
|
||||
* @return trail surrogate (U+dc00..U+dfff) for supplementary
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
|
||||
|
||||
/**
|
||||
* How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
|
||||
* The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
|
||||
* @param c 32-bit code point
|
||||
* @return 1 or 2
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* The unsigned cast makes negative inputs compare as large values, so they report 2. */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
|
||||
|
||||
/**
|
||||
* The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
|
||||
* @return 2
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* Equal to the larger of the two values U16_LENGTH can produce. */
#define U16_MAX_LENGTH 2
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a random-access offset,
|
||||
* without changing the offset.
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* The offset may point to either the lead or trail surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the adjacent matching surrogate as well.
|
||||
* The result is undefined if the offset points to a single, unpaired surrogate.
|
||||
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_GET
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[i]; \
|
||||
if(U16_IS_SURROGATE(c)) { \
|
||||
if(U16_IS_SURROGATE_LEAD(c)) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
|
||||
} else { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a random-access offset,
|
||||
* without changing the offset.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The offset may point to either the lead or trail surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the adjacent matching surrogate as well.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* If the offset points to a single, unpaired surrogate, then
|
||||
* c is set to that unpaired surrogate.
|
||||
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<=i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_GET_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[i]; \
|
||||
if(U16_IS_SURROGATE(c)) { \
|
||||
uint16_t __c2; \
|
||||
if(U16_IS_SURROGATE_LEAD(c)) { \
|
||||
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
|
||||
} \
|
||||
} else { \
|
||||
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a random-access offset,
|
||||
* without changing the offset.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The offset may point to either the lead or trail surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the adjacent matching surrogate as well.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* If the offset points to a single, unpaired surrogate, then
|
||||
* c is set to U+FFFD.
|
||||
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<=i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_GET_UNSAFE
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[i]; \
|
||||
if(U16_IS_SURROGATE(c)) { \
|
||||
uint16_t __c2; \
|
||||
if(U16_IS_SURROGATE_LEAD(c)) { \
|
||||
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
|
||||
} else { \
|
||||
(c)=0xfffd; \
|
||||
} \
|
||||
} else { \
|
||||
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
|
||||
} else { \
|
||||
(c)=0xfffd; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with forward iteration --------------------------------------- */
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
* and advance the offset to the next code point boundary.
|
||||
* (Post-incrementing forward iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* The offset may point to the lead surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the following trail surrogate as well.
|
||||
* If the offset points to a trail surrogate, then that itself
|
||||
* will be returned as the code point.
|
||||
* The result is undefined if the offset points to a single, unpaired lead surrogate.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_NEXT
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[(i)++]; \
|
||||
if(U16_IS_LEAD(c)) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
* and advance the offset to the next code point boundary.
|
||||
* (Post-incrementing forward iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* The offset may point to the lead surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the following trail surrogate as well.
|
||||
* If the offset points to a trail surrogate or
|
||||
* to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_NEXT_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[(i)++]; \
|
||||
if(U16_IS_LEAD(c)) { \
|
||||
uint16_t __c2; \
|
||||
if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
|
||||
++(i); \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
* and advance the offset to the next code point boundary.
|
||||
* (Post-incrementing forward iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* The offset may point to the lead surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the following trail surrogate as well.
|
||||
* If the offset points to a trail surrogate or
|
||||
* to a single, unpaired lead surrogate, then c is set to U+FFFD.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_NEXT_UNSAFE
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[(i)++]; \
|
||||
if(U16_IS_SURROGATE(c)) { \
|
||||
uint16_t __c2; \
|
||||
if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
|
||||
++(i); \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
|
||||
} else { \
|
||||
(c)=0xfffd; \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Append a code point to a string, overwriting 1 or 2 code units.
|
||||
* The offset points to the current end of the string contents
|
||||
* and is advanced (post-increment).
|
||||
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
|
||||
* Otherwise, the result is undefined.
|
||||
*
|
||||
* @param s UChar * string buffer (written to, so it must not be const)
|
||||
* @param i string offset
|
||||
* @param c code point to append
|
||||
* @see U16_APPEND
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if((uint32_t)(c)<=0xffff) { \
|
||||
(s)[(i)++]=(uint16_t)(c); \
|
||||
} else { \
|
||||
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
|
||||
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Append a code point to a string, overwriting 1 or 2 code units.
|
||||
* The offset points to the current end of the string contents
|
||||
* and is advanced (post-increment).
|
||||
* "Safe" macro, checks for a valid code point.
|
||||
* If a surrogate pair is written, checks for sufficient space in the string.
|
||||
* If the code point is not valid or a trail surrogate does not fit,
|
||||
* then isError is set to true.
|
||||
*
|
||||
* @param s UChar * string buffer (written to, so it must not be const)
|
||||
* @param i string offset, must be i<capacity
|
||||
* @param capacity size of the string buffer
|
||||
* @param c code point to append
|
||||
* @param isError output UBool set to true if an error occurs, otherwise not modified
|
||||
* @see U16_APPEND_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/*
 * Checked append. A BMP value is stored without a capacity test (the caller
 * guarantees i<capacity); a supplementary value needs room for two units.
 * isError is only ever set to true, never cleared.
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* c>0x10ffff or not enough space */ { \
        (isError)=true; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the next.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @see U16_FWD_1
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U16_IS_LEAD((s)[(i)++])) { \
|
||||
++(i); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the next.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @see U16_FWD_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
|
||||
++(i); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the n-th next one,
|
||||
* i.e., move forward by n code points.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param n number of code points to skip
|
||||
* @see U16_FWD_N
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t __N=(n); \
|
||||
while(__N>0) { \
|
||||
U16_FWD_1_UNSAFE(s, i); \
|
||||
--__N; \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the n-th next one,
|
||||
* i.e., move forward by n code points.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i int32_t string offset, must be i<length
|
||||
* @param length int32_t string length
|
||||
* @param n number of code points to skip
|
||||
* @see U16_FWD_N_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t __N=(n); \
|
||||
while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
|
||||
U16_FWD_1(s, i, length); \
|
||||
--__N; \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary
|
||||
* at the start of a code point.
|
||||
* If the offset points to the trail surrogate of a surrogate pair,
|
||||
* then the offset is decremented.
|
||||
* Otherwise, it is not modified.
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @see U16_SET_CP_START
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U16_IS_TRAIL((s)[i])) { \
|
||||
--(i); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary
|
||||
* at the start of a code point.
|
||||
* If the offset points to the trail surrogate of a surrogate pair,
|
||||
* then the offset is decremented.
|
||||
* Otherwise, it is not modified.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<=i
|
||||
* @see U16_SET_CP_START_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
|
||||
--(i); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with backward iteration -------------------------------------- */
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a trail surrogate unit
|
||||
* for a supplementary code point, then the macro will read
|
||||
* the preceding lead surrogate as well.
|
||||
* If the offset is behind a lead surrogate, then that itself
|
||||
* will be returned as the code point.
|
||||
* The result is undefined if the offset is behind a single, unpaired trail surrogate.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_PREV
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[--(i)]; \
|
||||
if(U16_IS_TRAIL(c)) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a trail surrogate unit
|
||||
* for a supplementary code point, then the macro will read
|
||||
* the preceding lead surrogate as well.
|
||||
* If the offset is behind a lead surrogate or behind a single, unpaired
|
||||
* trail surrogate, then c is set to that unpaired surrogate.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<i
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_PREV_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[--(i)]; \
|
||||
if(U16_IS_TRAIL(c)) { \
|
||||
uint16_t __c2; \
|
||||
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
|
||||
--(i); \
|
||||
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a trail surrogate unit
|
||||
* for a supplementary code point, then the macro will read
|
||||
* the preceding lead surrogate as well.
|
||||
* If the offset is behind a lead surrogate or behind a single, unpaired
|
||||
* trail surrogate, then c is set to U+FFFD.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<i
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_PREV_UNSAFE
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[--(i)]; \
|
||||
if(U16_IS_SURROGATE(c)) { \
|
||||
uint16_t __c2; \
|
||||
if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
|
||||
--(i); \
|
||||
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
|
||||
} else { \
|
||||
(c)=0xfffd; \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @see U16_BACK_1
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U16_IS_TRAIL((s)[--(i)])) { \
|
||||
--(i); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<i
|
||||
* @see U16_BACK_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
|
||||
--(i); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the n-th one before it,
|
||||
* i.e., move backward by n code points.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param n number of code points to skip
|
||||
* @see U16_BACK_N
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t __N=(n); \
|
||||
while(__N>0) { \
|
||||
U16_BACK_1_UNSAFE(s, i); \
|
||||
--__N; \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the n-th one before it,
|
||||
* i.e., move backward by n code points.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start start of string
|
||||
* @param i string offset, must be start<i
|
||||
* @param n number of code points to skip
|
||||
* @see U16_BACK_N_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t __N=(n); \
|
||||
while(__N>0 && (i)>(start)) { \
|
||||
U16_BACK_1(s, start, i); \
|
||||
--__N; \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary after a code point.
|
||||
* If the offset is behind the lead surrogate of a surrogate pair,
|
||||
* then the offset is incremented.
|
||||
* Otherwise, it is not modified.
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @see U16_SET_CP_LIMIT
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U16_IS_LEAD((s)[(i)-1])) { \
|
||||
++(i); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary after a code point.
|
||||
* If the offset is behind the lead surrogate of a surrogate pair,
|
||||
* then the offset is incremented.
|
||||
* Otherwise, it is not modified.
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, start<=i<=length
|
||||
* @param length int32_t string length
|
||||
* @see U16_SET_CP_LIMIT_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
|
||||
++(i); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
#endif
|
||||
25
thirdparty/icu4c/common/unicode/utf32.h
vendored
Normal file
25
thirdparty/icu4c/common/unicode/utf32.h
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf32.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep20
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: UTF-32 macros
|
||||
*
|
||||
* This file is obsolete and its contents moved to utf_old.h.
|
||||
* See utf_old.h and Jitterbug 2150 and its discussion on the ICU mailing list
|
||||
* in September 2002.
|
||||
*/
|
||||
882
thirdparty/icu4c/common/unicode/utf8.h
vendored
Normal file
882
thirdparty/icu4c/common/unicode/utf8.h
vendored
Normal file
@@ -0,0 +1,882 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf8.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep13
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: 8-bit Unicode handling macros
|
||||
*
|
||||
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
|
||||
*
|
||||
* For more information see utf.h and the ICU User Guide Strings chapter
|
||||
* (https://unicode-org.github.io/icu/userguide/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
* Compound statements (curly braces {}) must be used for if-else-while...
|
||||
* bodies and all macro statements should be terminated with semicolon.
|
||||
*/
|
||||
|
||||
#ifndef __UTF8_H__
|
||||
#define __UTF8_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "unicode/umachine.h"
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/* internal definitions ----------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Counts the trail bytes for a UTF-8 lead byte.
|
||||
* Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
|
||||
* leadByte might be evaluated multiple times.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this file and thus must remain stable.
|
||||
*
|
||||
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
|
||||
* @internal
|
||||
*/
|
||||
/* For a byte accepted by U8_IS_LEAD the count is 1, plus one for >=0xe0, plus one for >=0xf0; otherwise 0. */
#define U8_COUNT_TRAIL_BYTES(leadByte) \
    (U8_IS_LEAD(leadByte) ? \
        ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)
|
||||
|
||||
/**
|
||||
* Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
|
||||
* Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
|
||||
* leadByte might be evaluated multiple times.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this file and thus must remain stable.
|
||||
*
|
||||
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
|
||||
* @internal
|
||||
*/
|
||||
/* Sum of three threshold comparisons (>=0xc2, >=0xe0, >=0xf0); there is no upper bound check. */
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
    (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
|
||||
|
||||
/**
 * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 *
 * Note: this is a compound assignment (&=), so leadByte must be a modifiable
 * lvalue; it is changed in place. The mask keeps the low (6-countTrailBytes) bits.
 *
 * @param leadByte modifiable variable holding the lead byte; masked in place
 * @param countTrailBytes number of trail bytes that follow the lead byte
 * @internal
 */
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
|
||||
|
||||
/**
 * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
 * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
 * Lead byte E0..EF bits 3..0 are used as byte index,
 * first trail byte bits 7..5 are used as bit index into that byte.
 *
 * Reading the table: 0x30 sets bits 4 and 5 (first trail byte 80..BF);
 * entry 0 (lead E0) is 0x20, i.e. only bit 5 (A0..BF);
 * entry 0xD (lead ED) is 0x10, i.e. only bit 4 (80..9F).
 *
 * @see U8_IS_VALID_LEAD3_AND_T1
 * @internal
 */
#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
|
||||
|
||||
/**
 * Internal 3-byte UTF-8 validity check.
 * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
 *
 * Looks up the U8_LEAD3_T1_BITS entry selected by the low 4 bits of the lead byte
 * and tests the bit selected by the top 3 bits of the first trail byte.
 *
 * @param lead lead byte; only its low 4 bits are used
 * @param t1 first trail byte; converted to uint8_t before shifting
 * @internal
 */
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
|
||||
|
||||
/**
 * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
 * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
 * First trail byte bits 7..4 are used as byte index,
 * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
 *
 * Reading the table: entry 8 (first trail byte 80..8F) is 0x1E, i.e. bits 1..4 (leads F1..F4);
 * entries 9..0xB (first trail byte 90..BF) are 0x0F, i.e. bits 0..3 (leads F0..F3);
 * all other entries are 0 (not a trail byte).
 *
 * @see U8_IS_VALID_LEAD4_AND_T1
 * @internal
 */
#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
|
||||
|
||||
/**
 * Internal 4-byte UTF-8 validity check.
 * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
 *
 * Looks up the U8_LEAD4_T1_BITS entry selected by the top 4 bits of the first trail byte
 * and tests the bit selected by the low 3 bits of the lead byte.
 *
 * @param lead lead byte; only its low 3 bits are used
 * @param t1 first trail byte; converted to uint8_t before shifting
 * @internal
 */
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
|
||||
|
||||
/**
|
||||
* Function for handling "next code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, int8_t strict);
|
||||
|
||||
/**
|
||||
* Function for handling "append code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
|
||||
|
||||
/**
|
||||
* Function for handling "previous code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, int8_t strict);
|
||||
|
||||
/**
|
||||
* Function for handling "skip backward one code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */
|
||||
|
||||
/**
 * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
 * Tests only bit 7, so it also works when c is a (possibly negative) signed char.
 * @param c 8-bit code unit (byte)
 * @return true or false
 * @stable ICU 2.4
 */
#define U8_IS_SINGLE(c) (((c)&0x80)==0)
|
||||
|
||||
/**
 * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
 * Implemented as a single unsigned range check: subtracting 0xc2 and truncating
 * to uint8_t maps 0xC2..0xF4 onto 0..0x32 and everything else above 0x32.
 * @param c 8-bit code unit (byte)
 * @return true or false
 * @stable ICU 2.4
 */
#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32)
// 0x32=0xf4-0xc2
|
||||
|
||||
/**
 * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
 * Reinterpreted as int8_t, the bytes 0x80..0xBF are exactly the values -128..-65,
 * so one signed comparison against -0x40 suffices.
 * @param c 8-bit code unit (byte)
 * @return true or false
 * @stable ICU 2.4
 */
#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40)
|
||||
|
||||
/**
 * How many code units (bytes) are used for the UTF-8 encoding
 * of this Unicode code point?
 * c is converted to uint32_t for every comparison, so negative inputs
 * compare as large values and yield 0. c may be evaluated multiple times.
 * @param c 32-bit code point
 * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
 * @stable ICU 2.4
 */
#define U8_LENGTH(c) \
    ((uint32_t)(c)<=0x7f ? 1 : \
        ((uint32_t)(c)<=0x7ff ? 2 : \
            ((uint32_t)(c)<=0xd7ff ? 3 : \
                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
                ) \
            ) \
        ) \
    )
|
||||
|
||||
/**
 * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
 * @return 4
 * @stable ICU 2.4
 */
#define U8_MAX_LENGTH 4
|
||||
|
||||
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * for a code point, in which case the macro will read all of the bytes
 * for the code point.
 * The result is undefined if the offset points to an illegal UTF-8
 * byte sequence.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
 *
 * Works on a private int32_t copy of i, which is why the caller's offset is unchanged.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_GET
 * @stable ICU 2.4
 */
#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_get_unsafe_index=(int32_t)(i); \
    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * for a code point, in which case the macro will read all of the bytes
 * for the code point.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to an illegal UTF-8 byte sequence, then
 * c is set to a negative value.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
 *
 * Works on a private int32_t copy of i, which is why the caller's offset is unchanged.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset
 * @param i int32_t string offset, must be start<=i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_get_index=(i); \
    U8_SET_CP_START(s, start, _u8_get_index); \
    U8_NEXT(s, _u8_get_index, length, c); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * for a code point, in which case the macro will read all of the bytes
 * for the code point.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to an illegal UTF-8 byte sequence, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_GET() if that distinction is important.
 *
 * Works on a private int32_t copy of i, which is why the caller's offset is unchanged.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset
 * @param i int32_t string offset, must be start<=i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_GET
 * @stable ICU 51
 */
#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_get_index=(i); \
    U8_SET_CP_START(s, start, _u8_get_index); \
    U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with forward iteration --------------------------------------- */
|
||||
|
||||
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * The result is undefined if the offset points to a trail byte
 * or an illegal UTF-8 sequence.
 *
 * The sequence length is chosen purely from the lead byte value
 * (<0xe0: 2 bytes, <0xf0: 3 bytes, otherwise 4 bytes); trail bytes are not validated.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_NEXT
 * @stable ICU 2.4
 */
#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        if((c)<0xe0) { \
            (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
        } else if((c)<0xf0) { \
            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
            (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
            (i)+=2; \
        } else { \
            (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
            (i)+=3; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
 * c is set to a negative value.
 *
 * Thin wrapper: expands to U8_INTERNAL_NEXT_OR_SUB with U_SENTINEL as the error value.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
|
||||
|
||||
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
 * c is set to U+FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_NEXT() if that distinction is important.
 *
 * Thin wrapper: expands to U8_INTERNAL_NEXT_OR_SUB with 0xfffd as the error value.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_NEXT
 * @stable ICU 51
 */
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
|
||||
|
||||
/**
 * Shared body of U8_NEXT and U8_NEXT_OR_FFFD: reads the byte at s[i], post-increments i,
 * and for a non-ASCII byte validates and assembles the remaining bytes of the sequence
 * inside one short-circuiting condition.
 *
 * Each "&&" step both checks a byte and folds it into c; the embedded "++(i)!=(length)"
 * tests advance i and stop at the string limit. If any step fails, c is set to sub and
 * i stays behind the lead byte and whatever trail bytes had been accepted so far.
 * With a negative length the "!=(length)" tests never stop the scan.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset (modified)
 * @param length int32_t string length
 * @param c output UChar32 variable
 * @param sub value assigned to c for an ill-formed sequence
 * @internal
 */
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        uint8_t __t = 0; \
        if((i)!=(length) && \
            /* fetch/validate/assemble all but last trail byte */ \
            ((c)>=0xe0 ? \
                ((c)<0xf0 ?  /* U+0800..U+FFFF except surrogates */ \
                    U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
                    (__t&=0x3f, 1) \
                :  /* U+10000..U+10FFFF */ \
                    ((c)-=0xf0)<=4 && \
                    U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
                    ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
                    (__t=(s)[i]-0x80)<=0x3f) && \
                /* valid second-to-last trail byte */ \
                ((c)=((c)<<6)|__t, ++(i)!=(length)) \
            :  /* U+0080..U+07FF */ \
                (c)>=0xc2 && ((c)&=0x1f, 1)) && \
            /* last trail byte */ \
            (__t=(s)[i]-0x80)<=0x3f && \
            ((c)=((c)<<6)|__t, ++(i), 1)) { \
        } else { \
            (c)=(sub);  /* ill-formed*/ \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Append a code point to a string, overwriting 1 to 4 bytes.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * The nested branches write the lead byte (and, for 4-byte sequences, the first
 * trail byte); the remaining trail bytes are written by shared statements on the
 * way out of each nesting level.
 *
 * @param s uint8_t * string buffer (written to, so it must not be const)
 * @param i string offset
 * @param c code point to append
 * @see U8_APPEND
 * @stable ICU 2.4
 */
#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __uc=(c); \
    if(__uc<=0x7f) { \
        (s)[(i)++]=(uint8_t)__uc; \
    } else { \
        if(__uc<=0x7ff) { \
            (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
        } else { \
            if(__uc<=0xffff) { \
                (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Append a code point to a string, overwriting 1 to 4 bytes.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Safe" macro, checks for a valid code point.
 * If a non-ASCII code point is written, checks for sufficient space in the string.
 * If the code point is not valid or trail bytes do not fit,
 * then isError is set to true.
 *
 * Note that the ASCII branch writes s[i] without comparing i to capacity;
 * this is why i must already be less than capacity on entry.
 * On error nothing is written and i is not advanced.
 *
 * @param s uint8_t * string buffer (written to, so it must not be const)
 * @param i int32_t string offset, must be i<capacity
 * @param capacity int32_t size of the string buffer
 * @param c UChar32 code point to append
 * @param isError output UBool set to true if an error occurs, otherwise not modified
 * @see U8_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __uc=(c); \
    if(__uc<=0x7f) { \
        (s)[(i)++]=(uint8_t)__uc; \
    } else if(__uc<=0x7ff && (i)+1<(capacity)) { \
        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else { \
        (isError)=true; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * Skips the byte at s[i] plus the number of trail bytes implied by its value
 * (U8_COUNT_TRAIL_BYTES_UNSAFE); the trail bytes themselves are not inspected.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_FWD_1
 * @stable ICU 2.4
 */
#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The byte at s[i] is always consumed. Trail bytes are consumed only as long as
 * they continue a valid sequence and i has not reached length, so for an
 * ill-formed or truncated sequence i stops behind the last accepted byte.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @see U8_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    uint8_t __b=(s)[(i)++]; \
    if(U8_IS_LEAD(__b) && (i)!=(length)) { \
        uint8_t __t1=(s)[i]; \
        if((0xe0<=__b && __b<0xf0)) { \
            if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } else if(__b<0xe0) { \
            if(U8_IS_TRAIL(__t1)) { \
                ++(i); \
            } \
        } else /* __b>=0xf0 */ { \
            if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * n is copied into a local int32_t counter once; n<=0 leaves i unchanged.
 * No string limit is checked.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U8_FWD_N
 * @stable ICU 2.4
 */
#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        U8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * Stops early when i reaches length or, for a negative length, when s[i] is NUL.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U8_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
        U8_FWD_1(s, i, length); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to a UTF-8 trail byte,
 * then the offset is moved backward to the corresponding lead byte.
 * Otherwise, it is not modified.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * There is no lower bound check: the loop keeps decrementing i while s[i] is a trail byte.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_SET_CP_START
 * @stable ICU 2.4
 */
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    while(U8_IS_TRAIL((s)[i])) { --(i); } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to a UTF-8 trail byte,
 * then the offset is moved backward to the corresponding lead byte.
 * Otherwise, it is not modified.
 *
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
 *
 * The backward search is delegated to utf8_back1SafeBody(), whose return value
 * becomes the new offset.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<=i
 * @see U8_SET_CP_START_UNSAFE
 * @see U8_TRUNCATE_IF_INCOMPLETE
 * @stable ICU 2.4
 */
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U8_IS_TRAIL((s)[(i)])) { \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * If the string ends with a UTF-8 byte sequence that is valid so far
 * but incomplete, then reduce the length of the string to end before
 * the lead byte of that incomplete sequence.
 * For example, if the string ends with E1 80, the length is reduced by 2.
 *
 * In all other cases (the string ends with a complete sequence, or it is not
 * possible for any further trail byte to extend the trailing sequence)
 * the length remains unchanged.
 *
 * Useful for processing text split across multiple buffers
 * (save the incomplete sequence for later)
 * and for optimizing iteration
 * (check for string length only once per character).
 *
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 * Unlike U8_SET_CP_START(), this macro never reads s[length].
 *
 * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
 *
 * The s argument is parenthesized before subscripting so that expression
 * arguments such as "buf + offset" index the intended pointer.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param length int32_t string length (usually start<=length)
 * @see U8_SET_CP_START
 * @stable ICU 61
 */
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((length)>(start)) { \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                uint8_t __b3=(s)[(length)-3]; \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with backward iteration -------------------------------------- */
|
||||
|
||||
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind an illegal UTF-8 sequence.
 *
 * Trail bytes are accumulated from the last one backward, 6 bits at a time,
 * until a byte >=0xc0 is found; that byte is masked according to the number of
 * trail bytes seen and supplies the top bits. There is no lower bound check.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_PREV
 * @stable ICU 2.4
 */
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(U8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                U8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
 *
 * Non-ASCII bytes are handed to utf8_prevCharSafeBody() with strict=-1.
 * The s argument is parenthesized before the cast so that expression
 * arguments are converted as a whole.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_PREV() if that distinction is important.
 *
 * Non-ASCII bytes are handed to utf8_prevCharSafeBody() with strict=-3.
 * The s argument is parenthesized before the cast so that expression
 * arguments are converted as a whole.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_PREV
 * @stable ICU 51
 */
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * Decrements i at least once, then keeps decrementing while s[i] is a trail byte.
 * There is no lower bound check.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_BACK_1
 * @stable ICU 2.4
 */
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    while(U8_IS_TRAIL((s)[--(i)])) {} \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * Always decrements i by one; if the byte found there is a trail byte, the
 * remaining backward search is delegated to utf8_back1SafeBody(), whose return
 * value becomes the new offset.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @see U8_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U8_IS_TRAIL((s)[--(i)])) { \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * n is copied into a local int32_t counter once; n<=0 leaves i unchanged.
 * No string start is checked.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U8_BACK_N
 * @stable ICU 2.4
 */
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        U8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * Stops early once i is no longer greater than start.
 *
 * @param s const uint8_t * string
 * @param start int32_t index of the start of the string
 * @param i int32_t string offset, must be start<i
 * @param n number of code points to skip
 * @see U8_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        U8_BACK_1(s, start, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind a partial multi-byte sequence,
 * then the offset is incremented to behind the whole sequence.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * Implemented as one step back to the start of the preceding code point
 * followed by one step forward over it.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    U8_BACK_1_UNSAFE(s, i); \
    U8_FWD_1_UNSAFE(s, i); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary after a code point.
|
||||
* If the offset is behind a partial multi-byte sequence,
|
||||
* then the offset is incremented to behind the whole sequence.
|
||||
* Otherwise, it is not modified.
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<=i<=length
|
||||
* @param length int32_t string length
|
||||
* @see U8_SET_CP_LIMIT_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* An offset at the very start of the string is left untouched. */ \
    if((start)<(i)) { \
        /* Adjust only when i is before the end; a negative length means NUL-terminated. */ \
        if((i)<(length) || (length)<0) { \
            U8_BACK_1(s, start, i); \
            U8_FWD_1(s, i, length); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
#endif
|
||||
1201
thirdparty/icu4c/common/unicode/utf_old.h
vendored
Normal file
1201
thirdparty/icu4c/common/unicode/utf_old.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
506
thirdparty/icu4c/common/unicode/utrace.h
vendored
Normal file
506
thirdparty/icu4c/common/unicode/utrace.h
vendored
Normal file
@@ -0,0 +1,506 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utrace.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003aug06
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Definitions for ICU tracing/logging.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __UTRACE_H__
|
||||
#define __UTRACE_H__
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Definitions for ICU tracing/logging.
|
||||
*
|
||||
* This provides API for debugging the internals of ICU without the use of
|
||||
* a traditional debugger.
|
||||
*
|
||||
* By default, tracing is disabled in ICU. If you need to debug ICU with
|
||||
* tracing, please compile ICU with the --enable-tracing configure option.
|
||||
*/
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* Trace severity levels. Higher levels increase the verbosity of the trace output.
|
||||
* @see utrace_setLevel
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef enum UTraceLevel {
|
||||
/** Disable all tracing @stable ICU 2.8*/
|
||||
UTRACE_OFF=-1,
|
||||
/** Trace error conditions only @stable ICU 2.8*/
|
||||
UTRACE_ERROR=0,
|
||||
/** Trace errors and warnings @stable ICU 2.8*/
|
||||
UTRACE_WARNING=3,
|
||||
/** Trace opens and closes of ICU services @stable ICU 2.8*/
|
||||
UTRACE_OPEN_CLOSE=5,
|
||||
/** Trace an intermediate number of ICU operations @stable ICU 2.8*/
|
||||
UTRACE_INFO=7,
|
||||
/** Trace the maximum number of ICU operations @stable ICU 2.8*/
|
||||
UTRACE_VERBOSE=9
|
||||
} UTraceLevel;
|
||||
|
||||
/**
|
||||
* These are the ICU functions that will be traced when tracing is enabled.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef enum UTraceFunctionNumber {
|
||||
UTRACE_FUNCTION_START=0,
|
||||
UTRACE_U_INIT=UTRACE_FUNCTION_START,
|
||||
UTRACE_U_CLEANUP,
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal function trace location (the UTRACE_FUNCTION_START group).
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UTRACE_FUNCTION_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
UTRACE_CONVERSION_START=0x1000,
|
||||
UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START,
|
||||
UTRACE_UCNV_OPEN_PACKAGE,
|
||||
UTRACE_UCNV_OPEN_ALGORITHMIC,
|
||||
UTRACE_UCNV_CLONE,
|
||||
UTRACE_UCNV_CLOSE,
|
||||
UTRACE_UCNV_FLUSH_CACHE,
|
||||
UTRACE_UCNV_LOAD,
|
||||
UTRACE_UCNV_UNLOAD,
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal conversion trace location.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UTRACE_CONVERSION_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
UTRACE_COLLATION_START=0x2000,
|
||||
UTRACE_UCOL_OPEN=UTRACE_COLLATION_START,
|
||||
UTRACE_UCOL_CLOSE,
|
||||
UTRACE_UCOL_STRCOLL,
|
||||
UTRACE_UCOL_GET_SORTKEY,
|
||||
UTRACE_UCOL_GETLOCALE,
|
||||
UTRACE_UCOL_NEXTSORTKEYPART,
|
||||
UTRACE_UCOL_STRCOLLITER,
|
||||
UTRACE_UCOL_OPEN_FROM_SHORT_STRING,
|
||||
UTRACE_UCOL_STRCOLLUTF8, /**< @stable ICU 50 */
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal collation trace location.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UTRACE_COLLATION_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* The lowest resource/data location.
|
||||
* @stable ICU 65
|
||||
*/
|
||||
UTRACE_UDATA_START=0x3000,
|
||||
|
||||
/**
|
||||
* Indicates that a value was read from a resource bundle. Provides three
|
||||
* C-style strings to UTraceData: type, file name, and resource path. The
|
||||
* possible types are:
|
||||
*
|
||||
* - "string" (a string value was accessed)
|
||||
* - "binary" (a binary value was accessed)
|
||||
* - "intvector" (an integer vector value was accessed)
|
||||
* - "int" (a signed integer value was accessed)
|
||||
* - "uint" (an unsigned integer value was accessed)
|
||||
* - "get" (a path was loaded, but the value was not accessed)
|
||||
* - "getalias" (a path was loaded, and an alias was resolved)
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
UTRACE_UDATA_RESOURCE=UTRACE_UDATA_START,
|
||||
|
||||
/**
|
||||
* Indicates that a resource bundle was opened.
|
||||
*
|
||||
* Provides one C-style string to UTraceData: file name.
|
||||
* @stable ICU 65
|
||||
*/
|
||||
UTRACE_UDATA_BUNDLE,
|
||||
|
||||
/**
|
||||
* Indicates that a data file was opened, but not *.res files.
|
||||
*
|
||||
* Provides one C-style string to UTraceData: file name.
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
UTRACE_UDATA_DATA_FILE,
|
||||
|
||||
/**
|
||||
* Indicates that a *.res file was opened.
|
||||
*
|
||||
* This differs from UTRACE_UDATA_BUNDLE because a res file is typically
|
||||
* opened only once per application runtime, but the bundle corresponding
|
||||
* to that res file may be opened many times.
|
||||
*
|
||||
* Provides one C-style string to UTraceData: file name.
|
||||
*
|
||||
* @stable ICU 65
|
||||
*/
|
||||
UTRACE_UDATA_RES_FILE,
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* One more than the highest normal resource/data trace location.
|
||||
* @internal The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UTRACE_RES_DATA_LIMIT,
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
|
||||
/**
|
||||
* The lowest break iterator location.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_START=0x4000,
|
||||
|
||||
/**
|
||||
* Indicates that a character instance of break iterator was created.
|
||||
*
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START,
|
||||
|
||||
/**
|
||||
* Indicates that a word instance of break iterator was created.
|
||||
*
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_WORD,
|
||||
|
||||
/**
|
||||
* Indicates that a line instance of break iterator was created.
|
||||
*
|
||||
* Provides one C-style string to UTraceData: the lb value ("",
|
||||
* "loose", "strict", or "normal").
|
||||
*
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_LINE,
|
||||
|
||||
/**
|
||||
* Indicates that a sentence instance of break iterator was created.
|
||||
*
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_SENTENCE,
|
||||
|
||||
/**
|
||||
* Indicates that a title instance of break iterator was created.
|
||||
*
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_TITLE,
|
||||
|
||||
/**
|
||||
* Indicates that an internal dictionary break engine was created.
|
||||
*
|
||||
* Provides one C-style string to UTraceData: the script code of what
|
||||
* the break engine covers ("Hani", "Khmr", "Laoo", "Mymr", or "Thai").
|
||||
*
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_BREAK_ENGINE,
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* One more than the highest normal break iterator trace location.
|
||||
* @internal The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UTRACE_UBRK_LIMIT,
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
|
||||
} UTraceFunctionNumber;
|
||||
|
||||
/**
|
||||
* Setter for the trace level.
|
||||
* @param traceLevel A UTraceLevel value.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
utrace_setLevel(int32_t traceLevel);
|
||||
|
||||
/**
|
||||
* Getter for the trace level.
|
||||
* @return The UTraceLevel value being used by ICU.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrace_getLevel(void);
|
||||
|
||||
/* Trace function pointers types ----------------------------- */
|
||||
|
||||
/**
|
||||
* Type signature for the trace function to be called when entering a function.
|
||||
* @param context value supplied at the time the trace functions are set.
|
||||
* @param fnNumber Enum value indicating the ICU function being entered.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef void U_CALLCONV
|
||||
UTraceEntry(const void *context, int32_t fnNumber);
|
||||
|
||||
/**
|
||||
* Type signature for the trace function to be called when exiting from a function.
|
||||
* @param context value supplied at the time the trace functions are set.
|
||||
* @param fnNumber Enum value indicating the ICU function being exited.
|
||||
* @param fmt A formatting string that describes the number and types
|
||||
* of arguments included with the variable args. The fmt
|
||||
* string has the same form as the utrace_vformat format
|
||||
* string.
|
||||
* @param args A variable arguments list. Contents are described by
|
||||
* the fmt parameter.
|
||||
* @see utrace_vformat
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef void U_CALLCONV
|
||||
UTraceExit(const void *context, int32_t fnNumber,
|
||||
const char *fmt, va_list args);
|
||||
|
||||
/**
|
||||
* Type signature for the trace function to be called from within an ICU function
|
||||
* to display data or messages.
|
||||
* @param context value supplied at the time the trace functions are set.
|
||||
* @param fnNumber Enum value indicating the ICU function from within which the data is traced.
|
||||
* @param level The current tracing level
|
||||
* @param fmt A format string describing the tracing data that is supplied
|
||||
* as variable args
|
||||
* @param args The data being traced, passed as variable args.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef void U_CALLCONV
|
||||
UTraceData(const void *context, int32_t fnNumber, int32_t level,
|
||||
const char *fmt, va_list args);
|
||||
|
||||
/**
|
||||
* Set ICU Tracing functions. Installs application-provided tracing
|
||||
* functions into ICU. After doing this, subsequent ICU operations
|
||||
* will call back to the installed functions, providing a trace
|
||||
* of the use of ICU. Passing a NULL pointer for a tracing function
|
||||
* is allowed, and inhibits tracing action at points where that function
|
||||
* would be called.
|
||||
* <p>
|
||||
* Tracing and Threads: Tracing functions are global to a process, and
|
||||
* will be called in response to ICU operations performed by any
|
||||
* thread. If tracing of an individual thread is desired, the
|
||||
* tracing functions must themselves filter by checking that the
|
||||
* current thread is the desired thread.
|
||||
*
|
||||
* @param context an uninterpreted pointer. Whatever is passed in
|
||||
* here will in turn be passed to each of the tracing
|
||||
* functions UTraceEntry, UTraceExit and UTraceData.
|
||||
* ICU does not use or alter this pointer.
|
||||
* @param e Callback function to be called on entry to a
|
||||
* traced ICU function.
|
||||
* @param x Callback function to be called on exit from a
|
||||
* traced ICU function.
|
||||
* @param d Callback function to be called from within a
|
||||
* traced ICU function, for the purpose of providing
|
||||
* data to the trace.
|
||||
*
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
utrace_setFunctions(const void *context,
|
||||
UTraceEntry *e, UTraceExit *x, UTraceData *d);
|
||||
|
||||
/**
|
||||
* Get the currently installed ICU tracing functions. Note that a null function
|
||||
* pointer will be returned if no trace function has been set.
|
||||
*
|
||||
* @param context The currently installed tracing context.
|
||||
* @param e The currently installed UTraceEntry function.
|
||||
* @param x The currently installed UTraceExit function.
|
||||
* @param d The currently installed UTraceData function.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
utrace_getFunctions(const void **context,
|
||||
UTraceEntry **e, UTraceExit **x, UTraceData **d);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* ICU trace format string syntax
|
||||
*
|
||||
* Format Strings are passed to UTraceData functions, and define the
|
||||
* number and types of the trace data being passed on each call.
|
||||
*
|
||||
* The UTraceData function, which is supplied by the application,
|
||||
* not by ICU, can either forward the trace data (passed via
|
||||
* varargs) and the format string back to ICU for formatting into
|
||||
* a displayable string, or it can interpret the format itself,
|
||||
* and do as it wishes with the trace data.
|
||||
*
|
||||
*
|
||||
* Goals for the format string
|
||||
* - basic data output
|
||||
* - easy to use for trace programmer
|
||||
* - sufficient provision for data types for trace output readability
|
||||
* - well-defined types and binary portable APIs
|
||||
*
|
||||
* Non-goals
|
||||
* - printf compatibility
|
||||
* - fancy formatting
|
||||
* - argument reordering and other internationalization features
|
||||
*
|
||||
* ICU trace format strings contain plain text with argument inserts,
|
||||
* much like standard printf format strings.
|
||||
* Each insert begins with a '%', then optionally contains a 'v',
|
||||
* then exactly one type character.
|
||||
* Two '%' in a row represent a '%' instead of an insert.
|
||||
* The trace format strings need not have \n at the end.
|
||||
*
|
||||
*
|
||||
* Types
|
||||
* -----
|
||||
*
|
||||
* Type characters:
|
||||
* - c A char character in the default codepage.
|
||||
* - s A NUL-terminated char * string in the default codepage.
|
||||
* - S A UChar * string. Requires two params, (ptr, length). Length=-1 for nul term.
|
||||
* - b A byte (8-bit integer).
|
||||
* - h A 16-bit integer. Also a 16 bit Unicode code unit.
|
||||
* - d A 32-bit integer. Also a 21 bit Unicode code point value.
|
||||
* - l A 64-bit integer.
|
||||
* - p A data pointer.
|
||||
*
|
||||
* Vectors
|
||||
* -------
|
||||
*
|
||||
* If the 'v' is not specified, then one item of the specified type
|
||||
* is passed in.
|
||||
* If the 'v' (for "vector") is specified, then a vector of items of the
|
||||
* specified type is passed in, via a pointer to the first item
|
||||
* and an int32_t value for the length of the vector.
|
||||
* Length==-1 means zero or NUL termination. Works for vectors of all types.
|
||||
*
|
||||
* Note: %vS is a vector of (UChar *) strings. The strings must
|
||||
* be nul terminated as there is no way to provide a
|
||||
* separate length parameter for each string. The length
|
||||
* parameter (required for all vectors) is the number of
|
||||
* strings, not the length of the strings.
|
||||
*
|
||||
* Examples
|
||||
* --------
|
||||
*
|
||||
* These examples show the parameters that will be passed to an application's
|
||||
* UTraceData() function for various formats.
|
||||
*
|
||||
* - the precise formatting is up to the application!
|
||||
* - the examples use type casts for arguments only to _show_ the types of
|
||||
* arguments without needing variable declarations in the examples;
|
||||
* the type casts will not be necessary in actual code
|
||||
*
|
||||
* UTraceDataFunc(context, fnNumber, level,
|
||||
* "There is a character %c in the string %s.", // Format String
|
||||
* (char)c, (const char *)s); // varargs parameters
|
||||
* -> There is a character 0x42 'B' in the string "Bravo".
|
||||
*
|
||||
* UTraceDataFunc(context, fnNumber, level,
|
||||
* "Vector of bytes %vb vector of chars %vc",
|
||||
* (const uint8_t *)bytes, (int32_t)bytesLength,
|
||||
* (const char *)chars, (int32_t)charsLength);
|
||||
* -> Vector of bytes
|
||||
* 42 63 64 3f [4]
|
||||
* vector of chars
|
||||
* "Bcd?"[4]
|
||||
*
|
||||
* UTraceDataFunc(context, fnNumber, level,
|
||||
* "An int32_t %d and a whole bunch of them %vd",
|
||||
* (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
|
||||
* -> An int32_t 0xfffffffb and a whole bunch of them
|
||||
* fffffffb 00000005 0000010a [3]
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Trace output Formatter. An application's UTraceData tracing functions may call
|
||||
* back to this function to format the trace output in a
|
||||
* human readable form. Note that a UTraceData function may choose
|
||||
* to not format the data; it could, for example, save it in
|
||||
* the raw form in which it was received (more compact), leaving
|
||||
* formatting for a later trace analysis tool.
|
||||
* @param outBuf pointer to a buffer to receive the formatted output. Output
|
||||
* will be nul terminated if there is space in the buffer -
|
||||
* if the length of the requested output < the output buffer size.
|
||||
* @param capacity Length of the output buffer.
|
||||
* @param indent Number of spaces to indent the output. Intended to allow
|
||||
* data displayed from nested functions to be indented for readability.
|
||||
* @param fmt Format specification for the data to output
|
||||
* @param args Data to be formatted.
|
||||
* @return Length of formatted output, including the terminating NUL.
|
||||
* If buffer capacity is insufficient, the required capacity is returned.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrace_vformat(char *outBuf, int32_t capacity,
|
||||
int32_t indent, const char *fmt, va_list args);
|
||||
|
||||
/**
|
||||
* Trace output Formatter. An application's UTraceData tracing functions may call
|
||||
* this function to format any additional trace data, beyond that
|
||||
* provided by default, in human readable form with the same
|
||||
* formatting conventions used by utrace_vformat().
|
||||
* @param outBuf pointer to a buffer to receive the formatted output. Output
|
||||
* will be nul terminated if there is space in the buffer -
|
||||
* if the length of the requested output < the output buffer size.
|
||||
* @param capacity Length of the output buffer.
|
||||
* @param indent Number of spaces to indent the output. Intended to allow
|
||||
* data displayed from nested functions to be indented for readability.
|
||||
* @param fmt Format specification for the data to output
|
||||
* @param ... Data to be formatted.
|
||||
* @return Length of formatted output, including the terminating NUL.
|
||||
* If buffer capacity is insufficient, the required capacity is returned.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrace_format(char *outBuf, int32_t capacity,
|
||||
int32_t indent, const char *fmt, ...);
|
||||
|
||||
|
||||
|
||||
/* Trace function numbers --------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Get the name of a function from its trace function number.
|
||||
*
|
||||
* @param fnNumber The trace number for an ICU function.
|
||||
* @return The name string for the function.
|
||||
*
|
||||
* @see UTraceFunctionNumber
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
utrace_functionName(int32_t fnNumber);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
761
thirdparty/icu4c/common/unicode/utypes.h
vendored
Normal file
761
thirdparty/icu4c/common/unicode/utypes.h
vendored
Normal file
@@ -0,0 +1,761 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1996-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* FILE NAME : UTYPES.H (formerly ptypes.h)
|
||||
*
|
||||
* Date Name Description
|
||||
* 12/11/96 helena Creation.
|
||||
* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
|
||||
* uint8, uint16, and uint32.
|
||||
* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
|
||||
* well as C++.
|
||||
* Modified to use memcpy() for uprv_arrayCopy() fns.
|
||||
* 04/14/97 aliu Added TPlatformUtilities.
|
||||
* 05/07/97 aliu Added import/export specifiers (replacing the old
|
||||
* broken EXT_CLASS). Added version number for our
|
||||
* code. Cleaned up header.
|
||||
* 6/20/97 helena Java class name change.
|
||||
* 08/11/98 stephen UErrorCode changed from typedef to enum
|
||||
* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
|
||||
* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
|
||||
* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
|
||||
* 04/20/99 stephen Cleaned up & reworked for autoconf.
|
||||
* Renamed to utypes.h.
|
||||
* 05/05/99 stephen Changed to use <inttypes.h>
|
||||
* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UTYPES_H
|
||||
#define UTYPES_H
|
||||
|
||||
|
||||
#include "unicode/umachine.h"
|
||||
#include "unicode/uversion.h"
|
||||
#include "unicode/uconfig.h"
|
||||
#include <float.h>
|
||||
|
||||
#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief Basic definitions for ICU, for both C and C++ APIs
|
||||
*
|
||||
* This file defines basic types, constants, and enumerations directly or
|
||||
* indirectly by including other header files, especially utf.h for the
|
||||
* basic character and string definitions and umachine.h for consistent
|
||||
* integer and other types.
|
||||
*/
|
||||
|
||||
/** @{ API visibility control */
|
||||
|
||||
/**
|
||||
* \def U_SHOW_CPLUSPLUS_API
|
||||
* When defined to 1 (=default) and compiled with a C++ compiler, both C and C++ APIs are visible.
|
||||
* Otherwise, only C APIs are visible; this is for C++ users who want to
|
||||
* restrict their usage to binary stable C APIs exported by ICU DLLs.
|
||||
* @internal
|
||||
*/
|
||||
/**
|
||||
* \def U_SHOW_CPLUSPLUS_HEADER_API
|
||||
* When defined to 1 (=default) and compiled with a C++ compiler, C++ header-only APIs are visible.
|
||||
* This is for C++ users who restrict their usage to binary stable C APIs exported by ICU DLLs
|
||||
* (U_SHOW_CPLUSPLUS_API=0)
|
||||
* but who still want to use C++ header-only APIs which do not rely on ICU DLL exports.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
# ifndef U_SHOW_CPLUSPLUS_API
|
||||
# define U_SHOW_CPLUSPLUS_API 1
|
||||
# endif
|
||||
# ifndef U_SHOW_CPLUSPLUS_HEADER_API
|
||||
# define U_SHOW_CPLUSPLUS_HEADER_API 1
|
||||
# endif
|
||||
#else
|
||||
# undef U_SHOW_CPLUSPLUS_API
|
||||
# define U_SHOW_CPLUSPLUS_API 0
|
||||
# undef U_SHOW_CPLUSPLUS_HEADER_API
|
||||
# define U_SHOW_CPLUSPLUS_HEADER_API 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_HIDE_DRAFT_API
|
||||
* Define this to 1 to request that draft API be "hidden"
|
||||
* @internal
|
||||
*/
|
||||
/**
|
||||
* \def U_HIDE_INTERNAL_API
|
||||
* Define this to 1 to request that internal API be "hidden"
|
||||
* @internal
|
||||
*/
|
||||
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
|
||||
#define U_HIDE_DRAFT_API 1
|
||||
#endif
|
||||
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API)
|
||||
#define U_HIDE_INTERNAL_API 1
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/* ICUDATA naming scheme */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_ICUDATA_TYPE_LETTER
|
||||
*
|
||||
* This is a platform-dependent string containing one letter:
|
||||
* - b for big-endian, ASCII-family platforms
|
||||
* - l for little-endian, ASCII-family platforms
|
||||
* - e for big-endian, EBCDIC-family platforms
|
||||
* This letter is part of the common data file name.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_ICUDATA_TYPE_LITLETTER
|
||||
* The non-string form of U_ICUDATA_TYPE_LETTER
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if U_CHARSET_FAMILY
|
||||
# if U_IS_BIG_ENDIAN
|
||||
/* EBCDIC - should always be BE */
|
||||
# define U_ICUDATA_TYPE_LETTER "e"
|
||||
# define U_ICUDATA_TYPE_LITLETTER e
|
||||
# else
|
||||
# error "Don't know what to do with little endian EBCDIC!"
|
||||
# define U_ICUDATA_TYPE_LETTER "x"
|
||||
# define U_ICUDATA_TYPE_LITLETTER x
|
||||
# endif
|
||||
#else
|
||||
# if U_IS_BIG_ENDIAN
|
||||
/* Big-endian ASCII */
|
||||
# define U_ICUDATA_TYPE_LETTER "b"
|
||||
# define U_ICUDATA_TYPE_LITLETTER b
|
||||
# else
|
||||
/* Little-endian ASCII */
|
||||
# define U_ICUDATA_TYPE_LETTER "l"
|
||||
# define U_ICUDATA_TYPE_LITLETTER l
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* A single string literal containing the icudata stub name, e.g. 'icudt18e' for
|
||||
* ICU 1.8.x on EBCDIC, etc.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
|
||||
#define U_USE_USRDATA 0 /**< @internal */
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* U_ICUDATA_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
|
||||
* Defined as a literal, not a string.
|
||||
* Tricky Preprocessor use - ## operator replaces macro parameters with the literal string
|
||||
* from the corresponding macro invocation, _before_ other macro substitutions.
|
||||
* Needs nested \#defines to get the actual version numbers rather than
|
||||
* the literal text U_ICU_VERSION_MAJOR_NUM into the name.
|
||||
* The net result will be something of the form
|
||||
* \#define U_ICUDATA_ENTRY_POINT icudt19_dat
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
|
||||
* @internal
|
||||
*/
|
||||
#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
|
||||
|
||||
/**
|
||||
* Do not use.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DEF_ICUDATA_ENTRY_POINT
|
||||
/* affected by symbol renaming. See platform.h */
|
||||
#ifndef U_LIB_SUFFIX_C_NAME
|
||||
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
|
||||
#else
|
||||
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
|
||||
#endif
|
||||
#endif
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* \def NULL
|
||||
* Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifndef NULL
|
||||
#ifdef __cplusplus
|
||||
#define NULL nullptr
|
||||
#else
|
||||
#define NULL ((void *)0)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Calendar/TimeZone data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Date and Time data type.
|
||||
* This is a primitive data type that holds the date and time
|
||||
* as the number of milliseconds since 1970-jan-01, 00:00 UTC.
|
||||
* UTC leap seconds are ignored.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef double UDate;
|
||||
|
||||
/** The number of milliseconds per second @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_SECOND (1000)
|
||||
/** The number of milliseconds per minute @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_MINUTE (60000)
|
||||
/** The number of milliseconds per hour @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_HOUR (3600000)
|
||||
/** The number of milliseconds per day @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_DAY (86400000)
|
||||
|
||||
/**
|
||||
* Maximum UDate value
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define U_DATE_MAX DBL_MAX
|
||||
|
||||
/**
|
||||
* Minimum UDate value
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define U_DATE_MIN -U_DATE_MAX
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Shared library/DLL import-export API control */
|
||||
/*===========================================================================*/
|
||||
|
||||
/*
|
||||
* Control of symbol import/export.
|
||||
* ICU is separated into several libraries.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_COMBINED_IMPLEMENTATION
|
||||
* Set to export library symbols from inside the ICU library
|
||||
* when all of ICU is in a single library.
|
||||
* This can be set as a compiler option while building ICU, and it
|
||||
* needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_DATA_API
|
||||
* Set to export library symbols from inside the stubdata library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_COMMON_API
|
||||
* Set to export library symbols from inside the common library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_I18N_API
|
||||
* Set to export library symbols from inside the i18n library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_LAYOUT_API
|
||||
* Set to export library symbols from inside the layout engine library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_LAYOUTEX_API
|
||||
* Set to export library symbols from inside the layout extensions library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_IO_API
|
||||
* Set to export library symbols from inside the ustdio library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_TOOLUTIL_API
|
||||
* Set to export library symbols from inside the toolutil library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
|
||||
#ifdef U_IN_DOXYGEN
|
||||
// This definition is required when generating the API docs.
|
||||
#define U_COMBINED_IMPLEMENTATION 1
|
||||
#endif
|
||||
|
||||
/*
 * Pick the linkage of every ICU library's *_API macro from the
 * U_*_IMPLEMENTATION macro that identifies the library being compiled.
 * Only the first matching branch of this chain takes effect.
 */
/* Combined build: the symbols of all libraries are exported. */
#if defined(U_COMBINED_IMPLEMENTATION)
|
||||
#define U_DATA_API U_EXPORT
|
||||
#define U_COMMON_API U_EXPORT
|
||||
#define U_I18N_API U_EXPORT
|
||||
#define U_LAYOUT_API U_EXPORT
|
||||
#define U_LAYOUTEX_API U_EXPORT
|
||||
#define U_IO_API U_EXPORT
|
||||
#define U_TOOLUTIL_API U_EXPORT
|
||||
/* Static build: every *_API macro expands to nothing. */
#elif defined(U_STATIC_IMPLEMENTATION)
|
||||
#define U_DATA_API
|
||||
#define U_COMMON_API
|
||||
#define U_I18N_API
|
||||
#define U_LAYOUT_API
|
||||
#define U_LAYOUTEX_API
|
||||
#define U_IO_API
|
||||
#define U_TOOLUTIL_API
|
||||
/* Compiling the common library: export U_COMMON_API, import all others. */
#elif defined(U_COMMON_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_EXPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
/* Compiling the i18n library: export U_I18N_API, import all others. */
#elif defined(U_I18N_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_EXPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
/* Compiling the layout engine library: export U_LAYOUT_API, import all others. */
#elif defined(U_LAYOUT_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_EXPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
/* Compiling the layout extensions library: export U_LAYOUTEX_API, import all others. */
#elif defined(U_LAYOUTEX_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_EXPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
/* Compiling the ustdio library: export U_IO_API, import all others. */
#elif defined(U_IO_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_EXPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
/* Compiling the toolutil library: export U_TOOLUTIL_API, import all others. */
#elif defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_EXPORT
|
||||
/* No implementation macro is defined (code outside the ICU libraries): import everything. */
#else
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_STANDARD_CPP_NAMESPACE
|
||||
* Control of C++ Namespace
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
/* C++ translation units: the macro expands to the global scope operator. */
#ifdef __cplusplus
|
||||
#define U_STANDARD_CPP_NAMESPACE ::
|
||||
/* C translation units: the macro expands to nothing. */
#else
|
||||
#define U_STANDARD_CPP_NAMESPACE
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* UErrorCode */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Standard ICU4C error code type, a substitute for exceptions.
|
||||
*
|
||||
* Initialize the UErrorCode with U_ZERO_ERROR, and check for success or
|
||||
* failure using U_SUCCESS() or U_FAILURE():
|
||||
*
|
||||
* UErrorCode errorCode = U_ZERO_ERROR;
|
||||
* // call ICU API that needs an error code parameter.
|
||||
* if (U_FAILURE(errorCode)) {
|
||||
* // An error occurred. Handle it here.
|
||||
* }
|
||||
*
|
||||
* C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a
|
||||
* suitable subclass.
|
||||
*
|
||||
* For more information, see:
|
||||
* https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes
|
||||
*
|
||||
* Note: By convention, ICU functions that take a reference (C++) or a pointer
|
||||
* (C) to a UErrorCode first test:
|
||||
*
|
||||
* if (U_FAILURE(errorCode)) { return immediately; }
|
||||
*
|
||||
* so that in a chain of such functions the first one that sets an error code
|
||||
* causes the following ones to not perform any operations.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef enum UErrorCode {
|
||||
/* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
|
||||
* and is that way because VC++ debugger displays first encountered constant,
|
||||
* which is not what the code is used for
|
||||
*/
|
||||
|
||||
U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
|
||||
|
||||
U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
|
||||
|
||||
U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
|
||||
|
||||
U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
|
||||
|
||||
U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
|
||||
|
||||
U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
|
||||
|
||||
U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
|
||||
|
||||
U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
|
||||
|
||||
U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
|
||||
|
||||
U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
|
||||
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal UErrorCode warning value.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_ERROR_WARNING_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
U_ZERO_ERROR = 0, /**< No error, no warning. */
|
||||
|
||||
U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
|
||||
U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
|
||||
U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
|
||||
U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
|
||||
U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
|
||||
U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
|
||||
U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
|
||||
U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
|
||||
U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
|
||||
U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
|
||||
U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
|
||||
U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
|
||||
U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
|
||||
U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
|
||||
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
|
||||
U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
|
||||
U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
|
||||
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */
|
||||
U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
|
||||
U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
|
||||
U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
|
||||
U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
|
||||
U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
|
||||
U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
|
||||
It is very possible that a circular alias definition has occurred */
|
||||
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
|
||||
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
|
||||
U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
|
||||
U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
|
||||
U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
|
||||
U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */
|
||||
/**
|
||||
* The input is impractically long for an operation.
|
||||
* It is rejected because it may lead to problems such as excessive
|
||||
* processing time, stack depth, or heap memory requirements.
|
||||
*
|
||||
* @stable ICU 68
|
||||
*/
|
||||
U_INPUT_TOO_LONG_ERROR = 31,
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest standard error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_STANDARD_ERROR_LIMIT = 32,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10000-0x10100 are reserved for Transliterator.
|
||||
*/
|
||||
U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
|
||||
U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
|
||||
U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
|
||||
U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
|
||||
U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
|
||||
U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
|
||||
U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
|
||||
U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
|
||||
U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
|
||||
U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
|
||||
U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
|
||||
U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
|
||||
U_MISSING_OPERATOR, /**< A rule contains no operator */
|
||||
U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
|
||||
U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
|
||||
U_MULTIPLE_CURSORS, /**< More than one cursor */
|
||||
U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
|
||||
U_TRAILING_BACKSLASH, /**< A dangling backslash */
|
||||
U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
|
||||
U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
|
||||
U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
|
||||
U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
|
||||
U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
|
||||
U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
|
||||
U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
|
||||
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
|
||||
U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
|
||||
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */
|
||||
U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
|
||||
U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
|
||||
U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
|
||||
U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
|
||||
U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
|
||||
U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
|
||||
U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
|
||||
U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal Transliterator error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_PARSE_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10100-0x10200 are reserved for the formatting API.
|
||||
*/
|
||||
U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
|
||||
U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
|
||||
U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
|
||||
U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
|
||||
U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
|
||||
U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
|
||||
U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
|
||||
U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
|
||||
U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
|
||||
U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
|
||||
U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
|
||||
U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
|
||||
U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
|
||||
U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
|
||||
U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */
|
||||
U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */
|
||||
U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
|
||||
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
|
||||
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
|
||||
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
|
||||
U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @stable ICU 61 */
|
||||
U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @stable ICU 62 */
|
||||
|
||||
/* MessageFormat 2.0 errors */
|
||||
U_MF_UNRESOLVED_VARIABLE_ERROR, /**< A variable is referred to but not bound by any definition @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_SYNTAX_ERROR, /**< Includes all syntax errors @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_UNKNOWN_FUNCTION_ERROR, /**< An annotation refers to a function not defined by the standard or custom function registry @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_VARIANT_KEY_MISMATCH_ERROR, /**< In a match-construct, one or more variants had a different number of keys from the number of selectors @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_FORMATTING_ERROR, /**< Covers all runtime errors: for example, an internally inconsistent set of options. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_NONEXHAUSTIVE_PATTERN_ERROR, /**< In a match-construct, the variants do not cover all possible values @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_DUPLICATE_OPTION_NAME_ERROR, /**< In an annotation, the same option name appears more than once @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_SELECTOR_ERROR, /**< A selector function is applied to an operand of the wrong type @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_MISSING_SELECTOR_ANNOTATION_ERROR, /**< A selector expression evaluates to an unannotated operand. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_DUPLICATE_DECLARATION_ERROR, /**< The same variable is declared in more than one .local or .input declaration. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */
|
||||
U_MF_BAD_OPTION, /**< An option value provided to a function does not have the required form for that option. @internal ICU 77 technology preview @deprecated This API is for technology preview only. */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal formatting API error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_FMT_PARSE_ERROR_LIMIT = 0x10121,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10200-0x102ff are reserved for BreakIterator.
|
||||
*/
|
||||
U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */
|
||||
U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
|
||||
U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
|
||||
U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
|
||||
U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
|
||||
U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */
|
||||
U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
|
||||
U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
|
||||
U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
|
||||
U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
|
||||
U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
|
||||
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
|
||||
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
|
||||
U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
|
||||
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal BreakIterator error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_BRK_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10300-0x103ff are reserved for regular expression related errors.
|
||||
*/
|
||||
U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */
|
||||
U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
|
||||
U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
|
||||
U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
|
||||
U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
|
||||
U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
|
||||
U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
|
||||
U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
|
||||
U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
|
||||
U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
|
||||
U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
|
||||
U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
|
||||
U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
|
||||
U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
|
||||
U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */
|
||||
U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
|
||||
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
|
||||
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
|
||||
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
|
||||
U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */
|
||||
U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @stable ICU 55 */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal regular expression error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes.
|
||||
*/
|
||||
U_IDNA_PROHIBITED_ERROR=0x10400,
|
||||
U_IDNA_ERROR_START=0x10400,
|
||||
U_IDNA_UNASSIGNED_ERROR,
|
||||
U_IDNA_CHECK_BIDI_ERROR,
|
||||
U_IDNA_STD3_ASCII_RULES_ERROR,
|
||||
U_IDNA_ACE_PREFIX_ERROR,
|
||||
U_IDNA_VERIFICATION_ERROR,
|
||||
U_IDNA_LABEL_TOO_LONG_ERROR,
|
||||
U_IDNA_ZERO_LENGTH_LABEL_ERROR,
|
||||
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal IDNA error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_IDNA_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
/*
|
||||
* Aliases for StringPrep
|
||||
*/
|
||||
U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
|
||||
U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
|
||||
U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes.
|
||||
*/
|
||||
U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */
|
||||
U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */
|
||||
U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal plug-in error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_PLUGIN_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
} UErrorCode;
|
||||
|
||||
/* Use the following to determine if an UErrorCode represents */
|
||||
/* operational success or failure. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
/**
|
||||
* Does the error code indicate success?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static
|
||||
inline UBool U_SUCCESS(UErrorCode code) { return code <= U_ZERO_ERROR; }
|
||||
/**
|
||||
* Does the error code indicate a failure?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static
|
||||
inline UBool U_FAILURE(UErrorCode code) { return code > U_ZERO_ERROR; }
|
||||
#else
|
||||
/**
|
||||
* Does the error code indicate success?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR) /* true for warnings (negative codes) and U_ZERO_ERROR; x is expanded once */
|
||||
/**
|
||||
* Does the error code indicate a failure?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
# define U_FAILURE(x) ((x)>U_ZERO_ERROR) /* true only for codes above U_ZERO_ERROR; x is expanded once */
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Return a string for a UErrorCode value.
|
||||
* The string will be the same as the name of the error code constant
|
||||
* in the UErrorCode enum above.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
u_errorName(UErrorCode code);
|
||||
|
||||
|
||||
#endif /* _UTYPES */
|
||||
191
thirdparty/icu4c/common/unicode/uvernum.h
vendored
Normal file
191
thirdparty/icu4c/common/unicode/uvernum.h
vendored
Normal file
@@ -0,0 +1,191 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2000-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
* file name: uvernum.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* Created by: Vladimir Weinstein
|
||||
* Updated by: Steven R. Loomis
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: definitions of ICU version numbers
|
||||
*
|
||||
* This file is included by uversion.h and other files. This file contains only
|
||||
* macros and definitions. The actual version numbers are defined here.
|
||||
*/
|
||||
|
||||
/*
|
||||
* IMPORTANT: When updating version, the following things need to be done:
|
||||
* source/common/unicode/uvernum.h - this file: update major, minor,
|
||||
* patchlevel, suffix, version, short version constants, namespace,
|
||||
* renaming macro, and copyright
|
||||
*
|
||||
* The following files need to be updated as well, which can be done
|
||||
* by running the UNIX makefile target 'update-windows-makefiles' in icu4c/source.
|
||||
*
|
||||
* source/allinone/Build.Windows.IcuVersion.props - Update the IcuMajorVersion
|
||||
* source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
|
||||
* the new major/minor combination, and UNICODE_VERSION
|
||||
* for the Unicode version.
|
||||
*/
|
||||
|
||||
#ifndef UVERNUM_H
|
||||
#define UVERNUM_H
|
||||
|
||||
/** The standard copyright notice that gets compiled into each library.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_COPYRIGHT_STRING \
|
||||
" Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html "
|
||||
|
||||
/** The current ICU major version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_MAJOR_NUM 77
|
||||
|
||||
/** The current ICU minor version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_MINOR_NUM 1
|
||||
|
||||
/** The current ICU patchlevel version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_PATCHLEVEL_NUM 0
|
||||
|
||||
/** The current ICU build level version as an integer.
|
||||
* This value is for use by ICU clients. It defaults to 0.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
|
||||
#define U_ICU_VERSION_BUILDLEVEL_NUM 0
|
||||
#endif
|
||||
|
||||
/** Glued version suffix for renamers
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SUFFIX _77
|
||||
|
||||
/**
|
||||
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
|
||||
* @internal
|
||||
*/
|
||||
/**
|
||||
* \def U_DEF_ICU_ENTRY_POINT_RENAME
|
||||
* @internal
|
||||
*/
|
||||
/** Glued version suffix function for renamers
|
||||
* This value will change in the subsequent releases of ICU.
|
||||
* If a custom suffix (such as matching library suffixes) is desired, this can be modified.
|
||||
* Note that if present, platform.h may contain an earlier definition of this macro.
|
||||
* \def U_ICU_ENTRY_POINT_RENAME
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
/**
|
||||
* Disable the version suffix. Use the custom suffix if it exists.
|
||||
* \def U_DISABLE_VERSION_SUFFIX
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DISABLE_VERSION_SUFFIX
|
||||
#define U_DISABLE_VERSION_SUFFIX 0
|
||||
#endif
|
||||
|
||||
/*
 * Define U_ICU_ENTRY_POINT_RENAME(x) unless a definition already exists.
 * The U_DEF2_ -> U_DEF_ indirection lets the suffix macros be expanded
 * before ## pastes them onto x; operands of ## are not macro-expanded.
 */
#ifndef U_ICU_ENTRY_POINT_RENAME
|
||||
#ifdef U_HAVE_LIB_SUFFIX
|
||||
# if !U_DISABLE_VERSION_SUFFIX
|
||||
/* Library suffix present, version suffix enabled: x + version suffix + library suffix. */
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z
|
||||
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z)
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME)
|
||||
# else
|
||||
/* Library suffix present, version suffix disabled: x + library suffix only. */
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
|
||||
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_LIB_SUFFIX_C_NAME)
|
||||
# endif
|
||||
#else
|
||||
# if !U_DISABLE_VERSION_SUFFIX
|
||||
/* No library suffix, version suffix enabled: x + version suffix only. */
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
|
||||
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX)
|
||||
# else
|
||||
/* No library suffix and version suffix disabled: the name is left unchanged. */
# define U_ICU_ENTRY_POINT_RENAME(x) x
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/** The current ICU library version as a dotted-decimal string. The patchlevel
|
||||
* only appears in this string if it is non-zero.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION "77.1"
|
||||
|
||||
/**
|
||||
* The current ICU library major version number as a string, for library name suffixes.
|
||||
* This value will change in subsequent releases of ICU.
|
||||
*
|
||||
* Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers
|
||||
* into one string without dots ("48").
|
||||
* Since ICU 49, it is the double-digit major ICU version number.
|
||||
* See https://unicode-org.github.io/icu/userguide/design#version-numbers-in-icu
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SHORT "77"
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Data version in ICU4C.
|
||||
* @internal ICU 4.4 Internal Use Only
|
||||
**/
|
||||
#define U_ICU_DATA_VERSION "77.1"
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/*===========================================================================
|
||||
* ICU collation framework version information
|
||||
* Version info that can be obtained from a collator is affected by these
|
||||
* numbers in a secret and magic way. Please use collator version as a whole
|
||||
*===========================================================================
|
||||
*/
|
||||
|
||||
/**
|
||||
* Collation runtime version (sort key generator, strcoll).
|
||||
* If the version is different, sort keys for the same string could be different.
|
||||
* This value may change in subsequent releases of ICU.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define UCOL_RUNTIME_VERSION 9
|
||||
|
||||
/**
|
||||
* Collation builder code version.
|
||||
* When this is different, the same tailoring might result
|
||||
* in assigning different collation elements to code points.
|
||||
* This value may change in subsequent releases of ICU.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define UCOL_BUILDER_VERSION 9
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Constant 1.
|
||||
* This was intended to be the version of collation tailorings,
|
||||
* but instead the tailoring data carries a version number.
|
||||
* @deprecated ICU 54
|
||||
*/
|
||||
#define UCOL_TAILORINGS_VERSION 1
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#endif
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user