// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// casemap.h
// created: 2017jan12 Markus W. Scherer
#ifndef __CASEMAP_H__
#define __CASEMAP_H__

#include "unicode/utypes.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"

/**
 * \file
 * \brief C++ API: Low-level C++ case mapping functions.
 */

U_NAMESPACE_BEGIN

// Forward declarations: in this header these types are only used as
// pointer or reference parameters, so their full definitions are not needed.
class BreakIterator;
class ByteSink;
class Edits;
/** | |
* Low-level C++ case mapping functions. | |
* | |
* @stable ICU 59 | |
*/ | |
class U_COMMON_API CaseMap U_FINAL : public UMemory { | |
public: | |
/** | |
* Lowercases a UTF-16 string and optionally records edits. | |
* Casing is locale-dependent and context-sensitive. | |
* The result may be longer or shorter than the original. | |
* The source string and the destination buffer must not overlap. | |
* | |
* @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
* @param src The original string. | |
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
* @param dest A buffer for the result string. The result will be NUL-terminated if | |
* the buffer is large enough. | |
* The contents is undefined in case of failure. | |
* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
* dest may be NULL and the function will only return the length of the result | |
* without writing any of the result string. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* @return The length of the result string, if successful. | |
* When the result would be longer than destCapacity, | |
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
* | |
* @see u_strToLower | |
* @stable ICU 59 | |
*/ | |
static int32_t toLower( | |
const char *locale, uint32_t options, | |
const char16_t *src, int32_t srcLength, | |
char16_t *dest, int32_t destCapacity, Edits *edits, | |
UErrorCode &errorCode); | |
/** | |
* Uppercases a UTF-16 string and optionally records edits. | |
* Casing is locale-dependent and context-sensitive. | |
* The result may be longer or shorter than the original. | |
* The source string and the destination buffer must not overlap. | |
* | |
* @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
* @param src The original string. | |
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
* @param dest A buffer for the result string. The result will be NUL-terminated if | |
* the buffer is large enough. | |
* The contents is undefined in case of failure. | |
* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
* dest may be NULL and the function will only return the length of the result | |
* without writing any of the result string. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* @return The length of the result string, if successful. | |
* When the result would be longer than destCapacity, | |
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
* | |
* @see u_strToUpper | |
* @stable ICU 59 | |
*/ | |
static int32_t toUpper( | |
const char *locale, uint32_t options, | |
const char16_t *src, int32_t srcLength, | |
char16_t *dest, int32_t destCapacity, Edits *edits, | |
UErrorCode &errorCode); | |
#if !UCONFIG_NO_BREAK_ITERATION | |
/** | |
* Titlecases a UTF-16 string and optionally records edits. | |
* Casing is locale-dependent and context-sensitive. | |
* The result may be longer or shorter than the original. | |
* The source string and the destination buffer must not overlap. | |
* | |
* Titlecasing uses a break iterator to find the first characters of words | |
* that are to be titlecased. It titlecases those characters and lowercases | |
* all others. (This can be modified with options bits.) | |
* | |
* @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
* U_TITLECASE_NO_LOWERCASE, | |
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, | |
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. | |
* @param iter A break iterator to find the first characters of words that are to be titlecased. | |
* It is set to the source string (setText()) | |
* and used one or more times for iteration (first() and next()). | |
* If NULL, then a word break iterator for the locale is used | |
* (or something equivalent). | |
* @param src The original string. | |
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
* @param dest A buffer for the result string. The result will be NUL-terminated if | |
* the buffer is large enough. | |
* The contents is undefined in case of failure. | |
* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
* dest may be NULL and the function will only return the length of the result | |
* without writing any of the result string. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* @return The length of the result string, if successful. | |
* When the result would be longer than destCapacity, | |
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
* | |
* @see u_strToTitle | |
* @see ucasemap_toTitle | |
* @stable ICU 59 | |
*/ | |
static int32_t toTitle( | |
const char *locale, uint32_t options, BreakIterator *iter, | |
const char16_t *src, int32_t srcLength, | |
char16_t *dest, int32_t destCapacity, Edits *edits, | |
UErrorCode &errorCode); | |
#endif // UCONFIG_NO_BREAK_ITERATION | |
/** | |
* Case-folds a UTF-16 string and optionally records edits. | |
* | |
* Case folding is locale-independent and not context-sensitive, | |
* but there is an option for whether to include or exclude mappings for dotted I | |
* and dotless i that are marked with 'T' in CaseFolding.txt. | |
* | |
* The result may be longer or shorter than the original. | |
* The source string and the destination buffer must not overlap. | |
* | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. | |
* @param src The original string. | |
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
* @param dest A buffer for the result string. The result will be NUL-terminated if | |
* the buffer is large enough. | |
* The contents is undefined in case of failure. | |
* @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
* dest may be NULL and the function will only return the length of the result | |
* without writing any of the result string. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* @return The length of the result string, if successful. | |
* When the result would be longer than destCapacity, | |
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
* | |
* @see u_strFoldCase | |
* @stable ICU 59 | |
*/ | |
static int32_t fold( | |
uint32_t options, | |
const char16_t *src, int32_t srcLength, | |
char16_t *dest, int32_t destCapacity, Edits *edits, | |
UErrorCode &errorCode); | |
/** | |
* Lowercases a UTF-8 string and optionally records edits. | |
* Casing is locale-dependent and context-sensitive. | |
* The result may be longer or shorter than the original. | |
* | |
* @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
* @param src The original string. | |
* @param sink A ByteSink to which the result string is written. | |
* sink.Flush() is called at the end. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* | |
* @see ucasemap_utf8ToLower | |
* @stable ICU 60 | |
*/ | |
static void utf8ToLower( | |
const char *locale, uint32_t options, | |
StringPiece src, ByteSink &sink, Edits *edits, | |
UErrorCode &errorCode); | |
/** | |
* Uppercases a UTF-8 string and optionally records edits. | |
* Casing is locale-dependent and context-sensitive. | |
* The result may be longer or shorter than the original. | |
* | |
* @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
* @param src The original string. | |
* @param sink A ByteSink to which the result string is written. | |
* sink.Flush() is called at the end. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* | |
* @see ucasemap_utf8ToUpper | |
* @stable ICU 60 | |
*/ | |
static void utf8ToUpper( | |
const char *locale, uint32_t options, | |
StringPiece src, ByteSink &sink, Edits *edits, | |
UErrorCode &errorCode); | |
#if !UCONFIG_NO_BREAK_ITERATION | |
/** | |
* Titlecases a UTF-8 string and optionally records edits. | |
* Casing is locale-dependent and context-sensitive. | |
* The result may be longer or shorter than the original. | |
* | |
* Titlecasing uses a break iterator to find the first characters of words | |
* that are to be titlecased. It titlecases those characters and lowercases | |
* all others. (This can be modified with options bits.) | |
* | |
* @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
* U_TITLECASE_NO_LOWERCASE, | |
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, | |
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. | |
* @param iter A break iterator to find the first characters of words that are to be titlecased. | |
* It is set to the source string (setUText()) | |
* and used one or more times for iteration (first() and next()). | |
* If NULL, then a word break iterator for the locale is used | |
* (or something equivalent). | |
* @param src The original string. | |
* @param sink A ByteSink to which the result string is written. | |
* sink.Flush() is called at the end. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* | |
* @see ucasemap_utf8ToTitle | |
* @stable ICU 60 | |
*/ | |
static void utf8ToTitle( | |
const char *locale, uint32_t options, BreakIterator *iter, | |
StringPiece src, ByteSink &sink, Edits *edits, | |
UErrorCode &errorCode); | |
#endif // UCONFIG_NO_BREAK_ITERATION | |
/** | |
* Case-folds a UTF-8 string and optionally records edits. | |
* | |
* Case folding is locale-independent and not context-sensitive, | |
* but there is an option for whether to include or exclude mappings for dotted I | |
* and dotless i that are marked with 'T' in CaseFolding.txt. | |
* | |
* The result may be longer or shorter than the original. | |
* | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
* @param src The original string. | |
* @param sink A ByteSink to which the result string is written. | |
* sink.Flush() is called at the end. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* | |
* @see ucasemap_utf8FoldCase | |
* @stable ICU 60 | |
*/ | |
static void utf8Fold( | |
uint32_t options, | |
StringPiece src, ByteSink &sink, Edits *edits, | |
UErrorCode &errorCode); | |
/** | |
* Lowercases a UTF-8 string and optionally records edits. | |
* Casing is locale-dependent and context-sensitive. | |
* The result may be longer or shorter than the original. | |
* The source string and the destination buffer must not overlap. | |
* | |
* @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
* @param src The original string. | |
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
* @param dest A buffer for the result string. The result will be NUL-terminated if | |
* the buffer is large enough. | |
* The contents is undefined in case of failure. | |
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
* dest may be NULL and the function will only return the length of the result | |
* without writing any of the result string. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* @return The length of the result string, if successful. | |
* When the result would be longer than destCapacity, | |
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
* | |
* @see ucasemap_utf8ToLower | |
* @stable ICU 59 | |
*/ | |
static int32_t utf8ToLower( | |
const char *locale, uint32_t options, | |
const char *src, int32_t srcLength, | |
char *dest, int32_t destCapacity, Edits *edits, | |
UErrorCode &errorCode); | |
/** | |
* Uppercases a UTF-8 string and optionally records edits. | |
* Casing is locale-dependent and context-sensitive. | |
* The result may be longer or shorter than the original. | |
* The source string and the destination buffer must not overlap. | |
* | |
* @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. | |
* @param src The original string. | |
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
* @param dest A buffer for the result string. The result will be NUL-terminated if | |
* the buffer is large enough. | |
* The contents is undefined in case of failure. | |
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
* dest may be NULL and the function will only return the length of the result | |
* without writing any of the result string. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* @return The length of the result string, if successful. | |
* When the result would be longer than destCapacity, | |
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
* | |
* @see ucasemap_utf8ToUpper | |
* @stable ICU 59 | |
*/ | |
static int32_t utf8ToUpper( | |
const char *locale, uint32_t options, | |
const char *src, int32_t srcLength, | |
char *dest, int32_t destCapacity, Edits *edits, | |
UErrorCode &errorCode); | |
#if !UCONFIG_NO_BREAK_ITERATION | |
/** | |
* Titlecases a UTF-8 string and optionally records edits. | |
* Casing is locale-dependent and context-sensitive. | |
* The result may be longer or shorter than the original. | |
* The source string and the destination buffer must not overlap. | |
* | |
* Titlecasing uses a break iterator to find the first characters of words | |
* that are to be titlecased. It titlecases those characters and lowercases | |
* all others. (This can be modified with options bits.) | |
* | |
* @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
* U_TITLECASE_NO_LOWERCASE, | |
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, | |
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. | |
* @param iter A break iterator to find the first characters of words that are to be titlecased. | |
* It is set to the source string (setUText()) | |
* and used one or more times for iteration (first() and next()). | |
* If NULL, then a word break iterator for the locale is used | |
* (or something equivalent). | |
* @param src The original string. | |
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
* @param dest A buffer for the result string. The result will be NUL-terminated if | |
* the buffer is large enough. | |
* The contents is undefined in case of failure. | |
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
* dest may be NULL and the function will only return the length of the result | |
* without writing any of the result string. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* @return The length of the result string, if successful. | |
* When the result would be longer than destCapacity, | |
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
* | |
* @see ucasemap_utf8ToTitle | |
* @stable ICU 59 | |
*/ | |
static int32_t utf8ToTitle( | |
const char *locale, uint32_t options, BreakIterator *iter, | |
const char *src, int32_t srcLength, | |
char *dest, int32_t destCapacity, Edits *edits, | |
UErrorCode &errorCode); | |
#endif // UCONFIG_NO_BREAK_ITERATION | |
/** | |
* Case-folds a UTF-8 string and optionally records edits. | |
* | |
* Case folding is locale-independent and not context-sensitive, | |
* but there is an option for whether to include or exclude mappings for dotted I | |
* and dotless i that are marked with 'T' in CaseFolding.txt. | |
* | |
* The result may be longer or shorter than the original. | |
* The source string and the destination buffer must not overlap. | |
* | |
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, | |
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. | |
* @param src The original string. | |
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
* @param dest A buffer for the result string. The result will be NUL-terminated if | |
* the buffer is large enough. | |
* The contents is undefined in case of failure. | |
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
* dest may be NULL and the function will only return the length of the result | |
* without writing any of the result string. | |
* @param edits Records edits for index mapping, working with styled text, | |
* and getting only changes (if any). | |
* The Edits contents is undefined if any error occurs. | |
* This function calls edits->reset() first unless | |
* options includes U_EDITS_NO_RESET. edits can be NULL. | |
* @param errorCode Reference to an in/out error code value | |
* which must not indicate a failure before the function call. | |
* @return The length of the result string, if successful. | |
* When the result would be longer than destCapacity, | |
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
* | |
* @see ucasemap_utf8FoldCase | |
* @stable ICU 59 | |
*/ | |
static int32_t utf8Fold( | |
uint32_t options, | |
const char *src, int32_t srcLength, | |
char *dest, int32_t destCapacity, Edits *edits, | |
UErrorCode &errorCode); | |
private: | |
CaseMap() = delete; | |
CaseMap(const CaseMap &other) = delete; | |
CaseMap &operator=(const CaseMap &other) = delete; | |
}; | |
U_NAMESPACE_END

#endif  // __CASEMAP_H__