utf_old.h File Reference

C API: Deprecated macros for Unicode string handling. More...

#include "unicode/utf.h"

Go to the source code of this file.

Defines

#define UTF_SIZE   16
 Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode.
#define UTF_SAFE
 The default choice for general Unicode string macros is to use the ..._SAFE macro implementations with strict=FALSE.
#define UTF8_ERROR_VALUE_1   0x15
#define UTF8_ERROR_VALUE_2   0x9f
 See documentation on UTF8_ERROR_VALUE_1 for details.
#define UTF_ERROR_VALUE   0xffff
 Error value for all UTFs.
#define UTF_IS_ERROR(c)   (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
 Is a given 32-bit code an error value as returned by one of the macros for any UTF?
#define UTF_IS_VALID(c)
 This is a combined macro: Is c a valid Unicode value _and_ not an error code?
#define UTF_IS_SURROGATE(uchar)   (((uchar)&0xfffff800)==0xd800)
 Is this code unit or code point a surrogate (U+d800..U+dfff)?
#define UTF_IS_UNICODE_NONCHAR(c)
 Is a given 32-bit code point a Unicode noncharacter?
#define UTF_IS_UNICODE_CHAR(c)
 Is a given 32-bit value a Unicode code point value (0..U+10ffff) that can be assigned a character?
#define UTF8_COUNT_TRAIL_BYTES(leadByte)   (utf8_countTrailBytes[(uint8_t)leadByte])
 Count the trail bytes for a UTF-8 lead byte.
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes)   ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
 Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
#define UTF8_IS_SINGLE(uchar)   (((uchar)&0x80)==0)
 Is this code point a single code unit (byte)?
#define UTF8_IS_LEAD(uchar)   ((uint8_t)((uchar)-0xc0)<0x3e)
 Is this code unit the lead code unit (byte) of a code point?
#define UTF8_IS_TRAIL(uchar)   (((uchar)&0xc0)==0x80)
 Is this code unit a trailing code unit (byte) of a code point?
#define UTF8_NEED_MULTIPLE_UCHAR(c)   ((uint32_t)(c)>0x7f)
 Does this scalar Unicode value need multiple code units for storage?
#define UTF8_CHAR_LENGTH(c)
 Given the code point c, how many bytes (code units) does its UTF-8 encoding take?
#define UTF8_MAX_CHAR_LENGTH   4
 The maximum number of bytes per code point.
#define UTF8_ARRAY_SIZE(size)   ((5*(size))/2)
 Average number of code units compared to UTF-16.
#define UTF8_GET_CHAR_UNSAFE(s, i, c)
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict)
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c)
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c)
#define UTF8_FWD_1_UNSAFE(s, i)
#define UTF8_FWD_N_UNSAFE(s, i, n)
#define UTF8_SET_CHAR_START_UNSAFE(s, i)
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict)
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF8_FWD_1_SAFE(s, i, length)   U8_FWD_1(s, i, length)
#define UTF8_FWD_N_SAFE(s, i, length, n)   U8_FWD_N(s, i, length, n)
#define UTF8_SET_CHAR_START_SAFE(s, start, i)   U8_SET_CP_START(s, start, i)
#define UTF8_PREV_CHAR_UNSAFE(s, i, c)
#define UTF8_BACK_1_UNSAFE(s, i)
#define UTF8_BACK_N_UNSAFE(s, i, n)
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i)
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict)
#define UTF8_BACK_1_SAFE(s, start, i)   U8_BACK_1(s, start, i)
#define UTF8_BACK_N_SAFE(s, start, i, n)   U8_BACK_N(s, start, i, n)
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length)   U8_SET_CP_LIMIT(s, start, i, length)
#define UTF_IS_FIRST_SURROGATE(uchar)   (((uchar)&0xfffffc00)==0xd800)
 Is uchar a first/lead surrogate?
#define UTF_IS_SECOND_SURROGATE(uchar)   (((uchar)&0xfffffc00)==0xdc00)
 Is uchar a second/trail surrogate?
#define UTF_IS_SURROGATE_FIRST(c)   (((c)&0x400)==0)
 Assuming c is a surrogate, is it a first/lead surrogate?
#define UTF_SURROGATE_OFFSET   ((0xd800<<10UL)+0xdc00-0x10000)
 Helper constant for UTF16_GET_PAIR_VALUE.
#define UTF16_GET_PAIR_VALUE(first, second)   (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
 Get the UTF-32 value from the surrogate code units.
#define UTF_FIRST_SURROGATE(supplementary)   (UChar)(((supplementary)>>10)+0xd7c0)
#define UTF_SECOND_SURROGATE(supplementary)   (UChar)(((supplementary)&0x3ff)|0xdc00)
#define UTF16_LEAD(supplementary)   UTF_FIRST_SURROGATE(supplementary)
#define UTF16_TRAIL(supplementary)   UTF_SECOND_SURROGATE(supplementary)
#define UTF16_IS_SINGLE(uchar)   !UTF_IS_SURROGATE(uchar)
#define UTF16_IS_LEAD(uchar)   UTF_IS_FIRST_SURROGATE(uchar)
#define UTF16_IS_TRAIL(uchar)   UTF_IS_SECOND_SURROGATE(uchar)
#define UTF16_NEED_MULTIPLE_UCHAR(c)   ((uint32_t)(c)>0xffff)
 Does this scalar Unicode value need multiple code units for storage?
#define UTF16_CHAR_LENGTH(c)   ((uint32_t)(c)<=0xffff ? 1 : 2)
#define UTF16_MAX_CHAR_LENGTH   2
#define UTF16_ARRAY_SIZE(size)   (size)
 Average number of code units compared to UTF-16.
#define UTF16_GET_CHAR_UNSAFE(s, i, c)
 Get a single code point from an offset that points to any of the code units that belong to that code point.
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c)
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c)
#define UTF16_FWD_1_UNSAFE(s, i)
#define UTF16_FWD_N_UNSAFE(s, i, n)
#define UTF16_SET_CHAR_START_UNSAFE(s, i)
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF16_FWD_1_SAFE(s, i, length)   U16_FWD_1(s, i, length)
#define UTF16_FWD_N_SAFE(s, i, length, n)   U16_FWD_N(s, i, length, n)
#define UTF16_SET_CHAR_START_SAFE(s, start, i)   U16_SET_CP_START(s, start, i)
#define UTF16_PREV_CHAR_UNSAFE(s, i, c)
#define UTF16_BACK_1_UNSAFE(s, i)
#define UTF16_BACK_N_UNSAFE(s, i, n)
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
#define UTF16_BACK_1_SAFE(s, start, i)   U16_BACK_1(s, start, i)
#define UTF16_BACK_N_SAFE(s, start, i, n)   U16_BACK_N(s, start, i, n)
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)   U16_SET_CP_LIMIT(s, start, i, length)
#define UTF32_IS_SAFE(c, strict)
#define UTF32_IS_SINGLE(uchar)   1
#define UTF32_IS_LEAD(uchar)   0
#define UTF32_IS_TRAIL(uchar)   0
#define UTF32_NEED_MULTIPLE_UCHAR(c)   0
#define UTF32_CHAR_LENGTH(c)   1
#define UTF32_MAX_CHAR_LENGTH   1
#define UTF32_ARRAY_SIZE(size)   (size)
#define UTF32_GET_CHAR_UNSAFE(s, i, c)
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict)
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c)
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c)
#define UTF32_FWD_1_UNSAFE(s, i)
#define UTF32_FWD_N_UNSAFE(s, i, n)
#define UTF32_SET_CHAR_START_UNSAFE(s, i)
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict)
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF32_FWD_1_SAFE(s, i, length)
#define UTF32_FWD_N_SAFE(s, i, length, n)
#define UTF32_SET_CHAR_START_SAFE(s, start, i)
#define UTF32_PREV_CHAR_UNSAFE(s, i, c)
#define UTF32_BACK_1_UNSAFE(s, i)
#define UTF32_BACK_N_UNSAFE(s, i, n)
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i)
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict)
#define UTF32_BACK_1_SAFE(s, start, i)
#define UTF32_BACK_N_SAFE(s, start, i, n)
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length)
#define UTF_ARRAY_SIZE(size)   UTF16_ARRAY_SIZE(size)
 Estimate the number of code units for a string based on the number of UTF-16 code units.
#define UTF_GET_CHAR_UNSAFE(s, i, c)   UTF16_GET_CHAR_UNSAFE(s, i, c)
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict)   UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
#define UTF_NEXT_CHAR_UNSAFE(s, i, c)   UTF16_NEXT_CHAR_UNSAFE(s, i, c)
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)   UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
#define UTF_APPEND_CHAR_UNSAFE(s, i, c)   UTF16_APPEND_CHAR_UNSAFE(s, i, c)
#define UTF_APPEND_CHAR_SAFE(s, i, length, c)   UTF16_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF_FWD_1_UNSAFE(s, i)   UTF16_FWD_1_UNSAFE(s, i)
#define UTF_FWD_1_SAFE(s, i, length)   UTF16_FWD_1_SAFE(s, i, length)
#define UTF_FWD_N_UNSAFE(s, i, n)   UTF16_FWD_N_UNSAFE(s, i, n)
#define UTF_FWD_N_SAFE(s, i, length, n)   UTF16_FWD_N_SAFE(s, i, length, n)
#define UTF_SET_CHAR_START_UNSAFE(s, i)   UTF16_SET_CHAR_START_UNSAFE(s, i)
#define UTF_SET_CHAR_START_SAFE(s, start, i)   UTF16_SET_CHAR_START_SAFE(s, start, i)
#define UTF_PREV_CHAR_UNSAFE(s, i, c)   UTF16_PREV_CHAR_UNSAFE(s, i, c)
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
#define UTF_BACK_1_UNSAFE(s, i)   UTF16_BACK_1_UNSAFE(s, i)
#define UTF_BACK_1_SAFE(s, start, i)   UTF16_BACK_1_SAFE(s, start, i)
#define UTF_BACK_N_UNSAFE(s, i, n)   UTF16_BACK_N_UNSAFE(s, i, n)
#define UTF_BACK_N_SAFE(s, start, i, n)   UTF16_BACK_N_SAFE(s, start, i, n)
#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)   UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)   UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
#define UTF_IS_SINGLE(uchar)   U16_IS_SINGLE(uchar)
 Does this code unit alone encode a code point (BMP, not a surrogate)? Same as UTF16_IS_SINGLE.
#define UTF_IS_LEAD(uchar)   U16_IS_LEAD(uchar)
 Is this code unit the first one of several (a lead surrogate)? Same as UTF16_IS_LEAD.
#define UTF_IS_TRAIL(uchar)   U16_IS_TRAIL(uchar)
 Is this code unit one of several but not the first one (a trail surrogate)? Same as UTF16_IS_TRAIL.
#define UTF_NEED_MULTIPLE_UCHAR(c)   UTF16_NEED_MULTIPLE_UCHAR(c)
 Does this code point require multiple code units (is it a supplementary code point)? Same as UTF16_NEED_MULTIPLE_UCHAR.
#define UTF_CHAR_LENGTH(c)   U16_LENGTH(c)
 How many code units are used to encode this code point (1 or 2)? Same as UTF16_CHAR_LENGTH.
#define UTF_MAX_CHAR_LENGTH   U16_MAX_LENGTH
 How many code units are used at most for any Unicode code point (2)? Same as UTF16_MAX_CHAR_LENGTH.
#define UTF_GET_CHAR(s, start, i, length, c)   U16_GET(s, start, i, length, c)
 Set c to the code point that contains the code unit i.
#define UTF_NEXT_CHAR(s, i, length, c)   U16_NEXT(s, i, length, c)
 Set c to the code point that starts at code unit i and advance i to beyond the code units of this code point (post-increment).
#define UTF_APPEND_CHAR(s, i, length, c)   UTF16_APPEND_CHAR_SAFE(s, i, length, c)
 Append the code units of code point c to the string at index i and advance i to beyond the new code units (post-increment).
#define UTF_FWD_1(s, i, length)   U16_FWD_1(s, i, length)
 Advance i to beyond the code units of the code point that begins at i.
#define UTF_FWD_N(s, i, length, n)   U16_FWD_N(s, i, length, n)
 Advance i to beyond the code units of the n code points where the first one begins at i.
#define UTF_SET_CHAR_START(s, start, i)   U16_SET_CP_START(s, start, i)
 Take the random-access index i and adjust it so that it points to the beginning of a code point.
#define UTF_PREV_CHAR(s, start, i, c)   U16_PREV(s, start, i, c)
 Set c to the code point that has code units before i and move i backward (towards the beginning of the string) to the first code unit of this code point (pre-decrement).
#define UTF_BACK_1(s, start, i)   U16_BACK_1(s, start, i)
 Move i backward (towards the beginning of the string) to the first code unit of the code point that has code units before i.
#define UTF_BACK_N(s, start, i, n)   U16_BACK_N(s, start, i, n)
 Move i backward (towards the beginning of the string) to the first code unit of the n code points that have code units before i.
#define UTF_SET_CHAR_LIMIT(s, start, i, length)   U16_SET_CP_LIMIT(s, start, i, length)
 Take the random-access index i and adjust it so that it points beyond a code point.


Detailed Description

C API: Deprecated macros for Unicode string handling.

Definition in file utf_old.h.


Define Documentation

#define UTF16_APPEND_CHAR_SAFE(s, i, length, c)
 

Value:

{ \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}
Deprecated:
ICU 2.4. Use U16_APPEND instead, see utf_old.h.

Definition at line 728 of file utf_old.h.

#define UTF16_APPEND_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}
Deprecated:
ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h.

Definition at line 671 of file utf_old.h.

#define UTF16_ARRAY_SIZE(size)   (size)
 

Average number of code units compared to UTF-16.

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 607 of file utf_old.h.

#define UTF16_BACK_1_SAFE(s, start, i)   U16_BACK_1(s, start, i)
 

Deprecated:
ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.

Definition at line 812 of file utf_old.h.

#define UTF16_BACK_1_UNSAFE(s, i)
 

Value:

{ \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}
Deprecated:
ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h.

Definition at line 766 of file utf_old.h.

#define UTF16_BACK_N_SAFE(s, start, i, n)   U16_BACK_N(s, start, i, n)
 

Deprecated:
ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.

Definition at line 816 of file utf_old.h.

#define UTF16_BACK_N_UNSAFE(s, i, n)
 

Value:

{ \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}
Deprecated:
ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h.

Definition at line 774 of file utf_old.h.

#define UTF16_CHAR_LENGTH(c)   ((uint32_t)(c)<=0xffff ? 1 : 2)
 

Deprecated:
ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.

Definition at line 599 of file utf_old.h.

#define UTF16_FWD_1_SAFE(s, i, length)   U16_FWD_1(s, i, length)
 

Deprecated:
ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.

Definition at line 745 of file utf_old.h.

#define UTF16_FWD_1_UNSAFE(s, i)
 

Value:

{ \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}
Deprecated:
ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h.

Definition at line 682 of file utf_old.h.

#define UTF16_FWD_N_SAFE(s, i, length, n)   U16_FWD_N(s, i, length, n)
 

Deprecated:
ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.

Definition at line 749 of file utf_old.h.

#define UTF16_FWD_N_UNSAFE(s, i, n)
 

Value:

{ \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}
Deprecated:
ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h.

Definition at line 690 of file utf_old.h.

#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
 

Value:

{ \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
Deprecated:
ICU 2.4. Use U16_GET instead, see utf_old.h.

Definition at line 634 of file utf_old.h.

#define UTF16_GET_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}
Get a single code point from an offset that points to any of the code units that belong to that code point.

Assume 0<=i<length.

This could be used for iteration together with UTF16_CHAR_LENGTH() and UTF_IS_ERROR(), but the use of UTF16_NEXT_CHAR[_UNSAFE]() and UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that.

Deprecated:
ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h.

Definition at line 621 of file utf_old.h.

#define UTF16_GET_PAIR_VALUE(first, second)   (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
 

Get the UTF-32 value from the surrogate code units.

Deprecated:
ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h.

Definition at line 562 of file utf_old.h.

#define UTF16_IS_LEAD(uchar)   UTF_IS_FIRST_SURROGATE(uchar)
 

Deprecated:
ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h.

Definition at line 587 of file utf_old.h.

#define UTF16_IS_SINGLE(uchar)   !UTF_IS_SURROGATE(uchar)
 

Deprecated:
ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h.

Definition at line 583 of file utf_old.h.

#define UTF16_IS_TRAIL(uchar)   UTF_IS_SECOND_SURROGATE(uchar)
 

Deprecated:
ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h.

Definition at line 591 of file utf_old.h.

#define UTF16_LEAD(supplementary)   UTF_FIRST_SURROGATE(supplementary)
 

Deprecated:
ICU 2.4. Renamed to U16_LEAD, see utf_old.h.

Definition at line 575 of file utf_old.h.

#define UTF16_MAX_CHAR_LENGTH   2
 

Deprecated:
ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h.

Definition at line 603 of file utf_old.h.

#define UTF16_NEED_MULTIPLE_UCHAR(c)   ((uint32_t)(c)>0xffff)
 

Does this scalar Unicode value need multiple code units for storage?

Deprecated:
ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h.

Definition at line 595 of file utf_old.h.

#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
 

Value:

{ \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
Deprecated:
ICU 2.4. Use U16_NEXT instead, see utf_old.h.

Definition at line 708 of file utf_old.h.

#define UTF16_NEXT_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}
Deprecated:
ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h.

Definition at line 662 of file utf_old.h.

#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
 

Value:

{ \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
Deprecated:
ICU 2.4. Use U16_PREV instead, see utf_old.h.

Definition at line 792 of file utf_old.h.

#define UTF16_PREV_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}
Deprecated:
ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h.

Definition at line 757 of file utf_old.h.

#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)   U16_SET_CP_LIMIT(s, start, i, length)
 

Deprecated:
ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.

Definition at line 820 of file utf_old.h.

#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
 

Value:

{ \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}
Deprecated:
ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h.

Definition at line 784 of file utf_old.h.

#define UTF16_SET_CHAR_START_SAFE(s, start, i)   U16_SET_CP_START(s, start, i)
 

Deprecated:
ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.

Definition at line 753 of file utf_old.h.

#define UTF16_SET_CHAR_START_UNSAFE(s, i)
 

Value:

{ \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}
Deprecated:
ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h.

Definition at line 700 of file utf_old.h.

#define UTF16_TRAIL(supplementary)   UTF_SECOND_SURROGATE(supplementary)
 

Deprecated:
ICU 2.4. Renamed to U16_TRAIL, see utf_old.h.

Definition at line 579 of file utf_old.h.

#define UTF32_APPEND_CHAR_SAFE(s, i, length, c)
 

Value:

{ \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 941 of file utf_old.h.

#define UTF32_APPEND_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (s)[(i)++]=(c); \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 909 of file utf_old.h.

#define UTF32_ARRAY_SIZE(size)   (size)
 

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 882 of file utf_old.h.

#define UTF32_BACK_1_SAFE(s, start, i)
 

Value:

{ \
    --(i); \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 1004 of file utf_old.h.

#define UTF32_BACK_1_UNSAFE(s, i)
 

Value:

{ \
    --(i); \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 978 of file utf_old.h.

#define UTF32_BACK_N_SAFE(s, start, i, n)
 

Value:

{ \
    (i)-=(n); \
    if((i)<(start)) { \
        (i)=(start); \
    } \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 1010 of file utf_old.h.

#define UTF32_BACK_N_UNSAFE(s, i, n)
 

Value:

{ \
    (i)-=(n); \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 984 of file utf_old.h.

#define UTF32_CHAR_LENGTH(c)   1
 

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 873 of file utf_old.h.

#define UTF32_FWD_1_SAFE(s, i, length)
 

Value:

{ \
    ++(i); \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 951 of file utf_old.h.

#define UTF32_FWD_1_UNSAFE(s, i)
 

Value:

{ \
    ++(i); \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 915 of file utf_old.h.

#define UTF32_FWD_N_SAFE(s, i, length, n)
 

Value:

{ \
    if(((i)+=(n))>(length)) { \
        (i)=(length); \
    } \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 957 of file utf_old.h.

#define UTF32_FWD_N_UNSAFE(s, i, n)
 

Value:

{ \
    (i)+=(n); \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 921 of file utf_old.h.

#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict)
 

Value:

{ \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 892 of file utf_old.h.

#define UTF32_GET_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[i]; \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 886 of file utf_old.h.

#define UTF32_IS_LEAD(uchar)   0
 

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 861 of file utf_old.h.

#define UTF32_IS_SAFE(c, strict)
 

Value:

(!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 841 of file utf_old.h.

#define UTF32_IS_SINGLE(uchar)   1
 

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 858 of file utf_old.h.

#define UTF32_IS_TRAIL(uchar)   0
 

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 864 of file utf_old.h.

#define UTF32_MAX_CHAR_LENGTH   1
 

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 876 of file utf_old.h.

#define UTF32_NEED_MULTIPLE_UCHAR(c)   0
 

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 870 of file utf_old.h.

#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict)
 

Value:

{ \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 932 of file utf_old.h.

#define UTF32_NEXT_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[(i)++]; \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 903 of file utf_old.h.

#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict)
 

Value:

{ \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 995 of file utf_old.h.

#define UTF32_PREV_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[--(i)]; \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 972 of file utf_old.h.

#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length)
 

Value:

{ \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 1019 of file utf_old.h.

#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i)
 

Value:

{ \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 990 of file utf_old.h.

#define UTF32_SET_CHAR_START_SAFE(s, start, i)
 

Value:

{ \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 965 of file utf_old.h.

#define UTF32_SET_CHAR_START_UNSAFE(s, i)
 

Value:

{ \
}
Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 927 of file utf_old.h.

#define UTF8_APPEND_CHAR_SAFE(s, i, length, c)
 

Value:

{ \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } \
}
Deprecated:
ICU 2.4. Use U8_APPEND instead, see utf_old.h.

Definition at line 450 of file utf_old.h.

#define UTF8_APPEND_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}
Deprecated:
ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h.

Definition at line 394 of file utf_old.h.

#define UTF8_ARRAY_SIZE(size)   ((5*(size))/2)
 

Average number of code units compared to UTF-16.

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 353 of file utf_old.h.

#define UTF8_BACK_1_SAFE(s, start, i)   U8_BACK_1(s, start, i)
 

Deprecated:
ICU 2.4. Renamed to U8_BACK_1, see utf_old.h.

Definition at line 532 of file utf_old.h.

#define UTF8_BACK_1_UNSAFE(s, i)
 

Value:

{ \
    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
}
Deprecated:
ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h.

Definition at line 496 of file utf_old.h.

#define UTF8_BACK_N_SAFE(s, start, i, n)   U8_BACK_N(s, start, i, n)
 

Deprecated:
ICU 2.4. Renamed to U8_BACK_N, see utf_old.h.

Definition at line 536 of file utf_old.h.

#define UTF8_BACK_N_UNSAFE(s, i, n)
 

Value:

{ \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}
Deprecated:
ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h.

Definition at line 502 of file utf_old.h.

#define UTF8_CHAR_LENGTH(c)
 

Value:

((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
            ) \
        )
Given the code point c, how many bytes (code units) does its UTF-8 encoding take?

ICU does not deal with code points >0x10ffff unless necessary for advancing in the byte stream.

These length macros take into account that for values >0x10ffff the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff with 3 bytes. Code point comparisons need to be in uint32_t because UChar32 may be a signed type, and negative values must be recognized.

Deprecated:
ICU 2.4. Use U8_LENGTH instead, see utf_old.h.

Definition at line 326 of file utf_old.h.

#define UTF8_COUNT_TRAIL_BYTES(leadByte)   (utf8_countTrailBytes[(uint8_t)leadByte])
 

Count the trail bytes for a UTF-8 lead byte.

Deprecated:
ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h.

Definition at line 288 of file utf_old.h.

#define UTF8_ERROR_VALUE_1   0x15
 

UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8, which need 1 or 2 bytes in UTF-8:
U+0015 = NAK = Negative Acknowledge, C0 control character
U+009f = highest C1 control character

These are used by UTF8_..._SAFE macros so that they can return an error value that needs the same number of code units (bytes) as were seen by a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().

Definition at line 202 of file utf_old.h.

#define UTF8_ERROR_VALUE_2   0x9f
 

See documentation on UTF8_ERROR_VALUE_1 for details.

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 210 of file utf_old.h.

#define UTF8_FWD_1_SAFE(s, i, length)   U8_FWD_1(s, i, length)
 

Deprecated:
ICU 2.4. Renamed to U8_FWD_1, see utf_old.h.

Definition at line 460 of file utf_old.h.

#define UTF8_FWD_1_UNSAFE(s, i)
 

Value:

{ \
    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
}
Deprecated:
ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h.

Definition at line 415 of file utf_old.h.

#define UTF8_FWD_N_SAFE(s, i, length, n)   U8_FWD_N(s, i, length, n)
 

Deprecated:
ICU 2.4. Renamed to U8_FWD_N, see utf_old.h.

Definition at line 464 of file utf_old.h.

#define UTF8_FWD_N_UNSAFE(s, i, n)
 

Value:

{ \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}
Deprecated:
ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h.

Definition at line 421 of file utf_old.h.

#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict)
 

Value:

{ \
    int32_t _utf8_get_char_safe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
    UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
}
Deprecated:
ICU 2.4. Use U8_GET instead, see utf_old.h.

Definition at line 365 of file utf_old.h.

#define UTF8_GET_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
    UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
}
Deprecated:
ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h.

Definition at line 357 of file utf_old.h.

#define UTF8_IS_LEAD(uchar)   ((uint8_t)((uchar)-0xc0)<0x3e)
 

Is this code unit the lead code unit (byte) of a code point?

Deprecated:
ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h.

Definition at line 302 of file utf_old.h.

#define UTF8_IS_SINGLE(uchar)   (((uchar)&0x80)==0)
 

Is this code point a single code unit (byte)?

Deprecated:
ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h.

Definition at line 299 of file utf_old.h.

#define UTF8_IS_TRAIL(uchar)   (((uchar)&0xc0)==0x80)
 

Is this code unit a trailing code unit (byte) of a code point?

Deprecated:
ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h.

Definition at line 305 of file utf_old.h.

#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes)   ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
 

Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.

Deprecated:
ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h.

Definition at line 295 of file utf_old.h.

#define UTF8_MAX_CHAR_LENGTH   4
 

The maximum number of bytes per code point.

Deprecated:
ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h.

Definition at line 349 of file utf_old.h.

#define UTF8_NEED_MULTIPLE_UCHAR(c)   ((uint32_t)(c)>0x7f)
 

Does this scalar Unicode value need multiple code units for storage?

Deprecated:
ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h.

Definition at line 309 of file utf_old.h.

#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict)
 

Value:

{ \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(UTF8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}
Deprecated:
ICU 2.4. Use U8_NEXT instead, see utf_old.h.

Definition at line 437 of file utf_old.h.

#define UTF8_NEXT_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
}
Deprecated:
ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h.

Definition at line 373 of file utf_old.h.

#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict)
 

Value:

{ \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}
Deprecated:
ICU 2.4. Use U8_PREV instead, see utf_old.h.

Definition at line 519 of file utf_old.h.

#define UTF8_PREV_CHAR_UNSAFE(s, i, c)
 

Value:

{ \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
}
Deprecated:
ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h.

Definition at line 472 of file utf_old.h.

#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length)   U8_SET_CP_LIMIT(s, start, i, length)
 

Deprecated:
ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h.

Definition at line 540 of file utf_old.h.

#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i)
 

Value:

{ \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}
Deprecated:
ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h.

Definition at line 512 of file utf_old.h.

#define UTF8_SET_CHAR_START_SAFE(s, start, i)   U8_SET_CP_START(s, start, i)
 

Deprecated:
ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h.

Definition at line 468 of file utf_old.h.

#define UTF8_SET_CHAR_START_UNSAFE(s, i)
 

Value:

{ \
    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
}
Deprecated:
ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h.

Definition at line 431 of file utf_old.h.

#define UTF_APPEND_CHAR(s, i, length, c)   UTF16_APPEND_CHAR_SAFE(s, i, length, c)
 

Append the code units of code point c to the string at index i and advance i to beyond the new code units (post-increment).

The code units beginning at index i will be overwritten. Same as UTF16_APPEND_CHAR.

Precondition:
0<=c<=0x10ffff

0<=i<length

Postcondition:
0<i<=length

Deprecated:
ICU 2.4. Use U16_APPEND instead, see utf_old.h.

Definition at line 1209 of file utf_old.h.

#define UTF_APPEND_CHAR_SAFE(s, i, length, c)   UTF16_APPEND_CHAR_SAFE(s, i, length, c)
 

Deprecated:
ICU 2.4. Use U16_APPEND instead, see utf_old.h.

Definition at line 1056 of file utf_old.h.

#define UTF_APPEND_CHAR_UNSAFE(s, i, c)   UTF16_APPEND_CHAR_UNSAFE(s, i, c)
 

Deprecated:
ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h.

Definition at line 1052 of file utf_old.h.

#define UTF_ARRAY_SIZE(size)   UTF16_ARRAY_SIZE(size)
 

Estimate the number of code units for a string based on the number of UTF-16 code units.

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 1030 of file utf_old.h.

#define UTF_BACK_1(s, start, i)   U16_BACK_1(s, start, i)
 

Move i backward (towards the beginning of the string) to the first code unit of the code point that has code units before i.

I.e., move i backward by one code point. i must point to the first code unit after the last unit of a code point (i==length is allowed). Same as UTF16_BACK_1.

Precondition:
start<i<=length
Postcondition:
start<=i<length

Deprecated:
ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.

Definition at line 1278 of file utf_old.h.

#define UTF_BACK_1_SAFE(s, start, i)   UTF16_BACK_1_SAFE(s, start, i)
 

Deprecated:
ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.

Definition at line 1101 of file utf_old.h.

#define UTF_BACK_1_UNSAFE(s, i)   UTF16_BACK_1_UNSAFE(s, i)
 

Deprecated:
ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h.

Definition at line 1097 of file utf_old.h.

#define UTF_BACK_N(s, start, i, n)   U16_BACK_N(s, start, i, n)
 

Move i backward (towards the beginning of the string) to the first code unit of the n code points that have code units before i.

I.e., move i backward by n code points. i must point to the first code unit after the last unit of a code point (i==length is allowed). Same as UTF16_BACK_N.

Precondition:
start<i<=length
Postcondition:
start<=i<length

Deprecated:
ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.

Definition at line 1292 of file utf_old.h.

#define UTF_BACK_N_SAFE(s, start, i, n)   UTF16_BACK_N_SAFE(s, start, i, n)
 

Deprecated:
ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.

Definition at line 1110 of file utf_old.h.

#define UTF_BACK_N_UNSAFE(s, i, n)   UTF16_BACK_N_UNSAFE(s, i, n)
 

Deprecated:
ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h.

Definition at line 1106 of file utf_old.h.

#define UTF_CHAR_LENGTH(c)   U16_LENGTH(c)
 

How many code units are used to encode this code point (1 or 2)? Same as UTF16_CHAR_LENGTH.

Deprecated:
ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.

Definition at line 1161 of file utf_old.h.

#define UTF_ERROR_VALUE   0xffff
 

Error value for all UTFs.

This code point value will be set by macros with error checking if an error is detected.

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 219 of file utf_old.h.

#define UTF_FIRST_SURROGATE(supplementary)   (UChar)(((supplementary)>>10)+0xd7c0)
 

Deprecated:
ICU 2.4. Renamed to U16_LEAD, see utf_old.h.

Definition at line 567 of file utf_old.h.

#define UTF_FWD_1(s, i, length)   U16_FWD_1(s, i, length)
 

Advance i to beyond the code units of the code point that begins at i.

I.e., advance i by one code point. Same as UTF16_FWD_1.

Precondition:
0<=i<length
Postcondition:
0<i<=length

Deprecated:
ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.

Definition at line 1221 of file utf_old.h.

#define UTF_FWD_1_SAFE(s, i, length)   UTF16_FWD_1_SAFE(s, i, length)
 

Deprecated:
ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.

Definition at line 1065 of file utf_old.h.

#define UTF_FWD_1_UNSAFE(s, i)   UTF16_FWD_1_UNSAFE(s, i)
 

Deprecated:
ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h.

Definition at line 1061 of file utf_old.h.

#define UTF_FWD_N(s, i, length, n)   U16_FWD_N(s, i, length, n)
 

Advance i to beyond the code units of the n code points where the first one begins at i.

I.e., advance i by n code points. Same as UTF16_FWD_N.

Precondition:
0<=i<length
Postcondition:
0<i<=length

Deprecated:
ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.

Definition at line 1233 of file utf_old.h.

#define UTF_FWD_N_SAFE(s, i, length, n)   UTF16_FWD_N_SAFE(s, i, length, n)
 

Deprecated:
ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.

Definition at line 1074 of file utf_old.h.

#define UTF_FWD_N_UNSAFE(s, i, n)   UTF16_FWD_N_UNSAFE(s, i, n)
 

Deprecated:
ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h.

Definition at line 1070 of file utf_old.h.

#define UTF_GET_CHAR(s, start, i, length, c)   U16_GET(s, start, i, length, c)
 

Set c to the code point that contains the code unit i.

i could point to the lead or the trail surrogate for the code point. i is not modified. Same as UTF16_GET_CHAR.

Precondition:
0<=i<length

Deprecated:
ICU 2.4. Renamed to U16_GET, see utf_old.h.

Definition at line 1181 of file utf_old.h.

#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict)   UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
 

Deprecated:
ICU 2.4. Use U16_GET instead, see utf_old.h.

Definition at line 1038 of file utf_old.h.

#define UTF_GET_CHAR_UNSAFE(s, i, c)   UTF16_GET_CHAR_UNSAFE(s, i, c)
 

Deprecated:
ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h.

Definition at line 1034 of file utf_old.h.

#define UTF_IS_ERROR(c)   (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
 

Is a given 32-bit code an error value as returned by one of the macros for any UTF?

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 228 of file utf_old.h.

#define UTF_IS_FIRST_SURROGATE(uchar)   (((uchar)&0xfffffc00)==0xd800)
 

Is uchar a first/lead surrogate?

Deprecated:
ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.

Definition at line 546 of file utf_old.h.

#define UTF_IS_LEAD(uchar)   U16_IS_LEAD(uchar)
 

Is this code unit the first one of several (a lead surrogate)? Same as UTF16_IS_LEAD.

Deprecated:
ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.

Definition at line 1137 of file utf_old.h.

#define UTF_IS_SECOND_SURROGATE(uchar)   (((uchar)&0xfffffc00)==0xdc00)
 

Is uchar a second/trail surrogate?

Deprecated:
ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.

Definition at line 550 of file utf_old.h.

#define UTF_IS_SINGLE(uchar)   U16_IS_SINGLE(uchar)
 

Does this code unit alone encode a code point (BMP, not a surrogate)? Same as UTF16_IS_SINGLE.

Deprecated:
ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h.

Definition at line 1129 of file utf_old.h.

#define UTF_IS_SURROGATE(uchar)   (((uchar)&0xfffff800)==0xd800)
 

Is this code unit or code point a surrogate (U+d800..U+dfff)?

Deprecated:
ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h.

Definition at line 246 of file utf_old.h.

#define UTF_IS_SURROGATE_FIRST(c)   (((c)&0x400)==0)
 

Assuming c is a surrogate, is it a first/lead surrogate?

Deprecated:
ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h.

Definition at line 554 of file utf_old.h.

#define UTF_IS_TRAIL(uchar)   U16_IS_TRAIL(uchar)
 

Is this code unit one of several but not the first one (a trail surrogate)? Same as UTF16_IS_TRAIL.

Deprecated:
ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.

Definition at line 1145 of file utf_old.h.

#define UTF_IS_UNICODE_CHAR(c)
 

Value:

((uint32_t)(c)<0xd800 || \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))
Is a given 32-bit value a Unicode code point value (0..U+10ffff) that can be assigned a character?

Code points that are not characters include:

  • single surrogate code points (U+d800..U+dfff, 2048 code points)
  • the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
  • U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
  • the highest Unicode code point value is U+10ffff

This means that all code points below U+d800 are character code points, and that boundary is tested first for performance.

Deprecated:
ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h.

Definition at line 275 of file utf_old.h.

#define UTF_IS_UNICODE_NONCHAR(c)
 

Value:

((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
     (uint32_t)(c)<=0x10ffff)
Is a given 32-bit code point a Unicode noncharacter?

Deprecated:
ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h.

Definition at line 254 of file utf_old.h.

#define UTF_IS_VALID(c)
 

Value:

(UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
This is a combined macro: Is c a valid Unicode value _and_ not an error code?

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 237 of file utf_old.h.

#define UTF_MAX_CHAR_LENGTH   U16_MAX_LENGTH
 

How many code units are used at most for any Unicode code point (2)? Same as UTF16_MAX_CHAR_LENGTH.

Deprecated:
ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h.

Definition at line 1169 of file utf_old.h.

#define UTF_NEED_MULTIPLE_UCHAR(c)   UTF16_NEED_MULTIPLE_UCHAR(c)
 

Does this code point require multiple code units (is it a supplementary code point)? Same as UTF16_NEED_MULTIPLE_UCHAR.

Deprecated:
ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead.

Definition at line 1153 of file utf_old.h.

#define UTF_NEXT_CHAR(s, i, length, c)   U16_NEXT(s, i, length, c)
 

Set c to the code point that starts at code unit i and advance i to beyond the code units of this code point (post-increment).

i must point to the first code unit of a code point. Otherwise c is set to the trail unit (surrogate) itself. Same as UTF16_NEXT_CHAR.

Precondition:
0<=i<length
Postcondition:
0<i<=length

Deprecated:
ICU 2.4. Renamed to U16_NEXT, see utf_old.h.

Definition at line 1195 of file utf_old.h.

#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)   UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
 

Deprecated:
ICU 2.4. Use U16_NEXT instead, see utf_old.h.

Definition at line 1047 of file utf_old.h.

#define UTF_NEXT_CHAR_UNSAFE(s, i, c)   UTF16_NEXT_CHAR_UNSAFE(s, i, c)
 

Deprecated:
ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h.

Definition at line 1043 of file utf_old.h.

#define UTF_PREV_CHAR(s, start, i, c)   U16_PREV(s, start, i, c)
 

Set c to the code point that has code units before i and move i backward (towards the beginning of the string) to the first code unit of this code point (pre-decrement).

i must point to the first code unit after the last unit of a code point (i==length is allowed). Same as UTF16_PREV_CHAR.

Precondition:
start<i<=length
Postcondition:
start<=i<length

Deprecated:
ICU 2.4. Renamed to U16_PREV, see utf_old.h.

Definition at line 1264 of file utf_old.h.

#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
 

Deprecated:
ICU 2.4. Use U16_PREV instead, see utf_old.h.

Definition at line 1092 of file utf_old.h.

#define UTF_PREV_CHAR_UNSAFE(s, i, c)   UTF16_PREV_CHAR_UNSAFE(s, i, c)
 

Deprecated:
ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h.

Definition at line 1088 of file utf_old.h.

#define UTF_SAFE
 

The default choice for general Unicode string macros is to use the ..._SAFE macro implementations with strict=FALSE.

Deprecated:
ICU 2.4. Obsolete, see utf_old.h.

Definition at line 181 of file utf_old.h.

#define UTF_SECOND_SURROGATE(supplementary)   (UChar)(((supplementary)&0x3ff)|0xdc00)
 

Deprecated:
ICU 2.4. Renamed to U16_TRAIL, see utf_old.h.

Definition at line 571 of file utf_old.h.

#define UTF_SET_CHAR_LIMIT(s, start, i, length)   U16_SET_CP_LIMIT(s, start, i, length)
 

Take the random-access index i and adjust it so that it points beyond a code point.

The input index points beyond any code unit of a code point and is moved to point beyond the last code unit of the same code point. i is never decremented. In other words, if i points to a trail surrogate that is preceded by a matching lead surrogate, then i is incremented. Otherwise it is not modified. This can be used to start an iteration with UTF_PREV_CHAR() from a random index. Same as UTF16_SET_CHAR_LIMIT.

Precondition:
start<i<=length
Postcondition:
start<i<=length

Deprecated:
ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.

Definition at line 1309 of file utf_old.h.

#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)   UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
 

Deprecated:
ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.

Definition at line 1119 of file utf_old.h.

#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)   UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
 

Deprecated:
ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h.

Definition at line 1115 of file utf_old.h.

#define UTF_SET_CHAR_START(s, start, i)   U16_SET_CP_START(s, start, i)
 

Take the random-access index i and adjust it so that it points to the beginning of a code point.

The input index points to any code unit of a code point and is moved to point to the first code unit of the same code point. i is never incremented. In other words, if i points to a trail surrogate that is preceded by a matching lead surrogate, then i is decremented. Otherwise it is not modified. This can be used to start an iteration with UTF_NEXT_CHAR() from a random index. Same as UTF16_SET_CHAR_START.

Precondition:
start<=i<length
Postcondition:
start<=i<length

Deprecated:
ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.

Definition at line 1250 of file utf_old.h.

#define UTF_SET_CHAR_START_SAFE(s, start, i)   UTF16_SET_CHAR_START_SAFE(s, start, i)
 

Deprecated:
ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.

Definition at line 1083 of file utf_old.h.

#define UTF_SET_CHAR_START_UNSAFE(s, i)   UTF16_SET_CHAR_START_UNSAFE(s, i)
 

Deprecated:
ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h.

Definition at line 1079 of file utf_old.h.

#define UTF_SURROGATE_OFFSET   ((0xd800<<10UL)+0xdc00-0x10000)
 

Helper constant for UTF16_GET_PAIR_VALUE.

Deprecated:
ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h.

Definition at line 558 of file utf_old.h.


Generated on Mon Jul 14 00:23:54 2008 for ICU 3.6 by  doxygen 1.3.9.1