sl@0: /* sl@0: ********************************************************************** sl@0: * Copyright (C) 1999-2005, International Business Machines sl@0: * Corporation and others. All Rights Reserved. sl@0: ********************************************************************** sl@0: * sl@0: * sl@0: * ucnv_err.h: sl@0: */ sl@0: sl@0: /** sl@0: * \file sl@0: * \brief C UConverter predefined error callbacks sl@0: * sl@0: *

<h2>Error Behaviour Functions</h2>

sl@0: * Defines some error behaviour functions called by ucnv_{from,to}Unicode. sl@0: * These are provided as part of ICU and many are stable, but they sl@0: * can also be considered only as an example of what can be done with sl@0: * callbacks. You may of course write your own. sl@0: * sl@0: * If you want to write your own, you may also find the functions from sl@0: * ucnv_cb.h useful when writing your own callbacks. sl@0: * sl@0: * These functions, although public, should NEVER be called directly. sl@0: * They should be used as parameters to the ucnv_setFromUCallBack sl@0: * and ucnv_setToUCallBack functions, to set the behaviour of a converter sl@0: * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. sl@0: * sl@0: * Usage example: 'STOP' doesn't need any context, but newContext sl@0: * could be set to something other than 'NULL' if needed. The available sl@0: * contexts in this header can modify the default behavior of the callback. sl@0: * sl@0: * \code sl@0: * UErrorCode err = U_ZERO_ERROR; sl@0: * UConverter *myConverter = ucnv_open("ibm-949", &err); sl@0: * const void *oldContext; sl@0: * UConverterFromUCallback oldAction; sl@0: * sl@0: * sl@0: * if (U_SUCCESS(err)) sl@0: * { sl@0: * ucnv_setFromUCallBack(myConverter, sl@0: * UCNV_FROM_U_CALLBACK_STOP, sl@0: * NULL, sl@0: * &oldAction, sl@0: * &oldContext, sl@0: * &err); sl@0: * } sl@0: * \endcode sl@0: * sl@0: * The code above tells "myConverter" to stop when it encounters an sl@0: * ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from sl@0: * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, sl@0: * and ucnv_setToUCallBack would need to be called in order to change sl@0: * that behavior too. 
sl@0: * sl@0: * Here is an example with a context: sl@0: * sl@0: * \code sl@0: * UErrorCode err = U_ZERO_ERROR; sl@0: * UConverter *myConverter = ucnv_open("ibm-949", &err); sl@0: * const void *oldContext; sl@0: * UConverterToUCallback oldAction; sl@0: * sl@0: * sl@0: * if (U_SUCCESS(err)) sl@0: * { sl@0: * ucnv_setToUCallBack(myConverter, sl@0: * UCNV_TO_U_CALLBACK_SUBSTITUTE, sl@0: * UCNV_SUB_STOP_ON_ILLEGAL, sl@0: * &oldAction, sl@0: * &oldContext, sl@0: * &err); sl@0: * } sl@0: * \endcode sl@0: * sl@0: * The code above tells "myConverter" to stop when it encounters an sl@0: * ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from sl@0: * Codepage -> Unicode. Any unmapped and legal characters will be sl@0: * replaced with the default substitution character. sl@0: */ sl@0: sl@0: #ifndef UCNV_ERR_H sl@0: #define UCNV_ERR_H sl@0: sl@0: #include "unicode/utypes.h" sl@0: sl@0: #if !UCONFIG_NO_CONVERSION sl@0: sl@0: /** Forward declaring the UConverter structure. @stable ICU 2.0 */ sl@0: struct UConverter; sl@0: sl@0: /** @stable ICU 2.0 */ sl@0: typedef struct UConverter UConverter; sl@0: sl@0: /** sl@0: * FROM_U, TO_U context options for sub callback sl@0: * @stable ICU 2.0 sl@0: */ sl@0: #define UCNV_SUB_STOP_ON_ILLEGAL "i" sl@0: sl@0: /** sl@0: * FROM_U, TO_U context options for skip callback sl@0: * @stable ICU 2.0 sl@0: */ sl@0: #define UCNV_SKIP_STOP_ON_ILLEGAL "i" sl@0: sl@0: /** sl@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) sl@0: * @stable ICU 2.0 sl@0: */ sl@0: #define UCNV_ESCAPE_ICU NULL sl@0: /** sl@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) sl@0: * @stable ICU 2.0 sl@0: */ sl@0: #define UCNV_ESCAPE_JAVA "J" sl@0: /** sl@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) sl@0: * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX) sl@0: * @stable ICU 
2.0 sl@0: */ sl@0: #define UCNV_ESCAPE_C "C" sl@0: /** sl@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly sl@0: * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly sl@0: * @stable ICU 2.0 sl@0: */ sl@0: #define UCNV_ESCAPE_XML_DEC "D" sl@0: /** sl@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly sl@0: * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly sl@0: * @stable ICU 2.0 sl@0: */ sl@0: #define UCNV_ESCAPE_XML_HEX "X" sl@0: /** sl@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) sl@0: * @stable ICU 2.0 sl@0: */ sl@0: #define UCNV_ESCAPE_UNICODE "U" sl@0: sl@0: /** sl@0: * The process condition code to be used with the callbacks. sl@0: * Codes which are greater than UCNV_IRREGULAR should be sl@0: * passed on to any chained callbacks. sl@0: * @stable ICU 2.0 sl@0: */ sl@0: typedef enum { sl@0: UCNV_UNASSIGNED = 0, /**< The code point is unassigned. sl@0: The error code U_INVALID_CHAR_FOUND will be set. */ sl@0: UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, sl@0: \\x81\\x2E is illegal in SJIS because \\x2E sl@0: is not a valid trail byte for the \\x81 sl@0: lead byte. sl@0: Also, starting with Unicode 3.0.1, non-shortest byte sequences sl@0: in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) sl@0: are also illegal, not just irregular. sl@0: The error code U_ILLEGAL_CHAR_FOUND will be set. */ sl@0: UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in sl@0: the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF sl@0: are irregular UTF-8 byte sequences for single surrogate sl@0: code points. sl@0: The error code U_INVALID_CHAR_FOUND will be set. 
*/ sl@0: UCNV_RESET = 3, /**< The callback is called with this reason when a sl@0: 'reset' has occurred. Callback should reset all sl@0: state. */ sl@0: UCNV_CLOSE = 4, /**< Called when the converter is closed. The sl@0: callback should release any allocated memory.*/ sl@0: UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the sl@0: converter. The pointer available as the sl@0: 'context' is an alias to the original converter's sl@0: context pointer. If the context must be owned sl@0: by the new converter, the callback must clone sl@0: the data and call ucnv_setFromUCallBack sl@0: (or ucnv_setToUCallBack) with the correct pointer. sl@0: @stable ICU 2.2 sl@0: */ sl@0: } UConverterCallbackReason; sl@0: sl@0: sl@0: /** sl@0: * The structure for the fromUnicode callback function parameter. sl@0: * @stable ICU 2.0 sl@0: */ sl@0: typedef struct { sl@0: uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ sl@0: UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ sl@0: UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ sl@0: const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */ sl@0: const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ sl@0: char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ sl@0: const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ sl@0: int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ sl@0: } UConverterFromUnicodeArgs; sl@0: sl@0: sl@0: /** sl@0: * The structure for the toUnicode callback function parameter. 
sl@0: * @stable ICU 2.0 sl@0: */ sl@0: typedef struct { sl@0: uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ sl@0: UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ sl@0: UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ sl@0: const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */ sl@0: const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ sl@0: UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ sl@0: const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ sl@0: int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ sl@0: } UConverterToUnicodeArgs; sl@0: sl@0: sl@0: /** sl@0: * DO NOT CALL THIS FUNCTION DIRECTLY! sl@0: * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, sl@0: * returning the error code back to the caller immediately. sl@0: * sl@0: * @param context Pointer to the callback's private data sl@0: * @param fromUArgs Information about the conversion in progress sl@0: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence sl@0: * @param length Number of UChars in the concerned Unicode sequence sl@0: * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. sl@0: * @param reason Defines the reason the callback was invoked sl@0: * @param err This should always be set to a failure status prior to calling. 
sl@0: * @stable ICU 2.0 sl@0: */ sl@0: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( sl@0: const void *context, sl@0: UConverterFromUnicodeArgs *fromUArgs, sl@0: const UChar* codeUnits, sl@0: int32_t length, sl@0: UChar32 codePoint, sl@0: UConverterCallbackReason reason, sl@0: UErrorCode * err); sl@0: sl@0: sl@0: sl@0: /** sl@0: * DO NOT CALL THIS FUNCTION DIRECTLY! sl@0: * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, sl@0: * returning the error code back to the caller immediately. sl@0: * sl@0: * @param context Pointer to the callback's private data sl@0: * @param toUArgs Information about the conversion in progress sl@0: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence sl@0: * @param length Size (in bytes) of the concerned codepage sequence sl@0: * @param reason Defines the reason the callback was invoked sl@0: * @param err This should always be set to a failure status prior to calling. sl@0: * @stable ICU 2.0 sl@0: */ sl@0: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( sl@0: const void *context, sl@0: UConverterToUnicodeArgs *toUArgs, sl@0: const char* codeUnits, sl@0: int32_t length, sl@0: UConverterCallbackReason reason, sl@0: UErrorCode * err); sl@0: sl@0: /** sl@0: * DO NOT CALL THIS FUNCTION DIRECTLY! sl@0: * This From Unicode callback skips any ILLEGAL_SEQUENCE, or sl@0: * skips only UNASSIGNED_SEQUENCE depending on the context parameter, sl@0: * simply ignoring those characters. sl@0: * sl@0: * @param context The function currently recognizes the callback options: sl@0: * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, sl@0: * returning the error code back to the caller immediately. 
sl@0: * NULL: Skips any ILLEGAL_SEQUENCE sl@0: * @param fromUArgs Information about the conversion in progress sl@0: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence sl@0: * @param length Number of UChars in the concerned Unicode sequence sl@0: * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. sl@0: * @param reason Defines the reason the callback was invoked sl@0: * @param err Return value will be set to success if the callback was handled, sl@0: * otherwise this value will be set to a failure status. sl@0: * @stable ICU 2.0 sl@0: */ sl@0: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( sl@0: const void *context, sl@0: UConverterFromUnicodeArgs *fromUArgs, sl@0: const UChar* codeUnits, sl@0: int32_t length, sl@0: UChar32 codePoint, sl@0: UConverterCallbackReason reason, sl@0: UErrorCode * err); sl@0: sl@0: /** sl@0: * DO NOT CALL THIS FUNCTION DIRECTLY! sl@0: * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or sl@0: * UNASSIGNED_SEQUENCE depending on context parameter, with the sl@0: * current substitution string for the converter. This is the default sl@0: * callback. sl@0: * sl@0: * @param context The function currently recognizes the callback options: sl@0: * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, sl@0: * returning the error code back to the caller immediately. sl@0: * NULL: Substitutes any ILLEGAL_SEQUENCE sl@0: * @param fromUArgs Information about the conversion in progress sl@0: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence sl@0: * @param length Number of UChars in the concerned Unicode sequence sl@0: * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. sl@0: * @param reason Defines the reason the callback was invoked sl@0: * @param err Return value will be set to success if the callback was handled, sl@0: * otherwise this value will be set to a failure status. 
sl@0: * @see ucnv_setSubstChars sl@0: * @stable ICU 2.0 sl@0: */ sl@0: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( sl@0: const void *context, sl@0: UConverterFromUnicodeArgs *fromUArgs, sl@0: const UChar* codeUnits, sl@0: int32_t length, sl@0: UChar32 codePoint, sl@0: UConverterCallbackReason reason, sl@0: UErrorCode * err); sl@0: sl@0: /** sl@0: * DO NOT CALL THIS FUNCTION DIRECTLY! sl@0: * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the sl@0: * hexadecimal (or, for UCNV_ESCAPE_XML_DEC, decimal) representation of the illegal codepoints. sl@0: * sl@0: * @param context The function currently recognizes the callback options: sl@0: *

<ul>
<li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal sl@0: * representation in the format %UXXXX (e.g. "%UFFFE%U00AC%UC8FE"). sl@0: * In the event the converter doesn't support the characters {%,U}[A-F][0-9], sl@0: * it will substitute the illegal sequence with the substitution characters. sl@0: * Note that a supplementary code point (one encoded as a surrogate pair, e.g. U+23456) is represented as sl@0: * %UD84D%UDC56</li>
<li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal sl@0: * representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE"). sl@0: * In the event the converter doesn't support the characters {\,u}[A-F][0-9], sl@0: * it will substitute the illegal sequence with the substitution characters. sl@0: * Note that a supplementary code point (one encoded as a surrogate pair, e.g. U+23456) is represented as sl@0: * \\uD84D\\uDC56</li>
<li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal sl@0: * representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE"). sl@0: * In the event the converter doesn't support the characters {\,u,U}[A-F][0-9], sl@0: * it will substitute the illegal sequence with the substitution characters. sl@0: * Note that a supplementary code point (one encoded as a surrogate pair, e.g. U+23456) is represented as sl@0: * \\U00023456</li>
<li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal sl@0: * representation in the format \htmlonly&#DDDDDDDD; (e.g. "&#65534;&#172;&#51454;")\endhtmlonly. sl@0: * In the event the converter doesn't support the characters {&,#}[0-9], sl@0: * it will substitute the illegal sequence with the substitution characters. sl@0: * Note that a supplementary code point (one encoded as a surrogate pair, e.g. U+23456) is represented as sl@0: * \htmlonly&#144470;\endhtmlonly and zero padding is ignored.</li>
<li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal sl@0: * representation in the format \htmlonly&#xXXXX; (e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly. sl@0: * In the event the converter doesn't support the characters {&,#,x}[0-9], sl@0: * it will substitute the illegal sequence with the substitution characters. sl@0: * Note that a supplementary code point (one encoded as a surrogate pair, e.g. U+23456) is represented as sl@0: * \htmlonly&#x23456;\endhtmlonly</li>
</ul>

sl@0: * @param fromUArgs Information about the conversion in progress sl@0: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence sl@0: * @param length Number of UChars in the concerned Unicode sequence sl@0: * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. sl@0: * @param reason Defines the reason the callback was invoked sl@0: * @param err Return value will be set to success if the callback was handled, sl@0: * otherwise this value will be set to a failure status. sl@0: * @stable ICU 2.0 sl@0: */ sl@0: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( sl@0: const void *context, sl@0: UConverterFromUnicodeArgs *fromUArgs, sl@0: const UChar* codeUnits, sl@0: int32_t length, sl@0: UChar32 codePoint, sl@0: UConverterCallbackReason reason, sl@0: UErrorCode * err); sl@0: sl@0: sl@0: /** sl@0: * DO NOT CALL THIS FUNCTION DIRECTLY! sl@0: * This To Unicode callback skips any ILLEGAL_SEQUENCE, or sl@0: * skips only UNASSIGNED_SEQUENCE depending on the context parameter, sl@0: * simply ignoring those characters. sl@0: * sl@0: * @param context The function currently recognizes the callback options: sl@0: * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, sl@0: * returning the error code back to the caller immediately. sl@0: * NULL: Skips any ILLEGAL_SEQUENCE sl@0: * @param toUArgs Information about the conversion in progress sl@0: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence sl@0: * @param length Size (in bytes) of the concerned codepage sequence sl@0: * @param reason Defines the reason the callback was invoked sl@0: * @param err Return value will be set to success if the callback was handled, sl@0: * otherwise this value will be set to a failure status. 
sl@0: * @stable ICU 2.0 sl@0: */ sl@0: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( sl@0: const void *context, sl@0: UConverterToUnicodeArgs *toUArgs, sl@0: const char* codeUnits, sl@0: int32_t length, sl@0: UConverterCallbackReason reason, sl@0: UErrorCode * err); sl@0: sl@0: /** sl@0: * DO NOT CALL THIS FUNCTION DIRECTLY! sl@0: * This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or sl@0: * UNASSIGNED_SEQUENCE depending on context parameter, with the sl@0: * Unicode substitution character, U+FFFD. sl@0: * sl@0: * @param context The function currently recognizes the callback options: sl@0: * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, sl@0: * returning the error code back to the caller immediately. sl@0: * NULL: Substitutes any ILLEGAL_SEQUENCE sl@0: * @param toUArgs Information about the conversion in progress sl@0: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence sl@0: * @param length Size (in bytes) of the concerned codepage sequence sl@0: * @param reason Defines the reason the callback was invoked sl@0: * @param err Return value will be set to success if the callback was handled, sl@0: * otherwise this value will be set to a failure status. sl@0: * @stable ICU 2.0 sl@0: */ sl@0: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( sl@0: const void *context, sl@0: UConverterToUnicodeArgs *toUArgs, sl@0: const char* codeUnits, sl@0: int32_t length, sl@0: UConverterCallbackReason reason, sl@0: UErrorCode * err); sl@0: sl@0: /** sl@0: * DO NOT CALL THIS FUNCTION DIRECTLY! sl@0: * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the sl@0: * hexadecimal representation of the illegal bytes sl@0: * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). sl@0: * sl@0: * @param context This function currently recognizes the callback options: sl@0: * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, sl@0: * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. 
sl@0: * @param toUArgs Information about the conversion in progress sl@0: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence sl@0: * @param length Size (in bytes) of the concerned codepage sequence sl@0: * @param reason Defines the reason the callback was invoked sl@0: * @param err Return value will be set to success if the callback was handled, sl@0: * otherwise this value will be set to a failure status. sl@0: * @stable ICU 2.0 sl@0: */ sl@0: sl@0: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( sl@0: const void *context, sl@0: UConverterToUnicodeArgs *toUArgs, sl@0: const char* codeUnits, sl@0: int32_t length, sl@0: UConverterCallbackReason reason, sl@0: UErrorCode * err); sl@0: sl@0: #endif /* !UCONFIG_NO_CONVERSION */ sl@0: sl@0: #endif sl@0: sl@0: /*UCNV_ERR_H*/