Symaptic: os/textandloc/fontservices/textshaperplugin/IcuSource/common/ucnv

sl@0	1	/*
sl@0	2	**********************************************************************
sl@0	3	* Copyright (C) 1999-2004, International Business Machines
sl@0	4	* Corporation and others. All Rights Reserved.
sl@0	5	**********************************************************************
sl@0	6	*
sl@0	7	* uconv_cnv.h:
sl@0	8	* defines all the low level conversion functions
sl@0	9	* T_UnicodeConverter_{to,from}Unicode_$ConversionType
sl@0	10	*
sl@0	11	* Modification History:
sl@0	12	*
sl@0	13	* Date Name Description
sl@0	14	* 05/09/00 helena Added implementation to handle fallback mappings.
sl@0	15	* 06/29/2000 helena Major rewrite of the callback APIs.
sl@0	16	*/
sl@0	17
sl@0	18	#ifndef UCNV_CNV_H
sl@0	19	#define UCNV_CNV_H
sl@0	20
sl@0	21	#include "unicode/utypes.h"
sl@0	22
sl@0	23	#if !UCONFIG_NO_CONVERSION
sl@0	24
sl@0	25	#include "unicode/ucnv.h"
sl@0	26	#include "unicode/ucnv_err.h"
sl@0	27	#include "unicode/uset.h"
sl@0	28	#include "uset_imp.h"
sl@0	29
sl@0	30	U_CDECL_BEGIN
sl@0	31
/* Marker stored in fromUnicode DBCS tables for an "unassigned" entry. */
#define missingCharMarker 0xFFFF

/*
 * There is intentionally no
 *     #define missingUCharMarker 0xfffe
 * because toUnicode tables actually use two distinct values:
 *     U+fffe "unassigned"
 *     U+ffff "illegal"
 */
sl@0	42
/**
 * Forward declaration only; the full definition is in ucnv_bld.h.
 * The type remains incomplete within this header.
 */
struct UConverterSharedData;
typedef struct UConverterSharedData UConverterSharedData;
sl@0	46
sl@0	47	/* function types for UConverterImpl ---------------------------------------- */
sl@0	48
/**
 * Argument bundle passed to UConverterLoad implementations and ucnv_load().
 *
 * The three leading int32_t fields plus `options` come first so that the
 * trailing pointer members are well aligned (that is the only purpose of
 * `reserved`).
 */
typedef struct {
    int32_t size;          /* sizeof(UConverterLoadArgs) */
    int32_t nestedLoads;   /* count nested ucnv_load() calls */
    int32_t reserved;      /* reserved - for good alignment of the pointers */
    uint32_t options;
    const char *pkg;       /* NOTE(review): presumably the data package name - confirm */
    const char *name;      /* NOTE(review): presumably the converter name - confirm */
} UConverterLoadArgs;
sl@0	57
sl@0	58	typedef void (UConverterLoad) (UConverterSharedData sharedData,
sl@0	59	UConverterLoadArgs *pArgs,
sl@0	60	const uint8_t raw, UErrorCode pErrorCode);
sl@0	61	typedef void (UConverterUnload) (UConverterSharedData sharedData);
sl@0	62
sl@0	63	typedef void (UConverterOpen) (UConverter cnv, const char name, const char locale,uint32_t options, UErrorCode *pErrorCode);
sl@0	64	typedef void (UConverterClose) (UConverter cnv);
sl@0	65
/** Selects which conversion direction(s) a UConverterReset call applies to. */
typedef enum UConverterResetChoice {
    UCNV_RESET_BOTH,          /* both directions */
    UCNV_RESET_TO_UNICODE,    /* codepage-to-Unicode direction only */
    UCNV_RESET_FROM_UNICODE   /* Unicode-to-codepage direction only */
} UConverterResetChoice;
sl@0	71
sl@0	72	typedef void (UConverterReset) (UConverter cnv, UConverterResetChoice choice);
sl@0	73
sl@0	74	/*
sl@0	75	* Converter implementation function(s) for ucnv_toUnicode().
sl@0	76	* If the toUnicodeWithOffsets function pointer is NULL,
sl@0	77	* then the toUnicode function will be used and the offsets will be set to -1.
sl@0	78	*
sl@0	79	* Must maintain state across buffers. Use toUBytes[toULength] for partial input
sl@0	80	* sequences; it will be checked in ucnv.c at the end of the input stream
sl@0	81	* to detect truncated input.
sl@0	82	* Some converters may need additional detection and may then set U_TRUNCATED_CHAR_FOUND.
sl@0	83	*
sl@0	84	* The toUnicodeWithOffsets must write exactly as many offset values as target
sl@0	85	* units. Write offset values of -1 for when the source index corresponding to
sl@0	86	* the output unit is not known (e.g., the character started in an earlier buffer).
sl@0	87	* The pArgs->offsets pointer need not be moved forward.
sl@0	88	*
sl@0	89	* At function return, either one of the following conditions must be true:
sl@0	90	* - U_BUFFER_OVERFLOW_ERROR and the target is full: target==targetLimit
sl@0	91	* - another error code with toUBytes[toULength] set to the offending input
sl@0	92	* - no error, and the source is consumed: source==sourceLimit
sl@0	93	*
sl@0	94	* The ucnv.c code will handle the end of the input (reset)
sl@0	95	* (reset, and truncation detection) and callbacks.
sl@0	96	*/
sl@0	97	typedef void (UConverterToUnicode) (UConverterToUnicodeArgs , UErrorCode *);
sl@0	98
sl@0	99	/*
sl@0	100	* Same rules as for UConverterToUnicode.
sl@0	101	* A lead surrogate is kept in fromUChar32 across buffers, and if an error
sl@0	102	* occurs, then the offending input code point must be put into fromUChar32
sl@0	103	* as well.
sl@0	104	*/
sl@0	105	typedef void (UConverterFromUnicode) (UConverterFromUnicodeArgs , UErrorCode *);
sl@0	106
sl@0	107	/*
sl@0	108	* Converter implementation function for ucnv_getNextUChar().
sl@0	109	* If the function pointer is NULL, then the toUnicode function will be used.
sl@0	110	*
sl@0	111	* Will be called at a character boundary (toULength==0).
sl@0	112	* May return with
sl@0	113	* - U_INDEX_OUTOFBOUNDS_ERROR if there was no output for the input
sl@0	114	* (the return value will be ignored)
sl@0	115	* - U_TRUNCATED_CHAR_FOUND or another error code (never U_BUFFER_OVERFLOW_ERROR!)
sl@0	116	* with toUBytes[toULength] set to the offending input
sl@0	117	* (the return value will be ignored)
sl@0	118	* - return UCNV_GET_NEXT_UCHAR_USE_TO_U, without moving the source pointer,
sl@0	119	* to indicate that the ucnv.c code shall call the toUnicode function instead
sl@0	120	* - return a real code point result
sl@0	121	*
sl@0	122	* Unless UCNV_GET_NEXT_UCHAR_USE_TO_U is returned, the source bytes must be consumed.
sl@0	123	*
sl@0	124	* The ucnv.c code will handle the end of the input (reset)
sl@0	125	* (except for truncation detection!) and callbacks.
sl@0	126	*/
sl@0	127	typedef UChar32 (UConverterGetNextUChar) (UConverterToUnicodeArgs , UErrorCode *);
sl@0	128
sl@0	129	typedef void (UConverterGetStarters)(const UConverter converter,
sl@0	130	UBool starters[256],
sl@0	131	UErrorCode *pErrorCode);
sl@0	132
sl@0	133	/* If this function pointer is null or if the function returns null
sl@0	134	* the name field in static data struct should be returned by
sl@0	135	* ucnv_getName() API function
sl@0	136	*/
sl@0	137	typedef const char * (UConverterGetName) (const UConverter cnv);
sl@0	138
sl@0	139	/**
sl@0	140	* Write the codepage substitution character.
sl@0	141	* If this function is not set, then ucnv_cbFromUWriteSub() writes
sl@0	142	* the substitution character from UConverter.
sl@0	143	* For stateful converters, it is typically necessary to handle this
sl@0	144	* specificially for the converter in order to properly maintain the state.
sl@0	145	*/
sl@0	146	typedef void (UConverterWriteSub) (UConverterFromUnicodeArgs pArgs, int32_t offsetIndex, UErrorCode *pErrorCode);
sl@0	147
sl@0	148	/**
sl@0	149	* For converter-specific safeClone processing
sl@0	150	* If this function is not set, then ucnv_safeClone assumes that the converter has no private data that changes
sl@0	151	* after the converter is done opening.
sl@0	152	* If this function is set, then it is called just after a memcpy() of
sl@0	153	* converter data to the new, empty converter, and is expected to set up
sl@0	154	* the initial state of the converter. It is not expected to increment the
sl@0	155	* reference counts of the standard data types such as the shared data.
sl@0	156	*/
sl@0	157	typedef UConverter * (UConverterSafeClone) (const UConverter cnv,
sl@0	158	void *stackBuffer,
sl@0	159	int32_t *pBufferSize,
sl@0	160	UErrorCode *status);
sl@0	161
sl@0	162	/**
sl@0	163	* Fills the set of Unicode code points that can be converted by an ICU converter.
sl@0	164	* The API function ucnv_getUnicodeSet() clears the USet before calling
sl@0	165	* the converter's getUnicodeSet() implementation; the converter should only
sl@0	166	* add the appropriate code points to allow recursive use.
sl@0	167	* For example, the ISO-2022-JP converter will call each subconverter's
sl@0	168	* getUnicodeSet() implementation to consecutively add code points to
sl@0	169	* the same USet, which will result in a union of the sets of all subconverters.
sl@0	170	*
sl@0	171	* For more documentation, see ucnv_getUnicodeSet() in ucnv.h.
sl@0	172	*/
sl@0	173	typedef void (UConverterGetUnicodeSet) (const UConverter cnv,
sl@0	174	const USetAdder *sa,
sl@0	175	UConverterUnicodeSet which,
sl@0	176	UErrorCode *pErrorCode);
sl@0	177
sl@0	178	UBool CONVERSION_U_SUCCESS (UErrorCode err);
sl@0	179
sl@0	180	/**
sl@0	181	* UConverterImpl contains all the data and functions for a converter type.
sl@0	182	* Its function pointers work much like a C++ vtable.
sl@0	183	* Many converter types need to define only a subset of the functions;
sl@0	184	* when a function pointer is NULL, then a default action will be performed.
sl@0	185	*
sl@0	186	* Every converter type must implement toUnicode, fromUnicode, and getNextUChar,
sl@0	187	* otherwise the converter may crash.
sl@0	188	* Every converter type that has variable-length codepage sequences should
sl@0	189	* also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for
sl@0	190	* correct offset handling.
sl@0	191	* All other functions may or may not be implemented - it depends only on
sl@0	192	* whether the converter type needs them.
sl@0	193	*
sl@0	194	* When open() fails, then close() will be called, if present.
sl@0	195	*/
sl@0	196	struct UConverterImpl {
sl@0	197	UConverterType type;
sl@0	198
sl@0	199	UConverterLoad load;
sl@0	200	UConverterUnload unload;
sl@0	201
sl@0	202	UConverterOpen open;
sl@0	203	UConverterClose close;
sl@0	204	UConverterReset reset;
sl@0	205
sl@0	206	UConverterToUnicode toUnicode;
sl@0	207	UConverterToUnicode toUnicodeWithOffsets;
sl@0	208	UConverterFromUnicode fromUnicode;
sl@0	209	UConverterFromUnicode fromUnicodeWithOffsets;
sl@0	210	UConverterGetNextUChar getNextUChar;
sl@0	211
sl@0	212	UConverterGetStarters getStarters;
sl@0	213	UConverterGetName getName;
sl@0	214	UConverterWriteSub writeSub;
sl@0	215	UConverterSafeClone safeClone;
sl@0	216	UConverterGetUnicodeSet getUnicodeSet;
sl@0	217	};
sl@0	218
sl@0	219	extern const UConverterSharedData
sl@0	220	_MBCSData, _Latin1Data,
sl@0	221	_UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
sl@0	222	_ISO2022Data,
sl@0	223	_LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
sl@0	224	_LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
sl@0	225	_HZData,_ISCIIData, _SCSUData, _ASCIIData,
sl@0	226	_UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData;
sl@0	227
sl@0	228	U_CDECL_END
sl@0	229
/** Always use fallbacks from codepage to Unicode */
#define TO_U_USE_FALLBACK(useFallback) TRUE
#define UCNV_TO_U_USE_FALLBACK(cnv) TRUE

/**
 * Use fallbacks from Unicode to codepage when cnv->useFallback or for
 * private-use code points.
 *
 * IS_PRIVATE_USE(c) is true for U+E000..U+F8FF and for U+F0000..U+10FFFF.
 * Each range test is a single unsigned comparison: subtracting the range
 * start makes values below the range wrap around to large uint32_t values.
 * Note that `c` is evaluated twice, so it must not have side effects.
 */
#define IS_PRIVATE_USE(c) ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000)
#define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || IS_PRIVATE_USE(c))
#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c)
sl@0	238
/**
 * Magic number for ucnv_getNextUChar(), returned by a
 * getNextUChar() implementation to indicate to use the converter's toUnicode()
 * instead of the native function.
 * Parenthesized so the negative literal stays a single operand wherever the
 * macro is expanded.
 * @internal
 */
#define UCNV_GET_NEXT_UCHAR_USE_TO_U (-9)
sl@0	246
sl@0	247	U_CFUNC void
sl@0	248	ucnv_getCompleteUnicodeSet(const UConverter *cnv,
sl@0	249	const USetAdder *sa,
sl@0	250	UConverterUnicodeSet which,
sl@0	251	UErrorCode *pErrorCode);
sl@0	252
sl@0	253	U_CFUNC void
sl@0	254	ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
sl@0	255	const USetAdder *sa,
sl@0	256	UConverterUnicodeSet which,
sl@0	257	UErrorCode *pErrorCode);
sl@0	258
sl@0	259	U_CFUNC void
sl@0	260	ucnv_fromUWriteBytes(UConverter *cnv,
sl@0	261	const char *bytes, int32_t length,
sl@0	262	char *target, const char targetLimit,
sl@0	263	int32_t **offsets,
sl@0	264	int32_t sourceIndex,
sl@0	265	UErrorCode *pErrorCode);
sl@0	266	U_CFUNC void
sl@0	267	ucnv_toUWriteUChars(UConverter *cnv,
sl@0	268	const UChar *uchars, int32_t length,
sl@0	269	UChar *target, const UChar targetLimit,
sl@0	270	int32_t **offsets,
sl@0	271	int32_t sourceIndex,
sl@0	272	UErrorCode *pErrorCode);
sl@0	273
sl@0	274	U_CFUNC void
sl@0	275	ucnv_toUWriteCodePoint(UConverter *cnv,
sl@0	276	UChar32 c,
sl@0	277	UChar *target, const UChar targetLimit,
sl@0	278	int32_t **offsets,
sl@0	279	int32_t sourceIndex,
sl@0	280	UErrorCode *pErrorCode);
sl@0	281
sl@0	282	#endif
sl@0	283
sl@0	284	#endif /* UCNV_CNV_H */

author	sl
	Tue, 10 Jun 2014 14:32:02 +0200
changeset 1	260cb5ec6c19
permissions	-rw-r--r--