epoc32/include/stdapis/glib-2.0/glib/gunicode.h
author William Roberts <williamr@symbian.org>
Wed, 31 Mar 2010 12:33:34 +0100
branchSymbian3
changeset 4 837f303aceeb
parent 0 061f57f2323e
permissions -rw-r--r--
Current Symbian^3 public API header files (from PDK 3.0.h)
This is the epoc32/include tree with the "platform" subtrees removed, and
all but a selected few mbg and rsg files removed.
williamr@2
     1
/* gunicode.h - Unicode manipulation functions
williamr@2
     2
 *
williamr@2
     3
 *  Copyright (C) 1999, 2000 Tom Tromey
williamr@2
     4
 *  Copyright 2000, 2005 Red Hat, Inc.
williamr@2
     5
 * Portions copyright (c) 2006 Nokia Corporation.  All rights reserved.
williamr@2
     6
 *
williamr@2
     7
 * The Gnome Library is free software; you can redistribute it and/or
williamr@2
     8
 * modify it under the terms of the GNU Lesser General Public License as
williamr@2
     9
 * published by the Free Software Foundation; either version 2 of the
williamr@2
    10
 * License, or (at your option) any later version.
williamr@2
    11
 *
williamr@2
    12
 * The Gnome Library is distributed in the hope that it will be useful,
williamr@2
    13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
williamr@2
    14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
williamr@2
    15
 * Lesser General Public License for more details.
williamr@2
    16
 *
williamr@2
    17
 * You should have received a copy of the GNU Lesser General Public
williamr@2
    18
 * License along with the Gnome Library; see the file COPYING.LIB.  If not,
williamr@2
    19
 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
williamr@2
    20
 *   Boston, MA 02111-1307, USA.
williamr@2
    21
 */
williamr@2
    22
williamr@2
    23
#ifndef __G_UNICODE_H__
williamr@2
    24
#define __G_UNICODE_H__
williamr@2
    25
williamr@2
    26
#include <_ansi.h>
williamr@2
    27
#include <glib/gerror.h>
williamr@2
    28
#include <glib/gtypes.h>
williamr@2
    29
williamr@2
    30
G_BEGIN_DECLS
williamr@2
    31
williamr@2
    32
/* Type used for a single Unicode character throughout this header; the
 * UCS-4 conversion routines declared below take and return arrays of it.  */
typedef guint32 gunichar;
williamr@2
    33
/* Element type of the UTF-16 strings taken and returned by the
 * g_utf16_* / g_*_to_utf16 conversion routines declared below.  */
typedef guint16 gunichar2;
williamr@2
    34
williamr@2
    35
/* These are the possible character classifications.
williamr@2
    36
 * See http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
williamr@2
    37
 */
williamr@2
    38
typedef enum
williamr@2
    39
{
williamr@2
    40
  G_UNICODE_CONTROL,
williamr@2
    41
  G_UNICODE_FORMAT,
williamr@2
    42
  G_UNICODE_UNASSIGNED,
williamr@2
    43
  G_UNICODE_PRIVATE_USE,
williamr@2
    44
  G_UNICODE_SURROGATE,
williamr@2
    45
  G_UNICODE_LOWERCASE_LETTER,
williamr@2
    46
  G_UNICODE_MODIFIER_LETTER,
williamr@2
    47
  G_UNICODE_OTHER_LETTER,
williamr@2
    48
  G_UNICODE_TITLECASE_LETTER,
williamr@2
    49
  G_UNICODE_UPPERCASE_LETTER,
williamr@2
    50
  G_UNICODE_COMBINING_MARK,
williamr@2
    51
  G_UNICODE_ENCLOSING_MARK,
williamr@2
    52
  G_UNICODE_NON_SPACING_MARK,
williamr@2
    53
  G_UNICODE_DECIMAL_NUMBER,
williamr@2
    54
  G_UNICODE_LETTER_NUMBER,
williamr@2
    55
  G_UNICODE_OTHER_NUMBER,
williamr@2
    56
  G_UNICODE_CONNECT_PUNCTUATION,
williamr@2
    57
  G_UNICODE_DASH_PUNCTUATION,
williamr@2
    58
  G_UNICODE_CLOSE_PUNCTUATION,
williamr@2
    59
  G_UNICODE_FINAL_PUNCTUATION,
williamr@2
    60
  G_UNICODE_INITIAL_PUNCTUATION,
williamr@2
    61
  G_UNICODE_OTHER_PUNCTUATION,
williamr@2
    62
  G_UNICODE_OPEN_PUNCTUATION,
williamr@2
    63
  G_UNICODE_CURRENCY_SYMBOL,
williamr@2
    64
  G_UNICODE_MODIFIER_SYMBOL,
williamr@2
    65
  G_UNICODE_MATH_SYMBOL,
williamr@2
    66
  G_UNICODE_OTHER_SYMBOL,
williamr@2
    67
  G_UNICODE_LINE_SEPARATOR,
williamr@2
    68
  G_UNICODE_PARAGRAPH_SEPARATOR,
williamr@2
    69
  G_UNICODE_SPACE_SEPARATOR
williamr@2
    70
} GUnicodeType;
williamr@2
    71
williamr@2
    72
/* These are the possible line break classifications.
williamr@2
    73
 * Note that new types may be added in the future.
williamr@2
    74
 * Implementations may treat unknown values as G_UNICODE_BREAK_UNKNOWN.
williamr@2
    75
 * See http://www.unicode.org/unicode/reports/tr14/
williamr@2
    76
 */
williamr@2
    77
typedef enum
williamr@2
    78
{
williamr@2
    79
  G_UNICODE_BREAK_MANDATORY,
williamr@2
    80
  G_UNICODE_BREAK_CARRIAGE_RETURN,
williamr@2
    81
  G_UNICODE_BREAK_LINE_FEED,
williamr@2
    82
  G_UNICODE_BREAK_COMBINING_MARK,
williamr@2
    83
  G_UNICODE_BREAK_SURROGATE,
williamr@2
    84
  G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
williamr@2
    85
  G_UNICODE_BREAK_INSEPARABLE,
williamr@2
    86
  G_UNICODE_BREAK_NON_BREAKING_GLUE,
williamr@2
    87
  G_UNICODE_BREAK_CONTINGENT,
williamr@2
    88
  G_UNICODE_BREAK_SPACE,
williamr@2
    89
  G_UNICODE_BREAK_AFTER,
williamr@2
    90
  G_UNICODE_BREAK_BEFORE,
williamr@2
    91
  G_UNICODE_BREAK_BEFORE_AND_AFTER,
williamr@2
    92
  G_UNICODE_BREAK_HYPHEN,
williamr@2
    93
  G_UNICODE_BREAK_NON_STARTER,
williamr@2
    94
  G_UNICODE_BREAK_OPEN_PUNCTUATION,
williamr@2
    95
  G_UNICODE_BREAK_CLOSE_PUNCTUATION,
williamr@2
    96
  G_UNICODE_BREAK_QUOTATION,
williamr@2
    97
  G_UNICODE_BREAK_EXCLAMATION,
williamr@2
    98
  G_UNICODE_BREAK_IDEOGRAPHIC,
williamr@2
    99
  G_UNICODE_BREAK_NUMERIC,
williamr@2
   100
  G_UNICODE_BREAK_INFIX_SEPARATOR,
williamr@2
   101
  G_UNICODE_BREAK_SYMBOL,
williamr@2
   102
  G_UNICODE_BREAK_ALPHABETIC,
williamr@2
   103
  G_UNICODE_BREAK_PREFIX,
williamr@2
   104
  G_UNICODE_BREAK_POSTFIX,
williamr@2
   105
  G_UNICODE_BREAK_COMPLEX_CONTEXT,
williamr@2
   106
  G_UNICODE_BREAK_AMBIGUOUS,
williamr@2
   107
  G_UNICODE_BREAK_UNKNOWN,
williamr@2
   108
  G_UNICODE_BREAK_NEXT_LINE,
williamr@2
   109
  G_UNICODE_BREAK_WORD_JOINER,
williamr@2
   110
  G_UNICODE_BREAK_HANGUL_L_JAMO,
williamr@2
   111
  G_UNICODE_BREAK_HANGUL_V_JAMO,
williamr@2
   112
  G_UNICODE_BREAK_HANGUL_T_JAMO,
williamr@2
   113
  G_UNICODE_BREAK_HANGUL_LV_SYLLABLE,
williamr@2
   114
  G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE
williamr@2
   115
} GUnicodeBreakType;
williamr@2
   116
williamr@2
   117
/* Returns TRUE if current locale uses UTF-8 charset.  If CHARSET is
williamr@2
   118
 * not null, sets *CHARSET to the name of the current locale's
williamr@2
   119
 * charset.  This value is statically allocated, and should be copied
williamr@2
   120
 * in case the locale's charset will be changed later using setlocale()
williamr@2
   121
 * or in some other way.
williamr@2
   122
 */
williamr@2
   123
IMPORT_C gboolean g_get_charset (G_CONST_RETURN char **charset);
williamr@2
   124
williamr@2
   125
/* These are all analogs of the <ctype.h> functions.
williamr@2
   126
 */
williamr@2
   127
IMPORT_C gboolean g_unichar_isalnum   (gunichar c) G_GNUC_CONST;
williamr@2
   128
IMPORT_C gboolean g_unichar_isalpha   (gunichar c) G_GNUC_CONST;
williamr@2
   129
IMPORT_C gboolean g_unichar_iscntrl   (gunichar c) G_GNUC_CONST;
williamr@2
   130
IMPORT_C gboolean g_unichar_isdigit   (gunichar c) G_GNUC_CONST;
williamr@2
   131
IMPORT_C gboolean g_unichar_isgraph   (gunichar c) G_GNUC_CONST;
williamr@2
   132
IMPORT_C gboolean g_unichar_islower   (gunichar c) G_GNUC_CONST;
williamr@2
   133
IMPORT_C gboolean g_unichar_isprint   (gunichar c) G_GNUC_CONST;
williamr@2
   134
IMPORT_C gboolean g_unichar_ispunct   (gunichar c) G_GNUC_CONST;
williamr@2
   135
IMPORT_C gboolean g_unichar_isspace   (gunichar c) G_GNUC_CONST;
williamr@2
   136
IMPORT_C gboolean g_unichar_isupper   (gunichar c) G_GNUC_CONST;
williamr@2
   137
IMPORT_C gboolean g_unichar_isxdigit  (gunichar c) G_GNUC_CONST;
williamr@2
   138
IMPORT_C gboolean g_unichar_istitle   (gunichar c) G_GNUC_CONST;
williamr@2
   139
IMPORT_C gboolean g_unichar_isdefined (gunichar c) G_GNUC_CONST;
williamr@2
   140
IMPORT_C gboolean g_unichar_iswide    (gunichar c) G_GNUC_CONST;
williamr@2
   141
williamr@2
   142
/* More <ctype.h> functions.  These convert between the three cases.
williamr@2
   143
 * See the Unicode book to understand title case.  */
williamr@2
   144
IMPORT_C gunichar g_unichar_toupper (gunichar c) G_GNUC_CONST;
williamr@2
   145
IMPORT_C gunichar g_unichar_tolower (gunichar c) G_GNUC_CONST;
williamr@2
   146
IMPORT_C gunichar g_unichar_totitle (gunichar c) G_GNUC_CONST;
williamr@2
   147
williamr@2
   148
/* If C is a digit (according to `g_unichar_isdigit'), then return its
williamr@2
   149
   numeric value.  Otherwise return -1.  */
williamr@2
   150
IMPORT_C gint g_unichar_digit_value (gunichar c) G_GNUC_CONST;
williamr@2
   151
williamr@2
   152
/* NOTE(review): presumably the hexadecimal counterpart of
   g_unichar_digit_value (cf. g_unichar_isxdigit above) -- confirm against
   the implementation, including the value returned for a non-hex-digit.  */
IMPORT_C gint g_unichar_xdigit_value (gunichar c) G_GNUC_CONST;
williamr@2
   153
williamr@2
   154
/* Return the Unicode character type of a given character.  */
williamr@2
   155
IMPORT_C GUnicodeType g_unichar_type (gunichar c) G_GNUC_CONST;
williamr@2
   156
williamr@2
   157
/* Return the line break property for a given character */
williamr@2
   158
IMPORT_C GUnicodeBreakType g_unichar_break_type (gunichar c) G_GNUC_CONST;
williamr@2
   159
williamr@2
   160
williamr@2
   161
/* Compute canonical ordering of a string in-place.  This rearranges
williamr@2
   162
   decomposed characters in the string according to their combining
williamr@2
   163
   classes.  See the Unicode manual for more information.  */
williamr@2
   164
IMPORT_C void g_unicode_canonical_ordering (gunichar *string,
williamr@2
   165
				   gsize     len);
williamr@2
   166
williamr@2
   167
/* Compute canonical decomposition of a character.  Returns g_malloc()d
williamr@2
   168
   string of Unicode characters.  RESULT_LEN is set to the resulting
williamr@2
   169
   length of the string.  */
williamr@2
   170
IMPORT_C gunichar *g_unicode_canonical_decomposition (gunichar  ch,
williamr@2
   171
					     gsize    *result_len) G_GNUC_MALLOC;
williamr@2
   172
williamr@2
   173
/* Array of skip-bytes-per-initial character.
williamr@2
   174
 */
williamr@2
   175
#ifdef __SYMBIAN32__
williamr@2
   176
/* Symbian-only accessor (this declaration is guarded by __SYMBIAN32__).
 * Takes no arguments: the parameter list is spelled (void) so that C
 * callers see a real prototype instead of an old-style unprototyped
 * declaration, which would let stray arguments go undiagnosed.
 * NOTE(review): presumably returns the address of the skip table that
 * g_utf8_skip refers to -- confirm against the implementation.  */
IMPORT_C const gchar * const * _g_utf8_skip(void);
williamr@2
   177
#endif /* __SYMBIAN32__ */
williamr@2
   178
GLIB_VAR const gchar * const g_utf8_skip;
williamr@2
   179
williamr@2
   180
/* Yield a pointer to the character following the one that starts at P,
 * by adding the g_utf8_skip entry indexed by P's first byte.
 *
 * P is evaluated twice, so it must not have side effects.  The whole
 * expansion is parenthesized so that postfix operators applied to the
 * result (indexing, ->, ++) bind to the returned pointer rather than to
 * the operand of the cast, and the byte is read through a const-qualified
 * pointer so that const input does not have its qualifier cast away.  */
#define g_utf8_next_char(p) ((char *)((p) + g_utf8_skip[*(const guchar *)(p)]))
williamr@2
   181
williamr@2
   182
IMPORT_C gunichar g_utf8_get_char           (const gchar  *p);
williamr@2
   183
IMPORT_C gunichar g_utf8_get_char_validated (const  gchar *p,
williamr@2
   184
				    gssize        max_len);
williamr@2
   185
williamr@2
   186
IMPORT_C gchar*   g_utf8_offset_to_pointer (const gchar *str,
williamr@2
   187
                                   glong        offset);  
williamr@2
   188
IMPORT_C glong    g_utf8_pointer_to_offset (const gchar *str,      
williamr@2
   189
				   const gchar *pos);
williamr@2
   190
IMPORT_C gchar*   g_utf8_prev_char         (const gchar *p);
williamr@2
   191
IMPORT_C gchar*   g_utf8_find_next_char    (const gchar *p,
williamr@2
   192
				   const gchar *end);
williamr@2
   193
IMPORT_C gchar*   g_utf8_find_prev_char    (const gchar *str,
williamr@2
   194
				   const gchar *p);
williamr@2
   195
williamr@2
   196
IMPORT_C glong g_utf8_strlen (const gchar *p,  
williamr@2
   197
		     gssize       max);        
williamr@2
   198
williamr@2
   199
/* Copies n characters from src to dest */
williamr@2
   200
IMPORT_C gchar* g_utf8_strncpy (gchar       *dest,
williamr@2
   201
		       const gchar *src,
williamr@2
   202
		       gsize        n);
williamr@2
   203
williamr@2
   204
/* Find the UTF-8 character corresponding to ch, in string p. These
williamr@2
   205
   functions are equivalents of strchr and strrchr */
williamr@2
   206
IMPORT_C gchar* g_utf8_strchr  (const gchar *p,
williamr@2
   207
		       gssize       len,
williamr@2
   208
		       gunichar     c);
williamr@2
   209
IMPORT_C gchar* g_utf8_strrchr (const gchar *p,
williamr@2
   210
		       gssize       len,
williamr@2
   211
		       gunichar     c);
williamr@2
   212
IMPORT_C gchar* g_utf8_strreverse (const gchar *str,
williamr@2
   213
			  gssize len);
williamr@2
   214
williamr@2
   215
IMPORT_C gunichar2 *g_utf8_to_utf16     (const gchar      *str,
williamr@2
   216
				glong             len,            
williamr@2
   217
				glong            *items_read,     
williamr@2
   218
				glong            *items_written,  
williamr@2
   219
				GError          **error) G_GNUC_MALLOC;
williamr@2
   220
IMPORT_C gunichar * g_utf8_to_ucs4      (const gchar      *str,
williamr@2
   221
				glong             len,            
williamr@2
   222
				glong            *items_read,     
williamr@2
   223
				glong            *items_written,  
williamr@2
   224
				GError          **error) G_GNUC_MALLOC;
williamr@2
   225
IMPORT_C gunichar * g_utf8_to_ucs4_fast (const gchar      *str,
williamr@2
   226
				glong             len,            
williamr@2
   227
				glong            *items_written) G_GNUC_MALLOC; 
williamr@2
   228
IMPORT_C gunichar * g_utf16_to_ucs4     (const gunichar2  *str,
williamr@2
   229
				glong             len,            
williamr@2
   230
				glong            *items_read,     
williamr@2
   231
				glong            *items_written,  
williamr@2
   232
				GError          **error) G_GNUC_MALLOC;
williamr@2
   233
IMPORT_C gchar*     g_utf16_to_utf8     (const gunichar2  *str,
williamr@2
   234
				glong             len,            
williamr@2
   235
				glong            *items_read,     
williamr@2
   236
				glong            *items_written,  
williamr@2
   237
				GError          **error) G_GNUC_MALLOC;
williamr@2
   238
IMPORT_C gunichar2 *g_ucs4_to_utf16     (const gunichar   *str,
williamr@2
   239
				glong             len,            
williamr@2
   240
				glong            *items_read,     
williamr@2
   241
				glong            *items_written,  
williamr@2
   242
				GError          **error) G_GNUC_MALLOC;
williamr@2
   243
IMPORT_C gchar*     g_ucs4_to_utf8      (const gunichar   *str,
williamr@2
   244
				glong             len,            
williamr@2
   245
				glong            *items_read,     
williamr@2
   246
				glong            *items_written,  
williamr@2
   247
				GError          **error) G_GNUC_MALLOC;
williamr@2
   248
williamr@2
   249
/* Convert a single character into UTF-8. outbuf must have at
williamr@2
   250
 * least 6 bytes of space. Returns the number of bytes in the
williamr@2
   251
 * result.
williamr@2
   252
 */
williamr@2
   253
IMPORT_C gint      g_unichar_to_utf8 (gunichar    c,
williamr@2
   254
			     gchar      *outbuf);
williamr@2
   255
williamr@2
   256
/* Validate a UTF8 string, return TRUE if valid, put pointer to
williamr@2
   257
 * first invalid char in **end
williamr@2
   258
 */
williamr@2
   259
williamr@2
   260
IMPORT_C gboolean g_utf8_validate (const gchar  *str,
williamr@2
   261
                          gssize        max_len,  
williamr@2
   262
                          const gchar **end);
williamr@2
   263
williamr@2
   264
/* Validate a Unicode character */
williamr@2
   265
IMPORT_C gboolean g_unichar_validate (gunichar ch);
williamr@2
   266
williamr@2
   267
IMPORT_C gchar *g_utf8_strup   (const gchar *str,
williamr@2
   268
		       gssize       len) G_GNUC_MALLOC;
williamr@2
   269
IMPORT_C gchar *g_utf8_strdown (const gchar *str,
williamr@2
   270
		       gssize       len) G_GNUC_MALLOC;
williamr@2
   271
IMPORT_C gchar *g_utf8_casefold (const gchar *str,
williamr@2
   272
			gssize       len) G_GNUC_MALLOC;
williamr@2
   273
williamr@2
   274
/* Selects the kind of normalization performed by g_utf8_normalize().
 * There are four distinct values; each G_NORMALIZE_NF* name is declared
 * equal to the enumerator immediately before it, so NFD, NFC, NFKD and
 * NFKC are aliases for DEFAULT, DEFAULT_COMPOSE, ALL and ALL_COMPOSE
 * respectively.  */
typedef enum {
williamr@2
   275
  G_NORMALIZE_DEFAULT,
williamr@2
   276
  G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT,
williamr@2
   277
  G_NORMALIZE_DEFAULT_COMPOSE,
williamr@2
   278
  G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,
williamr@2
   279
  G_NORMALIZE_ALL,
williamr@2
   280
  G_NORMALIZE_NFKD = G_NORMALIZE_ALL,
williamr@2
   281
  G_NORMALIZE_ALL_COMPOSE,
williamr@2
   282
  G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
williamr@2
   283
} GNormalizeMode;
williamr@2
   284
williamr@2
   285
IMPORT_C gchar *g_utf8_normalize (const gchar   *str,
williamr@2
   286
			 gssize         len,
williamr@2
   287
			 GNormalizeMode mode) G_GNUC_MALLOC;
williamr@2
   288
williamr@2
   289
IMPORT_C gint   g_utf8_collate     (const gchar *str1,
williamr@2
   290
			   const gchar *str2);
williamr@2
   291
IMPORT_C gchar *g_utf8_collate_key (const gchar *str,
williamr@2
   292
			   gssize       len) G_GNUC_MALLOC;
williamr@2
   293
IMPORT_C gchar *g_utf8_collate_key_for_filename (const gchar *str,
williamr@2
   294
			                gssize       len) G_GNUC_MALLOC;
williamr@2
   295
williamr@2
   296
IMPORT_C gboolean g_unichar_get_mirror_char (gunichar ch,
williamr@2
   297
                                    gunichar *mirrored_ch);
williamr@2
   298
williamr@2
   299
G_END_DECLS
williamr@2
   300
williamr@2
   301
#endif /* __G_UNICODE_H__ */