1.1 --- a/epoc32/include/stdapis/glib-2.0/glib/gunicode.h Tue Nov 24 13:55:44 2009 +0000
1.2 +++ b/epoc32/include/stdapis/glib-2.0/glib/gunicode.h Tue Mar 16 16:12:26 2010 +0000
1.3 @@ -1,1 +1,301 @@
1.4 -gunicode.h
1.5 +/* gunicode.h - Unicode manipulation functions
1.6 + *
1.7 + * Copyright (C) 1999, 2000 Tom Tromey
1.8 + * Copyright 2000, 2005 Red Hat, Inc.
1.9 + * Portions copyright (c) 2006 Nokia Corporation. All rights reserved.
1.10 + *
1.11 + * The Gnome Library is free software; you can redistribute it and/or
1.12 + * modify it under the terms of the GNU Lesser General Public License as
1.13 + * published by the Free Software Foundation; either version 2 of the
1.14 + * License, or (at your option) any later version.
1.15 + *
1.16 + * The Gnome Library is distributed in the hope that it will be useful,
1.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1.19 + * Lesser General Public License for more details.
1.20 + *
1.21 + * You should have received a copy of the GNU Lesser General Public
1.22 + * License along with the Gnome Library; see the file COPYING.LIB. If not,
1.23 + * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
1.24 + * Boston, MA 02111-1307, USA.
1.25 + */
1.26 +
1.27 +#ifndef __G_UNICODE_H__
1.28 +#define __G_UNICODE_H__
1.29 +
1.30 +#include <_ansi.h>
1.31 +#include <glib/gerror.h>
1.32 +#include <glib/gtypes.h>
1.33 +
1.34 +G_BEGIN_DECLS
1.35 +
1.36 +typedef guint32 gunichar;  /* one Unicode character (element of a UCS-4 string) */
1.37 +typedef guint16 gunichar2; /* one element of a UTF-16 string */
1.38 +
1.39 +/* These are the possible character classifications. No enumerator has an
1.40 + * explicit value, so they are numbered from 0 in the order listed here.
1.41 + * See http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values */
1.42 +typedef enum
1.43 +{
1.44 + G_UNICODE_CONTROL,
1.45 + G_UNICODE_FORMAT,
1.46 + G_UNICODE_UNASSIGNED,
1.47 + G_UNICODE_PRIVATE_USE,
1.48 + G_UNICODE_SURROGATE,
1.49 + G_UNICODE_LOWERCASE_LETTER,
1.50 + G_UNICODE_MODIFIER_LETTER,
1.51 + G_UNICODE_OTHER_LETTER,
1.52 + G_UNICODE_TITLECASE_LETTER,
1.53 + G_UNICODE_UPPERCASE_LETTER,
1.54 + G_UNICODE_COMBINING_MARK,
1.55 + G_UNICODE_ENCLOSING_MARK,
1.56 + G_UNICODE_NON_SPACING_MARK,
1.57 + G_UNICODE_DECIMAL_NUMBER,
1.58 + G_UNICODE_LETTER_NUMBER,
1.59 + G_UNICODE_OTHER_NUMBER,
1.60 + G_UNICODE_CONNECT_PUNCTUATION,
1.61 + G_UNICODE_DASH_PUNCTUATION,
1.62 + G_UNICODE_CLOSE_PUNCTUATION,
1.63 + G_UNICODE_FINAL_PUNCTUATION,
1.64 + G_UNICODE_INITIAL_PUNCTUATION,
1.65 + G_UNICODE_OTHER_PUNCTUATION,
1.66 + G_UNICODE_OPEN_PUNCTUATION,
1.67 + G_UNICODE_CURRENCY_SYMBOL,
1.68 + G_UNICODE_MODIFIER_SYMBOL,
1.69 + G_UNICODE_MATH_SYMBOL,
1.70 + G_UNICODE_OTHER_SYMBOL,
1.71 + G_UNICODE_LINE_SEPARATOR,
1.72 + G_UNICODE_PARAGRAPH_SEPARATOR,
1.73 + G_UNICODE_SPACE_SEPARATOR
1.74 +} GUnicodeType;
1.75 +
1.76 +/* These are the possible line break classifications.
1.77 + * Note that new types may be added in the future.
1.78 + * Implementations may treat unknown values like G_UNICODE_BREAK_UNKNOWN.
1.79 + * See http://www.unicode.org/unicode/reports/tr14/
1.80 + */
1.81 +typedef enum
1.82 +{
1.83 + G_UNICODE_BREAK_MANDATORY,
1.84 + G_UNICODE_BREAK_CARRIAGE_RETURN,
1.85 + G_UNICODE_BREAK_LINE_FEED,
1.86 + G_UNICODE_BREAK_COMBINING_MARK,
1.87 + G_UNICODE_BREAK_SURROGATE,
1.88 + G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
1.89 + G_UNICODE_BREAK_INSEPARABLE,
1.90 + G_UNICODE_BREAK_NON_BREAKING_GLUE,
1.91 + G_UNICODE_BREAK_CONTINGENT,
1.92 + G_UNICODE_BREAK_SPACE,
1.93 + G_UNICODE_BREAK_AFTER,
1.94 + G_UNICODE_BREAK_BEFORE,
1.95 + G_UNICODE_BREAK_BEFORE_AND_AFTER,
1.96 + G_UNICODE_BREAK_HYPHEN,
1.97 + G_UNICODE_BREAK_NON_STARTER,
1.98 + G_UNICODE_BREAK_OPEN_PUNCTUATION,
1.99 + G_UNICODE_BREAK_CLOSE_PUNCTUATION,
1.100 + G_UNICODE_BREAK_QUOTATION,
1.101 + G_UNICODE_BREAK_EXCLAMATION,
1.102 + G_UNICODE_BREAK_IDEOGRAPHIC,
1.103 + G_UNICODE_BREAK_NUMERIC,
1.104 + G_UNICODE_BREAK_INFIX_SEPARATOR,
1.105 + G_UNICODE_BREAK_SYMBOL,
1.106 + G_UNICODE_BREAK_ALPHABETIC,
1.107 + G_UNICODE_BREAK_PREFIX,
1.108 + G_UNICODE_BREAK_POSTFIX,
1.109 + G_UNICODE_BREAK_COMPLEX_CONTEXT,
1.110 + G_UNICODE_BREAK_AMBIGUOUS,
1.111 + G_UNICODE_BREAK_UNKNOWN,
1.112 + G_UNICODE_BREAK_NEXT_LINE,
1.113 + G_UNICODE_BREAK_WORD_JOINER,
1.114 + G_UNICODE_BREAK_HANGUL_L_JAMO,
1.115 + G_UNICODE_BREAK_HANGUL_V_JAMO,
1.116 + G_UNICODE_BREAK_HANGUL_T_JAMO,
1.117 + G_UNICODE_BREAK_HANGUL_LV_SYLLABLE,
1.118 + G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE
1.119 +} GUnicodeBreakType;
1.120 +
1.121 +/* Returns TRUE if current locale uses UTF-8 charset. If CHARSET is
1.122 + * not NULL, sets *CHARSET to the name of the current locale's
1.123 + * charset. This value is statically allocated (so it must not be freed),
1.124 + * and should be copied in case the locale's charset will be changed later
1.125 + * using setlocale() or in some other way.
1.126 + */
1.127 +IMPORT_C gboolean g_get_charset (G_CONST_RETURN char **charset);
1.128 +
1.129 +/* These are analogs of the <ctype.h> functions, taking a gunichar; the last
1.130 + * three (istitle, isdefined, iswide) have no <ctype.h> counterpart. */
1.131 +IMPORT_C gboolean g_unichar_isalnum (gunichar c) G_GNUC_CONST;
1.132 +IMPORT_C gboolean g_unichar_isalpha (gunichar c) G_GNUC_CONST;
1.133 +IMPORT_C gboolean g_unichar_iscntrl (gunichar c) G_GNUC_CONST;
1.134 +IMPORT_C gboolean g_unichar_isdigit (gunichar c) G_GNUC_CONST;
1.135 +IMPORT_C gboolean g_unichar_isgraph (gunichar c) G_GNUC_CONST;
1.136 +IMPORT_C gboolean g_unichar_islower (gunichar c) G_GNUC_CONST;
1.137 +IMPORT_C gboolean g_unichar_isprint (gunichar c) G_GNUC_CONST;
1.138 +IMPORT_C gboolean g_unichar_ispunct (gunichar c) G_GNUC_CONST;
1.139 +IMPORT_C gboolean g_unichar_isspace (gunichar c) G_GNUC_CONST;
1.140 +IMPORT_C gboolean g_unichar_isupper (gunichar c) G_GNUC_CONST;
1.141 +IMPORT_C gboolean g_unichar_isxdigit (gunichar c) G_GNUC_CONST;
1.142 +IMPORT_C gboolean g_unichar_istitle (gunichar c) G_GNUC_CONST;
1.143 +IMPORT_C gboolean g_unichar_isdefined (gunichar c) G_GNUC_CONST;
1.144 +IMPORT_C gboolean g_unichar_iswide (gunichar c) G_GNUC_CONST;
1.145 +
1.146 +/* More <ctype.h> analogs. These convert between the three cases (upper,
1.147 + * lower, title). See the Unicode book to understand title case. */
1.148 +IMPORT_C gunichar g_unichar_toupper (gunichar c) G_GNUC_CONST;
1.149 +IMPORT_C gunichar g_unichar_tolower (gunichar c) G_GNUC_CONST;
1.150 +IMPORT_C gunichar g_unichar_totitle (gunichar c) G_GNUC_CONST;
1.151 +
1.152 +/* If C is a digit (according to `g_unichar_isdigit'), then return its
1.153 + * numeric value. Otherwise return -1. */
1.154 +IMPORT_C gint g_unichar_digit_value (gunichar c) G_GNUC_CONST;
1.155 +/* NOTE(review): presumably the hexadecimal counterpart (cf. g_unichar_isxdigit); confirm. */
1.156 +IMPORT_C gint g_unichar_xdigit_value (gunichar c) G_GNUC_CONST;
1.157 +
1.158 +/* Return the Unicode character type of a given character, as a GUnicodeType. */
1.159 +IMPORT_C GUnicodeType g_unichar_type (gunichar c) G_GNUC_CONST;
1.160 +
1.161 +/* Return the line break property for a given character, as a GUnicodeBreakType. */
1.162 +IMPORT_C GUnicodeBreakType g_unichar_break_type (gunichar c) G_GNUC_CONST;
1.163 +
1.164 +
1.165 +/* Compute canonical ordering of a string in-place (STRING is not const).
1.166 + * This rearranges decomposed characters in the string according to their
1.167 + * combining classes. See the Unicode manual for more information. */
1.168 +IMPORT_C void g_unicode_canonical_ordering (gunichar *string,
1.169 + gsize len);
1.170 +
1.171 +/* Compute canonical decomposition of a character. Returns a g_malloc()d
1.172 + * string of Unicode characters, which the caller therefore owns.
1.173 + * RESULT_LEN is set to the resulting length of the string. */
1.174 +IMPORT_C gunichar *g_unicode_canonical_decomposition (gunichar ch,
1.175 + gsize *result_len) G_GNUC_MALLOC;
1.176 +
1.177 +/* Array of skip-bytes-per-initial character. g_utf8_next_char() below
1.178 + * indexes it with the byte at P; P is expanded twice, so avoid side effects. */
1.179 +#ifdef __SYMBIAN32__
1.180 +IMPORT_C const gchar * const * _g_utf8_skip(void);
1.181 +#endif /* __SYMBIAN32__ */
1.182 +GLIB_VAR const gchar * const g_utf8_skip;
1.183 +
1.184 +#define g_utf8_next_char(p) ((char *)((p) + g_utf8_skip[*(const guchar *)(p)]))
1.185 +/* Character access and pointer/offset helpers for UTF-8 strings. */
1.186 +IMPORT_C gunichar g_utf8_get_char (const gchar *p);
1.187 +IMPORT_C gunichar g_utf8_get_char_validated (const gchar *p,
1.188 + gssize max_len);
1.189 +
1.190 +IMPORT_C gchar* g_utf8_offset_to_pointer (const gchar *str,
1.191 + glong offset);
1.192 +IMPORT_C glong g_utf8_pointer_to_offset (const gchar *str,
1.193 + const gchar *pos);
1.194 +IMPORT_C gchar* g_utf8_prev_char (const gchar *p);
1.195 +IMPORT_C gchar* g_utf8_find_next_char (const gchar *p,
1.196 + const gchar *end);
1.197 +IMPORT_C gchar* g_utf8_find_prev_char (const gchar *str,
1.198 + const gchar *p);
1.199 +
1.200 +IMPORT_C glong g_utf8_strlen (const gchar *p,
1.201 + gssize max);
1.202 +
1.203 +/* Copies n characters from src to dest */
1.204 +IMPORT_C gchar* g_utf8_strncpy (gchar *dest,
1.205 + const gchar *src,
1.206 + gsize n);
1.207 +
1.208 +/* Find the UTF-8 character corresponding to c, in string p. These
1.209 + * functions are equivalents of strchr and strrchr */
1.210 +IMPORT_C gchar* g_utf8_strchr (const gchar *p,
1.211 + gssize len,
1.212 + gunichar c);
1.213 +IMPORT_C gchar* g_utf8_strrchr (const gchar *p,
1.214 + gssize len,
1.215 + gunichar c);
1.216 +IMPORT_C gchar* g_utf8_strreverse (const gchar *str,
1.217 + gssize len);
1.218 +/* Conversions between UTF-8, UTF-16 and UCS-4. Every result is G_GNUC_MALLOC; all but the _fast variant take items_read and a GError **. */
1.219 +IMPORT_C gunichar2 *g_utf8_to_utf16 (const gchar *str,
1.220 + glong len,
1.221 + glong *items_read,
1.222 + glong *items_written,
1.223 + GError **error) G_GNUC_MALLOC;
1.224 +IMPORT_C gunichar * g_utf8_to_ucs4 (const gchar *str,
1.225 + glong len,
1.226 + glong *items_read,
1.227 + glong *items_written,
1.228 + GError **error) G_GNUC_MALLOC;
1.229 +IMPORT_C gunichar * g_utf8_to_ucs4_fast (const gchar *str,
1.230 + glong len,
1.231 + glong *items_written) G_GNUC_MALLOC;
1.232 +IMPORT_C gunichar * g_utf16_to_ucs4 (const gunichar2 *str,
1.233 + glong len,
1.234 + glong *items_read,
1.235 + glong *items_written,
1.236 + GError **error) G_GNUC_MALLOC;
1.237 +IMPORT_C gchar* g_utf16_to_utf8 (const gunichar2 *str,
1.238 + glong len,
1.239 + glong *items_read,
1.240 + glong *items_written,
1.241 + GError **error) G_GNUC_MALLOC;
1.242 +IMPORT_C gunichar2 *g_ucs4_to_utf16 (const gunichar *str,
1.243 + glong len,
1.244 + glong *items_read,
1.245 + glong *items_written,
1.246 + GError **error) G_GNUC_MALLOC;
1.247 +IMPORT_C gchar* g_ucs4_to_utf8 (const gunichar *str,
1.248 + glong len,
1.249 + glong *items_read,
1.250 + glong *items_written,
1.251 + GError **error) G_GNUC_MALLOC;
1.252 +
1.253 +/* Convert a single character into UTF-8. outbuf must have at
1.254 + * least 6 bytes of space. Returns the number of bytes in the
1.255 + * result. NOTE(review): NUL termination of outbuf is not stated here; confirm.
1.256 + */
1.257 +IMPORT_C gint g_unichar_to_utf8 (gunichar c,
1.258 + gchar *outbuf);
1.259 +
1.260 +/* Validate a UTF-8 string, return TRUE if valid, put pointer to
1.261 + * first invalid char in *end
1.262 + */
1.263 +
1.264 +IMPORT_C gboolean g_utf8_validate (const gchar *str,
1.265 + gssize max_len,
1.266 + const gchar **end);
1.267 +
1.268 +/* Validate a single Unicode character; the result is a gboolean. */
1.269 +IMPORT_C gboolean g_unichar_validate (gunichar ch);
1.270 +/* String case conversions. Each result carries G_GNUC_MALLOC. */
1.271 +IMPORT_C gchar *g_utf8_strup (const gchar *str,
1.272 + gssize len) G_GNUC_MALLOC;
1.273 +IMPORT_C gchar *g_utf8_strdown (const gchar *str,
1.274 + gssize len) G_GNUC_MALLOC;
1.275 +IMPORT_C gchar *g_utf8_casefold (const gchar *str,
1.276 + gssize len) G_GNUC_MALLOC;
1.277 +/* Normalization modes. Each NF* name is an alias (same value) of the enumerator just before it, so there are four distinct values, 0..3. */
1.278 +typedef enum {
1.279 + G_NORMALIZE_DEFAULT,
1.280 + G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT,
1.281 + G_NORMALIZE_DEFAULT_COMPOSE,
1.282 + G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,
1.283 + G_NORMALIZE_ALL,
1.284 + G_NORMALIZE_NFKD = G_NORMALIZE_ALL,
1.285 + G_NORMALIZE_ALL_COMPOSE,
1.286 + G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
1.287 +} GNormalizeMode;
1.288 +/* Normalize STR according to MODE, a GNormalizeMode. The result carries G_GNUC_MALLOC. */
1.289 +IMPORT_C gchar *g_utf8_normalize (const gchar *str,
1.290 + gssize len,
1.291 + GNormalizeMode mode) G_GNUC_MALLOC;
1.292 +/* Collation. g_utf8_collate returns a gint; the *_key functions return G_GNUC_MALLOC strings. */
1.293 +IMPORT_C gint g_utf8_collate (const gchar *str1,
1.294 + const gchar *str2);
1.295 +IMPORT_C gchar *g_utf8_collate_key (const gchar *str,
1.296 + gssize len) G_GNUC_MALLOC;
1.297 +IMPORT_C gchar *g_utf8_collate_key_for_filename (const gchar *str,
1.298 + gssize len) G_GNUC_MALLOC;
1.299 +/* Mirror-character lookup. NOTE(review): MIRRORED_CH is presumably an out-parameter; confirm. */
1.300 +IMPORT_C gboolean g_unichar_get_mirror_char (gunichar ch,
1.301 + gunichar *mirrored_ch);
1.302 +
1.303 +G_END_DECLS
1.304 +
1.305 +#endif /* __G_UNICODE_H__ */