os/ossrv/glib/glib/gunidecomp.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/* decomp.c - Character decomposition.
 *
 *  Copyright (C) 1999, 2000 Tom Tromey
 *  Copyright 2000 Red Hat, Inc.
 * Portions copyright (c) 2006-2009 Nokia Corporation.  All rights reserved.
 *
 * The Gnome Library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * The Gnome Library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the Gnome Library; see the file COPYING.LIB.  If not,
 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 *   Boston, MA 02111-1307, USA.
 */
sl@0
    22
sl@0
    23
#include "config.h"
sl@0
    24
sl@0
    25
#include <stdlib.h>
sl@0
    26
sl@0
    27
#include "glib.h"
sl@0
    28
#include "gunidecomp.h"
sl@0
    29
#include "gunicomp.h"
sl@0
    30
#include "gunicodeprivate.h"
sl@0
    31
#include "galias.h"
sl@0
    32
sl@0
    33
sl@0
    34
/* Two-level lookup of a combining class in the "part 1" table.
 * A page entry that is >= G_UNICODE_MAX_TABLE_INDEX encodes a single class
 * shared by the whole page (stored with that bias added); any other entry
 * is a row number in cclass_data, which is then indexed by Char.  */
#define CC_PART1(Page, Char) \
  ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (cclass_data[combining_class_table_part1[Page]][Char]))

/* Same scheme as CC_PART1, but driven by the "part 2" page table.  */
#define CC_PART2(Page, Char) \
  ((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (cclass_data[combining_class_table_part2[Page]][Char]))

/* Combining class of Char.
 * Characters up to G_UNICODE_LAST_CHAR_PART1 are looked up through part 1,
 * characters in 0xe0000 .. G_UNICODE_LAST_CHAR through part 2 (with pages
 * counted from 0xe0000); anything else yields 0.
 * Char is evaluated several times, so it must be free of side effects.  */
#define COMBINING_CLASS(Char) \
  (((Char) <= G_UNICODE_LAST_CHAR_PART1) \
   ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
   : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
      ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
      : 0))
sl@0
    50
sl@0
    51
/**
sl@0
    52
 * g_unichar_combining_class:
sl@0
    53
 * @uc: a Unicode character
sl@0
    54
 * 
sl@0
    55
 * Determines the canonical combining class of a Unicode character.
sl@0
    56
 * 
sl@0
    57
 * Return value: the combining class of the character
sl@0
    58
 *
sl@0
    59
 * Since: 2.14
sl@0
    60
 **/
sl@0
    61
EXPORT_C gint
sl@0
    62
g_unichar_combining_class (gunichar uc)
sl@0
    63
{
sl@0
    64
  return COMBINING_CLASS (uc);
sl@0
    65
}
sl@0
    66
sl@0
    67
/* Constants for Hangul syllable [de]composition.
 *
 * SBase is the first precomposed syllable; LBase and VBase are the first
 * leading consonant and vowel.  TBase is the code point just before the
 * first trailing consonant, so a trailing index of 0 stands for "no
 * trailing consonant".  */
#define SBase 0xAC00
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7
#define LCount 19
#define VCount 21
#define TCount 28
/* Syllables per leading consonant.  */
#define NCount (VCount * TCount)
/* Total number of precomposed syllables.  */
#define SCount (LCount * NCount)
sl@0
    77
sl@0
    78
/**
sl@0
    79
 * g_unicode_canonical_ordering:
sl@0
    80
 * @string: a UCS-4 encoded string.
sl@0
    81
 * @len: the maximum length of @string to use.
sl@0
    82
 *
sl@0
    83
 * Computes the canonical ordering of a string in-place.  
sl@0
    84
 * This rearranges decomposed characters in the string 
sl@0
    85
 * according to their combining classes.  See the Unicode 
sl@0
    86
 * manual for more information. 
sl@0
    87
 **/
sl@0
    88
EXPORT_C void
sl@0
    89
g_unicode_canonical_ordering (gunichar *string,
sl@0
    90
			      gsize     len)
sl@0
    91
{
sl@0
    92
  gsize i;
sl@0
    93
  int swap = 1;
sl@0
    94
sl@0
    95
  while (swap)
sl@0
    96
    {
sl@0
    97
      int last;
sl@0
    98
      swap = 0;
sl@0
    99
      last = COMBINING_CLASS (string[0]);
sl@0
   100
      for (i = 0; i < len - 1; ++i)
sl@0
   101
	{
sl@0
   102
	  int next = COMBINING_CLASS (string[i + 1]);
sl@0
   103
	  if (next != 0 && last > next)
sl@0
   104
	    {
sl@0
   105
	      gsize j;
sl@0
   106
	      /* Percolate item leftward through string.  */
sl@0
   107
	      for (j = i + 1; j > 0; --j)
sl@0
   108
		{
sl@0
   109
		  gunichar t;
sl@0
   110
		  if (COMBINING_CLASS (string[j - 1]) <= next)
sl@0
   111
		    break;
sl@0
   112
		  t = string[j];
sl@0
   113
		  string[j] = string[j - 1];
sl@0
   114
		  string[j - 1] = t;
sl@0
   115
		  swap = 1;
sl@0
   116
		}
sl@0
   117
	      /* We're re-entering the loop looking at the old
sl@0
   118
		 character again.  */
sl@0
   119
	      next = last;
sl@0
   120
	    }
sl@0
   121
	  last = next;
sl@0
   122
	}
sl@0
   123
    }
sl@0
   124
}
sl@0
   125
sl@0
   126
/* http://www.unicode.org/unicode/reports/tr15/#Hangul
sl@0
   127
 * r should be null or have sufficient space. Calling with r == NULL will
sl@0
   128
 * only calculate the result_len; however, a buffer with space for three
sl@0
   129
 * characters will always be big enough. */
sl@0
   130
static void
sl@0
   131
decompose_hangul (gunichar s, 
sl@0
   132
                  gunichar *r,
sl@0
   133
                  gsize *result_len)
sl@0
   134
{
sl@0
   135
  gint SIndex = s - SBase;
sl@0
   136
sl@0
   137
  /* not a hangul syllable */
sl@0
   138
  if (SIndex < 0 || SIndex >= SCount)
sl@0
   139
    {
sl@0
   140
      if (r)
sl@0
   141
        r[0] = s;
sl@0
   142
      *result_len = 1;
sl@0
   143
    }
sl@0
   144
  else
sl@0
   145
    {
sl@0
   146
      gunichar L = LBase + SIndex / NCount;
sl@0
   147
      gunichar V = VBase + (SIndex % NCount) / TCount;
sl@0
   148
      gunichar T = TBase + SIndex % TCount;
sl@0
   149
sl@0
   150
      if (r)
sl@0
   151
        {
sl@0
   152
          r[0] = L;
sl@0
   153
          r[1] = V;
sl@0
   154
        }
sl@0
   155
sl@0
   156
      if (T != TBase) 
sl@0
   157
        {
sl@0
   158
          if (r)
sl@0
   159
            r[2] = T;
sl@0
   160
          *result_len = 3;
sl@0
   161
        }
sl@0
   162
      else
sl@0
   163
        *result_len = 2;
sl@0
   164
    }
sl@0
   165
}
sl@0
   166
sl@0
   167
/* returns a pointer to a null-terminated UTF-8 string */
sl@0
   168
static const gchar *
sl@0
   169
find_decomposition (gunichar ch,
sl@0
   170
		    gboolean compat)
sl@0
   171
{
sl@0
   172
  int start = 0;
sl@0
   173
  int end = G_N_ELEMENTS (decomp_table);
sl@0
   174
  
sl@0
   175
  if (ch >= decomp_table[start].ch &&
sl@0
   176
      ch <= decomp_table[end - 1].ch)
sl@0
   177
    {
sl@0
   178
      while (TRUE)
sl@0
   179
	{
sl@0
   180
	  int half = (start + end) / 2;
sl@0
   181
	  if (ch == decomp_table[half].ch)
sl@0
   182
	    {
sl@0
   183
	      int offset;
sl@0
   184
sl@0
   185
	      if (compat)
sl@0
   186
		{
sl@0
   187
		  offset = decomp_table[half].compat_offset;
sl@0
   188
		  if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
sl@0
   189
		    offset = decomp_table[half].canon_offset;
sl@0
   190
		}
sl@0
   191
	      else
sl@0
   192
		{
sl@0
   193
		  offset = decomp_table[half].canon_offset;
sl@0
   194
		  if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
sl@0
   195
		    return NULL;
sl@0
   196
		}
sl@0
   197
	      
sl@0
   198
	      return &(decomp_expansion_string[offset]);
sl@0
   199
	    }
sl@0
   200
	  else if (half == start)
sl@0
   201
	    break;
sl@0
   202
	  else if (ch > decomp_table[half].ch)
sl@0
   203
	    start = half;
sl@0
   204
	  else
sl@0
   205
	    end = half;
sl@0
   206
	}
sl@0
   207
    }
sl@0
   208
sl@0
   209
  return NULL;
sl@0
   210
}
sl@0
   211
sl@0
   212
/**
sl@0
   213
 * g_unicode_canonical_decomposition:
sl@0
   214
 * @ch: a Unicode character.
sl@0
   215
 * @result_len: location to store the length of the return value.
sl@0
   216
 *
sl@0
   217
 * Computes the canonical decomposition of a Unicode character.  
sl@0
   218
 * 
sl@0
   219
 * Return value: a newly allocated string of Unicode characters.
sl@0
   220
 *   @result_len is set to the resulting length of the string.
sl@0
   221
 **/
sl@0
   222
EXPORT_C gunichar *
sl@0
   223
g_unicode_canonical_decomposition (gunichar ch,
sl@0
   224
				   gsize   *result_len)
sl@0
   225
{
sl@0
   226
  const gchar *decomp;
sl@0
   227
  const gchar *p;
sl@0
   228
  gunichar *r;
sl@0
   229
sl@0
   230
  /* Hangul syllable */
sl@0
   231
  if (ch >= 0xac00 && ch <= 0xd7a3)
sl@0
   232
    {
sl@0
   233
      decompose_hangul (ch, NULL, result_len);
sl@0
   234
      r = g_malloc (*result_len * sizeof (gunichar));
sl@0
   235
      decompose_hangul (ch, r, result_len);
sl@0
   236
    }
sl@0
   237
  else if ((decomp = find_decomposition (ch, FALSE)) != NULL)
sl@0
   238
    {
sl@0
   239
      /* Found it.  */
sl@0
   240
      int i;
sl@0
   241
      
sl@0
   242
      *result_len = g_utf8_strlen (decomp, -1);
sl@0
   243
      r = g_malloc (*result_len * sizeof (gunichar));
sl@0
   244
      
sl@0
   245
      for (p = decomp, i = 0; *p != '\0'; p = g_utf8_next_char (p), i++)
sl@0
   246
        r[i] = g_utf8_get_char (p);
sl@0
   247
    }
sl@0
   248
  else
sl@0
   249
    {
sl@0
   250
      /* Not in our table.  */
sl@0
   251
      r = g_malloc (sizeof (gunichar));
sl@0
   252
      *r = ch;
sl@0
   253
      *result_len = 1;
sl@0
   254
    }
sl@0
   255
sl@0
   256
  /* Supposedly following the Unicode 2.1.9 table means that the
sl@0
   257
     decompositions come out in canonical order.  I haven't tested
sl@0
   258
     this, but we rely on it here.  */
sl@0
   259
  return r;
sl@0
   260
}
sl@0
   261
sl@0
   262
/* L,V => LV and LV,T => LVT  */
sl@0
   263
static gboolean
sl@0
   264
combine_hangul (gunichar a,
sl@0
   265
                gunichar b,
sl@0
   266
                gunichar *result)
sl@0
   267
{
sl@0
   268
  gint LIndex = a - LBase;
sl@0
   269
  gint SIndex = a - SBase;
sl@0
   270
sl@0
   271
  gint VIndex = b - VBase;
sl@0
   272
  gint TIndex = b - TBase;
sl@0
   273
sl@0
   274
  if (0 <= LIndex && LIndex < LCount
sl@0
   275
      && 0 <= VIndex && VIndex < VCount)
sl@0
   276
    {
sl@0
   277
      *result = SBase + (LIndex * VCount + VIndex) * TCount;
sl@0
   278
      return TRUE;
sl@0
   279
    }
sl@0
   280
  else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0
sl@0
   281
           && 0 < TIndex && TIndex < TCount)
sl@0
   282
    {
sl@0
   283
      *result = a + TIndex;
sl@0
   284
      return TRUE;
sl@0
   285
    }
sl@0
   286
sl@0
   287
  return FALSE;
sl@0
   288
}
sl@0
   289
sl@0
   290
/* Two-level lookup of a character's composition index.
 * A page entry that is >= G_UNICODE_MAX_TABLE_INDEX encodes one index
 * shared by the whole page (stored with that bias added); any other entry
 * is a row number in compose_data, which is then indexed by Char.  */
#define CI(Page, Char) \
  ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (compose_data[compose_table[Page]][Char]))

/* Composition index of Char, or 0 when its page lies beyond
 * COMPOSE_TABLE_LAST.  Every use of Char is parenthesized so that an
 * expression argument is shifted as a whole.  Char is evaluated several
 * times, so it must be free of side effects.  */
#define COMPOSE_INDEX(Char) \
     ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))
sl@0
   297
sl@0
   298
static gboolean
sl@0
   299
combine (gunichar  a,
sl@0
   300
	 gunichar  b,
sl@0
   301
	 gunichar *result)
sl@0
   302
{
sl@0
   303
  gushort index_a, index_b;
sl@0
   304
sl@0
   305
  if (combine_hangul (a, b, result))
sl@0
   306
    return TRUE;
sl@0
   307
sl@0
   308
  index_a = COMPOSE_INDEX(a);
sl@0
   309
sl@0
   310
  if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)
sl@0
   311
    {
sl@0
   312
      if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])
sl@0
   313
	{
sl@0
   314
	  *result = compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1];
sl@0
   315
	  return TRUE;
sl@0
   316
	}
sl@0
   317
      else
sl@0
   318
        return FALSE;
sl@0
   319
    }
sl@0
   320
  
sl@0
   321
  index_b = COMPOSE_INDEX(b);
sl@0
   322
sl@0
   323
  if (index_b >= COMPOSE_SECOND_SINGLE_START)
sl@0
   324
    {
sl@0
   325
      if (a == compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])
sl@0
   326
	{
sl@0
   327
	  *result = compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1];
sl@0
   328
	  return TRUE;
sl@0
   329
	}
sl@0
   330
      else
sl@0
   331
        return FALSE;
sl@0
   332
    }
sl@0
   333
sl@0
   334
  if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START &&
sl@0
   335
      index_b >= COMPOSE_SECOND_START && index_b < COMPOSE_SECOND_SINGLE_START)
sl@0
   336
    {
sl@0
   337
      gunichar res = compose_array[index_a - COMPOSE_FIRST_START][index_b - COMPOSE_SECOND_START];
sl@0
   338
sl@0
   339
      if (res)
sl@0
   340
	{
sl@0
   341
	  *result = res;
sl@0
   342
	  return TRUE;
sl@0
   343
	}
sl@0
   344
    }
sl@0
   345
sl@0
   346
  return FALSE;
sl@0
   347
}
sl@0
   348
sl@0
   349
gunichar *
sl@0
   350
_g_utf8_normalize_wc (const gchar    *str,
sl@0
   351
		      gssize          max_len,
sl@0
   352
		      GNormalizeMode  mode)
sl@0
   353
{
sl@0
   354
  gsize n_wc;
sl@0
   355
  gunichar *wc_buffer;
sl@0
   356
  const char *p;
sl@0
   357
  gsize last_start;
sl@0
   358
  gboolean do_compat = (mode == G_NORMALIZE_NFKC ||
sl@0
   359
			mode == G_NORMALIZE_NFKD);
sl@0
   360
  gboolean do_compose = (mode == G_NORMALIZE_NFC ||
sl@0
   361
			 mode == G_NORMALIZE_NFKC);
sl@0
   362
sl@0
   363
  n_wc = 0;
sl@0
   364
  p = str;
sl@0
   365
  while ((max_len < 0 || p < str + max_len) && *p)
sl@0
   366
    {
sl@0
   367
      const gchar *decomp;
sl@0
   368
      gunichar wc = g_utf8_get_char (p);
sl@0
   369
sl@0
   370
      if (wc >= 0xac00 && wc <= 0xd7a3)
sl@0
   371
        {
sl@0
   372
          gsize result_len;
sl@0
   373
          decompose_hangul (wc, NULL, &result_len);
sl@0
   374
          n_wc += result_len;
sl@0
   375
        }
sl@0
   376
      else 
sl@0
   377
        {
sl@0
   378
          decomp = find_decomposition (wc, do_compat);
sl@0
   379
sl@0
   380
          if (decomp)
sl@0
   381
            n_wc += g_utf8_strlen (decomp, -1);
sl@0
   382
          else
sl@0
   383
            n_wc++;
sl@0
   384
        }
sl@0
   385
sl@0
   386
      p = g_utf8_next_char (p);
sl@0
   387
    }
sl@0
   388
sl@0
   389
  wc_buffer = g_new (gunichar, n_wc + 1);
sl@0
   390
sl@0
   391
  last_start = 0;
sl@0
   392
  n_wc = 0;
sl@0
   393
  p = str;
sl@0
   394
  while ((max_len < 0 || p < str + max_len) && *p)
sl@0
   395
    {
sl@0
   396
      gunichar wc = g_utf8_get_char (p);
sl@0
   397
      const gchar *decomp;
sl@0
   398
      int cc;
sl@0
   399
      gsize old_n_wc = n_wc;
sl@0
   400
	  
sl@0
   401
      if (wc >= 0xac00 && wc <= 0xd7a3)
sl@0
   402
        {
sl@0
   403
          gsize result_len;
sl@0
   404
          decompose_hangul (wc, wc_buffer + n_wc, &result_len);
sl@0
   405
          n_wc += result_len;
sl@0
   406
        }
sl@0
   407
      else
sl@0
   408
        {
sl@0
   409
          decomp = find_decomposition (wc, do_compat);
sl@0
   410
          
sl@0
   411
          if (decomp)
sl@0
   412
            {
sl@0
   413
              const char *pd;
sl@0
   414
              for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
sl@0
   415
                wc_buffer[n_wc++] = g_utf8_get_char (pd);
sl@0
   416
            }
sl@0
   417
          else
sl@0
   418
            wc_buffer[n_wc++] = wc;
sl@0
   419
        }
sl@0
   420
sl@0
   421
      if (n_wc > 0)
sl@0
   422
	{
sl@0
   423
	  cc = COMBINING_CLASS (wc_buffer[old_n_wc]);
sl@0
   424
sl@0
   425
	  if (cc == 0)
sl@0
   426
	    {
sl@0
   427
	      g_unicode_canonical_ordering (wc_buffer + last_start, n_wc - last_start);
sl@0
   428
	      last_start = old_n_wc;
sl@0
   429
	    }
sl@0
   430
	}
sl@0
   431
      
sl@0
   432
      p = g_utf8_next_char (p);
sl@0
   433
    }
sl@0
   434
sl@0
   435
  if (n_wc > 0)
sl@0
   436
    {
sl@0
   437
      g_unicode_canonical_ordering (wc_buffer + last_start, n_wc - last_start);
sl@0
   438
      last_start = n_wc;
sl@0
   439
    }
sl@0
   440
	  
sl@0
   441
  wc_buffer[n_wc] = 0;
sl@0
   442
sl@0
   443
  /* All decomposed and reordered */ 
sl@0
   444
sl@0
   445
  if (do_compose && n_wc > 0)
sl@0
   446
    {
sl@0
   447
      gsize i, j;
sl@0
   448
      int last_cc = 0;
sl@0
   449
      last_start = 0;
sl@0
   450
      
sl@0
   451
      for (i = 0; i < n_wc; i++)
sl@0
   452
	{
sl@0
   453
	  int cc = COMBINING_CLASS (wc_buffer[i]);
sl@0
   454
sl@0
   455
	  if (i > 0 &&
sl@0
   456
	      (last_cc == 0 || last_cc < cc) &&
sl@0
   457
	      combine (wc_buffer[last_start], wc_buffer[i],
sl@0
   458
		       &wc_buffer[last_start]))
sl@0
   459
	    {
sl@0
   460
	      for (j = i + 1; j < n_wc; j++)
sl@0
   461
		wc_buffer[j-1] = wc_buffer[j];
sl@0
   462
	      n_wc--;
sl@0
   463
	      i--;
sl@0
   464
	      
sl@0
   465
	      if (i == last_start)
sl@0
   466
		last_cc = 0;
sl@0
   467
	      else
sl@0
   468
		last_cc = COMBINING_CLASS (wc_buffer[i-1]);
sl@0
   469
	      
sl@0
   470
	      continue;
sl@0
   471
	    }
sl@0
   472
sl@0
   473
	  if (cc == 0)
sl@0
   474
	    last_start = i;
sl@0
   475
sl@0
   476
	  last_cc = cc;
sl@0
   477
	}
sl@0
   478
    }
sl@0
   479
sl@0
   480
  wc_buffer[n_wc] = 0;
sl@0
   481
sl@0
   482
  return wc_buffer;
sl@0
   483
}
sl@0
   484
sl@0
   485
/**
sl@0
   486
 * g_utf8_normalize:
sl@0
   487
 * @str: a UTF-8 encoded string.
sl@0
   488
 * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
sl@0
   489
 * @mode: the type of normalization to perform.
sl@0
   490
 *
sl@0
   491
 * Converts a string into canonical form, standardizing
sl@0
   492
 * such issues as whether a character with an accent
sl@0
   493
 * is represented as a base character and combining
sl@0
   494
 * accent or as a single precomposed character. The
sl@0
   495
 * string has to be valid UTF-8, otherwise %NULL is
sl@0
   496
 * returned. You should generally call g_utf8_normalize()
sl@0
   497
 * before comparing two Unicode strings.
sl@0
   498
 *
sl@0
   499
 * The normalization mode %G_NORMALIZE_DEFAULT only
sl@0
   500
 * standardizes differences that do not affect the
sl@0
   501
 * text content, such as the above-mentioned accent
sl@0
   502
 * representation. %G_NORMALIZE_ALL also standardizes
sl@0
   503
 * the "compatibility" characters in Unicode, such
sl@0
   504
 * as SUPERSCRIPT THREE to the standard forms
sl@0
   505
 * (in this case DIGIT THREE). Formatting information
sl@0
   506
 * may be lost but for most text operations such
sl@0
   507
 * characters should be considered the same.
sl@0
   508
 *
sl@0
   509
 * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE
sl@0
   510
 * are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,
sl@0
   511
 * but returned a result with composed forms rather
sl@0
   512
 * than a maximally decomposed form. This is often
sl@0
   513
 * useful if you intend to convert the string to
sl@0
   514
 * a legacy encoding or pass it to a system with
sl@0
   515
 * less capable Unicode handling.
sl@0
   516
 *
sl@0
   517
 * Return value: a newly allocated string, that is the
sl@0
   518
 *   normalized form of @str, or %NULL if @str is not
sl@0
   519
 *   valid UTF-8.
sl@0
   520
 **/
sl@0
   521
EXPORT_C gchar *
sl@0
   522
g_utf8_normalize (const gchar    *str,
sl@0
   523
		  gssize          len,
sl@0
   524
		  GNormalizeMode  mode)
sl@0
   525
{
sl@0
   526
  gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
sl@0
   527
  gchar *result;
sl@0
   528
sl@0
   529
  result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
sl@0
   530
  g_free (result_wc);
sl@0
   531
sl@0
   532
  return result;
sl@0
   533
}
sl@0
   534
sl@0
   535
#define __G_UNIDECOMP_C__
sl@0
   536
#include "galiasdef.c"