os/ossrv/glib/tsrc/BC/tests/unicode-encoding.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/ossrv/glib/tsrc/BC/tests/unicode-encoding.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,442 @@
     1.4 +/* Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.*/
     1.5 +#undef G_DISABLE_ASSERT
     1.6 +#undef G_LOG_DOMAIN
     1.7 +
     1.8 +#include <stdarg.h>
     1.9 +#include <stdio.h>
    1.10 +#include <stdlib.h>
    1.11 +#include <string.h>
    1.12 +#include <glib.h>
    1.13 +
    1.14 +#ifdef SYMBIAN
    1.15 +#include "mrt2_glib2_test.h"
    1.16 +#endif /*SYMBIAN*/
/* Process exit code: starts at 0, has bit 0 set by fail() whenever a check
 * does not hold, and is returned from main() once every record is processed.
 */
static gint exit_status = 0;
    1.18 +
    1.19 +static void
    1.20 +croak (char *format, ...)
    1.21 +{
    1.22 +  va_list va;
    1.23 +  
    1.24 +  va_start (va, format);
    1.25 +  vfprintf (stderr, format, va);
    1.26 +  va_end (va);
    1.27 +
    1.28 +  exit (1);
    1.29 +}
    1.30 +
    1.31 +static void
    1.32 +fail (char *format, ...)
    1.33 +{
    1.34 +  va_list va;
    1.35 +  
    1.36 +  va_start (va, format);
    1.37 +  vfprintf (stderr, format, va);
    1.38 +  va_end (va);
    1.39 +
    1.40 +  exit_status |= 1;
    1.41 +}
    1.42 +
/* Expected classification of one UTF-8 sample, parsed by main() from the
 * status line of each record and acted upon by process().
 */
typedef enum
{
  /* g_utf8_validate() must accept the sample; it is then round-tripped
   * through UCS-4 and UTF-16. */
  VALID,
  /* g_utf8_validate() must reject the sample and g_utf8_to_ucs4() must
   * report G_CONVERT_ERROR_PARTIAL_INPUT. */
  INCOMPLETE,
  /* g_utf8_validate() must reject the sample, but it is still round-tripped
   * through UCS-4. */
  NOTUNICODE,
  /* g_utf8_validate() must reject the sample; no further checks are made. */
  OVERLONG,
  /* g_utf8_validate() must reject the sample; no further checks are made. */
  MALFORMED
} Status;
    1.51 +
    1.52 +static gboolean
    1.53 +ucs4_equal (gunichar *a, gunichar *b)
    1.54 +{
    1.55 +  while (*a && *b && (*a == *b))
    1.56 +    {
    1.57 +      a++;
    1.58 +      b++;
    1.59 +    }
    1.60 +
    1.61 +  return (*a == *b);
    1.62 +}
    1.63 +
    1.64 +static gboolean
    1.65 +utf16_equal (gunichar2 *a, gunichar2 *b)
    1.66 +{
    1.67 +  while (*a && *b && (*a == *b))
    1.68 +    {
    1.69 +      a++;
    1.70 +      b++;
    1.71 +    }
    1.72 +
    1.73 +  return (*a == *b);
    1.74 +}
    1.75 +
    1.76 +static gint
    1.77 +utf16_count (gunichar2 *a)
    1.78 +{
    1.79 +  gint result = 0;
    1.80 +  
    1.81 +  while (a[result])
    1.82 +    result++;
    1.83 +
    1.84 +  return result;
    1.85 +}
    1.86 +
    1.87 +static void
    1.88 +process (gint      line,
    1.89 +	 gchar    *utf8,
    1.90 +	 Status    status,
    1.91 +	 gunichar *ucs4,
    1.92 +	 gint      ucs4_len)
    1.93 +{
    1.94 +  const gchar *end;
    1.95 +  gboolean is_valid = g_utf8_validate (utf8, -1, &end);
    1.96 +  GError *error = NULL;
    1.97 +  glong items_read, items_written;
    1.98 +
    1.99 +  switch (status)
   1.100 +    {
   1.101 +    case VALID:
   1.102 +      if (!is_valid)
   1.103 +	{
   1.104 +	  fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
   1.105 +	  return;
   1.106 +	}
   1.107 +      break;
   1.108 +    case NOTUNICODE:
   1.109 +    case INCOMPLETE:
   1.110 +    case OVERLONG:
   1.111 +    case MALFORMED:
   1.112 +      if (is_valid)
   1.113 +	{
   1.114 +	  fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
   1.115 +	  return;
   1.116 +	}
   1.117 +      break;
   1.118 +    }
   1.119 +
   1.120 +  if (status == INCOMPLETE)
   1.121 +    {
   1.122 +      gunichar *ucs4_result;      
   1.123 +
   1.124 +      ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);
   1.125 +
   1.126 +      if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
   1.127 +	{
   1.128 +	  fail ("line %d: incomplete input not properly detected\n", line);
   1.129 +	  return;
   1.130 +	}
   1.131 +      g_clear_error (&error);
   1.132 +
   1.133 +      ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);
   1.134 +
   1.135 +      if (!ucs4_result || items_read == strlen (utf8))
   1.136 +	{
   1.137 +	  fail ("line %d: incomplete input not properly detected\n", line);
   1.138 +	  return;
   1.139 +	}
   1.140 +
   1.141 +      g_free (ucs4_result);
   1.142 +    }
   1.143 +
   1.144 +  if (status == VALID || status == NOTUNICODE)
   1.145 +    {
   1.146 +      gunichar *ucs4_result;
   1.147 +      gchar *utf8_result;
   1.148 +
   1.149 +      ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
   1.150 +      if (!ucs4_result)
   1.151 +	{
   1.152 +	  fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message);
   1.153 +	  return;
   1.154 +	}
   1.155 +      
   1.156 +      if (!ucs4_equal (ucs4_result, ucs4) ||
   1.157 +	  items_read != strlen (utf8) ||
   1.158 +	  items_written != ucs4_len)
   1.159 +	{
   1.160 +	  fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
   1.161 +	  return;
   1.162 +	}
   1.163 +
   1.164 +      g_free (ucs4_result);
   1.165 +
   1.166 +      ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);
   1.167 +      
   1.168 +      if (!ucs4_equal (ucs4_result, ucs4) ||
   1.169 +	  items_written != ucs4_len)
   1.170 +	{
   1.171 +	  fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
   1.172 +	  return;
   1.173 +	}
   1.174 +
   1.175 +      utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
   1.176 +      if (!utf8_result)
   1.177 +	{
   1.178 +	  fail ("line %d: conversion back to utf8 failed: %s", line, error->message);
   1.179 +	  return;
   1.180 +	}
   1.181 +
   1.182 +      if (strcmp (utf8_result, utf8) != 0 ||
   1.183 +	  items_read != ucs4_len ||
   1.184 +	  items_written != strlen (utf8))
   1.185 +	{
   1.186 +	  fail ("line %d: conversion back to utf8 did not match original\n", line);
   1.187 +	  return;
   1.188 +	}
   1.189 +
   1.190 +      g_free (utf8_result);
   1.191 +      g_free (ucs4_result);
   1.192 +    }
   1.193 +
   1.194 +  if (status == VALID)
   1.195 +    {
   1.196 +      gunichar2 *utf16_expected_tmp;
   1.197 +      gunichar2 *utf16_expected;
   1.198 +      gunichar2 *utf16_from_utf8;
   1.199 +      gunichar2 *utf16_from_ucs4;
   1.200 +      gunichar *ucs4_result;
   1.201 +      gsize bytes_written;
   1.202 +      gint n_chars;
   1.203 +      gchar *utf8_result;
   1.204 +
   1.205 +#if defined(G_PLATFORM_WIN32) || defined(SYMBIAN)
   1.206 +#define TARGET "UTF-16LE"
   1.207 +#else
   1.208 +#define TARGET "UTF-16"
   1.209 +#endif
   1.210 +
   1.211 +      if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8",
   1.212 +							 NULL, &bytes_written, NULL)))
   1.213 +	{
   1.214 +	  fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
   1.215 +	  return;
   1.216 +	}
   1.217 +
   1.218 +      /* zero-terminate and remove BOM
   1.219 +       */
   1.220 +      n_chars = bytes_written / 2;
   1.221 +      if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
   1.222 +	{
   1.223 +	  n_chars--;
   1.224 +	  utf16_expected = g_new (gunichar2, n_chars + 1);
   1.225 +	  memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars);
   1.226 +	}
   1.227 +      else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
   1.228 +	{
   1.229 +	  fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
   1.230 +	  return;
   1.231 +	}
   1.232 +      else
   1.233 +	{
   1.234 +	  utf16_expected = g_new (gunichar2, n_chars + 1);
   1.235 +	  memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars);
   1.236 +	}
   1.237 +
   1.238 +      utf16_expected[n_chars] = '\0';
   1.239 +      
   1.240 +      if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error)))
   1.241 +	{
   1.242 +	  fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
   1.243 +	  return;
   1.244 +	}
   1.245 +
   1.246 +      if (items_read != strlen (utf8) ||
   1.247 +	  utf16_count (utf16_from_utf8) != items_written)
   1.248 +	{
   1.249 +	  fail ("line %d: length error in conversion to ucs16\n", line);
   1.250 +	  return;
   1.251 +	}
   1.252 +
   1.253 +      if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error)))
   1.254 +	{
   1.255 +	  fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
   1.256 +	  return;
   1.257 +	}
   1.258 +
   1.259 +      if (items_read != ucs4_len ||
   1.260 +	  utf16_count (utf16_from_ucs4) != items_written)
   1.261 +	{
   1.262 +	  fail ("line %d: length error in conversion to ucs16\n", line);
   1.263 +	  return;
   1.264 +	}
   1.265 +
   1.266 +      if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
   1.267 +	  !utf16_equal (utf16_from_ucs4, utf16_expected))
   1.268 +	{
   1.269 +	  fail ("line %d: results of conversion to ucs16 do not match\n", line);
   1.270 +	  return;
   1.271 +	}
   1.272 +
   1.273 +      if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error)))
   1.274 +	{
   1.275 +	  fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
   1.276 +	  return;
   1.277 +	}
   1.278 +
   1.279 +      if (items_read != utf16_count (utf16_from_utf8) ||
   1.280 +	  items_written != strlen (utf8))
   1.281 +	{
   1.282 +	  fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
   1.283 +	  return;
   1.284 +	}
   1.285 +
   1.286 +      if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error)))
   1.287 +	{
   1.288 +	  fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
   1.289 +	  return;
   1.290 +	}
   1.291 +
   1.292 +      if (items_read != utf16_count (utf16_from_utf8) ||
   1.293 +	  items_written != ucs4_len)
   1.294 +	{
   1.295 +	  fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
   1.296 +	  return;
   1.297 +	}
   1.298 +
   1.299 +      if (strcmp (utf8, utf8_result) != 0 ||
   1.300 +	  !ucs4_equal (ucs4, ucs4_result))
   1.301 +	{
   1.302 +	  fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
   1.303 +	  return;
   1.304 +	}
   1.305 +      
   1.306 +      g_free (utf16_expected_tmp);
   1.307 +      g_free (utf16_expected);
   1.308 +      g_free (utf16_from_utf8);
   1.309 +      g_free (utf16_from_ucs4);
   1.310 +      g_free (utf8_result);
   1.311 +      g_free (ucs4_result);
   1.312 +    }
   1.313 +}
   1.314 +
   1.315 +int
   1.316 +main (int argc, char **argv)
   1.317 +{
   1.318 +  gchar *srcdir = getenv ("srcdir");
   1.319 +  gchar *testfile;
   1.320 +  gchar *contents;
   1.321 +  GError *error = NULL;
   1.322 +  gchar *p, *end;
   1.323 +  char *tmp;
   1.324 +  gint state = 0;
   1.325 +  gint line = 1;
   1.326 +  gint start_line = 0;		/* Quiet GCC */
   1.327 +  gchar *utf8 = NULL;		/* Quiet GCC */
   1.328 +  GArray *ucs4;
   1.329 +  Status status = VALID;	/* Quiet GCC */
   1.330 +
   1.331 +  #ifdef SYMBIAN
   1.332 + 
   1.333 +  g_log_set_handler (NULL,  G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL);
   1.334 +  g_set_print_handler(mrtPrintHandler);
   1.335 +  #endif /*SYMBIAN*/
   1.336 +  if (!srcdir)
   1.337 +    srcdir = "c:";
   1.338 +  
   1.339 +  testfile = g_strconcat (srcdir, G_DIR_SEPARATOR_S "utf8.txt", NULL);
   1.340 +  
   1.341 +  g_file_get_contents (testfile, &contents, NULL, &error);
   1.342 +  if (error)
   1.343 +  {
   1.344 +  	croak ("Cannot open utf8.txt: %s", error->message);
   1.345 +  	
   1.346 +  	#ifdef SYMBIAN
   1.347 +  	testResultXml("unicode-encoding");
   1.348 +  	#endif /* EMULATOR */
   1.349 +  	
   1.350 +  	exit(1);
   1.351 +  }
   1.352 +
   1.353 +  ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar));
   1.354 +
   1.355 +  p = contents;
   1.356 +
   1.357 +  /* Loop over lines */
   1.358 +  while (*p)
   1.359 +    {
   1.360 +      while (*p && (*p == ' ' || *p == '\t'))
   1.361 +	p++;
   1.362 +
   1.363 +      end = p;
   1.364 +      while (*end && (*end != '\r' && *end != '\n'))
   1.365 +	end++;
   1.366 +      
   1.367 +      if (!*p || *p == '#' || *p == '\r' || *p == '\n')
   1.368 +	goto next_line;
   1.369 +
   1.370 +      tmp = g_strstrip (g_strndup (p, end - p));
   1.371 +      
   1.372 +      switch (state)
   1.373 +	{
   1.374 +	case 0:
   1.375 +	  /* UTF-8 string */
   1.376 +	  start_line = line;
   1.377 +	  utf8 = tmp;
   1.378 +	  tmp = NULL;
   1.379 +	  break;
   1.380 +	  
   1.381 +	case 1:
   1.382 +	  /* Status */
   1.383 +	  if (!strcmp (tmp, "VALID"))
   1.384 +	    status = VALID;
   1.385 +	  else if (!strcmp (tmp, "INCOMPLETE"))
   1.386 +	    status = INCOMPLETE;
   1.387 +	  else if (!strcmp (tmp, "NOTUNICODE"))
   1.388 +	    status = NOTUNICODE;
   1.389 +	  else if (!strcmp (tmp, "OVERLONG"))
   1.390 +	    status = OVERLONG;
   1.391 +	  else if (!strcmp (tmp, "MALFORMED"))
   1.392 +	    status = MALFORMED;
   1.393 +	  else
   1.394 +	    croak ("Invalid status on line %d\n", line);
   1.395 +
   1.396 +	  if (status != VALID && status != NOTUNICODE)
   1.397 +	    state++;		/* No UCS-4 data */
   1.398 +	  
   1.399 +	  break;
   1.400 +	  
   1.401 +	case 2:
   1.402 +	  /* UCS-4 version */
   1.403 +
   1.404 +	  p = strtok (tmp, " \t");
   1.405 +	  while (p)
   1.406 +	    {
   1.407 +	      gchar *endptr;
   1.408 +	      
   1.409 +	      gunichar ch = strtoul (p, &endptr, 16);
   1.410 +	      if (*endptr != '\0')
   1.411 +		croak ("Invalid UCS-4 character on line %d\n", line);
   1.412 +
   1.413 +	      g_array_append_val (ucs4, ch);
   1.414 +	      
   1.415 +	      p = strtok (NULL, " \t");
   1.416 +	    }
   1.417 +
   1.418 +	  break;
   1.419 +	}
   1.420 +
   1.421 +      g_free (tmp);
   1.422 +      state = (state + 1) % 3;
   1.423 +
   1.424 +      if (state == 0)
   1.425 +	{
   1.426 +	  process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len);
   1.427 +	  g_array_set_size (ucs4, 0);
   1.428 +	  g_free (utf8);
   1.429 +	}
   1.430 +      
   1.431 +    next_line:
   1.432 +      p = end;
   1.433 +      if (*p && *p == '\r')
   1.434 +	p++;
   1.435 +      if (*p && *p == '\n')
   1.436 +	p++;
   1.437 +      
   1.438 +      line++;
   1.439 +    }
   1.440 +
   1.441 +    #ifdef SYMBIAN
   1.442 +  	testResultXml("unicode-encoding");
   1.443 +  	#endif /* EMULATOR */
   1.444 +  return exit_status;
   1.445 +}