1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/glib/tsrc/BC/tests/unicode-encoding.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,442 @@
1.4 +/* Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.*/
1.5 +#undef G_DISABLE_ASSERT
1.6 +#undef G_LOG_DOMAIN
1.7 +
1.8 +#include <stdarg.h>
1.9 +#include <stdio.h>
1.10 +#include <stdlib.h>
1.11 +#include <string.h>
1.12 +#include <glib.h>
1.13 +
1.14 +#ifdef SYMBIAN
1.15 +#include "mrt2_glib2_test.h"
1.16 +#endif /*SYMBIAN*/
/* Process exit code returned by main(); fail() ORs 1 into it so that any
 * recorded failure makes the program exit non-zero. */
static gint exit_status = 0;
1.18 +
/*
 * Report a fatal problem and stop the program.
 *
 * format - printf-style format string; the remaining arguments supply its
 *          conversions.
 *
 * The formatted text is written to stderr, after which the process ends
 * with exit status 1.  This function never returns to its caller.
 */
static void
croak (char *format, ...)
{
  va_list args;

  va_start (args, format);
  vfprintf (stderr, format, args);
  va_end (args);

  /* Fatal: nothing after this call is executed. */
  exit (1);
}
1.30 +
1.31 +static void
1.32 +fail (char *format, ...)
1.33 +{
1.34 + va_list va;
1.35 +
1.36 + va_start (va, format);
1.37 + vfprintf (stderr, format, va);
1.38 + va_end (va);
1.39 +
1.40 + exit_status |= 1;
1.41 +}
1.42 +
/*
 * Expected classification of one UTF-8 test string.  main() parses the
 * enumerator names verbatim from the data file; process() picks its checks
 * from the value.
 */
typedef enum
{
  VALID      = 0,  /* must pass g_utf8_validate and round-trip via UCS-4 and UTF-16 */
  INCOMPLETE = 1,  /* must fail validation; conversion must report partial input */
  NOTUNICODE = 2,  /* must fail validation but still round-trip via UCS-4 */
  OVERLONG   = 3,  /* must fail validation; no conversion checks are run */
  MALFORMED  = 4   /* must fail validation; no conversion checks are run */
} Status;
1.51 +
1.52 +static gboolean
1.53 +ucs4_equal (gunichar *a, gunichar *b)
1.54 +{
1.55 + while (*a && *b && (*a == *b))
1.56 + {
1.57 + a++;
1.58 + b++;
1.59 + }
1.60 +
1.61 + return (*a == *b);
1.62 +}
1.63 +
1.64 +static gboolean
1.65 +utf16_equal (gunichar2 *a, gunichar2 *b)
1.66 +{
1.67 + while (*a && *b && (*a == *b))
1.68 + {
1.69 + a++;
1.70 + b++;
1.71 + }
1.72 +
1.73 + return (*a == *b);
1.74 +}
1.75 +
1.76 +static gint
1.77 +utf16_count (gunichar2 *a)
1.78 +{
1.79 + gint result = 0;
1.80 +
1.81 + while (a[result])
1.82 + result++;
1.83 +
1.84 + return result;
1.85 +}
1.86 +
1.87 +static void
1.88 +process (gint line,
1.89 + gchar *utf8,
1.90 + Status status,
1.91 + gunichar *ucs4,
1.92 + gint ucs4_len)
1.93 +{
1.94 + const gchar *end;
1.95 + gboolean is_valid = g_utf8_validate (utf8, -1, &end);
1.96 + GError *error = NULL;
1.97 + glong items_read, items_written;
1.98 +
1.99 + switch (status)
1.100 + {
1.101 + case VALID:
1.102 + if (!is_valid)
1.103 + {
1.104 + fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
1.105 + return;
1.106 + }
1.107 + break;
1.108 + case NOTUNICODE:
1.109 + case INCOMPLETE:
1.110 + case OVERLONG:
1.111 + case MALFORMED:
1.112 + if (is_valid)
1.113 + {
1.114 + fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
1.115 + return;
1.116 + }
1.117 + break;
1.118 + }
1.119 +
1.120 + if (status == INCOMPLETE)
1.121 + {
1.122 + gunichar *ucs4_result;
1.123 +
1.124 + ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);
1.125 +
1.126 + if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
1.127 + {
1.128 + fail ("line %d: incomplete input not properly detected\n", line);
1.129 + return;
1.130 + }
1.131 + g_clear_error (&error);
1.132 +
1.133 + ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);
1.134 +
1.135 + if (!ucs4_result || items_read == strlen (utf8))
1.136 + {
1.137 + fail ("line %d: incomplete input not properly detected\n", line);
1.138 + return;
1.139 + }
1.140 +
1.141 + g_free (ucs4_result);
1.142 + }
1.143 +
1.144 + if (status == VALID || status == NOTUNICODE)
1.145 + {
1.146 + gunichar *ucs4_result;
1.147 + gchar *utf8_result;
1.148 +
1.149 + ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
1.150 + if (!ucs4_result)
1.151 + {
1.152 + fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message);
1.153 + return;
1.154 + }
1.155 +
1.156 + if (!ucs4_equal (ucs4_result, ucs4) ||
1.157 + items_read != strlen (utf8) ||
1.158 + items_written != ucs4_len)
1.159 + {
1.160 + fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
1.161 + return;
1.162 + }
1.163 +
1.164 + g_free (ucs4_result);
1.165 +
1.166 + ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);
1.167 +
1.168 + if (!ucs4_equal (ucs4_result, ucs4) ||
1.169 + items_written != ucs4_len)
1.170 + {
1.171 + fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
1.172 + return;
1.173 + }
1.174 +
1.175 + utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
1.176 + if (!utf8_result)
1.177 + {
1.178 + fail ("line %d: conversion back to utf8 failed: %s", line, error->message);
1.179 + return;
1.180 + }
1.181 +
1.182 + if (strcmp (utf8_result, utf8) != 0 ||
1.183 + items_read != ucs4_len ||
1.184 + items_written != strlen (utf8))
1.185 + {
1.186 + fail ("line %d: conversion back to utf8 did not match original\n", line);
1.187 + return;
1.188 + }
1.189 +
1.190 + g_free (utf8_result);
1.191 + g_free (ucs4_result);
1.192 + }
1.193 +
1.194 + if (status == VALID)
1.195 + {
1.196 + gunichar2 *utf16_expected_tmp;
1.197 + gunichar2 *utf16_expected;
1.198 + gunichar2 *utf16_from_utf8;
1.199 + gunichar2 *utf16_from_ucs4;
1.200 + gunichar *ucs4_result;
1.201 + gsize bytes_written;
1.202 + gint n_chars;
1.203 + gchar *utf8_result;
1.204 +
1.205 +#if defined(G_PLATFORM_WIN32) || defined(SYMBIAN)
1.206 +#define TARGET "UTF-16LE"
1.207 +#else
1.208 +#define TARGET "UTF-16"
1.209 +#endif
1.210 +
1.211 + if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8",
1.212 + NULL, &bytes_written, NULL)))
1.213 + {
1.214 + fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
1.215 + return;
1.216 + }
1.217 +
1.218 + /* zero-terminate and remove BOM
1.219 + */
1.220 + n_chars = bytes_written / 2;
1.221 + if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
1.222 + {
1.223 + n_chars--;
1.224 + utf16_expected = g_new (gunichar2, n_chars + 1);
1.225 + memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars);
1.226 + }
1.227 + else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
1.228 + {
1.229 + fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
1.230 + return;
1.231 + }
1.232 + else
1.233 + {
1.234 + utf16_expected = g_new (gunichar2, n_chars + 1);
1.235 + memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars);
1.236 + }
1.237 +
1.238 + utf16_expected[n_chars] = '\0';
1.239 +
1.240 + if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error)))
1.241 + {
1.242 + fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
1.243 + return;
1.244 + }
1.245 +
1.246 + if (items_read != strlen (utf8) ||
1.247 + utf16_count (utf16_from_utf8) != items_written)
1.248 + {
1.249 + fail ("line %d: length error in conversion to ucs16\n", line);
1.250 + return;
1.251 + }
1.252 +
1.253 + if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error)))
1.254 + {
1.255 + fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
1.256 + return;
1.257 + }
1.258 +
1.259 + if (items_read != ucs4_len ||
1.260 + utf16_count (utf16_from_ucs4) != items_written)
1.261 + {
1.262 + fail ("line %d: length error in conversion to ucs16\n", line);
1.263 + return;
1.264 + }
1.265 +
1.266 + if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
1.267 + !utf16_equal (utf16_from_ucs4, utf16_expected))
1.268 + {
1.269 + fail ("line %d: results of conversion to ucs16 do not match\n", line);
1.270 + return;
1.271 + }
1.272 +
1.273 + if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error)))
1.274 + {
1.275 + fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
1.276 + return;
1.277 + }
1.278 +
1.279 + if (items_read != utf16_count (utf16_from_utf8) ||
1.280 + items_written != strlen (utf8))
1.281 + {
1.282 + fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
1.283 + return;
1.284 + }
1.285 +
1.286 + if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error)))
1.287 + {
1.288 + fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
1.289 + return;
1.290 + }
1.291 +
1.292 + if (items_read != utf16_count (utf16_from_utf8) ||
1.293 + items_written != ucs4_len)
1.294 + {
1.295 + fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
1.296 + return;
1.297 + }
1.298 +
1.299 + if (strcmp (utf8, utf8_result) != 0 ||
1.300 + !ucs4_equal (ucs4, ucs4_result))
1.301 + {
1.302 + fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
1.303 + return;
1.304 + }
1.305 +
1.306 + g_free (utf16_expected_tmp);
1.307 + g_free (utf16_expected);
1.308 + g_free (utf16_from_utf8);
1.309 + g_free (utf16_from_ucs4);
1.310 + g_free (utf8_result);
1.311 + g_free (ucs4_result);
1.312 + }
1.313 +}
1.314 +
1.315 +int
1.316 +main (int argc, char **argv)
1.317 +{
1.318 + gchar *srcdir = getenv ("srcdir");
1.319 + gchar *testfile;
1.320 + gchar *contents;
1.321 + GError *error = NULL;
1.322 + gchar *p, *end;
1.323 + char *tmp;
1.324 + gint state = 0;
1.325 + gint line = 1;
1.326 + gint start_line = 0; /* Quiet GCC */
1.327 + gchar *utf8 = NULL; /* Quiet GCC */
1.328 + GArray *ucs4;
1.329 + Status status = VALID; /* Quiet GCC */
1.330 +
1.331 + #ifdef SYMBIAN
1.332 +
1.333 + g_log_set_handler (NULL, G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL);
1.334 + g_set_print_handler(mrtPrintHandler);
1.335 + #endif /*SYMBIAN*/
1.336 + if (!srcdir)
1.337 + srcdir = "c:";
1.338 +
1.339 + testfile = g_strconcat (srcdir, G_DIR_SEPARATOR_S "utf8.txt", NULL);
1.340 +
1.341 + g_file_get_contents (testfile, &contents, NULL, &error);
1.342 + if (error)
1.343 + {
1.344 + croak ("Cannot open utf8.txt: %s", error->message);
1.345 +
1.346 + #ifdef SYMBIAN
1.347 + testResultXml("unicode-encoding");
1.348 + #endif /* EMULATOR */
1.349 +
1.350 + exit(1);
1.351 + }
1.352 +
1.353 + ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar));
1.354 +
1.355 + p = contents;
1.356 +
1.357 + /* Loop over lines */
1.358 + while (*p)
1.359 + {
1.360 + while (*p && (*p == ' ' || *p == '\t'))
1.361 + p++;
1.362 +
1.363 + end = p;
1.364 + while (*end && (*end != '\r' && *end != '\n'))
1.365 + end++;
1.366 +
1.367 + if (!*p || *p == '#' || *p == '\r' || *p == '\n')
1.368 + goto next_line;
1.369 +
1.370 + tmp = g_strstrip (g_strndup (p, end - p));
1.371 +
1.372 + switch (state)
1.373 + {
1.374 + case 0:
1.375 + /* UTF-8 string */
1.376 + start_line = line;
1.377 + utf8 = tmp;
1.378 + tmp = NULL;
1.379 + break;
1.380 +
1.381 + case 1:
1.382 + /* Status */
1.383 + if (!strcmp (tmp, "VALID"))
1.384 + status = VALID;
1.385 + else if (!strcmp (tmp, "INCOMPLETE"))
1.386 + status = INCOMPLETE;
1.387 + else if (!strcmp (tmp, "NOTUNICODE"))
1.388 + status = NOTUNICODE;
1.389 + else if (!strcmp (tmp, "OVERLONG"))
1.390 + status = OVERLONG;
1.391 + else if (!strcmp (tmp, "MALFORMED"))
1.392 + status = MALFORMED;
1.393 + else
1.394 + croak ("Invalid status on line %d\n", line);
1.395 +
1.396 + if (status != VALID && status != NOTUNICODE)
1.397 + state++; /* No UCS-4 data */
1.398 +
1.399 + break;
1.400 +
1.401 + case 2:
1.402 + /* UCS-4 version */
1.403 +
1.404 + p = strtok (tmp, " \t");
1.405 + while (p)
1.406 + {
1.407 + gchar *endptr;
1.408 +
1.409 + gunichar ch = strtoul (p, &endptr, 16);
1.410 + if (*endptr != '\0')
1.411 + croak ("Invalid UCS-4 character on line %d\n", line);
1.412 +
1.413 + g_array_append_val (ucs4, ch);
1.414 +
1.415 + p = strtok (NULL, " \t");
1.416 + }
1.417 +
1.418 + break;
1.419 + }
1.420 +
1.421 + g_free (tmp);
1.422 + state = (state + 1) % 3;
1.423 +
1.424 + if (state == 0)
1.425 + {
1.426 + process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len);
1.427 + g_array_set_size (ucs4, 0);
1.428 + g_free (utf8);
1.429 + }
1.430 +
1.431 + next_line:
1.432 + p = end;
1.433 + if (*p && *p == '\r')
1.434 + p++;
1.435 + if (*p && *p == '\n')
1.436 + p++;
1.437 +
1.438 + line++;
1.439 + }
1.440 +
1.441 + #ifdef SYMBIAN
1.442 + testResultXml("unicode-encoding");
1.443 + #endif /* EMULATOR */
1.444 + return exit_status;
1.445 +}