sl@0: /* Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.*/ sl@0: #undef G_DISABLE_ASSERT sl@0: #undef G_LOG_DOMAIN sl@0: sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: #include "mrt2_glib2_test.h" sl@0: #endif /*__SYMBIAN32__*/ sl@0: static gint exit_status = 0; sl@0: sl@0: static void sl@0: croak (char *format, ...) sl@0: { sl@0: va_list va; sl@0: sl@0: va_start (va, format); sl@0: vfprintf (stderr, format, va); sl@0: va_end (va); sl@0: sl@0: exit (1); sl@0: } sl@0: sl@0: static void sl@0: fail (char *format, ...) sl@0: { sl@0: va_list va; sl@0: sl@0: va_start (va, format); sl@0: vfprintf (stderr, format, va); sl@0: va_end (va); sl@0: sl@0: exit_status |= 1; sl@0: } sl@0: sl@0: typedef enum sl@0: { sl@0: VALID, sl@0: INCOMPLETE, sl@0: NOTUNICODE, sl@0: OVERLONG, sl@0: MALFORMED sl@0: } Status; sl@0: sl@0: static gboolean sl@0: ucs4_equal (gunichar *a, gunichar *b) sl@0: { sl@0: while (*a && *b && (*a == *b)) sl@0: { sl@0: a++; sl@0: b++; sl@0: } sl@0: sl@0: return (*a == *b); sl@0: } sl@0: sl@0: static gboolean sl@0: utf16_equal (gunichar2 *a, gunichar2 *b) sl@0: { sl@0: while (*a && *b && (*a == *b)) sl@0: { sl@0: a++; sl@0: b++; sl@0: } sl@0: sl@0: return (*a == *b); sl@0: } sl@0: sl@0: static gint sl@0: utf16_count (gunichar2 *a) sl@0: { sl@0: gint result = 0; sl@0: sl@0: while (a[result]) sl@0: result++; sl@0: sl@0: return result; sl@0: } sl@0: sl@0: static void sl@0: process (gint line, sl@0: gchar *utf8, sl@0: Status status, sl@0: gunichar *ucs4, sl@0: gint ucs4_len) sl@0: { sl@0: const gchar *end; sl@0: gboolean is_valid = g_utf8_validate (utf8, -1, &end); sl@0: GError *error = NULL; sl@0: glong items_read, items_written; sl@0: sl@0: switch (status) sl@0: { sl@0: case VALID: sl@0: if (!is_valid) sl@0: { sl@0: fail ("line %d: valid but g_utf8_validate returned FALSE\n", line); sl@0: return; sl@0: } sl@0: break; sl@0: case NOTUNICODE: sl@0: case INCOMPLETE: sl@0: case OVERLONG: sl@0: case MALFORMED: sl@0: if (is_valid) sl@0: { sl@0: fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line); sl@0: return; sl@0: } sl@0: break; sl@0: } sl@0: sl@0: if (status == INCOMPLETE) sl@0: { sl@0: gunichar *ucs4_result; sl@0: sl@0: ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error); sl@0: sl@0: if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT)) sl@0: { sl@0: fail ("line %d: incomplete input not properly detected\n", line); sl@0: return; sl@0: } sl@0: g_clear_error (&error); sl@0: sl@0: ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error); sl@0: sl@0: if (!ucs4_result || items_read == strlen (utf8)) sl@0: { sl@0: fail ("line %d: incomplete input not properly detected\n", line); sl@0: return; sl@0: } sl@0: sl@0: g_free (ucs4_result); sl@0: } sl@0: sl@0: if (status == VALID || status == NOTUNICODE) sl@0: { sl@0: gunichar *ucs4_result; sl@0: gchar *utf8_result; sl@0: sl@0: ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error); sl@0: if (!ucs4_result) sl@0: { sl@0: fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message); sl@0: return; sl@0: } sl@0: sl@0: if (!ucs4_equal (ucs4_result, ucs4) || sl@0: items_read != strlen (utf8) || sl@0: items_written != ucs4_len) sl@0: { sl@0: fail ("line %d: results of conversion to ucs4 do not match expected.\n", line); sl@0: return; sl@0: } sl@0: sl@0: g_free (ucs4_result); sl@0: sl@0: ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written); sl@0: sl@0: if (!ucs4_equal (ucs4_result, ucs4) || sl@0: items_written != ucs4_len) sl@0: { sl@0: fail ("line %d: results of conversion to ucs4 do not match expected.\n", line); sl@0: return; sl@0: } sl@0: sl@0: utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error); sl@0: if (!utf8_result) sl@0: { sl@0: fail ("line %d: conversion back to utf8 failed: %s", line, error->message); sl@0: return; sl@0: } sl@0: sl@0: if (strcmp (utf8_result, utf8) != 0 || sl@0: items_read != ucs4_len || sl@0: items_written != strlen (utf8)) sl@0: { sl@0: fail ("line %d: conversion back to utf8 did not match original\n", line); sl@0: return; sl@0: } sl@0: sl@0: g_free (utf8_result); sl@0: g_free (ucs4_result); sl@0: } sl@0: sl@0: if (status == VALID) sl@0: { sl@0: gunichar2 *utf16_expected_tmp; sl@0: gunichar2 *utf16_expected; sl@0: gunichar2 *utf16_from_utf8; sl@0: gunichar2 *utf16_from_ucs4; sl@0: gunichar *ucs4_result; sl@0: gsize bytes_written; sl@0: gint n_chars; sl@0: gchar *utf8_result; sl@0: sl@0: #if defined(G_PLATFORM_WIN32) || defined(__SYMBIAN32__) sl@0: #define TARGET "UTF-16LE" sl@0: #else sl@0: #define TARGET "UTF-16" sl@0: #endif sl@0: sl@0: if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8", sl@0: NULL, &bytes_written, NULL))) sl@0: { sl@0: fail ("line %d: could not convert to UTF-16 via g_convert\n", line); sl@0: return; sl@0: } sl@0: sl@0: /* zero-terminate and remove BOM sl@0: */ sl@0: n_chars = bytes_written / 2; sl@0: if (utf16_expected_tmp[0] == 0xfeff) /* BOM */ sl@0: { sl@0: n_chars--; sl@0: utf16_expected = g_new (gunichar2, n_chars + 1); sl@0: memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars); sl@0: } sl@0: else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */ sl@0: { sl@0: fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line); sl@0: return; sl@0: } sl@0: else sl@0: { sl@0: utf16_expected = g_new (gunichar2, n_chars + 1); sl@0: memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars); sl@0: } sl@0: sl@0: utf16_expected[n_chars] = '\0'; sl@0: sl@0: if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error))) sl@0: { sl@0: fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); sl@0: return; sl@0: } sl@0: sl@0: if (items_read != strlen (utf8) || sl@0: utf16_count (utf16_from_utf8) != items_written) sl@0: { sl@0: fail ("line %d: length error in conversion to ucs16\n", line); sl@0: return; sl@0: } sl@0: sl@0: if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error))) sl@0: { sl@0: fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message); sl@0: return; sl@0: } sl@0: sl@0: if (items_read != ucs4_len || sl@0: utf16_count (utf16_from_ucs4) != items_written) sl@0: { sl@0: fail ("line %d: length error in conversion to ucs16\n", line); sl@0: return; sl@0: } sl@0: sl@0: if (!utf16_equal (utf16_from_utf8, utf16_expected) || sl@0: !utf16_equal (utf16_from_ucs4, utf16_expected)) sl@0: { sl@0: fail ("line %d: results of conversion to ucs16 do not match\n", line); sl@0: return; sl@0: } sl@0: sl@0: if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error))) sl@0: { sl@0: fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message); sl@0: return; sl@0: } sl@0: sl@0: if (items_read != utf16_count (utf16_from_utf8) || sl@0: items_written != strlen (utf8)) sl@0: { sl@0: fail ("line %d: length error in conversion from ucs16 to utf8\n", line); sl@0: return; sl@0: } sl@0: sl@0: if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error))) sl@0: { sl@0: fail ("line %d: conversion back to utf8/ucs4 failed\n", line); sl@0: return; sl@0: } sl@0: sl@0: if (items_read != utf16_count (utf16_from_utf8) || sl@0: items_written != ucs4_len) sl@0: { sl@0: fail ("line %d: length error in conversion from ucs16 to ucs4\n", line); sl@0: return; sl@0: } sl@0: sl@0: if (strcmp (utf8, utf8_result) != 0 || sl@0: !ucs4_equal (ucs4, ucs4_result)) sl@0: { sl@0: fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line); sl@0: return; sl@0: } sl@0: sl@0: g_free (utf16_expected_tmp); sl@0: g_free (utf16_expected); sl@0: g_free (utf16_from_utf8); sl@0: g_free (utf16_from_ucs4); sl@0: g_free (utf8_result); sl@0: g_free (ucs4_result); sl@0: } sl@0: } sl@0: sl@0: int sl@0: main (int argc, char **argv) sl@0: { sl@0: gchar *srcdir = getenv ("srcdir"); sl@0: gchar *testfile; sl@0: gchar *contents; sl@0: GError *error = NULL; sl@0: gchar *p, *end; sl@0: char *tmp; sl@0: gint state = 0; sl@0: gint line = 1; sl@0: gint start_line = 0; /* Quiet GCC */ sl@0: gchar *utf8 = NULL; /* Quiet GCC */ sl@0: GArray *ucs4; sl@0: Status status = VALID; /* Quiet GCC */ sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: sl@0: g_log_set_handler (NULL, G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL); sl@0: g_set_print_handler(mrtPrintHandler); sl@0: #endif /*__SYMBIAN32__*/ sl@0: if (!srcdir) sl@0: srcdir = "c:"; sl@0: sl@0: testfile = g_strconcat (srcdir, G_DIR_SEPARATOR_S "utf8.txt", NULL); sl@0: sl@0: g_file_get_contents (testfile, &contents, NULL, &error); sl@0: if (error) sl@0: { sl@0: croak ("Cannot open utf8.txt: %s", error->message); sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: testResultXml("unicode-encoding"); sl@0: #endif /* EMULATOR */ sl@0: sl@0: exit(1); sl@0: } sl@0: sl@0: ucs4 = g_array_new (TRUE, FALSE, sizeof(gunichar)); sl@0: sl@0: p = contents; sl@0: sl@0: /* Loop over lines */ sl@0: while (*p) sl@0: { sl@0: while (*p && (*p == ' ' || *p == '\t')) sl@0: p++; sl@0: sl@0: end = p; sl@0: while (*end && (*end != '\r' && *end != '\n')) sl@0: end++; sl@0: sl@0: if (!*p || *p == '#' || *p == '\r' || *p == '\n') sl@0: goto next_line; sl@0: sl@0: tmp = g_strstrip (g_strndup (p, end - p)); sl@0: sl@0: switch (state) sl@0: { sl@0: case 0: sl@0: /* UTF-8 string */ sl@0: start_line = line; sl@0: utf8 = tmp; sl@0: tmp = NULL; sl@0: break; sl@0: sl@0: case 1: sl@0: /* Status */ sl@0: if (!strcmp (tmp, "VALID")) sl@0: status = VALID; sl@0: else if (!strcmp (tmp, "INCOMPLETE")) sl@0: status = INCOMPLETE; sl@0: else if (!strcmp (tmp, "NOTUNICODE")) sl@0: status = NOTUNICODE; sl@0: else if (!strcmp (tmp, "OVERLONG")) sl@0: status = OVERLONG; sl@0: else if (!strcmp (tmp, "MALFORMED")) sl@0: status = MALFORMED; sl@0: else sl@0: croak ("Invalid status on line %d\n", line); sl@0: sl@0: if (status != VALID && status != NOTUNICODE) sl@0: state++; /* No UCS-4 data */ sl@0: sl@0: break; sl@0: sl@0: case 2: sl@0: /* UCS-4 version */ sl@0: sl@0: p = strtok (tmp, " \t"); sl@0: while (p) sl@0: { sl@0: gchar *endptr; sl@0: sl@0: gunichar ch = strtoul (p, &endptr, 16); sl@0: if (*endptr != '\0') sl@0: croak ("Invalid UCS-4 character on line %d\n", line); sl@0: sl@0: g_array_append_val (ucs4, ch); sl@0: sl@0: p = strtok (NULL, " \t"); sl@0: } sl@0: sl@0: break; sl@0: } sl@0: sl@0: g_free (tmp); sl@0: state = (state + 1) % 3; sl@0: sl@0: if (state == 0) sl@0: { sl@0: process (start_line, utf8, status, (gunichar *)ucs4->data, ucs4->len); sl@0: g_array_set_size (ucs4, 0); sl@0: g_free (utf8); sl@0: } sl@0: sl@0: next_line: sl@0: p = end; sl@0: if (*p && *p == '\r') sl@0: p++; sl@0: if (*p && *p == '\n') sl@0: p++; sl@0: sl@0: line++; sl@0: } sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: testResultXml("unicode-encoding"); sl@0: #endif /* EMULATOR */ sl@0: return exit_status; sl@0: }