sl@0: /* GLIB - Library of useful routines for C programming sl@0: * Copyright (C) 2001 Matthias Clasen sl@0: * Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. sl@0: * This library is free software; you can redistribute it and/or sl@0: * modify it under the terms of the GNU Lesser General Public sl@0: * License as published by the Free Software Foundation; either sl@0: * version 2 of the License, or (at your option) any later version. sl@0: * sl@0: * This library is distributed in the hope that it will be useful, sl@0: * but WITHOUT ANY WARRANTY; without even the implied warranty of sl@0: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU sl@0: * Lesser General Public License for more details. sl@0: * sl@0: * You should have received a copy of the GNU Lesser General Public sl@0: * License along with this library; if not, write to the sl@0: * Free Software Foundation, Inc., 59 Temple Place - Suite 330, sl@0: * Boston, MA 02111-1307, USA. sl@0: */ sl@0: sl@0: #include "glib.h" sl@0: #ifdef __SYMBIAN32__ sl@0: #include "mrt2_glib2_test.h" sl@0: #endif /*__SYMBIAN32__*/ sl@0: sl@0: #define UNICODE_VALID(Char) \ sl@0: ((Char) < 0x110000 && \ sl@0: (((Char) & 0xFFFFF800) != 0xD800) && \ sl@0: ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \ sl@0: ((Char) & 0xFFFE) != 0xFFFE) sl@0: sl@0: sl@0: sl@0: static gboolean any_failed = FALSE; sl@0: sl@0: struct { sl@0: const gchar *text; sl@0: gint max_len; sl@0: gint offset; sl@0: gboolean valid; sl@0: } test[] = { sl@0: /* some tests to check max_len handling */ sl@0: /* length 1 */ sl@0: { "abcde", -1, 5, TRUE }, sl@0: { "abcde", 3, 3, TRUE }, sl@0: { "abcde", 5, 5, TRUE }, sl@0: { "abcde", 7, 5, FALSE }, sl@0: /* length 2 */ sl@0: { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE }, sl@0: { "\xc2\xa9\xc2\xa9\xc2\xa9", 1, 0, FALSE }, sl@0: { "\xc2\xa9\xc2\xa9\xc2\xa9", 2, 2, TRUE }, sl@0: { "\xc2\xa9\xc2\xa9\xc2\xa9", 3, 2, FALSE }, sl@0: { "\xc2\xa9\xc2\xa9\xc2\xa9", 4, 4, TRUE }, sl@0: { "\xc2\xa9\xc2\xa9\xc2\xa9", 5, 4, FALSE }, sl@0: { "\xc2\xa9\xc2\xa9\xc2\xa9", 6, 6, TRUE }, sl@0: { "\xc2\xa9\xc2\xa9\xc2\xa9", 7, 6, FALSE }, sl@0: /* length 3 */ sl@0: { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE }, sl@0: { "\xe2\x89\xa0\xe2\x89\xa0", 1, 0, FALSE }, sl@0: { "\xe2\x89\xa0\xe2\x89\xa0", 2, 0, FALSE }, sl@0: { "\xe2\x89\xa0\xe2\x89\xa0", 3, 3, TRUE }, sl@0: { "\xe2\x89\xa0\xe2\x89\xa0", 4, 3, FALSE }, sl@0: { "\xe2\x89\xa0\xe2\x89\xa0", 5, 3, FALSE }, sl@0: { "\xe2\x89\xa0\xe2\x89\xa0", 6, 6, TRUE }, sl@0: { "\xe2\x89\xa0\xe2\x89\xa0", 7, 6, FALSE }, sl@0: sl@0: /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */ sl@0: /* greek 'kosme' */ sl@0: { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE }, sl@0: /* first sequence of each length */ sl@0: { "\x00", -1, 0, TRUE }, sl@0: { "\xc2\x80", -1, 2, TRUE }, sl@0: { "\xe0\xa0\x80", -1, 3, TRUE }, sl@0: { "\xf0\x90\x80\x80", -1, 4, TRUE }, sl@0: { "\xf8\x88\x80\x80\x80", -1, 0, FALSE }, sl@0: { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE }, sl@0: /* last sequence of each length */ sl@0: { "\x7f", -1, 1, TRUE }, sl@0: { "\xdf\xbf", -1, 2, TRUE }, sl@0: { "\xef\xbf\xbf", -1, 0, FALSE }, sl@0: { "\xf7\xbf\xbf\xbf", -1, 0, FALSE }, sl@0: { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE }, sl@0: { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE }, sl@0: /* other boundary conditions */ sl@0: { "\xed\x9f\xbf", -1, 3, TRUE }, sl@0: { "\xee\x80\x80", -1, 3, TRUE }, sl@0: { "\xef\xbf\xbd", -1, 3, TRUE }, sl@0: { "\xf4\x8f\xbf\xbf", -1, 0, FALSE }, sl@0: { "\xf4\x90\x80\x80", -1, 0, FALSE }, sl@0: /* malformed sequences */ sl@0: /* continuation bytes */ sl@0: { "\x80", -1, 0, FALSE }, sl@0: { "\xbf", -1, 0, FALSE }, sl@0: { "\x80\xbf", -1, 0, FALSE }, sl@0: { "\x80\xbf\x80", -1, 0, FALSE }, sl@0: { "\x80\xbf\x80\xbf", -1, 0, FALSE }, sl@0: { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE }, sl@0: { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE }, sl@0: { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE }, sl@0: sl@0: /* all possible continuation byte */ sl@0: { "\x80", -1, 0, FALSE }, sl@0: { "\x81", -1, 0, FALSE }, sl@0: { "\x82", -1, 0, FALSE }, sl@0: { "\x83", -1, 0, FALSE }, sl@0: { "\x84", -1, 0, FALSE }, sl@0: { "\x85", -1, 0, FALSE }, sl@0: { "\x86", -1, 0, FALSE }, sl@0: { "\x87", -1, 0, FALSE }, sl@0: { "\x88", -1, 0, FALSE }, sl@0: { "\x89", -1, 0, FALSE }, sl@0: { "\x8a", -1, 0, FALSE }, sl@0: { "\x8b", -1, 0, FALSE }, sl@0: { "\x8c", -1, 0, FALSE }, sl@0: { "\x8d", -1, 0, FALSE }, sl@0: { "\x8e", -1, 0, FALSE }, sl@0: { "\x8f", -1, 0, FALSE }, sl@0: { "\x90", -1, 0, FALSE }, sl@0: { "\x91", -1, 0, FALSE }, sl@0: { "\x92", -1, 0, FALSE }, sl@0: { "\x93", -1, 0, FALSE }, sl@0: { "\x94", -1, 0, FALSE }, sl@0: { "\x95", -1, 0, FALSE }, sl@0: { "\x96", -1, 0, FALSE }, sl@0: { "\x97", -1, 0, FALSE }, sl@0: { "\x98", -1, 0, FALSE }, sl@0: { "\x99", -1, 0, FALSE }, sl@0: { "\x9a", -1, 0, FALSE }, sl@0: { "\x9b", -1, 0, FALSE }, sl@0: { "\x9c", -1, 0, FALSE }, sl@0: { "\x9d", -1, 0, FALSE }, sl@0: { "\x9e", -1, 0, FALSE }, sl@0: { "\x9f", -1, 0, FALSE }, sl@0: { "\xa0", -1, 0, FALSE }, sl@0: { "\xa1", -1, 0, FALSE }, sl@0: { "\xa2", -1, 0, FALSE }, sl@0: { "\xa3", -1, 0, FALSE }, sl@0: { "\xa4", -1, 0, FALSE }, sl@0: { "\xa5", -1, 0, FALSE }, sl@0: { "\xa6", -1, 0, FALSE }, sl@0: { "\xa7", -1, 0, FALSE }, sl@0: { "\xa8", -1, 0, FALSE }, sl@0: { "\xa9", -1, 0, FALSE }, sl@0: { "\xaa", -1, 0, FALSE }, sl@0: { "\xab", -1, 0, FALSE }, sl@0: { "\xac", -1, 0, FALSE }, sl@0: { "\xad", -1, 0, FALSE }, sl@0: { "\xae", -1, 0, FALSE }, sl@0: { "\xaf", -1, 0, FALSE }, sl@0: { "\xb0", -1, 0, FALSE }, sl@0: { "\xb1", -1, 0, FALSE }, sl@0: { "\xb2", -1, 0, FALSE }, sl@0: { "\xb3", -1, 0, FALSE }, sl@0: { "\xb4", -1, 0, FALSE }, sl@0: { "\xb5", -1, 0, FALSE }, sl@0: { "\xb6", -1, 0, FALSE }, sl@0: { "\xb7", -1, 0, FALSE }, sl@0: { "\xb8", -1, 0, FALSE }, sl@0: { "\xb9", -1, 0, FALSE }, sl@0: { "\xba", -1, 0, FALSE }, sl@0: { "\xbb", -1, 0, FALSE }, sl@0: { "\xbc", -1, 0, FALSE }, sl@0: { "\xbd", -1, 0, FALSE }, sl@0: { "\xbe", -1, 0, FALSE }, sl@0: { "\xbf", -1, 0, FALSE }, sl@0: /* lone start characters */ sl@0: { "\xc0\x20", -1, 0, FALSE }, sl@0: { "\xc1\x20", -1, 0, FALSE }, sl@0: { "\xc2\x20", -1, 0, FALSE }, sl@0: { "\xc3\x20", -1, 0, FALSE }, sl@0: { "\xc4\x20", -1, 0, FALSE }, sl@0: { "\xc5\x20", -1, 0, FALSE }, sl@0: { "\xc6\x20", -1, 0, FALSE }, sl@0: { "\xc7\x20", -1, 0, FALSE }, sl@0: { "\xc8\x20", -1, 0, FALSE }, sl@0: { "\xc9\x20", -1, 0, FALSE }, sl@0: { "\xca\x20", -1, 0, FALSE }, sl@0: { "\xcb\x20", -1, 0, FALSE }, sl@0: { "\xcc\x20", -1, 0, FALSE }, sl@0: { "\xcd\x20", -1, 0, FALSE }, sl@0: { "\xce\x20", -1, 0, FALSE }, sl@0: { "\xcf\x20", -1, 0, FALSE }, sl@0: { "\xd0\x20", -1, 0, FALSE }, sl@0: { "\xd1\x20", -1, 0, FALSE }, sl@0: { "\xd2\x20", -1, 0, FALSE }, sl@0: { "\xd3\x20", -1, 0, FALSE }, sl@0: { "\xd4\x20", -1, 0, FALSE }, sl@0: { "\xd5\x20", -1, 0, FALSE }, sl@0: { "\xd6\x20", -1, 0, FALSE }, sl@0: { "\xd7\x20", -1, 0, FALSE }, sl@0: { "\xd8\x20", -1, 0, FALSE }, sl@0: { "\xd9\x20", -1, 0, FALSE }, sl@0: { "\xda\x20", -1, 0, FALSE }, sl@0: { "\xdb\x20", -1, 0, FALSE }, sl@0: { "\xdc\x20", -1, 0, FALSE }, sl@0: { "\xdd\x20", -1, 0, FALSE }, sl@0: { "\xde\x20", -1, 0, FALSE }, sl@0: { "\xdf\x20", -1, 0, FALSE }, sl@0: { "\xe0\x20", -1, 0, FALSE }, sl@0: { "\xe1\x20", -1, 0, FALSE }, sl@0: { "\xe2\x20", -1, 0, FALSE }, sl@0: { "\xe3\x20", -1, 0, FALSE }, sl@0: { "\xe4\x20", -1, 0, FALSE }, sl@0: { "\xe5\x20", -1, 0, FALSE }, sl@0: { "\xe6\x20", -1, 0, FALSE }, sl@0: { "\xe7\x20", -1, 0, FALSE }, sl@0: { "\xe8\x20", -1, 0, FALSE }, sl@0: { "\xe9\x20", -1, 0, FALSE }, sl@0: { "\xea\x20", -1, 0, FALSE }, sl@0: { "\xeb\x20", -1, 0, FALSE }, sl@0: { "\xec\x20", -1, 0, FALSE }, sl@0: { "\xed\x20", -1, 0, FALSE }, sl@0: { "\xee\x20", -1, 0, FALSE }, sl@0: { "\xef\x20", -1, 0, FALSE }, sl@0: { "\xf0\x20", -1, 0, FALSE }, sl@0: { "\xf1\x20", -1, 0, FALSE }, sl@0: { "\xf2\x20", -1, 0, FALSE }, sl@0: { "\xf3\x20", -1, 0, FALSE }, sl@0: { "\xf4\x20", -1, 0, FALSE }, sl@0: { "\xf5\x20", -1, 0, FALSE }, sl@0: { "\xf6\x20", -1, 0, FALSE }, sl@0: { "\xf7\x20", -1, 0, FALSE }, sl@0: { "\xf8\x20", -1, 0, FALSE }, sl@0: { "\xf9\x20", -1, 0, FALSE }, sl@0: { "\xfa\x20", -1, 0, FALSE }, sl@0: { "\xfb\x20", -1, 0, FALSE }, sl@0: { "\xfc\x20", -1, 0, FALSE }, sl@0: { "\xfd\x20", -1, 0, FALSE }, sl@0: /* missing continuation bytes */ sl@0: { "\x20\xc0", -1, 1, FALSE }, sl@0: { "\x20\xe0\x80", -1, 1, FALSE }, sl@0: { "\x20\xf0\x80\x80", -1, 1, FALSE }, sl@0: { "\x20\xf8\x80\x80\x80", -1, 1, FALSE }, sl@0: { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE }, sl@0: { "\x20\xdf", -1, 1, FALSE }, sl@0: { "\x20\xef\xbf", -1, 1, FALSE }, sl@0: { "\x20\xf7\xbf\xbf", -1, 1, FALSE }, sl@0: { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE }, sl@0: { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE }, sl@0: /* impossible bytes */ sl@0: { "\x20\xfe\x20", -1, 1, FALSE }, sl@0: { "\x20\xff\x20", -1, 1, FALSE }, sl@0: /* overlong sequences */ sl@0: { "\x20\xc0\xaf\x20", -1, 1, FALSE }, sl@0: { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE }, sl@0: { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE }, sl@0: { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE }, sl@0: { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE }, sl@0: { "\x20\xc1\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xc0\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xe0\x80\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE }, sl@0: /* illegal code positions */ sl@0: { "\x20\xed\xa0\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xad\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xae\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xb0\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xbe\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE }, sl@0: { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE }, sl@0: { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE }, sl@0: { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE }, sl@0: sl@0: { NULL, } sl@0: }; sl@0: sl@0: static void sl@0: do_test (gint index, sl@0: const gchar *text, sl@0: gint max_len, sl@0: gint offset, sl@0: gboolean valid) sl@0: { sl@0: const gchar *end; sl@0: gboolean result; sl@0: sl@0: result = g_utf8_validate (text, max_len, &end); sl@0: sl@0: if (result != valid || end - text != offset) sl@0: { sl@0: GString *str; sl@0: const gchar *p; sl@0: sl@0: any_failed = TRUE; sl@0: sl@0: str = g_string_new (0); sl@0: for (p = text; *p; p++) sl@0: g_string_append_printf (str, "\\x%02hhx", *p); sl@0: g_print ("%d: g_utf8_validate (\"%s\", %d) failed, " sl@0: "expected %s %d, got %s %d\n", sl@0: index, sl@0: str->str, max_len, sl@0: valid ? "TRUE" : "FALSE", offset, sl@0: result ? "TRUE" : "FALSE", (gint) (end - text)); sl@0: g_string_free (str, FALSE); sl@0: } sl@0: } sl@0: sl@0: int sl@0: main (int argc, char *argv[]) sl@0: { sl@0: gint i; sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: g_log_set_handler (NULL, G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL); sl@0: g_set_print_handler(mrtPrintHandler); sl@0: #endif /*__SYMBIAN32__*/ sl@0: sl@0: sl@0: for (i = 0; test[i].text; i++) sl@0: do_test (i, test[i].text, test[i].max_len, sl@0: test[i].offset, test[i].valid); sl@0: sl@0: #ifdef __SYMBIAN32__ sl@0: assert_failed = any_failed; sl@0: testResultXml("utf8-validate"); sl@0: #endif /* EMULATOR */ sl@0: sl@0: return any_failed ? 1 : 0; sl@0: }