1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/glib/tsrc/BC/tests/utf8-validate.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,336 @@
1.4 +/* GLIB - Library of useful routines for C programming
1.5 + * Copyright (C) 2001 Matthias Clasen <matthiasc@poet.de>
1.6 + * Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
1.7 + * This library is free software; you can redistribute it and/or
1.8 + * modify it under the terms of the GNU Lesser General Public
1.9 + * License as published by the Free Software Foundation; either
1.10 + * version 2 of the License, or (at your option) any later version.
1.11 + *
1.12 + * This library is distributed in the hope that it will be useful,
1.13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1.14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1.15 + * Lesser General Public License for more details.
1.16 + *
1.17 + * You should have received a copy of the GNU Lesser General Public
1.18 + * License along with this library; if not, write to the
1.19 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
1.20 + * Boston, MA 02111-1307, USA.
1.21 + */
1.22 +
1.23 +#include "glib.h"
1.24 +#include <stdio.h>
1.25 +
1.26 +#ifdef SYMBIAN
1.27 +#include "mrt2_glib2_test.h"
1.28 +#endif /*SYMBIAN*/
1.29 +/* Nonzero when Char is below 0x110000, is outside the surrogate range 0xD800-0xDFFF, is outside 0xFDD0-0xFDEF, and its low 16 bits are neither 0xFFFE nor 0xFFFF. Char is evaluated several times. Not referenced elsewhere in this file. */
1.30 +#define UNICODE_VALID(Char) \
1.31 + ((Char) < 0x110000 && \
1.32 + (((Char) & 0xFFFFF800) != 0xD800) && \
1.33 + ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
1.34 + ((Char) & 0xFFFE) != 0xFFFE)
1.35 +
1.36 +
1.37 +/* Set to TRUE by do_test() whenever a vector fails; main() derives its exit status from it. */
1.38 +static gboolean any_failed = FALSE;
1.39 +/* Test vectors for g_utf8_validate(); main() walks them in order and stops at the entry whose text is NULL. */
1.40 +struct {
1.41 + const gchar *text; /* bytes handed to g_utf8_validate() */
1.42 + gint max_len; /* max_len argument handed to g_utf8_validate() */
1.43 + gint offset; /* expected value of (end - text) after the call */
1.44 + gboolean valid; /* expected return value */
1.45 +} test[] = {
1.46 + /* some tests to check max_len handling */
1.47 + /* 1-byte characters */
1.48 + { "abcde", -1, 5, TRUE },
1.49 + { "abcde", 3, 3, TRUE },
1.50 + { "abcde", 5, 5, TRUE },
1.51 + { "abcde", 7, 5, FALSE },
1.52 + /* 2-byte characters */
1.53 + { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
1.54 + { "\xc2\xa9\xc2\xa9\xc2\xa9", 1, 0, FALSE },
1.55 + { "\xc2\xa9\xc2\xa9\xc2\xa9", 2, 2, TRUE },
1.56 + { "\xc2\xa9\xc2\xa9\xc2\xa9", 3, 2, FALSE },
1.57 + { "\xc2\xa9\xc2\xa9\xc2\xa9", 4, 4, TRUE },
1.58 + { "\xc2\xa9\xc2\xa9\xc2\xa9", 5, 4, FALSE },
1.59 + { "\xc2\xa9\xc2\xa9\xc2\xa9", 6, 6, TRUE },
1.60 + { "\xc2\xa9\xc2\xa9\xc2\xa9", 7, 6, FALSE },
1.61 + /* 3-byte characters */
1.62 + { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
1.63 + { "\xe2\x89\xa0\xe2\x89\xa0", 1, 0, FALSE },
1.64 + { "\xe2\x89\xa0\xe2\x89\xa0", 2, 0, FALSE },
1.65 + { "\xe2\x89\xa0\xe2\x89\xa0", 3, 3, TRUE },
1.66 + { "\xe2\x89\xa0\xe2\x89\xa0", 4, 3, FALSE },
1.67 + { "\xe2\x89\xa0\xe2\x89\xa0", 5, 3, FALSE },
1.68 + { "\xe2\x89\xa0\xe2\x89\xa0", 6, 6, TRUE },
1.69 + { "\xe2\x89\xa0\xe2\x89\xa0", 7, 6, FALSE },
1.70 +
1.71 + /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
1.72 + /* greek 'kosme' */
1.73 + { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
1.74 + /* first sequence of each length */
1.75 + { "\x00", -1, 0, TRUE },
1.76 + { "\xc2\x80", -1, 2, TRUE },
1.77 + { "\xe0\xa0\x80", -1, 3, TRUE },
1.78 + { "\xf0\x90\x80\x80", -1, 4, TRUE },
1.79 + { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
1.80 + { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
1.81 + /* last sequence of each length */
1.82 + { "\x7f", -1, 1, TRUE },
1.83 + { "\xdf\xbf", -1, 2, TRUE },
1.84 + { "\xef\xbf\xbf", -1, 0, FALSE },
1.85 + { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
1.86 + { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
1.87 + { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
1.88 + /* other boundary conditions */
1.89 + { "\xed\x9f\xbf", -1, 3, TRUE },
1.90 + { "\xee\x80\x80", -1, 3, TRUE },
1.91 + { "\xef\xbf\xbd", -1, 3, TRUE },
1.92 + { "\xf4\x8f\xbf\xbf", -1, 0, FALSE },
1.93 + { "\xf4\x90\x80\x80", -1, 0, FALSE },
1.94 + /* malformed sequences */
1.95 + /* continuation bytes */
1.96 + { "\x80", -1, 0, FALSE },
1.97 + { "\xbf", -1, 0, FALSE },
1.98 + { "\x80\xbf", -1, 0, FALSE },
1.99 + { "\x80\xbf\x80", -1, 0, FALSE },
1.100 + { "\x80\xbf\x80\xbf", -1, 0, FALSE },
1.101 + { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
1.102 + { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
1.103 + { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
1.104 +
1.105 + /* all possible continuation bytes */
1.106 + { "\x80", -1, 0, FALSE },
1.107 + { "\x81", -1, 0, FALSE },
1.108 + { "\x82", -1, 0, FALSE },
1.109 + { "\x83", -1, 0, FALSE },
1.110 + { "\x84", -1, 0, FALSE },
1.111 + { "\x85", -1, 0, FALSE },
1.112 + { "\x86", -1, 0, FALSE },
1.113 + { "\x87", -1, 0, FALSE },
1.114 + { "\x88", -1, 0, FALSE },
1.115 + { "\x89", -1, 0, FALSE },
1.116 + { "\x8a", -1, 0, FALSE },
1.117 + { "\x8b", -1, 0, FALSE },
1.118 + { "\x8c", -1, 0, FALSE },
1.119 + { "\x8d", -1, 0, FALSE },
1.120 + { "\x8e", -1, 0, FALSE },
1.121 + { "\x8f", -1, 0, FALSE },
1.122 + { "\x90", -1, 0, FALSE },
1.123 + { "\x91", -1, 0, FALSE },
1.124 + { "\x92", -1, 0, FALSE },
1.125 + { "\x93", -1, 0, FALSE },
1.126 + { "\x94", -1, 0, FALSE },
1.127 + { "\x95", -1, 0, FALSE },
1.128 + { "\x96", -1, 0, FALSE },
1.129 + { "\x97", -1, 0, FALSE },
1.130 + { "\x98", -1, 0, FALSE },
1.131 + { "\x99", -1, 0, FALSE },
1.132 + { "\x9a", -1, 0, FALSE },
1.133 + { "\x9b", -1, 0, FALSE },
1.134 + { "\x9c", -1, 0, FALSE },
1.135 + { "\x9d", -1, 0, FALSE },
1.136 + { "\x9e", -1, 0, FALSE },
1.137 + { "\x9f", -1, 0, FALSE },
1.138 + { "\xa0", -1, 0, FALSE },
1.139 + { "\xa1", -1, 0, FALSE },
1.140 + { "\xa2", -1, 0, FALSE },
1.141 + { "\xa3", -1, 0, FALSE },
1.142 + { "\xa4", -1, 0, FALSE },
1.143 + { "\xa5", -1, 0, FALSE },
1.144 + { "\xa6", -1, 0, FALSE },
1.145 + { "\xa7", -1, 0, FALSE },
1.146 + { "\xa8", -1, 0, FALSE },
1.147 + { "\xa9", -1, 0, FALSE },
1.148 + { "\xaa", -1, 0, FALSE },
1.149 + { "\xab", -1, 0, FALSE },
1.150 + { "\xac", -1, 0, FALSE },
1.151 + { "\xad", -1, 0, FALSE },
1.152 + { "\xae", -1, 0, FALSE },
1.153 + { "\xaf", -1, 0, FALSE },
1.154 + { "\xb0", -1, 0, FALSE },
1.155 + { "\xb1", -1, 0, FALSE },
1.156 + { "\xb2", -1, 0, FALSE },
1.157 + { "\xb3", -1, 0, FALSE },
1.158 + { "\xb4", -1, 0, FALSE },
1.159 + { "\xb5", -1, 0, FALSE },
1.160 + { "\xb6", -1, 0, FALSE },
1.161 + { "\xb7", -1, 0, FALSE },
1.162 + { "\xb8", -1, 0, FALSE },
1.163 + { "\xb9", -1, 0, FALSE },
1.164 + { "\xba", -1, 0, FALSE },
1.165 + { "\xbb", -1, 0, FALSE },
1.166 + { "\xbc", -1, 0, FALSE },
1.167 + { "\xbd", -1, 0, FALSE },
1.168 + { "\xbe", -1, 0, FALSE },
1.169 + { "\xbf", -1, 0, FALSE },
1.170 + /* lone start characters, each followed by a space */
1.171 + { "\xc0\x20", -1, 0, FALSE },
1.172 + { "\xc1\x20", -1, 0, FALSE },
1.173 + { "\xc2\x20", -1, 0, FALSE },
1.174 + { "\xc3\x20", -1, 0, FALSE },
1.175 + { "\xc4\x20", -1, 0, FALSE },
1.176 + { "\xc5\x20", -1, 0, FALSE },
1.177 + { "\xc6\x20", -1, 0, FALSE },
1.178 + { "\xc7\x20", -1, 0, FALSE },
1.179 + { "\xc8\x20", -1, 0, FALSE },
1.180 + { "\xc9\x20", -1, 0, FALSE },
1.181 + { "\xca\x20", -1, 0, FALSE },
1.182 + { "\xcb\x20", -1, 0, FALSE },
1.183 + { "\xcc\x20", -1, 0, FALSE },
1.184 + { "\xcd\x20", -1, 0, FALSE },
1.185 + { "\xce\x20", -1, 0, FALSE },
1.186 + { "\xcf\x20", -1, 0, FALSE },
1.187 + { "\xd0\x20", -1, 0, FALSE },
1.188 + { "\xd1\x20", -1, 0, FALSE },
1.189 + { "\xd2\x20", -1, 0, FALSE },
1.190 + { "\xd3\x20", -1, 0, FALSE },
1.191 + { "\xd4\x20", -1, 0, FALSE },
1.192 + { "\xd5\x20", -1, 0, FALSE },
1.193 + { "\xd6\x20", -1, 0, FALSE },
1.194 + { "\xd7\x20", -1, 0, FALSE },
1.195 + { "\xd8\x20", -1, 0, FALSE },
1.196 + { "\xd9\x20", -1, 0, FALSE },
1.197 + { "\xda\x20", -1, 0, FALSE },
1.198 + { "\xdb\x20", -1, 0, FALSE },
1.199 + { "\xdc\x20", -1, 0, FALSE },
1.200 + { "\xdd\x20", -1, 0, FALSE },
1.201 + { "\xde\x20", -1, 0, FALSE },
1.202 + { "\xdf\x20", -1, 0, FALSE },
1.203 + { "\xe0\x20", -1, 0, FALSE },
1.204 + { "\xe1\x20", -1, 0, FALSE },
1.205 + { "\xe2\x20", -1, 0, FALSE },
1.206 + { "\xe3\x20", -1, 0, FALSE },
1.207 + { "\xe4\x20", -1, 0, FALSE },
1.208 + { "\xe5\x20", -1, 0, FALSE },
1.209 + { "\xe6\x20", -1, 0, FALSE },
1.210 + { "\xe7\x20", -1, 0, FALSE },
1.211 + { "\xe8\x20", -1, 0, FALSE },
1.212 + { "\xe9\x20", -1, 0, FALSE },
1.213 + { "\xea\x20", -1, 0, FALSE },
1.214 + { "\xeb\x20", -1, 0, FALSE },
1.215 + { "\xec\x20", -1, 0, FALSE },
1.216 + { "\xed\x20", -1, 0, FALSE },
1.217 + { "\xee\x20", -1, 0, FALSE },
1.218 + { "\xef\x20", -1, 0, FALSE },
1.219 + { "\xf0\x20", -1, 0, FALSE },
1.220 + { "\xf1\x20", -1, 0, FALSE },
1.221 + { "\xf2\x20", -1, 0, FALSE },
1.222 + { "\xf3\x20", -1, 0, FALSE },
1.223 + { "\xf4\x20", -1, 0, FALSE },
1.224 + { "\xf5\x20", -1, 0, FALSE },
1.225 + { "\xf6\x20", -1, 0, FALSE },
1.226 + { "\xf7\x20", -1, 0, FALSE },
1.227 + { "\xf8\x20", -1, 0, FALSE },
1.228 + { "\xf9\x20", -1, 0, FALSE },
1.229 + { "\xfa\x20", -1, 0, FALSE },
1.230 + { "\xfb\x20", -1, 0, FALSE },
1.231 + { "\xfc\x20", -1, 0, FALSE },
1.232 + { "\xfd\x20", -1, 0, FALSE },
1.233 + /* missing continuation bytes */
1.234 + { "\x20\xc0", -1, 1, FALSE },
1.235 + { "\x20\xe0\x80", -1, 1, FALSE },
1.236 + { "\x20\xf0\x80\x80", -1, 1, FALSE },
1.237 + { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
1.238 + { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
1.239 + { "\x20\xdf", -1, 1, FALSE },
1.240 + { "\x20\xef\xbf", -1, 1, FALSE },
1.241 + { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
1.242 + { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
1.243 + { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
1.244 + /* impossible bytes */
1.245 + { "\x20\xfe\x20", -1, 1, FALSE },
1.246 + { "\x20\xff\x20", -1, 1, FALSE },
1.247 + /* overlong sequences */
1.248 + { "\x20\xc0\xaf\x20", -1, 1, FALSE },
1.249 + { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
1.250 + { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
1.251 + { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
1.252 + { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
1.253 + { "\x20\xc1\xbf\x20", -1, 1, FALSE },
1.254 + { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
1.255 + { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
1.256 + { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
1.257 + { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
1.258 + { "\x20\xc0\x80\x20", -1, 1, FALSE },
1.259 + { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
1.260 + { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
1.261 + { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
1.262 + { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
1.263 + /* illegal code positions */
1.264 + { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
1.265 + { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
1.266 + { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
1.267 + { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
1.268 + { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
1.269 + { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
1.270 + { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
1.271 + { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
1.272 + { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
1.273 + { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
1.274 + { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
1.275 + { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
1.276 + { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
1.277 + { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
1.278 + { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
1.279 + { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
1.280 + { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
1.281 +
1.282 + { NULL, } /* sentinel: the NULL text ends the loop in main() */
1.283 +};
1.284 +/* Runs g_utf8_validate() on one vector; if the verdict or the end offset is not the expected one, sets any_failed and prints a report. */
1.285 +static void
1.286 +do_test (gint index,        /* position of the vector in test[]; only used in the report */
1.287 +         const gchar *text, /* bytes to validate */
1.288 +         gint max_len,      /* passed unchanged to g_utf8_validate() */
1.289 +         gint offset,       /* expected value of (end - text) */
1.290 +         gboolean valid)    /* expected return value */
1.291 +{
1.292 +  const gchar *end;
1.293 +  gboolean result;
1.294 +
1.295 +  result = g_utf8_validate (text, max_len, &end);
1.296 +
1.297 +  if (result != valid || end - text != offset)
1.298 +    {
1.299 +      GString *str;
1.300 +      const gchar *p;
1.301 +
1.302 +      any_failed = TRUE;
1.303 +      /* Hex-escape the input up to its first NUL byte so the report stays printable. */
1.304 +      str = g_string_new (NULL);
1.305 +      for (p = text; *p; p++)
1.306 +        g_string_append_printf (str, "\\x%02hhx", *p);
1.307 +      g_print ("%d: g_utf8_validate (\"%s\", %d) failed, "
1.308 +               "expected %s %d, got %s %d\n",
1.309 +               index,
1.310 +               str->str, max_len,
1.311 +               valid ? "TRUE" : "FALSE", offset,
1.312 +               result ? "TRUE" : "FALSE", (gint) (end - text));
1.313 +      g_string_free (str, TRUE); /* TRUE releases the character data too; with FALSE the ignored return value leaked it */
1.314 +    }
1.315 +}
1.316 +/* Feeds every entry of test[] to do_test(); the exit status is 1 if any vector failed and 0 otherwise. */
1.317 +int
1.318 +main (int argc, char *argv[])
1.319 +{
1.320 +  gint idx = 0;
1.321 +  /* Symbian builds install mrtLogHandler / mrtPrintHandler for GLib log and print output. */
1.322 +  #ifdef SYMBIAN
1.323 +  g_log_set_handler (NULL, G_LOG_FLAG_FATAL | G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL);
1.324 +  g_set_print_handler (mrtPrintHandler);
1.325 +  #endif /* SYMBIAN */
1.326 +  /* Walk the table until the sentinel entry whose text is NULL. */
1.327 +  while (test[idx].text)
1.328 +    {
1.329 +      do_test (idx, test[idx].text, test[idx].max_len, test[idx].offset, test[idx].valid);
1.330 +      idx++;
1.331 +    }
1.332 +  /* Symbian builds also record the outcome (assert_failed / testResultXml presumably come from mrt2_glib2_test.h). */
1.333 +  #ifdef SYMBIAN
1.334 +  assert_failed = any_failed;
1.335 +  testResultXml ("utf8-validate");
1.336 +  #endif /* SYMBIAN */
1.337 +
1.338 +  return any_failed ? 1 : 0;
1.339 +}