os/ossrv/glib/tsrc/BC/tests/utf8-validate.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/ossrv/glib/tsrc/BC/tests/utf8-validate.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,336 @@
     1.4 +/* GLIB - Library of useful routines for C programming
     1.5 + * Copyright (C) 2001 Matthias Clasen <matthiasc@poet.de>
     1.6 + * Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
     1.7 + * This library is free software; you can redistribute it and/or
     1.8 + * modify it under the terms of the GNU Lesser General Public
     1.9 + * License as published by the Free Software Foundation; either
    1.10 + * version 2 of the License, or (at your option) any later version.
    1.11 + *
    1.12 + * This library is distributed in the hope that it will be useful,
    1.13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.15 + * Lesser General Public License for more details.
    1.16 + *
    1.17 + * You should have received a copy of the GNU Lesser General Public
    1.18 + * License along with this library; if not, write to the
    1.19 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    1.20 + * Boston, MA 02111-1307, USA.
    1.21 + */
    1.22 +
    1.23 +#include "glib.h"
    1.24 +#include <stdio.h>
    1.25 +
    1.26 +#ifdef SYMBIAN
    1.27 +#include "mrt2_glib2_test.h"
    1.28 +#endif /*SYMBIAN*/
    1.29 +
    1.30 +/* Nonzero unless Char is >= 0x110000, lies in 0xD800-0xDFFF or 0xFDD0-0xFDEF,
    1.31 + * or has 0xFFFE/0xFFFF as its low 16 bits.  Char is evaluated several times. */
    1.32 +#define UNICODE_VALID(Char)                                            \
    1.33 +    (!((Char) >= 0x110000 || ((Char) & 0xFFFFF800) == 0xD800 ||       \
    1.34 +       ((Char) >= 0xFDD0 && (Char) <= 0xFDEF) || ((Char) & 0xFFFE) == 0xFFFE))
    1.35 +
    1.36 +
    1.37 +/* Set to TRUE by do_test() on any mismatch; main() turns it into the exit status. */
    1.38 +static gboolean any_failed = FALSE;
    1.39 +/* Inputs and expected results for g_utf8_validate(); a NULL text ends the table. */
    1.40 +struct {
    1.41 +  const gchar *text;   /* bytes handed to g_utf8_validate() */
    1.42 +  gint max_len;        /* max_len argument of the call */
    1.43 +  gint offset;         /* expected (end - text) after the call */
    1.44 +  gboolean valid;      /* expected return value */
    1.45 +} test[] = {  
    1.46 +  /* some tests to check max_len handling */
    1.47 +  /* length 1 */
    1.48 +  { "abcde", -1, 5, TRUE },
    1.49 +  { "abcde", 3, 3, TRUE },
    1.50 +  { "abcde", 5, 5, TRUE },
    1.51 +  { "abcde", 7, 5, FALSE },
    1.52 +  /* length 2 */
    1.53 +  { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE }, 
    1.54 +  { "\xc2\xa9\xc2\xa9\xc2\xa9",  1, 0, FALSE }, 
    1.55 +  { "\xc2\xa9\xc2\xa9\xc2\xa9",  2, 2, TRUE }, 
    1.56 +  { "\xc2\xa9\xc2\xa9\xc2\xa9",  3, 2, FALSE }, 
    1.57 +  { "\xc2\xa9\xc2\xa9\xc2\xa9",  4, 4, TRUE }, 
    1.58 +  { "\xc2\xa9\xc2\xa9\xc2\xa9",  5, 4, FALSE }, 
    1.59 +  { "\xc2\xa9\xc2\xa9\xc2\xa9",  6, 6, TRUE }, 
    1.60 +  { "\xc2\xa9\xc2\xa9\xc2\xa9",  7, 6, FALSE }, 
    1.61 +  /* length 3 */
    1.62 +  { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
    1.63 +  { "\xe2\x89\xa0\xe2\x89\xa0",  1, 0, FALSE },
    1.64 +  { "\xe2\x89\xa0\xe2\x89\xa0",  2, 0, FALSE },
    1.65 +  { "\xe2\x89\xa0\xe2\x89\xa0",  3, 3, TRUE },
    1.66 +  { "\xe2\x89\xa0\xe2\x89\xa0",  4, 3, FALSE },
    1.67 +  { "\xe2\x89\xa0\xe2\x89\xa0",  5, 3, FALSE },
    1.68 +  { "\xe2\x89\xa0\xe2\x89\xa0",  6, 6, TRUE },
    1.69 +  { "\xe2\x89\xa0\xe2\x89\xa0",  7, 6, FALSE },
    1.70 +
    1.71 +  /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
    1.72 +  /* greek 'kosme' */
    1.73 +  { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
    1.74 +  /* first sequence of each length */
    1.75 +  { "\x00", -1, 0, TRUE },
    1.76 +  { "\xc2\x80", -1, 2, TRUE },
    1.77 +  { "\xe0\xa0\x80", -1, 3, TRUE },
    1.78 +  { "\xf0\x90\x80\x80", -1, 4, TRUE },
    1.79 +  { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
    1.80 +  { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
    1.81 +  /* last sequence of each length */
    1.82 +  { "\x7f", -1, 1, TRUE },
    1.83 +  { "\xdf\xbf", -1, 2, TRUE },
    1.84 +  { "\xef\xbf\xbf", -1, 0, FALSE },
    1.85 +  { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
    1.86 +  { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    1.87 +  { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    1.88 +  /* other boundary conditions */
    1.89 +  { "\xed\x9f\xbf", -1, 3, TRUE },
    1.90 +  { "\xee\x80\x80", -1, 3, TRUE },
    1.91 +  { "\xef\xbf\xbd", -1, 3, TRUE },
    1.92 +  { "\xf4\x8f\xbf\xbf", -1, 0, FALSE },
    1.93 +  { "\xf4\x90\x80\x80", -1, 0, FALSE },
    1.94 +  /* malformed sequences */
    1.95 +  /* continuation bytes */
    1.96 +  { "\x80", -1, 0, FALSE },
    1.97 +  { "\xbf", -1, 0, FALSE },
    1.98 +  { "\x80\xbf", -1, 0, FALSE },
    1.99 +  { "\x80\xbf\x80", -1, 0, FALSE },
   1.100 +  { "\x80\xbf\x80\xbf", -1, 0, FALSE },
   1.101 +  { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
   1.102 +  { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
   1.103 +  { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
   1.104 +
   1.105 +  /* all possible continuation bytes */
   1.106 +  { "\x80", -1, 0, FALSE },
   1.107 +  { "\x81", -1, 0, FALSE },
   1.108 +  { "\x82", -1, 0, FALSE },
   1.109 +  { "\x83", -1, 0, FALSE },
   1.110 +  { "\x84", -1, 0, FALSE },
   1.111 +  { "\x85", -1, 0, FALSE },
   1.112 +  { "\x86", -1, 0, FALSE },
   1.113 +  { "\x87", -1, 0, FALSE },
   1.114 +  { "\x88", -1, 0, FALSE },
   1.115 +  { "\x89", -1, 0, FALSE },
   1.116 +  { "\x8a", -1, 0, FALSE },
   1.117 +  { "\x8b", -1, 0, FALSE },
   1.118 +  { "\x8c", -1, 0, FALSE },
   1.119 +  { "\x8d", -1, 0, FALSE },
   1.120 +  { "\x8e", -1, 0, FALSE },
   1.121 +  { "\x8f", -1, 0, FALSE },
   1.122 +  { "\x90", -1, 0, FALSE },
   1.123 +  { "\x91", -1, 0, FALSE },
   1.124 +  { "\x92", -1, 0, FALSE },
   1.125 +  { "\x93", -1, 0, FALSE },
   1.126 +  { "\x94", -1, 0, FALSE },
   1.127 +  { "\x95", -1, 0, FALSE },
   1.128 +  { "\x96", -1, 0, FALSE },
   1.129 +  { "\x97", -1, 0, FALSE },
   1.130 +  { "\x98", -1, 0, FALSE },
   1.131 +  { "\x99", -1, 0, FALSE },
   1.132 +  { "\x9a", -1, 0, FALSE },
   1.133 +  { "\x9b", -1, 0, FALSE },
   1.134 +  { "\x9c", -1, 0, FALSE },
   1.135 +  { "\x9d", -1, 0, FALSE },
   1.136 +  { "\x9e", -1, 0, FALSE },
   1.137 +  { "\x9f", -1, 0, FALSE },
   1.138 +  { "\xa0", -1, 0, FALSE },
   1.139 +  { "\xa1", -1, 0, FALSE },
   1.140 +  { "\xa2", -1, 0, FALSE },
   1.141 +  { "\xa3", -1, 0, FALSE },
   1.142 +  { "\xa4", -1, 0, FALSE },
   1.143 +  { "\xa5", -1, 0, FALSE },
   1.144 +  { "\xa6", -1, 0, FALSE },
   1.145 +  { "\xa7", -1, 0, FALSE },
   1.146 +  { "\xa8", -1, 0, FALSE },
   1.147 +  { "\xa9", -1, 0, FALSE },
   1.148 +  { "\xaa", -1, 0, FALSE },
   1.149 +  { "\xab", -1, 0, FALSE },
   1.150 +  { "\xac", -1, 0, FALSE },
   1.151 +  { "\xad", -1, 0, FALSE },
   1.152 +  { "\xae", -1, 0, FALSE },
   1.153 +  { "\xaf", -1, 0, FALSE },
   1.154 +  { "\xb0", -1, 0, FALSE },
   1.155 +  { "\xb1", -1, 0, FALSE },
   1.156 +  { "\xb2", -1, 0, FALSE },
   1.157 +  { "\xb3", -1, 0, FALSE },
   1.158 +  { "\xb4", -1, 0, FALSE },
   1.159 +  { "\xb5", -1, 0, FALSE },
   1.160 +  { "\xb6", -1, 0, FALSE },
   1.161 +  { "\xb7", -1, 0, FALSE },
   1.162 +  { "\xb8", -1, 0, FALSE },
   1.163 +  { "\xb9", -1, 0, FALSE },
   1.164 +  { "\xba", -1, 0, FALSE },
   1.165 +  { "\xbb", -1, 0, FALSE },
   1.166 +  { "\xbc", -1, 0, FALSE },
   1.167 +  { "\xbd", -1, 0, FALSE },
   1.168 +  { "\xbe", -1, 0, FALSE },
   1.169 +  { "\xbf", -1, 0, FALSE },
   1.170 +  /* lone start characters */
   1.171 +  { "\xc0\x20", -1, 0, FALSE },
   1.172 +  { "\xc1\x20", -1, 0, FALSE },
   1.173 +  { "\xc2\x20", -1, 0, FALSE },
   1.174 +  { "\xc3\x20", -1, 0, FALSE },
   1.175 +  { "\xc4\x20", -1, 0, FALSE },
   1.176 +  { "\xc5\x20", -1, 0, FALSE },
   1.177 +  { "\xc6\x20", -1, 0, FALSE },
   1.178 +  { "\xc7\x20", -1, 0, FALSE },
   1.179 +  { "\xc8\x20", -1, 0, FALSE },
   1.180 +  { "\xc9\x20", -1, 0, FALSE },
   1.181 +  { "\xca\x20", -1, 0, FALSE },
   1.182 +  { "\xcb\x20", -1, 0, FALSE },
   1.183 +  { "\xcc\x20", -1, 0, FALSE },
   1.184 +  { "\xcd\x20", -1, 0, FALSE },
   1.185 +  { "\xce\x20", -1, 0, FALSE },
   1.186 +  { "\xcf\x20", -1, 0, FALSE },
   1.187 +  { "\xd0\x20", -1, 0, FALSE },
   1.188 +  { "\xd1\x20", -1, 0, FALSE },
   1.189 +  { "\xd2\x20", -1, 0, FALSE },
   1.190 +  { "\xd3\x20", -1, 0, FALSE },
   1.191 +  { "\xd4\x20", -1, 0, FALSE },
   1.192 +  { "\xd5\x20", -1, 0, FALSE },
   1.193 +  { "\xd6\x20", -1, 0, FALSE },
   1.194 +  { "\xd7\x20", -1, 0, FALSE },
   1.195 +  { "\xd8\x20", -1, 0, FALSE },
   1.196 +  { "\xd9\x20", -1, 0, FALSE },
   1.197 +  { "\xda\x20", -1, 0, FALSE },
   1.198 +  { "\xdb\x20", -1, 0, FALSE },
   1.199 +  { "\xdc\x20", -1, 0, FALSE },
   1.200 +  { "\xdd\x20", -1, 0, FALSE },
   1.201 +  { "\xde\x20", -1, 0, FALSE },
   1.202 +  { "\xdf\x20", -1, 0, FALSE },
   1.203 +  { "\xe0\x20", -1, 0, FALSE },
   1.204 +  { "\xe1\x20", -1, 0, FALSE },
   1.205 +  { "\xe2\x20", -1, 0, FALSE },
   1.206 +  { "\xe3\x20", -1, 0, FALSE },
   1.207 +  { "\xe4\x20", -1, 0, FALSE },
   1.208 +  { "\xe5\x20", -1, 0, FALSE },
   1.209 +  { "\xe6\x20", -1, 0, FALSE },
   1.210 +  { "\xe7\x20", -1, 0, FALSE },
   1.211 +  { "\xe8\x20", -1, 0, FALSE },
   1.212 +  { "\xe9\x20", -1, 0, FALSE },
   1.213 +  { "\xea\x20", -1, 0, FALSE },
   1.214 +  { "\xeb\x20", -1, 0, FALSE },
   1.215 +  { "\xec\x20", -1, 0, FALSE },
   1.216 +  { "\xed\x20", -1, 0, FALSE },
   1.217 +  { "\xee\x20", -1, 0, FALSE },
   1.218 +  { "\xef\x20", -1, 0, FALSE },
   1.219 +  { "\xf0\x20", -1, 0, FALSE },
   1.220 +  { "\xf1\x20", -1, 0, FALSE },
   1.221 +  { "\xf2\x20", -1, 0, FALSE },
   1.222 +  { "\xf3\x20", -1, 0, FALSE },
   1.223 +  { "\xf4\x20", -1, 0, FALSE },
   1.224 +  { "\xf5\x20", -1, 0, FALSE },
   1.225 +  { "\xf6\x20", -1, 0, FALSE },
   1.226 +  { "\xf7\x20", -1, 0, FALSE },
   1.227 +  { "\xf8\x20", -1, 0, FALSE },
   1.228 +  { "\xf9\x20", -1, 0, FALSE },
   1.229 +  { "\xfa\x20", -1, 0, FALSE },
   1.230 +  { "\xfb\x20", -1, 0, FALSE },
   1.231 +  { "\xfc\x20", -1, 0, FALSE },
   1.232 +  { "\xfd\x20", -1, 0, FALSE },
   1.233 +  /* missing continuation bytes */
   1.234 +  { "\x20\xc0", -1, 1, FALSE },
   1.235 +  { "\x20\xe0\x80", -1, 1, FALSE },
   1.236 +  { "\x20\xf0\x80\x80", -1, 1, FALSE },
   1.237 +  { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
   1.238 +  { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
   1.239 +  { "\x20\xdf", -1, 1, FALSE },
   1.240 +  { "\x20\xef\xbf", -1, 1, FALSE },
   1.241 +  { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
   1.242 +  { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
   1.243 +  { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
   1.244 +  /* impossible bytes */
   1.245 +  { "\x20\xfe\x20", -1, 1, FALSE },
   1.246 +  { "\x20\xff\x20", -1, 1, FALSE },
   1.247 +  /* overlong sequences */
   1.248 +  { "\x20\xc0\xaf\x20", -1, 1, FALSE },
   1.249 +  { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
   1.250 +  { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
   1.251 +  { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
   1.252 +  { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
   1.253 +  { "\x20\xc1\xbf\x20", -1, 1, FALSE },
   1.254 +  { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
   1.255 +  { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
   1.256 +  { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
   1.257 +  { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
   1.258 +  { "\x20\xc0\x80\x20", -1, 1, FALSE },
   1.259 +  { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
   1.260 +  { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
   1.261 +  { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
   1.262 +  { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
   1.263 +  /* illegal code positions */
   1.264 +  { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
   1.265 +  { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
   1.266 +  { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
   1.267 +  { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
   1.268 +  { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
   1.269 +  { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
   1.270 +  { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
   1.271 +  { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
   1.272 +  { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
   1.273 +  { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
   1.274 +  { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
   1.275 +  { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
   1.276 +  { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
   1.277 +  { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
   1.278 +  { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
   1.279 +  { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
   1.280 +  { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
   1.281 +
   1.282 +  { NULL, }
   1.283 +};
   1.284 +/* Run one entry of test[] through g_utf8_validate() and report a mismatch.
   1.285 + *
   1.286 + * index:   position of the entry, only used in the failure message
   1.287 + * text:    NUL-terminated bytes to validate
   1.288 + * max_len: forwarded to g_utf8_validate() unchanged
   1.289 + * offset:  expected value of (end - text) after the call
   1.290 + * valid:   expected return value
   1.291 + *
   1.292 + * On a mismatch, sets any_failed and prints the input as \xNN escapes. */
   1.293 +static void
   1.294 +do_test (gint index, const gchar *text, gint max_len, gint offset,
   1.295 +         gboolean valid)
   1.296 +{
   1.297 +  const gchar *end;
   1.298 +  gboolean result = g_utf8_validate (text, max_len, &end);
   1.299 +
   1.300 +  if (result != valid || end - text != offset)
   1.301 +    {
   1.302 +      const gchar *p;
   1.303 +      GString *str = g_string_new (NULL);
   1.304 +
   1.305 +      any_failed = TRUE;
   1.306 +      /* guchar keeps bytes >= 0x80 from being sign-extended when promoted. */
   1.307 +      for (p = text; *p; p++)
   1.308 +        g_string_append_printf (str, "\\x%02x", (guchar) *p);
   1.309 +      g_print ("%d: g_utf8_validate (\"%s\", %d) failed, "
   1.310 +               "expected %s %d, got %s %d\n", index, str->str, max_len,
   1.311 +               valid ? "TRUE" : "FALSE", offset,
   1.312 +               result ? "TRUE" : "FALSE", (gint) (end - text));
   1.313 +      g_string_free (str, TRUE);  /* TRUE releases the character data too */
   1.314 +    }
   1.315 +}
   1.316 +/* Feed each test[] entry to do_test(); the exit status is 1 if any entry failed. */
   1.317 +int
   1.318 +main (int argc, char *argv[])
   1.319 +{
   1.320 +  gint i = 0;
   1.321 +
   1.322 +#ifdef SYMBIAN
   1.323 +  g_log_set_handler (NULL, G_LOG_FLAG_FATAL | G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL
   1.324 +                     | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO
   1.325 +                     | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL);
   1.326 +  g_set_print_handler (mrtPrintHandler);
   1.327 +#endif /* SYMBIAN */
   1.328 +
   1.329 +  /* The table ends at the entry whose text is NULL. */
   1.330 +  while (test[i].text != NULL)
   1.331 +    {
   1.332 +      do_test (i, test[i].text, test[i].max_len, test[i].offset, test[i].valid);
   1.333 +      i++;
   1.334 +    }
   1.335 +#ifdef SYMBIAN
   1.336 +  assert_failed = any_failed;
   1.337 +  testResultXml ("utf8-validate");
   1.338 +#endif /* SYMBIAN */
   1.339 +  return any_failed ? 1 : 0;
   1.340 +}