os/ossrv/glib/tsrc/BC/tests/utf8-validate.c
author sl@SLION-WIN7.fritz.box
Fri, 15 Jun 2012 03:10:57 +0200
changeset 0 bde4ae8d615e
permissions -rw-r--r--
First public contribution.
     1 /* GLIB - Library of useful routines for C programming
     2  * Copyright (C) 2001 Matthias Clasen <matthiasc@poet.de>
     3  * Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
     4  * This library is free software; you can redistribute it and/or
     5  * modify it under the terms of the GNU Lesser General Public
     6  * License as published by the Free Software Foundation; either
     7  * version 2 of the License, or (at your option) any later version.
     8  *
     9  * This library is distributed in the hope that it will be useful,
    10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    12  * Lesser General Public License for more details.
    13  *
    14  * You should have received a copy of the GNU Lesser General Public
    15  * License along with this library; if not, write to the
    16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    17  * Boston, MA 02111-1307, USA.
    18  */
    19 
    20 #include "glib.h"
    21 #include <stdio.h>
    22 
    23 #ifdef SYMBIAN
    24 #include "mrt2_glib2_test.h"
    25 #endif /*SYMBIAN*/
    26 
    27 #define UNICODE_VALID(Char)                   \
    28     ((Char) < 0x110000 &&                     \
    29      (((Char) & 0xFFFFF800) != 0xD800) &&     \
    30      ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&  \
    31      ((Char) & 0xFFFE) != 0xFFFE)
    32 
    33 
    34 
/* Set to TRUE by do_test() on the first mismatch; main() derives the
 * process exit status from it. */
static gboolean any_failed = FALSE;
    36 
    37 struct {
    38   const gchar *text;
    39   gint max_len;
    40   gint offset;
    41   gboolean valid;
    42 } test[] = {  
    43   /* some tests to check max_len handling */
    44   /* length 1 */
    45   { "abcde", -1, 5, TRUE },
    46   { "abcde", 3, 3, TRUE },
    47   { "abcde", 5, 5, TRUE },
    48   { "abcde", 7, 5, FALSE },
    49   /* length 2 */
    50   { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE }, 
    51   { "\xc2\xa9\xc2\xa9\xc2\xa9",  1, 0, FALSE }, 
    52   { "\xc2\xa9\xc2\xa9\xc2\xa9",  2, 2, TRUE }, 
    53   { "\xc2\xa9\xc2\xa9\xc2\xa9",  3, 2, FALSE }, 
    54   { "\xc2\xa9\xc2\xa9\xc2\xa9",  4, 4, TRUE }, 
    55   { "\xc2\xa9\xc2\xa9\xc2\xa9",  5, 4, FALSE }, 
    56   { "\xc2\xa9\xc2\xa9\xc2\xa9",  6, 6, TRUE }, 
    57   { "\xc2\xa9\xc2\xa9\xc2\xa9",  7, 6, FALSE }, 
    58   /* length 3 */
    59   { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
    60   { "\xe2\x89\xa0\xe2\x89\xa0",  1, 0, FALSE },
    61   { "\xe2\x89\xa0\xe2\x89\xa0",  2, 0, FALSE },
    62   { "\xe2\x89\xa0\xe2\x89\xa0",  3, 3, TRUE },
    63   { "\xe2\x89\xa0\xe2\x89\xa0",  4, 3, FALSE },
    64   { "\xe2\x89\xa0\xe2\x89\xa0",  5, 3, FALSE },
    65   { "\xe2\x89\xa0\xe2\x89\xa0",  6, 6, TRUE },
    66   { "\xe2\x89\xa0\xe2\x89\xa0",  7, 6, FALSE },
    67 
    68   /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
    69   /* greek 'kosme' */
    70   { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
    71   /* first sequence of each length */
    72   { "\x00", -1, 0, TRUE },
    73   { "\xc2\x80", -1, 2, TRUE },
    74   { "\xe0\xa0\x80", -1, 3, TRUE },
    75   { "\xf0\x90\x80\x80", -1, 4, TRUE },
    76   { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
    77   { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
    78   /* last sequence of each length */
    79   { "\x7f", -1, 1, TRUE },
    80   { "\xdf\xbf", -1, 2, TRUE },
    81   { "\xef\xbf\xbf", -1, 0, FALSE },
    82   { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
    83   { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    84   { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    85   /* other boundary conditions */
    86   { "\xed\x9f\xbf", -1, 3, TRUE },
    87   { "\xee\x80\x80", -1, 3, TRUE },
    88   { "\xef\xbf\xbd", -1, 3, TRUE },
    89   { "\xf4\x8f\xbf\xbf", -1, 0, FALSE },
    90   { "\xf4\x90\x80\x80", -1, 0, FALSE },
    91   /* malformed sequences */
    92   /* continuation bytes */
    93   { "\x80", -1, 0, FALSE },
    94   { "\xbf", -1, 0, FALSE },
    95   { "\x80\xbf", -1, 0, FALSE },
    96   { "\x80\xbf\x80", -1, 0, FALSE },
    97   { "\x80\xbf\x80\xbf", -1, 0, FALSE },
    98   { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
    99   { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
   100   { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
   101 
   102   /* all possible continuation byte */
   103   { "\x80", -1, 0, FALSE },
   104   { "\x81", -1, 0, FALSE },
   105   { "\x82", -1, 0, FALSE },
   106   { "\x83", -1, 0, FALSE },
   107   { "\x84", -1, 0, FALSE },
   108   { "\x85", -1, 0, FALSE },
   109   { "\x86", -1, 0, FALSE },
   110   { "\x87", -1, 0, FALSE },
   111   { "\x88", -1, 0, FALSE },
   112   { "\x89", -1, 0, FALSE },
   113   { "\x8a", -1, 0, FALSE },
   114   { "\x8b", -1, 0, FALSE },
   115   { "\x8c", -1, 0, FALSE },
   116   { "\x8d", -1, 0, FALSE },
   117   { "\x8e", -1, 0, FALSE },
   118   { "\x8f", -1, 0, FALSE },
   119   { "\x90", -1, 0, FALSE },
   120   { "\x91", -1, 0, FALSE },
   121   { "\x92", -1, 0, FALSE },
   122   { "\x93", -1, 0, FALSE },
   123   { "\x94", -1, 0, FALSE },
   124   { "\x95", -1, 0, FALSE },
   125   { "\x96", -1, 0, FALSE },
   126   { "\x97", -1, 0, FALSE },
   127   { "\x98", -1, 0, FALSE },
   128   { "\x99", -1, 0, FALSE },
   129   { "\x9a", -1, 0, FALSE },
   130   { "\x9b", -1, 0, FALSE },
   131   { "\x9c", -1, 0, FALSE },
   132   { "\x9d", -1, 0, FALSE },
   133   { "\x9e", -1, 0, FALSE },
   134   { "\x9f", -1, 0, FALSE },
   135   { "\xa0", -1, 0, FALSE },
   136   { "\xa1", -1, 0, FALSE },
   137   { "\xa2", -1, 0, FALSE },
   138   { "\xa3", -1, 0, FALSE },
   139   { "\xa4", -1, 0, FALSE },
   140   { "\xa5", -1, 0, FALSE },
   141   { "\xa6", -1, 0, FALSE },
   142   { "\xa7", -1, 0, FALSE },
   143   { "\xa8", -1, 0, FALSE },
   144   { "\xa9", -1, 0, FALSE },
   145   { "\xaa", -1, 0, FALSE },
   146   { "\xab", -1, 0, FALSE },
   147   { "\xac", -1, 0, FALSE },
   148   { "\xad", -1, 0, FALSE },
   149   { "\xae", -1, 0, FALSE },
   150   { "\xaf", -1, 0, FALSE },
   151   { "\xb0", -1, 0, FALSE },
   152   { "\xb1", -1, 0, FALSE },
   153   { "\xb2", -1, 0, FALSE },
   154   { "\xb3", -1, 0, FALSE },
   155   { "\xb4", -1, 0, FALSE },
   156   { "\xb5", -1, 0, FALSE },
   157   { "\xb6", -1, 0, FALSE },
   158   { "\xb7", -1, 0, FALSE },
   159   { "\xb8", -1, 0, FALSE },
   160   { "\xb9", -1, 0, FALSE },
   161   { "\xba", -1, 0, FALSE },
   162   { "\xbb", -1, 0, FALSE },
   163   { "\xbc", -1, 0, FALSE },
   164   { "\xbd", -1, 0, FALSE },
   165   { "\xbe", -1, 0, FALSE },
   166   { "\xbf", -1, 0, FALSE },
   167   /* lone start characters */
   168   { "\xc0\x20", -1, 0, FALSE },
   169   { "\xc1\x20", -1, 0, FALSE },
   170   { "\xc2\x20", -1, 0, FALSE },
   171   { "\xc3\x20", -1, 0, FALSE },
   172   { "\xc4\x20", -1, 0, FALSE },
   173   { "\xc5\x20", -1, 0, FALSE },
   174   { "\xc6\x20", -1, 0, FALSE },
   175   { "\xc7\x20", -1, 0, FALSE },
   176   { "\xc8\x20", -1, 0, FALSE },
   177   { "\xc9\x20", -1, 0, FALSE },
   178   { "\xca\x20", -1, 0, FALSE },
   179   { "\xcb\x20", -1, 0, FALSE },
   180   { "\xcc\x20", -1, 0, FALSE },
   181   { "\xcd\x20", -1, 0, FALSE },
   182   { "\xce\x20", -1, 0, FALSE },
   183   { "\xcf\x20", -1, 0, FALSE },
   184   { "\xd0\x20", -1, 0, FALSE },
   185   { "\xd1\x20", -1, 0, FALSE },
   186   { "\xd2\x20", -1, 0, FALSE },
   187   { "\xd3\x20", -1, 0, FALSE },
   188   { "\xd4\x20", -1, 0, FALSE },
   189   { "\xd5\x20", -1, 0, FALSE },
   190   { "\xd6\x20", -1, 0, FALSE },
   191   { "\xd7\x20", -1, 0, FALSE },
   192   { "\xd8\x20", -1, 0, FALSE },
   193   { "\xd9\x20", -1, 0, FALSE },
   194   { "\xda\x20", -1, 0, FALSE },
   195   { "\xdb\x20", -1, 0, FALSE },
   196   { "\xdc\x20", -1, 0, FALSE },
   197   { "\xdd\x20", -1, 0, FALSE },
   198   { "\xde\x20", -1, 0, FALSE },
   199   { "\xdf\x20", -1, 0, FALSE },
   200   { "\xe0\x20", -1, 0, FALSE },
   201   { "\xe1\x20", -1, 0, FALSE },
   202   { "\xe2\x20", -1, 0, FALSE },
   203   { "\xe3\x20", -1, 0, FALSE },
   204   { "\xe4\x20", -1, 0, FALSE },
   205   { "\xe5\x20", -1, 0, FALSE },
   206   { "\xe6\x20", -1, 0, FALSE },
   207   { "\xe7\x20", -1, 0, FALSE },
   208   { "\xe8\x20", -1, 0, FALSE },
   209   { "\xe9\x20", -1, 0, FALSE },
   210   { "\xea\x20", -1, 0, FALSE },
   211   { "\xeb\x20", -1, 0, FALSE },
   212   { "\xec\x20", -1, 0, FALSE },
   213   { "\xed\x20", -1, 0, FALSE },
   214   { "\xee\x20", -1, 0, FALSE },
   215   { "\xef\x20", -1, 0, FALSE },
   216   { "\xf0\x20", -1, 0, FALSE },
   217   { "\xf1\x20", -1, 0, FALSE },
   218   { "\xf2\x20", -1, 0, FALSE },
   219   { "\xf3\x20", -1, 0, FALSE },
   220   { "\xf4\x20", -1, 0, FALSE },
   221   { "\xf5\x20", -1, 0, FALSE },
   222   { "\xf6\x20", -1, 0, FALSE },
   223   { "\xf7\x20", -1, 0, FALSE },
   224   { "\xf8\x20", -1, 0, FALSE },
   225   { "\xf9\x20", -1, 0, FALSE },
   226   { "\xfa\x20", -1, 0, FALSE },
   227   { "\xfb\x20", -1, 0, FALSE },
   228   { "\xfc\x20", -1, 0, FALSE },
   229   { "\xfd\x20", -1, 0, FALSE },
   230   /* missing continuation bytes */
   231   { "\x20\xc0", -1, 1, FALSE },
   232   { "\x20\xe0\x80", -1, 1, FALSE },
   233   { "\x20\xf0\x80\x80", -1, 1, FALSE },
   234   { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
   235   { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
   236   { "\x20\xdf", -1, 1, FALSE },
   237   { "\x20\xef\xbf", -1, 1, FALSE },
   238   { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
   239   { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
   240   { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
   241   /* impossible bytes */
   242   { "\x20\xfe\x20", -1, 1, FALSE },
   243   { "\x20\xff\x20", -1, 1, FALSE },
   244   /* overlong sequences */
   245   { "\x20\xc0\xaf\x20", -1, 1, FALSE },
   246   { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
   247   { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
   248   { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
   249   { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
   250   { "\x20\xc1\xbf\x20", -1, 1, FALSE },
   251   { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
   252   { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
   253   { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
   254   { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
   255   { "\x20\xc0\x80\x20", -1, 1, FALSE },
   256   { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
   257   { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
   258   { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
   259   { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
   260   /* illegal code positions */
   261   { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
   262   { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
   263   { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
   264   { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
   265   { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
   266   { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
   267   { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
   268   { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
   269   { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
   270   { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
   271   { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
   272   { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
   273   { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
   274   { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
   275   { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
   276   { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
   277   { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
   278 
   279   { NULL, }
   280 };
   281 
   282 static void 
   283 do_test (gint         index,
   284 	 const gchar *text, 
   285 	 gint         max_len,
   286 	 gint         offset,
   287 	 gboolean     valid)
   288 {
   289   const gchar *end;
   290   gboolean result;
   291   
   292   result = g_utf8_validate (text, max_len, &end);
   293 
   294   if (result != valid || end - text != offset)
   295     {
   296       GString *str;
   297       const gchar *p;
   298 
   299       any_failed = TRUE;
   300       
   301       str = g_string_new (0);
   302       for (p = text; *p; p++)
   303 	g_string_append_printf (str, "\\x%02hhx", *p);
   304       g_print ("%d: g_utf8_validate (\"%s\", %d) failed, "
   305 	       "expected %s %d, got %s %d\n",
   306 	       index,
   307 	       str->str, max_len, 
   308 	       valid ? "TRUE" : "FALSE", offset,
   309 	       result ? "TRUE" : "FALSE", (gint) (end - text));
   310       g_string_free (str, FALSE);
   311     }
   312 }
   313 
   314 int
   315 main (int argc, char *argv[])
   316 {
   317   gint i;
   318 
   319   #ifdef SYMBIAN
   320   g_log_set_handler (NULL,  G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL);
   321   g_set_print_handler(mrtPrintHandler);
   322   #endif /*SYMBIAN*/
   323 	  
   324 
   325   for (i = 0; test[i].text; i++)
   326     do_test (i, test[i].text, test[i].max_len, 
   327 	     test[i].offset, test[i].valid);
   328   
   329 
   330   #ifdef SYMBIAN
   331   assert_failed = any_failed;
   332   testResultXml("utf8-validate");
   333   #endif /* EMULATOR */
   334   
   335   return any_failed ? 1 : 0;
   336 }