os/ossrv/glib/tests/utf8-validate.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /* GLIB - Library of useful routines for C programming
     2  * Copyright (C) 2001 Matthias Clasen <matthiasc@poet.de>
     3  * Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
     4  * This library is free software; you can redistribute it and/or
     5  * modify it under the terms of the GNU Lesser General Public
     6  * License as published by the Free Software Foundation; either
     7  * version 2 of the License, or (at your option) any later version.
     8  *
     9  * This library is distributed in the hope that it will be useful,
    10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    12  * Lesser General Public License for more details.
    13  *
    14  * You should have received a copy of the GNU Lesser General Public
    15  * License along with this library; if not, write to the
    16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    17  * Boston, MA 02111-1307, USA.
    18  */
    19 
    20 #include "glib.h"
    21 #ifdef __SYMBIAN32__
    22 #include "mrt2_glib2_test.h"
    23 #endif /*__SYMBIAN32__*/
    24 
    25 #define UNICODE_VALID(Char)                   \
    26     ((Char) < 0x110000 &&                     \
    27      (((Char) & 0xFFFFF800) != 0xD800) &&     \
    28      ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&  \
    29      ((Char) & 0xFFFE) != 0xFFFE)
    30 
    31 
    32 
    33 static gboolean any_failed = FALSE;
    34 
    35 struct {
    36   const gchar *text;
    37   gint max_len;
    38   gint offset;
    39   gboolean valid;
    40 } test[] = {  
    41   /* some tests to check max_len handling */
    42   /* length 1 */
    43   { "abcde", -1, 5, TRUE },
    44   { "abcde", 3, 3, TRUE },
    45   { "abcde", 5, 5, TRUE },
    46   { "abcde", 7, 5, FALSE },
    47   /* length 2 */
    48   { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE }, 
    49   { "\xc2\xa9\xc2\xa9\xc2\xa9",  1, 0, FALSE }, 
    50   { "\xc2\xa9\xc2\xa9\xc2\xa9",  2, 2, TRUE }, 
    51   { "\xc2\xa9\xc2\xa9\xc2\xa9",  3, 2, FALSE }, 
    52   { "\xc2\xa9\xc2\xa9\xc2\xa9",  4, 4, TRUE }, 
    53   { "\xc2\xa9\xc2\xa9\xc2\xa9",  5, 4, FALSE }, 
    54   { "\xc2\xa9\xc2\xa9\xc2\xa9",  6, 6, TRUE }, 
    55   { "\xc2\xa9\xc2\xa9\xc2\xa9",  7, 6, FALSE }, 
    56   /* length 3 */
    57   { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
    58   { "\xe2\x89\xa0\xe2\x89\xa0",  1, 0, FALSE },
    59   { "\xe2\x89\xa0\xe2\x89\xa0",  2, 0, FALSE },
    60   { "\xe2\x89\xa0\xe2\x89\xa0",  3, 3, TRUE },
    61   { "\xe2\x89\xa0\xe2\x89\xa0",  4, 3, FALSE },
    62   { "\xe2\x89\xa0\xe2\x89\xa0",  5, 3, FALSE },
    63   { "\xe2\x89\xa0\xe2\x89\xa0",  6, 6, TRUE },
    64   { "\xe2\x89\xa0\xe2\x89\xa0",  7, 6, FALSE },
    65 
    66   /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
    67   /* greek 'kosme' */
    68   { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
    69   /* first sequence of each length */
    70   { "\x00", -1, 0, TRUE },
    71   { "\xc2\x80", -1, 2, TRUE },
    72   { "\xe0\xa0\x80", -1, 3, TRUE },
    73   { "\xf0\x90\x80\x80", -1, 4, TRUE },
    74   { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
    75   { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
    76   /* last sequence of each length */
    77   { "\x7f", -1, 1, TRUE },
    78   { "\xdf\xbf", -1, 2, TRUE },
    79   { "\xef\xbf\xbf", -1, 0, FALSE },
    80   { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
    81   { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    82   { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    83   /* other boundary conditions */
    84   { "\xed\x9f\xbf", -1, 3, TRUE },
    85   { "\xee\x80\x80", -1, 3, TRUE },
    86   { "\xef\xbf\xbd", -1, 3, TRUE },
    87   { "\xf4\x8f\xbf\xbf", -1, 0, FALSE },
    88   { "\xf4\x90\x80\x80", -1, 0, FALSE },
    89   /* malformed sequences */
    90   /* continuation bytes */
    91   { "\x80", -1, 0, FALSE },
    92   { "\xbf", -1, 0, FALSE },
    93   { "\x80\xbf", -1, 0, FALSE },
    94   { "\x80\xbf\x80", -1, 0, FALSE },
    95   { "\x80\xbf\x80\xbf", -1, 0, FALSE },
    96   { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
    97   { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
    98   { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
    99 
   100   /* all possible continuation byte */
   101   { "\x80", -1, 0, FALSE },
   102   { "\x81", -1, 0, FALSE },
   103   { "\x82", -1, 0, FALSE },
   104   { "\x83", -1, 0, FALSE },
   105   { "\x84", -1, 0, FALSE },
   106   { "\x85", -1, 0, FALSE },
   107   { "\x86", -1, 0, FALSE },
   108   { "\x87", -1, 0, FALSE },
   109   { "\x88", -1, 0, FALSE },
   110   { "\x89", -1, 0, FALSE },
   111   { "\x8a", -1, 0, FALSE },
   112   { "\x8b", -1, 0, FALSE },
   113   { "\x8c", -1, 0, FALSE },
   114   { "\x8d", -1, 0, FALSE },
   115   { "\x8e", -1, 0, FALSE },
   116   { "\x8f", -1, 0, FALSE },
   117   { "\x90", -1, 0, FALSE },
   118   { "\x91", -1, 0, FALSE },
   119   { "\x92", -1, 0, FALSE },
   120   { "\x93", -1, 0, FALSE },
   121   { "\x94", -1, 0, FALSE },
   122   { "\x95", -1, 0, FALSE },
   123   { "\x96", -1, 0, FALSE },
   124   { "\x97", -1, 0, FALSE },
   125   { "\x98", -1, 0, FALSE },
   126   { "\x99", -1, 0, FALSE },
   127   { "\x9a", -1, 0, FALSE },
   128   { "\x9b", -1, 0, FALSE },
   129   { "\x9c", -1, 0, FALSE },
   130   { "\x9d", -1, 0, FALSE },
   131   { "\x9e", -1, 0, FALSE },
   132   { "\x9f", -1, 0, FALSE },
   133   { "\xa0", -1, 0, FALSE },
   134   { "\xa1", -1, 0, FALSE },
   135   { "\xa2", -1, 0, FALSE },
   136   { "\xa3", -1, 0, FALSE },
   137   { "\xa4", -1, 0, FALSE },
   138   { "\xa5", -1, 0, FALSE },
   139   { "\xa6", -1, 0, FALSE },
   140   { "\xa7", -1, 0, FALSE },
   141   { "\xa8", -1, 0, FALSE },
   142   { "\xa9", -1, 0, FALSE },
   143   { "\xaa", -1, 0, FALSE },
   144   { "\xab", -1, 0, FALSE },
   145   { "\xac", -1, 0, FALSE },
   146   { "\xad", -1, 0, FALSE },
   147   { "\xae", -1, 0, FALSE },
   148   { "\xaf", -1, 0, FALSE },
   149   { "\xb0", -1, 0, FALSE },
   150   { "\xb1", -1, 0, FALSE },
   151   { "\xb2", -1, 0, FALSE },
   152   { "\xb3", -1, 0, FALSE },
   153   { "\xb4", -1, 0, FALSE },
   154   { "\xb5", -1, 0, FALSE },
   155   { "\xb6", -1, 0, FALSE },
   156   { "\xb7", -1, 0, FALSE },
   157   { "\xb8", -1, 0, FALSE },
   158   { "\xb9", -1, 0, FALSE },
   159   { "\xba", -1, 0, FALSE },
   160   { "\xbb", -1, 0, FALSE },
   161   { "\xbc", -1, 0, FALSE },
   162   { "\xbd", -1, 0, FALSE },
   163   { "\xbe", -1, 0, FALSE },
   164   { "\xbf", -1, 0, FALSE },
   165   /* lone start characters */
   166   { "\xc0\x20", -1, 0, FALSE },
   167   { "\xc1\x20", -1, 0, FALSE },
   168   { "\xc2\x20", -1, 0, FALSE },
   169   { "\xc3\x20", -1, 0, FALSE },
   170   { "\xc4\x20", -1, 0, FALSE },
   171   { "\xc5\x20", -1, 0, FALSE },
   172   { "\xc6\x20", -1, 0, FALSE },
   173   { "\xc7\x20", -1, 0, FALSE },
   174   { "\xc8\x20", -1, 0, FALSE },
   175   { "\xc9\x20", -1, 0, FALSE },
   176   { "\xca\x20", -1, 0, FALSE },
   177   { "\xcb\x20", -1, 0, FALSE },
   178   { "\xcc\x20", -1, 0, FALSE },
   179   { "\xcd\x20", -1, 0, FALSE },
   180   { "\xce\x20", -1, 0, FALSE },
   181   { "\xcf\x20", -1, 0, FALSE },
   182   { "\xd0\x20", -1, 0, FALSE },
   183   { "\xd1\x20", -1, 0, FALSE },
   184   { "\xd2\x20", -1, 0, FALSE },
   185   { "\xd3\x20", -1, 0, FALSE },
   186   { "\xd4\x20", -1, 0, FALSE },
   187   { "\xd5\x20", -1, 0, FALSE },
   188   { "\xd6\x20", -1, 0, FALSE },
   189   { "\xd7\x20", -1, 0, FALSE },
   190   { "\xd8\x20", -1, 0, FALSE },
   191   { "\xd9\x20", -1, 0, FALSE },
   192   { "\xda\x20", -1, 0, FALSE },
   193   { "\xdb\x20", -1, 0, FALSE },
   194   { "\xdc\x20", -1, 0, FALSE },
   195   { "\xdd\x20", -1, 0, FALSE },
   196   { "\xde\x20", -1, 0, FALSE },
   197   { "\xdf\x20", -1, 0, FALSE },
   198   { "\xe0\x20", -1, 0, FALSE },
   199   { "\xe1\x20", -1, 0, FALSE },
   200   { "\xe2\x20", -1, 0, FALSE },
   201   { "\xe3\x20", -1, 0, FALSE },
   202   { "\xe4\x20", -1, 0, FALSE },
   203   { "\xe5\x20", -1, 0, FALSE },
   204   { "\xe6\x20", -1, 0, FALSE },
   205   { "\xe7\x20", -1, 0, FALSE },
   206   { "\xe8\x20", -1, 0, FALSE },
   207   { "\xe9\x20", -1, 0, FALSE },
   208   { "\xea\x20", -1, 0, FALSE },
   209   { "\xeb\x20", -1, 0, FALSE },
   210   { "\xec\x20", -1, 0, FALSE },
   211   { "\xed\x20", -1, 0, FALSE },
   212   { "\xee\x20", -1, 0, FALSE },
   213   { "\xef\x20", -1, 0, FALSE },
   214   { "\xf0\x20", -1, 0, FALSE },
   215   { "\xf1\x20", -1, 0, FALSE },
   216   { "\xf2\x20", -1, 0, FALSE },
   217   { "\xf3\x20", -1, 0, FALSE },
   218   { "\xf4\x20", -1, 0, FALSE },
   219   { "\xf5\x20", -1, 0, FALSE },
   220   { "\xf6\x20", -1, 0, FALSE },
   221   { "\xf7\x20", -1, 0, FALSE },
   222   { "\xf8\x20", -1, 0, FALSE },
   223   { "\xf9\x20", -1, 0, FALSE },
   224   { "\xfa\x20", -1, 0, FALSE },
   225   { "\xfb\x20", -1, 0, FALSE },
   226   { "\xfc\x20", -1, 0, FALSE },
   227   { "\xfd\x20", -1, 0, FALSE },
   228   /* missing continuation bytes */
   229   { "\x20\xc0", -1, 1, FALSE },
   230   { "\x20\xe0\x80", -1, 1, FALSE },
   231   { "\x20\xf0\x80\x80", -1, 1, FALSE },
   232   { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
   233   { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
   234   { "\x20\xdf", -1, 1, FALSE },
   235   { "\x20\xef\xbf", -1, 1, FALSE },
   236   { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
   237   { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
   238   { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
   239   /* impossible bytes */
   240   { "\x20\xfe\x20", -1, 1, FALSE },
   241   { "\x20\xff\x20", -1, 1, FALSE },
   242   /* overlong sequences */
   243   { "\x20\xc0\xaf\x20", -1, 1, FALSE },
   244   { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
   245   { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
   246   { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
   247   { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
   248   { "\x20\xc1\xbf\x20", -1, 1, FALSE },
   249   { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
   250   { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
   251   { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
   252   { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
   253   { "\x20\xc0\x80\x20", -1, 1, FALSE },
   254   { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
   255   { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
   256   { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
   257   { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
   258   /* illegal code positions */
   259   { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
   260   { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
   261   { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
   262   { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
   263   { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
   264   { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
   265   { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
   266   { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
   267   { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
   268   { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
   269   { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
   270   { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
   271   { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
   272   { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
   273   { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
   274   { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
   275   { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
   276 
   277   { NULL, }
   278 };
   279 
   280 static void 
   281 do_test (gint         index,
   282 	 const gchar *text, 
   283 	 gint         max_len,
   284 	 gint         offset,
   285 	 gboolean     valid)
   286 {
   287   const gchar *end;
   288   gboolean result;
   289   
   290   result = g_utf8_validate (text, max_len, &end);
   291 
   292   if (result != valid || end - text != offset)
   293     {
   294       GString *str;
   295       const gchar *p;
   296 
   297       any_failed = TRUE;
   298       
   299       str = g_string_new (0);
   300       for (p = text; *p; p++)
   301 	g_string_append_printf (str, "\\x%02hhx", *p);
   302       g_print ("%d: g_utf8_validate (\"%s\", %d) failed, "
   303 	       "expected %s %d, got %s %d\n",
   304 	       index,
   305 	       str->str, max_len, 
   306 	       valid ? "TRUE" : "FALSE", offset,
   307 	       result ? "TRUE" : "FALSE", (gint) (end - text));
   308       g_string_free (str, FALSE);
   309     }
   310 }
   311 
   312 int
   313 main (int argc, char *argv[])
   314 {
   315   gint i;
   316 
   317   #ifdef __SYMBIAN32__
   318   g_log_set_handler (NULL,  G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL);
   319   g_set_print_handler(mrtPrintHandler);
   320   #endif /*__SYMBIAN32__*/
   321 	  
   322 
   323   for (i = 0; test[i].text; i++)
   324     do_test (i, test[i].text, test[i].max_len, 
   325 	     test[i].offset, test[i].valid);
   326 
   327   #ifdef __SYMBIAN32__
   328   assert_failed = any_failed;
   329   testResultXml("utf8-validate");
   330   #endif /* EMULATOR */
   331   
   332   return any_failed ? 1 : 0;
   333 }