os/ossrv/glib/tests/utf8-validate.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/* GLIB - Library of useful routines for C programming
sl@0
     2
 * Copyright (C) 2001 Matthias Clasen <matthiasc@poet.de>
sl@0
     3
 * Portion Copyright © 2008-09 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
sl@0
     4
 * This library is free software; you can redistribute it and/or
sl@0
     5
 * modify it under the terms of the GNU Lesser General Public
sl@0
     6
 * License as published by the Free Software Foundation; either
sl@0
     7
 * version 2 of the License, or (at your option) any later version.
sl@0
     8
 *
sl@0
     9
 * This library is distributed in the hope that it will be useful,
sl@0
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
sl@0
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
sl@0
    12
 * Lesser General Public License for more details.
sl@0
    13
 *
sl@0
    14
 * You should have received a copy of the GNU Lesser General Public
sl@0
    15
 * License along with this library; if not, write to the
sl@0
    16
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
sl@0
    17
 * Boston, MA 02111-1307, USA.
sl@0
    18
 */
sl@0
    19
sl@0
    20
#include "glib.h"
sl@0
    21
#ifdef __SYMBIAN32__
sl@0
    22
#include "mrt2_glib2_test.h"
sl@0
    23
#endif /*__SYMBIAN32__*/
sl@0
    24
sl@0
    25
/*
 * UNICODE_VALID:
 * @Char: integer code point to classify
 *
 * Evaluates to non-zero when @Char
 *   - is below 0x110000,
 *   - is outside the surrogate block 0xD800-0xDFFF,
 *   - is outside the range 0xFDD0-0xFDEF, and
 *   - does not have 0xFFFE or 0xFFFF in its low 16 bits.
 *
 * @Char is expanded several times, so pass an expression without
 * side effects.
 */
#define UNICODE_VALID(Char)                   \
    ((Char) < 0x110000 &&                     \
     (((Char) & 0xFFFFF800) != 0xD800) &&     \
     ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&  \
     ((Char) & 0xFFFE) != 0xFFFE)
sl@0
    30
sl@0
    31
sl@0
    32
sl@0
    33
/* Set to TRUE by do_test() when a case fails; main() derives the exit status from it. */
static gboolean any_failed = FALSE;
sl@0
    34
sl@0
    35
struct {
sl@0
    36
  const gchar *text;
sl@0
    37
  gint max_len;
sl@0
    38
  gint offset;
sl@0
    39
  gboolean valid;
sl@0
    40
} test[] = {  
sl@0
    41
  /* some tests to check max_len handling */
sl@0
    42
  /* length 1 */
sl@0
    43
  { "abcde", -1, 5, TRUE },
sl@0
    44
  { "abcde", 3, 3, TRUE },
sl@0
    45
  { "abcde", 5, 5, TRUE },
sl@0
    46
  { "abcde", 7, 5, FALSE },
sl@0
    47
  /* length 2 */
sl@0
    48
  { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE }, 
sl@0
    49
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  1, 0, FALSE }, 
sl@0
    50
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  2, 2, TRUE }, 
sl@0
    51
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  3, 2, FALSE }, 
sl@0
    52
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  4, 4, TRUE }, 
sl@0
    53
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  5, 4, FALSE }, 
sl@0
    54
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  6, 6, TRUE }, 
sl@0
    55
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  7, 6, FALSE }, 
sl@0
    56
  /* length 3 */
sl@0
    57
  { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
sl@0
    58
  { "\xe2\x89\xa0\xe2\x89\xa0",  1, 0, FALSE },
sl@0
    59
  { "\xe2\x89\xa0\xe2\x89\xa0",  2, 0, FALSE },
sl@0
    60
  { "\xe2\x89\xa0\xe2\x89\xa0",  3, 3, TRUE },
sl@0
    61
  { "\xe2\x89\xa0\xe2\x89\xa0",  4, 3, FALSE },
sl@0
    62
  { "\xe2\x89\xa0\xe2\x89\xa0",  5, 3, FALSE },
sl@0
    63
  { "\xe2\x89\xa0\xe2\x89\xa0",  6, 6, TRUE },
sl@0
    64
  { "\xe2\x89\xa0\xe2\x89\xa0",  7, 6, FALSE },
sl@0
    65
sl@0
    66
  /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
sl@0
    67
  /* greek 'kosme' */
sl@0
    68
  { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
sl@0
    69
  /* first sequence of each length */
sl@0
    70
  { "\x00", -1, 0, TRUE },
sl@0
    71
  { "\xc2\x80", -1, 2, TRUE },
sl@0
    72
  { "\xe0\xa0\x80", -1, 3, TRUE },
sl@0
    73
  { "\xf0\x90\x80\x80", -1, 4, TRUE },
sl@0
    74
  { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
sl@0
    75
  { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
sl@0
    76
  /* last sequence of each length */
sl@0
    77
  { "\x7f", -1, 1, TRUE },
sl@0
    78
  { "\xdf\xbf", -1, 2, TRUE },
sl@0
    79
  { "\xef\xbf\xbf", -1, 0, FALSE },
sl@0
    80
  { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
sl@0
    81
  { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
sl@0
    82
  { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
sl@0
    83
  /* other boundary conditions */
sl@0
    84
  { "\xed\x9f\xbf", -1, 3, TRUE },
sl@0
    85
  { "\xee\x80\x80", -1, 3, TRUE },
sl@0
    86
  { "\xef\xbf\xbd", -1, 3, TRUE },
sl@0
    87
  { "\xf4\x8f\xbf\xbf", -1, 0, FALSE },
sl@0
    88
  { "\xf4\x90\x80\x80", -1, 0, FALSE },
sl@0
    89
  /* malformed sequences */
sl@0
    90
  /* continuation bytes */
sl@0
    91
  { "\x80", -1, 0, FALSE },
sl@0
    92
  { "\xbf", -1, 0, FALSE },
sl@0
    93
  { "\x80\xbf", -1, 0, FALSE },
sl@0
    94
  { "\x80\xbf\x80", -1, 0, FALSE },
sl@0
    95
  { "\x80\xbf\x80\xbf", -1, 0, FALSE },
sl@0
    96
  { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
sl@0
    97
  { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
sl@0
    98
  { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
sl@0
    99
sl@0
   100
  /* all possible continuation byte */
sl@0
   101
  { "\x80", -1, 0, FALSE },
sl@0
   102
  { "\x81", -1, 0, FALSE },
sl@0
   103
  { "\x82", -1, 0, FALSE },
sl@0
   104
  { "\x83", -1, 0, FALSE },
sl@0
   105
  { "\x84", -1, 0, FALSE },
sl@0
   106
  { "\x85", -1, 0, FALSE },
sl@0
   107
  { "\x86", -1, 0, FALSE },
sl@0
   108
  { "\x87", -1, 0, FALSE },
sl@0
   109
  { "\x88", -1, 0, FALSE },
sl@0
   110
  { "\x89", -1, 0, FALSE },
sl@0
   111
  { "\x8a", -1, 0, FALSE },
sl@0
   112
  { "\x8b", -1, 0, FALSE },
sl@0
   113
  { "\x8c", -1, 0, FALSE },
sl@0
   114
  { "\x8d", -1, 0, FALSE },
sl@0
   115
  { "\x8e", -1, 0, FALSE },
sl@0
   116
  { "\x8f", -1, 0, FALSE },
sl@0
   117
  { "\x90", -1, 0, FALSE },
sl@0
   118
  { "\x91", -1, 0, FALSE },
sl@0
   119
  { "\x92", -1, 0, FALSE },
sl@0
   120
  { "\x93", -1, 0, FALSE },
sl@0
   121
  { "\x94", -1, 0, FALSE },
sl@0
   122
  { "\x95", -1, 0, FALSE },
sl@0
   123
  { "\x96", -1, 0, FALSE },
sl@0
   124
  { "\x97", -1, 0, FALSE },
sl@0
   125
  { "\x98", -1, 0, FALSE },
sl@0
   126
  { "\x99", -1, 0, FALSE },
sl@0
   127
  { "\x9a", -1, 0, FALSE },
sl@0
   128
  { "\x9b", -1, 0, FALSE },
sl@0
   129
  { "\x9c", -1, 0, FALSE },
sl@0
   130
  { "\x9d", -1, 0, FALSE },
sl@0
   131
  { "\x9e", -1, 0, FALSE },
sl@0
   132
  { "\x9f", -1, 0, FALSE },
sl@0
   133
  { "\xa0", -1, 0, FALSE },
sl@0
   134
  { "\xa1", -1, 0, FALSE },
sl@0
   135
  { "\xa2", -1, 0, FALSE },
sl@0
   136
  { "\xa3", -1, 0, FALSE },
sl@0
   137
  { "\xa4", -1, 0, FALSE },
sl@0
   138
  { "\xa5", -1, 0, FALSE },
sl@0
   139
  { "\xa6", -1, 0, FALSE },
sl@0
   140
  { "\xa7", -1, 0, FALSE },
sl@0
   141
  { "\xa8", -1, 0, FALSE },
sl@0
   142
  { "\xa9", -1, 0, FALSE },
sl@0
   143
  { "\xaa", -1, 0, FALSE },
sl@0
   144
  { "\xab", -1, 0, FALSE },
sl@0
   145
  { "\xac", -1, 0, FALSE },
sl@0
   146
  { "\xad", -1, 0, FALSE },
sl@0
   147
  { "\xae", -1, 0, FALSE },
sl@0
   148
  { "\xaf", -1, 0, FALSE },
sl@0
   149
  { "\xb0", -1, 0, FALSE },
sl@0
   150
  { "\xb1", -1, 0, FALSE },
sl@0
   151
  { "\xb2", -1, 0, FALSE },
sl@0
   152
  { "\xb3", -1, 0, FALSE },
sl@0
   153
  { "\xb4", -1, 0, FALSE },
sl@0
   154
  { "\xb5", -1, 0, FALSE },
sl@0
   155
  { "\xb6", -1, 0, FALSE },
sl@0
   156
  { "\xb7", -1, 0, FALSE },
sl@0
   157
  { "\xb8", -1, 0, FALSE },
sl@0
   158
  { "\xb9", -1, 0, FALSE },
sl@0
   159
  { "\xba", -1, 0, FALSE },
sl@0
   160
  { "\xbb", -1, 0, FALSE },
sl@0
   161
  { "\xbc", -1, 0, FALSE },
sl@0
   162
  { "\xbd", -1, 0, FALSE },
sl@0
   163
  { "\xbe", -1, 0, FALSE },
sl@0
   164
  { "\xbf", -1, 0, FALSE },
sl@0
   165
  /* lone start characters */
sl@0
   166
  { "\xc0\x20", -1, 0, FALSE },
sl@0
   167
  { "\xc1\x20", -1, 0, FALSE },
sl@0
   168
  { "\xc2\x20", -1, 0, FALSE },
sl@0
   169
  { "\xc3\x20", -1, 0, FALSE },
sl@0
   170
  { "\xc4\x20", -1, 0, FALSE },
sl@0
   171
  { "\xc5\x20", -1, 0, FALSE },
sl@0
   172
  { "\xc6\x20", -1, 0, FALSE },
sl@0
   173
  { "\xc7\x20", -1, 0, FALSE },
sl@0
   174
  { "\xc8\x20", -1, 0, FALSE },
sl@0
   175
  { "\xc9\x20", -1, 0, FALSE },
sl@0
   176
  { "\xca\x20", -1, 0, FALSE },
sl@0
   177
  { "\xcb\x20", -1, 0, FALSE },
sl@0
   178
  { "\xcc\x20", -1, 0, FALSE },
sl@0
   179
  { "\xcd\x20", -1, 0, FALSE },
sl@0
   180
  { "\xce\x20", -1, 0, FALSE },
sl@0
   181
  { "\xcf\x20", -1, 0, FALSE },
sl@0
   182
  { "\xd0\x20", -1, 0, FALSE },
sl@0
   183
  { "\xd1\x20", -1, 0, FALSE },
sl@0
   184
  { "\xd2\x20", -1, 0, FALSE },
sl@0
   185
  { "\xd3\x20", -1, 0, FALSE },
sl@0
   186
  { "\xd4\x20", -1, 0, FALSE },
sl@0
   187
  { "\xd5\x20", -1, 0, FALSE },
sl@0
   188
  { "\xd6\x20", -1, 0, FALSE },
sl@0
   189
  { "\xd7\x20", -1, 0, FALSE },
sl@0
   190
  { "\xd8\x20", -1, 0, FALSE },
sl@0
   191
  { "\xd9\x20", -1, 0, FALSE },
sl@0
   192
  { "\xda\x20", -1, 0, FALSE },
sl@0
   193
  { "\xdb\x20", -1, 0, FALSE },
sl@0
   194
  { "\xdc\x20", -1, 0, FALSE },
sl@0
   195
  { "\xdd\x20", -1, 0, FALSE },
sl@0
   196
  { "\xde\x20", -1, 0, FALSE },
sl@0
   197
  { "\xdf\x20", -1, 0, FALSE },
sl@0
   198
  { "\xe0\x20", -1, 0, FALSE },
sl@0
   199
  { "\xe1\x20", -1, 0, FALSE },
sl@0
   200
  { "\xe2\x20", -1, 0, FALSE },
sl@0
   201
  { "\xe3\x20", -1, 0, FALSE },
sl@0
   202
  { "\xe4\x20", -1, 0, FALSE },
sl@0
   203
  { "\xe5\x20", -1, 0, FALSE },
sl@0
   204
  { "\xe6\x20", -1, 0, FALSE },
sl@0
   205
  { "\xe7\x20", -1, 0, FALSE },
sl@0
   206
  { "\xe8\x20", -1, 0, FALSE },
sl@0
   207
  { "\xe9\x20", -1, 0, FALSE },
sl@0
   208
  { "\xea\x20", -1, 0, FALSE },
sl@0
   209
  { "\xeb\x20", -1, 0, FALSE },
sl@0
   210
  { "\xec\x20", -1, 0, FALSE },
sl@0
   211
  { "\xed\x20", -1, 0, FALSE },
sl@0
   212
  { "\xee\x20", -1, 0, FALSE },
sl@0
   213
  { "\xef\x20", -1, 0, FALSE },
sl@0
   214
  { "\xf0\x20", -1, 0, FALSE },
sl@0
   215
  { "\xf1\x20", -1, 0, FALSE },
sl@0
   216
  { "\xf2\x20", -1, 0, FALSE },
sl@0
   217
  { "\xf3\x20", -1, 0, FALSE },
sl@0
   218
  { "\xf4\x20", -1, 0, FALSE },
sl@0
   219
  { "\xf5\x20", -1, 0, FALSE },
sl@0
   220
  { "\xf6\x20", -1, 0, FALSE },
sl@0
   221
  { "\xf7\x20", -1, 0, FALSE },
sl@0
   222
  { "\xf8\x20", -1, 0, FALSE },
sl@0
   223
  { "\xf9\x20", -1, 0, FALSE },
sl@0
   224
  { "\xfa\x20", -1, 0, FALSE },
sl@0
   225
  { "\xfb\x20", -1, 0, FALSE },
sl@0
   226
  { "\xfc\x20", -1, 0, FALSE },
sl@0
   227
  { "\xfd\x20", -1, 0, FALSE },
sl@0
   228
  /* missing continuation bytes */
sl@0
   229
  { "\x20\xc0", -1, 1, FALSE },
sl@0
   230
  { "\x20\xe0\x80", -1, 1, FALSE },
sl@0
   231
  { "\x20\xf0\x80\x80", -1, 1, FALSE },
sl@0
   232
  { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
sl@0
   233
  { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
sl@0
   234
  { "\x20\xdf", -1, 1, FALSE },
sl@0
   235
  { "\x20\xef\xbf", -1, 1, FALSE },
sl@0
   236
  { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
sl@0
   237
  { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
sl@0
   238
  { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
sl@0
   239
  /* impossible bytes */
sl@0
   240
  { "\x20\xfe\x20", -1, 1, FALSE },
sl@0
   241
  { "\x20\xff\x20", -1, 1, FALSE },
sl@0
   242
  /* overlong sequences */
sl@0
   243
  { "\x20\xc0\xaf\x20", -1, 1, FALSE },
sl@0
   244
  { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
sl@0
   245
  { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
sl@0
   246
  { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
sl@0
   247
  { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
sl@0
   248
  { "\x20\xc1\xbf\x20", -1, 1, FALSE },
sl@0
   249
  { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
sl@0
   250
  { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
sl@0
   251
  { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
sl@0
   252
  { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
sl@0
   253
  { "\x20\xc0\x80\x20", -1, 1, FALSE },
sl@0
   254
  { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
sl@0
   255
  { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
sl@0
   256
  { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
sl@0
   257
  { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
sl@0
   258
  /* illegal code positions */
sl@0
   259
  { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
sl@0
   260
  { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
sl@0
   261
  { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
sl@0
   262
  { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
sl@0
   263
  { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
sl@0
   264
  { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
sl@0
   265
  { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
sl@0
   266
  { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
sl@0
   267
  { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
sl@0
   268
  { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
sl@0
   269
  { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
sl@0
   270
  { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
sl@0
   271
  { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
sl@0
   272
  { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
sl@0
   273
  { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
sl@0
   274
  { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
sl@0
   275
  { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
sl@0
   276
sl@0
   277
  { NULL, }
sl@0
   278
};
sl@0
   279
sl@0
   280
static void 
sl@0
   281
do_test (gint         index,
sl@0
   282
	 const gchar *text, 
sl@0
   283
	 gint         max_len,
sl@0
   284
	 gint         offset,
sl@0
   285
	 gboolean     valid)
sl@0
   286
{
sl@0
   287
  const gchar *end;
sl@0
   288
  gboolean result;
sl@0
   289
  
sl@0
   290
  result = g_utf8_validate (text, max_len, &end);
sl@0
   291
sl@0
   292
  if (result != valid || end - text != offset)
sl@0
   293
    {
sl@0
   294
      GString *str;
sl@0
   295
      const gchar *p;
sl@0
   296
sl@0
   297
      any_failed = TRUE;
sl@0
   298
      
sl@0
   299
      str = g_string_new (0);
sl@0
   300
      for (p = text; *p; p++)
sl@0
   301
	g_string_append_printf (str, "\\x%02hhx", *p);
sl@0
   302
      g_print ("%d: g_utf8_validate (\"%s\", %d) failed, "
sl@0
   303
	       "expected %s %d, got %s %d\n",
sl@0
   304
	       index,
sl@0
   305
	       str->str, max_len, 
sl@0
   306
	       valid ? "TRUE" : "FALSE", offset,
sl@0
   307
	       result ? "TRUE" : "FALSE", (gint) (end - text));
sl@0
   308
      g_string_free (str, FALSE);
sl@0
   309
    }
sl@0
   310
}
sl@0
   311
sl@0
   312
int
sl@0
   313
main (int argc, char *argv[])
sl@0
   314
{
sl@0
   315
  gint i;
sl@0
   316
sl@0
   317
  #ifdef __SYMBIAN32__
sl@0
   318
  g_log_set_handler (NULL,  G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING | G_LOG_LEVEL_MESSAGE | G_LOG_LEVEL_INFO | G_LOG_LEVEL_DEBUG, &mrtLogHandler, NULL);
sl@0
   319
  g_set_print_handler(mrtPrintHandler);
sl@0
   320
  #endif /*__SYMBIAN32__*/
sl@0
   321
	  
sl@0
   322
sl@0
   323
  for (i = 0; test[i].text; i++)
sl@0
   324
    do_test (i, test[i].text, test[i].max_len, 
sl@0
   325
	     test[i].offset, test[i].valid);
sl@0
   326
sl@0
   327
  #ifdef __SYMBIAN32__
sl@0
   328
  assert_failed = any_failed;
sl@0
   329
  testResultXml("utf8-validate");
sl@0
   330
  #endif /* EMULATOR */
sl@0
   331
  
sl@0
   332
  return any_failed ? 1 : 0;
sl@0
   333
}