os/ossrv/glib/tsrc/BC/group/data/utf8.txt
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/ossrv/glib/tsrc/BC/group/data/utf8.txt	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,282 @@
     1.4 +# This file is derived from 
     1.5 +#
     1.6 +#    http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
     1.7 +#    
     1.8 +# Which was created by   Markus Kuhn <mkuhn@acm.org> - 2000-09-02 
     1.9 +#
    1.10 +# lines beginning with # and blank lines are ignored
    1.11 +#
    1.12 +# Beyond that, this file consists of a series of test cases. Each test case consists of
    1.13 +# 2 or 3 lines:
    1.14 +#
    1.15 +#  1. A UTF-8 string
    1.16 +#  2. A status
    1.17 +#      VALID      : The string is a valid UTF-8 representation of valid Unicode
    1.18 +#      INCOMPLETE : The string has a partial character at the end
    1.18 +#      NOTUNICODE : The string is valid UTF-8, but the characters represented
    1.19 +#                   are not valid Unicode (surrogates, noncharacters, or values above U+10FFFF)
    1.21 +#      OVERLONG   : The string includes overlong sequences
    1.22 +#      MALFORMED  : The string is not valid UTF-8
    1.22 +#  3. If the status is VALID or NOTUNICODE, the UCS-4 representation of the string,
    1.23 +#     as a series of hex numbers.
    1.25 +
    1.26 +# 1  Some correct UTF-8 text
    1.27 +κόσμε
    1.28 +VALID
    1.29 +03ba 1f79 03c3 03bc 03b5
    1.30 +
    1.31 +# 2.1  First possible sequence of a certain length
    1.32 +#
    1.33 +# FIXME - handle NULLS?
    1.34 +#
    1.35 +# [ NULL BYTE ]
    1.36 +#VALID
    1.37 +#0000
    1.38 +
    1.39    1.40 +VALID
    1.41 +0080
    1.42 +
    1.43 +
    1.44 +NOTUNICODE
    1.45 +00200000
    1.46 +
    1.47 +
    1.48 +NOTUNICODE
    1.49 +04000000
    1.50 +
    1.51 +
    1.52 +VALID
    1.53 +0000007f
    1.54 +
    1.55 +߿
    1.56 +VALID
    1.57 +000007ff
    1.58 +
    1.59 +￿
    1.60 +NOTUNICODE
    1.61 +0000ffff
    1.62 +
    1.63 +
    1.64 +NOTUNICODE
    1.65 +001fffff
    1.66 +
    1.67 +
    1.68 +NOTUNICODE
    1.69 +03ffffff
    1.70 +
    1.71 +
    1.72 +NOTUNICODE
    1.73 +7fffffff
    1.74 +
    1.75 +# 2.3  Other boundary conditions
    1.76 +
    1.77 +퟿
    1.78 +VALID
    1.79 +d7ff
    1.80 +
    1.81 +�
    1.82 +VALID
    1.83 +fffd
    1.84 +
    1.85 +􏿿
    1.86 +NOTUNICODE
    1.87 +0010ffff
    1.88 +
    1.89 +
    1.90 +NOTUNICODE
    1.91 +00110000
    1.92 +
    1.93 +# 3.1  Unexpected continuation bytes
    1.94 +
    1.95 +
    1.96 +MALFORMED
    1.97 +
    1.98 +MALFORMED
    1.99 +
   1.100 +MALFORMED
   1.101 +
   1.102 +MALFORMED
   1.103 +
   1.104 +MALFORMED
   1.105 +
   1.106 +MALFORMED
   1.107 +
   1.108 +MALFORMED
   1.109 +
   1.110 +MALFORMED
   1.111 +
   1.112 +MALFORMED
   1.113 +
   1.114 +# 3.2  Lonely start characters
   1.115 +
   1.116 +                                
   1.117 +MALFORMED
   1.118 +                
   1.119 +MALFORMED
   1.120 +        
   1.121 +MALFORMED
   1.122 +    
   1.123 +MALFORMED
   1.124 +  
   1.125 +MALFORMED
   1.126 +
   1.127 +# 3.3  Sequences with last continuation byte missing
   1.128 +
   1.129 +
   1.130 +INCOMPLETE
   1.131 +
   1.132 +INCOMPLETE
   1.133 +
   1.134 +INCOMPLETE
   1.135 +
   1.136 +INCOMPLETE
   1.137 +
   1.138 +INCOMPLETE
   1.139 +
   1.140 +INCOMPLETE
   1.141 +
   1.142 +INCOMPLETE
   1.143 +
   1.144 +INCOMPLETE
   1.145 +
   1.146 +INCOMPLETE
   1.147 +
   1.148 +INCOMPLETE
   1.149 +
   1.150 +# 3.4  Concatenation of incomplete sequences
   1.151 +
   1.152 +
   1.153 +MALFORMED
   1.154 +
   1.155 +# 3.5  Impossible bytes
   1.156 +
   1.157 +
   1.158 +MALFORMED
   1.159 +
   1.160 +MALFORMED
   1.161 +
   1.162 +MALFORMED
   1.163 +
   1.164 +#  Examples of an overlong ASCII character
   1.165 +
   1.166 +
   1.167 +OVERLONG
   1.168 +
   1.169 +OVERLONG
   1.170 +
   1.171 +OVERLONG
   1.172 +
   1.173 +OVERLONG
   1.174 +
   1.175 +OVERLONG
   1.176 +
   1.177 +#  Maximum overlong sequences
   1.178 +
   1.179 +
   1.180 +OVERLONG
   1.181 +
   1.182 +OVERLONG
   1.183 +
   1.184 +OVERLONG
   1.185 +
   1.186 +OVERLONG
   1.187 +
   1.188 +OVERLONG
   1.189 +
   1.190 +# Overlong representation of the NUL character
   1.191 +
   1.192 +
   1.193 +OVERLONG
   1.194 +
   1.195 +OVERLONG
   1.196 +
   1.197 +OVERLONG
   1.198 +
   1.199 +OVERLONG
   1.200 +
   1.201 +OVERLONG
   1.202 +
   1.203 +# Illegal code positions
   1.204 +
   1.205 +# Single UTF-16 surrogates
   1.206 +
   1.207 +
   1.208 +NOTUNICODE
   1.209 +d800
   1.210 +
   1.211 +
   1.212 +NOTUNICODE
   1.213 +db7f
   1.214 +
   1.215 +
   1.216 +NOTUNICODE
   1.217 +db80
   1.218 +
   1.219 +
   1.220 +NOTUNICODE
   1.221 +dbff
   1.222 +
   1.223 +
   1.224 +NOTUNICODE
   1.225 +dc00
   1.226 +
   1.227 +
   1.228 +NOTUNICODE
   1.229 +df80
   1.230 +
   1.231 +
   1.232 +NOTUNICODE
   1.233 +dfff
   1.234 +
   1.235 +# Paired UTF-16 surrogates
   1.236 +
   1.237 +
   1.238 +NOTUNICODE
   1.239 +d800 dc00
   1.240 +
   1.241 +
   1.242 +NOTUNICODE
   1.243 +d800 dfff
   1.244 +
   1.245 +
   1.246 +NOTUNICODE
   1.247 +db7f dc00
   1.248 +
   1.249 +
   1.250 +NOTUNICODE
   1.251 +db7f dfff
   1.252 +
   1.253 +
   1.254 +NOTUNICODE
   1.255 +db80 dc00
   1.256 +
   1.257 +
   1.258 +NOTUNICODE
   1.259 +db80 dfff
   1.260 +
   1.261 +
   1.262 +NOTUNICODE
   1.263 +dbff dc00
   1.264 +
   1.265 +
   1.266 +NOTUNICODE
   1.267 +dbff dfff
   1.268 +
   1.269 +# Other illegal code positions
   1.270 +
   1.271 +￾
   1.272 +NOTUNICODE
   1.273 +fffe
   1.274 +
   1.275 +￿
   1.276 +NOTUNICODE
   1.277 +ffff
   1.278 +
   1.279 +################
   1.280 +#
   1.281 +# Some more tests, not from Markus Kuhn's file
   1.282 +#
   1.283 +
   1.284 +# Mixed plane 0 and higher planes
   1.285 +