1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/glib/tsrc/BC/group/data/utf8.txt Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,282 @@
1.4 +# This file is derived from
1.5 +#
1.6 +# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
1.7 +#
1.8 +# Which was created by Markus Kuhn <mkuhn@acm.org> - 2000-09-02
1.9 +#
1.10 +# lines beginning with # and blank lines are ignored
1.11 +#
1.12 +# Beyond that, this file consists of a series of test cases. Each test case consists of
1.13 +# 2 or 3 lines:
1.14 +#
1.15 +# 1. A UTF-8 string
1.16 +# 2. A status
1.17 +# VALID : The string is a valid UTF-8 representation of valid Unicode
1.18 +# INCOMPLETE : The string has a partial character at the end
1.19 +# NOTUNICODE : The string is valid UTF-8, but the characters represented
1.20 +# are not valid Unicode (e.g. UTF-16 surrogates or values beyond the Unicode range)
1.21 +# OVERLONG : The string includes overlong sequences
1.22 +# MALFORMED : The string is not valid UTF-8
1.23 +# 3. If the status is VALID or NOTUNICODE, the UCS-4 representation of the string,
1.24 +# as a series of hex numbers.
1.25 +
1.26 +# 1 Some correct UTF-8 text
1.27 +κόσμε
1.28 +VALID
1.29 +03ba 1f79 03c3 03bc 03b5
1.30 +
1.31 +# 2.1 First possible sequence of a certain length
1.32 +#
1.33 +# FIXME - handle NULLS?
1.34 +#
1.35 +# [ NULL BYTE ]
1.36 +#VALID
1.37 +#0000
1.38 +
1.39 +
1.40 +VALID
1.41 +0080
1.42 +
1.43 +
1.44 +NOTUNICODE
1.45 +00200000
1.46 +
1.47 +
1.48 +NOTUNICODE
1.49 +04000000
1.50 +
1.51 +
1.52 +VALID
1.53 +0000007f
1.54 +
1.55 +߿
1.56 +VALID
1.57 +000007ff
1.58 +
1.59 +
1.60 +NOTUNICODE
1.61 +0000ffff
1.62 +
1.63 +
1.64 +NOTUNICODE
1.65 +001fffff
1.66 +
1.67 +
1.68 +NOTUNICODE
1.69 +03ffffff
1.70 +
1.71 +
1.72 +NOTUNICODE
1.73 +7fffffff
1.74 +
1.75 +# 2.3 Other boundary conditions
1.76 +
1.77 +
1.78 +VALID
1.79 +d7ff
1.80 +
1.81 +�
1.82 +VALID
1.83 +fffd
1.84 +
1.85 +
1.86 +NOTUNICODE
1.87 +0010ffff
1.88 +
1.89 +
1.90 +NOTUNICODE
1.91 +00110000
1.92 +
1.93 +# 3.1 Unexpected continuation bytes
1.94 +
1.95 +
1.96 +MALFORMED
1.97 +
1.98 +MALFORMED
1.99 +
1.100 +MALFORMED
1.101 +
1.102 +MALFORMED
1.103 +
1.104 +MALFORMED
1.105 +
1.106 +MALFORMED
1.107 +
1.108 +MALFORMED
1.109 +
1.110 +MALFORMED
1.111 +
1.112 +MALFORMED
1.113 +
1.114 +# 3.2 Lonely start characters
1.115 +
1.116 +
1.117 +MALFORMED
1.118 +
1.119 +MALFORMED
1.120 +
1.121 +MALFORMED
1.122 +
1.123 +MALFORMED
1.124 +
1.125 +MALFORMED
1.126 +
1.127 +# 3.3 Sequences with last continuation byte missing
1.128 +
1.129 +
1.130 +INCOMPLETE
1.131 +
1.132 +INCOMPLETE
1.133 +
1.134 +INCOMPLETE
1.135 +
1.136 +INCOMPLETE
1.137 +
1.138 +INCOMPLETE
1.139 +
1.140 +INCOMPLETE
1.141 +
1.142 +INCOMPLETE
1.143 +
1.144 +INCOMPLETE
1.145 +
1.146 +INCOMPLETE
1.147 +
1.148 +INCOMPLETE
1.149 +
1.150 +# 3.4 Concatenation of incomplete sequences
1.151 +
1.152 +
1.153 +MALFORMED
1.154 +
1.155 +# 3.5 Impossible bytes
1.156 +
1.157 +
1.158 +MALFORMED
1.159 +
1.160 +MALFORMED
1.161 +
1.162 +MALFORMED
1.163 +
1.164 +# Examples of an overlong ASCII character
1.165 +
1.166 +
1.167 +OVERLONG
1.168 +
1.169 +OVERLONG
1.170 +
1.171 +OVERLONG
1.172 +
1.173 +OVERLONG
1.174 +
1.175 +OVERLONG
1.176 +
1.177 +# Maximum overlong sequences
1.178 +
1.179 +
1.180 +OVERLONG
1.181 +
1.182 +OVERLONG
1.183 +
1.184 +OVERLONG
1.185 +
1.186 +OVERLONG
1.187 +
1.188 +OVERLONG
1.189 +
1.190 +# Overlong representation of the NUL character
1.191 +
1.192 +
1.193 +OVERLONG
1.194 +
1.195 +OVERLONG
1.196 +
1.197 +OVERLONG
1.198 +
1.199 +OVERLONG
1.200 +
1.201 +OVERLONG
1.202 +
1.203 +# Illegal code positions
1.204 +
1.205 +# Single UTF-16 surrogates
1.206 +
1.207 +
1.208 +NOTUNICODE
1.209 +d800
1.210 +
1.211 +
1.212 +NOTUNICODE
1.213 +db7f
1.214 +
1.215 +
1.216 +NOTUNICODE
1.217 +db80
1.218 +
1.219 +
1.220 +NOTUNICODE
1.221 +dbff
1.222 +
1.223 +
1.224 +NOTUNICODE
1.225 +dc00
1.226 +
1.227 +
1.228 +NOTUNICODE
1.229 +df80
1.230 +
1.231 +
1.232 +NOTUNICODE
1.233 +dfff
1.234 +
1.235 +# Paired UTF-16 surrogates
1.236 +
1.237 +
1.238 +NOTUNICODE
1.239 +d800 dc00
1.240 +
1.241 +
1.242 +NOTUNICODE
1.243 +d800 dfff
1.244 +
1.245 +
1.246 +NOTUNICODE
1.247 +db7f dc00
1.248 +
1.249 +
1.250 +NOTUNICODE
1.251 +db7f dfff
1.252 +
1.253 +
1.254 +NOTUNICODE
1.255 +db80 dc00
1.256 +
1.257 +
1.258 +NOTUNICODE
1.259 +db80 dfff
1.260 +
1.261 +
1.262 +NOTUNICODE
1.263 +dbff dc00
1.264 +
1.265 +
1.266 +NOTUNICODE
1.267 +dbff dfff
1.268 +
1.269 +# Other illegal code positions
1.270 +
1.271 +
1.272 +NOTUNICODE
1.273 +fffe
1.274 +
1.275 +
1.276 +NOTUNICODE
1.277 +ffff
1.278 +
1.279 +################
1.280 +#
1.281 +# Some more tests, not from Markus Kuhn's file
1.282 +#
1.283 +
1.284 +# Mixed plane 0 and higher planes
1.285 +