os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/uidna.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2  *******************************************************************************
     3  *
     4  *   Copyright (C) 2003-2005, International Business Machines
     5  *   Corporation and others.  All Rights Reserved.
     6  *
     7  *******************************************************************************
     8  *   file name:  uidna.h
     9  *   encoding:   US-ASCII
    10  *   tab size:   8 (not used)
    11  *   indentation:4
    12  *
    13  *   created on: 2003feb1
    14  *   created by: Ram Viswanadha
    15  */
    16 
    17 #ifndef __UIDNA_H__
    18 #define __UIDNA_H__
    19 
    20 #include "unicode/utypes.h"
    21 
    22 #if !UCONFIG_NO_IDNA
    23 
    24 #include "unicode/parseerr.h"
    25   
    26 /**
    27  * \file
    28  * \brief C API: Internationalized Domain Names in Applications Transformation
    29  *
    30  * UIDNA API implements the IDNA protocol as defined in the IDNA RFC 
    31  * (http://www.ietf.org/rfc/rfc3490.txt).
    32  * The RFC defines 2 operations: ToASCII and ToUnicode. Domain labels 
    33  * containing non-ASCII code points are required to be processed by
    34  * ToASCII operation before passing it to resolver libraries. Domain names
    35  * that are obtained from resolver libraries are required to be processed by
    36  * ToUnicode operation before displaying the domain name to the user.
    37  * IDNA requires that implementations process input strings with Nameprep
    38  * (http://www.ietf.org/rfc/rfc3491.txt), 
    39  * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt), 
    40  * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt). 
    41  * Implementations of IDNA MUST fully implement Nameprep and Punycode; 
    42  * neither Nameprep nor Punycode are optional.
    43  * The input and output of ToASCII and ToUnicode operations are Unicode 
    44  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
    45  * multiple times to an input string will yield the same result as applying the operation
    46  * once.
    47  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 
    48  * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
    49  *
    50  */
    51 
    52 #ifndef U_HIDE_DRAFT_API
    53 /* NOTE(review): the options below are tagged @stable yet are compiled out when U_HIDE_DRAFT_API is defined -- confirm this guard is intended. */
    54 /** 
    55  * Option to prohibit processing of unassigned code points in the input and
    56  * to skip checking whether the input conforms to STD-3 ASCII rules.
    57  * 
    58  * @see  uidna_toASCII uidna_toUnicode
    59  * @stable ICU 2.6
    60  */
    61 #define UIDNA_DEFAULT          0x0000
    62 /** 
    63  * Option to allow processing of unassigned code points in the input.
    64  * 
    65  * @see  uidna_toASCII uidna_toUnicode
    66  * @stable ICU 2.6
    67  */
    68 #define UIDNA_ALLOW_UNASSIGNED 0x0001
    69 /** 
    70  * Option to check if input conforms to STD-3 ASCII rules.
    71  * 
    72  * @see  uidna_toASCII uidna_toUnicode
    73  * @stable ICU 2.6
    74  */
    75 #define UIDNA_USE_STD3_RULES   0x0002
    76 
    77 #endif /*U_HIDE_DRAFT_API*/
    78     
    79 /**
    80  * This function implements the ToASCII operation as defined in the IDNA RFC.
    81  * This operation is done on <b>single labels</b> before sending them to something that expects
    82  * ASCII names. A label is an individual part of a domain name. Labels are usually
    83  * separated by dots; e.g., "www.example.com" is composed of 3 labels:
    84  * "www", "example", and "com".
    85  *
    86  *
    87  * @param src               Input UChar array containing label in Unicode.
    88  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
    89  * @param dest              Output UChar array with ASCII (ACE encoded) label.
    90  * @param destCapacity      Size of dest.
    91  * @param options           A bit set of options:
    92  *
    93  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
    94  *                              and do not use STD3 ASCII rules.
    95  *                              If unassigned code points are found the operation fails with 
    96  *                              U_UNASSIGNED_ERROR error code.
    97  *
    98  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
    99  *                              If this option is set, the unassigned code points in the input 
   100  *                              are treated as normal Unicode code points.
   101  *                          
   102  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
   103  *                              If this option is set and the input does not satisfy STD3 rules,  
   104  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
   105  *
   106  * @param parseError        Pointer to UParseError struct to receive information on position 
   107  *                          of error if an error is encountered. Can be NULL.
   108  * @param status            ICU in/out error code parameter.
   109  *                          U_INVALID_CHAR_FOUND if src contains
   110  *                          unmatched single surrogates.
   111  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
   112  *                          too many code points.
   113  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
   114  * @return                  Number of ASCII characters converted.
   115  * @stable ICU 2.6
   116  */
   117 U_STABLE int32_t U_EXPORT2
   118 uidna_toASCII(const UChar* src, int32_t srcLength, 
   119               UChar* dest, int32_t destCapacity,
   120               int32_t options,
   121               UParseError* parseError,
   122               UErrorCode* status);
   123 
   124 
   125 /**
   126  * This function implements the ToUnicode operation as defined in the IDNA RFC.
   127  * This operation is done on <b>single labels</b> before sending them to something that expects
   128  * Unicode names. A label is an individual part of a domain name. Labels are usually
   129  * separated by dots; e.g., "www.example.com" is composed of 3 labels:
   130  * "www", "example", and "com".
   131  *
   132  * @param src               Input UChar array containing ASCII (ACE encoded) label.
   133  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
   134  * @param dest              Output UChar array containing converted Unicode equivalent of label.
   135  * @param destCapacity      Size of dest.
   136  * @param options           A bit set of options:
   137  *  
   138  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
   139  *                              and do not use STD3 ASCII rules.
   140  *                              If unassigned code points are found the operation fails with 
   141  *                              U_UNASSIGNED_ERROR error code.
   142  *
   143  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
   144  *                              If this option is set, the unassigned code points in the input 
   145  *                              are treated as normal Unicode code points. <b> Note: </b> This option is 
   146  *                              required on the toUnicode operation because the RFC mandates 
   147  *                              verification of decoded ACE input by applying toASCII and comparing
   148  *                              its output with the source.
   149  *
   150  *                          
   151  *                          
   152  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
   153  *                              If this option is set and the input does not satisfy STD3 rules,  
   154  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
   155  *
   156  * @param parseError        Pointer to UParseError struct to receive information on position 
   157  *                          of error if an error is encountered. Can be NULL.
   158  * @param status            ICU in/out error code parameter.
   159  *                          U_INVALID_CHAR_FOUND if src contains
   160  *                          unmatched single surrogates.
   161  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
   162  *                          too many code points.
   163  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
   164  * @return                  Number of Unicode characters converted.
   165  * @stable ICU 2.6
   166  */
   167 U_STABLE int32_t U_EXPORT2
   168 uidna_toUnicode(const UChar* src, int32_t srcLength,
   169                 UChar* dest, int32_t destCapacity,
   170                 int32_t options,
   171                 UParseError* parseError,
   172                 UErrorCode* status);
   173 
   174 
   175 /**
   176  * Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
   177  * This operation is done on complete domain names, e.g., "www.example.com". 
   178  * It is important to note that this operation can fail. If it fails, then the input 
   179  * domain name cannot be used as an Internationalized Domain Name and the application
   180  * should have methods defined to deal with the failure.
   181  * 
   182  * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
   183  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 
   184  * and then convert. This function does not offer that level of granularity. The options once  
   185  * set will apply to all labels in the domain name.
   186  *
   187  * @param src               Input UChar array containing IDN in Unicode.
   188  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
   189  * @param dest              Output UChar array with ASCII (ACE encoded) IDN.
   190  * @param destCapacity      Size of dest.
   191  * @param options           A bit set of options:
   192  *  
   193  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
   194  *                              and do not use STD3 ASCII rules.
   195  *                              If unassigned code points are found the operation fails with 
   196  *                              U_UNASSIGNED_CODE_POINT_FOUND error code.
   197  *
   198  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
   199  *                              If this option is set, the unassigned code points in the input 
   200  *                              are treated as normal Unicode code points.
   201  *                          
   202  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
   203  *                              If this option is set and the input does not satisfy STD3 rules,  
   204  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
   205  * 
   206  * @param parseError        Pointer to UParseError struct to receive information on position 
   207  *                          of error if an error is encountered. Can be NULL.
   208  * @param status            ICU in/out error code parameter.
   209  *                          U_INVALID_CHAR_FOUND if src contains
   210  *                          unmatched single surrogates.
   211  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
   212  *                          too many code points.
   213  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
   214  * @return                  Number of ASCII characters converted.
   215  * @stable ICU 2.6
   216  */
   217 U_STABLE int32_t U_EXPORT2
   218 uidna_IDNToASCII(  const UChar* src, int32_t srcLength,
   219                    UChar* dest, int32_t destCapacity,
   220                    int32_t options,
   221                    UParseError* parseError,
   222                    UErrorCode* status);
   223 
   224 /**
   225  * Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
   226  * This operation is done on complete domain names, e.g., "www.example.com". 
   227  *
   228  * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
   229  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 
   230  * and then convert. This function does not offer that level of granularity. The options once  
   231  * set will apply to all labels in the domain name.
   232  *
   233  * @param src               Input UChar array containing IDN in ASCII (ACE encoded) form.
   234  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
   235  * @param dest              Output UChar array containing Unicode equivalent of source IDN.
   236  * @param destCapacity      Size of dest.
   237  * @param options           A bit set of options:
   238  *  
   239  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
   240  *                              and do not use STD3 ASCII rules.
   241  *                              If unassigned code points are found the operation fails with 
   242  *                              U_UNASSIGNED_CODE_POINT_FOUND error code.
   243  *
   244  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
   245  *                              If this option is set, the unassigned code points in the input 
   246  *                              are treated as normal Unicode code points.
   247  *                          
   248  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
   249  *                              If this option is set and the input does not satisfy STD3 rules,  
   250  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
   251  *
   252  * @param parseError        Pointer to UParseError struct to receive information on position 
   253  *                          of error if an error is encountered. Can be NULL.
   254  * @param status            ICU in/out error code parameter.
   255  *                          U_INVALID_CHAR_FOUND if src contains
   256  *                          unmatched single surrogates.
   257  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
   258  *                          too many code points.
   259  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
   260  * @return                  Number of Unicode characters converted.
   261  * @stable ICU 2.6
   262  */
   263 U_STABLE int32_t U_EXPORT2
   264 uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
   265                      UChar* dest, int32_t destCapacity,
   266                      int32_t options,
   267                      UParseError* parseError,
   268                      UErrorCode* status);
   269 
   270 /**
   271  * Compare two IDN strings for equivalence.
   272  * This function splits the domain names into labels and compares them.
   273  * According to IDN RFC, whenever two labels are compared, they are 
   274  * considered equal if and only if their ASCII forms (obtained by 
   275  * applying toASCII) match using a case-insensitive ASCII comparison.
   276  * Two domain names are considered a match if and only if all labels 
   277  * match regardless of whether label separators match.
   278  *
   279  * @param s1                First source string.
   280  * @param length1           Length of first source string, or -1 if NUL-terminated.
   281  *
   282  * @param s2                Second source string.
   283  * @param length2           Length of second source string, or -1 if NUL-terminated.
   284  * @param options           A bit set of options:
   285  *  
   286  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
   287  *                              and do not use STD3 ASCII rules.
   288  *                              If unassigned code points are found the operation fails with 
   289  *                              U_UNASSIGNED_CODE_POINT_FOUND error code.
   290  *
   291  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
   292  *                              If this option is set, the unassigned code points in the input 
   293  *                              are treated as normal Unicode code points.
   294  *                          
   295  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
   296  *                              If this option is set and the input does not satisfy STD3 rules,  
   297  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
   298  *
   299  * @param status            ICU error code in/out parameter.
   300  *                          Must fulfill U_SUCCESS before the function call.
   301  * @return <0 or 0 or >0 as usual for string comparisons
   302  * @stable ICU 2.6
   303  */
   304 U_STABLE int32_t U_EXPORT2
   305 uidna_compare(  const UChar *s1, int32_t length1,
   306                 const UChar *s2, int32_t length2,
   307                 int32_t options,
   308                 UErrorCode* status);
   309 
   310 #endif /* #if !UCONFIG_NO_IDNA */
   311 
   312 #endif