os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/ushape.h
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/ushape.h	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,234 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2000-2004, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +******************************************************************************
    1.11 +*   file name:  ushape.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2000jun29
    1.17 +*   created by: Markus W. Scherer
    1.18 +*/
    1.19 +
    1.20 +#ifndef __USHAPE_H__
    1.21 +#define __USHAPE_H__
    1.22 +
    1.23 +#include "unicode/utypes.h"
    1.24 +
    1.25 +/**
    1.26 + * \file
    1.27 + * \brief C API:  Arabic shaping
    1.28 + * 
    1.29 + */
    1.30 +
    1.31 +/**
    1.32 + * Shape Arabic text on a character basis.
    1.33 + *
    1.34 + * <p>This function performs basic operations for "shaping" Arabic text. It is most
    1.35 + * useful with legacy data formats and legacy display technology
    1.36 + * (simple terminals). All operations are performed on Unicode characters.</p>
    1.37 + *
    1.38 + * <p>Text-based shaping means that some character code points in the text are
    1.39 + * replaced by others depending on the context. It transforms one kind of text
    1.40 + * into another. In comparison, modern displays for Arabic text select
    1.41 + * appropriate, context-dependent font glyphs for each text element, which means
    1.42 + * that they transform text into a glyph vector.</p>
    1.43 + *
    1.44 + * <p>Text transformations are necessary when modern display technology is not
    1.45 + * available or when text needs to be transformed to or from legacy formats that
    1.46 + * use "shaped" characters. Since the Arabic script is cursive, connecting
    1.47 + * adjacent letters to each other, computers select images for each letter based
    1.48 + * on the surrounding letters. This usually results in four images per Arabic
    1.49 + * letter: initial, middle, final, and isolated forms. In Unicode, on the other
    1.50 + * hand, letters are normally stored in abstract form, and a display system is expected
    1.51 + * to select the necessary glyphs. (This makes searching and other text
    1.52 + * processing easier because the same letter has only one code.) It is possible
    1.53 + * to mimic this with text transformations because there are characters in
    1.54 + * Unicode that are rendered as letters with a specific shape
    1.55 + * (or cursive connectivity). They were included for interoperability with
    1.56 + * legacy systems and codepages, and for unsophisticated display systems.</p>
    1.57 + *
    1.58 + * <p>A second kind of text transformation is supported for Arabic digits:
    1.59 + * For compatibility with legacy codepages that only include European digits,
    1.60 + * it is possible to replace one set of digits by another, changing the
    1.61 + * character code points. These operations can be performed for either
    1.62 + * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
    1.63 + * digits (U+06f0...U+06f9).</p>
    1.64 + *
    1.65 + * <p>Some replacements may result in more or fewer characters (code points).
    1.66 + * By default, this means that the destination buffer may receive text with a
    1.67 + * length different from the source length. Some legacy systems rely on the
    1.68 + * length of the text to be constant. They expect extra spaces to be added
    1.69 + * or consumed either next to the affected character or at the end of the
    1.70 + * text.</p>
    1.71 + *
    1.72 + * <p>For details about the available operations, see the description of the
    1.73 + * <code>U_SHAPE_...</code> options.</p>
    1.74 + *
    1.75 + * @param source The input text.
    1.76 + *
    1.77 + * @param sourceLength The number of UChars in <code>source</code>.
    1.78 + *
    1.79 + * @param dest The destination buffer that will receive the results of the
    1.80 + *             requested operations. It may be <code>NULL</code> only if
    1.81 + *             <code>destSize</code> is 0. The source and destination must not
    1.82 + *             overlap.
    1.83 + *
    1.84 + * @param destSize The size (capacity) of the destination buffer in UChars.
    1.85 + *                 If <code>destSize</code> is 0, then no output is produced,
    1.86 + *                 but the necessary buffer size is returned ("preflighting").
    1.87 + *
    1.88 + * @param options This is a 32-bit set of flags that specify the operations
    1.89 + *                that are performed on the input text (see the
    1.90 + *                <code>U_SHAPE_...</code> constants). If no error occurs, then
    1.91 + *                the result will always be written to the destination buffer.
    1.92 + *
    1.93 + * @param pErrorCode must be a valid pointer to an error code value,
    1.94 + *        which must not indicate a failure before the function call.
    1.95 + *
    1.96 + * @return The number of UChars written to the destination buffer.
    1.97 + *         If an error occurred, then no output was written, or it may be
    1.98 + *         incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
    1.99 + *         the return value indicates the necessary destination buffer size.
   1.100 + * @stable ICU 2.0
   1.101 + */
   1.102 +U_STABLE int32_t U_EXPORT2
   1.103 +u_shapeArabic(const UChar *source, int32_t sourceLength,
   1.104 +              UChar *dest, int32_t destSize,
   1.105 +              uint32_t options,
   1.106 +              UErrorCode *pErrorCode);
   1.107 +
   1.108 +/**
   1.109 + * Memory option: allow the result to have a different length than the source. This is the default (value 0).
   1.110 + * @stable ICU 2.0
   1.111 + */
   1.112 +#define U_SHAPE_LENGTH_GROW_SHRINK              0
   1.113 +
   1.114 +/**
   1.115 + * Memory option: the result must have the same length as the source.
   1.116 + * If more room is necessary, then try to consume spaces next to modified characters.
   1.117 + * @stable ICU 2.0
   1.118 + */
   1.119 +#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR        1
   1.120 +
   1.121 +/**
   1.122 + * Memory option: the result must have the same length as the source.
   1.123 + * If more room is necessary, then try to consume spaces at the end of the text.
   1.124 + * @stable ICU 2.0
   1.125 + */
   1.126 +#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END      2
   1.127 +
   1.128 +/**
   1.129 + * Memory option: the result must have the same length as the source.
   1.130 + * If more room is necessary, then try to consume spaces at the beginning of the text.
   1.131 + * @stable ICU 2.0
   1.132 + */
   1.133 +#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3
   1.134 +
   1.135 +/** Bit mask for memory options; it covers the four U_SHAPE_LENGTH_... values 0 through 3. @stable ICU 2.0 */
   1.136 +#define U_SHAPE_LENGTH_MASK                     3
   1.137 +
   1.138 +
   1.139 +/** Direction indicator: the source is in logical (keyboard) order (value 0, i.e. the direction bit is clear). @stable ICU 2.0 */
   1.140 +#define U_SHAPE_TEXT_DIRECTION_LOGICAL          0
   1.141 +
   1.142 +/**
   1.143 + * Direction indicator:
   1.144 + * the source is in visual LTR order,
   1.145 + * the leftmost displayed character stored first.
   1.146 + * @stable ICU 2.0
   1.147 + */
   1.148 +#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR       4
   1.149 +
   1.150 +/** Bit mask for direction indicators (a single bit, value 4). @stable ICU 2.0 */
   1.151 +#define U_SHAPE_TEXT_DIRECTION_MASK             4
   1.152 +
   1.153 +
   1.154 +/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */
   1.155 +#define U_SHAPE_LETTERS_NOOP                    0
   1.156 +
   1.157 +/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */
   1.158 +#define U_SHAPE_LETTERS_SHAPE                   8
   1.159 +
   1.160 +/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */
   1.161 +#define U_SHAPE_LETTERS_UNSHAPE                 0x10
   1.162 +
   1.163 +/**
   1.164 + * Letter shaping option: replace abstract letter characters by "shaped" ones.
   1.165 + * The only difference from U_SHAPE_LETTERS_SHAPE is that Tashkeel letters
   1.166 + * are always "shaped" into the isolated form instead of the medial form
   1.167 + * (selecting code points from the Arabic Presentation Forms-B block). The value 0x18 equals (U_SHAPE_LETTERS_SHAPE | U_SHAPE_LETTERS_UNSHAPE).
   1.168 + * @stable ICU 2.0
   1.169 + */
   1.170 +#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18
   1.171 +
   1.172 +/** Bit mask for letter shaping options. Compare (options & mask) for equality: 0x18 shares bits with both 8 and 0x10. @stable ICU 2.0 */
   1.173 +#define U_SHAPE_LETTERS_MASK                    0x18
   1.174 +
   1.175 +
   1.176 +/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */
   1.177 +#define U_SHAPE_DIGITS_NOOP                     0
   1.178 +
   1.179 +/**
   1.180 + * Digit shaping option:
   1.181 + * Replace European digits (U+0030...) by Arabic-Indic digits.
   1.182 + * @stable ICU 2.0
   1.183 + */
   1.184 +#define U_SHAPE_DIGITS_EN2AN                    0x20
   1.185 +
   1.186 +/**
   1.187 + * Digit shaping option:
   1.188 + * Replace Arabic-Indic digits by European digits (U+0030...).
   1.189 + * @stable ICU 2.0
   1.190 + */
   1.191 +#define U_SHAPE_DIGITS_AN2EN                    0x40
   1.192 +
   1.193 +/**
   1.194 + * Digit shaping option:
   1.195 + * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
   1.196 + * strongly directional character is an Arabic letter
   1.197 + * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
   1.198 + * The direction of "preceding" depends on the direction indicator option.
   1.199 + * For the first characters, the preceding strongly directional character
   1.200 + * (initial state) is assumed to be not an Arabic letter
   1.201 + * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]).
   1.202 + * @stable ICU 2.0
   1.203 + */
   1.204 +#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR          0x60
   1.205 +
   1.206 +/**
   1.207 + * Digit shaping option:
   1.208 + * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
   1.209 + * strongly directional character is an Arabic letter
   1.210 + * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
   1.211 + * The direction of "preceding" depends on the direction indicator option.
   1.212 + * For the first characters, the preceding strongly directional character
   1.213 + * (initial state) is assumed to be an Arabic letter.
   1.214 + * @stable ICU 2.0
   1.215 + */
   1.216 +#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL          0x80
   1.217 +
   1.218 +/** Not a valid option value. May be replaced by a new option. (0xc0 and 0xe0 also lie within U_SHAPE_DIGITS_MASK and have no named constant here.) @stable ICU 2.0 */
   1.219 +#define U_SHAPE_DIGITS_RESERVED                 0xa0
   1.220 +
   1.221 +/** Bit mask for digit shaping options. The options are the values 0..5 shifted left by 5 bits, not independent flags, so compare (options & mask) for equality. @stable ICU 2.0 */
   1.222 +#define U_SHAPE_DIGITS_MASK                     0xe0
   1.223 +
   1.224 +
   1.225 +/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */
   1.226 +#define U_SHAPE_DIGIT_TYPE_AN                   0
   1.227 +
   1.228 +/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */
   1.229 +#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED          0x100
   1.230 +
   1.231 +/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
   1.232 +#define U_SHAPE_DIGIT_TYPE_RESERVED             0x200
   1.233 +
   1.234 +/** Bit mask for digit type options. NOTE(review): 0x3f00 spans bits 8-13, while the values defined above only use bits 8-9 (0x300) - confirm the wider mask is intended. @stable ICU 2.0 */
   1.235 +#define U_SHAPE_DIGIT_TYPE_MASK                 0x3f00
   1.236 +
   1.237 +#endif