1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/ushape.h Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,234 @@
1.4 +/*
1.5 +******************************************************************************
1.6 +*
1.7 +* Copyright (C) 2000-2004, International Business Machines
1.8 +* Corporation and others. All Rights Reserved.
1.9 +*
1.10 +******************************************************************************
1.11 +* file name: ushape.h
1.12 +* encoding: US-ASCII
1.13 +* tab size: 8 (not used)
1.14 +* indentation:4
1.15 +*
1.16 +* created on: 2000jun29
1.17 +* created by: Markus W. Scherer
1.18 +*/
1.19 +
1.20 +#ifndef __USHAPE_H__
1.21 +#define __USHAPE_H__
1.22 +
1.23 +#include "unicode/utypes.h"
1.24 +
1.25 +/**
1.26 + * \file
1.27 + * \brief C API: Arabic shaping
1.28 + *
1.29 + */
1.30 +
1.31 +/**
1.32 + * Shape Arabic text on a character basis.
1.33 + *
1.34 + * <p>This function performs basic operations for "shaping" Arabic text. It is most
1.35 + * useful with legacy data formats and legacy display technology
1.36 + * (simple terminals). All operations are performed on Unicode characters.</p>
1.37 + *
1.38 + * <p>Text-based shaping means that some character code points in the text are
1.39 + * replaced by others depending on the context. It transforms one kind of text
1.40 + * into another. In comparison, modern displays for Arabic text select
1.41 + * appropriate, context-dependent font glyphs for each text element, which means
1.42 + * that they transform text into a glyph vector.</p>
1.43 + *
1.44 + * <p>Text transformations are necessary when modern display technology is not
1.45 + * available or when text needs to be transformed to or from legacy formats that
1.46 + * use "shaped" characters. Since the Arabic script is cursive, connecting
1.47 + * adjacent letters to each other, computers select images for each letter based
1.48 + * on the surrounding letters. This usually results in four images per Arabic
1.49 + * letter: initial, middle, final, and isolated forms. In Unicode, on the other
1.50 + * hand, letters are normally stored in abstract form, and a display system is expected
1.51 + * to select the necessary glyphs. (This makes searching and other text
1.52 + * processing easier because the same letter has only one code.) It is possible
1.53 + * to mimic this with text transformations because there are characters in
1.54 + * Unicode that are rendered as letters with a specific shape
1.55 + * (or cursive connectivity). They were included for interoperability with
1.56 + * legacy systems and codepages, and for unsophisticated display systems.</p>
1.57 + *
1.58 + * <p>A second kind of text transformation is supported for Arabic digits:
1.59 + * For compatibility with legacy codepages that only include European digits,
1.60 + * it is possible to replace one set of digits by another, changing the
1.61 + * character code points. These operations can be performed for either
1.62 + * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
1.63 + * digits (U+06f0...U+06f9).</p>
1.64 + *
1.65 + * <p>Some replacements may result in more or fewer characters (code points).
1.66 + * By default, this means that the destination buffer may receive text with a
1.67 + * length different from the source length. Some legacy systems rely on the
1.68 + * length of the text to be constant. They expect extra spaces to be added
1.69 + * or consumed either next to the affected character or at the end of the
1.70 + * text.</p>
1.71 + *
1.72 + * <p>For details about the available operations, see the description of the
1.73 + * <code>U_SHAPE_...</code> options.</p>
1.74 + *
1.75 + * @param source The input text.
1.76 + *
1.77 + * @param sourceLength The number of UChars in <code>source</code>.
1.78 + *
1.79 + * @param dest The destination buffer that will receive the results of the
1.80 + * requested operations. It may be <code>NULL</code> only if
1.81 + * <code>destSize</code> is 0. The source and destination must not
1.82 + * overlap.
1.83 + *
1.84 + * @param destSize The size (capacity) of the destination buffer in UChars.
1.85 + * If <code>destSize</code> is 0, then no output is produced,
1.86 + * but the necessary buffer size is returned ("preflighting").
1.87 + *
1.88 + * @param options This is a 32-bit set of flags that specify the operations
1.89 + * that are performed on the input text. If no error occurs,
1.90 + * then the result will always be written to the destination
1.91 + * buffer. The flags are the <code>U_SHAPE_...</code> constants.
1.92 + *
1.93 + * @param pErrorCode must be a valid pointer to an error code value,
1.94 + * which must not indicate a failure before the function call.
1.95 + *
1.96 + * @return The number of UChars written to the destination buffer.
1.97 + * If an error occurred, then no output was written, or it may be
1.98 + * incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
1.99 + * the return value indicates the necessary destination buffer size.
1.100 + * @stable ICU 2.0
1.101 + */
1.102 +U_STABLE int32_t U_EXPORT2
1.103 +u_shapeArabic(const UChar *source, int32_t sourceLength,
1.104 + UChar *dest, int32_t destSize,
1.105 + uint32_t options,
1.106 + UErrorCode *pErrorCode);
1.107 +
1.108 +/**
1.109 + * Memory option (the default, value 0): allow the result to have a different length than the source.
1.110 + * @stable ICU 2.0
1.111 + */
1.112 +#define U_SHAPE_LENGTH_GROW_SHRINK 0
1.113 +
1.114 +/**
1.115 + * Memory option: the result must have the same length as the source.
1.116 + * If more room is necessary, then try to consume spaces next to modified characters.
1.117 + * @stable ICU 2.0
1.118 + */
1.119 +#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR 1
1.120 +
1.121 +/**
1.122 + * Memory option: the result must have the same length as the source.
1.123 + * If more room is necessary, then try to consume spaces at the end of the text.
1.124 + * @stable ICU 2.0
1.125 + */
1.126 +#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END 2
1.127 +
1.128 +/**
1.129 + * Memory option: the result must have the same length as the source.
1.130 + * If more room is necessary, then try to consume spaces at the beginning of the text.
1.131 + * @stable ICU 2.0
1.132 + */
1.133 +#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3
1.134 +
1.135 +/** Bit mask for memory options (the two lowest bits of the options value). @stable ICU 2.0 */
1.136 +#define U_SHAPE_LENGTH_MASK 3
1.137 +
1.138 +
1.139 +/** Direction indicator (value 0, direction bit clear): the source is in logical (keyboard) order. @stable ICU 2.0 */
1.140 +#define U_SHAPE_TEXT_DIRECTION_LOGICAL 0
1.141 +
1.142 +/**
1.143 + * Direction indicator:
1.144 + * the source is in visual LTR order,
1.145 + * with the leftmost displayed character stored first.
1.146 + * @stable ICU 2.0
1.147 + */
1.148 +#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 4
1.149 +
1.150 +/** Bit mask for direction indicators (a single bit, value 4). @stable ICU 2.0 */
1.151 +#define U_SHAPE_TEXT_DIRECTION_MASK 4
1.152 +
1.153 +
1.154 +/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */
1.155 +#define U_SHAPE_LETTERS_NOOP 0
1.156 +
1.157 +/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */
1.158 +#define U_SHAPE_LETTERS_SHAPE 8
1.159 +
1.160 +/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */
1.161 +#define U_SHAPE_LETTERS_UNSHAPE 0x10
1.162 +
1.163 +/**
1.164 + * Letter shaping option: replace abstract letter characters by "shaped" ones.
1.165 + * The only difference from U_SHAPE_LETTERS_SHAPE is that Tashkeel letters
1.166 + * are always "shaped" into the isolated form instead of the medial form
1.167 + * (selecting code points from the Arabic Presentation Forms-B block).
1.168 + * @stable ICU 2.0
1.169 + */
1.170 +#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18
1.171 +
1.172 +/** Bit mask for letter shaping options (bits 0x08 and 0x10). @stable ICU 2.0 */
1.173 +#define U_SHAPE_LETTERS_MASK 0x18
1.174 +
1.175 +
1.176 +/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */
1.177 +#define U_SHAPE_DIGITS_NOOP 0
1.178 +
1.179 +/**
1.180 + * Digit shaping option:
1.181 + * Replace European digits (U+0030...U+0039) by Arabic-Indic digits.
1.182 + * @stable ICU 2.0
1.183 + */
1.184 +#define U_SHAPE_DIGITS_EN2AN 0x20
1.185 +
1.186 +/**
1.187 + * Digit shaping option:
1.188 + * Replace Arabic-Indic digits by European digits (U+0030...U+0039).
1.189 + * @stable ICU 2.0
1.190 + */
1.191 +#define U_SHAPE_DIGITS_AN2EN 0x40
1.192 +
1.193 +/**
1.194 + * Digit shaping option:
1.195 + * Replace European digits (U+0030...U+0039) by Arabic-Indic digits if the most recent
1.196 + * strongly directional character is an Arabic letter
1.197 + * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
1.198 + * Which character counts as "most recent" (preceding) depends on the direction indicator option.
1.199 + * For the first characters, the preceding strongly directional character
1.200 + * (initial state) is assumed to be not an Arabic letter
1.201 + * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]).
1.202 + * @stable ICU 2.0
1.203 + */
1.204 +#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR 0x60
1.205 +
1.206 +/**
1.207 + * Digit shaping option:
1.208 + * Replace European digits (U+0030...U+0039) by Arabic-Indic digits if the most recent
1.209 + * strongly directional character is an Arabic letter
1.210 + * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
1.211 + * Which character counts as "most recent" (preceding) depends on the direction indicator option.
1.212 + * For the first characters, the preceding strongly directional character
1.213 + * (initial state) is assumed to be an Arabic letter.
1.214 + * @stable ICU 2.0
1.215 + */
1.216 +#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL 0x80
1.217 +
1.218 +/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
1.219 +#define U_SHAPE_DIGITS_RESERVED 0xa0
1.220 +
1.221 +/** Bit mask for digit shaping options (bits 0x20, 0x40 and 0x80). @stable ICU 2.0 */
1.222 +#define U_SHAPE_DIGITS_MASK 0xe0
1.223 +
1.224 +
1.225 +/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */
1.226 +#define U_SHAPE_DIGIT_TYPE_AN 0
1.227 +
1.228 +/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */
1.229 +#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED 0x100
1.230 +
1.231 +/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
1.232 +#define U_SHAPE_DIGIT_TYPE_RESERVED 0x200
1.233 +
1.234 +/** Bit mask for digit type options. NOTE(review): spans bits 0x100-0x2000 although only 0x100 and 0x200 are defined here; confirm the width is intended. @stable ICU 2.0 */
1.235 +#define U_SHAPE_DIGIT_TYPE_MASK 0x3f00
1.236 +
1.237 +#endif