Update contrib.
2 ******************************************************************************
4 * Copyright (C) 1999-2005, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999jul27
14 * created by: Markus W. Scherer
20 #include "unicode/utypes.h"
21 #include "unicode/uchar.h"
24 * javadoc-style comments are intended to be transformed into HTML
26 * http://www.zib.de/Visual/software/doc++/index.html .
28 * The HTML documentation is created with
31 * The following #define trick allows us to do it all in one file
32 * and still be able to compile it.
35 /*#define BIDI_SAMPLE_CODE*/
39 * \brief C API: BIDI algorithm
41 * <h2>BIDI algorithm for ICU</h2>
43 * This is an implementation of the Unicode Bidirectional algorithm.
44 * The algorithm is defined in the
45 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
46 * version 13, also described in The Unicode Standard, Version 4.0 .<p>
48 * Note: Libraries that perform a bidirectional algorithm and
49 * reorder strings accordingly are sometimes called "Storage Layout Engines".
50 * ICU's BiDi and shaping (u_shapeArabic()) APIs can be used at the core of such
51 * "Storage Layout Engines".
53 * <h3>General remarks about the API:</h3>
55 * In functions with an error code parameter,
56 * the <code>pErrorCode</code> pointer must be valid
57 * and the value that it points to must not indicate a failure before
58 * the function call. Otherwise, the function returns immediately.
59 * After the function call, the value indicates success or failure.<p>
61 * The "limit" of a sequence of characters is the position just after their
62 * last character, i.e., one more than that position.<p>
64 * Some of the API functions provide access to "runs".
65 * Such a "run" is defined as a sequence of characters
66 * that are at the same embedding level
67 * after performing the BIDI algorithm.<p>
69 * @author Markus W. Scherer
73 * <h4> Sample code for the ICU BIDI API </h4>
75 * <h5>Rendering a paragraph with the ICU BiDi API</h5>
77 * This is (hypothetical) sample code that illustrates
78 * how the ICU BiDi API could be used to render a paragraph of text.
79 * Rendering code depends highly on the graphics system,
80 * therefore this sample code must make a lot of assumptions,
81 * which may or may not match any existing graphics system's properties.
83 * <p>The basic assumptions are:</p>
85 * <li>Rendering is done from left to right on a horizontal line.</li>
86 * <li>A run of single-style, unidirectional text can be rendered at once.</li>
87 * <li>Such a run of text is passed to the graphics system with
88 * characters (code units) in logical order.</li>
89 * <li>The line-breaking algorithm is very complicated
90 * and Locale-dependent -
91 * and therefore its implementation is omitted from this sample code.</li>
96 *#include "unicode/ubidi.h"
99 * styleNormal=0, styleSelected=1,
100 * styleBold=2, styleItalics=4,
101 * styleSuper=8, styleSub=16
104 *typedef struct { int32_t limit; Style style; } StyleRun;
106 *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
107 * const StyleRun *styleRuns, int styleRunCount);
109 * // set *pLimit and *pStyleRunLimit for a line
110 * // from text[start] and from styleRuns[styleRunStart]
111 * // using ubidi_getLogicalRun(para, ...)
112 *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
114 * const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
117 * // render runs on a line sequentially, always from left to right
119 * // prepare rendering a new line
120 * void startLine(UBiDiDirection textDirection, int lineWidth);
122 * // render a run of text and advance to the right by the run width
123 * // the text[start..limit-1] is always in logical order
124 * void renderRun(const UChar *text, int32_t start, int32_t limit,
125 * UBiDiDirection textDirection, Style style);
127 * // We could compute a cross-product
128 * // from the style runs with the directional runs
129 * // and then reorder it.
130 * // Instead, here we iterate over each run type
131 * // and render the intersections -
132 * // with shortcuts in simple (and common) cases.
133 * // renderParagraph() is the main function.
135 * // render a directional run with
136 * // (possibly) multiple style runs intersecting with it
137 * void renderDirectionalRun(const UChar *text,
138 * int32_t start, int32_t limit,
139 * UBiDiDirection direction,
140 * const StyleRun *styleRuns, int styleRunCount) {
143 * // iterate over style runs
144 * if(direction==UBIDI_LTR) {
147 * for(i=0; i<styleRunCount; ++i) {
148 * styleLimit=styleRuns[i].limit;
149 * if(start<styleLimit) {
150 * if(styleLimit>limit) { styleLimit=limit; }
151 * renderRun(text, start, styleLimit,
152 * direction, styleRuns[i].style);
153 * if(styleLimit==limit) { break; }
160 * for(i=styleRunCount-1; i>=0; --i) {
162 * styleStart=styleRuns[i-1].limit;
166 * if(limit>=styleStart) {
167 * if(styleStart<start) { styleStart=start; }
168 * renderRun(text, styleStart, limit,
169 * direction, styleRuns[i].style);
170 * if(styleStart==start) { break; }
177 * // the line object represents text[start..limit-1]
178 * void renderLine(UBiDi *line, const UChar *text,
179 * int32_t start, int32_t limit,
180 * const StyleRun *styleRuns, int styleRunCount) {
181 * UBiDiDirection direction=ubidi_getDirection(line);
182 * if(direction!=UBIDI_MIXED) {
184 * if(styleRunCount<=1) {
185 * renderRun(text, start, limit, direction, styleRuns[0].style);
187 * renderDirectionalRun(text, start, limit,
188 * direction, styleRuns, styleRunCount);
191 * // mixed-directional
192 * int32_t count, i, length;
195 * count=ubidi_countRuns(line, pErrorCode);
196 * if(U_SUCCESS(*pErrorCode)) {
197 * if(styleRunCount<=1) {
198 * Style style=styleRuns[0].style;
200 * // iterate over directional runs
201 * for(i=0; i<count; ++i) {
202 * direction=ubidi_getVisualRun(line, i, &start, &length);
203 * renderRun(text, start, start+length, direction, style);
208 * // iterate over both directional and style runs
209 * for(i=0; i<count; ++i) {
210 * direction=ubidi_getVisualRun(line, i, &start, &length);
211 * renderDirectionalRun(text, start, start+length,
212 * direction, styleRuns, styleRunCount);
219 *void renderParagraph(const UChar *text, int32_t length,
220 * UBiDiDirection textDirection,
221 * const StyleRun *styleRuns, int styleRunCount,
223 * UErrorCode *pErrorCode) {
226 * if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
230 * para=ubidi_openSized(length, 0, pErrorCode);
231 * if(para==NULL) { return; }
233 * ubidi_setPara(para, text, length,
234 * textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
236 * if(U_SUCCESS(*pErrorCode)) {
237 * UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
238 * StyleRun styleRun={ length, styleNormal };
241 * if(styleRuns==NULL || styleRunCount<=0) {
243 * styleRuns=&styleRun;
246 * // assume styleRuns[styleRunCount-1].limit>=length
248 * width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
249 * if(width<=lineWidth) {
250 * // everything fits onto one line
252 * // prepare rendering a new line from either left or right
253 * startLine(paraLevel, width);
255 * renderLine(para, text, 0, length,
256 * styleRuns, styleRunCount);
260 * // we need to render several lines
261 * line=ubidi_openSized(length, 0, pErrorCode);
263 * int32_t start=0, limit;
264 * int styleRunStart=0, styleRunLimit;
268 * styleRunLimit=styleRunCount;
269 * getLineBreak(text, start, &limit, para,
270 * styleRuns, styleRunStart, &styleRunLimit,
272 * ubidi_setLine(para, start, limit, line, pErrorCode);
273 * if(U_SUCCESS(*pErrorCode)) {
274 * // prepare rendering a new line
275 * // from either left or right
276 * startLine(paraLevel, width);
278 * renderLine(line, text, start, limit,
279 * styleRuns+styleRunStart,
280 * styleRunLimit-styleRunStart);
282 * if(limit==length) { break; }
284 * styleRunStart=styleRunLimit-1;
285 * if(start>=styleRuns[styleRunStart].limit) {
305 * UBiDiLevel is the type of the level values in this
306 * BiDi implementation.
307 * It holds an embedding level and indicates the visual direction
308 * by its bit 0 (even/odd value).<p>
310 * It can also hold non-level values for the
311 * <code>paraLevel</code> and <code>embeddingLevels</code>
312 * arguments of <code>ubidi_setPara()</code>; there:
314 * <li>bit 7 of an <code>embeddingLevels[]</code>
315 * value indicates whether the using application is
316 * specifying the level of a character to <i>override</i> whatever the
317 * BiDi implementation would resolve it to.</li>
318 * <li><code>paraLevel</code> can be set to the
319 * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
320 * and <code>UBIDI_DEFAULT_RTL</code>.</li>
325 * <p>The related constants are not real, valid level values.
326 * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
327 * a default for the paragraph level for
328 * when the <code>ubidi_setPara()</code> function
329 * shall determine it but there is no
330 * strongly typed character in the input.<p>
332 * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
333 * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
334 * just like with normal LTR and RTL level values -
335 * these special values are designed that way. Also, the implementation
336 * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
338 * @see UBIDI_DEFAULT_LTR
339 * @see UBIDI_DEFAULT_RTL
340 * @see UBIDI_LEVEL_OVERRIDE
341 * @see UBIDI_MAX_EXPLICIT_LEVEL
344 typedef uint8_t UBiDiLevel;
346 /** Paragraph level setting.
347 * If there is no strong character, then set the paragraph level to 0 (left-to-right).
350 #define UBIDI_DEFAULT_LTR 0xfe
352 /** Paragraph level setting.
353 * If there is no strong character, then set the paragraph level to 1 (right-to-left).
356 #define UBIDI_DEFAULT_RTL 0xff
359 * Maximum explicit embedding level.
360 * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
363 #define UBIDI_MAX_EXPLICIT_LEVEL 61
365 /** Bit flag for level input.
366 * Overrides directional properties.
369 #define UBIDI_LEVEL_OVERRIDE 0x80
372 * <code>UBiDiDirection</code> values indicate the text direction.
375 enum UBiDiDirection {
376 /** All left-to-right text. This is a 0 value. @stable ICU 2.0 */
378 /** All right-to-left text. This is a 1 value. @stable ICU 2.0 */
380 /** Mixed-directional text. @stable ICU 2.0 */
384 /** @stable ICU 2.0 */
385 typedef enum UBiDiDirection UBiDiDirection;
388 * Forward declaration of the <code>UBiDi</code> structure for the declaration of
389 * the API functions. Its fields are implementation-specific.<p>
390 * This structure holds information about a paragraph (or multiple paragraphs)
391 * of text with BiDi-algorithm-related details, or about one line of
392 * such a paragraph.<p>
393 * Reordering can be done on a line, or on one or more paragraphs which are
394 * then interpreted each as one single line.
399 /** @stable ICU 2.0 */
400 typedef struct UBiDi UBiDi;
403 * Allocate a <code>UBiDi</code> structure.
404 * Such an object is initially empty. It is assigned
405 * the BiDi properties of a piece of text containing one or more paragraphs
406 * by <code>ubidi_setPara()</code>
407 * or the BiDi properties of a line within a paragraph by
408 * <code>ubidi_setLine()</code>.<p>
409 * This object can be reused for as long as it is not deallocated
410 * by calling <code>ubidi_close()</code>.<p>
411 * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
412 * additional memory for internal structures as necessary.
414 * @return An empty <code>UBiDi</code> object.
417 U_STABLE UBiDi * U_EXPORT2
421 * Allocate a <code>UBiDi</code> structure with preallocated memory
422 * for internal structures.
423 * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
424 * with no arguments, but it also preallocates memory for internal structures
425 * according to the sizings supplied by the caller.<p>
426 * Subsequent functions will not allocate any more memory, and are thus
427 * guaranteed not to fail because of lack of memory.<p>
428 * The preallocation can be limited to some of the internal memory
429 * by setting some values to 0 here. That means that if, e.g.,
430 * <code>maxRunCount</code> cannot be reasonably predetermined and should not
431 * be set to <code>maxLength</code> (the only failproof value) to avoid
432 * wasting memory, then <code>maxRunCount</code> could be set to 0 here
433 * and the internal structures that are associated with it will be allocated
434 * on demand, just like with <code>ubidi_open()</code>.
436 * @param maxLength is the maximum text or line length that internal memory
437 * will be preallocated for. An attempt to associate this object with a
438 * longer text will fail, unless this value is 0, which leaves the allocation
439 * up to the implementation.
441 * @param maxRunCount is the maximum anticipated number of same-level runs
442 * that internal memory will be preallocated for. An attempt to access
443 * visual runs on an object that was not preallocated for as many runs
444 * as the text was actually resolved to will fail,
445 * unless this value is 0, which leaves the allocation up to the implementation.<p>
446 * The number of runs depends on the actual text and may be anywhere between
447 * 1 and <code>maxLength</code>. It is typically small.<p>
449 * @param pErrorCode must be a valid pointer to an error code value.
451 * @return An empty <code>UBiDi</code> object with preallocated memory.
454 U_STABLE UBiDi * U_EXPORT2
455 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
458 * <code>ubidi_close()</code> must be called to free the memory
459 * associated with a UBiDi object.<p>
461 * <strong>Important: </strong>
462 * A parent <code>UBiDi</code> object must not be destroyed or reused if
463 * it still has children.
464 * If a <code>UBiDi</code> object is the <i>child</i>
465 * of another one (its <i>parent</i>), after calling
466 * <code>ubidi_setLine()</code>, then the child object must
467 * be destroyed (closed) or reused (by calling
468 * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
469 * before the parent object.
471 * @param pBiDi is a <code>UBiDi</code> object.
477 U_STABLE void U_EXPORT2
478 ubidi_close(UBiDi *pBiDi);
481 * Modify the operation of the BiDi algorithm such that it
482 * approximates an "inverse BiDi" algorithm. This function
483 * must be called before <code>ubidi_setPara()</code>.
485 * <p>The normal operation of the BiDi algorithm as described
486 * in the Unicode Technical Report is to take text stored in logical
487 * (keyboard, typing) order and to determine the reordering of it for visual
489 * Some legacy systems store text in visual order, and for operations
490 * with standard, Unicode-based algorithms, the text needs to be transformed
491 * to logical order. This is effectively the inverse algorithm of the
492 * described BiDi algorithm. Note that there is no standard algorithm for
493 * this "inverse BiDi" and that the current implementation provides only an
494 * approximation of "inverse BiDi".</p>
496 * <p>With <code>isInverse</code> set to <code>TRUE</code>,
497 * this function changes the behavior of some of the subsequent functions
498 * in a way that they can be used for the inverse BiDi algorithm.
499 * Specifically, runs of text with numeric characters will be treated in a
500 * special way and may need to be surrounded with LRM characters when they are
501 * written in reordered sequence.</p>
503 * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>.
504 * Since the actual input for "inverse BiDi" is visually ordered text and
505 * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually
506 * the runs of the logically ordered output.</p>
508 * @param pBiDi is a <code>UBiDi</code> object.
510 * @param isInverse specifies "forward" or "inverse" BiDi operation
513 * @see ubidi_writeReordered
516 U_STABLE void U_EXPORT2
517 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
520 * Is this BiDi object set to perform the inverse BiDi algorithm?
522 * @param pBiDi is a <code>UBiDi</code> object.
523 * @return TRUE if the BiDi object is set to perform the inverse BiDi algorithm
525 * @see ubidi_setInverse
529 U_STABLE UBool U_EXPORT2
530 ubidi_isInverse(UBiDi *pBiDi);
533 * Specify whether block separators must be allocated level zero,
534 * so that successive paragraphs will progress from left to right.
535 * This function must be called before <code>ubidi_setPara()</code>.
536 * Paragraph separators (B) may appear in the text. Setting them to level zero
537 * means that all paragraph separators (including one possibly appearing
538 * in the last text position) are kept in the reordered text after the text
539 * that they follow in the source text.
540 * When this feature is not enabled, a paragraph separator at the last
541 * position of the text before reordering will go to the first position
542 * of the reordered text when the paragraph level is odd.
544 * @param pBiDi is a <code>UBiDi</code> object.
546 * @param orderParagraphsLTR specifies whether paragraph separators (B) must
547 * receive level 0, so that successive paragraphs progress from left to right.
552 U_STABLE void U_EXPORT2
553 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
556 * Is this BiDi object set to allocate level 0 to block separators so that
557 * successive paragraphs progress from left to right?
559 * @param pBiDi is a <code>UBiDi</code> object.
560 * @return TRUE if the BiDi object is set to allocate level 0 to block
563 * @see ubidi_orderParagraphsLTR
566 U_STABLE UBool U_EXPORT2
567 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
570 * Perform the Unicode BiDi algorithm. It is defined in the
571 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
573 * also described in The Unicode Standard, Version 4.0 .<p>
575 * This function takes a piece of plain text containing one or more paragraphs,
576 * with or without externally specified embedding levels from <i>styled</i>
577 * text and computes the left-right-directionality of each character.<p>
579 * If the entire text is all of the same directionality, then
580 * the function may not perform all the steps described by the algorithm,
581 * i.e., some levels may not be the same as if all steps were performed.
582 * This is not relevant for unidirectional text.<br>
583 * For example, in pure LTR text with numbers the numbers would get
584 * a resolved level of 2 higher than the surrounding text according to
585 * the algorithm. This implementation may set all resolved levels to
586 * the same value in such a case.<p>
588 * The text can be composed of multiple paragraphs. Occurrence of a block
589 * separator in the text terminates a paragraph, and whatever comes next starts
590 * a new paragraph. The exception to this rule is when a Carriage Return (CR)
591 * is followed by a Line Feed (LF). Both CR and LF are block separators, but
592 * in that case, the pair of characters is considered as terminating the
593 * preceding paragraph, and a new paragraph will be started by a character
594 * coming after the LF.
596 * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
597 * which will be set to contain the reordering information,
598 * especially the resolved levels for all the characters in <code>text</code>.
600 * @param text is a pointer to the text that the
601 * BiDi algorithm will be performed on
602 * <strong>The text must be (at least) <code>length</code> long.</strong>
603 * This pointer is stored in the UBiDi object and can be retrieved
604 * with <code>ubidi_getText()</code>.
606 * @param length is the length of the text; if <code>length==-1</code> then
607 * the text must be zero-terminated.
609 * @param paraLevel specifies the default level for the text;
610 * it is typically 0 (LTR) or 1 (RTL).
611 * If the function shall determine the paragraph level from the text,
612 * then <code>paraLevel</code> can be set to
613 * either <code>UBIDI_DEFAULT_LTR</code>
614 * or <code>UBIDI_DEFAULT_RTL</code>; if the text contains multiple
615 * paragraphs, the paragraph level shall be determined separately for
616 * each paragraph; if a paragraph does not include any strongly typed
617 * character, then the desired default is used (0 for LTR or 1 for RTL).
618 * Any other value between 0 and <code>UBIDI_MAX_EXPLICIT_LEVEL</code> is also valid,
619 * with odd levels indicating RTL.
621 * @param embeddingLevels (in) may be used to preset the embedding and override levels,
622 * ignoring characters like LRE and PDF in the text.
623 * A level overrides the directional property of its corresponding
624 * (same index) character if the level has the
625 * <code>UBIDI_LEVEL_OVERRIDE</code> bit set.<p>
626 * Except for that bit, it must be
627 * <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>,
628 * with one exception: a level of zero may be specified for a paragraph
629 * separator even if <code>paraLevel>0</code> when multiple paragraphs
630 * are submitted in the same call to <code>ubidi_setPara()</code>.<p>
631 * <strong>Caution: </strong>A copy of this pointer, not of the levels,
632 * will be stored in the <code>UBiDi</code> object;
633 * the <code>embeddingLevels</code> array must not be
634 * deallocated before the <code>UBiDi</code> structure is destroyed or reused,
635 * and the <code>embeddingLevels</code>
636 * should not be modified to avoid unexpected results on subsequent BiDi operations.
637 * However, the <code>ubidi_setPara()</code> and
638 * <code>ubidi_setLine()</code> functions may modify some or all of the levels.<p>
639 * After the <code>UBiDi</code> object is reused or destroyed, the caller
640 * must take care of the deallocation of the <code>embeddingLevels</code> array.<p>
641 * <strong>The <code>embeddingLevels</code> array must be
642 * at least <code>length</code> long.</strong>
644 * @param pErrorCode must be a valid pointer to an error code value.
647 U_STABLE void U_EXPORT2
648 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
649 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
650 UErrorCode *pErrorCode);
653 * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
654 * contain the reordering information, especially the resolved levels,
655 * for all the characters in a line of text. This line of text is
656 * specified by referring to a <code>UBiDi</code> object representing
657 * this information for a piece of text containing one or more paragraphs,
658 * and by specifying a range of indexes in this text.<p>
659 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
661 * This is used after calling <code>ubidi_setPara()</code>
662 * for a piece of text, and after line-breaking on that text.
663 * It is not necessary if each paragraph is treated as a single line.<p>
665 * After line-breaking, rules (L1) and (L2) for the treatment of
666 * trailing WS and for reordering are performed on
667 * a <code>UBiDi</code> object that represents a line.<p>
669 * <strong>Important: </strong><code>pLineBiDi</code> shares data with
670 * <code>pParaBiDi</code>.
671 * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
672 * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
673 * before the object for its parent paragraph.<p>
675 * The text pointer that was stored in <code>pParaBiDi</code> is also copied,
676 * and <code>start</code> is added to it so that it points to the beginning of the
677 * line for this object.
679 * @param pParaBiDi is the parent paragraph object. It must have been set
680 * by a successful call to ubidi_setPara.
682 * @param start is the line's first index into the text.
684 * @param limit is just behind the line's last index into the text
685 * (its last index +1).<br>
686 * It must be <code>0<=start<=limit<=</code>containing paragraph limit.
687 * If the specified line crosses a paragraph boundary, the function
688 * will terminate with error code U_ILLEGAL_ARGUMENT_ERROR.
690 * @param pLineBiDi is the object that will now represent a line of the text.
692 * @param pErrorCode must be a valid pointer to an error code value.
697 U_STABLE void U_EXPORT2
698 ubidi_setLine(const UBiDi *pParaBiDi,
699 int32_t start, int32_t limit,
701 UErrorCode *pErrorCode);
704 * Get the directionality of the text.
706 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
708 * @return A <code>UBIDI_XXX</code> value that indicates if the entire text
709 * represented by this object is unidirectional,
710 * and which direction, or if it is mixed-directional.
712 * @see UBiDiDirection
715 U_STABLE UBiDiDirection U_EXPORT2
716 ubidi_getDirection(const UBiDi *pBiDi);
719 * Get the pointer to the text.
721 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
723 * @return The pointer to the text that the UBiDi object was created for.
729 U_STABLE const UChar * U_EXPORT2
730 ubidi_getText(const UBiDi *pBiDi);
733 * Get the length of the text.
735 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
737 * @return The length of the text that the UBiDi object was created for.
740 U_STABLE int32_t U_EXPORT2
741 ubidi_getLength(const UBiDi *pBiDi);
744 * Get the paragraph level of the text.
746 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
748 * @return The paragraph level. If there are multiple paragraphs, their
749 * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
750 * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph
754 * @see ubidi_getParagraph
755 * @see ubidi_getParagraphByIndex
758 U_STABLE UBiDiLevel U_EXPORT2
759 ubidi_getParaLevel(const UBiDi *pBiDi);
762 * Get the number of paragraphs.
764 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
766 * @return The number of paragraphs.
769 U_STABLE int32_t U_EXPORT2
770 ubidi_countParagraphs(UBiDi *pBiDi);
773 * Get a paragraph, given a position within the text.
774 * This function returns information about a paragraph.<p>
776 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
778 * @param charIndex is the index of a character within the text, in the
779 * range <code>[0..ubidi_getLength(pBiDi)-1]</code>.
781 * @param pParaStart will receive the index of the first character of the
782 * paragraph in the text.
783 * This pointer can be <code>NULL</code> if this
784 * value is not necessary.
786 * @param pParaLimit will receive the limit of the paragraph.
787 * The l-value that you point to here may be the
788 * same expression (variable) as the one for
789 * <code>charIndex</code>.
790 * This pointer can be <code>NULL</code> if this
791 * value is not necessary.
793 * @param pParaLevel will receive the level of the paragraph.
794 * This pointer can be <code>NULL</code> if this
795 * value is not necessary.
797 * @param pErrorCode must be a valid pointer to an error code value.
799 * @return The index of the paragraph containing the specified position.
802 U_STABLE int32_t U_EXPORT2
803 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
804 int32_t *pParaLimit, UBiDiLevel *pParaLevel,
805 UErrorCode *pErrorCode);
808 * Get a paragraph, given the index of this paragraph.
810 * This function returns information about a paragraph.<p>
812 * @param pBiDi is the paragraph <code>UBiDi</code> object.
814 * @param paraIndex is the number of the paragraph, in the
815 * range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
817 * @param pParaStart will receive the index of the first character of the
818 * paragraph in the text.
819 * This pointer can be <code>NULL</code> if this
820 * value is not necessary.
822 * @param pParaLimit will receive the limit of the paragraph.
823 * This pointer can be <code>NULL</code> if this
824 * value is not necessary.
826 * @param pParaLevel will receive the level of the paragraph.
827 * This pointer can be <code>NULL</code> if this
828 * value is not necessary.
830 * @param pErrorCode must be a valid pointer to an error code value.
834 U_STABLE void U_EXPORT2
835 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
836 int32_t *pParaStart, int32_t *pParaLimit,
837 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
840 * Get the level for one character.
842 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
844 * @param charIndex the index of a character.
846 * @return The level for the character at charIndex.
851 U_STABLE UBiDiLevel U_EXPORT2
852 ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
855 * Get an array of levels for each character.<p>
857 * Note that this function may allocate memory under some
858 * circumstances, unlike <code>ubidi_getLevelAt()</code>.
860 * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose
861 * text length must be strictly positive.
863 * @param pErrorCode must be a valid pointer to an error code value.
865 * @return The levels array for the text,
866 * or <code>NULL</code> if an error occurs.
871 U_STABLE const UBiDiLevel * U_EXPORT2
872 ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
876 * This function returns information about a run and is used
877 * to retrieve runs in logical order.<p>
878 * This is especially useful for line-breaking on a paragraph.
880 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
882 * @param logicalStart is the first character of the run.
884 * @param pLogicalLimit will receive the limit of the run.
885 * The l-value that you point to here may be the
886 * same expression (variable) as the one for
887 * <code>logicalStart</code>.
888 * This pointer can be <code>NULL</code> if this
889 * value is not necessary.
891 * @param pLevel will receive the level of the run.
892 * This pointer can be <code>NULL</code> if this
893 * value is not necessary.
896 U_STABLE void U_EXPORT2
897 ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalStart,
898 int32_t *pLogicalLimit, UBiDiLevel *pLevel);
901 * Get the number of runs.
902 * This function may invoke the actual reordering on the
903 * <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
904 * may have resolved only the levels of the text. Therefore,
905 * <code>ubidi_countRuns()</code> may have to allocate memory,
906 * and may fail doing so.
908 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
910 * @param pErrorCode must be a valid pointer to an error code value.
912 * @return The number of runs.
915 U_STABLE int32_t U_EXPORT2
916 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
919 * Get one run's logical start, length, and directionality,
920 * which can be 0 for LTR or 1 for RTL.
921 * In an RTL run, the character at the logical start is
922 * visually on the right of the displayed run.
923 * The length is the number of characters in the run.<p>
924 * <code>ubidi_countRuns()</code> should be called
925 * before the runs are retrieved.
927 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
929 * @param runIndex is the number of the run in visual order, in the
930 * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
932 * @param pLogicalStart is the first logical character index in the text.
933 * The pointer may be <code>NULL</code> if this index is not needed.
935 * @param pLength is the number of characters (at least one) in the run.
936 * The pointer may be <code>NULL</code> if this is not needed.
938 * @return the directionality of the run,
939 * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
940 * never <code>UBIDI_MIXED</code>.
942 * @see ubidi_countRuns
947 * int32_t i, count=ubidi_countRuns(pBiDi, pErrorCode),
948 * logicalStart, visualIndex=0, length;
949 * for(i=0; i<count; ++i) {
950 * if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
952 * show_char(text[logicalStart++], visualIndex++);
953 * } while(--length>0);
955 * logicalStart+=length; // logicalLimit
957 * show_char(text[--logicalStart], visualIndex++);
958 * } while(--length>0);
964 * Note that in right-to-left runs, code like this places
965 * modifier letters before base characters and second surrogates
966 * before first ones.
969 U_STABLE UBiDiDirection U_EXPORT2
970 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
971 int32_t *pLogicalStart, int32_t *pLength);
974 * Get the visual position from a logical text position.
975 * If such a mapping is used many times on the same
976 * <code>UBiDi</code> object, then calling
977 * <code>ubidi_getLogicalMap()</code> is more efficient.<p>
979 * Note that in right-to-left runs, this mapping places
980 * modifier letters before base characters and second surrogates
981 * before first ones.
983 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
985 * @param logicalIndex is the index of a character in the text.
987 * @param pErrorCode must be a valid pointer to an error code value.
989 * @return The visual position of this character.
991 * @see ubidi_getLogicalMap
992 * @see ubidi_getLogicalIndex
995 U_STABLE int32_t U_EXPORT2
996 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
999 * Get the logical text position from a visual position.
1000 * If such a mapping is used many times on the same
1001 * <code>UBiDi</code> object, then calling
1002 * <code>ubidi_getVisualMap()</code> is more efficient.<p>
1004 * This is the inverse function to <code>ubidi_getVisualIndex()</code>.
1006 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1008 * @param visualIndex is the visual position of a character.
1010 * @param pErrorCode must be a valid pointer to an error code value.
1012 * @return The index of this character in the text.
1014 * @see ubidi_getVisualMap
1015 * @see ubidi_getVisualIndex
1018 U_STABLE int32_t U_EXPORT2
1019 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
1022 * Get a logical-to-visual index map (array) for the characters in the UBiDi
1023 * (paragraph or line) object.
1025 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1027 * @param indexMap is a pointer to an array of <code>ubidi_getLength()</code>
1028 * indexes which will reflect the reordering of the characters.
1029 * The array does not need to be initialized.<p>
1030 * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.<p>
1032 * @param pErrorCode must be a valid pointer to an error code value.
1034 * @see ubidi_getVisualMap
1035 * @see ubidi_getVisualIndex
1038 U_STABLE void U_EXPORT2
1039 ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
1042 * Get a visual-to-logical index map (array) for the characters in the UBiDi
1043 * (paragraph or line) object.
1045 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1047 * @param indexMap is a pointer to an array of <code>ubidi_getLength()</code>
1048 * indexes which will reflect the reordering of the characters.
1049 * The array does not need to be initialized.<p>
1050 * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.<p>
1052 * @param pErrorCode must be a valid pointer to an error code value.
1054 * @see ubidi_getLogicalMap
1055 * @see ubidi_getLogicalIndex
1058 U_STABLE void U_EXPORT2
1059 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
1062 * This is a convenience function that does not use a UBiDi object.
1063 * It is intended to be used for when an application has determined the levels
1064 * of objects (character sequences) and just needs to have them reordered (L2).
1065 * This is equivalent to using <code>ubidi_getLogicalMap</code> on a
1066 * <code>UBiDi</code> object.
1068 * @param levels is an array with <code>length</code> levels that have been determined by
1069 * the application.
1071 * @param length is the number of levels in the array, or, semantically,
1072 * the number of objects to be reordered.
1073 * It must be <code>length>0</code>.
1075 * @param indexMap is a pointer to an array of <code>length</code>
1076 * indexes which will reflect the reordering of the characters.
1077 * The array does not need to be initialized.<p>
1078 * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
1081 U_STABLE void U_EXPORT2
1082 ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
1085 * This is a convenience function that does not use a UBiDi object.
1086 * It is intended to be used for when an application has determined the levels
1087 * of objects (character sequences) and just needs to have them reordered (L2).
1088 * This is equivalent to using <code>ubidi_getVisualMap</code> on a
1089 * <code>UBiDi</code> object.
1091 * @param levels is an array with <code>length</code> levels that have been determined by
1092 * the application.
1094 * @param length is the number of levels in the array, or, semantically,
1095 * the number of objects to be reordered.
1096 * It must be <code>length>0</code>.
1098 * @param indexMap is a pointer to an array of <code>length</code>
1099 * indexes which will reflect the reordering of the characters.
1100 * The array does not need to be initialized.<p>
1101 * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
1104 U_STABLE void U_EXPORT2
1105 ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
1108 * Invert an index map.
1109 * The one-to-one index mapping of the first map is inverted and written to
1110 * the second one.
1112 * @param srcMap is an array with <code>length</code> indexes
1113 * which define the original mapping.
1115 * @param destMap is an array with <code>length</code> indexes
1116 * which will be filled with the inverse mapping.
1118 * @param length is the length of each array.
1121 U_STABLE void U_EXPORT2
1122 ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
1124 /** option flags for ubidi_writeReordered() */
1127 * option bit for ubidi_writeReordered():
1128 * keep combining characters after their base characters in RTL runs
1130 * @see ubidi_writeReordered
1133 #define UBIDI_KEEP_BASE_COMBINING 1
1136 * option bit for ubidi_writeReordered():
1137 * replace characters with the "mirrored" property in RTL runs
1138 * by their mirror-image mappings
1140 * @see ubidi_writeReordered
1143 #define UBIDI_DO_MIRRORING 2
1146 * option bit for ubidi_writeReordered():
1147 * surround the run with LRMs if necessary;
1148 * this is part of the approximate "inverse BiDi" algorithm
1150 * @see ubidi_setInverse
1151 * @see ubidi_writeReordered
1154 #define UBIDI_INSERT_LRM_FOR_NUMERIC 4
1157 * option bit for ubidi_writeReordered():
1158 * remove BiDi control characters
1159 * (this does not affect UBIDI_INSERT_LRM_FOR_NUMERIC)
1161 * @see ubidi_writeReordered
1164 #define UBIDI_REMOVE_BIDI_CONTROLS 8
1167 * option bit for ubidi_writeReordered():
1168 * write the output in reverse order
1170 * <p>This has the same effect as calling <code>ubidi_writeReordered()</code>
1171 * first without this option, and then calling
1172 * <code>ubidi_writeReverse()</code> without mirroring.
1173 * Doing this in the same step is faster and avoids a temporary buffer.
1174 * An example for using this option is output to a character terminal that
1175 * is designed for RTL scripts and stores text in reverse order.</p>
1177 * @see ubidi_writeReordered
1180 #define UBIDI_OUTPUT_REVERSE 16
1183 * Take a <code>UBiDi</code> object containing the reordering
1184 * information for a piece of text (one or more paragraphs) set by
1185 * <code>ubidi_setPara()</code> or for a line of text set by <code>ubidi_setLine()</code>
1186 * and write a reordered string to the destination buffer.
1188 * This function preserves the integrity of characters with multiple
1189 * code units and (optionally) modifier letters.
1190 * Characters in RTL runs can be replaced by mirror-image characters
1191 * in the destination buffer. Note that "real" mirroring has
1192 * to be done in a rendering engine by glyph selection
1193 * and that for many "mirrored" characters there are no
1194 * Unicode characters as mirror-image equivalents.
1195 * There are also options to insert or remove BiDi control
1196 * characters; see the description of the <code>destSize</code>
1197 * and <code>options</code> parameters and of the option bit flags.
1199 * @see UBIDI_DO_MIRRORING
1200 * @see UBIDI_INSERT_LRM_FOR_NUMERIC
1201 * @see UBIDI_KEEP_BASE_COMBINING
1202 * @see UBIDI_OUTPUT_REVERSE
1203 * @see UBIDI_REMOVE_BIDI_CONTROLS
1205 * @param pBiDi A pointer to a <code>UBiDi</code> object that
1206 * is set by <code>ubidi_setPara()</code> or
1207 * <code>ubidi_setLine()</code> and contains the reordering
1208 * information for the text that it was defined for,
1209 * as well as a pointer to that text.
1210 * <p>The text was aliased (only the pointer was stored
1211 * without copying the contents) and must not have been modified
1212 * since the <code>ubidi_setPara()</code> call.</p>
1214 * @param dest A pointer to where the reordered text is to be copied.
1215 * The source text and <code>dest[destSize]</code>
1216 * must not overlap.
1218 * @param destSize The size of the <code>dest</code> buffer,
1219 * in number of UChars.
1220 * If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>
1221 * option is set, then the destination length could be
1222 * as large as
1223 * <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>.
1224 * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
1225 * is set, then the destination length may be less than
1226 * <code>ubidi_getLength(pBiDi)</code>.
1227 * If none of these options is set, then the destination length
1228 * will be exactly <code>ubidi_getLength(pBiDi)</code>.
1230 * @param options A bit set of options for the reordering that control
1231 * how the reordered text is written.
1232 * The options include mirroring the characters on a code
1233 * point basis and inserting LRM characters, which is used
1234 * especially for transforming visually stored text
1235 * to logically stored text (although this is still an
1236 * imperfect implementation of an "inverse BiDi" algorithm
1237 * because it uses the "forward BiDi" algorithm at its core).
1238 * The available options are:
1239 * <code>#UBIDI_DO_MIRRORING</code>,
1240 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1241 * <code>#UBIDI_KEEP_BASE_COMBINING</code>,
1242 * <code>#UBIDI_OUTPUT_REVERSE</code>,
1243 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
1245 * @param pErrorCode must be a valid pointer to an error code value.
1247 * @return The length of the output string.
1250 U_STABLE int32_t U_EXPORT2
1251 ubidi_writeReordered(UBiDi *pBiDi,
1252 UChar *dest, int32_t destSize,
1253 uint16_t options,
1254 UErrorCode *pErrorCode);
1257 * Reverse a Right-To-Left run of Unicode text.
1259 * This function preserves the integrity of characters with multiple
1260 * code units and (optionally) modifier letters.
1261 * Characters can be replaced by mirror-image characters
1262 * in the destination buffer. Note that "real" mirroring has
1263 * to be done in a rendering engine by glyph selection
1264 * and that for many "mirrored" characters there are no
1265 * Unicode characters as mirror-image equivalents.
1266 * There are also options to insert or remove BiDi control
1267 * characters.
1269 * This function is the implementation for reversing RTL runs as part
1270 * of <code>ubidi_writeReordered()</code>. For detailed descriptions
1271 * of the parameters, see there.
1272 * Since no BiDi controls are inserted here, the output string length
1273 * will never exceed <code>srcLength</code>.
1275 * @see ubidi_writeReordered
1277 * @param src A pointer to the RTL run text.
1279 * @param srcLength The length of the RTL run.
1281 * @param dest A pointer to where the reordered text is to be copied.
1282 * <code>src[srcLength]</code> and <code>dest[destSize]</code>
1283 * must not overlap.
1285 * @param destSize The size of the <code>dest</code> buffer,
1286 * in number of UChars.
1287 * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
1288 * is set, then the destination length may be less than
1289 * <code>srcLength</code>.
1290 * If this option is not set, then the destination length
1291 * will be exactly <code>srcLength</code>.
1293 * @param options A bit set of options for the reordering that control
1294 * how the reordered text is written.
1295 * See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>.
1297 * @param pErrorCode must be a valid pointer to an error code value.
1299 * @return The length of the output string.
1302 U_STABLE int32_t U_EXPORT2
1303 ubidi_writeReverse(const UChar *src, int32_t srcLength,
1304 UChar *dest, int32_t destSize,
1305 uint16_t options,
1306 UErrorCode *pErrorCode);
1308 /*#define BIDI_SAMPLE_CODE*/