os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/brkiter.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
/*
sl@0
     2
********************************************************************************
sl@0
     3
*   Copyright (C) 1997-2005, International Business Machines
sl@0
     4
*   Corporation and others.  All Rights Reserved.
sl@0
     5
********************************************************************************
sl@0
     6
*
sl@0
     7
* File brkiter.h
sl@0
     8
*
sl@0
     9
* Modification History:
sl@0
    10
*
sl@0
    11
*   Date        Name        Description
sl@0
    12
*   02/18/97    aliu        Added typedef for TextCount.  Made DONE const.
sl@0
    13
*   05/07/97    aliu        Fixed DLL declaration.
sl@0
    14
*   07/09/97    jfitz       Renamed BreakIterator and interface synced with JDK
sl@0
    15
*   08/11/98    helena      Sync-up JDK1.2.
sl@0
    16
*   01/13/2000  helena      Added UErrorCode parameter to createXXXInstance methods.
sl@0
    17
********************************************************************************
sl@0
    18
*/
sl@0
    19
sl@0
    20
#ifndef BRKITER_H
sl@0
    21
#define BRKITER_H
sl@0
    22
sl@0
    23
#include "unicode/utypes.h"
sl@0
    24
sl@0
    25
/**
sl@0
    26
 * \file
sl@0
    27
 * \brief C++ API: Break Iterator.
sl@0
    28
 */
sl@0
    29
 
sl@0
    30
#if UCONFIG_NO_BREAK_ITERATION
sl@0
    31
sl@0
    32
U_NAMESPACE_BEGIN
sl@0
    33
sl@0
    34
/*
sl@0
    35
 * Allow the declaration of APIs with pointers to BreakIterator
sl@0
    36
 * even when break iteration is removed from the build.
sl@0
    37
 */
sl@0
    38
class BreakIterator;
sl@0
    39
sl@0
    40
U_NAMESPACE_END
sl@0
    41
sl@0
    42
#else
sl@0
    43
sl@0
    44
#include "unicode/uobject.h"
sl@0
    45
#include "unicode/unistr.h"
sl@0
    46
#include "unicode/chariter.h"
sl@0
    47
#include "unicode/locid.h"
sl@0
    48
#include "unicode/ubrk.h"
sl@0
    49
#include "unicode/strenum.h"
sl@0
    50
#include "unicode/utext.h"
sl@0
    51
sl@0
    52
U_NAMESPACE_BEGIN
sl@0
    53
sl@0
    54
#if !UCONFIG_NO_SERVICE
sl@0
    55
/**
sl@0
    56
 * Opaque type returned by registerInstance.
 * The value is only meaningful as the key argument of
 * BreakIterator::unregister(); it is declared as a pointer to const void,
 * so callers cannot inspect what it refers to.
 * This typedef is only available when UCONFIG_NO_SERVICE is not set.
sl@0
    57
 * @stable
sl@0
    58
 */
sl@0
    59
typedef const void* URegistryKey;
sl@0
    60
#endif
sl@0
    61
sl@0
    62
/**
sl@0
    63
 * The BreakIterator class implements methods for finding the location
sl@0
    64
 * of boundaries in text. BreakIterator is an abstract base class.
sl@0
    65
 * Instances of BreakIterator maintain a current position and scan over
sl@0
    66
 * text returning the index of characters where boundaries occur.
sl@0
    67
 * <P>
sl@0
    68
 * Line boundary analysis determines where a text string can be broken
sl@0
    69
 * when line-wrapping. The mechanism correctly handles punctuation and
sl@0
    70
 * hyphenated words.
sl@0
    71
 * <P>
sl@0
    72
 * Sentence boundary analysis allows selection with correct
sl@0
    73
 * interpretation of periods within numbers and abbreviations, and
sl@0
    74
 * trailing punctuation marks such as quotation marks and parentheses.
sl@0
    75
 * <P>
sl@0
    76
 * Word boundary analysis is used by search and replace functions, as
sl@0
    77
 * well as within text editing applications that allow the user to
sl@0
    78
 * select words with a double click. Word selection provides correct
sl@0
    79
 * interpretation of punctuation marks within and following
sl@0
    80
 * words. Characters that are not part of a word, such as symbols or
sl@0
    81
 * punctuation marks, have word-breaks on both sides.
sl@0
    82
 * <P>
sl@0
    83
 * Character boundary analysis allows users to interact with
sl@0
    84
 * characters as they expect to, for example, when moving the cursor
sl@0
    85
 * through a text string. Character boundary analysis provides correct
sl@0
    86
 * navigation through character strings, regardless of how the
sl@0
    87
 * character is stored.  For example, an accented character might be
sl@0
    88
 * stored as a base character and a diacritical mark. What users
sl@0
    89
 * consider to be a character can differ between languages.
sl@0
    90
 * <P>
sl@0
    91
 * This is the interface for all text boundaries.
sl@0
    92
 * <P>
sl@0
    93
 * Examples:
sl@0
    94
 * <P>
sl@0
    95
 * Helper function to output text
sl@0
    96
 * <pre>
sl@0
    97
 * \code
sl@0
    98
 *    void printTextRange( BreakIterator& iterator, int32_t start, int32_t end )
sl@0
    99
 *    {
sl@0
   100
 *        UnicodeString textBuffer, temp;
sl@0
   101
 *        CharacterIterator *strIter = iterator.getText().clone();
sl@0
   102
 *        strIter->getText(temp);
sl@0
   103
 *        cout << " " << start << " " << end << " |"
sl@0
   104
 *             << temp.extractBetween(start, end, textBuffer)
sl@0
   105
 *             << "|" << endl;
sl@0
   106
 *        delete strIter;
sl@0
   107
 *    }
sl@0
   108
 * \endcode
sl@0
   109
 * </pre>
sl@0
   110
 * Print each element in order:
sl@0
   111
 * <pre>
sl@0
   112
 * \code
sl@0
   113
 *    void printEachForward( BreakIterator& boundary)
sl@0
   114
 *    {
sl@0
   115
 *       int32_t start = boundary.first();
sl@0
   116
 *       for (int32_t end = boundary.next();
sl@0
   117
 *         end != BreakIterator::DONE;
sl@0
   118
 *         start = end, end = boundary.next())
sl@0
   119
 *         {
sl@0
   120
 *             printTextRange( boundary, start, end );
sl@0
   121
 *         }
sl@0
   122
 *    }
sl@0
   123
 * \endcode
sl@0
   124
 * </pre>
sl@0
   125
 * Print each element in reverse order:
sl@0
   126
 * <pre>
sl@0
   127
 * \code
sl@0
   128
 *    void printEachBackward( BreakIterator& boundary)
sl@0
   129
 *    {
sl@0
   130
 *       int32_t end = boundary.last();
sl@0
   131
 *       for (int32_t start = boundary.previous();
sl@0
   132
 *         start != BreakIterator::DONE;
sl@0
   133
 *         end = start, start = boundary.previous())
sl@0
   134
 *         {
sl@0
   135
 *             printTextRange( boundary, start, end );
sl@0
   136
 *         }
sl@0
   137
 *    }
sl@0
   138
 * \endcode
sl@0
   139
 * </pre>
sl@0
   140
 * Print first element
sl@0
   141
 * <pre>
sl@0
   142
 * \code
sl@0
   143
 *    void printFirst(BreakIterator& boundary)
sl@0
   144
 *    {
sl@0
   145
 *        int32_t start = boundary.first();
sl@0
   146
 *        int32_t end = boundary.next();
sl@0
   147
 *        printTextRange( boundary, start, end );
sl@0
   148
 *    }
sl@0
   149
 * \endcode
sl@0
   150
 * </pre>
sl@0
   151
 * Print last element
sl@0
   152
 * <pre>
sl@0
   153
 *  \code
sl@0
   154
 *    void printLast(BreakIterator& boundary)
sl@0
   155
 *    {
sl@0
   156
 *        int32_t end = boundary.last();
sl@0
   157
 *        int32_t start = boundary.previous();
sl@0
   158
 *        printTextRange( boundary, start, end );
sl@0
   159
 *    }
sl@0
   160
 * \endcode
sl@0
   161
 * </pre>
sl@0
   162
 * Print the element at a specified position
sl@0
   163
 * <pre>
sl@0
   164
 * \code
sl@0
   165
 *    void printAt(BreakIterator &boundary, int32_t pos )
sl@0
   166
 *    {
sl@0
   167
 *        int32_t end = boundary.following(pos);
sl@0
   168
 *        int32_t start = boundary.previous();
sl@0
   169
 *        printTextRange( boundary, start, end );
sl@0
   170
 *    }
sl@0
   171
 * \endcode
sl@0
   172
 * </pre>
sl@0
   173
 * Creating and using text boundaries
sl@0
   174
 * <pre>
sl@0
   175
 * \code
sl@0
   176
 *       void BreakIterator_Example( void )
sl@0
   177
 *       {
sl@0
   178
 *           BreakIterator* boundary;
sl@0
   179
 *           UnicodeString stringToExamine("Aaa bbb ccc. Ddd eee fff.");
sl@0
   180
 *           cout << "Examining: " << stringToExamine << endl;
sl@0
   181
 *
sl@0
   182
 *           //print each sentence in forward and reverse order
sl@0
   183
 *           UErrorCode sentenceStatus = U_ZERO_ERROR;
 *           boundary = BreakIterator::createSentenceInstance( Locale::getUS(), sentenceStatus );
sl@0
   184
 *           boundary->setText(stringToExamine);
sl@0
   185
 *           cout << "----- forward: -----------" << endl;
sl@0
   186
 *           printEachForward(*boundary);
sl@0
   187
 *           cout << "----- backward: ----------" << endl;
sl@0
   188
 *           printEachBackward(*boundary);
sl@0
   189
 *           delete boundary;
sl@0
   190
 *
sl@0
   191
 *           //print each word in order
sl@0
   192
 *           UErrorCode wordStatus = U_ZERO_ERROR;
 *           boundary = BreakIterator::createWordInstance( Locale::getDefault(), wordStatus );
sl@0
   193
 *           boundary->setText(stringToExamine);
sl@0
   194
 *           cout << "----- forward: -----------" << endl;
sl@0
   195
 *           printEachForward(*boundary);
sl@0
   196
 *           //print first element
sl@0
   197
 *           cout << "----- first: -------------" << endl;
sl@0
   198
 *           printFirst(*boundary);
sl@0
   199
 *           //print last element
sl@0
   200
 *           cout << "----- last: --------------" << endl;
sl@0
   201
 *           printLast(*boundary);
sl@0
   202
 *           //print word at charpos 10
sl@0
   203
 *           cout << "----- at pos 10: ---------" << endl;
sl@0
   204
 *           printAt(*boundary, 10 );
sl@0
   205
 *
sl@0
   206
 *           delete boundary;
sl@0
   207
 *       }
sl@0
   208
 * \endcode
sl@0
   209
 * </pre>
sl@0
   210
 */
sl@0
   211
class U_COMMON_API BreakIterator : public UObject {
sl@0
   212
public:
sl@0
   213
    /**
sl@0
   214
     *  destructor
sl@0
   215
     *  @stable ICU 2.0
sl@0
   216
     */
sl@0
   217
    virtual ~BreakIterator();
sl@0
   218
sl@0
   219
    /**
sl@0
   220
     * Return true if another object is semantically equal to this
sl@0
   221
     * one. The other object should be an instance of the same subclass of
sl@0
   222
     * BreakIterator. Objects of different subclasses are considered
sl@0
   223
     * unequal.
sl@0
   224
     * <P>
sl@0
   225
     * Return true if this BreakIterator is at the same position in the
sl@0
   226
     * same text, and is the same class and type (word, line, etc.) of
sl@0
   227
     * BreakIterator, as the argument.  Text is considered the same if
sl@0
   228
     * it contains the same characters, it need not be the same
sl@0
   229
     * object, and styles are not considered.
sl@0
   230
     * @stable ICU 2.0
sl@0
   231
     */
sl@0
   232
    virtual UBool operator==(const BreakIterator&) const = 0;
sl@0
   233
sl@0
   234
    /**
sl@0
   235
     * Returns the complement of the result of operator==
sl@0
   236
     * @param rhs The BreakIterator to be compared for inequality
sl@0
   237
     * @return the complement of the result of operator==
sl@0
   238
     * @stable ICU 2.0
sl@0
   239
     */
sl@0
   240
    UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
sl@0
   241
sl@0
   242
    /**
sl@0
   243
     * Return a polymorphic copy of this object.  This is an abstract
sl@0
   244
     * method which subclasses implement.
sl@0
   245
     * @stable ICU 2.0
sl@0
   246
     */
sl@0
   247
    virtual BreakIterator* clone(void) const = 0;
sl@0
   248
sl@0
   249
    /**
sl@0
   250
     * Return a polymorphic class ID for this object. Different subclasses
sl@0
   251
     * will return distinct unequal values.
sl@0
   252
     * @stable ICU 2.0
sl@0
   253
     */
sl@0
   254
    virtual UClassID getDynamicClassID(void) const = 0;
sl@0
   255
sl@0
   256
    /**
sl@0
   257
     * Return a CharacterIterator over the text being analyzed.
sl@0
   258
     * Changing the state of the returned iterator can have undefined consequences
sl@0
   259
     * on the operation of the break iterator.  If you need to change it, clone it first.
sl@0
   260
     * @stable ICU 2.0
sl@0
   261
     */
sl@0
   262
    virtual const CharacterIterator& getText(void) const = 0;
sl@0
   263
sl@0
   264
sl@0
   265
    /**
sl@0
   266
      *  Get a UText for the text being analyzed.
sl@0
   267
      *  The returned UText is a shallow clone of the UText used internally
sl@0
   268
      *  by the break iterator implementation.  It can safely be used to
sl@0
   269
      *  access the text without impacting any break iterator operations,
sl@0
   270
      *  but the underlying text itself must not be altered.
sl@0
   271
      *
sl@0
   272
      * @param fillIn A UText to be filled in.  If NULL, a new UText will be
sl@0
   273
      *           allocated to hold the result.
sl@0
   274
      * @param status receives any error codes.
sl@0
   275
      * @return   The current UText for this break iterator.  If an input
sl@0
   276
      *           UText was provided, it will always be returned.
sl@0
   277
      * @draft ICU 3.4
sl@0
   278
      */
sl@0
   279
     virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
sl@0
   280
sl@0
   281
    /**
sl@0
   282
     * Change the text over which this operates. The text boundary is
sl@0
   283
     * reset to the start.
sl@0
   284
     * @param text The UnicodeString used to change the text.
sl@0
   285
     * @stable ICU 2.0
sl@0
   286
     */
sl@0
   287
    virtual void  setText(const UnicodeString &text) = 0;
sl@0
   288
sl@0
   289
    /**
sl@0
   290
     * Reset the break iterator to operate over the text represented by 
sl@0
   291
     * the UText.  The iterator position is reset to the start.
sl@0
   292
     *
sl@0
   293
     * This function makes a shallow clone of the supplied UText.  This means
sl@0
   294
     * that the caller is free to immediately close or otherwise reuse the
sl@0
   295
     * Utext that was passed as a parameter, but that the underlying text itself
sl@0
   296
     * must not be altered while being referenced by the break iterator.
sl@0
   297
     *
sl@0
   298
     * @param text The UText used to change the text.
sl@0
   299
     * @param status receives any error codes.
sl@0
   300
     * @draft ICU 3.4
sl@0
   301
     */
sl@0
   302
    virtual void  setText(UText *text, UErrorCode &status) = 0;
sl@0
   303
sl@0
   304
    /**
sl@0
   305
     * Change the text over which this operates. The text boundary is
sl@0
   306
     * reset to the start.
sl@0
   307
     * @param it The CharacterIterator used to change the text.
sl@0
   308
     * @stable ICU 2.0
sl@0
   309
     */
sl@0
   310
    virtual void  adoptText(CharacterIterator* it) = 0;
sl@0
   311
sl@0
   312
    enum {
sl@0
   313
        /**
sl@0
   314
         * DONE is returned by previous() and next() after all valid
sl@0
   315
         * boundaries have been returned.
sl@0
   316
         * @stable ICU 2.0
sl@0
   317
         */
sl@0
   318
        DONE = (int32_t)-1
sl@0
   319
    };
sl@0
   320
sl@0
   321
    /**
sl@0
   322
     * Return the index of the first character in the text being scanned.
sl@0
   323
     * @stable ICU 2.0
sl@0
   324
     */
sl@0
   325
    virtual int32_t first(void) = 0;
sl@0
   326
sl@0
   327
    /**
sl@0
   328
     * Return the index immediately BEYOND the last character in the text being scanned.
sl@0
   329
     * @stable ICU 2.0
sl@0
   330
     */
sl@0
   331
    virtual int32_t last(void) = 0;
sl@0
   332
sl@0
   333
    /**
sl@0
   334
     * Return the boundary preceding the current boundary.
sl@0
   335
     * @return The character index of the previous text boundary or DONE if all
sl@0
   336
     * boundaries have been returned.
sl@0
   337
     * @stable ICU 2.0
sl@0
   338
     */
sl@0
   339
    virtual int32_t previous(void) = 0;
sl@0
   340
sl@0
   341
    /**
sl@0
   342
     * Return the boundary following the current boundary.
sl@0
   343
     * @return The character index of the next text boundary or DONE if all
sl@0
   344
     * boundaries have been returned.
sl@0
   345
     * @stable ICU 2.0
sl@0
   346
     */
sl@0
   347
    virtual int32_t next(void) = 0;
sl@0
   348
sl@0
   349
    /**
sl@0
   350
     * Return character index of the current interator position within the text.
sl@0
   351
     * @return The boundary most recently returned.
sl@0
   352
     * @stable ICU 2.0
sl@0
   353
     */
sl@0
   354
    virtual int32_t current(void) const = 0;
sl@0
   355
sl@0
   356
    /**
sl@0
   357
     * Return the first boundary following the specified offset.
sl@0
   358
     * The value returned is always greater than the offset or
sl@0
   359
     * the value BreakIterator.DONE
sl@0
   360
     * @param offset the offset to begin scanning.
sl@0
   361
     * @return The first boundary after the specified offset.
sl@0
   362
     * @stable ICU 2.0
sl@0
   363
     */
sl@0
   364
    virtual int32_t following(int32_t offset) = 0;
sl@0
   365
sl@0
   366
    /**
sl@0
   367
     * Return the first boundary preceding the specified offset.
sl@0
   368
     * The value returned is always smaller than the offset or
sl@0
   369
     * the value BreakIterator.DONE
sl@0
   370
     * @param offset the offset to begin scanning.
sl@0
   371
     * @return The first boundary before the specified offset.
sl@0
   372
     * @stable ICU 2.0
sl@0
   373
     */
sl@0
   374
    virtual int32_t preceding(int32_t offset) = 0;
sl@0
   375
sl@0
   376
    /**
sl@0
   377
     * Return true if the specfied position is a boundary position.
sl@0
   378
     * As a side effect, the current position of the iterator is set
sl@0
   379
     * to the first boundary position at or following the specified offset.
sl@0
   380
     * @param offset the offset to check.
sl@0
   381
     * @return True if "offset" is a boundary position.
sl@0
   382
     * @stable ICU 2.0
sl@0
   383
     */
sl@0
   384
    virtual UBool isBoundary(int32_t offset) = 0;
sl@0
   385
sl@0
   386
    /**
sl@0
   387
     * Return the nth boundary from the current boundary
sl@0
   388
     * @param n which boundary to return.  A value of 0
sl@0
   389
     * does nothing.  Negative values move to previous boundaries
sl@0
   390
     * and positive values move to later boundaries.
sl@0
   391
     * @return The index of the nth boundary from the current position, or
sl@0
   392
     * DONE if there are fewer than |n| boundaries in the specfied direction.
sl@0
   393
     * @stable ICU 2.0
sl@0
   394
     */
sl@0
   395
    virtual int32_t next(int32_t n) = 0;
sl@0
   396
sl@0
   397
    /**
sl@0
   398
     * Create BreakIterator for word-breaks using the given locale.
sl@0
   399
     * Returns an instance of a BreakIterator implementing word breaks.
sl@0
   400
     * WordBreak is useful for word selection (ex. double click)
sl@0
   401
     * @param where the locale.
sl@0
   402
     * @param status the error code
sl@0
   403
     * @return A BreakIterator for word-breaks.  The UErrorCode& status
sl@0
   404
     * parameter is used to return status information to the user.
sl@0
   405
     * To check whether the construction succeeded or not, you should check
sl@0
   406
     * the value of U_SUCCESS(err).  If you wish more detailed information, you
sl@0
   407
     * can check for informational error results which still indicate success.
sl@0
   408
     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
sl@0
   409
     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
sl@0
   410
     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
sl@0
   411
     * used; neither the requested locale nor any of its fall back locales
sl@0
   412
     * could be found.
sl@0
   413
     * The caller owns the returned object and is responsible for deleting it.
sl@0
   414
     * @stable ICU 2.0
sl@0
   415
     */
sl@0
   416
    static BreakIterator* U_EXPORT2
sl@0
   417
    createWordInstance(const Locale& where, UErrorCode& status);
sl@0
   418
sl@0
   419
    /**
sl@0
   420
     * Create BreakIterator for line-breaks using specified locale.
sl@0
   421
     * Returns an instance of a BreakIterator implementing line breaks. Line
sl@0
   422
     * breaks are logically possible line breaks, actual line breaks are
sl@0
   423
     * usually determined based on display width.
sl@0
   424
     * LineBreak is useful for word wrapping text.
sl@0
   425
     * @param where the locale.
sl@0
   426
     * @param status The error code.
sl@0
   427
     * @return A BreakIterator for line-breaks.  The UErrorCode& status
sl@0
   428
     * parameter is used to return status information to the user.
sl@0
   429
     * To check whether the construction succeeded or not, you should check
sl@0
   430
     * the value of U_SUCCESS(err).  If you wish more detailed information, you
sl@0
   431
     * can check for informational error results which still indicate success.
sl@0
   432
     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
sl@0
   433
     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
sl@0
   434
     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
sl@0
   435
     * used; neither the requested locale nor any of its fall back locales
sl@0
   436
     * could be found.
sl@0
   437
     * The caller owns the returned object and is responsible for deleting it.
sl@0
   438
     * @stable ICU 2.0
sl@0
   439
     */
sl@0
   440
    static BreakIterator* U_EXPORT2
sl@0
   441
    createLineInstance(const Locale& where, UErrorCode& status);
sl@0
   442
sl@0
   443
    /**
sl@0
   444
     * Create BreakIterator for character-breaks using specified locale
sl@0
   445
     * Returns an instance of a BreakIterator implementing character breaks.
sl@0
   446
     * Character breaks are boundaries of combining character sequences.
sl@0
   447
     * @param where the locale.
sl@0
   448
     * @param status The error code.
sl@0
   449
     * @return A BreakIterator for character-breaks.  The UErrorCode& status
sl@0
   450
     * parameter is used to return status information to the user.
sl@0
   451
     * To check whether the construction succeeded or not, you should check
sl@0
   452
     * the value of U_SUCCESS(err).  If you wish more detailed information, you
sl@0
   453
     * can check for informational error results which still indicate success.
sl@0
   454
     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
sl@0
   455
     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
sl@0
   456
     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
sl@0
   457
     * used; neither the requested locale nor any of its fall back locales
sl@0
   458
     * could be found.
sl@0
   459
     * The caller owns the returned object and is responsible for deleting it.
sl@0
   460
     * @stable ICU 2.0
sl@0
   461
     */
sl@0
   462
    static BreakIterator* U_EXPORT2
sl@0
   463
    createCharacterInstance(const Locale& where, UErrorCode& status);
sl@0
   464
sl@0
   465
    /**
sl@0
   466
     * Create BreakIterator for sentence-breaks using specified locale
sl@0
   467
     * Returns an instance of a BreakIterator implementing sentence breaks.
sl@0
   468
     * @param where the locale.
sl@0
   469
     * @param status The error code.
sl@0
   470
     * @return A BreakIterator for sentence-breaks.  The UErrorCode& status
sl@0
   471
     * parameter is used to return status information to the user.
sl@0
   472
     * To check whether the construction succeeded or not, you should check
sl@0
   473
     * the value of U_SUCCESS(err).  If you wish more detailed information, you
sl@0
   474
     * can check for informational error results which still indicate success.
sl@0
   475
     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
sl@0
   476
     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
sl@0
   477
     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
sl@0
   478
     * used; neither the requested locale nor any of its fall back locales
sl@0
   479
     * could be found.
sl@0
   480
     * The caller owns the returned object and is responsible for deleting it.
sl@0
   481
     * @stable ICU 2.0
sl@0
   482
     */
sl@0
   483
    static BreakIterator* U_EXPORT2
sl@0
   484
    createSentenceInstance(const Locale& where, UErrorCode& status);
sl@0
   485
sl@0
   486
    /**
sl@0
   487
     * Create BreakIterator for title-casing breaks using the specified locale
sl@0
   488
     * Returns an instance of a BreakIterator implementing title breaks.
sl@0
   489
     * The iterator returned locates title boundaries as described for
sl@0
   490
     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
sl@0
   491
     * please use Word Boundary iterator.{@link #createWordInstance }
sl@0
   492
     *
sl@0
   493
     * @param where the locale.
sl@0
   494
     * @param status The error code.
sl@0
   495
     * @return A BreakIterator for title-breaks.  The UErrorCode& status
sl@0
   496
     * parameter is used to return status information to the user.
sl@0
   497
     * To check whether the construction succeeded or not, you should check
sl@0
   498
     * the value of U_SUCCESS(err).  If you wish more detailed information, you
sl@0
   499
     * can check for informational error results which still indicate success.
sl@0
   500
     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
sl@0
   501
     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
sl@0
   502
     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
sl@0
   503
     * used; neither the requested locale nor any of its fall back locales
sl@0
   504
     * could be found.
sl@0
   505
     * The caller owns the returned object and is responsible for deleting it.
sl@0
   506
     * @stable ICU 2.1
sl@0
   507
     */
sl@0
   508
    static BreakIterator* U_EXPORT2
sl@0
   509
    createTitleInstance(const Locale& where, UErrorCode& status);
sl@0
   510
sl@0
   511
    /**
sl@0
   512
     * Get the set of Locales for which TextBoundaries are installed.
sl@0
   513
     * <p><b>Note:</b> this will not return locales added through the register
sl@0
   514
     * call. To see the registered locales too, use the getAvailableLocales
sl@0
   515
     * function that returns a StringEnumeration object </p>
sl@0
   516
     * @param count the output parameter of number of elements in the locale list
sl@0
   517
     * @return available locales
sl@0
   518
     * @stable ICU 2.0
sl@0
   519
     */
sl@0
   520
    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
sl@0
   521
sl@0
   522
    /**
sl@0
   523
     * Get name of the object for the desired Locale, in the desired langauge.
sl@0
   524
     * @param objectLocale must be from getAvailableLocales.
sl@0
   525
     * @param displayLocale specifies the desired locale for output.
sl@0
   526
     * @param name the fill-in parameter of the return value
sl@0
   527
     * Uses best match.
sl@0
   528
     * @return user-displayable name
sl@0
   529
     * @stable ICU 2.0
sl@0
   530
     */
sl@0
   531
    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
sl@0
   532
                                         const Locale& displayLocale,
sl@0
   533
                                         UnicodeString& name);
sl@0
   534
sl@0
   535
    /**
sl@0
   536
     * Get name of the object for the desired Locale, in the langauge of the
sl@0
   537
     * default locale.
sl@0
   538
     * @param objectLocale must be from getMatchingLocales
sl@0
   539
     * @param name the fill-in parameter of the return value
sl@0
   540
     * @return user-displayable name
sl@0
   541
     * @stable ICU 2.0
sl@0
   542
     */
sl@0
   543
    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
sl@0
   544
                                         UnicodeString& name);
sl@0
   545
sl@0
   546
    /**
sl@0
   547
     * Thread safe client-buffer-based cloning operation
sl@0
   548
     *    Do NOT call delete on a safeclone, since 'new' is not used to create it.
sl@0
   549
     * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
sl@0
   550
     * If buffer is not large enough, new memory will be allocated.
sl@0
   551
     * @param BufferSize reference to size of allocated space.
sl@0
   552
     * If BufferSize == 0, a sufficient size for use in cloning will
sl@0
   553
     * be returned ('pre-flighting')
sl@0
   554
     * If BufferSize is not enough for a stack-based safe clone,
sl@0
   555
     * new memory will be allocated.
sl@0
   556
     * @param status to indicate whether the operation went on smoothly or there were errors
sl@0
   557
     *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
sl@0
   558
     *  necessary.
sl@0
   559
     * @return pointer to the new clone
sl@0
   560
     *
sl@0
   561
     * @stable ICU 2.0
sl@0
   562
     */
sl@0
   563
    virtual BreakIterator *  createBufferClone(void *stackBuffer,
sl@0
   564
                                               int32_t &BufferSize,
sl@0
   565
                                               UErrorCode &status) = 0;
sl@0
   566
sl@0
   567
    /**
sl@0
   568
     *   Determine whether the BreakIterator was created in user memory by
sl@0
   569
     *   createBufferClone(), and thus should not be deleted.  Such objects
sl@0
   570
     *   must be closed by an explicit call to the destructor (not delete).
sl@0
   571
     *  @stable ICU 2.0
sl@0
   572
     */
sl@0
   573
    inline UBool isBufferClone(void);
sl@0
   574
sl@0
   575
#if !UCONFIG_NO_SERVICE
sl@0
   576
    /**
sl@0
   577
     * Register a new break iterator of the indicated kind, to use in the given locale.
sl@0
   578
     * The break iterator will be adopted.  Clones of the iterator will be returned
sl@0
   579
     * if a request for a break iterator of the given kind matches or falls back to
sl@0
   580
     * this locale.
sl@0
   581
     * @param toAdopt the BreakIterator instance to be adopted
sl@0
   582
     * @param locale the Locale for which this instance is to be registered
sl@0
   583
     * @param kind the type of iterator for which this instance is to be registered
sl@0
   584
     * @param status the in/out status code, no special meanings are assigned
sl@0
   585
     * @return a registry key that can be used to unregister this instance
sl@0
   586
     * @stable ICU 2.4
sl@0
   587
     */
sl@0
   588
    static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
sl@0
   589
                                        const Locale& locale,
sl@0
   590
                                        UBreakIteratorType kind,
sl@0
   591
                                        UErrorCode& status);
sl@0
   592
sl@0
   593
    /**
sl@0
   594
     * Unregister a previously-registered BreakIterator using the key returned from the
sl@0
   595
     * register call.  Key becomes invalid after a successful call and should not be used again.
sl@0
   596
     * The BreakIterator corresponding to the key will be deleted.
sl@0
   597
     * @param key the registry key returned by a previous call to registerInstance
sl@0
   598
     * @param status the in/out status code, no special meanings are assigned
sl@0
   599
     * @return TRUE if the iterator for the key was successfully unregistered
sl@0
   600
     * @stable ICU 2.4
sl@0
   601
     */
sl@0
   602
    static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
sl@0
   603
sl@0
   604
    /**
sl@0
   605
     * Return a StringEnumeration over the locales available at the time of the call,
sl@0
   606
     * including registered locales.
sl@0
   607
     * @return a StringEnumeration over the locales available at the time of the call
sl@0
   608
     * @stable ICU 2.4
sl@0
   609
     */
sl@0
   610
    static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
sl@0
   611
#endif
sl@0
   612
sl@0
   613
    /**
sl@0
   614
     * Returns the locale for this break iterator. Two flavors are available: valid and
sl@0
   615
     * actual locale.
sl@0
   616
     * @draft ICU 2.8 likely to change after ICU 3.0, based on feedback
sl@0
   617
     */
sl@0
   618
    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
sl@0
   619
sl@0
   620
    /** Get the locale for this break iterator object. You can choose between valid and actual locale.
sl@0
   621
     *  @param type type of the locale we're looking for (valid or actual)
sl@0
   622
     *  @param status error code for the operation
sl@0
   623
     *  @return the locale
sl@0
   624
     *  @internal
sl@0
   625
     */
sl@0
   626
    const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
sl@0
   627
sl@0
   628
 private:
sl@0
   629
    static BreakIterator* buildInstance(const Locale& loc, const char *type, UBool dict, UErrorCode& status);
sl@0
   630
    static BreakIterator* createInstance(const Locale& loc, UBreakIteratorType kind, UErrorCode& status);
sl@0
   631
    static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
sl@0
   632
sl@0
   633
    friend class ICUBreakIteratorFactory;
sl@0
   634
    friend class ICUBreakIteratorService;
sl@0
   635
sl@0
   636
protected:
sl@0
   637
    /** @internal */
sl@0
   638
    BreakIterator();
sl@0
   639
    /** @internal */
sl@0
   640
    UBool fBufferClone;
sl@0
   641
    /** @internal */
sl@0
   642
    BreakIterator (const BreakIterator &other) : UObject(other), fBufferClone(FALSE) {}
sl@0
   643
sl@0
   644
private:
sl@0
   645
sl@0
   646
    /** @internal */
sl@0
   647
    char actualLocale[ULOC_FULLNAME_CAPACITY];
sl@0
   648
    char validLocale[ULOC_FULLNAME_CAPACITY];
sl@0
   649
sl@0
   650
    /**
sl@0
   651
     * The assignment operator has no real implementation.
sl@0
   652
     * It's provided to make the compiler happy. Do not call.
sl@0
   653
     */
sl@0
   654
    BreakIterator& operator=(const BreakIterator&);
sl@0
   655
};
sl@0
   656
sl@0
   657
/**
 * Inline definition of BreakIterator::isBufferClone().
 * @return the current value of the fBufferClone member.
 */
inline UBool BreakIterator::isBufferClone(void)
{
    return this->fBufferClone;
}
sl@0
   661
sl@0
   662
U_NAMESPACE_END
sl@0
   663
sl@0
   664
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
sl@0
   665
sl@0
   666
#endif // BRKITER_H
sl@0
   667
//eof
sl@0
   668