sl@0: // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies). sl@0: // All rights reserved. sl@0: // This component and the accompanying materials are made available sl@0: // under the terms of "Eclipse Public License v1.0" sl@0: // which accompanies this distribution, and is available sl@0: // at the URL "http://www.eclipse.org/legal/epl-v10.html". sl@0: // sl@0: // Initial Contributors: sl@0: // Nokia Corporation - initial contribution. sl@0: // sl@0: // Contributors: sl@0: // sl@0: // Description: sl@0: // sl@0: sl@0: sl@0: #include sl@0: #include sl@0: #include "LineBreak.h" sl@0: #include "LineBreakImp.h" sl@0: #include "GlyphSel.h" sl@0: sl@0: const TText16 KThaiCodePageStart = 0x0E00; sl@0: const TText16 KThaiCodePageEnd = 0x0E5C; sl@0: const TUint KNumThaiCharacters = KThaiCodePageEnd - KThaiCodePageStart; sl@0: sl@0: /** sl@0: Ecanpsulates rules for when Thai character sequence line breaking. sl@0: @internalComponent sl@0: */ sl@0: class ThaiLinebreakRules sl@0: { sl@0: public: sl@0: enum TCharClassification sl@0: { sl@0: EOutOfRange, sl@0: sl@0: EConsOAng, sl@0: EConsYoYak, sl@0: EConsHoHip, sl@0: EConsWoWean, sl@0: EConsDigraph, sl@0: EConsOther, sl@0: sl@0: EPostVowelA, sl@0: EPostVowelAA, sl@0: EPostVowelOther, sl@0: EPreVowel, sl@0: sl@0: EDepMaiHanAkat, sl@0: EDepSaraI, sl@0: EDepOther, sl@0: sl@0: // marker for end sl@0: EMaxClassification sl@0: }; sl@0: enum sl@0: { sl@0: KOutOfRangeFlag = 1 << EOutOfRange, sl@0: KConsOAngFlag = 1 << EConsOAng, sl@0: KConsYoYakFlag = 1 << EConsYoYak, sl@0: KConsHoHipFlag = 1 << EConsHoHip, sl@0: KConsWoWeanFlag = 1 << EConsWoWean, sl@0: KConsDigraphFlag = 1 << EConsDigraph, sl@0: KConsOtherFlag = 1 << EConsOther, sl@0: KPostVowelAFlag = 1 << EPostVowelA, sl@0: KPostVowelAAFlag = 1 << EPostVowelAA, sl@0: KPostVowelOtherFlag = 1 << EPostVowelOther, sl@0: KPreVowelFlag = 1 << EPreVowel, sl@0: KDepMaiHanAkatFlag = 1 << EDepMaiHanAkat, sl@0: KDepSaraIFlag = 1 << EDepSaraI, sl@0: KDepOtherFlag = 1 << EDepOther, sl@0: sl@0: KSpecialDepFlags = KDepMaiHanAkatFlag | KDepSaraIFlag, sl@0: KPostVowelFlags = KPostVowelAFlag | KPostVowelAAFlag | KPostVowelOtherFlag, sl@0: KConsFlags = KConsOtherFlag | KConsDigraphFlag | KConsWoWeanFlag sl@0: | KConsHoHipFlag | KConsYoYakFlag | KConsOAngFlag, sl@0: KAllFlags = KOutOfRangeFlag | KConsOAngFlag | KConsYoYakFlag sl@0: | KConsHoHipFlag | KConsWoWeanFlag | KConsDigraphFlag sl@0: | KConsOtherFlag | KPostVowelAFlag | KPostVowelAAFlag sl@0: | KPostVowelOtherFlag | KPreVowelFlag | KDepMaiHanAkatFlag sl@0: | KDepSaraIFlag | KDepOtherFlag sl@0: }; sl@0: sl@0: /** Returns the Thai linebreaking class of the character. */ sl@0: static TCharClassification Class(TInt aChar); sl@0: /** Returns true if aChar is a combining character according to aBreaker. */ sl@0: static TBool IsCombiningChar(TInt aChar, const MLineBreaker& aBreaker); sl@0: /** Returns the Thai linebreaking class of the character at (*aChar), sl@0: searching backwards for the base character if it is foreign and of type CM. sl@0: @param aChar The position of the character within the string. sl@0: @param aStart The start of the string. sl@0: @param aBreaker The line breaker to query for line breaking class. sl@0: */ sl@0: static TCharClassification DerivedClass(const TText* aChar, sl@0: const TText* aStart, const MLineBreaker& aBreaker); sl@0: /** sl@0: Gets the line break rule for the previous and current character pair sl@0: @param aPrevClass Class of the previous character. sl@0: @param aClass Class of the current character. sl@0: @return the rule code corresponding to the input pair. sl@0: */ sl@0: static TBool BreakAllowedBetween( sl@0: TCharClassification aPrevClass, TCharClassification aClass); sl@0: /** Returns whether a line break is allowed before the SA character at sl@0: (*aChar). sl@0: @param aChar The position of the character within the string. sl@0: @param aStart The start of the string. sl@0: @param aBreaker The line breaker to query for line breaking class. sl@0: */ sl@0: static TBool BreakAllowedAt(const TText* aChar, sl@0: const TText* aStart, const MLineBreaker& aBreaker); sl@0: sl@0: /** sl@0: Classification of each character sl@0: */ sl@0: static const TUint8 KCharClassifications[KNumThaiCharacters]; sl@0: sl@0: /** sl@0: Rules table of prev to next character sl@0: */ sl@0: static const TUint32 KRules[EMaxClassification]; sl@0: }; sl@0: sl@0: const TUint8 ThaiLinebreakRules::KCharClassifications[KNumThaiCharacters] = sl@0: { sl@0: EOutOfRange, // 0x0E00 sl@0: EConsOther, // 0x0E01 sl@0: EConsOther, // 0x0E02 sl@0: EConsOther, // 0x0E03 sl@0: EConsOther, // 0x0E04 sl@0: EConsOther, // 0x0E05 sl@0: EConsOther, // 0x0E06 sl@0: EConsDigraph, // 0x0E07 sl@0: EConsOther, // 0x0E08 sl@0: EConsOther, // 0x0E09 sl@0: EConsOther, // 0x0E0A sl@0: EConsOther, // 0x0E0B sl@0: EConsOther, // 0x0E0C sl@0: EConsOther, // 0x0E0D sl@0: EConsOther, // 0x0E0E sl@0: EConsOther, // 0x0E0F sl@0: sl@0: EConsOther, // 0x0E10 sl@0: EConsOther, // 0x0E11 sl@0: EConsOther, // 0x0E12 sl@0: EConsOther, // 0x0E13 sl@0: EConsOther, // 0x0E14 sl@0: EConsOther, // 0x0E15 sl@0: EConsOther, // 0x0E16 sl@0: EConsOther, // 0x0E17 sl@0: EConsOther, // 0x0E18 sl@0: EConsDigraph, // 0x0E19 sl@0: EConsOther, // 0x0E1A sl@0: EConsOther, // 0x0E1B sl@0: EConsOther, // 0x0E1C sl@0: EConsOther, // 0x0E1D sl@0: EConsOther, // 0x0E1E sl@0: EConsOther, // 0x0E1F sl@0: sl@0: EConsOther, // 0x0E20 sl@0: EConsDigraph, // 0x0E21 sl@0: EConsYoYak, // 0x0E22 sl@0: EConsDigraph, // 0x0E23 sl@0: EConsOther, // 0x0E24 sl@0: EConsDigraph, // 0x0E25 sl@0: EConsOther, // 0x0E26 sl@0: EConsWoWean, // 0x0E27 sl@0: EConsOther, // 0x0E28 sl@0: EConsOther, // 0x0E29 sl@0: EConsOther, // 0x0E2A sl@0: EConsHoHip, // 0x0E2B sl@0: EConsOther, // 0x0E2C sl@0: EConsOAng, // 0x0E2D sl@0: EConsOther, // 0x0E2E sl@0: EOutOfRange, // 0x0E2F sl@0: sl@0: EPostVowelA, // 0x0E30 sl@0: EDepMaiHanAkat, // 0x0E31 sl@0: EPostVowelAA, // 0x0E32 sl@0: EPostVowelOther,// 0x0E33 sl@0: sl@0: EDepSaraI, // 0x0E34 sl@0: EDepOther, // 0x0E35 sl@0: EDepOther, // 0x0E36 sl@0: EDepOther, // 0x0E37 sl@0: EDepOther, // 0x0E38 sl@0: EDepOther, // 0x0E39 sl@0: EDepOther, // 0x0E3A sl@0: sl@0: EOutOfRange, // 0x0E3B sl@0: EOutOfRange, // 0x0E3C sl@0: EOutOfRange, // 0x0E3D sl@0: EOutOfRange, // 0x0E3E sl@0: EOutOfRange, // 0x0E3F sl@0: sl@0: EPreVowel, // 0x0E40 sl@0: EPreVowel, // 0x0E41 sl@0: EPreVowel, // 0x0E42 sl@0: EPreVowel, // 0x0E43 sl@0: EPreVowel, // 0x0E44 sl@0: sl@0: EPostVowelOther,// 0x0E45 sl@0: EOutOfRange, // 0x0E46 sl@0: sl@0: EDepOther, // 0x0E47 sl@0: EDepOther, // 0x0E48 sl@0: EDepOther, // 0x0E49 sl@0: EDepOther, // 0x0E4A sl@0: EDepOther, // 0x0E4B sl@0: EDepOther, // 0x0E4C sl@0: EDepOther, // 0x0E4D sl@0: EDepOther, // 0x0E4E sl@0: EOutOfRange, // 0x0E4F sl@0: sl@0: EOutOfRange, // 0x0E50 sl@0: EOutOfRange, // 0x0E51 sl@0: EOutOfRange, // 0x0E52 sl@0: EOutOfRange, // 0x0E53 sl@0: EOutOfRange, // 0x0E54 sl@0: EOutOfRange, // 0x0E55 sl@0: EOutOfRange, // 0x0E56 sl@0: EOutOfRange, // 0x0E57 sl@0: EOutOfRange, // 0x0E58 sl@0: EOutOfRange, // 0x0E59 sl@0: EOutOfRange, // 0x0E5A sl@0: EOutOfRange // 0x0E5B sl@0: }; sl@0: sl@0: const TUint32 KNormalBreaksBeforeCons = sl@0: ThaiLinebreakRules::KPreVowelFlag sl@0: | ThaiLinebreakRules::KConsOtherFlag sl@0: | ThaiLinebreakRules::KConsDigraphFlag sl@0: | ThaiLinebreakRules::KConsHoHipFlag sl@0: | ThaiLinebreakRules::KOutOfRangeFlag; sl@0: const TUint32 KNormalBreaksBeforePostVowel = sl@0: ThaiLinebreakRules::KPreVowelFlag sl@0: | ThaiLinebreakRules::KPostVowelFlags sl@0: | ThaiLinebreakRules::KConsFlags sl@0: | ThaiLinebreakRules::KOutOfRangeFlag; sl@0: const TUint32 ThaiLinebreakRules::KRules[EMaxClassification] = sl@0: { sl@0: /* Prev Char EOutOfRange */ sl@0: KAllFlags - KOutOfRangeFlag, sl@0: /* Prev Char EConsOAng */ sl@0: KNormalBreaksBeforeCons, sl@0: /* Prev Char EConsYoYak */ sl@0: KNormalBreaksBeforeCons, sl@0: /* Prev Char EConsHoHip */ sl@0: KNormalBreaksBeforeCons - KConsDigraphFlag, sl@0: /* Prev Char EConsWoWean */ sl@0: KNormalBreaksBeforeCons - KConsDigraphFlag - KConsHoHipFlag, sl@0: /* Prev Char EConsDigraph */ sl@0: KNormalBreaksBeforeCons, sl@0: /* Prev Char EConsOther */ sl@0: KNormalBreaksBeforeCons, sl@0: /* Prev Char EPostVowelA */ sl@0: KNormalBreaksBeforePostVowel, sl@0: /* Prev Char EPostVowelAA */ sl@0: KNormalBreaksBeforePostVowel - KPostVowelAFlag, sl@0: /* Prev Char EPostVowelOther */ sl@0: KNormalBreaksBeforePostVowel, sl@0: /* Prev Char EPreVowel */ sl@0: KPreVowelFlag | KPostVowelFlags | KOutOfRangeFlag, sl@0: /* Prev Char EDepMaiHanAkat */ sl@0: KSpecialDepFlags | KPreVowelFlag | KPostVowelAAFlag sl@0: | KPostVowelAFlag | KOutOfRangeFlag, sl@0: /* Prev Char EDepSaraI */ sl@0: KSpecialDepFlags | KPreVowelFlag | KPostVowelAAFlag sl@0: | KPostVowelAFlag | KOutOfRangeFlag, sl@0: /* Prev Char EDepOther */ sl@0: KSpecialDepFlags | KPreVowelFlag | KPostVowelAAFlag sl@0: | KPostVowelAFlag | KConsOtherFlag | KConsDigraphFlag sl@0: | KConsWoWeanFlag | KConsHoHipFlag | KOutOfRangeFlag sl@0: }; sl@0: sl@0: ThaiLinebreakRules::TCharClassification ThaiLinebreakRules::Class(TInt a) sl@0: { sl@0: return static_cast( sl@0: (KThaiCodePageStart <= a && a < KThaiCodePageEnd) ? sl@0: KCharClassifications[a - KThaiCodePageStart] : sl@0: EOutOfRange); sl@0: } sl@0: sl@0: TBool ThaiLinebreakRules::IsCombiningChar(TInt aChar, sl@0: const MLineBreaker& aBreaker) sl@0: { sl@0: TUint dummy1, dummy2; sl@0: return aBreaker.LineBreakClass(aChar, dummy1, dummy2) == MLineBreaker::ECmLineBreakClass; sl@0: } sl@0: sl@0: ThaiLinebreakRules::TCharClassification ThaiLinebreakRules::DerivedClass( sl@0: const TText* aChar, const TText* aStart, const MLineBreaker& aBreaker) sl@0: { sl@0: ThaiLinebreakRules::TCharClassification c = Class(*aChar); sl@0: while (c == EOutOfRange && aChar != aStart sl@0: && IsCombiningChar(*aChar, aBreaker)) sl@0: { sl@0: --aChar; sl@0: c = Class(*aChar); sl@0: } sl@0: return c; sl@0: } sl@0: sl@0: TBool ThaiLinebreakRules::BreakAllowedBetween( sl@0: ThaiLinebreakRules::TCharClassification aClass, sl@0: ThaiLinebreakRules::TCharClassification aNextClass) sl@0: { sl@0: return KRules[aClass] & (1 << aNextClass); sl@0: } sl@0: sl@0: TBool ThaiLinebreakRules::BreakAllowedAt(const TText* aChar, sl@0: const TText* aStart, const MLineBreaker& aBreaker) sl@0: { sl@0: __ASSERT_DEBUG(aStart < aChar, User::Invariant()); sl@0: TCharClassification c = Class(*aChar); sl@0: if (c == EOutOfRange && IsCombiningChar(*aChar, aBreaker)) sl@0: return EFalse; sl@0: return BreakAllowedBetween(DerivedClass(aChar - 1, aStart, aBreaker), c); sl@0: } sl@0: sl@0: /** sl@0: Returns whether a line break is possible within a run of characters all having sl@0: the class ESaLineBreakClass (Complex content). Languages with Unicocde sl@0: characters having such a class include: Thai, Lao, Myanmar and Khmer. This sl@0: default implementation of the GetLineBreakInContext() method only supports the sl@0: Thai script. Breaks are determined in Thai based on a simple understanding of sl@0: syllable boundaries. When characters from the other unsupported Sa class sl@0: languages are found the method exits with EFalse. sl@0: @param aText sl@0: The text to be searched, which is a contiguous run of characters of class SA sl@0: (or CM attatched to SA). The break position may be restricted further by sl@0: aMinBreakPos and aMaxBreakPos, but more text is provided for extra context sl@0: should it be needed. sl@0: @param aMinBreakPos sl@0: The start of the text to be considered for line breaks. sl@0: @param aMaxBreakPos sl@0: The end of the text to be considered for line breaks. sl@0: @param aForwards sl@0: ETrue if aBreakPos is to be set with the first legal break position, sl@0: EFalse if aBreakPos is to be set with the last legal break position. sl@0: @param aBreakPos sl@0: If break position found on exit its value is >= Min and <= Max sl@0: positions supplied. sl@0: @return TBool sl@0: ETrue if and only if a legal break was found, EFalse otherwise. sl@0: @publishedAll sl@0: @released sl@0: */ sl@0: EXPORT_C TBool MLineBreaker::GetLineBreakInContext(const TDesC16& aText, sl@0: TInt aMinBreakPos, TInt aMaxBreakPos, TBool aForwards, sl@0: TInt& aBreakPos) const sl@0: { sl@0: __ASSERT_DEBUG (0 <= aMinBreakPos && aMaxBreakPos <= aText.Length(), sl@0: Panic(ELineBreakPanic_InvalidInputParam)); sl@0: sl@0: TInt length = aText.Length(); sl@0: sl@0: if (aMinBreakPos < 1) sl@0: aMinBreakPos = 1; sl@0: if (length - 1 < aMaxBreakPos) sl@0: aMaxBreakPos = length - 1; sl@0: if (aMaxBreakPos < aMinBreakPos) sl@0: return EFalse; sl@0: sl@0: const TText16* text = aText.Ptr(); sl@0: sl@0: if (*text == KZeroWidthSpace) sl@0: { sl@0: aBreakPos = aMinBreakPos; sl@0: return ETrue; sl@0: } sl@0: else if (*(text+length-1) == KZeroWidthSpace) sl@0: return EFalse; sl@0: sl@0: TInt start = aForwards? aMinBreakPos : aMaxBreakPos; sl@0: TInt end = aForwards? aMaxBreakPos + 1 : aMinBreakPos - 1; sl@0: TInt direction = aForwards? 1 : -1; sl@0: for (TInt i = start; i != end; i += direction) sl@0: { sl@0: if (ThaiLinebreakRules::BreakAllowedAt(text + i, text, *this)) sl@0: { sl@0: aBreakPos = i; sl@0: return ETrue; sl@0: } sl@0: } sl@0: return EFalse; sl@0: }