os/textandloc/fontservices/textbase/sgdi/LinebreakComplex.cpp
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/textandloc/fontservices/textbase/sgdi/LinebreakComplex.cpp	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,388 @@
     1.4 +// Copyright (c) 2003-2010 Nokia Corporation and/or its subsidiary(-ies).
     1.5 +// All rights reserved.
     1.6 +// This component and the accompanying materials are made available
     1.7 +// under the terms of "Eclipse Public License v1.0"
     1.8 +// which accompanies this distribution, and is available
     1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
    1.10 +//
    1.11 +// Initial Contributors:
    1.12 +// Nokia Corporation - initial contribution.
    1.13 +//
    1.14 +// Contributors:
    1.15 +//
    1.16 +// Description:
    1.17 +//
    1.18 +
    1.19 +
    1.20 +#include <e32std.h>
    1.21 +#include <e32svr.h>
    1.22 +#include "LineBreak.h"
    1.23 +#include "LineBreakImp.h"
    1.24 +#include "GlyphSel.h"
    1.25 +
    1.26 +const TText16 KThaiCodePageStart = 0x0E00;	// first code that has an entry in the classification table
    1.27 +const TText16 KThaiCodePageEnd   = 0x0E5C;	// exclusive upper bound: the last table entry is for 0x0E5B
    1.28 +const TUint KNumThaiCharacters = KThaiCodePageEnd - KThaiCodePageStart;	// number of entries in the classification table
    1.29 +
    1.30 +/**
    1.31 + Encapsulates the rules for line breaking within Thai character sequences.
    1.32 +@internalComponent
    1.33 +*/
    1.34 +class ThaiLinebreakRules
    1.35 +	{
    1.36 +public:
    1.37 +	enum TCharClassification	// values are used as indices into KRules and as bit positions
    1.38 +		{
    1.39 +		EOutOfRange,	// code outside the table, or a table entry given no other class
    1.40 +		// ECons* classes
    1.41 +		EConsOAng,	// 0x0E2D
    1.42 +		EConsYoYak,	// 0x0E22
    1.43 +		EConsHoHip,	// 0x0E2B
    1.44 +		EConsWoWean,	// 0x0E27
    1.45 +		EConsDigraph,	// 0x0E07, 0x0E19, 0x0E21, 0x0E23, 0x0E25
    1.46 +		EConsOther,	// the remaining classified codes in 0x0E01..0x0E2E
    1.47 +		// EPostVowel* and EPreVowel classes
    1.48 +		EPostVowelA,	// 0x0E30
    1.49 +		EPostVowelAA,	// 0x0E32
    1.50 +		EPostVowelOther,	// 0x0E33, 0x0E45
    1.51 +		EPreVowel,	// 0x0E40..0x0E44
    1.52 +		// EDep* classes
    1.53 +		EDepMaiHanAkat,	// 0x0E31
    1.54 +		EDepSaraI,	// 0x0E34
    1.55 +		EDepOther,	// 0x0E35..0x0E3A, 0x0E47..0x0E4E
    1.56 +
    1.57 +		// Number of classes; used as the size of KRules, so it must stay last.
    1.58 +		EMaxClassification
    1.59 +		};
    1.60 +	enum	// one bit per TCharClassification value, so a set of classes fits in one word
    1.61 +		{
    1.62 +		KOutOfRangeFlag = 1 << EOutOfRange,
    1.63 +		KConsOAngFlag = 1 << EConsOAng,
    1.64 +		KConsYoYakFlag = 1 << EConsYoYak,
    1.65 +		KConsHoHipFlag = 1 << EConsHoHip,
    1.66 +		KConsWoWeanFlag = 1 << EConsWoWean,
    1.67 +		KConsDigraphFlag = 1 << EConsDigraph,
    1.68 +		KConsOtherFlag = 1 << EConsOther,
    1.69 +		KPostVowelAFlag = 1 << EPostVowelA,
    1.70 +		KPostVowelAAFlag = 1 << EPostVowelAA,
    1.71 +		KPostVowelOtherFlag = 1 << EPostVowelOther,
    1.72 +		KPreVowelFlag = 1 << EPreVowel,
    1.73 +		KDepMaiHanAkatFlag = 1 << EDepMaiHanAkat,
    1.74 +		KDepSaraIFlag = 1 << EDepSaraI,
    1.75 +		KDepOtherFlag = 1 << EDepOther,
    1.76 +		// Unions of the single-class flags above.
    1.77 +		KSpecialDepFlags = KDepMaiHanAkatFlag | KDepSaraIFlag,
    1.78 +		KPostVowelFlags = KPostVowelAFlag | KPostVowelAAFlag | KPostVowelOtherFlag,
    1.79 +		KConsFlags = KConsOtherFlag | KConsDigraphFlag | KConsWoWeanFlag
    1.80 +			| KConsHoHipFlag | KConsYoYakFlag | KConsOAngFlag,
    1.81 +		KAllFlags = KOutOfRangeFlag | KConsOAngFlag | KConsYoYakFlag
    1.82 +			| KConsHoHipFlag | KConsWoWeanFlag | KConsDigraphFlag
    1.83 +			| KConsOtherFlag | KPostVowelAFlag | KPostVowelAAFlag
    1.84 +			| KPostVowelOtherFlag | KPreVowelFlag | KDepMaiHanAkatFlag
    1.85 +			| KDepSaraIFlag | KDepOtherFlag
    1.86 +		};
    1.87 +
    1.88 +	/** Returns the Thai line breaking class of aChar; EOutOfRange if it has no table entry. */
    1.89 +	static TCharClassification Class(TInt aChar);
    1.90 +	/** Returns whether aBreaker reports aChar as having line break class CM (ECmLineBreakClass). */
    1.91 +	static TBool IsCombiningChar(TInt aChar, const MLineBreaker& aBreaker);
    1.92 +	/** Returns the Thai line breaking class of the character at (*aChar). While
    1.93 +	that class is EOutOfRange and the character is of type CM, the previous character is tried.
    1.94 +	@param aChar The position of the character within the string.
    1.95 +	@param aStart The start of the string; the backwards search never goes before it.
    1.96 +	@param aBreaker The line breaker to query for line breaking class.
    1.97 +	*/
    1.98 +	static TCharClassification DerivedClass(const TText* aChar,
    1.99 +		const TText* aStart, const MLineBreaker& aBreaker);
   1.100 +	/**
   1.101 +	 Looks up in KRules whether a break is allowed between two adjacent classes.
   1.102 +	@param aPrevClass Class of the previous character (selects the KRules entry).
   1.103 +	@param aClass Class of the current character (selects the bit that is tested).
   1.104 +	@return Non-zero if a break is allowed between the pair, zero otherwise.
   1.105 +	*/
   1.106 +	static TBool BreakAllowedBetween(
   1.107 +		TCharClassification aPrevClass, TCharClassification aClass);
   1.108 +	/** Returns whether a line break is allowed before the SA character at
   1.109 +	(*aChar). A break is never allowed before a CM character that has no Thai class.
   1.110 +	@param aChar The position of the character within the string; must be after aStart.
   1.111 +	@param aStart The start of the string.
   1.112 +	@param aBreaker The line breaker to query for line breaking class.
   1.113 +	*/
   1.114 +	static TBool BreakAllowedAt(const TText* aChar,
   1.115 +		const TText* aStart, const MLineBreaker& aBreaker);
   1.116 +
   1.117 +	/**
   1.118 +	 Classification of each code in [KThaiCodePageStart, KThaiCodePageEnd).
   1.119 +	*/
   1.120 +	static const TUint8 KCharClassifications[KNumThaiCharacters];
   1.121 +
   1.122 +	/**
   1.123 +	 Indexed by the previous class; bit (1 << next class) set means a break is allowed.
   1.124 +	*/
   1.125 +	static const TUint32 KRules[EMaxClassification];
   1.126 +	};
   1.127 +
   1.128 +const TUint8 ThaiLinebreakRules::KCharClassifications[KNumThaiCharacters] = // entry i classifies code KThaiCodePageStart + i
   1.129 +	{
   1.130 +	EOutOfRange,	// 0x0E00
   1.131 +	EConsOther,		// 0x0E01
   1.132 +	EConsOther,		// 0x0E02
   1.133 +	EConsOther,		// 0x0E03
   1.134 +	EConsOther,		// 0x0E04
   1.135 +	EConsOther,		// 0x0E05
   1.136 +	EConsOther,		// 0x0E06
   1.137 +	EConsDigraph,	// 0x0E07
   1.138 +	EConsOther,		// 0x0E08
   1.139 +	EConsOther,		// 0x0E09
   1.140 +	EConsOther,		// 0x0E0A
   1.141 +	EConsOther,		// 0x0E0B
   1.142 +	EConsOther,		// 0x0E0C
   1.143 +	EConsOther,		// 0x0E0D
   1.144 +	EConsOther,		// 0x0E0E
   1.145 +	EConsOther,		// 0x0E0F
   1.146 +
   1.147 +	EConsOther,		// 0x0E10
   1.148 +	EConsOther,		// 0x0E11
   1.149 +	EConsOther,		// 0x0E12
   1.150 +	EConsOther,		// 0x0E13
   1.151 +	EConsOther,		// 0x0E14
   1.152 +	EConsOther,		// 0x0E15
   1.153 +	EConsOther,		// 0x0E16
   1.154 +	EConsOther,		// 0x0E17
   1.155 +	EConsOther,		// 0x0E18
   1.156 +	EConsDigraph,	// 0x0E19
   1.157 +	EConsOther,		// 0x0E1A
   1.158 +	EConsOther,		// 0x0E1B
   1.159 +	EConsOther,		// 0x0E1C
   1.160 +	EConsOther,		// 0x0E1D
   1.161 +	EConsOther,		// 0x0E1E
   1.162 +	EConsOther,		// 0x0E1F
   1.163 +
   1.164 +	EConsOther,		// 0x0E20
   1.165 +	EConsDigraph,	// 0x0E21
   1.166 +	EConsYoYak,		// 0x0E22
   1.167 +	EConsDigraph,	// 0x0E23
   1.168 +	EConsOther,		// 0x0E24
   1.169 +	EConsDigraph,	// 0x0E25
   1.170 +	EConsOther,		// 0x0E26
   1.171 +	EConsWoWean,	// 0x0E27
   1.172 +	EConsOther,		// 0x0E28
   1.173 +	EConsOther,		// 0x0E29
   1.174 +	EConsOther,		// 0x0E2A
   1.175 +	EConsHoHip,		// 0x0E2B
   1.176 +	EConsOther,		// 0x0E2C
   1.177 +	EConsOAng,		// 0x0E2D
   1.178 +	EConsOther,		// 0x0E2E
   1.179 +	EOutOfRange,	// 0x0E2F
   1.180 +
   1.181 +	EPostVowelA,	// 0x0E30
   1.182 +	EDepMaiHanAkat,	// 0x0E31
   1.183 +	EPostVowelAA,	// 0x0E32
   1.184 +	EPostVowelOther,// 0x0E33
   1.185 +
   1.186 +	EDepSaraI,		// 0x0E34
   1.187 +	EDepOther,		// 0x0E35
   1.188 +	EDepOther,		// 0x0E36
   1.189 +	EDepOther,		// 0x0E37
   1.190 +	EDepOther,		// 0x0E38
   1.191 +	EDepOther,		// 0x0E39
   1.192 +	EDepOther,		// 0x0E3A
   1.193 +
   1.194 +	EOutOfRange,	// 0x0E3B
   1.195 +	EOutOfRange,	// 0x0E3C
   1.196 +	EOutOfRange,	// 0x0E3D
   1.197 +	EOutOfRange,	// 0x0E3E
   1.198 +	EOutOfRange,	// 0x0E3F
   1.199 +
   1.200 +	EPreVowel,		// 0x0E40
   1.201 +	EPreVowel,		// 0x0E41
   1.202 +	EPreVowel,		// 0x0E42
   1.203 +	EPreVowel,		// 0x0E43
   1.204 +	EPreVowel,		// 0x0E44
   1.205 +
   1.206 +	EPostVowelOther,// 0x0E45
   1.207 +	EOutOfRange,	// 0x0E46
   1.208 +
   1.209 +	EDepOther,		// 0x0E47
   1.210 +	EDepOther,		// 0x0E48
   1.211 +	EDepOther,		// 0x0E49
   1.212 +	EDepOther,		// 0x0E4A
   1.213 +	EDepOther,		// 0x0E4B
   1.214 +	EDepOther,		// 0x0E4C
   1.215 +	EDepOther,		// 0x0E4D
   1.216 +	EDepOther,		// 0x0E4E
   1.217 +	EOutOfRange,	// 0x0E4F
   1.218 +
   1.219 +	EOutOfRange,		// 0x0E50
   1.220 +	EOutOfRange,		// 0x0E51
   1.221 +	EOutOfRange,		// 0x0E52
   1.222 +	EOutOfRange,		// 0x0E53
   1.223 +	EOutOfRange,		// 0x0E54
   1.224 +	EOutOfRange,		// 0x0E55
   1.225 +	EOutOfRange,		// 0x0E56
   1.226 +	EOutOfRange,		// 0x0E57
   1.227 +	EOutOfRange,		// 0x0E58
   1.228 +	EOutOfRange,		// 0x0E59
   1.229 +	EOutOfRange,		// 0x0E5A
   1.230 +	EOutOfRange			// 0x0E5B (last entry: KThaiCodePageEnd - 1)
   1.231 +	};
   1.232 +
   1.233 +// Baseline set of next-character classes for the rows whose previous character is a consonant.
   1.234 +const TUint32 KNormalBreaksBeforeCons =
   1.235 +	ThaiLinebreakRules::KOutOfRangeFlag | ThaiLinebreakRules::KConsHoHipFlag
   1.236 +	| ThaiLinebreakRules::KConsDigraphFlag | ThaiLinebreakRules::KConsOtherFlag
   1.237 +	| ThaiLinebreakRules::KPreVowelFlag;
   1.238 +// Baseline set of next-character classes for the rows whose previous character is a post vowel.
   1.239 +const TUint32 KNormalBreaksBeforePostVowel =
   1.240 +	ThaiLinebreakRules::KOutOfRangeFlag | ThaiLinebreakRules::KConsFlags
   1.241 +	| ThaiLinebreakRules::KPostVowelFlags | ThaiLinebreakRules::KPreVowelFlag;
   1.242 +// One row per class of the previous character; a set bit (1 << next class) permits a break.
   1.243 +// A class is dropped from a baseline by masking; each masked flag is present in its baseline.
   1.244 +const TUint32 ThaiLinebreakRules::KRules[EMaxClassification] =
   1.245 +	{
   1.246 +	// Previous: EOutOfRange
   1.247 +	KAllFlags & ~TUint32(KOutOfRangeFlag),
   1.248 +	// Previous: EConsOAng
   1.249 +	KNormalBreaksBeforeCons,
   1.250 +	// Previous: EConsYoYak
   1.251 +	KNormalBreaksBeforeCons,
   1.252 +	// Previous: EConsHoHip
   1.253 +	KNormalBreaksBeforeCons & ~TUint32(KConsDigraphFlag),
   1.254 +	// Previous: EConsWoWean
   1.255 +	KNormalBreaksBeforeCons & ~TUint32(KConsDigraphFlag | KConsHoHipFlag),
   1.256 +	// Previous: EConsDigraph
   1.257 +	KNormalBreaksBeforeCons,
   1.258 +	// Previous: EConsOther
   1.259 +	KNormalBreaksBeforeCons,
   1.260 +	// Previous: EPostVowelA
   1.261 +	KNormalBreaksBeforePostVowel,
   1.262 +	// Previous: EPostVowelAA
   1.263 +	KNormalBreaksBeforePostVowel & ~TUint32(KPostVowelAFlag),
   1.264 +	// Previous: EPostVowelOther
   1.265 +	KNormalBreaksBeforePostVowel,
   1.266 +	// Previous: EPreVowel
   1.267 +	KOutOfRangeFlag | KPostVowelFlags | KPreVowelFlag,
   1.268 +	// Previous: EDepMaiHanAkat
   1.269 +	KOutOfRangeFlag | KPostVowelAFlag | KPostVowelAAFlag
   1.270 +		| KPreVowelFlag | KSpecialDepFlags,
   1.271 +	// Previous: EDepSaraI
   1.272 +	KOutOfRangeFlag | KPostVowelAFlag | KPostVowelAAFlag
   1.273 +		| KPreVowelFlag | KSpecialDepFlags,
   1.274 +	// Previous: EDepOther
   1.275 +	KOutOfRangeFlag | KConsHoHipFlag | KConsWoWeanFlag
   1.276 +		| KConsDigraphFlag | KConsOtherFlag | KPostVowelAFlag
   1.277 +		| KPostVowelAAFlag | KPreVowelFlag | KSpecialDepFlags
   1.278 +	};
   1.279 +
   1.280 +ThaiLinebreakRules::TCharClassification ThaiLinebreakRules::Class(TInt a)
   1.281 +    {
   1.282 +	// Only codes in [KThaiCodePageStart, KThaiCodePageEnd) have a table entry.
   1.283 +	if (a < KThaiCodePageStart || KThaiCodePageEnd <= a)
   1.284 +		return EOutOfRange;
   1.285 +	return static_cast<TCharClassification>(KCharClassifications[a - KThaiCodePageStart]);
   1.286 +    }
   1.287 +
   1.288 +TBool ThaiLinebreakRules::IsCombiningChar(TInt aChar,
   1.289 +	const MLineBreaker& aBreaker)
   1.290 +	{
   1.291 +	TUint ignoredA, ignoredB;	// extra arguments required by LineBreakClass(); their values are not used here
   1.292 +	return MLineBreaker::ECmLineBreakClass == aBreaker.LineBreakClass(aChar, ignoredA, ignoredB);
   1.293 +	}
   1.294 +
   1.295 +ThaiLinebreakRules::TCharClassification ThaiLinebreakRules::DerivedClass(
   1.296 +	const TText* aChar, const TText* aStart, const MLineBreaker& aBreaker)
   1.297 +	{
   1.298 +	// Start with the class of the character itself. While it is EOutOfRange and
   1.299 +	// the character is combining, move one character back and classify that
   1.300 +	// one instead. The character at aStart is classified but never stepped over.
   1.301 +	TCharClassification found = Class(*aChar);
   1.302 +	for (const TText* cursor = aChar;
   1.303 +		found == EOutOfRange && cursor != aStart && IsCombiningChar(*cursor, aBreaker); )
   1.304 +		found = Class(*--cursor);
   1.305 +	return found;
   1.306 +	}
   1.307 +
   1.308 +TBool ThaiLinebreakRules::BreakAllowedBetween(
   1.309 +	TCharClassification aClass, TCharClassification aNextClass)
   1.310 +	{
   1.311 +	const TUint32 nextClassBit = 1 << aNextClass;	// bit position of the following character's class
   1.312 +	return KRules[aClass] & nextClassBit;	// non-zero (not necessarily ETrue) when a break is allowed
   1.313 +	}
   1.314 +
   1.315 +TBool ThaiLinebreakRules::BreakAllowedAt(const TText* aChar,
   1.316 +	const TText* aStart, const MLineBreaker& aBreaker)
   1.317 +	{
   1.318 +	__ASSERT_DEBUG(aStart < aChar, User::Invariant());
   1.319 +	const TCharClassification current = Class(*aChar);
   1.320 +	if (current != EOutOfRange || !IsCombiningChar(*aChar, aBreaker))
   1.321 +		return BreakAllowedBetween(DerivedClass(aChar - 1, aStart, aBreaker), current);
   1.322 +	return EFalse;	// no break directly before a combining character that has no Thai class
   1.323 +	}
   1.324 +
   1.325 +/**
   1.326 + Finds a legal line break position within a run of characters that all have
   1.327 + the class ESaLineBreakClass (complex content). Scripts with Unicode
   1.328 + characters of that class include Thai, Lao, Myanmar and Khmer. This default
   1.329 + implementation of GetLineBreakInContext() supports only the Thai script,
   1.330 + where breaks are chosen from a simple understanding of syllable
   1.331 + boundaries. When characters from the other, unsupported SA class scripts
   1.332 + are found the method exits with EFalse.
   1.333 +@param aText
   1.334 + The text to be searched, which is a contiguous run of characters of class SA
   1.335 + (or CM attached to SA). The break position may be restricted further by
   1.336 + aMinBreakPos and aMaxBreakPos, but more text is provided for extra context
   1.337 + should it be needed.
   1.338 +@param aMinBreakPos
   1.339 + The start of the text to be considered for line breaks.
   1.340 +@param aMaxBreakPos
   1.341 + The end of the text to be considered for line breaks.
   1.342 +@param aForwards
   1.343 + ETrue if aBreakPos is to be set with the first legal break position,
   1.344 + EFalse if aBreakPos is to be set with the last legal break position.
   1.345 +@param aBreakPos
   1.346 + Set only when a break position is found; its value is then >= the Min and
   1.347 + <= the Max positions supplied.
   1.348 +@return TBool
   1.349 + ETrue if and only if a legal break was found, EFalse otherwise.
   1.350 +@publishedAll
   1.351 +@released
   1.352 +*/
   1.353 +EXPORT_C TBool MLineBreaker::GetLineBreakInContext(const TDesC16& aText,
   1.354 +		TInt aMinBreakPos, TInt aMaxBreakPos, TBool aForwards,
   1.355 +		TInt& aBreakPos) const
   1.356 +	{
   1.357 +	const TInt textLength = aText.Length();
   1.358 +	__ASSERT_DEBUG(aMinBreakPos >= 0 && textLength >= aMaxBreakPos,
   1.359 +			Panic(ELineBreakPanic_InvalidInputParam));
   1.360 +
   1.361 +	// Clamp the candidates to [1, textLength - 1]: BreakAllowedAt() looks at
   1.362 +	// the character at a position and at the one before it.
   1.363 +	const TInt firstPos = aMinBreakPos < 1 ? 1 : aMinBreakPos;
   1.364 +	const TInt lastPos = textLength - 1 < aMaxBreakPos ? textLength - 1 : aMaxBreakPos;
   1.365 +	if (lastPos < firstPos)
   1.366 +		return EFalse;
   1.367 +
   1.368 +	const TText16* const chars = aText.Ptr();
   1.369 +
   1.370 +	// A leading zero width space yields the earliest candidate without
   1.371 +	// consulting the Thai rules; a trailing one yields no break at all.
   1.372 +	if (chars[0] == KZeroWidthSpace)
   1.373 +		{
   1.374 +		aBreakPos = firstPos;
   1.375 +		return ETrue;
   1.376 +		}
   1.377 +	if (chars[textLength - 1] == KZeroWidthSpace)
   1.378 +		return EFalse;
   1.379 +
   1.380 +	// Visit every candidate once, starting from the end selected by aForwards.
   1.381 +	const TInt step = aForwards ? 1 : -1;
   1.382 +	TInt pos = aForwards ? firstPos : lastPos;
   1.383 +	for (TInt remaining = lastPos - firstPos + 1; remaining > 0; --remaining, pos += step)
   1.384 +		{
   1.385 +		if (!ThaiLinebreakRules::BreakAllowedAt(chars + pos, chars, *this))
   1.386 +			continue;
   1.387 +		aBreakPos = pos;
   1.388 +		return ETrue;
   1.389 +		}
   1.390 +	return EFalse;
   1.391 +	}