sl@0: // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0: // All rights reserved.
sl@0: // This component and the accompanying materials are made available
sl@0: // under the terms of "Eclipse Public License v1.0"
sl@0: // which accompanies this distribution, and is available
sl@0: // at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0: //
sl@0: // Initial Contributors:
sl@0: // Nokia Corporation - initial contribution.
sl@0: //
sl@0: // Contributors:
sl@0: //
sl@0: // Description:
sl@0: //
sl@0: 
sl@0: #include <uriutils.h>
sl@0: #include <uriutilscommon.h>
sl@0: #include "UriUtilsInternal.h"
sl@0: #include <escapeutils.h>
sl@0: 
sl@0: _LIT8(KDot, ".");	// input consisting only of "." (RFC 3986 5.2.4 step d)
sl@0: _LIT8(KDotDot, "..");	// input consisting only of ".." (step d)
sl@0: _LIT8(KDotSlash, "./");	// leading "./" prefix (step a)
sl@0: _LIT8(KDotDotSlash, "../");	// leading "../" prefix (step a)
sl@0: _LIT8(KSlash, "/");	// segment separator; also the replacement text in steps b and c
sl@0: _LIT8(KSlashDot, "/.");	// trailing "/." as the whole remaining input (step b)
sl@0: _LIT8(KSlashDotDot, "/..");	// trailing "/.." as the whole remaining input (step c)
sl@0: _LIT8(KSlashDotSlash, "/./");	// leading "/./" prefix (step b)
sl@0: _LIT8(KSlashDotDotSlash, "/../");	// leading "/../" prefix (step c)
sl@0: 
sl@0: _LIT(KHexDigit, "0123456789ABCDEF");	// upper-case hex digits; index == nibble value
sl@0: _LIT(KUnreserved, "-.~_");	// non-alphanumeric characters whose escape triples may be decoded
sl@0: #ifdef _DEBUG
sl@0: _LIT(KNormalisationUriPanicCategory, "URI-NORMALIZATION");	// debug-only panic category used by DoCaseNormaliseL
sl@0: #endif
sl@0: const TInt KEscapeIndicator					= '%';	// character that introduces an escape triple
sl@0: const TInt KEscapeTripleLength				= 3;	// '%' plus two hex digits
sl@0: const TInt KEscDelimiterPos					= 0;	// index of '%' within a triple
sl@0: const TInt KMostSignificantNibblePos		= 1;	// index of the first hex digit within a triple
sl@0: const TInt KLeastSignificantNibblePos		= 2;	// index of the second hex digit within a triple
sl@0: const TInt KSubstringLength					= 3;	// length of the substring examined as a potential triple
sl@0: const TInt KUpdateLength					= 2;	// extra positions skipped after a triple has been rewritten
sl@0: const TInt KAttachLength					= 1;	// capacity of the buffer holding one decoded character
sl@0: 
sl@0: const TInt KDotLength						= 1;	// length of "." (also used for a lone "/")
sl@0: const TInt KDotDotLength					= 2;	// length of ".." (also reused for "./" and "/.")
sl@0: const TInt KDotDotSlashLength				= 3;	// length of "../" (also reused for "/./" and "/..")
sl@0: const TInt KSlashDotDotSlashLength			= 4;	// length of "/../"
sl@0: 
sl@0: //
sl@0: //
sl@0: // Implementation of UriUtils
sl@0: //
sl@0: //
sl@0: 
sl@0: /**
sl@0: 	Converts a 16-bit (Unicode) uri into its internet form.
sl@0: 
sl@0: 	The uri is first converted to Utf8. The part before the fragment and the 
sl@0: 	fragment itself are then escape encoded separately (EEscapeNormal mode) and 
sl@0: 	recombined into a new CUri8 object. Reserved characters specified in RFC2396 
sl@0: 	(";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ",") are documented as 
sl@0: 	not being escape encoded, e.g. http://localhost is not turned into 
sl@0: 	http%3A%2F%2Flocalhost.
sl@0: 
sl@0: 	Note that the return values of the two Parse() calls are not checked.
sl@0: 	
sl@0: 	@since			6.0
sl@0: 	@deprecated	 Deprecated in 9.1
sl@0: 	@leave			KUriUtilsCannotConvert. When the input data cannot be converted.
sl@0: 	@leave			KUriUtilsErr16BitChar. When the input data has a 16-Bit character to be escape encoded.
sl@0: 	@param			aUri	The 16-bit format uri.
sl@0: 	@return			A pointer to a newly created 8-bit uri. Ownership passes to the caller.
sl@0:  */
sl@0: EXPORT_C CUri8* UriUtils::ConvertToInternetFormL(const TUriC16& aUri)
sl@0: 	{
sl@0: 	// Unicode -> Utf8
sl@0: 	HBufC8* utf8Uri = EscapeUtils::ConvertFromUnicodeToUtf8L(aUri.UriDes());
sl@0: 	CleanupStack::PushL(utf8Uri);
sl@0: 
sl@0: 	// Split off everything that precedes the fragment
sl@0: 	TUriParser8 uriParser;
sl@0: 	uriParser.Parse(*utf8Uri);
sl@0: 	TPtrC8 beforeFragment;
sl@0: 	uriParser.UriWithoutFragment(beforeFragment);
sl@0: 
sl@0: 	// Escape encode the fragment-less part
sl@0: 	HBufC8* encodedUri = EscapeUtils::EscapeEncodeL(beforeFragment, EscapeUtils::EEscapeNormal);
sl@0: 	CleanupStack::PushL(encodedUri);
sl@0: 
sl@0: 	// Escape encode the fragment on its own. This must be decided before the 
sl@0: 	// parser is re-used below, as re-parsing discards the original components.
sl@0: 	const TBool hasFragment = uriParser.IsPresent(EUriFragment);
sl@0: 	HBufC8* encodedFragment = NULL;
sl@0: 	if( hasFragment )
sl@0: 		{
sl@0: 		encodedFragment = EscapeUtils::EscapeEncodeL(uriParser.Extract(EUriFragment), EscapeUtils::EEscapeNormal);
sl@0: 		CleanupStack::PushL(encodedFragment);
sl@0: 		}
sl@0: 
sl@0: 	// Build the result from the encoded, fragment-less uri
sl@0: 	uriParser.Parse(*encodedUri);
sl@0: 	CUri8* internetForm = CUri8::NewL(uriParser);
sl@0: 
sl@0: 	// Re-attach the encoded fragment, protecting the result while doing so
sl@0: 	if( hasFragment )
sl@0: 		{
sl@0: 		CleanupStack::PushL(internetForm);
sl@0: 		internetForm->SetComponentL(*encodedFragment, EUriFragment);
sl@0: 		CleanupStack::Pop(internetForm);
sl@0: 		CleanupStack::PopAndDestroy(encodedFragment);
sl@0: 		}
sl@0: 
sl@0: 	CleanupStack::PopAndDestroy(2, utf8Uri);	// encodedUri, utf8Uri
sl@0: 	return internetForm;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Converts an 8-bit format uri into its display form. Any escape triples are 
sl@0: 	decoded and the resulting Utf8 data is converted into Unicode.
sl@0: 
sl@0: 	Note that the return value of Parse() is not checked.
sl@0: 	
sl@0: 	@since			6.0
sl@0: 	@deprecated	 Deprecated in 9.1	
sl@0: 	@leave			KUriUtilsCannotConvert. When the input data cannot be converted.
sl@0: 	@param			aUri	The 8-bit format uri.
sl@0: 	@return			A pointer to a newly created 16-bit uri. Ownership passes to the caller.
sl@0:  */
sl@0: EXPORT_C CUri16* UriUtils::ConvertToDisplayFormL(const TUriC8& aUri)
sl@0: 	{
sl@0: 	// Step 1 - remove the escape encoding
sl@0: 	HBufC8* decodedUri = EscapeUtils::EscapeDecodeL(aUri.UriDes());
sl@0: 	CleanupStack::PushL(decodedUri);
sl@0: 
sl@0: 	// Step 2 - Utf8 -> Unicode
sl@0: 	HBufC16* unicodeUri = EscapeUtils::ConvertToUnicodeFromUtf8L(*decodedUri);
sl@0: 	CleanupStack::PushL(unicodeUri);
sl@0: 
sl@0: 	// Step 3 - parse the Unicode text and copy it into a CUri16
sl@0: 	TUriParser16 uriParser;
sl@0: 	uriParser.Parse(*unicodeUri);
sl@0: 	CUri16* result = CUri16::NewL(uriParser);
sl@0: 
sl@0: 	CleanupStack::PopAndDestroy(2, decodedUri);	// unicodeUri, decodedUri
sl@0: 	return result;
sl@0: 	}
sl@0: 	
sl@0: 	
sl@0: /** 
sl@0: 	Create a new CUri8 object from a Unicode descriptor.
sl@0: 	
sl@0: 	@param aUri a Unicode string containing the URI to parse.
sl@0: 	@return the new CUri8 object
sl@0: 	@leave EUriUtilsParserErrInvalidUri  if the descriptor is an invalid URI.
sl@0:  */
sl@0: EXPORT_C CUri8* UriUtils::CreateUriL(const TDesC& aUri) 
sl@0: 	{
sl@0: 	// convert to UTF8
sl@0: 	HBufC8* unsafe8 = EscapeUtils::ConvertFromUnicodeToUtf8L(aUri);
sl@0:      	CleanupStack::PushL(unsafe8);
sl@0: 	// escape encode only those characters that cannot be in a URI. assume all %hh are %encoded already
sl@0: 	HBufC8* uri8desc = EscapeUtils::ReEscapeEncodeL(*unsafe8);
sl@0: 	CleanupStack::PopAndDestroy(unsafe8);
sl@0:      	CleanupStack::PushL(uri8desc);
sl@0: 	TUriParser8 parser;
sl@0: 	// parse the descriptor into a URI, Leave if it cannot be parsed
sl@0: 	User::LeaveIfError( parser.Parse(*uri8desc) );
sl@0: 
sl@0: 	CUri8* uri8 = CUri8::NewL(parser);
sl@0: 	CleanupStack::PopAndDestroy(uri8desc);
sl@0: 	return uri8;
sl@0: 	}
sl@0: 
sl@0: /** 
sl@0: 	Create a new CAuthority8 object from a Unicode descriptor.
sl@0: 	
sl@0: 	@param aAuthority a Unicode string containing the Authority to parse.
sl@0: 	@return the new CAuthority8 object
sl@0: 	@leave EUriUtilsParserErrInvalidUri  if the descriptor is an invalid Authority.
sl@0:  */
sl@0: EXPORT_C CAuthority8* UriUtils::CreateAuthorityL(const TDesC& aAuthority) 
sl@0: 	{
sl@0: 	// convert to UTF8
sl@0: 	HBufC8* unsafe8 = EscapeUtils::ConvertFromUnicodeToUtf8L(aAuthority);
sl@0:      	CleanupStack::PushL(unsafe8);
sl@0: 	// escape encode only those characters that cannot be in the authority. assume all %s are %encoded already
sl@0: 	HBufC8* authority8desc = EscapeUtils::ReEscapeEncodeL(*unsafe8);
sl@0: 	CleanupStack::PopAndDestroy(unsafe8);
sl@0:      	CleanupStack::PushL(authority8desc);
sl@0: 	TAuthorityParser8 parser;
sl@0: 	// parse the descriptor into the authority, Leave if it cannot be parsed
sl@0: 	User::LeaveIfError( parser.Parse(*authority8desc) );
sl@0: 
sl@0: 	CAuthority8* authority8 = CAuthority8::NewL(parser);
sl@0: 	CleanupStack::PopAndDestroy(authority8desc);
sl@0: 	return authority8;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Checks an 8-bit descriptor for excluded (invalid) characters, as determined by 
sl@0: 	EscapeUtils::IsExcludedChar(). Excluded characters are documented as all control 
sl@0: 	characters (values 0x00 to 0x1F and greater than 0x7F), space (0x20), delimiter 
sl@0: 	characters ('<', '>', '#', '%',	'"') and unwise characters ('{', '}', '|', '\', '^', '[', ']', '`').
sl@0: 	An excluded character that starts a valid escape triple is not reported.
sl@0: 	
sl@0: 	@since			6.0
sl@0: 	@param			aData	The descriptor to be checked.
sl@0: 	@return			A boolean value of ETrue if the descriptor contains invalid
sl@0: 	characters, otherwise EFalse.
sl@0:  */
sl@0: EXPORT_C TBool UriUtils::HasInvalidChars(const TDesC8& aData)
sl@0: 	{
sl@0: 	const TBool containsExcluded = CheckForExcludedChars(aData);
sl@0: 	return containsExcluded;
sl@0: 	}
sl@0: 	
sl@0: 	
sl@0: /**
sl@0: 	Checks a 16-bit descriptor for excluded (invalid) characters, as determined by 
sl@0: 	EscapeUtils::IsExcludedChar(). Excluded characters are documented as all control 
sl@0: 	characters (values 0x00 to 0x1F and	greater than 0x7F), space (0x20), delimiter 
sl@0: 	characters ('<', '>', '#', '%','"') and unwise characters ('{', '}', '|', '\', '^', '[', ']', '`').
sl@0: 	An excluded character that starts a valid escape triple is not reported.
sl@0: 	
sl@0: 	@since			6.0
sl@0: 	@param			aData	The descriptor to be checked.
sl@0: 	@return			A boolean value of ETrue if the descriptor contains invalid
sl@0: 	characters, otherwise EFalse.
sl@0:  */
sl@0: EXPORT_C TBool UriUtils::HasInvalidChars(const TDesC16& aData)
sl@0: 	{
sl@0: 	const TBool containsExcluded = CheckForExcludedChars(aData);
sl@0: 	return containsExcluded;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Classifies the supplied 8-bit host as an IPv4, IPv6 or text format host. 
sl@0: 	The classification itself is done by CheckHostType().
sl@0: 	
sl@0: 	@since			7.0
sl@0: 	@param			aHost	The descriptor containing the host to check
sl@0: 	@return			A TUriHostType enum of either EIPv6, EIPv4, EText or EUnknown
sl@0:  */
sl@0: EXPORT_C UriUtils::TUriHostType UriUtils::HostType(const TDesC8& aHost)
sl@0: 	{
sl@0: 	const UriUtils::TUriHostType detectedType = CheckHostType(aHost);
sl@0: 	return detectedType;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Classifies the supplied 16-bit host as an IPv4, IPv6 or text format host. 
sl@0: 	The classification itself is done by CheckHostType().
sl@0: 	
sl@0: 	@since			7.0
sl@0: 	@param			aHost	The descriptor containing the host to check
sl@0: 	@return			A TUriHostType enum of either EIPv6, EIPv4, EText or EUnknown
sl@0:  */
sl@0: EXPORT_C UriUtils::TUriHostType UriUtils::HostType(const TDesC16& aHost)
sl@0: 	{
sl@0: 	const UriUtils::TUriHostType detectedType = CheckHostType(aHost);
sl@0: 	return detectedType;
sl@0: 	}
sl@0: 
sl@0: 
sl@0: 
sl@0: //
sl@0: //
sl@0: // Implementation of component internal functions
sl@0: //
sl@0: //
sl@0: 
sl@0: /**
sl@0: 	@internalComponent
sl@0: 
sl@0: 	Reports whether the given 8-bit scheme is treated as a network scheme. 
sl@0: 	Every scheme except those that SchemeType() classifies as SIP counts as one.
sl@0: 	
sl@0: 	@param aScheme The descriptor with the scheme.
sl@0: 	@return A boolean value of EFalse if the scheme is SIP.	For all other schemes returns ETrue.
sl@0:  */
sl@0: TBool IsNetworkScheme(const TDesC8& aScheme)
sl@0: 	{
sl@0: 	return (SchemeType(aScheme) == ESchemeTypeSip) ? EFalse : ETrue;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	@internalComponent
sl@0: 
sl@0: 	Reports whether the given 16-bit scheme is treated as a network scheme. 
sl@0: 	Every scheme except those that SchemeType() classifies as SIP counts as one.
sl@0: 	
sl@0: 	@param aScheme The descriptor with the scheme.
sl@0: 	@return A boolean value of EFalse if the scheme is SIP.	For all other schemes returns ETrue.
sl@0:  */
sl@0: TBool IsNetworkScheme(const TDesC16& aScheme)
sl@0: 	{
sl@0: 	return (SchemeType(aScheme) == ESchemeTypeSip) ? EFalse : ETrue;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	@internalComponent
sl@0: 
sl@0: 	Returns the type of an 8-bit URI scheme. The comparison is done with 
sl@0: 	CompareF() (folded) against the sip, sips and tel scheme literals.
sl@0: 	
sl@0: 	@param			aScheme	The descriptor with the scheme.
sl@0: 	@return			ESchemeTypeSip for sip and sips, ESchemeTypeTel for tel, 
sl@0: 					otherwise ESchemeTypeUnknown.
sl@0:  */
sl@0: TUriSchemeType SchemeType(const TDesC8& aScheme)
sl@0: 	{
sl@0: 	// sip and sips are both reported as the SIP scheme type
sl@0: 	if (aScheme.CompareF(KSipScheme8()) == 0)
sl@0: 		{
sl@0: 		return ESchemeTypeSip;
sl@0: 		}
sl@0: 	if (aScheme.CompareF(KSipsScheme8()) == 0)
sl@0: 		{
sl@0: 		return ESchemeTypeSip;
sl@0: 		}
sl@0: 	if (aScheme.CompareF(KTelScheme8()) == 0)
sl@0: 		{
sl@0: 		return ESchemeTypeTel;
sl@0: 		}
sl@0: 	return ESchemeTypeUnknown;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	@internalComponent
sl@0: 
sl@0: 	Returns the type of a 16-bit URI scheme. The comparison is done with 
sl@0: 	CompareF() (folded) against the sip and sips scheme literals.
sl@0: 
sl@0: 	NOTE(review): unlike the 8-bit overload, this overload does not recognise 
sl@0: 	the tel scheme and so never returns ESchemeTypeTel - confirm this is intended.
sl@0: 	
sl@0: 	@param			aScheme	The descriptor with the scheme.
sl@0: 	@return			ESchemeTypeSip for sip and sips, otherwise ESchemeTypeUnknown.
sl@0:  */
sl@0: TUriSchemeType SchemeType(const TDesC16& aScheme)
sl@0: 	{
sl@0: 	// sip and sips are both reported as the SIP scheme type
sl@0: 	if (aScheme.CompareF(KSipScheme()) == 0)
sl@0: 		{
sl@0: 		return ESchemeTypeSip;
sl@0: 		}
sl@0: 	if (aScheme.CompareF(KSipsScheme()) == 0)
sl@0: 		{
sl@0: 		return ESchemeTypeSip;
sl@0: 		}
sl@0: 	return ESchemeTypeUnknown;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	@internalComponent
sl@0: 
sl@0: 	Checks that an 8-bit text host is in a valid form. The rules are those 
sl@0: 	implemented by CheckValidTextHost().
sl@0: 	
sl@0: 	@param			aHost	The descriptor containing the host to check
sl@0: 	@return			ETrue if the host is valid otherwise EFalse
sl@0:  */
sl@0: TBool IsTextHostValid(const TDesC8& aHost)
sl@0: 	{
sl@0: 	const TBool hostIsValid = CheckValidTextHost(aHost);
sl@0: 	return hostIsValid;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	@internalComponent
sl@0: 
sl@0: 	Checks that a 16-bit text host is in a valid form. The rules are those 
sl@0: 	implemented by CheckValidTextHost().
sl@0: 	
sl@0: 	@param			aHost	The descriptor containing the host to check
sl@0: 	@return			ETrue if the host is valid otherwise EFalse
sl@0:  */
sl@0: TBool IsTextHostValid(const TDesC16& aHost)
sl@0: 	{
sl@0: 	const TBool hostIsValid = CheckValidTextHost(aHost);
sl@0: 	return hostIsValid;
sl@0: 	}
sl@0: 
sl@0: 
sl@0: /**
sl@0: 	@internalComponent
sl@0: 
sl@0: 	Splits a segment of the form name=value at the first KEqualsSeparator. 
sl@0: 	When the segment has no separator the whole segment becomes the name 
sl@0: 	and the value is set to an empty (default constructed) descriptor.
sl@0: 	
sl@0: 	@param			aSegment	the name-value segment to parse
sl@0: 	@param			aName		the name part that is returned
sl@0: 	@param			aValue		the value part that is returned
sl@0:  */
sl@0: void GetNameValuePair(const TDesC8& aSegment, TPtrC8& aName, TPtrC8& aValue)
sl@0: 	{
sl@0: 	const TInt separatorPos = aSegment.Locate(KEqualsSeparator);
sl@0: 	if (separatorPos == KErrNotFound)
sl@0: 		{
sl@0: 		// No separator: name only, empty value
sl@0: 		aName.Set(aSegment);
sl@0: 		aValue.Set(TPtrC8());
sl@0: 		return;
sl@0: 		}
sl@0: 
sl@0: 	// Name is everything before the separator, value everything after it
sl@0: 	aName.Set(aSegment.Left(separatorPos));
sl@0: 	aValue.Set(aSegment.Mid(separatorPos+1));
sl@0: 	}
sl@0: 
sl@0: 
sl@0: //
sl@0: //
sl@0: // Implementation of LOCAL functions
sl@0: //
sl@0: //
sl@0: 
sl@0: /**
sl@0: 	Checks the descriptor for any excluded characters. These are characters that 
sl@0: 	should have been escape encoded or converted to Utf8 from Unicode. 
sl@0: 
sl@0: 	A character reported by EscapeUtils::IsExcludedChar() is tolerated when the 
sl@0: 	text starting at that character is an escape triple according to 
sl@0: 	EscapeUtils::IsEscapeTriple(). Scanning stops at the first offending character.
sl@0: 						
sl@0: 	@since			6.0
sl@0: 	@param			aData	The descriptor to be checked.
sl@0: 	@return		A boolean value of ETrue if the descriptor contains excluded
sl@0: 				characters, EFalse if it does not.
sl@0:  */
sl@0: template<class TDesCType>
sl@0: LOCAL_C TBool CheckForExcludedChars(const TDesCType& aData)
sl@0: 	{
sl@0: 	const TInt dataLength = aData.Length();
sl@0: 	for( TInt index = 0; index < dataLength; ++index )
sl@0: 		{
sl@0: 		if( !EscapeUtils::IsExcludedChar(aData[index]) )
sl@0: 			{
sl@0: 			// Ordinary character - keep scanning
sl@0: 			continue;
sl@0: 			}
sl@0: 		// Excluded character - only acceptable as the start of an escape triple
sl@0: 		TInt ignoredHexValue;
sl@0: 		if( !EscapeUtils::IsEscapeTriple(aData.Mid(index), ignoredHexValue) )
sl@0: 			{
sl@0: 			return ETrue;
sl@0: 			}
sl@0: 		}
sl@0: 	return EFalse;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Classifies the supplied host as an IPv4, IPv6 or text format host.
sl@0: 
sl@0: 	The host is scanned up to (and including) the first ':'. Any colon makes the 
sl@0: 	host IPv6. Otherwise a host made only of the characters '0'..'9' and '.' with 
sl@0: 	exactly three dots is IPv4, and everything else (including an empty host) is 
sl@0: 	a text host. No further validation of the address is performed here.
sl@0: 	
sl@0: 	@since			7.0
sl@0: 	@param			aHost	The descriptor containing the host to check
sl@0: 	@return			One of UriUtils::EIPv6Host, UriUtils::EIPv4Host or UriUtils::ETextHost
sl@0:  */
sl@0: template<class TDesCType>
sl@0: LOCAL_C UriUtils::TUriHostType CheckHostType(const TDesCType& aHost)
sl@0: 	{
sl@0: 	TInt dots = 0;
sl@0: 	TBool sawColon = EFalse;
sl@0: 	TBool digitsAndDotsOnly = ETrue;
sl@0: 
sl@0: 	const TInt hostLength = aHost.Length();
sl@0: 	for (TInt index = 0; index < hostLength && !sawColon; ++index)
sl@0: 		{
sl@0: 		TChar current(aHost[index]);
sl@0: 
sl@0: 		if (current == '.')
sl@0: 			{
sl@0: 			// an IPv4 address needs exactly three of these
sl@0: 			++dots;
sl@0: 			}
sl@0: 		else
sl@0: 			{
sl@0: 			if (current < '0' || current > '9')
sl@0: 				{
sl@0: 				digitsAndDotsOnly = EFalse;
sl@0: 				}
sl@0: 			if (current == ':')
sl@0: 				{
sl@0: 				sawColon = ETrue;
sl@0: 				}
sl@0: 			}
sl@0: 		}
sl@0: 
sl@0: 	// a colon can only appear in an IPv6 address
sl@0: 	if (sawColon)
sl@0: 		{
sl@0: 		return UriUtils::EIPv6Host;
sl@0: 		}
sl@0: 	// numeric only, with three separators
sl@0: 	if (digitsAndDotsOnly && dots == 3)
sl@0: 		{
sl@0: 		return UriUtils::EIPv4Host;
sl@0: 		}
sl@0: 	return UriUtils::ETextHost;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	@internalComponent
sl@0: 
sl@0: 	Checks that a text host is in a valid form. A valid host:
sl@0: 	- is not empty,
sl@0: 	- contains only A-Z, a-z, 0-9, '-' and '.',
sl@0: 	- neither starts nor ends with '-' or '.',
sl@0: 	- has no empty section, i.e. no two consecutive dots.
sl@0: 	
sl@0: 	@param			aHost	The descriptor containing the host to check
sl@0: 	@return			ETrue if the host is valid otherwise EFalse
sl@0:  */
sl@0: template<class TDesCType>
sl@0: LOCAL_C TBool CheckValidTextHost(const TDesCType& aHost)
sl@0: 	{
sl@0: 	const TInt hostLength = aHost.Length();
sl@0: 	if (hostLength == 0)
sl@0: 		{
sl@0: 		return EFalse;
sl@0: 		}
sl@0: 
sl@0: 	// a leading dot or dash is not allowed
sl@0: 	TChar leading(aHost[0]);
sl@0: 	if (leading == '-' || leading == '.')
sl@0: 		{
sl@0: 		return EFalse;
sl@0: 		}
sl@0: 
sl@0: 	TChar previous = '\0';
sl@0: 	for (TInt index = 0; index < hostLength; ++index)
sl@0: 		{
sl@0: 		TChar current(aHost[index]);
sl@0: 
sl@0: 		const TBool isUpper = (current >= 'A' && current <= 'Z');
sl@0: 		const TBool isLower = (current >= 'a' && current <= 'z');
sl@0: 		const TBool isDigit = (current >= '0' && current <= '9');
sl@0: 		const TBool isDashOrDot = (current == '-' || current == '.');
sl@0: 		if (!isUpper && !isLower && !isDigit && !isDashOrDot)
sl@0: 			{
sl@0: 			return EFalse;
sl@0: 			}
sl@0: 
sl@0: 		// dots separate sections, so two in a row means an empty section
sl@0: 		if (current == '.' && previous == '.')
sl@0: 			{
sl@0: 			return EFalse;
sl@0: 			}
sl@0: 		previous = current;
sl@0: 		}
sl@0: 
sl@0: 	// a trailing dot or dash is not allowed either
sl@0: 	if (previous == '-' || previous == '.')
sl@0: 		{
sl@0: 		return EFalse;
sl@0: 		}
sl@0: 	return ETrue;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Supports Syntax-Based Normalization as specified in section 6.2.2 of RFC3986.
sl@0: 	A copy of the supplied URI is made and then normalised in three passes: 
sl@0: 	percent-encoding normalisation, case normalisation and removal of dot segments.
sl@0: 	
sl@0: 	@param aUri	A reference to a parsed uri object.
sl@0: 	@return A pointer to a CUri8 object containing normalised URI. Ownership 
sl@0: 	passes to the caller.
sl@0: 	@leave KErrNoMemory
sl@0: 	@internalAll
sl@0:  */
sl@0: EXPORT_C CUri8* UriUtils:: NormaliseUriL(const TUriC8& aUri)	
sl@0: 	{
sl@0: 	// Work on a private copy which is protected while the passes run
sl@0: 	CUri8* workingCopy = CUri8::NewLC(aUri);
sl@0: 
sl@0: 	PercentEncodeL(workingCopy);		// section 6.2.2.2
sl@0: 	CaseNormaliseL(workingCopy);		// section 6.2.2.1
sl@0: 	RemoveDotSegmentsL(workingCopy);	// section 6.2.2.3
sl@0: 
sl@0: 	CleanupStack::Pop(workingCopy);
sl@0: 	return workingCopy;
sl@0: 	}
sl@0: 	
sl@0: 	
sl@0: /**
sl@0: 	Performs Case Normalization for CUri8 object as specified 
sl@0: 	in section 6.2.2.1 of RFC3986. Each component (scheme, userinfo, host, 
sl@0: 	port, path, query, fragment) is handed to DoCaseNormaliseL() in turn.
sl@0: 	
sl@0: 	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer 
sl@0: 	to the CUri8 object whose uri is to be case normalised; it is updated in place.
sl@0: 	@leave KErrNoMemory
sl@0:  */
sl@0: void CaseNormaliseL(CUri8* aNormalisedUri )	
sl@0: 	{
sl@0: 	// Components in the order in which they are normalised
sl@0: 	const TUriComponent componentOrder[] =
sl@0: 		{
sl@0: 		EUriScheme,
sl@0: 		EUriUserinfo,
sl@0: 		EUriHost,
sl@0: 		EUriPort,
sl@0: 		EUriPath,
sl@0: 		EUriQuery,
sl@0: 		EUriFragment
sl@0: 		};
sl@0: 	const TInt componentCount = sizeof(componentOrder) / sizeof(componentOrder[0]);
sl@0: 
sl@0: 	for (TInt index = 0; index < componentCount; ++index)
sl@0: 		{
sl@0: 		DoCaseNormaliseL(aNormalisedUri, componentOrder[index]);
sl@0: 		}
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Performs Case Normalization for specified sub component of URI.
sl@0: 
sl@0: 	The scheme and host components are converted to lower case. In every 
sl@0: 	component the hex digits of valid percent-encoded triples are converted to 
sl@0: 	upper case (e.g. %3a becomes %3A). The component is written back to the 
sl@0: 	uri only if something was normalised. An absent component is left alone.
sl@0: 
sl@0: 	A '%' that is not followed by two more characters cannot be an escape 
sl@0: 	triple. Debug builds panic on it (URI-NORMALIZATION, 
sl@0: 	KUriUtilsErrBadEscapeTriple). Release builds stop scanning and leave the 
sl@0: 	truncated tail untouched, rather than panicking inside Mid().
sl@0: 	
sl@0: 	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer 
sl@0: 	to the CUri8 object whose uri is to be case normalised; the specified sub 
sl@0: 	component is updated in place.
sl@0: 	@param aComponent Enumeration of TUriComponent.
sl@0: 	@leave KErrNoMemory
sl@0: 	@leave KErrArgument if aComponent is outside EUriScheme..EUriMaxComponents-1.
sl@0: */
sl@0: void DoCaseNormaliseL(CUri8* aNormalisedUri, TUriComponent aComponent)
sl@0: 	{
sl@0: 	const TUriC8& uri(aNormalisedUri->Uri());
sl@0: 	if(!uri.IsPresent(aComponent) )
sl@0: 		{
sl@0: 		return;
sl@0: 		}
sl@0: 	//take a modifiable copy of the sub component which needs to be case-normalised
sl@0: 	HBufC8* heapBuf = uri.Extract(aComponent).AllocLC(); 	
sl@0: 	TPtr8 normalisedComponent(heapBuf->Des());
sl@0: 	TBool normalised = EFalse;
sl@0: 	if(aComponent == EUriScheme || aComponent == EUriHost )
sl@0: 		{
sl@0: 		//scheme and host are case-insensitive: change them to lower case
sl@0: 		normalisedComponent.LowerCase();
sl@0: 		normalised = ETrue;
sl@0: 		}
sl@0: 	
sl@0: 	const TInt len =  normalisedComponent.Length();
sl@0: 	TBuf8<KSubstringLength> subString;
sl@0: 	//upper-case the hex digits of every valid escape triple
sl@0: 	for (TInt pos = 0; pos < len; pos++)
sl@0: 		{
sl@0: 		if (normalisedComponent[pos] != KEscapeIndicator )
sl@0: 			{
sl@0: 			continue;
sl@0: 			}
sl@0: 		__ASSERT_DEBUG( ((len-pos) >= KSubstringLength), User::Panic(KNormalisationUriPanicCategory, KUriUtilsErrBadEscapeTriple) );
sl@0: 		if ((len-pos) < KSubstringLength)
sl@0: 			{
sl@0: 			//release builds only: too few characters left for a triple, and 
sl@0: 			//Mid(pos,KSubstringLength) would panic, so stop scanning here
sl@0: 			break;
sl@0: 			}
sl@0: 		TPtrC8 componentBuf(normalisedComponent.Mid(pos,KSubstringLength));
sl@0: 		if (ValidateAndConvertPercentEncodedTriple(componentBuf,subString))
sl@0: 			{
sl@0: 			//same length replacement, so len stays valid
sl@0: 			normalisedComponent.Replace(pos,KSubstringLength,subString);
sl@0: 			//skip the two hex digits that have just been handled
sl@0: 			pos += KUpdateLength;
sl@0: 			normalised = ETrue;
sl@0: 			subString.Zero();
sl@0: 			}
sl@0: 		}
sl@0: 
sl@0: 	//updating the uri with normalised string
sl@0: 	if( normalised )
sl@0: 		{
sl@0: 		if(aComponent<EUriMaxComponents && aComponent >=EUriScheme)
sl@0: 		   {
sl@0: 		   aNormalisedUri->SetComponentL(normalisedComponent, aComponent);
sl@0: 		   }
sl@0: 		else
sl@0: 		   {
sl@0: 			User::Leave(KErrArgument);	
sl@0: 		   }
sl@0: 
sl@0: 		}
sl@0: 	CleanupStack::PopAndDestroy(heapBuf);
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Validates a percent encoded triplet and produces its upper case form. 
sl@0: 	For eg: Converts %3a to %3A
sl@0: 
sl@0: 	The input is a valid triplet when it is at least three characters long, 
sl@0: 	starts with '%' and the next two characters are found in KHexDigit using a 
sl@0: 	folded match. The output descriptor is only modified when the input is valid.
sl@0: 
sl@0: 	NOTE(review): LocateF() is a folded match; confirm it cannot match non-ASCII 
sl@0: 	octets against the hex digits.
sl@0: 	
sl@0: 	@param aData A reference to a string to be validated and converted to upper case.
sl@0: 	@param aCaseNormalizedData A reference to a descriptor that receives the 
sl@0: 	upper case triplet. It needs room for KEscapeTripleLength characters.
sl@0: 	@return ETrue if aData starts with a valid Percent encoded triplet, otherwise EFalse.
sl@0: */
sl@0: TBool ValidateAndConvertPercentEncodedTriple(TDesC8& aData , TDes8& aCaseNormalizedData )	
sl@0: 	{
sl@0: 	// See if the descriptor is actually long enough and
sl@0: 	// Check that the three characters form an escape triple - first char is '%'
sl@0: 	if( aData.Length() < KEscapeTripleLength || aData[KEscDelimiterPos] != KEscapeIndicator )
sl@0: 		{
sl@0: 		return EFalse;//do nothing
sl@0: 		}
sl@0: 	
sl@0: 	// Look both hex characters up; the index found is the nibble value (0..15)
sl@0: 	const TInt mostSignificantDigitValue = KHexDigit().LocateF(aData[KMostSignificantNibblePos] );
sl@0: 	const TInt leastSignificantDigitValue = KHexDigit().LocateF(aData[KLeastSignificantNibblePos] );
sl@0: 
sl@0: 	if( mostSignificantDigitValue== KErrNotFound || leastSignificantDigitValue == KErrNotFound )
sl@0: 		{
sl@0: 		// Either of the characters were not a valid hex character
sl@0: 		return EFalse;
sl@0: 		}
sl@0: 
sl@0: 	// Rebuild the triple from the upper case digit table. Both indices are 
sl@0: 	// known to be within KHexDigit here, so no further range check is needed.
sl@0: 	aCaseNormalizedData.Zero();
sl@0: 	aCaseNormalizedData.Append(KEscapeIndicator); 
sl@0: 	aCaseNormalizedData.Append(KHexDigit().Mid(mostSignificantDigitValue,1));
sl@0: 	aCaseNormalizedData.Append(KHexDigit().Mid(leastSignificantDigitValue,1));
sl@0: 	
sl@0: 	return ETrue;
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Performs Percent-Encoding Normalization for CUri8 object as specified in 
sl@0: 	section 6.2.2.2 of RFC3986. Each component (scheme, userinfo, host, port, 
sl@0: 	path, query, fragment) is handed to DoPercentEncodeL() in turn.
sl@0: 	
sl@0: 	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer to 
sl@0: 	the CUri8 object whose uri is to be percent-encoding normalised; it is 
sl@0: 	updated in place.
sl@0: 	@leave KErrNoMemory
sl@0:  */
sl@0: void  PercentEncodeL(CUri8* aNormalisedUri)
sl@0: 	{
sl@0: 	// Components in the order in which they are normalised
sl@0: 	const TUriComponent componentOrder[] =
sl@0: 		{
sl@0: 		EUriScheme,
sl@0: 		EUriUserinfo,
sl@0: 		EUriHost,
sl@0: 		EUriPort,
sl@0: 		EUriPath,
sl@0: 		EUriQuery,
sl@0: 		EUriFragment
sl@0: 		};
sl@0: 	const TInt componentCount = sizeof(componentOrder) / sizeof(componentOrder[0]);
sl@0: 
sl@0: 	for (TInt index = 0; index < componentCount; ++index)
sl@0: 		{
sl@0: 		DoPercentEncodeL(aNormalisedUri, componentOrder[index]);
sl@0: 		}
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Performs Percent-Encoding normalisation for specified sub component of URI.
sl@0: 
sl@0: 	Every escape triple whose value is an unreserved character (an ASCII letter, 
sl@0: 	an ASCII digit, or one of "-.~_") is replaced by that character. All other 
sl@0: 	triples, including those for octets of 0x80 and above, are left encoded. 
sl@0: 	The component is written back to the uri only if something was decoded. 
sl@0: 	An absent component is left alone.
sl@0: 
sl@0: 	A '%' within the last two characters of the component cannot start a 
sl@0: 	triple and is left untouched.
sl@0: 	
sl@0: 	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer to 
sl@0: 	the CUri8 object whose uri is to be normalised; the specified sub component 
sl@0: 	is updated in place.
sl@0: 	@param aComponent Enumeration of TUriComponent.
sl@0: 	@leave KErrNoMemory
sl@0: 	@leave KErrArgument if aComponent is outside EUriScheme..EUriMaxComponents-1.
sl@0: */
sl@0: void DoPercentEncodeL(CUri8* aNormalisedUri, TUriComponent aComponent)
sl@0: 	{
sl@0: 	const TUriC8& uri(aNormalisedUri->Uri());
sl@0: 	if(!uri.IsPresent(aComponent))
sl@0: 		{
sl@0: 		return;
sl@0: 		}
sl@0: 	
sl@0: 	// first value that is no longer an ASCII character
sl@0: 	const TInt KAsciiLimit = 0x80;
sl@0: 
sl@0: 	HBufC8* heapBuf = uri.Extract(aComponent).AllocLC();
sl@0: 	TPtr8 percentNormalisedComponent(heapBuf->Des());
sl@0: 	TBool normalised = EFalse;
sl@0: 	// Only positions with a full triple ahead of them are examined, so that 
sl@0: 	// Mid(pos, KSubstringLength) can never run past the end. The length is 
sl@0: 	// re-read each time round because decoding shortens the component.
sl@0: 	for (TInt pos = 0; pos + KSubstringLength <= percentNormalisedComponent.Length(); pos++)
sl@0: 		{
sl@0: 		if (percentNormalisedComponent[pos] != KEscapeIndicator)
sl@0: 			{
sl@0: 			continue;
sl@0: 			}
sl@0: 		TInt hex;
sl@0: 		if (!EscapeUtils::IsEscapeTriple(percentNormalisedComponent.Mid(pos, KSubstringLength), hex))
sl@0: 			{
sl@0: 			continue;
sl@0: 			}
sl@0: 
sl@0: 		// RFC3986 section 2.3: unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~". 
sl@0: 		// These are ASCII only, so the check is made on plain ranges rather than 
sl@0: 		// with TChar::IsAlphaDigit().
sl@0: 		const TBool isAscii = (hex >= 0 && hex < KAsciiLimit);
sl@0: 		const TBool isAsciiAlphaDigit = (hex >= '0' && hex <= '9') ||
sl@0: 										(hex >= 'A' && hex <= 'Z') ||
sl@0: 										(hex >= 'a' && hex <= 'z');
sl@0: 		const TBool isUnreservedMark = isAscii && KUnreserved().LocateF(hex) != KErrNotFound;
sl@0: 		if( isAsciiAlphaDigit || isUnreservedMark )
sl@0: 			{
sl@0: 			TChar replacedChar(hex);
sl@0: 			TBuf8<KAttachLength> subString;
sl@0: 			subString.Append(replacedChar);
sl@0: 			percentNormalisedComponent.Replace(pos, KSubstringLength, subString);
sl@0: 			normalised = ETrue;
sl@0: 			}
sl@0: 		}
sl@0: 	if( normalised )
sl@0: 		{
sl@0: 		if(aComponent<EUriMaxComponents && aComponent >=EUriScheme)
sl@0: 		   {
sl@0: 		    aNormalisedUri->SetComponentL(percentNormalisedComponent, aComponent);
sl@0: 		   }
sl@0: 		else
sl@0: 		   {
sl@0: 			User::Leave(KErrArgument);	
sl@0: 		   }
sl@0: 
sl@0: 		}
sl@0: 	CleanupStack::PopAndDestroy(heapBuf); 	
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Performs Path Segment Normalization for CUri8 object as specified in 
sl@0: 	section 6.2.2.3 of RFC3986. Nothing is done when the uri has no path; 
sl@0: 	otherwise the path is always written back, changed or not.
sl@0: 	
sl@0: 	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer to 
sl@0: 	the CUri8 object whose path is to be normalised; it is updated in place.
sl@0: 	@leave KErrNoMemory
sl@0:  */
sl@0: void  RemoveDotSegmentsL(CUri8* aNormalisedUri)
sl@0: 	{
sl@0: 	const TUriC8& uri( aNormalisedUri->Uri() );
sl@0: 	if(!uri.IsPresent(EUriPath))
sl@0: 		{
sl@0: 		return;
sl@0: 		}
sl@0: 
sl@0: 	// Normalise a private copy of the path, then store it back in the uri
sl@0: 	HBufC8* pathCopy = uri.Extract(EUriPath).AllocLC();
sl@0: 	RemoveExtraneousDotSegmentsL(pathCopy);
sl@0: 	aNormalisedUri->SetComponentL(*pathCopy, EUriPath);
sl@0: 	CleanupStack::PopAndDestroy(pathCopy);
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Performs Remove_dot_segments algorithm as specifed in section 5.2.4 of RFC3986.
sl@0: 	
sl@0: 	@param aUriInputPath It is an in-out parameter. aUriInputPath is a pointer to the 
sl@0: 	path descriptor to be normalised for extraneous dot_segments and returns with 
sl@0: 	normalised dot_segments.
sl@0: 	@leave KErrNoMemory
sl@0: */
sl@0: void RemoveExtraneousDotSegmentsL(HBufC8* aUriInputPath)
sl@0: 	{
sl@0: 	TPtr8 uriPathBuf(aUriInputPath->Des());
sl@0: 	TInt length = uriPathBuf.Length();	
sl@0: 	HBufC8* path = HBufC8::NewLC(length);
sl@0: 	TPtr8 transitionalBuf(path->Des());
sl@0: 
sl@0: 	while(length > 0)	
sl@0: 		{
sl@0: 		//step a of section 5.2.4 of RFC 3986
sl@0: 		if(length >= KDotDotSlashLength && 
sl@0: 			KDotDotSlash().Compare(uriPathBuf.Mid(0, KDotDotSlashLength)) == 0 )
sl@0: 			{
sl@0: 			uriPathBuf.Delete(0,KDotDotSlashLength);
sl@0: 			}
sl@0: 		//step a of section 5.2.4 of RFC 3986
sl@0: 		else if(length >= KDotDotLength && 
sl@0: 				KDotSlash().Compare(uriPathBuf.Mid(0, KDotDotLength)) == 0)
sl@0: 			{
sl@0: 			uriPathBuf.Delete(0,KDotDotLength);	
sl@0: 			}
sl@0: 		//step b of section 5.2.4 of RFC 3986
sl@0: 		else if(length >= KDotDotSlashLength && 
sl@0: 				KSlashDotSlash().Compare(uriPathBuf.Mid(0, KDotDotSlashLength)) == 0)
sl@0: 			{
sl@0: 			uriPathBuf.Replace(0, KDotDotSlashLength, KSlash);
sl@0: 			}
sl@0: 		//step c of section 5.2.4 of RFC 3986
sl@0: 		else if(length >= KSlashDotDotSlashLength && 
sl@0: 				KSlashDotDotSlash().Compare(uriPathBuf.Mid(0, KSlashDotDotSlashLength)) == 0)
sl@0: 			{
sl@0: 			updateStrings(uriPathBuf, transitionalBuf, KSlashDotDotSlashLength);
sl@0: 			}
sl@0: 		//step c of section 5.2.4 of RFC 3986 --complete path segment
sl@0: 		else if(length == KDotDotSlashLength && 
sl@0: 				KSlashDotDot().Compare(uriPathBuf.Mid(0, KDotDotSlashLength)) == 0)
sl@0: 			{
sl@0: 			updateStrings(uriPathBuf, transitionalBuf, KDotDotSlashLength);
sl@0: 			}
sl@0: 		//step b of section 5.2.4 of RFC 3986--complete path segment
sl@0: 		else if(length == KDotDotLength && 
sl@0: 				KSlashDot().Compare(uriPathBuf.Mid(0, KDotDotLength)) == 0)
sl@0: 			{
sl@0: 			uriPathBuf.Replace(0, KDotDotLength, KSlash);
sl@0: 			}
sl@0: 		//step d of section 5.2.4 of RFC 3986
sl@0: 		else if(length == KDotDotLength && 
sl@0: 				KDotDot().Compare(uriPathBuf.Mid(0)) == 0)
sl@0: 			{
sl@0: 			uriPathBuf.Delete(0,KDotDotLength);	
sl@0: 			}
sl@0: 		//step d of section 5.2.4 of RFC 3986
sl@0: 		else if(length == KDotLength && 
sl@0: 				KDot().Compare(uriPathBuf.Mid(0)) == 0)
sl@0: 			{
sl@0: 			uriPathBuf.Delete(0,KDotLength);	
sl@0: 			}
sl@0: 		//step e of section 5.2.4 of RFC 3986
sl@0: 		else 
sl@0: 			{
sl@0: 			//get the first path segment including initial / (if any)from uriPathBuf
sl@0: 			// till next slash (but not including next slash)..append it to the output Buf	
sl@0: 			TInt substrLength;
sl@0: 			TInt nextSlashPos = uriPathBuf.Find(KSlash);
sl@0: 			if(nextSlashPos == 0 && length > KDotLength)
sl@0: 				//replace with locate next
sl@0: 				{
sl@0: 				nextSlashPos = uriPathBuf.Mid(1).Find(KSlash);
sl@0: 				if(nextSlashPos != KErrNotFound)
sl@0: 					{
sl@0: 					++nextSlashPos;
sl@0: 					}
sl@0: 				}
sl@0: 			if(length == KDotLength)
sl@0: 				//only '/' is exist
sl@0: 				{
sl@0: 				substrLength = length;	
sl@0: 				}
sl@0: 			else
sl@0: 				{
sl@0: 				substrLength = nextSlashPos == KErrNotFound ? length : nextSlashPos ;	
sl@0: 				}
sl@0: 			transitionalBuf.Append(uriPathBuf.Mid(0,substrLength));
sl@0: 			uriPathBuf.Delete(0,substrLength);	
sl@0: 			}
sl@0: 		length = uriPathBuf.Length();
sl@0: 		}
sl@0: 	uriPathBuf.Copy(transitionalBuf);
sl@0: 	CleanupStack::PopAndDestroy(path);
sl@0: 	}
sl@0: 
sl@0: /**
sl@0: 	Updates the strings as specified in step c of section 5.2.4 of RFC 3986: 
sl@0: 	the first aLength characters of the input are replaced by "/" and the last 
sl@0: 	segment of the output, including its preceding '/', is removed. If the 
sl@0: 	output holds no '/' at all it is emptied.
sl@0: 	eg: output /abc/def/fgh --> /abc/def
sl@0: 	
sl@0: 	@param aInputBuf A reference to the inputBuf needs to be modified 
sl@0: 	@param aOutPutBuf A reference to the outPutBuf needs to be modified
sl@0: 	@param aLength length of the string to be replaced.
sl@0:  */
sl@0: void  updateStrings(TPtr8& aInputBuf, TPtr8& aOutPutBuf, TInt aLength)
sl@0: 	{
sl@0: 	aInputBuf.Replace(0,aLength,KSlash);
sl@0: 
sl@0: 	const TInt outputLength = aOutPutBuf.Length();
sl@0: 	const TInt lastSlashPos = aOutPutBuf.LocateReverse('/');
sl@0: 	if(lastSlashPos == KErrNotFound)
sl@0: 		{
sl@0: 		//no separator: the output is a single segment, remove all of it
sl@0: 		aOutPutBuf.Delete( 0, outputLength );
sl@0: 		}
sl@0: 	else
sl@0: 		{
sl@0: 		//remove the last segment together with its '/'
sl@0: 		aOutPutBuf.Delete( lastSlashPos, outputLength - lastSlashPos );
sl@0: 		}
sl@0: 	}
sl@0: