Symaptic: os/ossrv/genericservices/httputils/UriUtils/UriUtils.cpp@260cb5ec6c19 (annotated)

sl@0	1	// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0	2	// All rights reserved.
sl@0	3	// This component and the accompanying materials are made available
sl@0	4	// under the terms of "Eclipse Public License v1.0"
sl@0	5	// which accompanies this distribution, and is available
sl@0	6	// at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0	7	//
sl@0	8	// Initial Contributors:
sl@0	9	// Nokia Corporation - initial contribution.
sl@0	10	//
sl@0	11	// Contributors:
sl@0	12	//
sl@0	13	// Description:
sl@0	14	//
sl@0	15
sl@0	16	#include <uriutils.h>
sl@0	17	#include <uriutilscommon.h>
sl@0	18	#include "UriUtilsInternal.h"
sl@0	19	#include <escapeutils.h>
sl@0	20
sl@0	21	_LIT8(KDot, ".");
sl@0	22	_LIT8(KDotDot, "..");
sl@0	23	_LIT8(KDotSlash, "./");
sl@0	24	_LIT8(KDotDotSlash, "../");
sl@0	25	_LIT8(KSlash, "/");
sl@0	26	_LIT8(KSlashDot, "/.");
sl@0	27	_LIT8(KSlashDotDot, "/..");
sl@0	28	_LIT8(KSlashDotSlash, "/./");
sl@0	29	_LIT8(KSlashDotDotSlash, "/../");
sl@0	30
sl@0	31	_LIT(KHexDigit, "0123456789ABCDEF");	// upper-case hex digits; index == nibble value
sl@0	32	_LIT(KUnreserved, "-.~_");	// punctuation that DoPercentEncodeL decodes from escape triples
sl@0	33	#ifdef _DEBUG
sl@0	34	_LIT(KNormalisationUriPanicCategory, "URI-NORMALIZATION");	// panic category used by the debug assert in DoCaseNormaliseL
sl@0	35	#endif
sl@0	36	const TInt KEscapeIndicator = '%';	// first character of an escape triple
sl@0	37	const TInt KEscapeTripleLength = 3;	// '%' plus two hex digits
sl@0	38	const TInt KEscDelimiterPos = 0;	// index of '%' within a triple
sl@0	39	const TInt KMostSignificantNibblePos = 1;	// index of the first hex digit within a triple
sl@0	40	const TInt KLeastSignificantNibblePos = 2;	// index of the second hex digit within a triple
sl@0	41	const TInt KSubstringLength = 3;	// length of the slice extracted/replaced when handling a triple
sl@0	42	const TInt KUpdateLength = 2;	// extra positions DoCaseNormaliseL skips after rewriting a triple
sl@0	43	const TInt KAttachLength = 1;	// size of the buffer holding one decoded character
sl@0	44
sl@0	45	const TInt KDotLength = 1;	// length of "." (also used for a lone "/")
sl@0	46	const TInt KDotDotLength = 2;	// length of ".." (also used for "./" and "/.")
sl@0	47	const TInt KDotDotSlashLength = 3;	// length of "../" (also used for "/./" and "/..")
sl@0	48	const TInt KSlashDotDotSlashLength = 4;	// length of "/../"
sl@0	49
sl@0	50	//
sl@0	51	//
sl@0	52	// Implementation of UriUtils
sl@0	53	//
sl@0	54	//
sl@0	55
sl@0	56	/**
sl@0	57	Converts a 16-bit format uri into its internet form. Any Unicode characters
sl@0	58	are converted into Utf8 representation and then any excluded characters are
sl@0	59	escape encoded. Reserved characters specified in RFC2396 will not be escape
sl@0	60	encoded however, these include ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \| "$" \| ",".
sl@0	61	For example http://localhost will not be encoded to http%3A%2F%2Flocalhost.
sl@0	62
sl@0	63	@since 6.0
sl@0	64	@deprecated Deprecated in 9.1
sl@0	65	@leave KUriUtilsCannotConvert. When the input data cannot be converted.
sl@0	66	@leave KUriUtilsErr16BitChar. When the input data has a 16-Bit character to be escape encoded.
sl@0	67	@param aUri The 16-bit format uri.
sl@0	68	@return A pointer to a newly created 8-bit uri.
sl@0	69	*/
sl@0	70	EXPORT_C CUri8* UriUtils::ConvertToInternetFormL(const TUriC16& aUri)
sl@0	71	{
sl@0	72	// Need to convert to utf8
sl@0	73	HBufC8* utf8Buf = EscapeUtils::ConvertFromUnicodeToUtf8L(aUri.UriDes());
sl@0	74	CleanupStack::PushL(utf8Buf);
sl@0	75
sl@0	76	// Ok need to parse for the uri without the fragment
sl@0	77	TUriParser8 parser;
sl@0	78	parser.Parse(*utf8Buf);
sl@0	79	TPtrC8 uriNoFragment;
sl@0	80	parser.UriWithoutFragment(uriNoFragment);
sl@0	81
sl@0	82	// Now escape encode the uri without the fragment
sl@0	83	HBufC8* escapedBuf = EscapeUtils::EscapeEncodeL(uriNoFragment, EscapeUtils::EEscapeNormal);
sl@0	84	CleanupStack::PushL(escapedBuf);
sl@0	85
sl@0	86	// Now escape encode the fragment if there is one...
sl@0	87	HBufC8* escapedFragmentBuf = NULL;
sl@0	88	if( parser.IsPresent(EUriFragment) )
sl@0	89	{
sl@0	90	escapedFragmentBuf = EscapeUtils::EscapeEncodeL(parser.Extract(EUriFragment), EscapeUtils::EEscapeNormal);
sl@0	91	CleanupStack::PushL(escapedFragmentBuf);
sl@0	92	}
sl@0	93
sl@0	94	// Parse and then create the CUri8 object
sl@0	95	parser.Parse(*escapedBuf);
sl@0	96	CUri8* netForm = CUri8::NewL(parser);
sl@0	97
sl@0	98	// Set the fragment if there was one...
sl@0	99	if( escapedFragmentBuf != NULL )
sl@0	100	{
sl@0	101	CleanupStack::PushL(netForm);
sl@0	102	netForm->SetComponentL(*escapedFragmentBuf, EUriFragment);
sl@0	103	CleanupStack::Pop(netForm);
sl@0	104	CleanupStack::PopAndDestroy(escapedFragmentBuf);
sl@0	105	}
sl@0	106
sl@0	107	// Cleanup and return
sl@0	108	CleanupStack::PopAndDestroy(2, utf8Buf); // utf8Buf, escapedBuf
sl@0	109	return netForm;
sl@0	110	}
sl@0	111
sl@0	112	/**
sl@0	113	Converts an 8-bit format uri its into display form. Any escape tripes are decoded and
sl@0	114	sets of Utf8 format characters are converted into Unicode.
sl@0	115
sl@0	116	@since 6.0
sl@0	117	@deprecated Deprecated in 9.1
sl@0	118	@leave KUriUtilsCannotConvert. When the input data cannot be converted.
sl@0	119	@param aUri The 8-bit format uri.
sl@0	120	@return A pointer to a newly created 16-bit uri.
sl@0	121	*/
sl@0	122	EXPORT_C CUri16* UriUtils::ConvertToDisplayFormL(const TUriC8& aUri)
sl@0	123	{
sl@0	124	// Need decode escape triples
sl@0	125	HBufC8* unescapedBuf = EscapeUtils::EscapeDecodeL(aUri.UriDes());
sl@0	126	CleanupStack::PushL(unescapedBuf);
sl@0	127
sl@0	128	// Now need to convert utf8 to unicode
sl@0	129	HBufC16* utf8Buf = EscapeUtils::ConvertToUnicodeFromUtf8L(*unescapedBuf);
sl@0	130	CleanupStack::PushL(utf8Buf);
sl@0	131
sl@0	132	// Parse and then create the CUri16 object
sl@0	133	TUriParser16 parser;
sl@0	134	parser.Parse(*utf8Buf);
sl@0	135	CUri16* displayForm = CUri16::NewL(parser);
sl@0	136
sl@0	137	// Cleanup and return
sl@0	138	CleanupStack::PopAndDestroy(2, unescapedBuf); // unescapedBuf, utf8Buf
sl@0	139	return displayForm;
sl@0	140	}
sl@0	141
sl@0	142	/**
sl@0	143	Create a new CUri8 object from a Unicode descriptor.
sl@0	144
sl@0	145	@param aUri a Unicode string containing the URI to parse.
sl@0	146	@return the new CUri8 object
sl@0	147	@leave EUriUtilsParserErrInvalidUri if the descriptor is an invalid URI.
sl@0	148	*/
sl@0	149	EXPORT_C CUri8* UriUtils::CreateUriL(const TDesC& aUri)
sl@0	150	{
sl@0	151	// convert to UTF8
sl@0	152	HBufC8* unsafe8 = EscapeUtils::ConvertFromUnicodeToUtf8L(aUri);
sl@0	153	CleanupStack::PushL(unsafe8);
sl@0	154	// escape encode only those characters that cannot be in a URI. assume all %hh are %encoded already
sl@0	155	HBufC8* uri8desc = EscapeUtils::ReEscapeEncodeL(*unsafe8);
sl@0	156	CleanupStack::PopAndDestroy(unsafe8);
sl@0	157	CleanupStack::PushL(uri8desc);
sl@0	158	TUriParser8 parser;
sl@0	159	// parse the descriptor into a URI, Leave if it cannot be parsed
sl@0	160	User::LeaveIfError( parser.Parse(*uri8desc) );
sl@0	161
sl@0	162	CUri8* uri8 = CUri8::NewL(parser);
sl@0	163	CleanupStack::PopAndDestroy(uri8desc);
sl@0	164	return uri8;
sl@0	165	}
sl@0	166
sl@0	167	/**
sl@0	168	Create a new CAuthority8 object from a Unicode descriptor.
sl@0	169
sl@0	170	@param aAuthority a Unicode string containing the Authority to parse.
sl@0	171	@return the new CAuthority8 object
sl@0	172	@leave EUriUtilsParserErrInvalidUri if the descriptor is an invalid Authority.
sl@0	173	*/
sl@0	174	EXPORT_C CAuthority8* UriUtils::CreateAuthorityL(const TDesC& aAuthority)
sl@0	175	{
sl@0	176	// convert to UTF8
sl@0	177	HBufC8* unsafe8 = EscapeUtils::ConvertFromUnicodeToUtf8L(aAuthority);
sl@0	178	CleanupStack::PushL(unsafe8);
sl@0	179	// escape encode only those characters that cannot be in the authority. assume all %s are %encoded already
sl@0	180	HBufC8* authority8desc = EscapeUtils::ReEscapeEncodeL(*unsafe8);
sl@0	181	CleanupStack::PopAndDestroy(unsafe8);
sl@0	182	CleanupStack::PushL(authority8desc);
sl@0	183	TAuthorityParser8 parser;
sl@0	184	// parse the descriptor into the authority, Leave if it cannot be parsed
sl@0	185	User::LeaveIfError( parser.Parse(*authority8desc) );
sl@0	186
sl@0	187	CAuthority8* authority8 = CAuthority8::NewL(parser);
sl@0	188	CleanupStack::PopAndDestroy(authority8desc);
sl@0	189	return authority8;
sl@0	190	}
sl@0	191
sl@0	192	/**
sl@0	193	Checks a descriptor for excluded (invalid) characters. Excluded characters include all
sl@0	194	control characters (values 0x00 to 0x1F and greater than 0x7F), space (0x20), delimiter
sl@0	195	characters ('<', '>', '#', '%', '"') and unwise characters ('{', '}', '\|', '\', '^', '[', ']', '`').
sl@0	196
sl@0	197	@since 6.0
sl@0	198	@param aData The descriptor to be checked.
sl@0	199	@return A boolean value of ETrue if the descriptor contains invalid
sl@0	200	characters, otherwise EFalse.
sl@0	201	*/
sl@0	202	EXPORT_C TBool UriUtils::HasInvalidChars(const TDesC8& aData)
sl@0	203	{
sl@0	204	return CheckForExcludedChars(aData);
sl@0	205	}
sl@0	206
sl@0	207	/**
sl@0	208	Checks a descriptor for excluded (invalid) characters. Excluded characters include all
sl@0	209	control characters (values 0x00 to 0x1F and greater than 0x7F), space (0x20), delimiter
sl@0	210	characters ('<', '>', '#', '%','"') and unwise characters ('{', '}', '\|', '\', '^', '[', ']', '`').
sl@0	211
sl@0	212	@since 6.0
sl@0	213	@param aData The descriptor to be checked.
sl@0	214	@return A boolean value of ETrue if the descriptor contains invalid
sl@0	215	characters, otherwise EFalse.
sl@0	216	*/
sl@0	217	EXPORT_C TBool UriUtils::HasInvalidChars(const TDesC16& aData)
sl@0	218	{
sl@0	219	return CheckForExcludedChars(aData);
sl@0	220	}
sl@0	221
sl@0	222	/**
sl@0	223	Checks the supplied host for an IPv4, IPv6 or text format host
sl@0	224
sl@0	225	@since 7.0
sl@0	226	@param aHost The descriptor containing the host to check
sl@0	227	@return A TUriHostType enum of either EIPv6, EIPv4, EText or EUnknown
sl@0	228	*/
sl@0	229	EXPORT_C UriUtils::TUriHostType UriUtils::HostType(const TDesC8& aHost)
sl@0	230	{
sl@0	231	return CheckHostType(aHost);
sl@0	232	}
sl@0	233
sl@0	234	/**
sl@0	235	Checks the supplied host for an IPv4, IPv6 or text format host
sl@0	236
sl@0	237	@since 7.0
sl@0	238	@param aHost The descriptor containing the host to check
sl@0	239	@return A TUriHostType enum of either EIPv6, EIPv4, EText or EUnknown
sl@0	240	*/
sl@0	241	EXPORT_C UriUtils::TUriHostType UriUtils::HostType(const TDesC16& aHost)
sl@0	242	{
sl@0	243	return CheckHostType(aHost);
sl@0	244	}
sl@0	245
sl@0	246
sl@0	247
sl@0	248	//
sl@0	249	//
sl@0	250	// Implementation of component internal functions
sl@0	251	//
sl@0	252	//
sl@0	253
sl@0	254	/**
sl@0	255	@internalComponent
sl@0	256
sl@0	257	Checks whether the given scheme is a network scheme or not
sl@0	258
sl@0	259	@param aScheme The descriptor with the scheme.
sl@0	260	@return A boolean value of EFalse if the scheme is SIP. For all other schemes returns ETrue.
sl@0	261	*/
sl@0	262	TBool IsNetworkScheme(const TDesC8& aScheme)
sl@0	263	{
sl@0	264	TUriSchemeType scheme = SchemeType(aScheme);
sl@0	265	if (scheme == ESchemeTypeSip)
sl@0	266	{
sl@0	267	return EFalse;
sl@0	268	}
sl@0	269	return ETrue;
sl@0	270	}
sl@0	271
sl@0	272	/**
sl@0	273	@internalComponent
sl@0	274
sl@0	275	Checks whether the given scheme is a network scheme or not
sl@0	276
sl@0	277	@param aScheme The descriptor with the scheme.
sl@0	278	@return A boolean value of EFalse if the scheme is SIP. For all other schemes returns ETrue.
sl@0	279	*/
sl@0	280	TBool IsNetworkScheme(const TDesC16& aScheme)
sl@0	281	{
sl@0	282	TUriSchemeType scheme = SchemeType(aScheme);
sl@0	283	if (scheme == ESchemeTypeSip)
sl@0	284	{
sl@0	285	return EFalse;
sl@0	286	}
sl@0	287	return ETrue;
sl@0	288	}
sl@0	289
sl@0	290	/**
sl@0	291	@internalComponent
sl@0	292
sl@0	293	Returns the type of the URIs scheme
sl@0	294
sl@0	295	@param aScheme The descriptor with the scheme.
sl@0	296	@return The scheme type
sl@0	297	*/
sl@0	298	TUriSchemeType SchemeType(const TDesC8& aScheme)
sl@0	299	{
sl@0	300	// Compares the scheme with both sip and sips
sl@0	301	if (aScheme.CompareF(KSipScheme8()) == 0 \|\| aScheme.CompareF(KSipsScheme8()) == 0)
sl@0	302	{
sl@0	303	// there's a match so this is a sip scheme
sl@0	304	return ESchemeTypeSip;
sl@0	305	}
sl@0	306	//Compares the scheme with tel
sl@0	307	else if (aScheme.CompareF(KTelScheme8()) == 0)
sl@0	308	{
sl@0	309	return ESchemeTypeTel;
sl@0	310	}
sl@0	311
sl@0	312	return ESchemeTypeUnknown;
sl@0	313	}
sl@0	314
sl@0	315	/**
sl@0	316	@internalComponent
sl@0	317
sl@0	318	Returns the type of the URIs scheme
sl@0	319
sl@0	320	@param aScheme The descriptor with the scheme.
sl@0	321	@return The scheme type
sl@0	322	*/
sl@0	323	TUriSchemeType SchemeType(const TDesC16& aScheme)
sl@0	324	{
sl@0	325	// Compares the scheme with both sip and sips
sl@0	326	if (aScheme.CompareF(KSipScheme()) == 0 \|\| aScheme.CompareF(KSipsScheme()) == 0)
sl@0	327	{
sl@0	328	// there's a match so this is a sip scheme
sl@0	329	return ESchemeTypeSip;
sl@0	330	}
sl@0	331
sl@0	332	return ESchemeTypeUnknown;
sl@0	333	}
sl@0	334
sl@0	335	/**
sl@0	336	@internalComponent
sl@0	337
sl@0	338	Checks that a text host is in a valid form
sl@0	339
sl@0	340	@param aHost The descriptor containing the host to check
sl@0	341	@return ETrue if the host is valid otherwise EFalse
sl@0	342	*/
sl@0	343	TBool IsTextHostValid(const TDesC8& aHost)
sl@0	344	{
sl@0	345	return CheckValidTextHost(aHost);
sl@0	346	}
sl@0	347
sl@0	348	/**
sl@0	349	@internalComponent
sl@0	350
sl@0	351	Checks that a text host is in a valid form
sl@0	352
sl@0	353	@param aHost The descriptor containing the host to check
sl@0	354	@return ETrue if the host is valid otherwise EFalse
sl@0	355	*/
sl@0	356	TBool IsTextHostValid(const TDesC16& aHost)
sl@0	357	{
sl@0	358	return CheckValidTextHost(aHost);
sl@0	359	}
sl@0	360
sl@0	361
sl@0	362	/**
sl@0	363	@internalComponent
sl@0	364
sl@0	365	Parses a segment of the form name=value and returns the name and value parts
sl@0	366
sl@0	367	@param aSegment the name-value segemnt to parse
sl@0	368	@param aName the name part that is returned
sl@0	369	@param aValue the value part that is returned
sl@0	370	*/
sl@0	371	void GetNameValuePair(const TDesC8& aSegment, TPtrC8& aName, TPtrC8& aValue)
sl@0	372	{
sl@0	373	TPtrC8 value;
sl@0	374	TInt sepPos = aSegment.Locate(KEqualsSeparator);
sl@0	375	if (sepPos != KErrNotFound)
sl@0	376	{
sl@0	377	aName.Set(aSegment.Left(sepPos));
sl@0	378	value.Set(aSegment.Mid(sepPos+1));
sl@0	379	}
sl@0	380	else
sl@0	381	{
sl@0	382	aName.Set(aSegment);
sl@0	383	}
sl@0	384
sl@0	385	aValue.Set(value);
sl@0	386	}
sl@0	387
sl@0	388
sl@0	389	//
sl@0	390	//
sl@0	391	// Implementation of LOCAL functions
sl@0	392	//
sl@0	393	//
sl@0	394
sl@0	395	/**
sl@0	396	Checks the descriptor for any excluded characters. These are characters that
sl@0	397	should have been escaped encoded or ocnverted to Utf8 from Unicode.
sl@0	398
sl@0	399	@since 6.0
sl@0	400	@param aData The descriptor to be checked.
sl@0	401	@return A boolean value of ETrue if the descriptor contains excluded
sl@0	402	characters, EFalse if it does not.
sl@0	403	*/
sl@0	404	template<class TDesCType>
sl@0	405	LOCAL_C TBool CheckForExcludedChars(const TDesCType& aData)
sl@0	406	{
sl@0	407	// Run through the descriptor
sl@0	408	TBool valid = ETrue;
sl@0	409	const TInt length = aData.Length();
sl@0	410	TInt i=0;
sl@0	411	while( valid && i<length )
sl@0	412	{
sl@0	413	TInt notUsed;
sl@0	414	// See if the character is an excluded one, or is part of an escape triple...
sl@0	415	if( EscapeUtils::IsExcludedChar(aData[i]) && !EscapeUtils::IsEscapeTriple(aData.Mid(i), notUsed) )
sl@0	416	{
sl@0	417	valid = EFalse;
sl@0	418	}
sl@0	419	else
sl@0	420	{
sl@0	421	++i;
sl@0	422	}
sl@0	423	}
sl@0	424	return !valid;
sl@0	425	}
sl@0	426
sl@0	427	/**
sl@0	428	Checks the supplied host for an IPv4, IPv6 or text format host
sl@0	429
sl@0	430	@since 7.0
sl@0	431	@param aHost The descriptor containing the host to check
sl@0	432	@return A TUriHostType enum of either EIPv6, EIPv4, EText or EUnknown
sl@0	433	*/
sl@0	434	template<class TDesCType>
sl@0	435	LOCAL_C UriUtils::TUriHostType CheckHostType(const TDesCType& aHost)
sl@0	436	{
sl@0	437	UriUtils::TUriHostType hostType;
sl@0	438
sl@0	439	TInt dotCount=0;
sl@0	440	TBool colonPresent=EFalse;
sl@0	441	TBool numeric=ETrue;
sl@0	442
sl@0	443	TInt len = aHost.Length();
sl@0	444	for (TInt ii=0; ii < len && !colonPresent; ++ii)
sl@0	445	{
sl@0	446	TChar ch(aHost[ii]);
sl@0	447
sl@0	448	// host contains a character that is not '0'..'9' or '.'
sl@0	449	if ((ch < 48 \|\| ch > 57) && ch != 46)
sl@0	450	numeric=EFalse;
sl@0	451
sl@0	452	// need to check that IPv4 address has the 3 dots
sl@0	453	if (ch == 46)
sl@0	454	++dotCount;
sl@0	455	else
sl@0	456	if (ch == 58)
sl@0	457	colonPresent=ETrue;
sl@0	458	}
sl@0	459
sl@0	460	if (colonPresent) // if theres a colon, it has to be an IPv6 address
sl@0	461	hostType = UriUtils::EIPv6Host;
sl@0	462	else
sl@0	463	if (numeric && (dotCount==3)) // if its numeric only, and has three seperators...
sl@0	464	hostType = UriUtils::EIPv4Host;
sl@0	465	else
sl@0	466	hostType = UriUtils::ETextHost;
sl@0	467
sl@0	468	return hostType;
sl@0	469	}
sl@0	470
sl@0	471	/**
sl@0	472	@internalComponent
sl@0	473
sl@0	474	Checks that a text host is in a valid form
sl@0	475
sl@0	476	@param aHost The descriptor containing the host to check
sl@0	477	@return ETrue if the host is valid otherwise EFalse
sl@0	478	*/
sl@0	479	template<class TDesCType>
sl@0	480	LOCAL_C TBool CheckValidTextHost(const TDesCType& aHost)
sl@0	481	{
sl@0	482	TInt len = aHost.Length();
sl@0	483	if (len == 0)
sl@0	484	return EFalse;
sl@0	485
sl@0	486	// host name can't start with a dot or dash
sl@0	487	TChar firstChar(aHost[0]);
sl@0	488	if (firstChar == '-' \|\| firstChar == '.')
sl@0	489	return EFalse;
sl@0	490
sl@0	491	TChar prev = '\0';
sl@0	492	TInt ii;
sl@0	493	for (ii=0; ii < len; ii++)
sl@0	494	{
sl@0	495	TChar ch(aHost[ii]);
sl@0	496
sl@0	497	// Valid characters are a-z, 0-9, '-' and '.'
sl@0	498	if ((ch < 'A' \|\| ch > 'Z') && (ch < 'a' \|\| ch > 'z') && (ch < '0' \|\| ch > '9') && ch != '-' && ch != '.')
sl@0	499	{
sl@0	500	return EFalse;
sl@0	501	}
sl@0	502
sl@0	503	// dot is the section separator. Check the previous section is not empty
sl@0	504	if (ch == '.' && prev == '.')
sl@0	505	{
sl@0	506	// can't have an empty section
sl@0	507	return EFalse;
sl@0	508	}
sl@0	509	prev = ch;
sl@0	510	}
sl@0	511
sl@0	512	// host name can't end with a dot or dash
sl@0	513	if (prev == '-' \|\| prev == '.')
sl@0	514	return EFalse;
sl@0	515
sl@0	516	return ETrue;
sl@0	517	}
sl@0	518
sl@0	519	/**
sl@0	520	Supports Syntax-Based Normalization as specifed in section 6.2.2 of RFC3986.
sl@0	521	returns a new CUri8 object containing a normalised URI from a parsed URI object.
sl@0	522
sl@0	523	@param aUri A reference to a parsed uri object.
sl@0	524	@return A pointer to a CUri8 object containing normalised URI.
sl@0	525	@leave KErrNoMemory
sl@0	526	@internalAll
sl@0	527	*/
sl@0	528	EXPORT_C CUri8* UriUtils:: NormaliseUriL(const TUriC8& aUri)
sl@0	529	{
sl@0	530	CUri8* normalisedUri = CUri8::NewLC(aUri);
sl@0	531	PercentEncodeL(normalisedUri);
sl@0	532	CaseNormaliseL(normalisedUri);
sl@0	533	RemoveDotSegmentsL(normalisedUri);
sl@0	534	CleanupStack::Pop(normalisedUri);
sl@0	535	return normalisedUri;
sl@0	536	}
sl@0	537
sl@0	538	/**
sl@0	539	Performs Case Normalization for CUri8 object as specified
sl@0	540	in section 6.2.2.1 of RFC3986.
sl@0	541
sl@0	542	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer
sl@0	543	to CUri8 object with an uri needs to be case normalised and returns with
sl@0	544	case normalised.
sl@0	545	@leave KErrNoMemory
sl@0	546	*/
sl@0	547	void CaseNormaliseL(CUri8* aNormalisedUri )
sl@0	548	{
sl@0	549	//Case normalise the scheme
sl@0	550	DoCaseNormaliseL(aNormalisedUri, EUriScheme);
sl@0	551	//Case normalise the Userinfo
sl@0	552	DoCaseNormaliseL(aNormalisedUri, EUriUserinfo);
sl@0	553	//Case normalise the Host
sl@0	554	DoCaseNormaliseL(aNormalisedUri, EUriHost);
sl@0	555	//Case normalise the Port
sl@0	556	DoCaseNormaliseL(aNormalisedUri, EUriPort);
sl@0	557	//Case normalise the Path
sl@0	558	DoCaseNormaliseL(aNormalisedUri, EUriPath);
sl@0	559	//Case normalise the Query
sl@0	560	DoCaseNormaliseL(aNormalisedUri, EUriQuery);
sl@0	561	//Case normalise the Fragment
sl@0	562	DoCaseNormaliseL(aNormalisedUri, EUriFragment);
sl@0	563	}
sl@0	564
sl@0	565	/**
sl@0	566	Performs Case Normalization for specified sub component of URI.
sl@0	567
sl@0	568	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer
sl@0	569	to CUri8 object with an uri needs to be case normalised and returns with
sl@0	570	case normalised for specified sub component.
sl@0	571	@param aComponent Enumeration of TUriComponent.
sl@0	572	@leave KErrNoMemory
sl@0	573	*/
sl@0	574	void DoCaseNormaliseL(CUri8* aNormalisedUri, TUriComponent aComponent)
sl@0	575	{
sl@0	576	const TUriC8& uri(aNormalisedUri->Uri());
sl@0	577	if(!uri.IsPresent(aComponent) )
sl@0	578	{
sl@0	579	return;
sl@0	580	}
sl@0	581	//extracts subcomponent of uri which needs to be case-normalised
sl@0	582	HBufC8* heapBuf = uri.Extract(aComponent).AllocLC();
sl@0	583	TPtr8 normalisedComponent(heapBuf->Des());
sl@0	584	TBool normalised = EFalse;
sl@0	585	if(aComponent == EUriScheme \|\| aComponent == EUriHost )
sl@0	586	{
sl@0	587	//change this component to lower case
sl@0	588	normalisedComponent.LowerCase();
sl@0	589	normalised = ETrue;
sl@0	590	}
sl@0	591
sl@0	592	TInt len = normalisedComponent.Length();
sl@0	593	TBuf8<KSubstringLength> subString;
sl@0	594	//case normalise the component
sl@0	595	for (TInt pos = 0; pos < len; pos++)
sl@0	596	{
sl@0	597	if (normalisedComponent[pos] == KEscapeIndicator )
sl@0	598	{
sl@0	599	__ASSERT_DEBUG( ((len-pos) >= KSubstringLength), User::Panic(KNormalisationUriPanicCategory, KUriUtilsErrBadEscapeTriple) );
sl@0	600	TPtrC8 componentBuf(normalisedComponent.Mid(pos,KSubstringLength));
sl@0	601	if (ValidateAndConvertPercentEncodedTriple(componentBuf,subString))
sl@0	602	{
sl@0	603	normalisedComponent.Replace(pos,KSubstringLength,subString);
sl@0	604	pos += KUpdateLength;
sl@0	605	normalised = ETrue;
sl@0	606	subString.Zero();
sl@0	607	}
sl@0	608	}
sl@0	609	}
sl@0	610
sl@0	611	//updating the uri with normalised string
sl@0	612	if( normalised )
sl@0	613	{
sl@0	614	if(aComponent<EUriMaxComponents && aComponent >=EUriScheme)
sl@0	615	{
sl@0	616	aNormalisedUri->SetComponentL(normalisedComponent, aComponent);
sl@0	617	}
sl@0	618	else
sl@0	619	{
sl@0	620	User::Leave(KErrArgument);
sl@0	621	}
sl@0	622
sl@0	623	}
sl@0	624	CleanupStack::PopAndDestroy(heapBuf);
sl@0	625	}
sl@0	626
sl@0	627	/**
sl@0	628	Validates and Converts the valid Percent encoded triplets to Uppercase for specified
sl@0	629	sub component of URI. For eg: Converts %3a to %3A
sl@0	630
sl@0	631	@param aData A reference to a string to be validated and converted to upper case.
sl@0	632	@param aCaseNormalizedData A reference to a descriptor that is converted to
sl@0	633	uppercase that is to be returned.
sl@0	634	@return returns a bool whether it is a valid Percent encoded triplet
sl@0	635	*/
sl@0	636	TBool ValidateAndConvertPercentEncodedTriple(TDesC8& aData , TDes8& aCaseNormalizedData )
sl@0	637	{
sl@0	638	// See if the descriptor is actually long enough and
sl@0	639	// Check that the three characters form an escape triple - first char is '%'
sl@0	640	if( aData.Length() < KEscapeTripleLength \|\| aData[KEscDelimiterPos] != KEscapeIndicator )
sl@0	641	{
sl@0	642	return EFalse;//do nothing
sl@0	643	}
sl@0	644
sl@0	645	// Check that next two characters are valid
sl@0	646	TInt mostSignificantDigitValue = KHexDigit().LocateF(aData[KMostSignificantNibblePos] );
sl@0	647	TInt leastSignificantDigitValue = KHexDigit().LocateF(aData[KLeastSignificantNibblePos] );
sl@0	648
sl@0	649	if( mostSignificantDigitValue== KErrNotFound \|\| leastSignificantDigitValue == KErrNotFound )
sl@0	650	{
sl@0	651	// Either of the characters were not a valid hex character
sl@0	652	return EFalse;
sl@0	653	}
sl@0	654	aCaseNormalizedData.Zero();
sl@0	655	aCaseNormalizedData.Append(KEscapeIndicator);
sl@0	656
sl@0	657	//Coverts most significant hex character to uppercase
sl@0	658	(mostSignificantDigitValue >= 0 && mostSignificantDigitValue <= 0xF) ?
sl@0	659	aCaseNormalizedData.Append(KHexDigit().Mid(mostSignificantDigitValue,1)) :
sl@0	660	aCaseNormalizedData.Append(KHexDigit().Mid(mostSignificantDigitValue,1));
sl@0	661
sl@0	662	//Coverts least significant hex character to uppercase
sl@0	663	(leastSignificantDigitValue >= 0 && leastSignificantDigitValue <= 0xF) ?
sl@0	664	aCaseNormalizedData.Append(KHexDigit().Mid(leastSignificantDigitValue,1)) :
sl@0	665	aCaseNormalizedData.Append(aData[KLeastSignificantNibblePos]);
sl@0	666
sl@0	667	return ETrue;
sl@0	668	}
sl@0	669
sl@0	670	/**
sl@0	671	Performs Percent-Encoding Normalization for CUri8 object as specifed in
sl@0	672	section 6.2.2.2 of RFC3986.
sl@0	673
sl@0	674	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer to
sl@0	675	CUri8 object with an uri needs to be Percent-Encoded and returns with Percent-Encode
sl@0	676	normalised form.
sl@0	677	@leave KErrNoMemory
sl@0	678	*/
sl@0	679	void PercentEncodeL(CUri8* aNormalisedUri)
sl@0	680	{
sl@0	681	//PercentEncode the scheme
sl@0	682	DoPercentEncodeL(aNormalisedUri, EUriScheme);
sl@0	683	//PercentEncode the Userinfo
sl@0	684	DoPercentEncodeL(aNormalisedUri, EUriUserinfo);
sl@0	685	//PercentEncode the Host
sl@0	686	DoPercentEncodeL(aNormalisedUri, EUriHost);
sl@0	687	//PercentEncode the Port
sl@0	688	DoPercentEncodeL(aNormalisedUri, EUriPort);
sl@0	689	//PercentEncode the Path
sl@0	690	DoPercentEncodeL(aNormalisedUri, EUriPath);
sl@0	691	//PercentEncode the Query
sl@0	692	DoPercentEncodeL(aNormalisedUri, EUriQuery);
sl@0	693	//PercentEncode the Fragment
sl@0	694	DoPercentEncodeL(aNormalisedUri, EUriFragment);
sl@0	695	}
sl@0	696
sl@0	697	/**
sl@0	698	Performs Percent-Encoding for specified sub component of URI.
sl@0	699
sl@0	700	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer to
sl@0	701	CUri8 object with an uri needs to be Percent-Encoded and returns with Percent-Encoded
sl@0	702	for specified sub component.
sl@0	703	@param aComponent Enumeration of TUriComponent.
sl@0	704	@leave KErrNoMemory
sl@0	705	*/
sl@0	706	void DoPercentEncodeL(CUri8* aNormalisedUri, TUriComponent aComponent)
sl@0	707	{
sl@0	708	const TUriC8& uri(aNormalisedUri->Uri());
sl@0	709	if(!uri.IsPresent(aComponent))
sl@0	710	{
sl@0	711	return;
sl@0	712	}
sl@0	713
sl@0	714	HBufC8* heapBuf = uri.Extract(aComponent).AllocLC();
sl@0	715	TPtr8 percentNormalisedComponent(heapBuf->Des());
sl@0	716	TBool normalised = EFalse;
sl@0	717	TInt len = percentNormalisedComponent.Length();
sl@0	718	for (TInt pos = 0; pos < len; pos++)
sl@0	719	{
sl@0	720	TInt hex;
sl@0	721	// check for and decode '%' encoded characters
sl@0	722	if (percentNormalisedComponent[pos] == KEscapeIndicator && EscapeUtils::IsEscapeTriple(percentNormalisedComponent.Mid(pos, KSubstringLength), hex))
sl@0	723	{
sl@0	724	TChar replacedChar(hex);
sl@0	725	if( KUnreserved().LocateF(hex) != KErrNotFound \|\| replacedChar.IsAlphaDigit() )
sl@0	726	{
sl@0	727	TBuf8<KAttachLength> subString;
sl@0	728	subString.Append(replacedChar);
sl@0	729	percentNormalisedComponent.Replace(pos, KSubstringLength, subString);
sl@0	730	normalised = ETrue;
sl@0	731	len = percentNormalisedComponent.Length();
sl@0	732	}
sl@0	733	}
sl@0	734	}
sl@0	735	if( normalised )
sl@0	736	{
sl@0	737	if(aComponent<EUriMaxComponents && aComponent >=EUriScheme)
sl@0	738	{
sl@0	739	aNormalisedUri->SetComponentL(percentNormalisedComponent, aComponent);
sl@0	740	}
sl@0	741	else
sl@0	742	{
sl@0	743	User::Leave(KErrArgument);
sl@0	744	}
sl@0	745
sl@0	746	}
sl@0	747	CleanupStack::PopAndDestroy(heapBuf);
sl@0	748	}
sl@0	749
sl@0	750	/**
sl@0	751	Performs Path Segment Normalization for CUri8 object as specifed in
sl@0	752	section 6.2.2.3 of RFC3986.
sl@0	753
sl@0	754	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer to
sl@0	755	CUri8 object with uri needs to be Path Segment normalised and returns with
sl@0	756	Path Segment normalised form.
sl@0	757	@leave KErrNoMemory
sl@0	758	*/
sl@0	759	void RemoveDotSegmentsL(CUri8* aNormalisedUri)
sl@0	760	{
sl@0	761	const TUriC8& uri( aNormalisedUri->Uri() );
sl@0	762	if(uri.IsPresent(EUriPath))
sl@0	763	{
sl@0	764	HBufC8* dotSegmentsPath = uri.Extract(EUriPath).AllocLC();
sl@0	765	RemoveExtraneousDotSegmentsL(dotSegmentsPath);
sl@0	766	aNormalisedUri->SetComponentL(*dotSegmentsPath, EUriPath);
sl@0	767	CleanupStack::PopAndDestroy(dotSegmentsPath);
sl@0	768	}
sl@0	769	}
sl@0	770
sl@0	771	/**
sl@0	772	Performs Remove_dot_segments algorithm as specifed in section 5.2.4 of RFC3986.
sl@0	773
sl@0	774	@param aUriInputPath It is an in-out parameter. aUriInputPath is a pointer to the
sl@0	775	path descriptor to be normalised for extraneous dot_segments and returns with
sl@0	776	normalised dot_segments.
sl@0	777	@leave KErrNoMemory
sl@0	778	*/
sl@0	779	void RemoveExtraneousDotSegmentsL(HBufC8* aUriInputPath)
sl@0	780	{
sl@0	781	TPtr8 uriPathBuf(aUriInputPath->Des());
sl@0	782	TInt length = uriPathBuf.Length();
sl@0	783	HBufC8* path = HBufC8::NewLC(length);
sl@0	784	TPtr8 transitionalBuf(path->Des());
sl@0	785
sl@0	786	while(length > 0)
sl@0	787	{
sl@0	788	//step a of section 5.2.4 of RFC 3986
sl@0	789	if(length >= KDotDotSlashLength &&
sl@0	790	KDotDotSlash().Compare(uriPathBuf.Mid(0, KDotDotSlashLength)) == 0 )
sl@0	791	{
sl@0	792	uriPathBuf.Delete(0,KDotDotSlashLength);
sl@0	793	}
sl@0	794	//step a of section 5.2.4 of RFC 3986
sl@0	795	else if(length >= KDotDotLength &&
sl@0	796	KDotSlash().Compare(uriPathBuf.Mid(0, KDotDotLength)) == 0)
sl@0	797	{
sl@0	798	uriPathBuf.Delete(0,KDotDotLength);
sl@0	799	}
sl@0	800	//step b of section 5.2.4 of RFC 3986
sl@0	801	else if(length >= KDotDotSlashLength &&
sl@0	802	KSlashDotSlash().Compare(uriPathBuf.Mid(0, KDotDotSlashLength)) == 0)
sl@0	803	{
sl@0	804	uriPathBuf.Replace(0, KDotDotSlashLength, KSlash);
sl@0	805	}
sl@0	806	//step c of section 5.2.4 of RFC 3986
sl@0	807	else if(length >= KSlashDotDotSlashLength &&
sl@0	808	KSlashDotDotSlash().Compare(uriPathBuf.Mid(0, KSlashDotDotSlashLength)) == 0)
sl@0	809	{
sl@0	810	updateStrings(uriPathBuf, transitionalBuf, KSlashDotDotSlashLength);
sl@0	811	}
sl@0	812	//step c of section 5.2.4 of RFC 3986 --complete path segment
sl@0	813	else if(length == KDotDotSlashLength &&
sl@0	814	KSlashDotDot().Compare(uriPathBuf.Mid(0, KDotDotSlashLength)) == 0)
sl@0	815	{
sl@0	816	updateStrings(uriPathBuf, transitionalBuf, KDotDotSlashLength);
sl@0	817	}
sl@0	818	//step b of section 5.2.4 of RFC 3986--complete path segment
sl@0	819	else if(length == KDotDotLength &&
sl@0	820	KSlashDot().Compare(uriPathBuf.Mid(0, KDotDotLength)) == 0)
sl@0	821	{
sl@0	822	uriPathBuf.Replace(0, KDotDotLength, KSlash);
sl@0	823	}
sl@0	824	//step d of section 5.2.4 of RFC 3986
sl@0	825	else if(length == KDotDotLength &&
sl@0	826	KDotDot().Compare(uriPathBuf.Mid(0)) == 0)
sl@0	827	{
sl@0	828	uriPathBuf.Delete(0,KDotDotLength);
sl@0	829	}
sl@0	830	//step d of section 5.2.4 of RFC 3986
sl@0	831	else if(length == KDotLength &&
sl@0	832	KDot().Compare(uriPathBuf.Mid(0)) == 0)
sl@0	833	{
sl@0	834	uriPathBuf.Delete(0,KDotLength);
sl@0	835	}
sl@0	836	//step e of section 5.2.4 of RFC 3986
sl@0	837	else
sl@0	838	{
sl@0	839	//get the first path segment including initial / (if any)from uriPathBuf
sl@0	840	// till next slash (but not including next slash)..append it to the output Buf
sl@0	841	TInt substrLength;
sl@0	842	TInt nextSlashPos = uriPathBuf.Find(KSlash);
sl@0	843	if(nextSlashPos == 0 && length > KDotLength)
sl@0	844	//replace with locate next
sl@0	845	{
sl@0	846	nextSlashPos = uriPathBuf.Mid(1).Find(KSlash);
sl@0	847	if(nextSlashPos != KErrNotFound)
sl@0	848	{
sl@0	849	++nextSlashPos;
sl@0	850	}
sl@0	851	}
sl@0	852	if(length == KDotLength)
sl@0	853	//only '/' is exist
sl@0	854	{
sl@0	855	substrLength = length;
sl@0	856	}
sl@0	857	else
sl@0	858	{
sl@0	859	substrLength = nextSlashPos == KErrNotFound ? length : nextSlashPos ;
sl@0	860	}
sl@0	861	transitionalBuf.Append(uriPathBuf.Mid(0,substrLength));
sl@0	862	uriPathBuf.Delete(0,substrLength);
sl@0	863	}
sl@0	864	length = uriPathBuf.Length();
sl@0	865	}
sl@0	866	uriPathBuf.Copy(transitionalBuf);
sl@0	867	CleanupStack::PopAndDestroy(path);
sl@0	868	}
sl@0	869
sl@0	870	/**
sl@0	871	Updates the strings specified in step c of section 5.2.4 of RFC 3986
sl@0	872
sl@0	873	@param aInputBuf A reference to the inputBuf needs to be modified
sl@0	874	@param aOutPutBuf A reference to the outPutBuf needs to be modified
sl@0	875	@param aLength length of the string to be replaced.
sl@0	876	*/
sl@0	877	void updateStrings(TPtr8& aInputBuf, TPtr8& aOutPutBuf, TInt aLength)
sl@0	878	{
sl@0	879	aInputBuf.Replace(0,aLength,KSlash);
sl@0	880
sl@0	881	//In outPutBuf to remove the last segment starting with / (if exist)
sl@0	882	//eg: /abc/def/fgh --> /abc/def
sl@0	883	TInt outputBufLength = aOutPutBuf.Length();
sl@0	884	TInt pos = aOutPutBuf.LocateReverse('/');
sl@0	885	//remove the last segment including '/'
sl@0	886	pos != KErrNotFound ? aOutPutBuf.Delete( pos, outputBufLength - pos ) : aOutPutBuf.Delete( 0,outputBufLength );
sl@0	887	}
sl@0	888

author	sl
	Tue, 10 Jun 2014 14:32:02 +0200
changeset 1	260cb5ec6c19
permissions	-rw-r--r--