os/ossrv/genericservices/httputils/UriUtils/UriUtils.cpp
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
sl@0
     1
// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
sl@0
     2
// All rights reserved.
sl@0
     3
// This component and the accompanying materials are made available
sl@0
     4
// under the terms of "Eclipse Public License v1.0"
sl@0
     5
// which accompanies this distribution, and is available
sl@0
     6
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
sl@0
     7
//
sl@0
     8
// Initial Contributors:
sl@0
     9
// Nokia Corporation - initial contribution.
sl@0
    10
//
sl@0
    11
// Contributors:
sl@0
    12
//
sl@0
    13
// Description:
sl@0
    14
//
sl@0
    15
sl@0
    16
#include <uriutils.h>
sl@0
    17
#include <uriutilscommon.h>
sl@0
    18
#include "UriUtilsInternal.h"
sl@0
    19
#include <escapeutils.h>
sl@0
    20
sl@0
    21
_LIT8(KDot, ".");
sl@0
    22
_LIT8(KDotDot, "..");
sl@0
    23
_LIT8(KDotSlash, "./");
sl@0
    24
_LIT8(KDotDotSlash, "../");
sl@0
    25
_LIT8(KSlash, "/");	
sl@0
    26
_LIT8(KSlashDot, "/.");
sl@0
    27
_LIT8(KSlashDotDot, "/..");
sl@0
    28
_LIT8(KSlashDotSlash, "/./");
sl@0
    29
_LIT8(KSlashDotDotSlash, "/../");
sl@0
    30
sl@0
    31
_LIT(KHexDigit, "0123456789ABCDEF");
sl@0
    32
_LIT(KUnreserved, "-.~_");
sl@0
    33
#ifdef _DEBUG
sl@0
    34
_LIT(KNormalisationUriPanicCategory, "URI-NORMALIZATION");
sl@0
    35
#endif
sl@0
    36
const TInt KEscapeIndicator					= '%';
sl@0
    37
const TInt KEscapeTripleLength				= 3;
sl@0
    38
const TInt KEscDelimiterPos					= 0;
sl@0
    39
const TInt KMostSignificantNibblePos		= 1;
sl@0
    40
const TInt KLeastSignificantNibblePos		= 2;
sl@0
    41
const TInt KSubstringLength					= 3;
sl@0
    42
const TInt KUpdateLength					= 2;
sl@0
    43
const TInt KAttachLength					= 1;
sl@0
    44
sl@0
    45
const TInt KDotLength						= 1;
sl@0
    46
const TInt KDotDotLength					= 2;
sl@0
    47
const TInt KDotDotSlashLength				= 3;
sl@0
    48
const TInt KSlashDotDotSlashLength			= 4;
sl@0
    49
sl@0
    50
//
sl@0
    51
//
sl@0
    52
// Implementation of UriUtils
sl@0
    53
//
sl@0
    54
//
sl@0
    55
sl@0
    56
/**
sl@0
    57
	Converts a 16-bit format uri into its internet form. Any Unicode characters 
sl@0
    58
	are converted into Utf8 representation and then any excluded characters are 
sl@0
    59
	escape encoded.  Reserved characters specified in RFC2396 will not be escape 
sl@0
    60
	encoded however, these include ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ",". 
sl@0
    61
	For example http://localhost will not be encoded to http%3A%2F%2Flocalhost.
sl@0
    62
	
sl@0
    63
	@since			6.0
sl@0
    64
	@deprecated	 Deprecated in 9.1
sl@0
    65
	@leave			KUriUtilsCannotConvert. When the input data cannot be converted.
sl@0
    66
	@leave			KUriUtilsErr16BitChar. When the input data has a 16-Bit character to be escape encoded.
sl@0
    67
	@param			aUri	The 16-bit format uri.
sl@0
    68
	@return			A pointer to a newly created 8-bit uri.
sl@0
    69
 */
sl@0
    70
EXPORT_C CUri8* UriUtils::ConvertToInternetFormL(const TUriC16& aUri)
sl@0
    71
	{
sl@0
    72
	// Need to convert to utf8
sl@0
    73
	HBufC8* utf8Buf = EscapeUtils::ConvertFromUnicodeToUtf8L(aUri.UriDes());
sl@0
    74
	CleanupStack::PushL(utf8Buf);
sl@0
    75
sl@0
    76
	// Ok need to parse for the uri without the fragment
sl@0
    77
	TUriParser8 parser;
sl@0
    78
	parser.Parse(*utf8Buf);
sl@0
    79
	TPtrC8 uriNoFragment;
sl@0
    80
	parser.UriWithoutFragment(uriNoFragment);
sl@0
    81
sl@0
    82
	// Now escape encode the uri without the fragment
sl@0
    83
	HBufC8* escapedBuf = EscapeUtils::EscapeEncodeL(uriNoFragment, EscapeUtils::EEscapeNormal);
sl@0
    84
	CleanupStack::PushL(escapedBuf);
sl@0
    85
sl@0
    86
	// Now escape encode the fragment if there is one...
sl@0
    87
	HBufC8* escapedFragmentBuf = NULL;
sl@0
    88
	if( parser.IsPresent(EUriFragment) )
sl@0
    89
		{
sl@0
    90
		escapedFragmentBuf = EscapeUtils::EscapeEncodeL(parser.Extract(EUriFragment), EscapeUtils::EEscapeNormal);
sl@0
    91
		CleanupStack::PushL(escapedFragmentBuf);
sl@0
    92
		}
sl@0
    93
sl@0
    94
	// Parse and then create the CUri8 object
sl@0
    95
	parser.Parse(*escapedBuf);
sl@0
    96
	CUri8* netForm = CUri8::NewL(parser);
sl@0
    97
sl@0
    98
	// Set the fragment if there was one...
sl@0
    99
	if( escapedFragmentBuf != NULL )
sl@0
   100
		{
sl@0
   101
		CleanupStack::PushL(netForm);
sl@0
   102
		netForm->SetComponentL(*escapedFragmentBuf, EUriFragment);
sl@0
   103
		CleanupStack::Pop(netForm);
sl@0
   104
		CleanupStack::PopAndDestroy(escapedFragmentBuf);
sl@0
   105
		}
sl@0
   106
sl@0
   107
	// Cleanup and return
sl@0
   108
	CleanupStack::PopAndDestroy(2, utf8Buf);	// utf8Buf, escapedBuf
sl@0
   109
	return netForm;
sl@0
   110
	}
sl@0
   111
sl@0
   112
/**
sl@0
   113
	Converts an 8-bit format uri its into display form. Any escape tripes are decoded and 
sl@0
   114
	sets of Utf8 format characters are converted into Unicode.
sl@0
   115
	
sl@0
   116
	@since			6.0
sl@0
   117
	@deprecated	 Deprecated in 9.1	
sl@0
   118
	@leave			KUriUtilsCannotConvert. When the input data cannot be converted.
sl@0
   119
	@param			aUri	The 8-bit format uri.
sl@0
   120
	@return			A pointer to a newly created 16-bit uri.
sl@0
   121
 */
sl@0
   122
EXPORT_C CUri16* UriUtils::ConvertToDisplayFormL(const TUriC8& aUri)
sl@0
   123
	{
sl@0
   124
	// Need decode escape triples
sl@0
   125
	HBufC8* unescapedBuf = EscapeUtils::EscapeDecodeL(aUri.UriDes());
sl@0
   126
	CleanupStack::PushL(unescapedBuf);
sl@0
   127
sl@0
   128
	// Now need to convert utf8 to unicode
sl@0
   129
	HBufC16* utf8Buf = EscapeUtils::ConvertToUnicodeFromUtf8L(*unescapedBuf);
sl@0
   130
	CleanupStack::PushL(utf8Buf);
sl@0
   131
sl@0
   132
	// Parse and then create the CUri16 object
sl@0
   133
	TUriParser16 parser;
sl@0
   134
	parser.Parse(*utf8Buf);
sl@0
   135
	CUri16* displayForm = CUri16::NewL(parser);
sl@0
   136
sl@0
   137
	// Cleanup and return
sl@0
   138
	CleanupStack::PopAndDestroy(2, unescapedBuf);	// unescapedBuf, utf8Buf
sl@0
   139
	return displayForm;
sl@0
   140
	}
sl@0
   141
	
sl@0
   142
/** 
sl@0
   143
	Create a new CUri8 object from a Unicode descriptor.
sl@0
   144
	
sl@0
   145
	@param aUri a Unicode string containing the URI to parse.
sl@0
   146
	@return the new CUri8 object
sl@0
   147
	@leave EUriUtilsParserErrInvalidUri  if the descriptor is an invalid URI.
sl@0
   148
 */
sl@0
   149
EXPORT_C CUri8* UriUtils::CreateUriL(const TDesC& aUri) 
sl@0
   150
	{
sl@0
   151
	// convert to UTF8
sl@0
   152
	HBufC8* unsafe8 = EscapeUtils::ConvertFromUnicodeToUtf8L(aUri);
sl@0
   153
     	CleanupStack::PushL(unsafe8);
sl@0
   154
	// escape encode only those characters that cannot be in a URI. assume all %hh are %encoded already
sl@0
   155
	HBufC8* uri8desc = EscapeUtils::ReEscapeEncodeL(*unsafe8);
sl@0
   156
	CleanupStack::PopAndDestroy(unsafe8);
sl@0
   157
     	CleanupStack::PushL(uri8desc);
sl@0
   158
	TUriParser8 parser;
sl@0
   159
	// parse the descriptor into a URI, Leave if it cannot be parsed
sl@0
   160
	User::LeaveIfError( parser.Parse(*uri8desc) );
sl@0
   161
sl@0
   162
	CUri8* uri8 = CUri8::NewL(parser);
sl@0
   163
	CleanupStack::PopAndDestroy(uri8desc);
sl@0
   164
	return uri8;
sl@0
   165
	}
sl@0
   166
sl@0
   167
/** 
sl@0
   168
	Create a new CAuthority8 object from a Unicode descriptor.
sl@0
   169
	
sl@0
   170
	@param aAuthority a Unicode string containing the Authority to parse.
sl@0
   171
	@return the new CAuthority8 object
sl@0
   172
	@leave EUriUtilsParserErrInvalidUri  if the descriptor is an invalid Authority.
sl@0
   173
 */
sl@0
   174
EXPORT_C CAuthority8* UriUtils::CreateAuthorityL(const TDesC& aAuthority) 
sl@0
   175
	{
sl@0
   176
	// convert to UTF8
sl@0
   177
	HBufC8* unsafe8 = EscapeUtils::ConvertFromUnicodeToUtf8L(aAuthority);
sl@0
   178
     	CleanupStack::PushL(unsafe8);
sl@0
   179
	// escape encode only those characters that cannot be in the authority. assume all %s are %encoded already
sl@0
   180
	HBufC8* authority8desc = EscapeUtils::ReEscapeEncodeL(*unsafe8);
sl@0
   181
	CleanupStack::PopAndDestroy(unsafe8);
sl@0
   182
     	CleanupStack::PushL(authority8desc);
sl@0
   183
	TAuthorityParser8 parser;
sl@0
   184
	// parse the descriptor into the authority, Leave if it cannot be parsed
sl@0
   185
	User::LeaveIfError( parser.Parse(*authority8desc) );
sl@0
   186
sl@0
   187
	CAuthority8* authority8 = CAuthority8::NewL(parser);
sl@0
   188
	CleanupStack::PopAndDestroy(authority8desc);
sl@0
   189
	return authority8;
sl@0
   190
	}
sl@0
   191
sl@0
   192
/**
sl@0
   193
	Checks a descriptor for excluded (invalid) characters. Excluded characters include all 
sl@0
   194
	control characters (values 0x00 to 0x1F and greater than 0x7F), space (0x20), delimiter 
sl@0
   195
	characters ('<', '>', '#', '%',	'"') and unwise characters ('{', '}', '|', '\', '^', '[', ']', '`').
sl@0
   196
	
sl@0
   197
	@since			6.0
sl@0
   198
	@param			aData	The descriptor to be checked.
sl@0
   199
	@return			A boolean value of ETrue if the descriptor contains invalid
sl@0
   200
	characters, otherwise EFalse.
sl@0
   201
 */
sl@0
   202
EXPORT_C TBool UriUtils::HasInvalidChars(const TDesC8& aData)
sl@0
   203
	{
sl@0
   204
	return CheckForExcludedChars(aData);
sl@0
   205
	}
sl@0
   206
	
sl@0
   207
/**
sl@0
   208
	Checks a descriptor for excluded (invalid) characters. Excluded characters include all 
sl@0
   209
	control characters (values 0x00 to 0x1F and	greater than 0x7F), space (0x20), delimiter 
sl@0
   210
	characters ('<', '>', '#', '%','"') and unwise characters ('{', '}', '|', '\', '^', '[', ']', '`').
sl@0
   211
	
sl@0
   212
	@since			6.0
sl@0
   213
	@param			aData	The descriptor to be checked.
sl@0
   214
	@return			A boolean value of ETrue if the descriptor contains invalid
sl@0
   215
	characters, otherwise EFalse.
sl@0
   216
 */
sl@0
   217
EXPORT_C TBool UriUtils::HasInvalidChars(const TDesC16& aData)
sl@0
   218
	{
sl@0
   219
	return CheckForExcludedChars(aData);
sl@0
   220
	}
sl@0
   221
sl@0
   222
/**
sl@0
   223
	Checks the supplied host for an IPv4, IPv6 or text format host
sl@0
   224
	
sl@0
   225
	@since			7.0
sl@0
   226
	@param			aHost	The descriptor containing the host to check
sl@0
   227
	@return			A TUriHostType enum of either EIPv6, EIPv4, EText or EUnknown
sl@0
   228
 */
sl@0
   229
EXPORT_C UriUtils::TUriHostType UriUtils::HostType(const TDesC8& aHost)
sl@0
   230
	{
sl@0
   231
	return CheckHostType(aHost);
sl@0
   232
	}
sl@0
   233
sl@0
   234
/**
sl@0
   235
	Checks the supplied host for an IPv4, IPv6 or text format host
sl@0
   236
	
sl@0
   237
	@since			7.0
sl@0
   238
	@param			aHost	The descriptor containing the host to check
sl@0
   239
	@return			A TUriHostType enum of either EIPv6, EIPv4, EText or EUnknown
sl@0
   240
 */
sl@0
   241
EXPORT_C UriUtils::TUriHostType UriUtils::HostType(const TDesC16& aHost)
sl@0
   242
	{
sl@0
   243
	return CheckHostType(aHost);
sl@0
   244
	}
sl@0
   245
sl@0
   246
sl@0
   247
sl@0
   248
//
sl@0
   249
//
sl@0
   250
// Implementation of component internal functions
sl@0
   251
//
sl@0
   252
//
sl@0
   253
sl@0
   254
/**
sl@0
   255
	@internalComponent
sl@0
   256
sl@0
   257
	Checks whether the given scheme is a network scheme or not
sl@0
   258
	
sl@0
   259
	@param aScheme The descriptor with the scheme.
sl@0
   260
	@return A boolean value of EFalse if the scheme is SIP.	For all other schemes returns ETrue.
sl@0
   261
 */
sl@0
   262
TBool IsNetworkScheme(const TDesC8& aScheme)
sl@0
   263
	{
sl@0
   264
	TUriSchemeType scheme = SchemeType(aScheme);
sl@0
   265
	if (scheme == ESchemeTypeSip)
sl@0
   266
		{
sl@0
   267
		return EFalse;
sl@0
   268
		}
sl@0
   269
	return ETrue;
sl@0
   270
	}
sl@0
   271
sl@0
   272
/**
sl@0
   273
	@internalComponent
sl@0
   274
sl@0
   275
	Checks whether the given scheme is a network scheme or not
sl@0
   276
	
sl@0
   277
	@param aScheme The descriptor with the scheme.
sl@0
   278
	@return A boolean value of EFalse if the scheme is SIP.	For all other schemes returns ETrue.
sl@0
   279
 */
sl@0
   280
TBool IsNetworkScheme(const TDesC16& aScheme)
sl@0
   281
	{
sl@0
   282
	TUriSchemeType scheme = SchemeType(aScheme);
sl@0
   283
	if (scheme == ESchemeTypeSip)
sl@0
   284
		{
sl@0
   285
		return EFalse;
sl@0
   286
		}
sl@0
   287
	return ETrue;
sl@0
   288
	}
sl@0
   289
sl@0
   290
/**
sl@0
   291
	@internalComponent
sl@0
   292
sl@0
   293
	Returns the type of the URIs scheme
sl@0
   294
	
sl@0
   295
	@param			aScheme	The descriptor with the scheme.
sl@0
   296
	@return			The scheme type
sl@0
   297
 */
sl@0
   298
TUriSchemeType SchemeType(const TDesC8& aScheme)
sl@0
   299
	{
sl@0
   300
	// Compares the scheme with both sip and sips
sl@0
   301
	if (aScheme.CompareF(KSipScheme8()) == 0 || aScheme.CompareF(KSipsScheme8()) == 0)
sl@0
   302
		{
sl@0
   303
		// there's a match so this is a sip scheme
sl@0
   304
		return ESchemeTypeSip;
sl@0
   305
		}
sl@0
   306
	//Compares the scheme with tel 
sl@0
   307
	else if (aScheme.CompareF(KTelScheme8()) == 0) 
sl@0
   308
		{
sl@0
   309
		return ESchemeTypeTel;
sl@0
   310
		} 
sl@0
   311
sl@0
   312
	return ESchemeTypeUnknown;
sl@0
   313
	}
sl@0
   314
sl@0
   315
/**
sl@0
   316
	@internalComponent
sl@0
   317
sl@0
   318
	Returns the type of the URIs scheme
sl@0
   319
	
sl@0
   320
	@param			aScheme	The descriptor with the scheme.
sl@0
   321
	@return			The scheme type
sl@0
   322
 */
sl@0
   323
TUriSchemeType SchemeType(const TDesC16& aScheme)
sl@0
   324
	{
sl@0
   325
	// Compares the scheme with both sip and sips
sl@0
   326
	if (aScheme.CompareF(KSipScheme()) == 0 || aScheme.CompareF(KSipsScheme()) == 0)
sl@0
   327
		{
sl@0
   328
		// there's a match so this is a sip scheme
sl@0
   329
		return ESchemeTypeSip;
sl@0
   330
		}
sl@0
   331
sl@0
   332
	return ESchemeTypeUnknown;
sl@0
   333
	}
sl@0
   334
sl@0
   335
/**
sl@0
   336
	@internalComponent
sl@0
   337
sl@0
   338
	Checks that a text host is in a valid form
sl@0
   339
	
sl@0
   340
	@param			aHost	The descriptor containing the host to check
sl@0
   341
	@return			ETrue if the host is valid otherwise EFalse
sl@0
   342
 */
sl@0
   343
TBool IsTextHostValid(const TDesC8& aHost)
sl@0
   344
	{
sl@0
   345
	return CheckValidTextHost(aHost);
sl@0
   346
	}
sl@0
   347
sl@0
   348
/**
sl@0
   349
	@internalComponent
sl@0
   350
sl@0
   351
	Checks that a text host is in a valid form
sl@0
   352
	
sl@0
   353
	@param			aHost	The descriptor containing the host to check
sl@0
   354
	@return			ETrue if the host is valid otherwise EFalse
sl@0
   355
 */
sl@0
   356
TBool IsTextHostValid(const TDesC16& aHost)
sl@0
   357
	{
sl@0
   358
	return CheckValidTextHost(aHost);
sl@0
   359
	}
sl@0
   360
sl@0
   361
sl@0
   362
/**
sl@0
   363
	@internalComponent
sl@0
   364
sl@0
   365
	Parses a segment of the form name=value and returns the name and value parts
sl@0
   366
	
sl@0
   367
	@param			aSegment	the name-value segemnt to parse
sl@0
   368
	@param			aName		the name part that is returned
sl@0
   369
	@param			aValue		the value part that is returned
sl@0
   370
 */
sl@0
   371
void GetNameValuePair(const TDesC8& aSegment, TPtrC8& aName, TPtrC8& aValue)
sl@0
   372
	{
sl@0
   373
	TPtrC8 value;
sl@0
   374
	TInt sepPos = aSegment.Locate(KEqualsSeparator);
sl@0
   375
	if (sepPos != KErrNotFound)
sl@0
   376
		{
sl@0
   377
		aName.Set(aSegment.Left(sepPos));
sl@0
   378
		value.Set(aSegment.Mid(sepPos+1));
sl@0
   379
		}
sl@0
   380
	else
sl@0
   381
		{
sl@0
   382
		aName.Set(aSegment);
sl@0
   383
		}
sl@0
   384
sl@0
   385
	aValue.Set(value);
sl@0
   386
	}
sl@0
   387
sl@0
   388
sl@0
   389
//
sl@0
   390
//
sl@0
   391
// Implementation of LOCAL functions
sl@0
   392
//
sl@0
   393
//
sl@0
   394
sl@0
   395
/**
sl@0
   396
	Checks the descriptor for any excluded characters. These are characters that 
sl@0
   397
	should have been escaped encoded or ocnverted to Utf8 from Unicode.
sl@0
   398
						
sl@0
   399
	@since			6.0
sl@0
   400
	@param			aData	The descriptor to be checked.
sl@0
   401
	@return		A boolean value of ETrue if the descriptor contains excluded
sl@0
   402
				characters, EFalse if it does not.
sl@0
   403
 */
sl@0
   404
template<class TDesCType>
sl@0
   405
LOCAL_C TBool CheckForExcludedChars(const TDesCType& aData)
sl@0
   406
	{
sl@0
   407
	// Run through the descriptor
sl@0
   408
	TBool valid = ETrue;
sl@0
   409
	const TInt length = aData.Length();
sl@0
   410
	TInt i=0;
sl@0
   411
	while( valid && i<length )
sl@0
   412
		{
sl@0
   413
		TInt notUsed;
sl@0
   414
		// See if the character is an excluded one, or is part of an escape triple...
sl@0
   415
		if( EscapeUtils::IsExcludedChar(aData[i]) && !EscapeUtils::IsEscapeTriple(aData.Mid(i), notUsed) )
sl@0
   416
			{
sl@0
   417
			valid = EFalse;
sl@0
   418
			}
sl@0
   419
		else
sl@0
   420
			{
sl@0
   421
			++i;
sl@0
   422
			}
sl@0
   423
		}
sl@0
   424
	return !valid;
sl@0
   425
	}
sl@0
   426
sl@0
   427
/**
sl@0
   428
	Checks the supplied host for an IPv4, IPv6 or text format host
sl@0
   429
	
sl@0
   430
	@since			7.0
sl@0
   431
	@param			aHost	The descriptor containing the host to check
sl@0
   432
	@return			A TUriHostType enum of either EIPv6, EIPv4, EText or EUnknown
sl@0
   433
 */
sl@0
   434
template<class TDesCType>
sl@0
   435
LOCAL_C UriUtils::TUriHostType CheckHostType(const TDesCType& aHost)
sl@0
   436
	{
sl@0
   437
	UriUtils::TUriHostType hostType;
sl@0
   438
sl@0
   439
	TInt dotCount=0;
sl@0
   440
	TBool colonPresent=EFalse;
sl@0
   441
	TBool numeric=ETrue;
sl@0
   442
sl@0
   443
	TInt len = aHost.Length();
sl@0
   444
	for (TInt ii=0; ii < len && !colonPresent; ++ii)
sl@0
   445
		{
sl@0
   446
		TChar ch(aHost[ii]);
sl@0
   447
sl@0
   448
		// host contains a character that is not '0'..'9' or '.'
sl@0
   449
		if ((ch < 48 || ch > 57) && ch != 46)
sl@0
   450
			numeric=EFalse;
sl@0
   451
sl@0
   452
		// need to check that IPv4 address has the 3 dots
sl@0
   453
		if (ch == 46)
sl@0
   454
			++dotCount;
sl@0
   455
		else
sl@0
   456
			if (ch == 58)
sl@0
   457
				colonPresent=ETrue;
sl@0
   458
		}
sl@0
   459
sl@0
   460
	if (colonPresent) // if theres a colon, it has to be an IPv6 address
sl@0
   461
		hostType = UriUtils::EIPv6Host;
sl@0
   462
	else
sl@0
   463
		if (numeric  && (dotCount==3)) // if its numeric only, and has three seperators...
sl@0
   464
			hostType = UriUtils::EIPv4Host;
sl@0
   465
		else
sl@0
   466
			hostType = UriUtils::ETextHost;
sl@0
   467
sl@0
   468
	return hostType;
sl@0
   469
	}
sl@0
   470
sl@0
   471
/**
sl@0
   472
	@internalComponent
sl@0
   473
sl@0
   474
	Checks that a text host is in a valid form
sl@0
   475
	
sl@0
   476
	@param			aHost	The descriptor containing the host to check
sl@0
   477
	@return			ETrue if the host is valid otherwise EFalse
sl@0
   478
 */
sl@0
   479
template<class TDesCType>
sl@0
   480
LOCAL_C TBool CheckValidTextHost(const TDesCType& aHost)
sl@0
   481
	{
sl@0
   482
	TInt len = aHost.Length();
sl@0
   483
	if (len == 0)
sl@0
   484
		return EFalse;
sl@0
   485
sl@0
   486
	// host name can't start with a dot or dash
sl@0
   487
	TChar firstChar(aHost[0]);
sl@0
   488
	if (firstChar == '-' || firstChar == '.')
sl@0
   489
		return EFalse;
sl@0
   490
sl@0
   491
	TChar prev = '\0';
sl@0
   492
	TInt ii;
sl@0
   493
	for (ii=0; ii < len; ii++)
sl@0
   494
		{
sl@0
   495
		TChar ch(aHost[ii]);
sl@0
   496
sl@0
   497
		// Valid characters are a-z, 0-9, '-' and '.'
sl@0
   498
		if ((ch < 'A' || ch > 'Z') && (ch < 'a' || ch > 'z') && (ch < '0' || ch > '9') && ch != '-' && ch != '.')
sl@0
   499
			{
sl@0
   500
			return EFalse;
sl@0
   501
			}
sl@0
   502
sl@0
   503
		// dot is the section separator. Check the previous section is not empty
sl@0
   504
		if (ch == '.' && prev == '.')
sl@0
   505
			{
sl@0
   506
			// can't have an empty section
sl@0
   507
			return EFalse;
sl@0
   508
			}
sl@0
   509
			prev = ch;
sl@0
   510
		}
sl@0
   511
sl@0
   512
	// host name can't end with a dot or dash
sl@0
   513
	if (prev == '-' || prev == '.')
sl@0
   514
		return EFalse;
sl@0
   515
sl@0
   516
	return ETrue;
sl@0
   517
	}
sl@0
   518
sl@0
   519
/**
sl@0
   520
	Supports Syntax-Based Normalization as specifed in section 6.2.2 of RFC3986.
sl@0
   521
	returns a new CUri8 object containing a normalised URI from a parsed URI object.
sl@0
   522
	
sl@0
   523
	@param aUri	A reference to a parsed uri object.
sl@0
   524
	@return A pointer to a CUri8 object containing normalised URI.
sl@0
   525
	@leave KErrNoMemory
sl@0
   526
	@internalAll
sl@0
   527
 */
sl@0
   528
EXPORT_C CUri8* UriUtils:: NormaliseUriL(const TUriC8& aUri)	
sl@0
   529
	{
sl@0
   530
	CUri8* normalisedUri = CUri8::NewLC(aUri);
sl@0
   531
	PercentEncodeL(normalisedUri); 
sl@0
   532
	CaseNormaliseL(normalisedUri);
sl@0
   533
	RemoveDotSegmentsL(normalisedUri);
sl@0
   534
	CleanupStack::Pop(normalisedUri);
sl@0
   535
	return normalisedUri;
sl@0
   536
	}
sl@0
   537
	
sl@0
   538
/**
sl@0
   539
	Performs Case Normalization for CUri8 object as specified 
sl@0
   540
	in section 6.2.2.1 of RFC3986.
sl@0
   541
	
sl@0
   542
	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer 
sl@0
   543
	to CUri8 object with an uri needs to be case normalised and returns with 
sl@0
   544
	case normalised.
sl@0
   545
	@leave KErrNoMemory
sl@0
   546
 */
sl@0
   547
void CaseNormaliseL(CUri8* aNormalisedUri )	
sl@0
   548
	{
sl@0
   549
	//Case normalise the scheme
sl@0
   550
	DoCaseNormaliseL(aNormalisedUri, EUriScheme);	
sl@0
   551
	//Case normalise the Userinfo
sl@0
   552
	DoCaseNormaliseL(aNormalisedUri, EUriUserinfo);	
sl@0
   553
	//Case normalise the Host
sl@0
   554
	DoCaseNormaliseL(aNormalisedUri, EUriHost);	
sl@0
   555
	//Case normalise the Port
sl@0
   556
	DoCaseNormaliseL(aNormalisedUri, EUriPort);	
sl@0
   557
	//Case normalise the Path
sl@0
   558
	DoCaseNormaliseL(aNormalisedUri, EUriPath);	
sl@0
   559
	//Case normalise the Query
sl@0
   560
	DoCaseNormaliseL(aNormalisedUri, EUriQuery);	
sl@0
   561
	//Case normalise the Fragment
sl@0
   562
	DoCaseNormaliseL(aNormalisedUri, EUriFragment);	
sl@0
   563
	}
sl@0
   564
sl@0
   565
/**
sl@0
   566
	Performs Case Normalization for specified sub component of URI.
sl@0
   567
	
sl@0
   568
	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer 
sl@0
   569
	to CUri8 object with an uri needs to be case normalised and returns with 
sl@0
   570
	case normalised for specified sub component.
sl@0
   571
	@param aComponent Enumeration of TUriComponent.
sl@0
   572
	@leave KErrNoMemory
sl@0
   573
*/
sl@0
   574
void DoCaseNormaliseL(CUri8* aNormalisedUri, TUriComponent aComponent)
sl@0
   575
	{
sl@0
   576
	const TUriC8& uri(aNormalisedUri->Uri());
sl@0
   577
	if(!uri.IsPresent(aComponent) )
sl@0
   578
		{
sl@0
   579
		return;
sl@0
   580
		}
sl@0
   581
	//extracts subcomponent of uri which needs to be case-normalised
sl@0
   582
	HBufC8* heapBuf = uri.Extract(aComponent).AllocLC(); 	
sl@0
   583
	TPtr8 normalisedComponent(heapBuf->Des());
sl@0
   584
	TBool normalised = EFalse;
sl@0
   585
	if(aComponent == EUriScheme || aComponent == EUriHost )
sl@0
   586
		{
sl@0
   587
		//change this component to lower case
sl@0
   588
		normalisedComponent.LowerCase();
sl@0
   589
		normalised = ETrue;
sl@0
   590
		}
sl@0
   591
	
sl@0
   592
	TInt len =  normalisedComponent.Length();
sl@0
   593
	TBuf8<KSubstringLength> subString;
sl@0
   594
	//case normalise the component
sl@0
   595
	for (TInt pos = 0; pos < len; pos++)
sl@0
   596
		{
sl@0
   597
		if (normalisedComponent[pos] == KEscapeIndicator )
sl@0
   598
			{
sl@0
   599
			__ASSERT_DEBUG( ((len-pos) >= KSubstringLength), User::Panic(KNormalisationUriPanicCategory, KUriUtilsErrBadEscapeTriple) );
sl@0
   600
			TPtrC8 componentBuf(normalisedComponent.Mid(pos,KSubstringLength));
sl@0
   601
			if (ValidateAndConvertPercentEncodedTriple(componentBuf,subString))
sl@0
   602
				{
sl@0
   603
				normalisedComponent.Replace(pos,KSubstringLength,subString);
sl@0
   604
				pos += KUpdateLength;
sl@0
   605
				normalised = ETrue;
sl@0
   606
				subString.Zero();
sl@0
   607
				}
sl@0
   608
			}
sl@0
   609
		}
sl@0
   610
sl@0
   611
	//updating the uri with normalised string
sl@0
   612
	if( normalised )
sl@0
   613
		{
sl@0
   614
		if(aComponent<EUriMaxComponents && aComponent >=EUriScheme)
sl@0
   615
		   {
sl@0
   616
		   aNormalisedUri->SetComponentL(normalisedComponent, aComponent);
sl@0
   617
		   }
sl@0
   618
		else
sl@0
   619
		   {
sl@0
   620
			User::Leave(KErrArgument);	
sl@0
   621
		   }
sl@0
   622
sl@0
   623
		}
sl@0
   624
	CleanupStack::PopAndDestroy(heapBuf);
sl@0
   625
	}
sl@0
   626
sl@0
   627
/**
sl@0
   628
	Validates and Converts the valid Percent encoded triplets to Uppercase for specified 
sl@0
   629
	sub component of URI. For eg: Converts %3a to %3A
sl@0
   630
	
sl@0
   631
	@param aData A reference to a string to be validated and converted to upper case.
sl@0
   632
	@param aCaseNormalizedData A reference to a descriptor that is converted to 
sl@0
   633
	uppercase that is to be returned.
sl@0
   634
	@return returns a bool whether it is a valid Percent encoded triplet
sl@0
   635
*/
sl@0
   636
TBool ValidateAndConvertPercentEncodedTriple(TDesC8& aData , TDes8& aCaseNormalizedData )	
sl@0
   637
	{
sl@0
   638
	// See if the descriptor is actually long enough and
sl@0
   639
	// Check that the three characters form an escape triple - first char is '%'
sl@0
   640
	if( aData.Length() < KEscapeTripleLength || aData[KEscDelimiterPos] != KEscapeIndicator )
sl@0
   641
		{
sl@0
   642
		return EFalse;//do nothing
sl@0
   643
		}
sl@0
   644
	
sl@0
   645
	// Check that next two characters are valid
sl@0
   646
	TInt mostSignificantDigitValue = KHexDigit().LocateF(aData[KMostSignificantNibblePos] );
sl@0
   647
	TInt leastSignificantDigitValue = KHexDigit().LocateF(aData[KLeastSignificantNibblePos] );
sl@0
   648
sl@0
   649
	if( mostSignificantDigitValue== KErrNotFound || leastSignificantDigitValue == KErrNotFound )
sl@0
   650
		{
sl@0
   651
		// Either of the characters were not a valid hex character
sl@0
   652
		return EFalse;
sl@0
   653
		}
sl@0
   654
	aCaseNormalizedData.Zero();
sl@0
   655
	aCaseNormalizedData.Append(KEscapeIndicator); 
sl@0
   656
	
sl@0
   657
	//Coverts most significant hex character to uppercase
sl@0
   658
	(mostSignificantDigitValue >= 0 && mostSignificantDigitValue <= 0xF) ? 
sl@0
   659
		aCaseNormalizedData.Append(KHexDigit().Mid(mostSignificantDigitValue,1)) :
sl@0
   660
		aCaseNormalizedData.Append(KHexDigit().Mid(mostSignificantDigitValue,1));
sl@0
   661
	
sl@0
   662
	//Coverts least significant hex character to uppercase
sl@0
   663
	(leastSignificantDigitValue >= 0 && leastSignificantDigitValue <= 0xF) ? 
sl@0
   664
		aCaseNormalizedData.Append(KHexDigit().Mid(leastSignificantDigitValue,1)) :
sl@0
   665
		aCaseNormalizedData.Append(aData[KLeastSignificantNibblePos]);
sl@0
   666
	
sl@0
   667
	return ETrue;
sl@0
   668
	}
sl@0
   669
sl@0
   670
/**
sl@0
   671
	Performs Percent-Encoding Normalization for CUri8 object as specifed in 
sl@0
   672
	section 6.2.2.2 of RFC3986.
sl@0
   673
	
sl@0
   674
	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer to 
sl@0
   675
	CUri8 object with an uri needs to be Percent-Encoded and returns with Percent-Encode 
sl@0
   676
	normalised form.
sl@0
   677
	@leave KErrNoMemory
sl@0
   678
 */
sl@0
   679
void  PercentEncodeL(CUri8* aNormalisedUri)
sl@0
   680
	{
sl@0
   681
	//PercentEncode the scheme
sl@0
   682
	DoPercentEncodeL(aNormalisedUri, EUriScheme);	
sl@0
   683
	//PercentEncode the Userinfo
sl@0
   684
	DoPercentEncodeL(aNormalisedUri, EUriUserinfo);	
sl@0
   685
	//PercentEncode the Host
sl@0
   686
	DoPercentEncodeL(aNormalisedUri, EUriHost);	
sl@0
   687
	//PercentEncode the Port
sl@0
   688
	DoPercentEncodeL(aNormalisedUri, EUriPort);	
sl@0
   689
	//PercentEncode the Path
sl@0
   690
	DoPercentEncodeL(aNormalisedUri, EUriPath);	
sl@0
   691
	//PercentEncode the Query
sl@0
   692
	DoPercentEncodeL(aNormalisedUri, EUriQuery);	
sl@0
   693
	//PercentEncode the Fragment
sl@0
   694
	DoPercentEncodeL(aNormalisedUri, EUriFragment);	
sl@0
   695
	}
sl@0
   696
sl@0
   697
/**
sl@0
   698
	Performs Percent-Encoding for specified sub component of URI.
sl@0
   699
	
sl@0
   700
	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer to 
sl@0
   701
	CUri8 object with an uri needs to be Percent-Encoded and returns with Percent-Encoded 
sl@0
   702
	for specified sub component.
sl@0
   703
	@param aComponent Enumeration of TUriComponent.
sl@0
   704
	@leave KErrNoMemory
sl@0
   705
*/
sl@0
   706
void DoPercentEncodeL(CUri8* aNormalisedUri, TUriComponent aComponent)
sl@0
   707
	{
sl@0
   708
	const TUriC8& uri(aNormalisedUri->Uri());
sl@0
   709
	if(!uri.IsPresent(aComponent))
sl@0
   710
		{
sl@0
   711
		return;
sl@0
   712
		}
sl@0
   713
	
sl@0
   714
	HBufC8* heapBuf = uri.Extract(aComponent).AllocLC();
sl@0
   715
	TPtr8 percentNormalisedComponent(heapBuf->Des());
sl@0
   716
	TBool normalised = EFalse;
sl@0
   717
	TInt len = percentNormalisedComponent.Length();	
sl@0
   718
	for (TInt pos = 0; pos < len; pos++)
sl@0
   719
		{
sl@0
   720
		TInt hex;
sl@0
   721
		// check for and decode '%' encoded characters
sl@0
   722
		if (percentNormalisedComponent[pos] == KEscapeIndicator && EscapeUtils::IsEscapeTriple(percentNormalisedComponent.Mid(pos, KSubstringLength), hex))
sl@0
   723
			{
sl@0
   724
			TChar replacedChar(hex);
sl@0
   725
			if( KUnreserved().LocateF(hex) != KErrNotFound || replacedChar.IsAlphaDigit() )
sl@0
   726
				{
sl@0
   727
				TBuf8<KAttachLength> subString;
sl@0
   728
				subString.Append(replacedChar);
sl@0
   729
				percentNormalisedComponent.Replace(pos, KSubstringLength, subString);
sl@0
   730
				normalised = ETrue;
sl@0
   731
				len = percentNormalisedComponent.Length();
sl@0
   732
				}
sl@0
   733
			}
sl@0
   734
		}
sl@0
   735
	if( normalised )
sl@0
   736
		{
sl@0
   737
		if(aComponent<EUriMaxComponents && aComponent >=EUriScheme)
sl@0
   738
		   {
sl@0
   739
		    aNormalisedUri->SetComponentL(percentNormalisedComponent, aComponent);
sl@0
   740
		   }
sl@0
   741
		else
sl@0
   742
		   {
sl@0
   743
			User::Leave(KErrArgument);	
sl@0
   744
		   }
sl@0
   745
sl@0
   746
		}
sl@0
   747
	CleanupStack::PopAndDestroy(heapBuf); 	
sl@0
   748
	}
sl@0
   749
sl@0
   750
/**
sl@0
   751
	Performs Path Segment Normalization for CUri8 object as specifed in 
sl@0
   752
	section 6.2.2.3 of RFC3986.
sl@0
   753
	
sl@0
   754
	@param aNormalisedUri It is an in-out parameter. aNormalisedUri is a pointer to 
sl@0
   755
	CUri8 object with uri needs to be Path Segment normalised and returns with 
sl@0
   756
	Path Segment normalised form.
sl@0
   757
	@leave KErrNoMemory
sl@0
   758
 */
sl@0
   759
void  RemoveDotSegmentsL(CUri8* aNormalisedUri)
sl@0
   760
	{
sl@0
   761
	const TUriC8& uri( aNormalisedUri->Uri() );
sl@0
   762
	if(uri.IsPresent(EUriPath))
sl@0
   763
		{
sl@0
   764
		HBufC8* dotSegmentsPath = uri.Extract(EUriPath).AllocLC();
sl@0
   765
		RemoveExtraneousDotSegmentsL(dotSegmentsPath);
sl@0
   766
		aNormalisedUri->SetComponentL(*dotSegmentsPath, EUriPath);	
sl@0
   767
		CleanupStack::PopAndDestroy(dotSegmentsPath);	
sl@0
   768
		}
sl@0
   769
	}
sl@0
   770
sl@0
   771
/**
sl@0
   772
	Performs Remove_dot_segments algorithm as specified in section 5.2.4 of RFC3986.
sl@0
   773
	
sl@0
   774
	@param aUriInputPath It is an in-out parameter. aUriInputPath is a pointer to the 
sl@0
   775
	path descriptor to be normalised for extraneous dot_segments and returns with 
sl@0
   776
	normalised dot_segments.
sl@0
   777
	@leave KErrNoMemory
sl@0
   778
*/
sl@0
   779
void RemoveExtraneousDotSegmentsL(HBufC8* aUriInputPath)
	{
	// Input buffer: consumed from the front, one rule application per pass.
	TPtr8 input(aUriInputPath->Des());

	// Output buffer: allocated with the initial length of the input path.
	HBufC8* outputHeap = HBufC8::NewLC(input.Length());
	TPtr8 output(outputHeap->Des());

	// Each pass applies the first matching rule of section 5.2.4 of RFC 3986
	// and then re-reads the remaining input length.
	for(TInt remaining = input.Length(); remaining > 0; remaining = input.Length())
		{
		// step a: input starts with "../" -- drop that prefix
		if(remaining >= KDotDotSlashLength &&
			input.Mid(0, KDotDotSlashLength).Compare(KDotDotSlash) == 0)
			{
			input.Delete(0, KDotDotSlashLength);
			continue;
			}

		// step a: input starts with "./" -- drop that prefix
		if(remaining >= KDotDotLength &&
			input.Mid(0, KDotDotLength).Compare(KDotSlash) == 0)
			{
			input.Delete(0, KDotDotLength);
			continue;
			}

		// step b: input starts with "/./" -- replace that prefix with "/"
		if(remaining >= KDotDotSlashLength &&
			input.Mid(0, KDotDotSlashLength).Compare(KSlashDotSlash) == 0)
			{
			input.Replace(0, KDotDotSlashLength, KSlash);
			continue;
			}

		// step c: input starts with "/../" -- replace that prefix with "/"
		// and remove the last segment from the output
		if(remaining >= KSlashDotDotSlashLength &&
			input.Mid(0, KSlashDotDotSlashLength).Compare(KSlashDotDotSlash) == 0)
			{
			updateStrings(input, output, KSlashDotDotSlashLength);
			continue;
			}

		// step c: input is exactly "/.." (complete path segment)
		if(remaining == KDotDotSlashLength &&
			input.Mid(0, KDotDotSlashLength).Compare(KSlashDotDot) == 0)
			{
			updateStrings(input, output, KDotDotSlashLength);
			continue;
			}

		// step b: input is exactly "/." (complete path segment)
		if(remaining == KDotDotLength &&
			input.Mid(0, KDotDotLength).Compare(KSlashDot) == 0)
			{
			input.Replace(0, KDotDotLength, KSlash);
			continue;
			}

		// step d: input is exactly ".."
		if(remaining == KDotDotLength && input.Mid(0).Compare(KDotDot) == 0)
			{
			input.Delete(0, KDotDotLength);
			continue;
			}

		// step d: input is exactly "."
		if(remaining == KDotLength && input.Mid(0).Compare(KDot) == 0)
			{
			input.Delete(0, KDotLength);
			continue;
			}

		// step e: move the first path segment -- including its initial '/'
		// (if any) and up to, but not including, the next '/' -- from the
		// input to the end of the output.
		TInt segmentLength = remaining;	// a single remaining character is moved as is
		if(remaining > KDotLength)
			{
			TInt slashPos = input.Find(KSlash);
			if(slashPos == 0)
				{
				// The segment starts with '/': search for the slash after it
				// and convert the result back to an offset into the input.
				slashPos = input.Mid(1).Find(KSlash);
				if(slashPos != KErrNotFound)
					{
					++slashPos;
					}
				}
			if(slashPos != KErrNotFound)
				{
				segmentLength = slashPos;
				}
			}
		output.Append(input.Mid(0, segmentLength));
		input.Delete(0, segmentLength);
		}

	// Hand the normalised path back through the caller's buffer.
	input.Copy(output);
	CleanupStack::PopAndDestroy(outputHeap);
	}
sl@0
   869
sl@0
   870
/**
sl@0
   871
	Updates the strings specified in step c of section 5.2.4 of RFC 3986
sl@0
   872
	
sl@0
   873
	@param aInputBuf A reference to the input buffer to be modified.
sl@0
   874
	@param aOutPutBuf A reference to the output buffer to be modified.
sl@0
   875
	@param aLength length of the string to be replaced.
sl@0
   876
 */
sl@0
   877
void  updateStrings(TPtr8& aInputBuf, TPtr8& aOutPutBuf, TInt aLength)
	{
	// Replace the first aLength characters of the input with a single "/".
	aInputBuf.Replace(0, aLength, KSlash);

	// Remove the last segment of the output together with its preceding '/'
	// eg: /abc/def/fgh --> /abc/def
	// When the output contains no '/', everything in it is removed.
	const TInt lastSlashPos = aOutPutBuf.LocateReverse('/');
	const TInt cutFrom = (lastSlashPos == KErrNotFound) ? 0 : lastSlashPos;
	aOutPutBuf.Delete(cutFrom, aOutPutBuf.Length() - cutFrom);
	}
sl@0
   888