1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/ossrv/genericservices/httputils/UriUtils/UriUtils.cpp Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,888 @@
1.4 +// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
1.5 +// All rights reserved.
1.6 +// This component and the accompanying materials are made available
1.7 +// under the terms of "Eclipse Public License v1.0"
1.8 +// which accompanies this distribution, and is available
1.9 +// at the URL "http://www.eclipse.org/legal/epl-v10.html".
1.10 +//
1.11 +// Initial Contributors:
1.12 +// Nokia Corporation - initial contribution.
1.13 +//
1.14 +// Contributors:
1.15 +//
1.16 +// Description:
1.17 +//
1.18 +
1.19 +#include <uriutils.h>
1.20 +#include <uriutilscommon.h>
1.21 +#include "UriUtilsInternal.h"
1.22 +#include <escapeutils.h>
1.23 +
1.24 +_LIT8(KDot, "."); // Path patterns compared against by RemoveExtraneousDotSegmentsL()
1.25 +_LIT8(KDotDot, "..");
1.26 +_LIT8(KDotSlash, "./");
1.27 +_LIT8(KDotDotSlash, "../");
1.28 +_LIT8(KSlash, "/"); // also the replacement text used by updateStrings()
1.29 +_LIT8(KSlashDot, "/.");
1.30 +_LIT8(KSlashDotDot, "/..");
1.31 +_LIT8(KSlashDotSlash, "/./");
1.32 +_LIT8(KSlashDotDotSlash, "/../");
1.33 +
1.34 +_LIT(KHexDigit, "0123456789ABCDEF"); // searched to validate hex digits; supplies their upper case form
1.35 +_LIT(KUnreserved, "-.~_"); // non-alphanumeric characters that DoPercentEncodeL() decodes
1.36 +#ifdef _DEBUG
1.37 +_LIT(KNormalisationUriPanicCategory, "URI-NORMALIZATION"); // panic category of the debug assert in DoCaseNormaliseL()
1.38 +#endif
1.39 +const TInt KEscapeIndicator = '%'; // first character of an escape triple
1.40 +const TInt KEscapeTripleLength = 3; // the escape indicator plus two hex digits
1.41 +const TInt KEscDelimiterPos = 0; // index of the escape indicator within a triple
1.42 +const TInt KMostSignificantNibblePos = 1; // index of the first hex digit within a triple
1.43 +const TInt KLeastSignificantNibblePos = 2; // index of the second hex digit within a triple
1.44 +const TInt KSubstringLength = 3; // number of characters extracted when an escape indicator is examined
1.45 +const TInt KUpdateLength = 2; // extra advance of the scan position after a triple is rewritten
1.46 +const TInt KAttachLength = 1; // size of the buffer holding one decoded character
1.47 +
1.48 +const TInt KDotLength = 1; // length of KDot
1.49 +const TInt KDotDotLength = 2; // length of KDotDot, also used for KDotSlash and KSlashDot
1.50 +const TInt KDotDotSlashLength = 3; // length of KDotDotSlash, also used for KSlashDotSlash and KSlashDotDot
1.51 +const TInt KSlashDotDotSlashLength = 4; // length of KSlashDotDotSlash
1.52 +
1.53 +//
1.54 +//
1.55 +// Implementation of UriUtils
1.56 +//
1.57 +//
1.58 +
1.59 +/**
1.60 + Converts a 16-bit format uri into its internet form. Any Unicode characters
1.61 + are converted into Utf8 representation and then any excluded characters are
1.62 + escape encoded. Reserved characters specified in RFC2396 will not be escape
1.63 + encoded however, these include ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ",".
1.64 + For example http://localhost will not be encoded to http%3A%2F%2Flocalhost.
1.65 + The fragment, if present, is escape encoded separately and set on the result afterwards.
1.66 + @since 6.0
1.67 + @deprecated Deprecated in 9.1
1.68 + @leave KUriUtilsCannotConvert. When the input data cannot be converted.
1.69 + @leave KUriUtilsErr16BitChar. When the input data has a 16-Bit character to be escape encoded.
1.70 + @param aUri The 16-bit format uri.
1.71 + @return A pointer to a newly created 8-bit uri. It is not left on the cleanup stack.
1.72 + */
1.73 +EXPORT_C CUri8* UriUtils::ConvertToInternetFormL(const TUriC16& aUri)
1.74 + {
1.75 + // Convert the unicode uri to utf8; the buffer stays on the cleanup stack until the end
1.76 + HBufC8* utf8Buf = EscapeUtils::ConvertFromUnicodeToUtf8L(aUri.UriDes());
1.77 + CleanupStack::PushL(utf8Buf);
1.78 +
1.79 + // Parse the utf8 text to isolate the uri without its fragment (the parse result is not checked)
1.80 + TUriParser8 parser;
1.81 + parser.Parse(*utf8Buf);
1.82 + TPtrC8 uriNoFragment;
1.83 + parser.UriWithoutFragment(uriNoFragment);
1.84 +
1.85 + // Escape encode the uri without the fragment
1.86 + HBufC8* escapedBuf = EscapeUtils::EscapeEncodeL(uriNoFragment, EscapeUtils::EEscapeNormal);
1.87 + CleanupStack::PushL(escapedBuf);
1.88 +
1.89 + // Escape encode the fragment if there is one; NULL records that there was none
1.90 + HBufC8* escapedFragmentBuf = NULL;
1.91 + if( parser.IsPresent(EUriFragment) )
1.92 + {
1.93 + escapedFragmentBuf = EscapeUtils::EscapeEncodeL(parser.Extract(EUriFragment), EscapeUtils::EEscapeNormal);
1.94 + CleanupStack::PushL(escapedFragmentBuf);
1.95 + }
1.96 +
1.97 + // Re-parse the escaped text (result again not checked) and create the CUri8 object from it
1.98 + parser.Parse(*escapedBuf);
1.99 + CUri8* netForm = CUri8::NewL(parser);
1.100 +
1.101 + // Set the fragment if there was one; netForm is on the cleanup stack only while SetComponentL can leave
1.102 + if( escapedFragmentBuf != NULL )
1.103 + {
1.104 + CleanupStack::PushL(netForm);
1.105 + netForm->SetComponentL(*escapedFragmentBuf, EUriFragment);
1.106 + CleanupStack::Pop(netForm);
1.107 + CleanupStack::PopAndDestroy(escapedFragmentBuf);
1.108 + }
1.109 +
1.110 + // Destroy the two remaining buffers and hand the new object to the caller
1.111 + CleanupStack::PopAndDestroy(2, utf8Buf); // utf8Buf, escapedBuf
1.112 + return netForm;
1.113 + }
1.114 +
1.115 +/**
1.116 + Produces the display form of an 8-bit format uri. Escape triples are decoded first and
1.117 + the resulting sets of Utf8 format characters are then converted into Unicode.
1.118 +
1.119 + @since 6.0
1.120 + @deprecated Deprecated in 9.1
1.121 + @leave KUriUtilsCannotConvert. When the input data cannot be converted.
1.122 + @param aUri The 8-bit format uri.
1.123 + @return A pointer to a newly created 16-bit uri.
1.124 + */
1.125 +EXPORT_C CUri16* UriUtils::ConvertToDisplayFormL(const TUriC8& aUri)
1.126 +    {
1.127 +    // Step 1: decode the escape triples
1.128 +    HBufC8* decoded = EscapeUtils::EscapeDecodeL(aUri.UriDes());
1.129 +    CleanupStack::PushL(decoded);
1.130 +
1.131 +    // Step 2: convert the utf8 data into a unicode buffer
1.132 +    HBufC16* unicode = EscapeUtils::ConvertToUnicodeFromUtf8L(*decoded);
1.133 +    CleanupStack::PushL(unicode);
1.134 +
1.135 +    // Step 3: parse the unicode text (result not checked) and build the CUri16 from it
1.136 +    TUriParser16 uriParser;
1.137 +    uriParser.Parse(*unicode);
1.138 +    CUri16* result = CUri16::NewL(uriParser);
1.139 +    // Release both temporary buffers, most recently pushed first
1.140 +    CleanupStack::PopAndDestroy(unicode);
1.141 +    CleanupStack::PopAndDestroy(decoded);
1.142 +    return result;
1.143 +    }
1.144 +
1.145 +/**
1.146 + Builds a new CUri8 object from the URI text held in a Unicode descriptor.
1.147 +
1.148 + @param aUri a Unicode string containing the URI to parse.
1.149 + @return the new CUri8 object
1.150 + @leave EUriUtilsParserErrInvalidUri if the descriptor is an invalid URI.
1.151 + */
1.152 +EXPORT_C CUri8* UriUtils::CreateUriL(const TDesC& aUri)
1.153 +    {
1.154 +    // Unicode -> UTF8
1.155 +    HBufC8* utf8Text = EscapeUtils::ConvertFromUnicodeToUtf8L(aUri);
1.156 +    CleanupStack::PushL(utf8Text);
1.157 +    // Escape encode only what cannot appear in a URI; every %hh is assumed to be encoded already
1.158 +    HBufC8* encodedText = EscapeUtils::ReEscapeEncodeL(*utf8Text);
1.159 +    CleanupStack::PopAndDestroy(utf8Text);
1.160 +    CleanupStack::PushL(encodedText);
1.161 +    // Leave with the parser's error code when the text cannot be parsed as a URI
1.162 +    TUriParser8 uriParser;
1.163 +    const TInt parseError = uriParser.Parse(*encodedText);
1.164 +    User::LeaveIfError(parseError);
1.165 +    CUri8* result = CUri8::NewL(uriParser);
1.166 +    CleanupStack::PopAndDestroy(encodedText);
1.167 +    return result;
1.168 +    }
1.169 +
1.170 +/**
1.171 + Builds a new CAuthority8 object from the authority text held in a Unicode descriptor.
1.172 +
1.173 + @param aAuthority a Unicode string containing the Authority to parse.
1.174 + @return the new CAuthority8 object
1.175 + @leave EUriUtilsParserErrInvalidUri if the descriptor is an invalid Authority.
1.176 + */
1.177 +EXPORT_C CAuthority8* UriUtils::CreateAuthorityL(const TDesC& aAuthority)
1.178 +    {
1.179 +    // Unicode -> UTF8
1.180 +    HBufC8* utf8Text = EscapeUtils::ConvertFromUnicodeToUtf8L(aAuthority);
1.181 +    CleanupStack::PushL(utf8Text);
1.182 +    // Escape encode only what cannot appear in the authority; every % is assumed to be encoded already
1.183 +    HBufC8* encodedText = EscapeUtils::ReEscapeEncodeL(*utf8Text);
1.184 +    CleanupStack::PopAndDestroy(utf8Text);
1.185 +    CleanupStack::PushL(encodedText);
1.186 +    // Leave with the parser's error code when the text cannot be parsed as an authority
1.187 +    TAuthorityParser8 authorityParser;
1.188 +    const TInt parseError = authorityParser.Parse(*encodedText);
1.189 +    User::LeaveIfError(parseError);
1.190 +    CAuthority8* result = CAuthority8::NewL(authorityParser);
1.191 +    CleanupStack::PopAndDestroy(encodedText);
1.192 +    return result;
1.193 +    }
1.194 +
1.195 +/**
1.196 + Reports whether an 8-bit descriptor holds excluded (invalid) characters. Excluded characters include
1.197 + all control characters (values 0x00 to 0x1F and greater than 0x7F), space (0x20), delimiter
1.198 + characters ('<', '>', '#', '%', '"') and unwise characters ('{', '}', '|', '\', '^', '[', ']', '`').
1.199 +
1.200 + @since 6.0
1.201 + @param aData The descriptor to be checked.
1.202 + @return ETrue if the descriptor contains invalid characters, otherwise EFalse.
1.203 + */
1.204 +EXPORT_C TBool UriUtils::HasInvalidChars(const TDesC8& aData)
1.205 +    {
1.206 +    const TBool containsExcluded = CheckForExcludedChars(aData);
1.207 +    return containsExcluded;
1.208 +    }
1.209 +
1.210 +/**
1.211 + Reports whether a 16-bit descriptor holds excluded (invalid) characters. Excluded characters include
1.212 + all control characters (values 0x00 to 0x1F and greater than 0x7F), space (0x20), delimiter
1.213 + characters ('<', '>', '#', '%','"') and unwise characters ('{', '}', '|', '\', '^', '[', ']', '`').
1.214 +
1.215 + @since 6.0
1.216 + @param aData The descriptor to be checked.
1.217 + @return ETrue if the descriptor contains invalid characters, otherwise EFalse.
1.218 + */
1.219 +EXPORT_C TBool UriUtils::HasInvalidChars(const TDesC16& aData)
1.220 +    {
1.221 +    const TBool containsExcluded = CheckForExcludedChars(aData);
1.222 +    return containsExcluded;
1.223 +    }
1.224 +
1.225 +/**
1.226 + Classifies the supplied 8-bit host as an IPv4, IPv6 or text format host.
1.227 + @since 7.0
1.228 + @param aHost The descriptor containing the host to check
1.229 + @return A TUriHostType enum of either EIPv6, EIPv4, EText or EUnknown
1.230 + */
1.231 +EXPORT_C UriUtils::TUriHostType UriUtils::HostType(const TDesC8& aHost)
1.232 +    {
1.233 +    const UriUtils::TUriHostType detectedType = CheckHostType(aHost);
1.234 +    return detectedType;
1.235 +    }
1.236 +
1.237 +/**
1.238 + Classifies the supplied 16-bit host as an IPv4, IPv6 or text format host.
1.239 + @since 7.0
1.240 + @param aHost The descriptor containing the host to check
1.241 + @return A TUriHostType enum of either EIPv6, EIPv4, EText or EUnknown
1.242 + */
1.243 +EXPORT_C UriUtils::TUriHostType UriUtils::HostType(const TDesC16& aHost)
1.244 +    {
1.245 +    const UriUtils::TUriHostType detectedType = CheckHostType(aHost);
1.246 +    return detectedType;
1.247 +    }
1.248 +
1.249 +
1.250 +
1.251 +//
1.252 +//
1.253 +// Implementation of component internal functions
1.254 +//
1.255 +//
1.256 +
1.257 +/**
1.258 + @internalComponent
1.259 +
1.260 + Decides whether an 8-bit scheme counts as a network scheme. The scheme is classified
1.261 + with SchemeType(); only the SIP classification makes this function answer EFalse.
1.262 +
1.263 + @param aScheme The descriptor with the scheme.
1.264 + @return A boolean value of EFalse if the scheme is SIP. For all other schemes returns ETrue.
1.265 + */
1.266 +TBool IsNetworkScheme(const TDesC8& aScheme)
1.267 +    {
1.268 +    // Classify the scheme first
1.269 +    const TUriSchemeType type = SchemeType(aScheme);
1.270 +    const TBool isSip = (type == ESchemeTypeSip);
1.271 +    // Everything that is not SIP is treated as a network scheme
1.272 +    return isSip ? EFalse : ETrue;
1.273 +    }
1.274 +
1.275 +/**
1.276 + @internalComponent
1.277 +
1.278 + Decides whether a 16-bit scheme counts as a network scheme. The scheme is classified
1.279 + with SchemeType(); only the SIP classification makes this function answer EFalse.
1.280 +
1.281 + @param aScheme The descriptor with the scheme.
1.282 + @return A boolean value of EFalse if the scheme is SIP. For all other schemes returns ETrue.
1.283 + */
1.284 +TBool IsNetworkScheme(const TDesC16& aScheme)
1.285 +    {
1.286 +    // Classify the scheme first
1.287 +    const TUriSchemeType type = SchemeType(aScheme);
1.288 +    const TBool isSip = (type == ESchemeTypeSip);
1.289 +    // Everything that is not SIP is treated as a network scheme
1.290 +    return isSip ? EFalse : ETrue;
1.291 +    }
1.292 +
1.293 +/**
1.294 + @internalComponent
1.295 +
1.296 + Classifies an 8-bit URI scheme by comparing it, with CompareF(), against the known schemes.
1.297 +
1.298 + @param aScheme The descriptor with the scheme.
1.299 + @return ESchemeTypeSip for KSipScheme8 or KSipsScheme8, ESchemeTypeTel for KTelScheme8, otherwise ESchemeTypeUnknown.
1.300 + */
1.301 +TUriSchemeType SchemeType(const TDesC8& aScheme)
1.302 +    {
1.303 +    // sip and sips share a single scheme type
1.304 +    const TBool sipFamily = aScheme.CompareF(KSipScheme8()) == 0
1.305 +                            || aScheme.CompareF(KSipsScheme8()) == 0;
1.306 +    if (sipFamily)
1.307 +        {
1.308 +        return ESchemeTypeSip;
1.309 +        }
1.310 +    // tel is the only other scheme recognised here
1.311 +    if (aScheme.CompareF(KTelScheme8()) == 0)
1.312 +        {
1.313 +        return ESchemeTypeTel;
1.314 +        }
1.315 +    return ESchemeTypeUnknown;
1.316 +    }
1.317 +
1.318 +/**
1.319 + @internalComponent
1.320 +
1.321 + Classifies a 16-bit URI scheme by comparing it, with CompareF(), against the sip and sips schemes.
1.322 +
1.323 + @param aScheme The descriptor with the scheme.
1.324 + @return ESchemeTypeSip for KSipScheme or KSipsScheme, otherwise ESchemeTypeUnknown.
1.325 + */
1.326 +TUriSchemeType SchemeType(const TDesC16& aScheme)
1.327 +    {
1.328 +    // sip and sips share a single scheme type; no other scheme is recognised here
1.329 +    const TBool sipFamily = aScheme.CompareF(KSipScheme()) == 0
1.330 +                            || aScheme.CompareF(KSipsScheme()) == 0;
1.331 +    if (sipFamily)
1.332 +        {
1.333 +        return ESchemeTypeSip;
1.334 +        }
1.335 +    return ESchemeTypeUnknown;
1.336 +    }
1.337 +
1.338 +/**
1.339 + @internalComponent
1.340 +
1.341 + Tells whether an 8-bit text host is in a valid form, as decided by CheckValidTextHost().
1.342 + @param aHost The descriptor containing the host to check
1.343 + @return ETrue if the host is valid otherwise EFalse
1.344 + */
1.345 +TBool IsTextHostValid(const TDesC8& aHost)
1.346 +    {
1.347 +    const TBool hostIsValid = CheckValidTextHost(aHost);
1.348 +    return hostIsValid;
1.349 +    }
1.350 +
1.351 +/**
1.352 + @internalComponent
1.353 +
1.354 + Tells whether a 16-bit text host is in a valid form, as decided by CheckValidTextHost().
1.355 + @param aHost The descriptor containing the host to check
1.356 + @return ETrue if the host is valid otherwise EFalse
1.357 + */
1.358 +TBool IsTextHostValid(const TDesC16& aHost)
1.359 +    {
1.360 +    const TBool hostIsValid = CheckValidTextHost(aHost);
1.361 +    return hostIsValid;
1.362 +    }
1.363 +
1.364 +
1.365 +/**
1.366 + @internalComponent
1.367 +
1.368 + Splits a segment of the form name=value into its name and value parts. When the
1.369 + segment holds no KEqualsSeparator the whole segment is the name and the value is empty.
1.370 + @param aSegment the name-value segment to parse
1.371 + @param aName the name part that is returned
1.372 + @param aValue the value part that is returned
1.373 + */
1.374 +void GetNameValuePair(const TDesC8& aSegment, TPtrC8& aName, TPtrC8& aValue)
1.375 +    {
1.376 +    // Look for the separator between name and value
1.377 +    const TInt equalsPos = aSegment.Locate(KEqualsSeparator);
1.378 +    if (equalsPos == KErrNotFound)
1.379 +        {
1.380 +        // No separator: everything is the name, the value is an empty descriptor
1.381 +        aName.Set(aSegment);
1.382 +        aValue.Set(TPtrC8());
1.383 +        return;
1.384 +        }
1.385 +
1.386 +    // Split around the separator; the separator itself belongs to neither part
1.387 +    aName.Set(aSegment.Left(equalsPos));
1.388 +    aValue.Set(aSegment.Mid(equalsPos + 1));
1.389 +    }
1.390 +
1.391 +
1.392 +//
1.393 +//
1.394 +// Implementation of LOCAL functions
1.395 +//
1.396 +//
1.397 +
1.398 +/**
1.399 + Scans the descriptor for excluded characters. These are characters that should have
1.400 + been escape encoded or converted to Utf8 from Unicode. An escape triple is accepted.
1.401 +
1.402 + @since 6.0
1.403 + @param aData The descriptor to be checked.
1.404 + @return A boolean value of ETrue if the descriptor contains excluded
1.405 + characters, EFalse if it does not.
1.406 + */
1.407 +template<class TDesCType>
1.408 +LOCAL_C TBool CheckForExcludedChars(const TDesCType& aData)
1.409 +    {
1.410 +    // Examine each character in turn, stopping at the first offender
1.411 +    const TInt count = aData.Length();
1.412 +    for (TInt index = 0; index < count; ++index)
1.413 +        {
1.414 +        if (!EscapeUtils::IsExcludedChar(aData[index]))
1.415 +            {
1.416 +            // not an excluded character - carry on with the next one
1.417 +            continue;
1.418 +            }
1.419 +        // An excluded character is tolerated only where an escape triple starts
1.420 +        TInt decodedValue; // filled in by IsEscapeTriple(), not used here
1.421 +        if (!EscapeUtils::IsEscapeTriple(aData.Mid(index), decodedValue))
1.422 +            {
1.423 +            return ETrue;
1.424 +            }
1.425 +        }
1.426 +    // The end was reached without meeting an offending character
1.427 +    return EFalse;
1.428 +    }
1.429 +
1.430 +/**
1.431 + Classifies the supplied host as an IPv4, IPv6 or text format host.
1.432 +
1.433 + @since 7.0
1.434 + @param aHost The descriptor containing the host to check
1.435 + @return EIPv6Host if a colon is found, EIPv4Host for digits with exactly three dots, otherwise ETextHost
1.436 + */
1.437 +template<class TDesCType>
1.438 +LOCAL_C UriUtils::TUriHostType CheckHostType(const TDesCType& aHost)
1.439 +    {
1.440 +    TInt dots = 0;
1.441 +    TBool onlyDigitsAndDots = ETrue;
1.442 +
1.443 +    const TInt hostLength = aHost.Length();
1.444 +    for (TInt index = 0; index < hostLength; ++index)
1.445 +        {
1.446 +        const TChar current(aHost[index]);
1.447 +
1.448 +        if (current == ':')
1.449 +            {
1.450 +            // if theres a colon, it has to be an IPv6 address - no need to look further
1.451 +            return UriUtils::EIPv6Host;
1.452 +            }
1.453 +        if (current == '.')
1.454 +            {
1.455 +            // counted so that the 3 dots of an IPv4 address can be checked
1.456 +            ++dots;
1.457 +            }
1.458 +        else if (current < '0' || current > '9')
1.459 +            {
1.460 +            // host contains a character that is not '0'..'9' or '.'
1.461 +            onlyDigitsAndDots = EFalse;
1.462 +            }
1.463 +        }
1.464 +
1.465 +    // numeric only, with three separators, is taken to be an IPv4 address
1.466 +    if (onlyDigitsAndDots && dots == 3)
1.467 +        {
1.468 +        return UriUtils::EIPv4Host;
1.469 +        }
1.470 +    // anything else is treated as a text host
1.471 +    return UriUtils::ETextHost;
1.472 +    }
1.473 +
1.474 +/**
1.475 + @internalComponent
1.476 +
1.477 + Verifies the form of a text host: it must not be empty, must neither start nor end with
1.478 + '-' or '.', may only contain letters, digits, '-' and '.', and must not contain "..".
1.479 + @param aHost The descriptor containing the host to check
1.480 + @return ETrue if the host is valid otherwise EFalse
1.481 + */
1.482 +template<class TDesCType>
1.483 +LOCAL_C TBool CheckValidTextHost(const TDesCType& aHost)
1.484 +    {
1.485 +    const TInt hostLength = aHost.Length();
1.486 +    if (hostLength == 0)
1.487 +        {
1.488 +        // an empty host is never valid
1.489 +        return EFalse;
1.490 +        }
1.491 +
1.492 +    // host name can't start or end with a dot or dash
1.493 +    const TChar leading(aHost[0]);
1.494 +    const TChar trailing(aHost[hostLength - 1]);
1.495 +    if (leading == '-' || leading == '.' || trailing == '-' || trailing == '.')
1.496 +        {
1.497 +        return EFalse;
1.498 +        }
1.499 +
1.500 +    TChar previous = '\0';
1.501 +    for (TInt index = 0; index < hostLength; ++index)
1.502 +        {
1.503 +        const TChar current(aHost[index]);
1.504 +        // Valid characters are A-Z, a-z, 0-9, '-' and '.'
1.505 +        const TBool letter = (current >= 'A' && current <= 'Z') || (current >= 'a' && current <= 'z');
1.506 +        const TBool digit = (current >= '0' && current <= '9');
1.507 +        if (!letter && !digit && current != '-' && current != '.')
1.508 +            {
1.509 +            return EFalse;
1.510 +            }
1.511 +
1.512 +        // dot is the section separator; two in a row would enclose an empty section
1.513 +        if (current == '.' && previous == '.')
1.514 +            {
1.515 +            return EFalse;
1.516 +            }
1.517 +        previous = current;
1.518 +        }
1.519 +    return ETrue;
1.520 +    }
1.521 +
1.522 +/**
1.523 + Supports Syntax-Based Normalization as specified in section 6.2.2 of RFC3986.
1.524 + Returns a new CUri8 object containing a normalised URI from a parsed URI object.
1.525 + @param aUri A reference to a parsed uri object.
1.526 + @return A pointer to a CUri8 object containing normalised URI.
1.527 + @leave KErrNoMemory
1.528 + @internalAll
1.529 + */
1.530 +EXPORT_C CUri8* UriUtils::NormaliseUriL(const TUriC8& aUri)
1.531 +    {
1.532 +    // The new object stays on the cleanup stack while the leaving steps run
1.533 +    CUri8* working = CUri8::NewLC(aUri);
1.534 +    PercentEncodeL(working);      // percent-encoding normalisation
1.535 +    CaseNormaliseL(working);      // case normalisation
1.536 +    RemoveDotSegmentsL(working);  // path segment normalisation
1.537 +    CleanupStack::Pop(working);
1.538 +    return working;
1.539 +    }
1.540 +
1.541 +/**
1.542 + Performs Case Normalization for CUri8 object as specified in section 6.2.2.1
1.543 + of RFC3986, by case normalising each component of the uri in turn.
1.544 +
1.545 + @param aNormalisedUri In-out parameter. Pointer to the CUri8 object whose uri is case normalised in place.
1.546 + @leave KErrNoMemory
1.547 + */
1.548 +void CaseNormaliseL(CUri8* aNormalisedUri )
1.549 +    {
1.550 +    // The components are case normalised one at a time, in this order
1.551 +    const TUriComponent KOrder[] =
1.552 +        {
1.553 +        EUriScheme,
1.554 +        EUriUserinfo,
1.555 +        EUriHost,
1.556 +        EUriPort,
1.557 +        EUriPath,
1.558 +        EUriQuery,
1.559 +        EUriFragment
1.560 +        };
1.561 +    const TInt KOrderCount = sizeof(KOrder) / sizeof(KOrder[0]);
1.562 +    for (TInt index = 0; index < KOrderCount; ++index)
1.563 +        {
1.564 +        DoCaseNormaliseL(aNormalisedUri, KOrder[index]);
1.565 +        }
1.566 +    }
1.567 +
1.568 +/**
1.569 + Performs Case Normalization for specified sub component of URI: scheme and host are lower-cased,
1.570 + and in every component the hex digits of valid escape triples are upper-cased. A '%' with fewer
1.571 + than two characters after it panics in debug builds and is left unchanged in release builds.
1.572 +
1.573 + @param aNormalisedUri In-out parameter. Pointer to the CUri8 object whose component is case normalised in place.
1.574 + @param aComponent Enumeration of TUriComponent. Nothing is done if the component is not present.
1.575 + @leave KErrNoMemory, or KErrArgument if a change was made and aComponent is outside the component range.
1.576 +*/
1.577 +void DoCaseNormaliseL(CUri8* aNormalisedUri, TUriComponent aComponent)
1.578 +    {
1.579 +    const TUriC8& uri(aNormalisedUri->Uri());
1.580 +    if(!uri.IsPresent(aComponent) )
1.581 +        {
1.582 +        return;
1.583 +        }
1.584 +    //take a modifiable copy of the component that is to be case-normalised
1.585 +    HBufC8* heapBuf = uri.Extract(aComponent).AllocLC();
1.586 +    TPtr8 normalisedComponent(heapBuf->Des());
1.587 +    TBool normalised = EFalse;
1.588 +    if(aComponent == EUriScheme || aComponent == EUriHost )
1.589 +        {
1.590 +        //scheme and host are changed to lower case as a whole
1.591 +        normalisedComponent.LowerCase();
1.592 +        normalised = ETrue;
1.593 +        }
1.594 +
1.595 +    const TInt len = normalisedComponent.Length();
1.596 +    TBuf8<KSubstringLength> subString;
1.597 +    //upper-case the hex digits of each escape triple; the length never changes
1.598 +    for (TInt pos = 0; pos < len; pos++)
1.599 +        {
1.600 +        if (normalisedComponent[pos] == KEscapeIndicator )
1.601 +            {
1.602 +            __ASSERT_DEBUG( ((len-pos) >= KSubstringLength), User::Panic(KNormalisationUriPanicCategory, KUriUtilsErrBadEscapeTriple) );
1.603 +            if ((len-pos) < KSubstringLength)
1.604 +                {
1.605 +                //release builds only: Mid() below would panic on a truncated triple, and nothing after it can be a full triple
1.606 +                break;
1.607 +                }
1.608 +            TPtrC8 componentBuf(normalisedComponent.Mid(pos,KSubstringLength));
1.609 +            if (ValidateAndConvertPercentEncodedTriple(componentBuf,subString))
1.610 +                {
1.611 +                normalisedComponent.Replace(pos,KSubstringLength,subString);
1.612 +                pos += KUpdateLength; //together with the loop increment this steps over the whole triple
1.613 +                normalised = ETrue;
1.614 +                subString.Zero();
1.615 +                }
1.616 +            }
1.617 +        }
1.618 +    //updating the uri with normalised string, only when something was changed
1.619 +    if( normalised )
1.620 +        {
1.621 +        if(!(aComponent<EUriMaxComponents && aComponent >=EUriScheme))
1.622 +            {
1.623 +            User::Leave(KErrArgument);
1.624 +            }
1.625 +        aNormalisedUri->SetComponentL(normalisedComponent, aComponent);
1.626 +        }
1.627 +    CleanupStack::PopAndDestroy(heapBuf);
1.628 +    }
1.629 +
1.630 +/**
1.631 + Validates a Percent encoded triplet and produces it with upper case hex digits.
1.632 + For eg: Converts %3a to %3A
1.633 +
1.634 + The input is accepted only if it is at least three characters long, starts with '%'
1.635 + and both following characters are found in KHexDigit by the folding search LocateF().
1.636 + Only the first three characters of aData are looked at.
1.637 +
1.638 + Both hex digit lookups have succeeded by the time the output is built, so the digits
1.639 + are always taken from KHexDigit and no character is copied from aData.
1.640 +
1.641 + @param aData A reference to the string to be validated. It is not modified.
1.642 + @param aCaseNormalizedData Receives the three character result when ETrue is returned and is
1.643 + left untouched when EFalse is returned. It needs a maximum length of at least three.
1.644 + @return ETrue if aData starts with a valid Percent encoded triplet, otherwise EFalse.
1.645 +*/
1.646 +TBool ValidateAndConvertPercentEncodedTriple(TDesC8& aData , TDes8& aCaseNormalizedData )
1.647 +    {
1.648 +    // A triple needs three characters and must begin with '%'
1.649 +    if( aData.Length() < KEscapeTripleLength || aData[KEscDelimiterPos] != KEscapeIndicator )
1.650 +        {
1.651 +        return EFalse;//do nothing
1.652 +        }
1.653 +
1.654 +    // The index found for a hex digit is also the position of its upper case
1.655 +    // form in KHexDigit
1.656 +    const TInt mostSignificantDigitValue = KHexDigit().LocateF(aData[KMostSignificantNibblePos] );
1.657 +    const TInt leastSignificantDigitValue = KHexDigit().LocateF(aData[KLeastSignificantNibblePos] );
1.658 +    if( mostSignificantDigitValue == KErrNotFound || leastSignificantDigitValue == KErrNotFound )
1.659 +        {
1.660 +        // Either of the characters were not a valid hex character
1.661 +        return EFalse;
1.662 +        }
1.663 +
1.664 +    // Both indices are valid positions in KHexDigit here, so the upper case digits
1.665 +    // can be copied straight out of it
1.666 +    aCaseNormalizedData.Zero();
1.667 +    aCaseNormalizedData.Append(KEscapeIndicator);
1.668 +    aCaseNormalizedData.Append(KHexDigit().Mid(mostSignificantDigitValue,1));
1.669 +    aCaseNormalizedData.Append(KHexDigit().Mid(leastSignificantDigitValue,1));
1.670 +    return ETrue;
1.671 +    }
1.672 +
1.673 +/**
1.674 + Performs Percent-Encoding Normalization for CUri8 object as specified in section
1.675 + 6.2.2.2 of RFC3986, by normalising each component of the uri in turn.
1.676 +
1.677 + @param aNormalisedUri In-out parameter. Pointer to the CUri8 object whose uri is normalised in place.
1.678 + @leave KErrNoMemory
1.679 + */
1.680 +void PercentEncodeL(CUri8* aNormalisedUri)
1.681 +    {
1.682 +    // The components are normalised one at a time, in this order
1.683 +    const TUriComponent KOrder[] =
1.684 +        {
1.685 +        EUriScheme,
1.686 +        EUriUserinfo,
1.687 +        EUriHost,
1.688 +        EUriPort,
1.689 +        EUriPath,
1.690 +        EUriQuery,
1.691 +        EUriFragment
1.692 +        };
1.693 +    const TInt KOrderCount = sizeof(KOrder) / sizeof(KOrder[0]);
1.694 +    for (TInt index = 0; index < KOrderCount; ++index)
1.695 +        {
1.696 +        DoPercentEncodeL(aNormalisedUri, KOrder[index]);
1.697 +        }
1.698 +    }
1.699 +
1.700 +/**
1.701 + Performs Percent-Encoding normalisation for specified sub component of URI: an escape triple
1.702 + that stands for an unreserved character (US-ASCII letter, digit, '-', '.', '~' or '_') is replaced
1.703 + by that character. All other triples, and any '%' not followed by two characters, are left as is.
1.704 +
1.705 + @param aNormalisedUri In-out parameter. Pointer to the CUri8 object whose component is normalised in place.
1.706 + @param aComponent Enumeration of TUriComponent. Nothing is done if the component is not present.
1.707 + @leave KErrNoMemory, or KErrArgument if a change was made and aComponent is outside the component range.
1.708 +*/
1.709 +void DoPercentEncodeL(CUri8* aNormalisedUri, TUriComponent aComponent)
1.710 +    {
1.711 +    const TUriC8& uri(aNormalisedUri->Uri());
1.712 +    if(!uri.IsPresent(aComponent))
1.713 +        {
1.714 +        return;
1.715 +        }
1.716 +
1.717 +    // unreserved characters are all US-ASCII, so values from 0x80 upwards must stay encoded
1.718 +    const TInt KAsciiLimit = 0x80;
1.719 +    HBufC8* heapBuf = uri.Extract(aComponent).AllocLC();
1.720 +    TPtr8 percentNormalisedComponent(heapBuf->Des());
1.721 +    TBool normalised = EFalse;
1.722 +    TInt len = percentNormalisedComponent.Length();
1.723 +    for (TInt pos = 0; pos < len; pos++)
1.724 +        {
1.725 +        TInt hex;
1.726 +        // check for '%' encoded characters; the length test keeps Mid() from panicking on a truncated triple
1.727 +        if (percentNormalisedComponent[pos] == KEscapeIndicator && (len - pos) >= KSubstringLength &&
1.728 +            EscapeUtils::IsEscapeTriple(percentNormalisedComponent.Mid(pos, KSubstringLength), hex))
1.729 +            {
1.730 +            TChar replacedChar(hex);
1.731 +            if( hex < KAsciiLimit && (KUnreserved().LocateF(hex) != KErrNotFound || replacedChar.IsAlphaDigit()) )
1.732 +                {
1.733 +                TBuf8<KAttachLength> subString;
1.734 +                subString.Append(replacedChar);
1.735 +                percentNormalisedComponent.Replace(pos, KSubstringLength, subString);
1.736 +                normalised = ETrue;
1.737 +                len = percentNormalisedComponent.Length(); // the component has just become shorter
1.738 +                }
1.739 +            }
1.740 +        }
1.741 +    // updating the uri with normalised string, only when something was changed
1.742 +    if( normalised )
1.743 +        {
1.744 +        if(!(aComponent<EUriMaxComponents && aComponent >=EUriScheme))
1.745 +            {
1.746 +            User::Leave(KErrArgument);
1.747 +            }
1.748 +        aNormalisedUri->SetComponentL(percentNormalisedComponent, aComponent);
1.749 +        }
1.750 +    CleanupStack::PopAndDestroy(heapBuf);
1.751 +    }
1.752 +
1.753 +/**
1.754 + Performs Path Segment Normalization for CUri8 object as specified in
1.755 + section 6.2.2.3 of RFC3986. A uri without a path component is left unchanged.
1.756 +
1.757 + @param aNormalisedUri In-out parameter. Pointer to the CUri8 object whose
1.758 + path is replaced by its Path Segment normalised form.
1.759 + @leave KErrNoMemory
1.760 + */
1.761 +void RemoveDotSegmentsL(CUri8* aNormalisedUri)
1.762 +    {
1.763 +    const TUriC8& current( aNormalisedUri->Uri() );
1.764 +    if(!current.IsPresent(EUriPath))
1.765 +        {
1.766 +        return;
1.767 +        }
1.768 +    HBufC8* pathCopy = current.Extract(EUriPath).AllocLC();
1.769 +    RemoveExtraneousDotSegmentsL(pathCopy);
1.770 +    aNormalisedUri->SetComponentL(*pathCopy, EUriPath);
1.771 +    CleanupStack::PopAndDestroy(pathCopy);
1.772 +    }
1.773 +
1.774 +/**
1.775 + Performs the remove_dot_segments algorithm as specified in section 5.2.4 of RFC3986.
1.776 + The input is consumed from its front while the result is collected in a temporary buffer, which is finally copied back.
1.777 + @param aUriInputPath It is an in-out parameter. aUriInputPath is a pointer to the
1.778 + path descriptor to be normalised for extraneous dot_segments and returns with
1.779 + normalised dot_segments.
1.780 + @leave KErrNoMemory
1.781 +*/
1.782 +void RemoveExtraneousDotSegmentsL(HBufC8* aUriInputPath)
1.783 + {
1.784 + TPtr8 uriPathBuf(aUriInputPath->Des());
1.785 + TInt length = uriPathBuf.Length();
1.786 + HBufC8* path = HBufC8::NewLC(length);
1.787 + TPtr8 transitionalBuf(path->Des());
1.788 +
1.789 + while(length > 0)
1.790 + {
1.791 + //step a of section 5.2.4 of RFC 3986 - the input starts with "../", which is dropped
1.792 + if(length >= KDotDotSlashLength &&
1.793 + KDotDotSlash().Compare(uriPathBuf.Mid(0, KDotDotSlashLength)) == 0 )
1.794 + {
1.795 + uriPathBuf.Delete(0,KDotDotSlashLength);
1.796 + }
1.797 + //step a of section 5.2.4 of RFC 3986 - the input starts with "./" (same length as ".."), which is dropped
1.798 + else if(length >= KDotDotLength &&
1.799 + KDotSlash().Compare(uriPathBuf.Mid(0, KDotDotLength)) == 0)
1.800 + {
1.801 + uriPathBuf.Delete(0,KDotDotLength);
1.802 + }
1.803 + //step b of section 5.2.4 of RFC 3986 - the input starts with "/./", which is replaced by "/"
1.804 + else if(length >= KDotDotSlashLength &&
1.805 + KSlashDotSlash().Compare(uriPathBuf.Mid(0, KDotDotSlashLength)) == 0)
1.806 + {
1.807 + uriPathBuf.Replace(0, KDotDotSlashLength, KSlash);
1.808 + }
1.809 + //step c of section 5.2.4 of RFC 3986 - the input starts with "/../": replace it by "/" and drop the last output segment
1.810 + else if(length >= KSlashDotDotSlashLength &&
1.811 + KSlashDotDotSlash().Compare(uriPathBuf.Mid(0, KSlashDotDotSlashLength)) == 0)
1.812 + {
1.813 + updateStrings(uriPathBuf, transitionalBuf, KSlashDotDotSlashLength);
1.814 + }
1.815 + //step c of section 5.2.4 of RFC 3986 --complete path segment: the whole remaining input is "/.."
1.816 + else if(length == KDotDotSlashLength &&
1.817 + KSlashDotDot().Compare(uriPathBuf.Mid(0, KDotDotSlashLength)) == 0)
1.818 + {
1.819 + updateStrings(uriPathBuf, transitionalBuf, KDotDotSlashLength);
1.820 + }
1.821 + //step b of section 5.2.4 of RFC 3986--complete path segment: the whole remaining input is "/."
1.822 + else if(length == KDotDotLength &&
1.823 + KSlashDot().Compare(uriPathBuf.Mid(0, KDotDotLength)) == 0)
1.824 + {
1.825 + uriPathBuf.Replace(0, KDotDotLength, KSlash);
1.826 + }
1.827 + //step d of section 5.2.4 of RFC 3986 - the whole remaining input is "..", which is dropped
1.828 + else if(length == KDotDotLength &&
1.829 + KDotDot().Compare(uriPathBuf.Mid(0)) == 0)
1.830 + {
1.831 + uriPathBuf.Delete(0,KDotDotLength);
1.832 + }
1.833 + //step d of section 5.2.4 of RFC 3986 - the whole remaining input is ".", which is dropped
1.834 + else if(length == KDotLength &&
1.835 + KDot().Compare(uriPathBuf.Mid(0)) == 0)
1.836 + {
1.837 + uriPathBuf.Delete(0,KDotLength);
1.838 + }
1.839 + //step e of section 5.2.4 of RFC 3986 - move the first path segment from the input to the output
1.840 + else
1.841 + {
1.842 + //get the first path segment including initial / (if any)from uriPathBuf
1.843 + // till next slash (but not including next slash)..append it to the output Buf
1.844 + TInt substrLength;
1.845 + TInt nextSlashPos = uriPathBuf.Find(KSlash);
1.846 + if(nextSlashPos == 0 && length > KDotLength)
1.847 + //the input starts with '/', so look for the next '/' after it and convert its offset back to an input position
1.848 + {
1.849 + nextSlashPos = uriPathBuf.Mid(1).Find(KSlash);
1.850 + if(nextSlashPos != KErrNotFound)
1.851 + {
1.852 + ++nextSlashPos;
1.853 + }
1.854 + }
1.855 + if(length == KDotLength)
1.856 + //only a single character is left, it is moved as it is
1.857 + {
1.858 + substrLength = length;
1.859 + }
1.860 + else
1.861 + {
1.862 + substrLength = nextSlashPos == KErrNotFound ? length : nextSlashPos ;
1.863 + }
1.864 + transitionalBuf.Append(uriPathBuf.Mid(0,substrLength));
1.865 + uriPathBuf.Delete(0,substrLength);
1.866 + }
1.867 + length = uriPathBuf.Length(); // every branch above changes the input, so refresh the loop length
1.868 + }
1.869 + uriPathBuf.Copy(transitionalBuf); // the input is empty here; write the collected result back into it
1.870 + CleanupStack::PopAndDestroy(path);
1.871 + }
1.872 +
1.873 +/**
1.874 + Updates the strings as specified in step c of section 5.2.4 of RFC 3986
1.875 +
1.876 + @param aInputBuf A reference to the inputBuf; its first aLength characters are replaced by "/"
1.877 + @param aOutPutBuf A reference to the outPutBuf; its last segment is removed
1.878 + @param aLength length of the string to be replaced.
1.879 + */
1.880 +void updateStrings(TPtr8& aInputBuf, TPtr8& aOutPutBuf, TInt aLength)
1.881 +    {
1.882 +    // collapse the matched prefix of the input to a single '/'
1.883 +    aInputBuf.Replace(0,aLength,KSlash);
1.884 +
1.885 +    // remove the output's last segment including its '/', eg: /abc/def/fgh --> /abc/def;
1.886 +    // when the output holds no '/' at all, everything in it is removed
1.887 +    const TInt lastSlash = aOutPutBuf.LocateReverse('/');
1.888 +    const TInt cutFrom = (lastSlash != KErrNotFound) ? lastSlash : 0;
1.889 +    aOutPutBuf.Delete(cutFrom, aOutPutBuf.Length() - cutFrom);
1.890 +    }
1.891 +