First public contribution.
1 // Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
2 // All rights reserved.
3 // This component and the accompanying materials are made available
4 // under the terms of "Eclipse Public License v1.0"
5 // which accompanies this distribution, and is available
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
8 // Initial Contributors:
9 // Nokia Corporation - initial contribution.
16 #include <escapeutils.h>
20 #include "EscapeUtilsInternal.h"
24 _LIT(KHexDigit, "0123456789ABCDEF");
25 _LIT(KExcludedData, "{}|\\^`<>#%\"");
26 _LIT8(KQueryData8, ";/?:@&=+$,[]");
27 _LIT8(KPathData8, "/;=?[]");
28 _LIT8(KAuthData8, ";:@?/[]");
29 _LIT8(KUrlEncoded8, ";/?:@&=+$[]!\'()~*");
30 _LIT16(KQueryData16, ";/?:@&=+$,[]");
31 _LIT16(KPathData16, "/;=?[]");
32 _LIT16(KAuthData16, ";:@?/[]");
33 _LIT16(KUrlEncoded16, ";/?:@&=+$[]!\'()~");
34 const TInt KEscapeUtilsConversionBufferSize = 50;
35 const TInt KEscapeIndicator = '%';
36 const TInt KEscapeTripleLength = 3;
37 const TInt KEscDelimiterPos = 0;
38 const TInt KMostSignificantNibblePos = 1;
39 const TInt KLeastSignificantNibblePos = 2;
44 _LIT(KEscapeUtilsPanicCategory, "ESC-UTILS");
49 // Implementation of EscapeUtils
54 Escape encodes excluded and reserved characters in the data as escape triples.
55 The reserved characters are defined by the escape mode. These characters and the
56 set of excluded characters specified by RFC2396 form the entire set of excluded data.
59 @leave KUriUtilsErr16BitChar. A 16-Bit character was found in the data to be escape encoded.
60 @param aData A descriptor with the data to encode.
61 @param aEscapeMode An enum specifying the escape mode.
62 @return A pointer to a descriptor buffer which contains the escape encoded data.
64 EXPORT_C HBufC8* EscapeUtils::EscapeEncodeL(const TDesC8& aData, TEscapeMode aEscapeMode)
66 // Need descriptor pointer to reserved characters...
73 // This is normal operation - no reserved chars
74 reserved.Set(KNullDesC8);
78 // Reserved data in a URI query - ; / ? : @ & = + $ , [ ]
79 reserved.Set(KQueryData8());
83 // Reserved data in a URI path segment - / ; = ? [ ]
84 reserved.Set(KPathData8());
88 // Reserved data in a URI authority - ; : @ ? / [ ]
89 reserved.Set(KAuthData8());
91 case EEscapeUrlEncoded:
93 // Reserved data in Url Encoded data - ; / ? : @ & = + $ [ ] ! ' ( ) ~ *
94 reserved.Set(KUrlEncoded8());
97 // Not supported return NULL
98 __ASSERT_DEBUG(EFalse, User::Panic(KEscapeUtilsPanicCategory, KUriUtilsErrBadEscapeMode));
101 return EscapeEncodeL(aData, reserved);
105 Escape encodes excluded and reserved characters in the data as escape triples. The
106 reserved characters are defined by the escape mode. These characters and the set of
107 excluded characters specified by RFC2396 form the entire set of excluded data.
110 @leave KUriUtilsErr16BitChar. A 16-Bit character was found in the data to be escape encoded.
111 @param aData A descriptor with the data to encode.
112 @param aEscapeMode An enum specifying the escape mode.
113 @return A pointer to a descriptor buffer which contains the escape encoded data.
115 EXPORT_C HBufC16* EscapeUtils::EscapeEncodeL(const TDesC16& aData, TEscapeMode aEscapeMode)
117 // Need descriptor pointer to reserved chars
124 // This is normal operation - no reserved chars
125 reserved.Set(KNullDesC16);
129 // Reserved data in a URI query - ; / ? : @ & = + $ , [ ]
130 reserved.Set(KQueryData16());
134 // Reserved data in a URI path segment - / ; = ? []
135 reserved.Set(KPathData16());
139 // Reserved data in a URI authority - ; : @ ? / []
140 reserved.Set(KAuthData16());
142 case EEscapeUrlEncoded:
144 // Reserved data in Url Encoded data - ; / ? : @ & = + $ [ ] ! ' ( ) ~
145 reserved.Set(KUrlEncoded16());
148 // Not supported return NULL
149 __ASSERT_DEBUG(EFalse, User::Panic(KEscapeUtilsPanicCategory, KUriUtilsErrBadEscapeMode));
152 return EscapeEncodeL(aData, reserved);
156 Escape encodes excluded and reserved characters in the data as escape triples. These
157 characters and the set of excluded characters specified by RFC2396 form the entire set of excluded data.
161 @leave KUriUtilsErr16BitChar. A 16-Bit character was found in the data to be escape encoded.
162 @param aData A descriptor with the data to encode.
163 @param aReservedChars A descriptor with the reserved characters.
164 @return A pointer to a descriptor buffer which contains the escape encoded data.
166 EXPORT_C HBufC8* EscapeUtils::EscapeEncodeL(const TDesC8& aData, const TDesC8& aReservedChars)
168 // Allocate space to build escaped url - consider worst case; all characters are excluded => length x 3
169 HBufC8* buf = HBufC8::NewLC(aData.Length()*3);
170 TPtr8 escaped = buf->Des();
172 User::LeaveIfError(EscapeEncodeData(aData, aReservedChars, escaped));
173 HBufC8* encoded = escaped.AllocL();
175 CleanupStack::PopAndDestroy(buf);
180 Escape encodes excluded and reserved characters in the data as escape triples. These characters
181 and the set of excluded characters specified by RFC2396 form the entire set of excluded data.
184 @leave KUriUtilsErr16BitChar. A 16-Bit character was found in the data to be escape encoded.
185 @param aData A descriptor with the data to encode.
186 @param aReservedChars A descriptor with the reserved characters.
187 @return A pointer to a descriptor buffer which contains the escape encoded data.
189 EXPORT_C HBufC16* EscapeUtils::EscapeEncodeL(const TDesC16& aData, const TDesC16& aReservedChars)
191 // Allocate space to build escaped url - consider worst case; all characters are excluded => length x 3
192 HBufC16* buf = HBufC16::NewLC(aData.Length()*3);
193 TPtr16 escaped = buf->Des();
195 User::LeaveIfError(EscapeEncodeData(aData, aReservedChars, escaped));
196 HBufC16* encoded = escaped.AllocL();
198 CleanupStack::PopAndDestroy(buf);
203 Escape decodes the data.
206 @param aData A descriptor with the data to decode.
207 @return A pointer to a descriptor buffer which contains the escape decoded data.
209 EXPORT_C HBufC8* EscapeUtils::EscapeDecodeL(const TDesC8& aData)
211 // Allocate space to build unescaped data
212 HBufC8* buf = HBufC8::NewLC(aData.Length());
213 TPtr8 unescaped = buf->Des();
215 User::LeaveIfError(EscapeDecodeData(aData, unescaped));
216 HBufC8* decoded = unescaped.AllocL();
218 CleanupStack::PopAndDestroy(buf);
223 Escape decodes the data.
226 @param aData A descriptor with the data to decode.
227 @return A pointer to a descriptor buffer which contains the escape decoded data.
229 EXPORT_C HBufC16* EscapeUtils::EscapeDecodeL(const TDesC16& aData)
231 // Allocate space to build unescaped data
232 HBufC16* buf = HBufC16::NewLC(aData.Length());
233 TPtr16 unescaped = buf->Des();
235 User::LeaveIfError(EscapeDecodeData(aData, unescaped));
236 HBufC16* decoded = unescaped.AllocL();
238 CleanupStack::PopAndDestroy(buf);
245 Escape encodes only those characters that cannot be in a URI. Assumes all %hh sequences are already escape encoded.
247 @param aData The descriptor buffer to be escape encoded.
248 @return A pointer to a descriptor buffer which contains the escape encoded data.
250 HBufC8* EscapeUtils::ReEscapeEncodeL(const TDesC8& aData)
252 // Descriptor to hex digits and excluded chars
253 const TDesC& KHexChars = KHexDigit;
255 const TInt length = aData.Length();
257 // find out how many characters need escape encoding
259 for( TInt i=0; i<length; ++i )
261 TChar current( aData[i] );
262 if( EscapeUtils::IsExcludedChar(current) && current != KFragmentDelimiter &&
263 !(current == KEscapeIndicator && i+2<length && TChar(aData[i+1]).IsHexDigit() && TChar(aData[i+2]).IsHexDigit() ) )
268 if( count == 0) // no encoding needed, just allocate and return the whole string
270 return aData.AllocL();
272 // pre-allocate space for the descriptor
273 HBufC8* buf = HBufC8::NewLC( aData.Length() + count*2 ); // two extra chars for each escaped
274 TPtr8 escaped = buf->Des();
276 for( TInt i=0; i<length; ++i )
278 // Check if current character must be escaped
279 TChar current ( aData[i] );
280 // Check if current character is excluded, but not if it appears to be escape encoded
281 TBool excluded = EscapeUtils::IsExcludedChar(current) && current != KFragmentDelimiter &&
282 !(current == KEscapeIndicator && i+2<length && TChar(aData[i+1]).IsHexDigit() && TChar(aData[i+2]).IsHexDigit() );
286 // Excluded char - escape encode
287 escaped.Append(KEscapeIndicator);
288 const TInt mostSignificantNibble = (current & 0xf0) >> 4; // Get msNibble by masking against 11110000 and dividing by 16 (>>4)
289 escaped.Append(KHexChars[mostSignificantNibble]);
290 const TInt leastSignificantNibble = (current & 0x0f); // Get lsNibble by masking against 00001111
291 escaped.Append(KHexChars[leastSignificantNibble]);
295 // Not an excluded char - just append
296 escaped.Append(current);
299 CleanupStack::Pop(buf);
306 Converts UNICODE data into UTF8 format.
309 @leave KUriUtilsCannotConvert. When the input data cannot be converted.
310 @param aString A descriptor with the data to convert.
311 @return A pointer to an 8-bit descriptor buffer which contains UTF8 data.
313 EXPORT_C HBufC8* EscapeUtils::ConvertFromUnicodeToUtf8L(const TDesC& aString)
315 // Return an empty buffer straight-away
316 if( aString.Compare(KNullDesC) == 0 )
317 return KNullDesC8().AllocL();
319 // Convert from Unicode to UTF8
320 TPtrC unicode = aString;
321 TBuf8<KEscapeUtilsConversionBufferSize> buf;
322 HBufC8* utf8Buffer = HBufC8::NewLC(unicode.Length());
323 TPtr8 utf8 = utf8Buffer->Des();
325 // Loop until all of the input string is converted
328 const TInt returnValue = CnvUtfConverter::ConvertFromUnicodeToUtf8(buf, unicode);
329 if( returnValue == CnvUtfConverter::EErrorIllFormedInput || returnValue < 0)
330 User::Leave(KUriUtilsCannotConvert);
332 // Is the UTF8 output buffer too small?
333 if( utf8.Length() + buf.Length() > utf8.MaxLength() )
335 utf8Buffer = utf8Buffer->ReAllocL(utf8.Length() + buf.Length());
336 CleanupStack::Pop(); // utf8Buffer (old version)
337 CleanupStack::PushL(utf8Buffer); // new version
338 utf8.Set(utf8Buffer->Des());
340 // Copy converted characters
343 if( returnValue == KErrNone )
344 break; // All of aUnicodeText has been converted and handled
346 // Set input descriptor to remaining characters
347 unicode.Set(unicode.Right(returnValue));
349 CleanupStack::Pop(utf8Buffer);
350 return utf8Buffer; // Ownership transfered to caller
354 Converts UTF8 format into UNICODE data.
357 @leave KUriUtilsCannotConvert. When the input data cannot be converted.
358 @param aString A descriptor with the data to convert.
359 @return A pointer to a 16-bit descriptor buffer which contains UNICODE data.
361 EXPORT_C HBufC* EscapeUtils::ConvertToUnicodeFromUtf8L(const TDesC8& aString)
363 // Return an empty buffer straight-away
364 if( aString.Compare(KNullDesC8) == 0 )
365 return KNullDesC().AllocL();
367 // Convert from UTF8 to Unicode
368 TPtrC8 utf8 = aString;
369 TBuf<KEscapeUtilsConversionBufferSize> buf;
370 HBufC* unicodeBuffer = HBufC::NewLC(utf8.Length());
371 TPtr unicode = unicodeBuffer->Des();
373 // Loop until all of the input string is converted
376 const TInt returnValue = CnvUtfConverter::ConvertToUnicodeFromUtf8(buf, utf8);
377 if( returnValue == CnvUtfConverter::EErrorIllFormedInput || returnValue < 0)
378 User::Leave(KUriUtilsCannotConvert);
380 // Is the Unicode output buffer too small?
381 if( unicode.Length() + buf.Length() > unicode.MaxLength() )
383 unicodeBuffer = unicodeBuffer->ReAllocL(unicode.Length() + buf.Length());
384 CleanupStack::Pop(); // unicodeBuffer (old version)
385 CleanupStack::PushL(unicodeBuffer); // new version
386 unicode.Set(unicodeBuffer->Des());
388 // Copy converted characters
392 break; // All of utf8 has been converted and handled
394 // Set input descriptor to remaining characters
395 utf8.Set(utf8.Right(returnValue));
397 CleanupStack::Pop(unicodeBuffer);
398 return unicodeBuffer; // Ownership transfered to caller
402 Checks to see if the input argument is excluded.
405 @param aChar The character to be checked.
406 @return A boolean value of ETrue if the character is an excluded one, or EFalse otherwise.
409 EXPORT_C TBool EscapeUtils::IsExcludedChar(TChar aChar)
411 const TDesC& KExcludedChars = KExcludedData;
412 TBool excluded = KExcludedChars.Locate(aChar) != KErrNotFound || aChar <= 0x1F || aChar == ' ' || aChar > 0x7E;
417 Checks for an escape triple at the start of the input descriptor. If there is a triple
418 its value is calculated and returned through the output argument aHexValue. If there is
419 no escape triple then this argument is left unchanged.
422 @param aData The descriptor to be checked for an escape triple.
423 @param aHexValue The output argument with the value of the escape triple
425 @return A boolean value of ETrue if there is an escape triple at the start of
426 the input descriptor, EFalse otherwise.
428 EXPORT_C TBool EscapeUtils::IsEscapeTriple(const TDesC8& aData, TInt& aHexValue)
430 return CheckAndConvertEscapeTriple(aData, aHexValue);
434 Checks for an escape triple at the start of the input descriptor. If there is a triple
435 its value is calculated and returned through the output argument aHexValue. If there is
436 no escape triple then this argument is left unchanged.
439 @param aData The descriptor to be checked for an escape triple.
440 @param aHexValue The output argument with the value of the escape triple
442 @return A boolean value of ETrue if there is an escape triple at the start of
443 the input descriptor, EFalse otherwise.
445 EXPORT_C TBool EscapeUtils::IsEscapeTriple(const TDesC16& aData, TInt& aHexValue)
447 return CheckAndConvertEscapeTriple(aData, aHexValue);
451 Returns the escape encoded form of the input descriptor. Every character of aData is checked
452 against aCharsToEscape and, if it is found there, that character is escape encoded. Control characters and characters above 0x7E are always escape encoded.
454 @param aData The descriptor to be checked against escaping set of characters.
455 @param aCharsToEscape The set of escape characters.
456 @return A pointer to the escape encoded descriptor.
458 EXPORT_C HBufC8* EscapeUtils::SpecificEscapeEncodeL ( const TDesC8& aData, const TDesC8& aCharsToEscape )
460 // Descriptor to hex digits and excluded chars
461 const TDesC& KHexChars = KHexDigit;
463 const TInt length = aData.Length();
465 // find out how many characters need escape encoding
467 for( TInt i=0; i<length; ++i )
469 TChar current( aData[i] );
470 if ( current <= 0x1F || aCharsToEscape.Locate ( current ) != KErrNotFound || current > 0x7E )
475 if( count == 0) // no encoding needed, just allocate and return the whole string
477 return aData.AllocL();
480 // pre-allocate space for the descriptor
481 HBufC8* buf = HBufC8::NewLC( length + count*2 ); // two extra chars for each escaped
482 TPtr8 escaped = buf->Des();
484 for( TInt i=0; i<length; ++i )
486 // Check if current character must be escaped
487 TChar current ( aData[i] );
488 // Check if current character is excluded ( control characters, characters above 0x7E and the characters specified for escaping )
489 TBool excluded = current <= 0x1F || ( aCharsToEscape.Locate ( current ) != KErrNotFound ) || current > 0x7E;
493 // Excluded char - escape encode
494 escaped.Append(KEscapeIndicator);
495 const TInt mostSignificantNibble = (current & 0xf0) >> 4; // Get msNibble by masking against 11110000 and dividing by 16 (>>4)
496 escaped.Append(KHexChars[mostSignificantNibble]);
497 const TInt leastSignificantNibble = (current & 0x0f); // Get lsNibble by masking against 00001111
498 escaped.Append(KHexChars[leastSignificantNibble]);
502 // Not an excluded char - just append
503 escaped.Append(current);
506 CleanupStack::Pop(buf);
511 The Dummy API is used to redirect to SpecificEscapeEncodeL() API in order to preserve BC and is made private
512 to ensure no-one else starts using it.
514 EXPORT_C HBufC8* EscapeUtils::DummyForwardingFunctionForCompatibility( const TDesC8& aData, const TDesC8& aCharsToEscape )
516 return EscapeUtils::SpecificEscapeEncodeL ( aData, aCharsToEscape );
522 // Implementation of LOCAL functions
526 Escape encodes the data, converting the reserved characters and excluded characters defined by
527 RFC2396 as escape triples.
530 @warning This function will panic if the output descriptor aEncodedData is
531 not big enough to append all the data.
532 @param aData A descriptor with the data to encode.
533 @param aReservedChars Reserved characters set.
534 @param aEncodedData The output descriptor pointer where the escape encoded data is placed.
536 @return An error code of KUriUtilsErr16BitChar if the data contains a 16-bit
537 character. KErrNone if the data was successfully encoded.
539 template<class TDesCType, class TPtrType>
540 TInt EscapeEncodeData(const TDesCType& aData, const TDesCType& aReservedChars, TPtrType& aEncodedData)
542 // Descriptor to hex digits and excluded chars
543 const TDesC& KHexChars = KHexDigit;
545 const TInt length = aData.Length();
546 for( TInt i=0; i<length; ++i )
548 // Check if current character must be escaped, will return error if not 8-bit character
549 TChar current = aData[i];
552 __ASSERT_DEBUG(EFalse, User::Panic(KEscapeUtilsPanicCategory, KUriUtilsErr16BitChar));
553 return (KUriUtilsErr16BitChar);
555 // Check if current character is excluded (including control characters and space) or reserved
556 TBool excluded = EscapeUtils::IsExcludedChar(current) || aReservedChars.Locate(current) != KErrNotFound;
559 // Excluded char - escape encode
560 aEncodedData.Append(KEscapeIndicator);
561 const TInt mostSignificantNibble = (current & 0xf0) >> 4; // Get msNibble by masking against 11110000 and dividing by 16 (>>4)
562 aEncodedData.Append(KHexChars[mostSignificantNibble]);
563 const TInt leastSignificantNibble = (current & 0x0f); // Get lsNibble by masking against 00001111
564 aEncodedData.Append(KHexChars[leastSignificantNibble]);
568 // Not an excluded char, or it is already escape encoded - just append
569 aEncodedData.Append(current);
576 Escape decodes the data, converting escape triples back to their single character value.
579 @warning This function will panic if the output descriptor aDecodedData is not big
580 enough to append all the data.
581 @param aData A descriptor with the data to decode.
582 @param aDecodedData The output descriptor pointer where the escape decoded data is placed.
584 @return An error code of KUriUtilsErrBadEscapeTriple if the data contains a malformed escape triple.
585 KErrNone if the data was successfully decoded.
587 template<class TDesCType, class TPtrType>
588 TInt EscapeDecodeData(const TDesCType& aData, TPtrType& aDecodedData)
590 // Go through the descriptor
591 const TInt length = aData.Length();
592 for( TInt i=0; i<length; ++i )
594 // See if at start of an escape triple
595 TChar current = aData[i];
596 if( current == KEscapeIndicator )
599 if( !CheckAndConvertEscapeTriple(aData.Mid(i), hex) )
601 // Either of the nibbles were not a valid hex character
602 return KUriUtilsErrBadEscapeTriple;
605 aDecodedData.Append(hex);
607 // Move index to get next character - add 2 to index
612 // Not an escaped triple - just append
613 aDecodedData.Append(current);
620 Checks for an escape triple at the start of the input descriptor. If there is a triple its
621 value is calculated and returned through the output argument aHexValue. If there is no escape
622 triple then this argument is left unchanged.
625 @param aData The descriptor to be checked for an escape triple.
626 @param aHexValue The output argument with the value of the escape triple
628 @return A boolean value of ETrue if there is an escape triple at the start
629 of the input descriptor, EFalse otherwise.
631 template<class TDesCType>
632 TBool CheckAndConvertEscapeTriple(const TDesCType& aData, TInt& aHexValue)
634 // See if the descriptor is actually long enough
635 if( aData.Length() < KEscapeTripleLength )
639 // Check that the three characters form an escape triple - first char is '%'
640 if( aData[KEscDelimiterPos] != KEscapeIndicator )
644 // Descriptor to hex digits and excluded chars
645 const TDesC& KHexChars = KHexDigit;
647 // Check that next two characters are valid
648 TChar mostSignificantNibble = aData[KMostSignificantNibblePos];
649 TChar leastSignificantNibble = aData[KLeastSignificantNibblePos];
651 TInt mostSignificantNibbleValue = KHexChars.LocateF(mostSignificantNibble);
652 TInt leastSignificantNibbleValue = KHexChars.LocateF(leastSignificantNibble);
654 if( mostSignificantNibbleValue == KErrNotFound || leastSignificantNibbleValue == KErrNotFound )
656 // Either of the nibbles were not a valid hex character
659 // Convert characters into hex value and return
660 aHexValue = 0x10*mostSignificantNibbleValue + 0x01*leastSignificantNibbleValue;