epoc32/include/gmxmlparser.h
author William Roberts <williamr@symbian.org>
Tue, 16 Mar 2010 16:12:26 +0000
branch Symbian2
changeset 2 2fe1408b6811
child 4 837f303aceeb
permissions -rw-r--r--
Final list of Symbian^2 public API header files
     1 // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
     2 // All rights reserved.
     3 // This component and the accompanying materials are made available
     4 // under the terms of the License "Symbian Foundation License v1.0" to Symbian Foundation members and "Symbian Foundation End User License Agreement v1.0" to non-members
     5 // which accompanies this distribution, and is available
     6 // at the URL "http://www.symbianfoundation.org/legal/licencesv10.html".
     7 //
     8 // Initial Contributors:
     9 // Nokia Corporation - initial contribution.
    10 //
    11 // Contributors:
    12 //
    13 // Description:
    14 // This file contains the declaration of the generic CMDXMLParser class
    15 // which is responsible for creating a DOM structure
    16 // from a given XML file.
    17 // 
    18 //
    19 
    20 
    21 
    22 /**
    23  @file
    24 */
    25 
    26 #ifndef __GMXMLPARSER_H__
    27 #define __GMXMLPARSER_H__
    28 
    29 #include <e32std.h>
    30 #include <eikenv.h>
    31 #include <gmxmlconstants.h>
    32 
     33 // Forward declarations: these types are only used by pointer or reference in this header.
     34 class CMDXMLDocument;
     35 class CMDXMLEntityConverter;
     36 class CMDXMLElement;
     37 class MXMLDtd;
    38 
    39 
    40 
     41 class MMDXMLParserObserver
     42 /** Abstract observer interface for notification when XML parsing is complete.
     43 
     44 It should be implemented by users of CMDXMLParser; a pointer to the observer is passed to CMDXMLParser::NewL() / NewLC().
     45 @publishedAll 
     46 @released*/
     47 	{
     48 public:
     49 	/**
     50 	Callback used to inform a client of the parser that a parsing operation has completed. According to the notes on CMDXMLParser::RunError(), it is also called after a leave from the parser's RunL(), so implementations should check CMDXMLParser::Error() before using the document.
     51 	 */
     52 	virtual void ParseFileCompleteL() = 0;
     53 	};
    54 
     55 class MMDXMLParserDataProvider
     56 /** Abstract data source interface for XML data source.
     57 
     58 The user of CMDXMLParser must build one of these to encapsulate the data source
     59 that they wish to parse.  CMDXMLParser implements a file-based data source to
     60 implement the functionality of the ParseFile function.
     61 
     62 @publishedAll 
     63 @released*/
     64 	{
     65 public:
     66 	/** Status codes with which GetData() implementations complete the request status. */
     67 	enum TDataProviderResults
     68 		{
     69 		KMoreData,		///< Returned by the interface implementation when it is returning more data.
     70 		KDataStreamError,	///< Returned by the interface when an unrecoverable error prevents obtaining more data.  The original note says a recoverable error should be represented by KDataNotReady, but no such value is declared in this enum - TODO confirm how recoverable errors are to be reported.
     71 		KDataStreamEnd	///< Returned by the interface when there is no more data to come.
     72 		};
     73 
     74 public:
     75 	/** 
     76 	The XML Parser calls this on a specific data provider to get more data
     77 	when required.
     78 
     79 	Note that the TPtrC8 supplied may be used by the parser at any time
     80 	between the return of this call and the next call that the parser
     81 	makes out.
     82 
     83 	Your data provider must not move the data pointed to until the
     84 	parser has indicated that it's done with that block by asking for
     85 	another.
     86 
     87 	Ownership of the data pointed to remains with the data provider.
     88 
     89 
     90 	General comments on efficiency
     91 	------------------------------
     92 
     93 	The parser is designed such that it processes the whole data block
     94 	provided in one go.  It will automatically become asynchronous when
     95 	another block is required - the data provider only needs to supply
     96 	data.
     97 
     98 	Because of this design, it allows the data provider to indirectly
     99 	control the amount of processing time that will be needed
    100 	in a single block.
    101 
    102 	It is a good idea to balance the need for the fastest possible 
    103 	processing with the need for client application responsiveness by
    104 	ensuring that the amount of data passed in a single block is not 
    105 	too large.  However, it is worth bearing in mind that the parser
    106 	will convert UTF8 data streams in blocks of 32 characters, and
    107 	supplying blocks of smaller length than this will result in a
    108 	slight loss of efficiency.
    109 
    110 	@param aPtr On return, refers to the block of data provided (the data itself stays owned by the provider)
    111 	@param aStatus Asynchronous status to be completed by the function with a 
    112 	TDataProviderResults value
    113 	*/
    114 	virtual void GetData(TPtrC8 &aPtr, TRequestStatus &aStatus) = 0;
    115 	/**
    116 	Called to indicate that use of the data source is complete.
    117 	*/
    118 	virtual void Disconnect() = 0;
    119 	};
   120 
    121 class CMDXMLParserFileDataSource;
    122 
    123 class CMDXMLParser: public CActive
    124 /** Creates a DOM structure from a given XML file.
    125 
    126 The parsing operation is asynchronous and is initiated by a call to ParseFile() (or ParseSourceL() for a caller-supplied data source). 
    127 On completion the observer's ParseFileCompleteL() is called, and the created DOM document can be retrieved through DetachXMLDoc().
    128 
    129 Note the following ownership rules for the DOM document:
    130 
    131 1. calling DetachXMLDoc() transfers ownership of the document to the client
    132 
    133 2. if the parser is asked to parse a new file while it still owns an existing 
    134 DOM document, it will delete the old document.
    135 
    136 @publishedAll
    137 @released
    138 */
    139 	{
    140 public:
    141 	/** Allocates and constructs a new XML parser, without specifying a DTD.
    142 	
    143 	@param aParserObserver XML parser observer
    144 	@leave KErrNoMemory Out of memory
    145 	@return New XML parser */
    146 	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver);
    147 
    148 	/** Allocates and constructs a new XML parser, specifying a DTD.
    149 	
    150 	@param aParserObserver XML parser observer
    151 	@param aDtdRepresentation DTD validator
    152 	@leave KErrNoMemory Out of memory
    153 	@return New XML parser */
    154 	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
    155 
    156 	/** Allocates and constructs a new XML parser without specifying a DTD, leaving the object on the cleanup 
    157 	stack.
    158 	
    159 	@param aParserObserver XML parser observer
    160 	@leave KErrNoMemory Out of memory
    161 	@return New XML parser */
    162 	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver);
    163 
    164 	/** Allocates and constructs a new XML parser, specifying a DTD and leaving the object on the cleanup 
    165 	stack.
    166 	
    167 	@param aParserObserver XML parser observer
    168 	@param aDtdRepresentation DTD validator
    169 	@leave KErrNoMemory Out of memory
    170 	@return New XML parser */
    171 	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
    172 
    173 
    174 	/** Destructor. */
    175 	IMPORT_C ~CMDXMLParser();
    176 
    177 	/** Gets the last error found by the parser.
    178 	
    179 	@return Error code
    180 	 */
    181 	IMPORT_C TInt Error() const;
    182 
    183 	/**
    184 	 Get the severity of the most severe error found.
    185 	 @return the maximum error severity
    186 	 */
    187 	IMPORT_C TXMLErrorCodeSeverity ErrorSeverity() const; 
    188 
    189 	/** Gets the created DOM.
    190 	
    191 	This should be called after the conclusion of the parser process.
    192 	
    193 	Note that the function sets the internal variable pointing to the document 
    194 	to NULL, so this function can only be called once per file parse. The caller 
    195 	takes ownership of the document, and must delete it when its use is complete.
    196 	
    197 	@return The created DOM */
    198 	IMPORT_C CMDXMLDocument* DetachXMLDoc();
    199 
    200 	/** Parses a specified XML file into a DOM object tree.
    201 	
    202 	@param aRFs File server session
    203 	@param aFileToParse The file name to parse
    204 	@return KErrNone if success or a file read error code */
    205 	IMPORT_C TInt ParseFile(RFs aRFs, const TDesC& aFileToParse);
    206 	/** Overload of ParseFile() that takes a file handle (RFile) instead of a file server session and file name. */
    207 	IMPORT_C TInt ParseFile(RFile& aFileHandleToParse);
    208 
    209 	/** Parses a specified XML Data Source into a DOM object tree.
    210 	Use ParseSourceL() function in preference to ParseSource(): this inline wrapper traps and ignores any leave from ParseSourceL(), so a failure to start parsing is not reported to the caller.
    211 	@param aSource MMDXMLParserDataProvider pointer 
    212 	*/
    213 	inline void ParseSource(MMDXMLParserDataProvider *aSource)
    214 		{
    215 		TRAP_IGNORE(ParseSourceL(aSource));
    216 		} 
    217 				
    218 	/** Parses a specified XML Data Source into a DOM object tree.	
    219 	@param aSource MMDXMLParserDataProvider pointer 
    220 	*/
    221 	IMPORT_C void ParseSourceL(MMDXMLParserDataProvider *aSource);
    222 
    223 	/** Defines input stream character widths. */
    224 	enum TMDXMLParserInputCharWidth
    225 		{
    226 		EAscii = 0x01, ///< ASCII
    227 		EUnicode = 0x02 ///<Unicode
    228 		};
    229 	
    230 	/** Sets the input stream character width.
    231 	 *
    232 	 * @param aWidth Character width for incoming stream.  Possible values are EAscii and EUnicode (representing Ascii/UTF8 and Unicode respectively).
    233 	 *
    234 	 */
    235 	IMPORT_C void SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth);
    236 
    237 	//Defect fix for INC036136- Enable the use of custom entity converters in GMXML
    238 	/**
    239 	 * Sets the entity converter to be used for parsing
    240 	 * and takes ownership of the passed entity converter.
    241 	 * @param aEntityConverter the entity converter to be used.
    242 	 */
    243 	IMPORT_C void SetEntityConverter(CMDXMLEntityConverter* aEntityConverter);
    244 	//End Defect fix for INC036136
    245 
    246 	/**
    247 	 Controls whether invalid elements and attributes are added to the DOM.
    248 	 @param aStoreInvalid ETrue if invalid content should be stored, EFalse otherwise.
    249 	 */
    250 	IMPORT_C void SetStoreInvalid(TBool aStoreInvalid);
    251 	
    252 	/**
    253 	 Controls whether whitespaces are handled by XML parser or by client.
    254 	 @param aPreserve ETrue if all whitespaces should be preserved (handled by client), EFalse otherwise.
    255 	 */
    256 	IMPORT_C void SetWhiteSpaceHandlingMode(TBool aPreserve);
    257 
    258 public: // public functions used by other classes within the .dll, not for Export.
    259 	/** Gets the entity converter.
    260 	
    261 	@return The entity converter */
    262 	CMDXMLEntityConverter* EntityConverter();
    263 
    264 private:
    265 	IMPORT_C virtual void DoCancel();
    266 
    267 	/*
    268 	 * RunL function inherited from CActive base class - carries out the actual parsing.
    269 	 * @leave can Leave due to OOM
    270 	 */
    271 	virtual void RunL();
    272 
    273 	/*
    274 	 * Helper function that does the parsing - called from inside RunL
    275 	 */
    276 	TBool DoParseLoopL();
    277 
    278 	/*
    279 	 * RunError function inherited from CActive base class - intercepts any Leave from
    280 	 * the RunL() function, sets an appropriate errorcode and calls ParseFileCompleteL
    281 	 */
    282 	IMPORT_C TInt RunError(TInt aError);
    283 
    284 	/*
    285 	 * Constructors
    286 	 */
    287 	CMDXMLParser(MMDXMLParserObserver* aParserObserver);
    288 
    289 	CMDXMLParser(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
    290 
    291 	/*
    292 	 * Called when a character is read in and found to be outside of an element tag
    293 	 */
    294 	virtual void HandleTextL(TDes& aChar);
    295 
    296 	enum TGetCharReturn
    297 		{
    298 		KError = 0x00,			// GetChar detected an error
    299 		KCharReturned,	// GetChar returned a character
    300 		KWaitForChar	// GetChar couldn't return a character this time, but might next time.
    301 		};
    302 
    303 	/*
    304 	 * Fetch one character from the input file
    305 	 * @param aChar the returned character.
    306 	 * @return returns one of the values of TGetCharReturn
    307 	 */
    308 	TGetCharReturn GetChar(TDes& aChar);
    309 
    310 	/* utility functions, called from GetChar to deal with the
    311 	 * 2 types of input stream
    312 	 */
    313 	TGetCharReturn GetDoubleByteChar(TDes& aChar);
    314 	TGetCharReturn GetSingleByteChar(TDes& aChar);
    315 
    316 	/*
    317 	 * Fetch some more data from the data provider
    318 	 * Returns nothing (the function is void); how completion is signalled is not visible in this header.
    319 	 */
    320 	void GetMoreData();
    321 
    322 	/*
    323 	 * @return Returns true if the current tag is a doctype tag and sets the
    324 	 * Document DocType member accordingly on the first pass of this function.
    325 	 */
    326 	TBool DocTypeL();
    327 
    328 	/*
    329 	 * creates a new processing instruction if necessary and adds to document
    330 	 * @return Returns true if the current tag is a processing instruction
    331 	 */
    332 	TBool ProcessingInstructionL(CMDXMLElement* aParentElement);
    333 
    334 	/*
    335 	 * creates a new CDataSection if necessary and adds to document
    336 	 * @return Returns true if the current tag is a CDATA section
    337 	 */
    338 	TBool CDataSectionL(CMDXMLElement* aParentElement);
    339 	TBool EndOfCDataSection();
    340 
    341 	/*
    342 	 * @return returns true if the current tag is a version id tag and sets the
    343 	 * Document Version member accordingly on the first pass of this function.
    344 	 */
    345 	TBool VersionIDL();
    346 
    347 	/*
    348 	 * creates a new comment if necessary and adds to document
    349 	 * @return returns true if the current tag is a comment tag
    350 	 */
    351 	TBool CommentL(CMDXMLElement* aParentElement);
    352 
    353 	/*
    354 	 * Parse a start of element tag and create an element with attributes set.
    355 	 * @return Returns a pointer to the created element
    356 	 * @leave can Leave due to OOM
    357 	 */
    358 	virtual CMDXMLElement* ParseStartTagL();
    359 
    360 	/*
    361 	 * Detects the type of a file - can be Unicode or UTF-8
    362 	 */
    363 	TBool DetectFileType();
    364 
    365 	/*
    366 	 * Creates a generic or DTD-specific document object
    367 	 * @leave can Leave due to OOM
    368 	 */
    369 	virtual void CreateDocumentL();
    370 
    371 	/*
    372 	 * Sets iError to new errorcode if more serious than any error so far encountered
    373 	 */
    374 	IMPORT_C void SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity);
    375 
    376 	/*
    377 	 * This function is used to parse the attributes.
    378      * @param aElement The element to which the attributes belong
    379      * @param aTagToParse The tag to be parsed
    380      * @return Returns KErrNone if both attribute name & value are valid,
    381 	 * KErrXMLBadAttributeName if the attribute name is invalid, or KErrXMLBadAttributeValue if the attribute value is invalid
    382      * @leave can Leave due to OOM
    383 	 */
    384 	TInt ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse);
    385 
    386 	/** 
    387 	  This function locates the next attribute in the tag.
    388 	  @param aTagToParse the tag to find the attribute in
    389 	  @return the offset of the next attribute
    390 	 */
    391 	TInt LocateNextAttribute(const TDesC& aTagToParse);
    392 
    393     /*
    394      * Parses an end tag.  In fact, at this point the end tag must match
    395      * the tag name of the start tag (presumably that of aElement - TODO confirm).
    396      * @param aTagToParse Text of the end tag.
    397      * @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch.
    398      */
    399 	TInt ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse);
    400 
    401 	TInt CheckForStartCData(const TDesC& aTextToCheck);
    402 	TInt FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind);
    403 
    404 	/*
    405 	 * Second stage constructor
    406 	 */
    407 	void ConstructL(MXMLDtd* aDtdRepresentation);
    408 	void AddTextL(CMDXMLElement* aParentElement);
    409 
    410 	/*
    411 	 * Checks whether the end of this tag is in a CDataSection.
    412 	 * @param aDataToSearch The data to check
    413 	 * @return Returns ETrue if the tag contains an unclosed CDataSection
    414 	 */
    415 	TBool InCDataSection(TDesC& aDataToSearch);
    416 
    417 	/*
    418 	 * Entity converts the sections of one attribute value that are not within a CDataSection.
    419 	 * @param aAttributeValue one attribute value
    420 	 * @return Returns an error if entity conversion did not successfully complete, otherwise KErrNone
    421 	 */
    422 	TInt ParseSingleAttributeL(TDes& aAttributeValue);
    423 
    424 	/*
    425 	 * Prepares this class for use on another file.
    426 	 *
    427 	 */
    428 	void PrepareForReuseL();
    429 
    430 	/**
    431 	 This should be called when parsing has been completed, before calling ParseFileCompleteL().
    432 	 It checks for errors that can only be determined at the end of parsing, eg missing doctype or 
    433 	 incomplete content.
    434 	 */
    435 	void CheckForErrors();
    436 	// NOTE(review): the three placeholders below presumably exist only to keep the DLL's export ordinals stable after exports were removed - TODO confirm.
    437 	IMPORT_C void PlaceholderForRemovedExport1(MMDXMLParserObserver* aParserObserver);
    438 	IMPORT_C void PlaceholderForRemovedExport2(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
    439 	IMPORT_C void PlaceholderForRemovedExport3();
    440 
    441 	// Reason codes accepted by Panic().
    442 private:
    443 	enum TPanicCode {	ENullMemVarDataSource, 
    444 						ENullMemVarParserObserver, 
    445 						ENullMemVarXMLDoc, 
    446 						ENullMemVarElementTag, 
    447 						ENullParameterParentElement };
    448 	void Panic(TPanicCode aReason) const;
    449 
    450 private:
    451 	MMDXMLParserObserver* iParserObserver;
    452 	MXMLDtd* iDtdRepresentation;
    453 	TInt iError;								// Current error
    454 	TXMLErrorCodeSeverity iSeverity;			// ErrorCode severity
    455 	CMDXMLDocument* iXMLDoc;					// Document created by the parser
    456 	CMDXMLEntityConverter* iEntityConverter;	// Entity converter used by the parser
    457 	HBufC* iElementTag;							// Currently processed element tag
    458 	TBool iDocTypeSet;
    459 	TBool iVersionSet;
    460 	TInt iBytesPerChar;
    461 
    462 	/* member variables dealing with access to source data */
    463 	TPtrC8 iInputBufferPtr;						// set during a call to get more data
    464 	TInt iCurrentInputBufferLen;				// current length of the data block available
    465 	TInt iNextChar;								// read position in the data block
    466 	TInt iInputBytesRemaining;					// number of bytes remaining to read.
    467 	HBufC8 *iUTF8EdgeBuffer;					// buffer to hold up to 6 bytes so that UTF8 parsing can span edges of data blocks
    468 	HBufC8 *iBomBuffer;							// buffer to hold data at the start of the stream so we may determine charset
    469 	TInt iRequiredUTF8Bytes;					// number of bytes required to complete the character held in the edge buffer
    470 	TBool iUnicodeInputMisaligned;				// Set to ETrue if the unicode input stream is not aligned to 16-bit boundaries
    471 	MMDXMLParserDataProvider* iDataSource;		// XML Data Source being parsed.
    472 	CMDXMLParserFileDataSource* iFileSource;	// We own this, and need to free it when we are done. Only used when we're providing the data source object to wrap a local file.
    473 
    474 	/* member variables dealing with chunked conversion into unicode output */
    475 	TBuf<32> iUnicodeConversion;				// buffer to temporarily hold the results of conversion from UTF8 to Unicode
    476 	TInt iUnicodeConversionLen;					// number of characters stored in our intermediate buffer
    477 	TInt iUnicodeReadPos;						// next character to send from our intermediate buffer
    478 	TBuf<1> iSpareChar;
    479 
    480 	/* member variables used when parsing a local file */
    481 	TDesC *iFileToParse;
    482 	RFs iRFs;
    483 	RFile iFileHandleToParse;
    484 
    485 	TBool iEndOfTag;
    486 	
    487 	/* member variables used in DoParseLoopL() */
    488 	TBool iOpened;
    489 	TBool iClosed;
    490 	CMDXMLElement* iNewElement;
    491 	CMDXMLElement* iParentElement;
    492 	HBufC* iText;
    493 	enum EParserStates
    494 		{
    495 		KInitFromFile,
    496 		KDetermineCharset,
    497 		KWaitingForData,
    498 		KParseData,
    499 		KSpanDataGap,
    500 		KFinished
    501 		};
    502 
    503 	EParserStates iState;						// current parser state (one of EParserStates)
    504 	EParserStates iPreviousState;				// parser state before the current one
    505 	TInt iSuspiciousCharacter;
    506 	TBool iStoreInvalid;						// controls whether invalid elements and attributes are stored in the DOM.
    507 	TBool iPreserve;							// presumably the whitespace-preservation flag set via SetWhiteSpaceHandlingMode() - TODO confirm
    508 
    509 	};
   510 
   511 #endif