epoc32/include/gmxmlparser.h
author William Roberts <williamr@symbian.org>
Wed, 31 Mar 2010 12:33:34 +0100
branchSymbian3
changeset 4 837f303aceeb
parent 2 2fe1408b6811
permissions -rw-r--r--
Current Symbian^3 public API header files (from PDK 3.0.h)
This is the epoc32/include tree with the "platform" subtrees removed, and
all but a selected few mbg and rsg files removed.
     1 // Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
     2 // All rights reserved.
     3 // This component and the accompanying materials are made available
     4 // under the terms of "Eclipse Public License v1.0"
     5 // which accompanies this distribution, and is available
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
     7 //
     8 // Initial Contributors:
     9 // Nokia Corporation - initial contribution.
    10 //
    11 // Contributors:
    12 //
    13 // Description:
    14 // This file contains the declaration of the generic CMDXMLParser class
    15 // which is responsible for creating a DOM structure
    16 // from a given XML file.
    17 // 
    18 //
    19 
    20 /**
    21  @file
    22 */
    23 
    24 #ifndef __GMXMLPARSER_H__
    25 #define __GMXMLPARSER_H__
    26 
    27 #include <e32std.h>
    28 #include <txtetext.h>
    29 #include <gmxmlconstants.h>
    30 #include <f32file.h>
    31 
    32 //forward reference
    33 class CMDXMLDocument;
    34 class CMDXMLEntityConverter;
    35 class CMDXMLElement;
    36 class MXMLDtd;
    37 
    38 
    39 
    40 class MMDXMLParserObserver
    41 /** Abstract observer interface for notification when XML parsing is complete.
    42 
    43 It should be implemented by users of CMDXMLParser.
    44 @publishedAll 
    45 @released*/
    46 	{
    47 public:
    48 	/**
    49 	Callback used to inform a client of the parser that a parsing operation has completed.
    50 	 */
    51 	virtual void ParseFileCompleteL() = 0;
    52 	};
    53 
    54 class MMDXMLParserDataProvider
    55 /** Abstract data source interface for XML data source.
    56 
    57 The user of CMDXMLParser must build one of these to encapsulate the data source
    58 that they wish to parse.  CMDXMLParser implements a file-based data source to
    59 implement the functionality of the ParseFile function.
    60 
    61 @publishedAll 
    62 @released*/
    63 	{
    64 public:
    65 	/** Status codes returned by GetData() implementations. */
    66 	enum TDataProviderResults
    67 		{
    68 		KMoreData,		//< Returned by the interface implementation when it is returning more data.
    69 		KDataStreamError,	//< Returned by the interface when an unrecoverable error prevents obtaining more data.  NOTE(review): the original text said a recoverable error should be represented by KDataNotReady, but no such value is declared in this enum - confirm the intended behaviour.
    70 		KDataStreamEnd	//< Returned by the interface when there is no more data to come.
    71 		};
    72 
    73 public:
    74 	/** 
    75 	The XML Parser calls this on a specific data provider to get more data
    76 	when required.
    77 
    78 	Note that the TPtrC8 supplied may be used by the parser at any time
    79 	between the return of this call and the next call that the parser
    80 	makes out.
    81 
    82 	Your data provider must not move the data pointed to until the
    83 	parser has indicated that it's done with that block by asking for
    84 	another.
    85 
    86 	Ownership of the data pointed to remains with the data provider.
    87 
    88 
    89 	General comments on efficiency
    90 	------------------------------
    91 
    92 	The parser is designed such that it processes the whole data block
    93 	provided in one go.  It will automatically become asynchronous when
    94 	another block is required - the data provider only needs to supply
    95 	data.
    96 
    97 	Because of this design, it allows the data provider to indirectly
    98 	control the amount of processing time that will be needed
    99 	in a single block.
   100 
   101 	It is a good idea to balance the need for the fastest possible 
   102 	processing with the need for client application responsiveness by
   103 	ensuring that the amount of data passed in a single block is not 
   104 	too large.  However, it is worth bearing in mind that the parser
   105 	will convert UTF8 data streams in blocks of 32 characters, and
   106 	supplying blocks of smaller length than this will result in a
   107 	slight loss of efficiency.
   108 
   109 	@param aPtr On return, refers to the block of data provided
   110 	@param aStatus Asynchronous status to be completed by the function with a 
   111 	TDataProviderResults value
   112 	*/
   113 	virtual void GetData(TPtrC8 &aPtr, TRequestStatus &aStatus) = 0;
   114 	/**
   115 	Called to indicate that use of the data source is complete.
   116 	*/
   117 	virtual void Disconnect() = 0;
   118 	};
   119 
   120 class CMDXMLParserFileDataSource;
   121 
   122 class CMDXMLParser: public CActive
   123 /** Creates a DOM structure from a given XML file.
   124 
   125 The parsing operation is asynchronous and is initiated by a call to ParseFile(). 
   126 On completion, the created DOM document can be retrieved through DetachXMLDoc().
   127 
   128 Note the following ownership rules for the DOM document:
   129 
   130 1. calling DetachXMLDoc() transfers ownership of the document to the client
   131 
   132 2. if the parser is asked to parse a new file while it still owns an existing 
   133 DOM document, it will delete the old document.
   134 
   135 @publishedAll
   136 @released
   137 */
   138 	{
   139 public:
   140 	/** Allocates and constructs a new XML parser (this overload takes no DTD).
   141 	
   142 	@param aParserObserver XML parser observer
   143 	@leave KErrNoMemory Out of memory
   144 	@return New XML parser */
   145 	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver);
   146 
   147 	/** Allocates and constructs a new XML parser, specifying a DTD.
   148 	
   149 	@param aParserObserver XML parser observer
   150 	@param aDtdRepresentation DTD validator
   151 	@leave KErrNoMemory Out of memory
   152 	@return New XML parser */
   153 	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
   154 
   155 	/** Allocates and constructs a new XML parser, leaving the object on the cleanup 
   156 	stack (this overload takes no DTD).
   157 	
   158 	@param aParserObserver XML parser observer
   159 	@leave KErrNoMemory Out of memory
   160 	@return New XML parser */
   161 	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver);
   162 
   163 	/** Allocates and constructs a new XML parser, specifying a DTD and leaving the 
   164 	object on the cleanup stack.
   165 	
   166 	@param aParserObserver XML parser observer
   167 	@param aDtdRepresentation DTD validator
   168 	@leave KErrNoMemory Out of memory
   169 	@return New XML parser */
   170 	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
   171 
   172 
   173 	/** Destructor. */
   174 	IMPORT_C ~CMDXMLParser();
   175 
   176 	/** Gets the last error found by the parser.
   177 	
   178 	@return Error code
   179 	 */
   180 	IMPORT_C TInt Error() const;
   181 
   182 	/**
   183 	 Get the severity of the most severe error found.
   184 	 @return the maximum error severity
   185 	 */
   186 	IMPORT_C TXMLErrorCodeSeverity ErrorSeverity() const; 
   187 
   188 	/** Gets the created DOM.
   189 	
   190 	This should be called after the conclusion of the parser process.
   191 	
   192 	Note that the function sets the internal variable pointing to the document 
   193 	to NULL, so this function can only be called once per file parse. The caller 
   194 	takes ownership of the document, and must delete it when its use is complete.
   195 	
   196 	@return The created DOM */
   197 	IMPORT_C CMDXMLDocument* DetachXMLDoc();
   198 
   199 	/** Parses a specified XML file into a DOM object tree.
   200 	
   201 	@param aRFs File server session
   202 	@param aFileToParse The file name to parse
   203 	@return KErrNone if success or a file read error code */
   204 	IMPORT_C TInt ParseFile(RFs aRFs, const TDesC& aFileToParse);
   205 	/** Overload of ParseFile() that takes a file handle instead of a file name. NOTE(review): presumably the handle must already be open and the return value follows the overload above - confirm against the implementation. */
   206 	IMPORT_C TInt ParseFile(RFile& aFileHandleToParse);
   207 
   208 	/** Parses a specified XML Data Source into a DOM object tree.
   209 	Use ParseSourceL() in preference to ParseSource(): this wrapper runs ParseSourceL() under TRAP_IGNORE, so a leave is not propagated to the caller.
   210 	@param aSource MMDXMLParserDataProvider pointer 
   211 	*/
   212 	inline void ParseSource(MMDXMLParserDataProvider *aSource)
   213 		{
   214 		TRAP_IGNORE(ParseSourceL(aSource));
   215 		} 
   216 				
   217 	/** Parses a specified XML Data Source into a DOM object tree.	
   218 	@param aSource MMDXMLParserDataProvider pointer 
   219 	*/
   220 	IMPORT_C void ParseSourceL(MMDXMLParserDataProvider *aSource);
   221 
   222 	/** Defines input stream character widths. */
   223 	enum TMDXMLParserInputCharWidth
   224 		{
   225 		EAscii = 0x01, //< ASCII
   226 		EUnicode = 0x02 //< Unicode
   227 		};
   228 	
   229 	/** Sets the input stream character width.
   230 	 *
   231 	 * @param aWidth Character width for incoming stream.  Possible values are EAscii and EUnicode (representing Ascii/UTF8 and Unicode respectively).
   232 	 *
   233 	 */
   234 	IMPORT_C void SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth);
   235 
   236 	//Defect fix for INC036136- Enable the use of custom entity converters in GMXML
   237 	/**
   238 	 * Sets the entity converter to be used for parsing
   239 	 * and takes ownership of the passed entity converter.
   240 	 * @param aEntityConverter the entity converter to be used.
   241 	 */
   242 	IMPORT_C void SetEntityConverter(CMDXMLEntityConverter* aEntityConverter);
   243 	//End Defect fix for INC036136
   244 
   245 	/**
   246 	 Controls whether invalid elements and attributes are added to the DOM.
   247 	 @param aStoreInvalid ETrue if invalid content should be stored, EFalse otherwise.
   248 	 */
   249 	IMPORT_C void SetStoreInvalid(TBool aStoreInvalid);
   250 	
   251 	/**
   252 	 Controls whether whitespaces are handled by XML parser or by client.
   253 	 @param aPreserve ETrue if all whitespaces should be preserved (handled by client), EFalse otherwise.
   254 	 */
   255 	IMPORT_C void SetWhiteSpaceHandlingMode(TBool aPreserve);
   256 
   257 public: // public functions used by other classes within the .dll, not for Export.
   258 	/** Gets the entity converter.
   259 	
   260 	@return The entity converter */
   261 	CMDXMLEntityConverter* EntityConverter();
   262 
   263 private:
   264 	IMPORT_C virtual void DoCancel();
   265 
   266 	/*
   267 	 * RunL function inherited from CActive base class - carries out the actual parsing.
   268 	 * @leave can Leave due to OOM
   269 	 */
   270 	virtual void RunL();
   271 
   272 	/*
   273 	 * Helper function that does the parsing - called from inside RunL
   274 	 */
   275 	TBool DoParseLoopL();
   276 
   277 	/*
   278 	 * RunError function inherited from CActive base class - intercepts any Leave from
   279 	 * the RunL() function, sets an appropriate errorcode and calls ParseFileCompleteL
   280 	 */
   281 	IMPORT_C TInt RunError(TInt aError);
   282 
   283 	/*
   284 	 * Constructors
   285 	 */
   286 	CMDXMLParser(MMDXMLParserObserver* aParserObserver);
   287 
   288 	CMDXMLParser(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
   289 
   290 	/*
   291 	 * Called when a character is read in and found to be outside of an element tag
   292 	 */
   293 	virtual void HandleTextL(TDes& aChar);
   294 
   295 	enum TGetCharReturn
   296 		{
   297 		KError = 0x00,			// GetChar detected an error
   298 		KCharReturned,	// GetChar returned a character
   299 		KWaitForChar	// GetChar couldn't return a character this time, but might next time.
   300 		};
   301 
   302 	/*
   303 	 * Fetch one character from the input file
   304 	 * @param aChar the returned character.
   305 	 * @return returns one of the values of TGetCharReturn
   306 	 */
   307 	TGetCharReturn GetChar(TDes& aChar);
   308 
   309 	/* utility functions, called from GetChar to deal with the
   310 	 * 2 types of input stream
   311 	 */
   312 	TGetCharReturn GetDoubleByteChar(TDes& aChar);
   313 	TGetCharReturn GetSingleByteChar(TDes& aChar);
   314 
   315 	/*
   316 	 * Fetch some more data from the data provider.
   317 	 * No value is returned (the function is declared void).
   318 	 */
   319 	void GetMoreData();
   320 
   321 	/*
   322 	 * @return Returns true if the current tag is a doctype tag and sets the
   323 	 * Document DocType member accordingly on the first pass of this function.
   324 	 */
   325 	TBool DocTypeL();
   326 
   327 	/*
   328 	 * creates a new processing instruction if necessary and adds to document
   329 	 * @return Returns true if the current tag is a processing instruction
   330 	 */
   331 	TBool ProcessingInstructionL(CMDXMLElement* aParentElement);
   332 
   333 	/*
   334 	 * creates a new CDataSection if necessary and adds to document
   335 	 * @return Returns true if the current tag is a CDataSection
   336 	 */
   337 	TBool CDataSectionL(CMDXMLElement* aParentElement);
   338 	TBool EndOfCDataSection();
   339 
   340 	/*
   341 	 * @return returns true if the current tag is a version id tag and sets the
   342 	 * Document Version member accordingly on the first pass of this function.
   343 	 */
   344 	TBool VersionIDL();
   345 
   346 	/*
   347 	 * creates a new comment if necessary and adds to document
   348 	 * @return returns true if the current tag is a comment tag
   349 	 */
   350 	TBool CommentL(CMDXMLElement* aParentElement);
   351 
   352 	/*
   353 	 * Parse a start of element tag and create an element with attributes set.
   354 	 * @return Returns a pointer to the created element
   355 	 * @leave can Leave due to OOM
   356 	 */
   357 	virtual CMDXMLElement* ParseStartTagL();
   358 
   359 	/*
   360 	 * Detects the type of a file - can be Unicode or UTF-8
   361 	 */
   362 	TBool DetectFileType();
   363 
   364 	/*
   365 	 * Creates a generic or DTD-specific document object
   366 	 * @leave can Leave due to OOM
   367 	 */
   368 	virtual void CreateDocumentL();
   369 
   370 	/*
   371 	 * Sets iError to new errorcode if more serious than any error so far encountered
   372 	 */
   373 	IMPORT_C void SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity);
   374 
   375 	/*
   376 	 * This function is used to parse the attributes.
   377      * @param aElement The element to which the attributes belong
   378      * @param aTagToParse The tag to be parsed
   379      * @return Returns KErrNone if both attribute name & value are valid,
   380 	 * KErrXMLBadAttributeName if the attribute name is invalid or KErrXMLBadAttributeValue if the attribute value is invalid
   381      * @leave can Leave due to OOM
   382 	 */
   383 	TInt ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse);
   384 
   385 	/** 
   386 	  This function locates the next attribute in the tag.
   387 	  @param aTagToParse the tag to find the attribute in
   388 	  @return the offset of the next attribute
   389 	 */
   390 	TInt LocateNextAttribute(const TDesC& aTagToParse);
   391 
   392     /*
   393      * Parses an end tag.  In fact, at this point the end tag must match
   394      * the tag name of the start tag.  
   395      * @param aTagToParse Text of the end tag.
   396      * @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch.
   397      */
   398 	TInt ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse);
   399 
   400 	TInt CheckForStartCData(const TDesC& aTextToCheck);
   401 	TInt FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind);
   402 
   403 	/*
   404 	 * Second stage constructor
   405 	 */
   406 	void ConstructL(MXMLDtd* aDtdRepresentation);
   407 	void AddTextL(CMDXMLElement* aParentElement);
   408 
   409 	/*
   410 	 * Checks whether the end of this tag is in a CDataSection.
   411 	 * @param aDataToSearch The data to check
   412 	 * @return Returns ETrue if the tag contains an unclosed CDataSection
   413 	 */
   414 	TBool InCDataSection(TDesC& aDataToSearch);
   415 
   416 	/*
   417 	 * Entity converts the sections of one attribute value that are not within a CDataSection.
   418 	 * @param aAttributeValue one attribute value
   419 	 * @return Returns an error if entity conversion did not successfully complete, otherwise KErrNone
   420 	 */
   421 	TInt ParseSingleAttributeL(TDes& aAttributeValue);
   422 
   423 	/*
   424 	 * Prepares this class for use on another file.
   425 	 *
   426 	 */
   427 	void PrepareForReuseL();
   428 
   429 	/**
   430 	 This should be called when parsing has been completed, before calling ParseFileCompleteL().
   431 	 It checks for errors that can only be determined at the end of parsing, eg missing doctype or 
   432 	 incomplete content.
   433 	 */
   434 	void CheckForErrors();
   435 
   436 	IMPORT_C void PlaceholderForRemovedExport1(MMDXMLParserObserver* aParserObserver);
   437 	IMPORT_C void PlaceholderForRemovedExport2(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
   438 	IMPORT_C void PlaceholderForRemovedExport3();
   439 
   440 
   441 private:
   442 	enum TPanicCode {	ENullMemVarDataSource, 
   443 						ENullMemVarParserObserver, 
   444 						ENullMemVarXMLDoc, 
   445 						ENullMemVarElementTag, 
   446 						ENullParameterParentElement };
   447 	void Panic(TPanicCode aReason) const;
   448 
   449 private:
   450 	MMDXMLParserObserver* iParserObserver;
   451 	MXMLDtd* iDtdRepresentation;
   452 	TInt iError;								// Current error
   453 	TXMLErrorCodeSeverity iSeverity;			// ErrorCode severity
   454 	CMDXMLDocument* iXMLDoc;					// Document created by the parser
   455 	CMDXMLEntityConverter* iEntityConverter;	// Entity converter used by the parser
   456 	HBufC* iElementTag;							// Currently processed element tag
   457 	TBool iDocTypeSet;
   458 	TBool iVersionSet;
   459 	TInt iBytesPerChar;
   460 
   461 	/* member variables dealing with access to source data */
   462 	TPtrC8 iInputBufferPtr;						// set during a call to get more data
   463 	TInt iCurrentInputBufferLen;				// current length of the data block available
   464 	TInt iNextChar;								// read position in the data block
   465 	TInt iInputBytesRemaining;					// number of bytes remaining to read.
   466 	HBufC8 *iUTF8EdgeBuffer;					// buffer to hold up to 6 bytes so that UTF8 parsing can span edges of data blocks
   467 	HBufC8 *iBomBuffer;							// buffer to hold data at the start of the stream so we may determine charset
   468 	TInt iRequiredUTF8Bytes;					// number of bytes required to complete the character held in the edge buffer
   469 	TBool iUnicodeInputMisaligned;				// Set to ETrue if the unicode input stream is not aligned to 16-bit boundaries
   470 	MMDXMLParserDataProvider* iDataSource;		// XML Data Source being parsed.
   471 	CMDXMLParserFileDataSource* iFileSource;	// We own this, and need to free it when we are done. Only used when we're providing the data source object to wrap a local file.
   472 
   473 	/* member variables dealing with chunked conversion into unicode output */
   474 	TBuf<32> iUnicodeConversion;				// buffer to temporarily hold the results of conversion from UTF8 to Unicode
   475 	TInt iUnicodeConversionLen;					// number of characters stored in our intermediate buffer
   476 	TInt iUnicodeReadPos;						// next character to send from our intermediate buffer
   477 	TBuf<1> iSpareChar;
   478 
   479 	/* member variables used when parsing a local file */
   480 	TDesC *iFileToParse;
   481 	RFs iRFs;
   482 	RFile iFileHandleToParse;
   483 
   484 	TBool iEndOfTag;
   485 	
   486 	/* member variables used in DoParseLoopL() */
   487 	TBool iOpened;
   488 	TBool iClosed;
   489 	CMDXMLElement* iNewElement;
   490 	CMDXMLElement* iParentElement;
   491 	HBufC* iText;
   492 	enum EParserStates
   493 		{
   494 		KInitFromFile,
   495 		KDetermineCharset,
   496 		KWaitingForData,
   497 		KParseData,
   498 		KSpanDataGap,
   499 		KFinished
   500 		};
   501 
   502 	EParserStates iState;
   503 	EParserStates iPreviousState;
   504 	TInt iSuspiciousCharacter;
   505 	TBool iStoreInvalid;						// controls whether invalid elements and attributes are stored in the DOM.
   506 	TBool iPreserve;							// presumably the whitespace-preservation flag set via SetWhiteSpaceHandlingMode() - TODO confirm
   507 
   508 	};
   509 
   510 #endif