diff -r 666f914201fb -r 2fe1408b6811 epoc32/include/gmxmlparser.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/epoc32/include/gmxmlparser.h	Tue Mar 16 16:12:26 2010 +0000
@@ -0,0 +1,511 @@
+// Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of the License "Symbian Foundation License v1.0" to Symbian Foundation members and "Symbian Foundation End User License Agreement v1.0" to non-members
+// which accompanies this distribution, and is available
+// at the URL "http://www.symbianfoundation.org/legal/licencesv10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+// This file contains the declaration of the generic CMDXMLParser class
+// which is responsible for creating a DOM structure
+// from a given XML file.
+//
+//
+
+
+
+/**
+ @file
+*/
+
+#ifndef __GMXMLPARSER_H__
+#define __GMXMLPARSER_H__
+
+#include <e32std.h>
+#include <eikenv.h>
+#include <gmxmlconstants.h>
+
+// Forward declarations
+class CMDXMLDocument;
+class CMDXMLEntityConverter;
+class CMDXMLElement;
+class MXMLDtd;
+
+
+
+class MMDXMLParserObserver
+/** Abstract observer interface for notification when XML parsing is complete.
+
+It should be implemented by users of CMDXMLParser.
+@publishedAll
+@released*/
+	{
+public:
+	/**
+	Callback used to inform a client of the parser that a parsing operation has completed.
+	*/
+	virtual void ParseFileCompleteL() = 0;
+	};
+
+class MMDXMLParserDataProvider
+/** Abstract interface for an XML data source.
+
+The user of CMDXMLParser must build one of these to encapsulate the data source
+that they wish to parse. CMDXMLParser implements a file-based data source to
+implement the functionality of the ParseFile function.
+
+@publishedAll
+@released*/
+	{
+public:
+	/** Status codes returned by GetData() implementations. */
+	enum TDataProviderResults
+		{
+		KMoreData,		///< Returned by the interface implementation when it is returning more data.
+		KDataStreamError,	///< Returned by the interface when an unrecoverable error prevents obtaining more data. A recoverable error should be represented by KDataNotReady (NOTE(review): no KDataNotReady enumerator is declared in this enum).
+		KDataStreamEnd		///< Returned by the interface when there is no more data to come.
+		};
+
+public:
+	/**
+	The XML Parser calls this on a specific data provider to get more data
+	when required.
+
+	Note that the TPtrC8 supplied may be used by the parser at any time
+	between the return of this call and the next call that the parser
+	makes to the data provider.
+
+	Your data provider must not move the data pointed to until the
+	parser has indicated that it's done with that block by asking for
+	another.
+
+	Ownership of the data pointed to remains with the data provider.
+
+
+	General comments on efficiency
+	------------------------------
+
+	The parser is designed such that it processes the whole data block
+	provided in one go. It will automatically become asynchronous when
+	another block is required - the data provider only needs to supply
+	data.
+
+	Because of this design, it allows the data provider to indirectly
+	control the amount of processing time that will be needed
+	in a single block.
+
+	It is a good idea to balance the need for the fastest possible
+	processing with the need for client application responsiveness by
+	ensuring that the amount of data passed in a single block is not
+	too large. However, it is worth bearing in mind that the parser
+	will convert UTF8 data streams in blocks of 32 characters, and
+	supplying blocks of smaller length than this will result in a
+	slight loss of efficiency.
+
+	@param aPtr On return, the data provided
+	@param aStatus Asynchronous status to be completed by the function with a
+	TDataProviderResults value
+	*/
+	virtual void GetData(TPtrC8 &aPtr, TRequestStatus &aStatus) = 0;
+	/**
+	Called to indicate that use of the data source is complete.
+	*/
+	virtual void Disconnect() = 0;
+	};
+
+class CMDXMLParserFileDataSource;
+
+class CMDXMLParser: public CActive
+/** Creates a DOM structure from a given XML file.
+
+The parsing operation is asynchronous and is initiated by a call to ParseFile().
+On completion, the created DOM document can be retrieved through DetachXMLDoc().
+
+Note the following ownership rules for the DOM document:
+
+1. calling DetachXMLDoc() transfers ownership of the document to the client
+
+2. if the parser is asked to parse a new file while it still owns an existing
+DOM document, it will delete the old document.
+
+@publishedAll
+@released
+*/
+	{
+public:
+	/** Allocates and constructs a new XML parser without specifying a DTD.
+
+	@param aParserObserver XML parser observer
+	@leave KErrNoMemory Out of memory
+	@return New XML parser */
+	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver);
+
+	/** Allocates and constructs a new XML parser, specifying a DTD.
+
+	@param aParserObserver XML parser observer
+	@param aDtdRepresentation DTD validator
+	@leave KErrNoMemory Out of memory
+	@return New XML parser */
+	IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
+
+	/** Allocates and constructs a new XML parser, leaving the object on the cleanup
+	stack.
+
+	@param aParserObserver XML parser observer
+	@leave KErrNoMemory Out of memory
+	@return New XML parser */
+	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver);
+
+	/** Allocates and constructs a new XML parser, leaving the object on the cleanup
+	stack.
+
+	@param aParserObserver XML parser observer
+	@param aDtdRepresentation DTD validator
+	@leave KErrNoMemory Out of memory
+	@return New XML parser */
+	IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
+
+
+	/** Destructor. */
+	IMPORT_C ~CMDXMLParser();
+
+	/** Gets the last error found by the parser.
+
+	@return Error code
+	*/
+	IMPORT_C TInt Error() const;
+
+	/**
+	Gets the severity of the most severe error found.
+	@return the maximum error severity
+	*/
+	IMPORT_C TXMLErrorCodeSeverity ErrorSeverity() const;
+
+	/** Gets the created DOM.
+
+	This should be called after the conclusion of the parser process.
+
+	Note that the function sets the internal variable pointing to the document
+	to NULL, so this function can only be called once per file parse. The caller
+	takes ownership of the document, and must delete it when its use is complete.
+
+	@return The created DOM */
+	IMPORT_C CMDXMLDocument* DetachXMLDoc();
+
+	/** Parses a specified XML file into a DOM object tree.
+
+	@param aRFs File server session
+	@param aFileToParse The file name to parse
+	@return KErrNone if success or a file read error code */
+	IMPORT_C TInt ParseFile(RFs aRFs, const TDesC& aFileToParse);
+	/** Overload taking a file handle instead of a file name. NOTE(review): return value presumably follows the overload above - confirm. */
+	IMPORT_C TInt ParseFile(RFile& aFileHandleToParse);
+
+	/** Parses a specified XML Data Source into a DOM object tree.
+	Use ParseSourceL() in preference to ParseSource(): this wrapper calls ParseSourceL() under TRAP_IGNORE, so any leave is discarded and not reported to the caller.
+	@param aSource MMDXMLParserDataProvider pointer
+	*/
+	inline void ParseSource(MMDXMLParserDataProvider *aSource)
+		{
+		TRAP_IGNORE(ParseSourceL(aSource));
+		}
+
+	/** Parses a specified XML Data Source into a DOM object tree.
+	@param aSource MMDXMLParserDataProvider pointer
+	*/
+	IMPORT_C void ParseSourceL(MMDXMLParserDataProvider *aSource);
+
+	/** Defines input stream character widths. */
+	enum TMDXMLParserInputCharWidth
+		{
+		EAscii = 0x01,		///< ASCII
+		EUnicode = 0x02		///< Unicode
+		};
+
+	/** Sets the input stream character width.
+	 *
+	 * @param aWidth Character width for incoming stream. Possible values are EAscii and EUnicode (representing Ascii/UTF8 and Unicode respectively).
+	 *
+	 */
+	IMPORT_C void SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth);
+
+	//Defect fix for INC036136- Enable the use of custom entity converters in GMXML
+	/**
+	 * Sets the entity converter to be used for parsing
+	 * and takes ownership of the passed entity converter.
+	 * @param aEntityConverter the entity converter to be used.
+	 */
+	IMPORT_C void SetEntityConverter(CMDXMLEntityConverter* aEntityConverter);
+	//End Defect fix for INC036136
+
+	/**
+	Controls whether invalid elements and attributes are added to the DOM.
+	@param aStoreInvalid ETrue if invalid content should be stored, EFalse otherwise.
+	*/
+	IMPORT_C void SetStoreInvalid(TBool aStoreInvalid);
+
+	/**
+	Controls whether whitespaces are handled by XML parser or by client.
+	@param aPreserve ETrue if all whitespaces should be preserved (handled by client), EFalse otherwise.
+	*/
+	IMPORT_C void SetWhiteSpaceHandlingMode(TBool aPreserve);
+
+public: // Public functions used by other classes within the DLL; not exported.
+	/** Gets the entity converter.
+
+	@return The entity converter */
+	CMDXMLEntityConverter* EntityConverter();
+
+private:
+	IMPORT_C virtual void DoCancel();
+
+	/*
+	 * RunL function inherited from CActive base class - carries out the actual parsing.
+	 * @leave can Leave due to OOM
+	 */
+	virtual void RunL();
+
+	/*
+	 * Helper function that does the parsing - called from inside RunL
+	 */
+	TBool DoParseLoopL();
+
+	/*
+	 * RunError function inherited from CActive base class - intercepts any Leave from
+	 * the RunL() function, sets an appropriate error code and calls ParseFileCompleteL
+	 */
+	IMPORT_C TInt RunError(TInt aError);
+
+	/*
+	 * Constructors
+	 */
+	CMDXMLParser(MMDXMLParserObserver* aParserObserver);
+
+	CMDXMLParser(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
+
+	/*
+	 * Called when a character is read in and found to be outside of an element tag
+	 */
+	virtual void HandleTextL(TDes& aChar);
+
+	enum TGetCharReturn
+		{
+		KError = 0x00,		// GetChar detected an error
+		KCharReturned,		// GetChar returned a character
+		KWaitForChar		// GetChar couldn't return a character this time, but might next time.
+		};
+
+	/*
+	 * Fetch one character from the input file
+	 * @param aChar the returned character.
+	 * @return returns one of the values of TGetCharReturn
+	 */
+	TGetCharReturn GetChar(TDes& aChar);
+
+	/* utility functions, called from GetChar to deal with the
+	 * 2 types of input stream
+	 */
+	TGetCharReturn GetDoubleByteChar(TDes& aChar);
+	TGetCharReturn GetSingleByteChar(TDes& aChar);
+
+	/*
+	 * Fetch some more data from the data provider
+	 * No value is returned (the function is declared void).
+	 */
+	void GetMoreData();
+
+	/*
+	 * @return Returns true if the current tag is a doctype tag and sets the
+	 * Document DocType member accordingly on the first pass of this function.
+	 */
+	TBool DocTypeL();
+
+	/*
+	 * creates a new processing instruction if necessary and adds to document
+	 * @return Returns true if the current tag is a processing instruction
+	 */
+	TBool ProcessingInstructionL(CMDXMLElement* aParentElement);
+
+	/*
+	 * creates a new CDataSection if necessary and adds to document
+	 * @return Returns true if the current tag is a CDataSection (the original text said "processing instruction", apparently copied from ProcessingInstructionL - TODO confirm)
+	 */
+	TBool CDataSectionL(CMDXMLElement* aParentElement);
+	TBool EndOfCDataSection();
+
+	/*
+	 * @return returns true if the current tag is a version id tag and sets the
+	 * Document Version member accordingly on the first pass of this function.
+	 */
+	TBool VersionIDL();
+
+	/*
+	 * creates a new comment if necessary and adds to document
+	 * @return returns true if the current tag is a comment tag
+	 */
+	TBool CommentL(CMDXMLElement* aParentElement);
+
+	/*
+	 * Parse a start of element tag and create an element with attributes set.
+	 * @return Returns a pointer to the created element
+	 * @leave can Leave due to OOM
+	 */
+	virtual CMDXMLElement* ParseStartTagL();
+
+	/*
+	 * Detects the type of a file - can be Unicode or UTF-8
+	 */
+	TBool DetectFileType();
+
+	/*
+	 * Creates a generic or DTD-specific document object
+	 * @leave can Leave due to OOM
+	 */
+	virtual void CreateDocumentL();
+
+	/*
+	 * Sets iError to new errorcode if more serious than any error so far encountered
+	 */
+	IMPORT_C void SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity);
+
+	/*
+	 * This function is used to parse the attributes.
+	 * @param aElement The element to which the attributes belong
+	 * @param aTagToParse The tag to be parsed
+	 * @return Returns KErrNone if both attribute name & value are valid
+	 * KErrXMLBadAttributeName if attribute name is invalid or KErrXMLBadAttributeValue if attribute value is invalid
+	 * @leave can Leave due to OOM
+	 */
+	TInt ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse);
+
+	/**
+	This function locates the next attribute in the tag.
+	@param aTagToParse the tag to find the attribute in
+	@return the offset of the next attribute
+	*/
+	TInt LocateNextAttribute(const TDesC& aTagToParse);
+
+	/*
+	 * Parses an end tag. In fact, at this point the end tag must match the tag name of the start tag.
+	 * @param aElement NOTE(review): undocumented in the original; presumably the element whose start tag must be matched - confirm.
+	 * @param aTagToParse Text of the end tag.
+	 * @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch.
+	 */
+	TInt ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse);
+
+	TInt CheckForStartCData(const TDesC& aTextToCheck);
+	TInt FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind);
+
+	/*
+	 * Second stage constructor
+	 */
+	void ConstructL(MXMLDtd* aDtdRepresentation);
+	void AddTextL(CMDXMLElement* aParentElement);
+
+	/*
+	 * Checks whether the end of this tag is in a CDataSection.
+	 * @param aDataToSearch The data to check
+	 * @return Returns ETrue if the tag contains an unclosed CDataSection
+	 */
+	TBool InCDataSection(TDesC& aDataToSearch);
+
+	/*
+	 * Entity converts the sections of one attribute value that are not within a CDataSection.
+	 * @param aAttributeValue one attribute value
+	 * @return Returns an error if entity conversion did not successfully complete, otherwise KErrNone
+	 */
+	TInt ParseSingleAttributeL(TDes& aAttributeValue);
+
+	/*
+	 * Prepares this class for use on another file.
+	 *
+	 */
+	void PrepareForReuseL();
+
+	/**
+	This should be called when parsing has been completed, before calling ParseFileCompleteL().
+	It checks for errors that can only be determined at the end of parsing, e.g. missing doctype or
+	incomplete content.
+	*/
+	void CheckForErrors();
+
+	IMPORT_C void PlaceholderForRemovedExport1(MMDXMLParserObserver* aParserObserver);
+	IMPORT_C void PlaceholderForRemovedExport2(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
+	IMPORT_C void PlaceholderForRemovedExport3();
+
+
+private:
+	enum TPanicCode { ENullMemVarDataSource,
+					  ENullMemVarParserObserver,
+					  ENullMemVarXMLDoc,
+					  ENullMemVarElementTag,
+					  ENullParameterParentElement };
+	void Panic(TPanicCode aReason) const;
+
+private:
+	MMDXMLParserObserver* iParserObserver;
+	MXMLDtd* iDtdRepresentation;
+	TInt iError;								// Current error
+	TXMLErrorCodeSeverity iSeverity;			// ErrorCode severity
+	CMDXMLDocument* iXMLDoc;					// Document created by the parser
+	CMDXMLEntityConverter* iEntityConverter;	// Entity converter used by the parser
+	HBufC* iElementTag;							// Currently processed element tag
+	TBool iDocTypeSet;							// NOTE(review): presumably records that DocTypeL() has already set the document DocType - confirm
+	TBool iVersionSet;							// NOTE(review): presumably records that VersionIDL() has already set the document Version - confirm
+	TInt iBytesPerChar;
+
+	/* member variables dealing with access to source data */
+	TPtrC8 iInputBufferPtr;						// set during a call to get more data
+	TInt iCurrentInputBufferLen;				// current length of the data block available
+	TInt iNextChar;								// read position in the data block
+	TInt iInputBytesRemaining;					// number of bytes remaining to read.
+	HBufC8 *iUTF8EdgeBuffer;					// buffer to hold up to 6 bytes so that UTF8 parsing can span edges of data blocks
+	HBufC8 *iBomBuffer;							// buffer to hold data at the start of the stream so we may determine charset
+	TInt iRequiredUTF8Bytes;					// number of bytes required to complete the character held in the edge buffer
+	TBool iUnicodeInputMisaligned;				// Set to ETrue if the unicode input stream is not aligned to 16-bit boundaries
+	MMDXMLParserDataProvider* iDataSource;		// XML Data Source being parsed.
+	CMDXMLParserFileDataSource* iFileSource;	// We own this, and need to free it when we are done. Only used when we're providing the data source object to wrap a local file.
+
+	/* member variables dealing with chunked conversion into unicode output */
+	TBuf<32> iUnicodeConversion;				// buffer to temporarily hold the results of conversion from UTF8 to Unicode
+	TInt iUnicodeConversionLen;					// number of characters stored in our intermediate buffer
+	TInt iUnicodeReadPos;						// next character to send from our intermediate buffer
+	TBuf<1> iSpareChar;
+
+	/* member variables used when parsing a local file */
+	TDesC *iFileToParse;
+	RFs iRFs;
+	RFile iFileHandleToParse;
+
+	TBool iEndOfTag;
+
+	/* member variables used in DoParseLoopL() */
+	TBool iOpened;
+	TBool iClosed;
+	CMDXMLElement* iNewElement;
+	CMDXMLElement* iParentElement;
+	HBufC* iText;
+	enum EParserStates
+		{
+		KInitFromFile,
+		KDetermineCharset,
+		KWaitingForData,
+		KParseData,
+		KSpanDataGap,
+		KFinished
+		};
+
+	EParserStates iState;
+	EParserStates iPreviousState;
+	TInt iSuspiciousCharacter;
+	TBool iStoreInvalid;						// controls whether invalid elements and attributes are stored in the DOM.
+	TBool iPreserve;
+
+	};
+
+#endif