1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/epoc32/include/gmxmlparser.h Tue Mar 16 16:12:26 2010 +0000
1.3 @@ -0,0 +1,511 @@
1.4 +// Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
1.5 +// All rights reserved.
1.6 +// This component and the accompanying materials are made available
1.7 +// under the terms of the License "Symbian Foundation License v1.0" to Symbian Foundation members and "Symbian Foundation End User License Agreement v1.0" to non-members
1.8 +// which accompanies this distribution, and is available
1.9 +// at the URL "http://www.symbianfoundation.org/legal/licencesv10.html".
1.10 +//
1.11 +// Initial Contributors:
1.12 +// Nokia Corporation - initial contribution.
1.13 +//
1.14 +// Contributors:
1.15 +//
1.16 +// Description:
1.17 +// This file contains the declaration of the generic CMDXMLParser class
1.18 +// which is responsible for creating a DOM structure
1.19 +// from a given XML file.
1.20 +//
1.21 +//
1.22 +
1.23 +
1.24 +
1.25 +/**
1.26 + @file
1.27 +*/
1.28 +
1.29 +#ifndef __GMXMLPARSER_H__
1.30 +#define __GMXMLPARSER_H__
1.31 +
1.32 +#include <e32std.h>
1.33 +#include <eikenv.h>
1.34 +#include <gmxmlconstants.h>
1.35 +
1.36 +// Forward declarations
1.37 +class CMDXMLDocument;
1.38 +class CMDXMLEntityConverter;
1.39 +class CMDXMLElement;
1.40 +class MXMLDtd;
1.41 +
1.42 +
1.43 +
1.44 +class MMDXMLParserObserver
1.45 +/** Abstract observer interface for notification when XML parsing is complete.
1.46 +
1.47 +It should be implemented by users of CMDXMLParser, which is given a pointer to the observer when it is created (see CMDXMLParser::NewL() and CMDXMLParser::NewLC()).
1.48 +@publishedAll
1.49 +@released*/
1.50 + {
1.51 +public:
1.52 + /**
1.53 + Callback used to inform a client of the parser that a parsing operation has completed. CMDXMLParser::RunError() is documented as also calling it after a leave from RunL().
1.54 + */
1.55 + virtual void ParseFileCompleteL() = 0;
1.56 + };
1.57 +
1.58 +class MMDXMLParserDataProvider
1.59 +/** Abstract interface to a source of XML data.
1.60 +
1.61 +The user of CMDXMLParser must build one of these to encapsulate the data source
1.62 +that they wish to parse. CMDXMLParser implements a file-based data source to
1.63 +implement the functionality of its ParseFile() functions.
1.64 +
1.65 +@publishedAll
1.66 +@released*/
1.67 + {
1.68 +public:
1.69 + /** Status codes with which GetData() implementations complete the request. */
1.70 + enum TDataProviderResults
1.71 + {
1.72 + KMoreData, ///< Returned by the interface implementation when it is returning more data.
1.73 + KDataStreamError, ///< Returned by the interface when an unrecoverable error prevents obtaining more data. A recoverable error should be represented by KDataNotReady. NOTE(review): KDataNotReady is not declared in this enum - confirm how a recoverable error should be reported.
1.74 + KDataStreamEnd ///< Returned by the interface when there is no more data to come.
1.75 + };
1.76 +
1.77 +public:
1.78 + /**
1.79 + The XML Parser calls this on a specific data provider to get more data
1.80 + when required.
1.81 +
1.82 + Note that the TPtrC8 supplied may be used by the parser at any time
1.83 + between the return of this call and the next call that the parser
1.84 + makes to the data provider.
1.85 +
1.86 + Your data provider must not move the data pointed to until the
1.87 + parser has indicated that it's done with that block by asking for
1.88 + another.
1.89 +
1.90 + Ownership of the data pointed to remains with the data provider.
1.91 +
1.92 +
1.93 + General comments on efficiency
1.94 + ------------------------------
1.95 +
1.96 + The parser is designed such that it processes the whole data block
1.97 + provided in one go. It will automatically become asynchronous when
1.98 + another block is required - the data provider only needs to supply
1.99 + data.
1.100 +
1.101 + Because of this design, it allows the data provider to indirectly
1.102 + control the amount of processing time that will be needed
1.103 + in a single block.
1.104 +
1.105 + It is a good idea to balance the need for the fastest possible
1.106 + processing with the need for client application responsiveness by
1.107 + ensuring that the amount of data passed in a single block is not
1.108 + too large. However, it is worth bearing in mind that the parser
1.109 + will convert UTF8 data streams in blocks of 32 characters, and
1.110 + supplying blocks of smaller length than this will result in a
1.111 + slight loss of efficiency.
1.112 +
1.113 + @param aPtr On return, refers to the block of data provided (the data itself stays owned by the data provider)
1.114 + @param aStatus Asynchronous status to be completed by the function with a
1.115 + TDataProviderResults value
1.116 + */
1.117 + virtual void GetData(TPtrC8 &aPtr, TRequestStatus &aStatus) = 0;
1.118 + /**
1.119 + Called to indicate to the data provider that use of the data source is complete.
1.120 + */
1.121 + virtual void Disconnect() = 0;
1.122 + };
1.123 +
1.124 +class CMDXMLParserFileDataSource;
1.125 +
1.126 +class CMDXMLParser: public CActive
1.127 +/** Creates a DOM structure from a given XML file or data source.
1.128 +
1.129 +The parsing operation is asynchronous and is initiated by a call to ParseFile() or ParseSourceL().
1.130 +On completion (notified through MMDXMLParserObserver::ParseFileCompleteL()), the created DOM document can be retrieved through DetachXMLDoc().
1.131 +
1.132 +Note the following ownership rules for the DOM document:
1.133 +
1.134 +1. calling DetachXMLDoc() transfers ownership of the document to the client
1.135 +
1.136 +2. if the parser is asked to parse a new file while it still owns an existing
1.137 +DOM document, it will delete the old document.
1.138 +
1.139 +@publishedAll
1.140 +@released
1.141 +*/
1.142 + {
1.143 +public:
1.144 + /** Allocates and constructs a new XML parser, without specifying a DTD.
1.145 +
1.146 + @param aParserObserver XML parser observer
1.147 + @leave KErrNoMemory Out of memory
1.148 + @return New XML parser */
1.149 + IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver);
1.150 +
1.151 + /** Allocates and constructs a new XML parser, specifying a DTD.
1.152 +
1.153 + @param aParserObserver XML parser observer
1.154 + @param aDtdRepresentation DTD validator
1.155 + @leave KErrNoMemory Out of memory
1.156 + @return New XML parser */
1.157 + IMPORT_C static CMDXMLParser* NewL(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
1.158 +
1.159 + /** Allocates and constructs a new XML parser without specifying a DTD, leaving the object
1.160 + on the cleanup stack.
1.161 +
1.162 + @param aParserObserver XML parser observer
1.163 + @leave KErrNoMemory Out of memory
1.164 + @return New XML parser */
1.165 + IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver);
1.166 +
1.167 + /** Allocates and constructs a new XML parser, specifying a DTD, leaving the object
1.168 + on the cleanup stack.
1.169 +
1.170 + @param aParserObserver XML parser observer
1.171 + @param aDtdRepresentation DTD validator
1.172 + @leave KErrNoMemory Out of memory
1.173 + @return New XML parser */
1.174 + IMPORT_C static CMDXMLParser* NewLC(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
1.175 +
1.176 +
1.177 + /** Destructor. */
1.178 + IMPORT_C ~CMDXMLParser();
1.179 +
1.180 + /** Gets the last error found by the parser.
1.181 +
1.182 + @return Error code
1.183 + */
1.184 + IMPORT_C TInt Error() const;
1.185 +
1.186 + /**
1.187 + Gets the severity of the most severe error found.
1.188 + @return the maximum error severity
1.189 + */
1.190 + IMPORT_C TXMLErrorCodeSeverity ErrorSeverity() const;
1.191 +
1.192 + /** Gets the created DOM.
1.193 +
1.194 + This should be called after the conclusion of the parser process.
1.195 +
1.196 + Note that the function sets the internal variable pointing to the document
1.197 + to NULL, so this function can only be called once per file parse. The caller
1.198 + takes ownership of the document, and must delete it when its use is complete.
1.199 +
1.200 + @return The created DOM */
1.201 + IMPORT_C CMDXMLDocument* DetachXMLDoc();
1.202 +
1.203 + /** Parses a specified XML file into a DOM object tree.
1.204 +
1.205 + @param aRFs File server session
1.206 + @param aFileToParse The file name to parse
1.207 + @return KErrNone if success or a file read error code */
1.208 + IMPORT_C TInt ParseFile(RFs aRFs, const TDesC& aFileToParse);
1.209 + /** Overload of ParseFile() taking a file handle (aFileHandleToParse) instead of a file name; returns an error code (presumably KErrNone on success, as for the overload above - TODO confirm). */
1.210 + IMPORT_C TInt ParseFile(RFile& aFileHandleToParse);
1.211 +
1.212 + /** Parses a specified XML Data Source into a DOM object tree.
1.213 + Use ParseSourceL() in preference to ParseSource(): this wrapper calls ParseSourceL() under TRAP_IGNORE, so any leave is discarded and no error is returned from this function.
1.214 + @param aSource MMDXMLParserDataProvider pointer
1.215 + */
1.216 + inline void ParseSource(MMDXMLParserDataProvider *aSource)
1.217 + {
1.218 + TRAP_IGNORE(ParseSourceL(aSource));
1.219 + }
1.220 +
1.221 + /** Parses a specified XML Data Source into a DOM object tree.
1.222 + @param aSource MMDXMLParserDataProvider pointer
1.223 + */
1.224 + IMPORT_C void ParseSourceL(MMDXMLParserDataProvider *aSource);
1.225 +
1.226 + /** Defines input stream character widths. */
1.227 + enum TMDXMLParserInputCharWidth
1.228 + {
1.229 + EAscii = 0x01, ///< ASCII/UTF8
1.230 + EUnicode = 0x02 ///< Unicode
1.231 + };
1.232 +
1.233 + /** Sets the input stream character width.
1.234 + *
1.235 + * @param aWidth Character width for incoming stream. Possible values are EAscii and EUnicode (representing Ascii/UTF8 and Unicode respectively).
1.236 + *
1.237 + */
1.238 + IMPORT_C void SetSourceCharacterWidth(TMDXMLParserInputCharWidth aWidth);
1.239 +
1.240 + //Defect fix for INC036136- Enable the use of custom entity converters in GMXML
1.241 + /**
1.242 + * Sets the entity converter to be used for parsing
1.243 + * and takes ownership of the passed entity converter.
1.244 + * @param aEntityConverter the entity converter to be used.
1.245 + */
1.246 + IMPORT_C void SetEntityConverter(CMDXMLEntityConverter* aEntityConverter);
1.247 + //End Defect fix for INC036136
1.248 +
1.249 + /**
1.250 + Controls whether invalid elements and attributes are added to the DOM.
1.251 + @param aStoreInvalid ETrue if invalid content should be stored, EFalse otherwise.
1.252 + */
1.253 + IMPORT_C void SetStoreInvalid(TBool aStoreInvalid);
1.254 +
1.255 + /**
1.256 + Controls whether whitespace is handled by the XML parser or by the client.
1.257 + @param aPreserve ETrue if all whitespace should be preserved (handled by client), EFalse otherwise.
1.258 + */
1.259 + IMPORT_C void SetWhiteSpaceHandlingMode(TBool aPreserve);
1.260 +
1.261 +public: // public functions used by other classes within the .dll, not for Export.
1.262 + /** Gets the entity converter.
1.263 +
1.264 + @return The entity converter */
1.265 + CMDXMLEntityConverter* EntityConverter();
1.266 +
1.267 +private:
1.268 + IMPORT_C virtual void DoCancel();
1.269 +
1.270 + /*
1.271 + * RunL function inherited from CActive base class - carries out the actual parsing.
1.272 + * @leave can Leave due to OOM
1.273 + */
1.274 + virtual void RunL();
1.275 +
1.276 + /*
1.277 + * Helper function that does the parsing - called from inside RunL
1.278 + */
1.279 + TBool DoParseLoopL();
1.280 +
1.281 + /*
1.282 + * RunError function inherited from CActive base class - intercepts any Leave from
1.283 + * the RunL() function, sets an appropriate error code and calls ParseFileCompleteL
1.284 + */
1.285 + IMPORT_C TInt RunError(TInt aError);
1.286 +
1.287 + /*
1.288 + * Constructors
1.289 + */
1.290 + CMDXMLParser(MMDXMLParserObserver* aParserObserver);
1.291 +
1.292 + CMDXMLParser(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
1.293 +
1.294 + /*
1.295 + * Called when a character is read in and found to be outside of an element tag
1.296 + */
1.297 + virtual void HandleTextL(TDes& aChar);
1.298 +
1.299 + enum TGetCharReturn
1.300 + {
1.301 + KError = 0x00, // GetChar detected an error
1.302 + KCharReturned, // GetChar returned a character
1.303 + KWaitForChar // GetChar couldn't return a character this time, but might next time.
1.304 + };
1.305 +
1.306 + /*
1.307 + * Fetch one character from the input file
1.308 + * @param aChar the returned character.
1.309 + * @return returns one of the values of TGetCharReturn
1.310 + */
1.311 + TGetCharReturn GetChar(TDes& aChar);
1.312 +
1.313 + /* utility functions, called from GetChar to deal with the
1.314 + * 2 types of input stream
1.315 + */
1.316 + TGetCharReturn GetDoubleByteChar(TDes& aChar);
1.317 + TGetCharReturn GetSingleByteChar(TDes& aChar);
1.318 +
1.319 + /*
1.320 + * Fetch some more data from the data provider
1.321 + * NOTE(review): presumably issues MMDXMLParserDataProvider::GetData() on iDataSource - confirm in the implementation. Nothing is returned.
1.322 + */
1.323 + void GetMoreData();
1.324 +
1.325 + /*
1.326 + * @return Returns true if the current tag is a doctype tag and sets the
1.327 + * Document DocType member accordingly on the first pass of this function.
1.328 + */
1.329 + TBool DocTypeL();
1.330 +
1.331 + /*
1.332 + * creates a new processing instruction if necessary and adds to document
1.333 + * @return Returns true if the current tag is a processing instruction
1.334 + */
1.335 + TBool ProcessingInstructionL(CMDXMLElement* aParentElement);
1.336 +
1.337 + /*
1.338 + * creates a new CDataSection if necessary and adds to document
1.339 + * @return Returns true if the current tag is a CDataSection (presumably - confirm against the implementation)
1.340 + */
1.341 + TBool CDataSectionL(CMDXMLElement* aParentElement);
1.342 + TBool EndOfCDataSection();
1.343 +
1.344 + /*
1.345 + * @return returns true if the current tag is a version id tag and sets the
1.346 + * Document Version member accordingly on the first pass of this function.
1.347 + */
1.348 + TBool VersionIDL();
1.349 +
1.350 + /*
1.351 + * creates a new comment if necessary and adds to document
1.352 + * @return returns true if the current tag is a comment tag
1.353 + */
1.354 + TBool CommentL(CMDXMLElement* aParentElement);
1.355 +
1.356 + /*
1.357 + * Parse a start of element tag and create an element with attributes set.
1.358 + * @return Returns a pointer to the created element
1.359 + * @leave can Leave due to OOM
1.360 + */
1.361 + virtual CMDXMLElement* ParseStartTagL();
1.362 +
1.363 + /*
1.364 + * Detects the type of a file - can be Unicode or UTF-8
1.365 + */
1.366 + TBool DetectFileType();
1.367 +
1.368 + /*
1.369 + * Creates a generic or DTD-specific document object
1.370 + * @leave can Leave due to OOM
1.371 + */
1.372 + virtual void CreateDocumentL();
1.373 +
1.374 + /*
1.375 + * Sets iError to the new error code if it is more serious than any error so far encountered
1.376 + */
1.377 + IMPORT_C void SetError(const TInt aErrorCode, const TXMLErrorCodeSeverity aSeverity);
1.378 +
1.379 + /*
1.380 + * This function is used to parse the attributes.
1.381 + * @param aElement The element to which the attributes belong
1.382 + * @param aTagToParse The tag to be parsed
1.383 + * @return Returns KErrNone if both attribute name & value are valid,
1.384 + * KErrXMLBadAttributeName if the attribute name is invalid or KErrXMLBadAttributeValue if the attribute value is invalid
1.385 + * @leave can Leave due to OOM
1.386 + */
1.387 + TInt ParseElementAttributesL(CMDXMLElement& aElement, TDes& aTagToParse);
1.388 +
1.389 + /**
1.390 + This function locates the next attribute in the tag.
1.391 + @param aTagToParse the tag to find the attribute in
1.392 + @return the offset of the next attribute
1.393 + */
1.394 + TInt LocateNextAttribute(const TDesC& aTagToParse);
1.395 +
1.396 + /*
1.397 + * Parses an end tag. In fact, at this point the end tag must match
1.398 + * the tag name of the start tag.
1.399 + * @param aTagToParse Text of the end tag.
1.400 + * @return Returns KErrNone if the end tag matches the start tag or KErrNotFound if there is a mismatch.
1.401 + */
1.402 + TInt ParseElementEndTag(CMDXMLElement& aElement, const TDesC& aTagToParse);
1.403 +
1.404 + TInt CheckForStartCData(const TDesC& aTextToCheck);
1.405 + TInt FindDelimiter(TDesC& aDataToSearch, TDesC& aDelimiterToFind);
1.406 +
1.407 + /*
1.408 + * Second stage constructor
1.409 + */
1.410 + void ConstructL(MXMLDtd* aDtdRepresentation);
1.411 + void AddTextL(CMDXMLElement* aParentElement);
1.412 +
1.413 + /*
1.414 + * Checks whether the end of this tag is in a CDataSection.
1.415 + * @param aDataToSearch The data to check
1.416 + * @return Returns ETrue if the tag contains an unclosed CDataSection
1.417 + */
1.418 + TBool InCDataSection(TDesC& aDataToSearch);
1.419 +
1.420 + /*
1.421 + * Entity converts the sections of one attribute value that are not within a CDataSection.
1.422 + * @param aAttributeValue one attribute value
1.423 + * @return Returns an error if entity conversion did not successfully complete, otherwise KErrNone
1.424 + */
1.425 + TInt ParseSingleAttributeL(TDes& aAttributeValue);
1.426 +
1.427 + /*
1.428 + * Prepares this class for use on another file.
1.429 + *
1.430 + */
1.431 + void PrepareForReuseL();
1.432 +
1.433 + /**
1.434 + This should be called when parsing has been completed, before calling ParseFileCompleteL().
1.435 + It checks for errors that can only be determined at the end of parsing, e.g. missing doctype or
1.436 + incomplete content.
1.437 + */
1.438 + void CheckForErrors();
1.439 + // Placeholders for exports that have been removed - presumably kept so the remaining exports keep their positions; confirm against the DEF file.
1.440 + IMPORT_C void PlaceholderForRemovedExport1(MMDXMLParserObserver* aParserObserver);
1.441 + IMPORT_C void PlaceholderForRemovedExport2(MMDXMLParserObserver* aParserObserver, MXMLDtd* aDtdRepresentation);
1.442 + IMPORT_C void PlaceholderForRemovedExport3();
1.443 +
1.444 +
1.445 +private:
1.446 + enum TPanicCode { ENullMemVarDataSource,
1.447 + ENullMemVarParserObserver,
1.448 + ENullMemVarXMLDoc,
1.449 + ENullMemVarElementTag,
1.450 + ENullParameterParentElement };
1.451 + void Panic(TPanicCode aReason) const;
1.452 +
1.453 +private:
1.454 + MMDXMLParserObserver* iParserObserver;
1.455 + MXMLDtd* iDtdRepresentation;
1.456 + TInt iError; // Current error
1.457 + TXMLErrorCodeSeverity iSeverity; // ErrorCode severity
1.458 + CMDXMLDocument* iXMLDoc; // Document created by the parser
1.459 + CMDXMLEntityConverter* iEntityConverter; // Entity converter used by the parser
1.460 + HBufC* iElementTag; // Currently processed element tag
1.461 + TBool iDocTypeSet; // presumably ETrue once DocTypeL() has set the document DocType - confirm
1.462 + TBool iVersionSet; // presumably ETrue once VersionIDL() has set the document version - confirm
1.463 + TInt iBytesPerChar; // presumably follows the source character width (see SetSourceCharacterWidth()) - confirm
1.464 +
1.465 + /* member variables dealing with access to source data */
1.466 + TPtrC8 iInputBufferPtr; // set during a call to get more data
1.467 + TInt iCurrentInputBufferLen; // current length of the data block available
1.468 + TInt iNextChar; // read position in the data block
1.469 + TInt iInputBytesRemaining; // number of bytes remaining to read.
1.470 + HBufC8 *iUTF8EdgeBuffer; // buffer to hold up to 6 bytes so that UTF8 parsing can span edges of data blocks
1.471 + HBufC8 *iBomBuffer; // buffer to hold data at the start of the stream so we may determine charset
1.472 + TInt iRequiredUTF8Bytes; // number of bytes required to complete the character held in the edge buffer
1.473 + TBool iUnicodeInputMisaligned; // Set to ETrue if the unicode input stream is not aligned to 16-bit boundaries
1.474 + MMDXMLParserDataProvider* iDataSource; // XML Data Source being parsed.
1.475 + CMDXMLParserFileDataSource* iFileSource; // We own this, and need to free it when we are done. Only used when we're providing the data source object to wrap a local file.
1.476 +
1.477 + /* member variables dealing with chunked conversion into unicode output */
1.478 + TBuf<32> iUnicodeConversion; // buffer to temporarily hold the results of conversion from UTF8 to Unicode
1.479 + TInt iUnicodeConversionLen; // number of characters stored in our intermediate buffer
1.480 + TInt iUnicodeReadPos; // next character to send from our intermediate buffer
1.481 + TBuf<1> iSpareChar;
1.482 +
1.483 + /* member variables used when parsing a local file */
1.484 + TDesC *iFileToParse;
1.485 + RFs iRFs;
1.486 + RFile iFileHandleToParse;
1.487 +
1.488 + TBool iEndOfTag;
1.489 +
1.490 + /* member variables used in DoParseLoopL() */
1.491 + TBool iOpened;
1.492 + TBool iClosed;
1.493 + CMDXMLElement* iNewElement;
1.494 + CMDXMLElement* iParentElement;
1.495 + HBufC* iText;
1.496 + enum EParserStates
1.497 + {
1.498 + KInitFromFile,
1.499 + KDetermineCharset,
1.500 + KWaitingForData,
1.501 + KParseData,
1.502 + KSpanDataGap,
1.503 + KFinished
1.504 + };
1.505 +
1.506 + EParserStates iState;
1.507 + EParserStates iPreviousState;
1.508 + TInt iSuspiciousCharacter;
1.509 + TBool iStoreInvalid; // controls whether invalid elements and attributes are stored in the DOM.
1.510 + TBool iPreserve; // presumably the whitespace mode set by SetWhiteSpaceHandlingMode() - confirm
1.511 +
1.512 + };
1.513 +
1.514 +#endif