1 // Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
2 // All rights reserved.
3 // This component and the accompanying materials are made available
4 // under the terms of the License "Symbian Foundation License v1.0" to Symbian Foundation members and "Symbian Foundation End User License Agreement v1.0" to non-members
5 // which accompanies this distribution, and is available
6 // at the URL "http://www.symbianfoundation.org/legal/licencesv10.html".
8 // Initial Contributors:
9 // Nokia Corporation - initial contribution.
14 // This class provides a mechanism to use a BNF tree to parse an input stream.
15 // The notation of the EBNF is based upon that described in the XML1.0 specification.
16 // The BNF tree form used is a variation on Extended BNF and has the following rule types,
17 // where the input stream must:
18 // , Exact - match exactly with the provided string.
19 // , Range - next character must be in the specified range.
20 // , Select - next character must exist in the selected string.
21 // If the select string starts with ^ it is a NOT Select.
22 // , And - match all of the given sub rules
23 // , Or - match one of the given sub rules
24 // , NMore - match N or more times the SINGLE subrule.
25 // , Optional - match 0/1 times to the SINGLE subrule.
26 // , Without - match the first subrule but NOT the second.
27 // , Reference - match the referred to rule.
28 // The iterative parser not only validates an input stream against the
29 // BNF grammar but allows pre/post actions to be performed during the parsing.
30 // Partial parsing is also allowed in that the input stream does not have to
31 // be completed before parsing can begin. As soon as data is added the parser
32 // attempts to parse it.
33 // Numerous methods are provided to assist in the building of the BNF Tree this parser uses.
35 // Create a derivation and implement the virtual method TreeL() to create a BNF rule tree
36 // (the assistance methods NewBNF/NewRule etc should be used) - see DTDModel
37 // To use your new parser invoke ResetL and pass input data using the ProcessDataL method.
41 #ifndef __CBNFPARSER_H__
42 #define __CBNFPARSER_H__
45 #include <mdataproviderobserver.h>
47 #include <cfragmentedstring.h>
51 // forward class declarations
53 class CAttributeLookupTable;
56 // Rule Tree node type definitions
57 /** Defines types of node in a BNF tree (CBNFParser).
59 Except for ERoot, EIncomplete, EReference, and ELastParserNodeType, the
60 types define different types of rule that the input stream must meet to
61 satisfy the grammar. */
66 /** Incomplete node. */
68 /** Exact rule: match exactly with the provided string. */
70 /** Range rule: next character must be in the specified range.
72 The start of the range is specified by a CBNFNode::KRangeStart()
73 attribute; the end by a CBNFNode::KRangeEnd() attribute. */
75 /** Select rule: next character must exist in the selected string.
77 If the select string starts with ^, it is a NOT Select. */
79 /** And rule: match all of the given sub-rules.
81 Sub-rules are defined by the child nodes of the AND rule node. */
83 /** Or rule: match one of the given sub-rules.
85 Sub-rules are defined by the child nodes of the OR rule node. */
87 /** NMore rule: match a single subrule N or more times.
89 A minimum is specified by a CBNFNode::KNMoreMinimum() attribute; a maximum by
90 a CBNFNode::KNMoreMaximum() attribute; an exact figure by a CBNFNode::KNMoreCount() attribute. */
92 /** Optional rule: match a single sub-rule 0/1 times.
94 A sub-rule is defined by the child node of the Optional rule node. */
96 /** Without rule: match the first sub-rule but not the second.
98 Sub-rules are defined by the child nodes of the Without rule node. */
100 /** Reference rule: match the referred to rule.
102 The target rule name is identified by a CBNFNode::KReference() attribute. */
104 /** Indicates final node type. */
110 // While the parser is running, the state is EActive.
111 // Setting the parser state to something else in a pre-/post-rule callback function
112 // causes the parser to exit on next loop in ParseL. If the state is set to EStopped
113 // we have finished the parser operation (e.g. in event of an error), in state EPaused
114 // we are likely to resume the parser operation after some external operations.
115 /** CBNFParser parser states. */
118 /** Parser has stopped. */
120 /** Parser is running. */
122 /** Parser has paused: e.g. waiting for further input to continue. */
128 class CBNFParser : public CBase, public MDataProviderObserver
129 /** Base class for parsers that use a BNF tree to parse an input stream.
131 The BNF tree form used is a variation on Extended BNF described in the XML1.0
132 specification. The general form of the tree is as follows:
134 Each node in the tree defines a rule that the input stream must meet to satisfy the grammar.
136 1. a node type is set to the rule type, as defined in TParserNodeTypes
138 2. node data stores any string required by the rule: e.g. for a comparison rule, the string
141 3. the parser allows callback functions to be called either before or after the rule is processed.
142 If these are present, they are stored as attributes of the node.
144 4. some rules allow sub-rules: for example, the AND rule expects a number of sub-rules, all
145 of which must be successful if the AND rule itself is to succeed. Each sub-rule is
146 represented as a child node of the parent rule. Sub-rules in turn can have sub-rules.
148 5. reference rule nodes are also allowed: these do not themselves define rules, but direct the
149 parser to another rule. They can link rules to each other and so build rule sequences more
150 complex than a simple tree.
152 All the top-level rules are stored as attributes of the root node. The attribute type is a string
153 that names the rule; the attribute value is a pointer to the node that implements the rule.
155 The class supplies functions that encapsulate adding rules appropriately to the tree. The parser
156 provider creates a derived class that implements the virtual method TreeL() that uses these
157 functions to create a BNF rule tree.
159 The user of the parser initialises the parser with ResetL(), and then passes input data to the
160 parser using ProcessDataL(). The parser supports partial parsing: the input stream does not have
161 to be completed before parsing can begin. As soon as data is added, the parser attempts to parse it.
169 /** Defines a type to handle a stack of rules. */
170 typedef CStack<CBNFNode, EFalse> CRuleStack;
172 /** Type definition for a callback function pointer
173 Callback functions need to get a reference to the parser as parameter
174 and they need to be static. */
175 typedef void (TRuleCallback)(CBNFParser&);
178 // Constructor for a new parser instance
181 // aLUT - reference to attribute lookuptable; used to store all the stuff in the parser rule tree
183 //##ModelId=3B6669EA00F8
184 IMPORT_C static CBNFParser* NewL(CAttributeLookupTable& aLUT);
186 //##ModelId=3B6669EA00F7
187 IMPORT_C virtual ~CBNFParser();
189 // Prepare the parser to take in fresh stream of data.
190 // THIS METHOD MUST BE CALLED BEFORE DATA CAN BE PROCESSED BY THE PARSER!!
191 // Calls TreeL in order to create the parsing rule tree if no tree already
193 //##ModelId=3B6669EA00EF
194 IMPORT_C virtual void ResetL();
196 /** Checks if the input stream was completely processed
197 @return ETrue if all of the data was processed, EFalse if the data didn't match to the parsing rules
199 //##ModelId=3B6669EA00EE
200 TBool Valid() const { return iStringComplete && (iString.Length() == 0); }
202 /** Concatenates the rest of the input stream (which hasn't yet been processed)
203 into a single string. The ownership of the string is given to the caller.
204 @return String containing the remaining data to be parsed. OWNERSHIP PASSED TO CALLED. */
205 //##ModelId=3B6669EA00ED
206 HBufC* StringL() const { return iString.StringL(); }
208 /** Gets a pointer to the rule node currently being processed.
210 //##ModelId=3B6669EA00E3
211 CBNFNode* CurrentRule() { return iCurrentRule; }
213 // Set reference to an attribute lookup table
214 //##ModelId=3B6669EA00C5
215 void SetAttributeLookupTable(CAttributeLookupTable& aAttributeLookupTable);
217 // methods to allow the input stream to be marked so that the callbacks
218 // can determine those parts which successfully matched
220 /** Set a mark to the current position of the input stream.
222 The mark acts as a tag in the stream currently being processed.
223 As we process further along the stream after adding the mark, we can perform
224 a rollback to the most previously set mark and start processing again (e.g. OR rule
225 works this way). The string fragments won't be consumed (deleted) until
226 all the marks on a fragment (and fragments before that) are deleted. */
227 //##ModelId=3B6669EA00BC
228 void Mark() { iString.Mark(); }; // **Mark can leave**
230 /** Get string between the "cursor position" and the latest mark on the stream.
232 @return Pointer to the string from the previous mark on to the current position
233 of processed string. OWNERSHIP OF THE STRING GIVEN TO THE CALLER. */
234 //##ModelId=3B6669EA00BB
235 HBufC* MarkedL() { return iString.MarkedL(); };
237 /** Gets the marked string with a string added before the mached string.
239 @return A string cosisting of aInitialText appended with the marked string.
240 OWNERSHIP OF THE CONSTRUCTED STRING IS GIVEN TO THE CALLER. */
241 //##ModelId=3B6669EA009E
242 HBufC* MarkedWithInitialTextL(const TDesC& aInitialText) { return iString.MarkedWithInitialTextL(aInitialText); };
244 /** Removes the latest mark. All the marks are stored in a stack and this removes
246 //##ModelId=3B6669EA009D
247 void DeleteMark() { iString.DeleteMark(); };
249 // methods to determine if the used rule actually matched (typically used in post callbacks)
250 /** Tests if the used rule matched.
252 This is typically used in post-rule callbacks.
254 @return True if the used rule matched; otherwise false
256 //##ModelId=3B6669EA0094
257 TBool RuleMatched() const { return iSubRuleMatched; };
258 /** Tests if an Optional node sub-rule matched.
260 @return True if the sub- rule matched; otherwise false
262 //##ModelId=3B6669EA0093
263 TBool OptionalMatched() const { return iOptionalMatched; };
265 // Create new rule tree root node.
266 // This method creates a new single instance of CBNFNode, which shall act as the root
267 // node of the rule tree, which implements the BNF rules for parsing the input stream.
268 // All the other rules are attached as attributes to this node.
269 // The root node should have single child node, which should be a reference to the
270 // "logical root" of the rule tree. This can be done by attaching the logical root
271 // rule as a component to the root rule.
272 //##ModelId=3B6669EA0089
273 IMPORT_C CBNFNode* NewBNFL();
275 // Add a new rule to a rule tree.
278 // aRootRule - Pointer to the root bnf node (created with NewBNFL() ).
279 // aRuleName - Reference to a string identifying this rule. The string is used
280 // to make references to this rule from other rule's subtrees.
281 // aData - Pointer to a data string; used with EExact and ESelect type rules
282 // to match actual text strings.
283 // aPreRule - Function pointer to a prerule function that gets called _BEFORE_
284 // we start processing this rule and its children (i.e. the rule subtree)
285 // aPostRule - Function pointer to a postrule function which is called _AFTER_
286 // we have processed this rule (i.e. when we return up from the subtree
287 // and this rule is finished).
290 // CBNFNode& - Reference to the newly created rule node in the rule tree
292 //##ModelId=3B6669E90326
293 IMPORT_C CBNFNode& NewRuleL(CBNFNode* aRootRule,
294 const TDesC& aRuleName,
295 TParserNodeTypes aRuleType,
297 TRuleCallback* aPreRule,
298 TRuleCallback* aPostRule);
300 // Overridden version of the NewRuleL. Takes reference to the data instead of owning it.
301 //##ModelId=3B6669E903D1
302 IMPORT_C CBNFNode& NewRuleL(CBNFNode* aRootRule,
303 const TDesC& aRuleName,
304 TParserNodeTypes aRuleType,
306 TRuleCallback* aPreRule,
307 TRuleCallback* aPostRule);
309 // construct a new rule component not attached to a rule.
310 //##ModelId=3B6669E9018C
311 IMPORT_C CBNFNode* NewComponentL(TParserNodeTypes aRuleType, const TDesC& aData);
312 //##ModelId=3B6669E901B4
313 IMPORT_C CBNFNode* NewComponentL(TParserNodeTypes aRuleType, HBufC* aData = NULL, TRuleCallback* aPreRule = NULL, TRuleCallback* aPostRule = NULL);
315 // create a reference component to the rule of the given name
316 // which is not attached to any rule.
317 //##ModelId=3B6669E90204
318 IMPORT_C CBNFNode* NewComponentL(CBNFNode* aRootRule, const TDesC& aRuleName);
320 // Methods to create a new subrule to the given parent rule.
321 // These methods can be used to build the subtrees to the "main rules" attached to the root node.
324 // aParentRule - The rule for which the new rule shall be added as a child
325 // aRuleType - Type of the new rule
326 // aData - Data for the rule; the string to match for an EExact rule, the selection character set for ESelect
328 // aPreRule - Pre rule callback function pointer
329 // aPostRule - Post rule callback function pointer
331 // CBNFNode& - reference to the new rule
333 //##ModelId=3B6669E9022C
334 IMPORT_C CBNFNode& NewComponentL(CBNFNode &aParentRule, TParserNodeTypes aRuleType, const TDesC& aData);
335 //##ModelId=3B6669E90268
336 IMPORT_C CBNFNode& NewComponentL(CBNFNode &aParentRule, TParserNodeTypes aRuleType, HBufC* aData = NULL, TRuleCallback* aPreRule = NULL, TRuleCallback* aPostRule = NULL);
338 // Create a reference to another rule and attach this reference as a child of the given parent.
339 // Creates a child node of type EReference for the parent. This reference node
340 // holds the pointer to the rule we are referring to.
341 // Using references we can link rules to each other and build complex rule trees
342 // even though they don't physically form a complete tree.
343 // Notice that the rule we are referring to does not necessarily need to exist yet!
346 // aRootRule - The Root node to the rule tree (created with NewBNFL). This is needed to
347 // find the rule we are referring to with the string.
348 // aParentRule - The parent rule of the newly created reference
349 // aRuleName - The "id string" of the rule we are referring to.
350 //##ModelId=3B6669E902CC
351 IMPORT_C CBNFNode& NewComponentL(CBNFNode* aRootRule, CBNFNode &aParentRule, const TDesC& aRuleName);
353 // add additional attributes to components of rules (i.e. range values)
354 //##ModelId=3B6669E900F6
355 IMPORT_C void AddComponentAttributeL(CBNFNode& aRule, CBNFNodeAttributeType aAttribute, TInt aInt);
357 // re-implementations of MDataProviderObserver methods
358 //##ModelId=3B6669E900D8
359 IMPORT_C virtual void ProcessDataL(HBufC8& aData);
360 //##ModelId=3B6669E900AF
361 IMPORT_C virtual void SetStatus(TInt aStatus = KErrNone);
362 //##ModelId=3B6669E90069
363 IMPORT_C virtual void SetDocumentTypeL(const TDesC&);
364 //##ModelId=3B6669E90087
365 IMPORT_C virtual void SetDocumentTypeL(const TDesC&, const TDesC&);
366 //##ModelId=3B6669E90055
367 IMPORT_C virtual void SetDataExpected(TInt);
368 //##ModelId=3B6669E90041
369 IMPORT_C virtual void SetBaseUriL(const TDesC* aBaseUri);
370 //##ModelId=3B6669E90038
371 IMPORT_C virtual void MDataProviderObserverReserved1();
372 //##ModelId=3B6669E90037
373 IMPORT_C virtual void MDataProviderObserverReserved2();
375 // Tell the parser that all the data has been passed in.
376 // This method attempts to parse what ever is left of the input stream if it wasn't
378 //##ModelId=3B6669E9002E
379 IMPORT_C void CommitL();
381 /** Get the current state of the parser.
382 @return Parser state */
383 //##ModelId=3B6669E9002D
384 TParseState State() const {return(iParsing);};
387 IMPORT_C CBNFParser(CAttributeLookupTable& aLUT);
389 // Each of the following functions is a handler method for a specific type of a rule
390 // node. For example, ReferenceL handles reference nodes etc.
391 // These methods are called by PerformRuleL.
394 // aRule - reference to the rule being processed
395 // aMatched - reference to a CFragmentedString::TStringMatch variable, which holds
396 // the information if the string or character we previously were trying to
397 // match actually matched.
399 // TBool - We return ETrue if we have completed processing this node. If the processing
400 // still continues we return EFalse. For example, an EAnd rule would return
401 // ETrue if all of its children had matched or if a rule didn't match. In the first
402 // case the EAnd rule would have turned out to be true (aMatched = EMatched) since
403 // all of its children were true, but in the latter case we can stop processing the
404 // EAnd rule, since a subrule to the And didn't match and this means that the And
405 // expression can not be true. Either way, the processing of the And ends and we
408 //##ModelId=3B6669E90005
409 IMPORT_C virtual TBool ReferenceL(CBNFNode& aRule, CFragmentedString::TStringMatch& aMatched);
410 //##ModelId=3B6669E803BB
411 IMPORT_C virtual TBool ExactL(CBNFNode& aRule, CFragmentedString::TStringMatch& aMatched);
412 //##ModelId=3B6669E80389
413 IMPORT_C virtual TBool RangeL(CBNFNode& aRule, CFragmentedString::TStringMatch& aMatched);
414 //##ModelId=3B6669E80343
415 IMPORT_C virtual TBool SelectL(CBNFNode& aRule, CFragmentedString::TStringMatch& aMatched);
416 //##ModelId=3B6669E80311
417 IMPORT_C virtual TBool WithoutL(CBNFNode& aRule, CFragmentedString::TStringMatch& aMatched);
418 //##ModelId=3B6669E802D5
419 IMPORT_C virtual TBool AndL(CBNFNode& aRule, CFragmentedString::TStringMatch& aMatched);
420 //##ModelId=3B6669E80299
421 IMPORT_C virtual TBool OrL(CBNFNode& aRule, CFragmentedString::TStringMatch& aMatched);
422 //##ModelId=3B6669E80271
423 IMPORT_C virtual TBool OptionalL(CBNFNode& aRule, CFragmentedString::TStringMatch& aMatched);
424 //##ModelId=3B6669E8023F
425 IMPORT_C virtual TBool NMoreL(CBNFNode& aRule, CFragmentedString::TStringMatch& aMatched);
427 // A method to add a callback to a rule
430 // aRule - The rule to which the callback is to be added
431 // aCallbackID - Either CBNFNode::KPreRuleCallback() or CBNFNode::KPostRuleCallback()
432 // Defines the type of the callback function (i.e. is it to be called before
433 // or after the rule has been processed).
434 // aCallback - The callback function pointer
436 //##ModelId=3B6669E80203
437 IMPORT_C virtual void AddRuleCallbackL(CBNFNode& aRule, const TDesC* aCallbackID, TRuleCallback* aCallback);
438 //##ModelId=3B6669E801EF
439 IMPORT_C virtual void ExecutePreRuleCallbackL(CBNFNode& aRule);
440 //##ModelId=3B6669E801D1
441 IMPORT_C virtual void ExecutePostRuleCallbackL(CBNFNode& aRule);
443 // the method TreeL() should be reimplemented to generate a BNF rule tree and return
444 // ownership of it. This is the rule tree which will be used to parse the input stream.
445 // See XmlPars.cpp or DTDMDL.cpp for example.
446 //##ModelId=3B6669E801D0
447 IMPORT_C virtual CBNFNode* TreeL();
449 // methods which are invoked when the parser encounters a conditional
450 // point in the BNF grammar (i.e. And/Or)
451 //##ModelId=3B6669E801B2
452 IMPORT_C virtual void StartConditional(TParserNodeTypes aRuleType);
453 //##ModelId=3B6669E80180
454 IMPORT_C virtual void EndConditional(TParserNodeTypes aRuleType, TBool aSuccess);
456 // A callback function to insert a mark to the current position of the stream
457 // being processed. Adding a mark is a very common callback operation before starting
458 // to process a rule, hence the method is provided by the parser.
459 //##ModelId=3B6669E8016C
460 IMPORT_C static void MarkCallback(CBNFParser& aParser);
462 // returns the LUT used by this parser.
463 //##ModelId=3B6669E80163
464 IMPORT_C CAttributeLookupTable& AttributeLUT() const;
466 // method which does the actual iterative parsing
467 //##ModelId=3B6669E80162
468 IMPORT_C TBool ParseL();
470 // A rule to handle a node in the rule tree. This method just calls the appropriate
471 // handler method according to the rule type.
472 //##ModelId=3B6669E8013A
473 IMPORT_C virtual TBool PerformRuleL(CBNFNode& aRule, CFragmentedString::TStringMatch& aMatched);
475 //##ModelId=3B6669E8011C
476 /** Sets the parser state.
478 @param aState Parser state
480 void SetState(TParseState aState) {iParsing=aState;};
483 /** Storage object for all the attributes and identifiers in a tree */
484 //##ModelId=3B6669E80108
485 CAttributeLookupTable& iLUT;
487 /** An utility object which stores all the buffers passed into the parser
488 and represents them as if they would form a single, continuous string.
489 This class also performs the actual physical matching/selection of the strings
490 and holds the marks set onto the string.*/
491 //##ModelId=3B6669E800EA
492 CFragmentedString iString;
493 /** Flag indicating if the input stream has been completely processed. */
494 //##ModelId=3B6669E800D6
495 TBool iStringComplete; // more input stream has completed
497 /** The BNF tree the parser is using to parse the input stream.*/
498 //##ModelId=3B6669E800C2
499 CBNFNode* iTree; // the BNF tree we are using to parse the input stream
501 /** A stack of rules from iTree which are waiting to be completed.
502 The stack basically holds the path along the rule tree. */
503 //##ModelId=3B6669E800AE
504 CRuleStack iRuleStack;
505 /** The BNF rule that is currently being processed. */
506 //##ModelId=3B6669E80090
507 CBNFNode* iCurrentRule; // the BNF rule we are currently using
509 // when returning to a rule in the rulestack this indicates
510 // if the child rule matched correctly
511 /** Flag that indicates when returning to a rule in the rulestack if the child rule matched correctly. */
512 //##ModelId=3B6669E8007C
513 TBool iSubRuleMatched;
514 /** Flag that indicates when returning to a rule in the rulestack if an optional rule matched correctly. */
515 //##ModelId=3B6669E8006A
516 TBool iOptionalMatched;
518 /** The child rule we are returning from (if any).
519 If this is NULL we are new to this BNF rule.*/
520 //##ModelId=3B6669E80054
524 //##ModelId=3B6669E8004A
525 TParseState iParsing;
526 /** Input stream matched rule flag. */
527 //##ModelId=3B6669E80038
528 CFragmentedString::TStringMatch iMatched;
530 // Storage pointers for strings identifying certain attributes on the rule nodes
531 /** Stores attribute identifier for reference string attributes. */
532 //##ModelId=3B6669E8002C
533 const TDesC* iReferenceString;
534 /** Stores attribute identifier for range start attributes. */
535 //##ModelId=3B6669E8001A
536 const TDesC* iRangeStart;
537 /** Stores attribute identifier for range end attributes. */
538 //##ModelId=3B6669E80010
539 const TDesC* iRangeEnd;
540 /** Stores attribute identifier for nmore minimum attributes. */
541 //##ModelId=3B6669E80006
542 const TDesC* iMoreMinimum;
543 /** Stores attribute identifier for nmore count attributes. */
544 //##ModelId=3B6669E703DA
545 const TDesC* iMoreCount;
546 /** Stores attribute identifier for nmore maximum attributes. */
547 //##ModelId=3B6669E703D0
548 const TDesC* iMoreMaximum;
549 /** Stores attribute identifier for pre-rule callback attributes. */
550 //##ModelId=3B6669E703C6
551 const TDesC* iPreRuleCallback;
552 /** Stores attribute identifier for post-rule callback attributes. */
553 //##ModelId=3B6669E703BC
554 const TDesC* iPostRuleCallback;