2 * Summary: internal routines exported by the parser.
3 * Description: this module exports a number of internal parsing routines
4 * they are not really all intended for applications but
5 * can prove useful doing low level processing.
7 * Copy: See Copyright for the status of this software.
9 * Author: Daniel Veillard
10 * Portion Copyright © 2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
18 #ifndef XML_PARSER_INTERNALS_H
19 #define XML_PARSER_INTERNALS_H
21 #include <stdapis/libxml2/libxml2_parser.h>
22 #include <stdapis/libxml2/libxml2_chvalid.h>
29 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
36 * Identifiers can be longer, but this will be more costly
39 #define XML_MAX_NAMELEN 100
44 * The parser tries to always have that amount of input ready.
45 * One of the points is to provide context when reporting errors.
47 #define INPUT_CHUNK 250
50 * MIN_STACK_THRESHOLD:
52 * The safety buffer that defines the number of bytes from stack overflow.
54 #define MIN_STACK_THRESHOLD 600
57 * MAX_STACK_THRESHOLD:
59 * The safety buffer that defines the number of bytes from stack overflow.
60 * This value is used for SAX parsing - buffer is bigger to account
61 * for stack that might be allocated during user callbacks
63 #define MAX_STACK_THRESHOLD 1000
65 /************************************************************************
67 * UNICODE version of the macros. *
69 ************************************************************************/
72 * @param c a byte value (int)
74 * Macro to check the following production in the XML spec:
76 * [2] Char ::= #x9 | #xA | #xD | [#x20...]
77 * any byte character in the accepted range
79 #define IS_BYTE_CHAR(c) xmlIsChar_ch(c)
83 * @param c an UNICODE value (int)
85 * Macro to check the following production in the XML spec:
87 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
88 * | [#x10000-#x10FFFF]
89 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
91 #define IS_CHAR(c) xmlIsCharQ(c)
95 * @param c an xmlChar (usually an unsigned char)
97 * Behaves like IS_CHAR on single-byte value
99 #define IS_CHAR_CH(c) xmlIsChar_ch(c)
103 * @param c an UNICODE value (int)
105 * Macro to check the following production in the XML spec:
107 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
109 #define IS_BLANK(c) xmlIsBlankQ(c)
113 * @param c an xmlChar value (normally unsigned char)
115 * Behaviour same as IS_BLANK
119 #define IS_BLANK_CH(c) xmlIsBlank_ch(c)
123 * @param c an UNICODE value (int)
125 * Macro to check the following production in the XML spec:
127 * [85] BaseChar ::= ... long list see REC ...
129 #define IS_BASECHAR(c) xmlIsBaseCharQ(c)
133 * @param c an UNICODE value (int)
135 * Macro to check the following production in the XML spec:
137 * [88] Digit ::= ... long list see REC ...
139 #define IS_DIGIT(c) xmlIsDigitQ(c)
143 * @param c an xmlChar value (usually an unsigned char)
145 * Behaves like IS_DIGIT but with a single byte argument
147 #define IS_DIGIT_CH(c) xmlIsDigit_ch(c)
151 * @param c an UNICODE value (int)
153 * Macro to check the following production in the XML spec:
155 * [87] CombiningChar ::= ... long list see REC ...
157 #define IS_COMBINING(c) xmlIsCombiningQ(c)
161 * @param c an xmlChar (usually an unsigned char)
163 * Always false (all combining chars > 0xff)
/* The argument is discarded: the expansion is the literal 0, so c is never
 * evaluated and any side effect in the argument expression does not run. */
165 #define IS_COMBINING_CH(c) 0
169 * @param c an UNICODE value (int)
171 * Macro to check the following production in the XML spec:
174 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
175 * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
176 * [#x309D-#x309E] | [#x30FC-#x30FE]
178 #define IS_EXTENDER(c) xmlIsExtenderQ(c)
182 * @param c an xmlChar value (usually an unsigned char)
184 * Behaves like IS_EXTENDER but with a single-byte argument
186 #define IS_EXTENDER_CH(c) xmlIsExtender_ch(c)
190 * @param c an UNICODE value (int)
192 * Macro to check the following production in the XML spec:
195 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
197 #define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c)
201 * @param c an UNICODE value (int)
203 * Macro to check the following production in the XML spec:
206 * [84] Letter ::= BaseChar | Ideographic
/* The argument c appears twice in the expansion (once for IS_BASECHAR, once
 * for IS_IDEOGRAPHIC), so do not pass an expression with side effects such
 * as *p++. The result is wrapped in parentheses and is safe inside larger
 * expressions. */
208 #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
212 * @param c an xmlChar value (normally unsigned char)
214 * Macro behaves like IS_LETTER, but only check base chars
217 #define IS_LETTER_CH(c) xmlIsBaseChar_ch(c)
220 * @param c an UNICODE value (int)
222 * Macro to check the following production in the XML spec:
225 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
227 #define IS_PUBIDCHAR(c) xmlIsPubidCharQ(c)
231 * @param c an xmlChar value (normally unsigned char)
233 * Same as IS_PUBIDCHAR but for single-byte value
235 #define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c)
239 * @param p a UTF-8 string pointer
241 * Skips the end of line chars.
/**
 * SKIP_EOL:
 * @param p  a UTF-8 string pointer; must be a modifiable lvalue, it is
 *           advanced in place and appears several times in the expansion
 *
 * Skips the end-of-line chars at *p: a carriage return (0x0D) optionally
 * followed by a line feed (0x0A), then a line feed optionally followed by
 * a carriage return.
 *
 * The previous definition compared against 0x13 and 0x10 (the decimal
 * codes 13 and 10 written with a hex prefix), so it matched the DC3/DLE
 * control characters instead of CR/LF.
 *
 * Wrapped in do { } while (0) so the macro is a single statement and is
 * safe as the body of an if/else; invoke it with a trailing semicolon.
 */
#define SKIP_EOL(p)                                                     \
    do {                                                                \
        if (*(p) == 0x0D) { (p)++; if (*(p) == 0x0A) (p)++; }           \
        if (*(p) == 0x0A) { (p)++; if (*(p) == 0x0D) (p)++; }           \
    } while (0)
249 * @param p a UTF-8 string pointer
251 * Skips to the next '>' char.
/**
 * MOVETO_ENDTAG:
 * @param p  a UTF-8 string pointer; must be a modifiable lvalue, it is
 *           advanced in place and evaluated on every loop iteration
 *
 * Advances p until it points at a '>' character or at the terminating
 * NUL byte, whichever comes first. Expands to a single while statement
 * without a trailing semicolon.
 */
#define MOVETO_ENDTAG(p)                                                \
    while ((*(p)) && (*(p) != '>')) (p)++
258 * @param p a UTF-8 string pointer
260 * Skips to the next '<' char.
/**
 * MOVETO_STARTTAG:
 * @param p  a UTF-8 string pointer; must be a modifiable lvalue, it is
 *           advanced in place and evaluated on every loop iteration
 *
 * Advances p until it points at a '<' character or at the terminating
 * NUL byte, whichever comes first. Expands to a single while statement
 * without a trailing semicolon.
 */
#define MOVETO_STARTTAG(p)                                              \
    while ((*(p)) && (*(p) != '<')) (p)++
266 * Global constants used for predefined strings.
268 #ifndef UNDEF_IMPORT_C_IN_DATA_ParserInternal
269 XMLPUBVAR const xmlChar xmlStringText[];
270 XMLPUBVAR const xmlChar xmlStringTextNoenc[];
271 XMLPUBVAR const xmlChar xmlStringComment[];
274 * Function to finish the work of the macros where needed.
276 XMLPUBFUN int XMLCALL xmlIsLetter (int c);
281 XMLPUBFUN xmlParserCtxtPtr XMLCALL
282 xmlCreateFileParserCtxt (const char *filename);
283 XMLPUBFUN xmlParserCtxtPtr XMLCALL
284 xmlCreateURLParserCtxt (const char *filename,
286 XMLPUBFUN xmlParserCtxtPtr XMLCALL
287 xmlCreateMemoryParserCtxt(const char *buffer,
289 XMLPUBFUN xmlParserCtxtPtr XMLCALL
290 xmlCreateEntityParserCtxt(const xmlChar *URL,
292 const xmlChar *base);
293 XMLPUBFUN int XMLCALL
294 xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
295 xmlCharEncoding enc);
296 XMLPUBFUN int XMLCALL
297 xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
298 xmlCharEncodingHandlerPtr handler);
299 XMLPUBFUN int XMLCALL
300 xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt,
301 xmlParserInputPtr input,
302 xmlCharEncodingHandlerPtr handler);
307 XMLPUBFUN void XMLCALL
308 xmlHandleEntity (xmlParserCtxtPtr ctxt,
309 xmlEntityPtr entity);
314 XMLPUBFUN xmlParserInputPtr XMLCALL
315 xmlNewStringInputStream (xmlParserCtxtPtr ctxt,
316 const xmlChar *buffer);
317 XMLPUBFUN xmlParserInputPtr XMLCALL
318 xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,
319 xmlEntityPtr entity);
320 XMLPUBFUN void XMLCALL
321 xmlPushInput (xmlParserCtxtPtr ctxt,
322 xmlParserInputPtr input);
323 XMLPUBFUN xmlChar XMLCALL
324 xmlPopInput (xmlParserCtxtPtr ctxt);
325 XMLPUBFUN void XMLCALL
326 xmlFreeInputStream (xmlParserInputPtr input);
327 XMLPUBFUN xmlParserInputPtr XMLCALL
328 xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
329 const char *filename);
330 XMLPUBFUN xmlParserInputPtr XMLCALL
331 xmlNewInputStream (xmlParserCtxtPtr ctxt);
336 XMLPUBFUN xmlChar * XMLCALL
337 xmlSplitQName (xmlParserCtxtPtr ctxt,
340 XMLPUBFUN xmlChar * XMLCALL
341 xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
342 XMLPUBFUN xmlChar * XMLCALL
343 xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
345 XMLPUBFUN xmlChar * XMLCALL
346 xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);
347 XMLPUBFUN xmlChar * XMLCALL
348 xmlParseQuotedString (xmlParserCtxtPtr ctxt);
349 XMLPUBFUN void XMLCALL
350 xmlParseNamespace (xmlParserCtxtPtr ctxt);
353 * Generic production rules.
355 XMLPUBFUN xmlChar * XMLCALL
356 xmlScanName (xmlParserCtxtPtr ctxt);
357 XMLPUBFUN const xmlChar * XMLCALL
358 xmlParseName (xmlParserCtxtPtr ctxt);
359 XMLPUBFUN xmlChar * XMLCALL
360 xmlParseNmtoken (xmlParserCtxtPtr ctxt);
361 XMLPUBFUN xmlChar * XMLCALL
362 xmlParseEntityValue (xmlParserCtxtPtr ctxt,
364 XMLPUBFUN xmlChar * XMLCALL
365 xmlParseAttValue (xmlParserCtxtPtr ctxt);
366 XMLPUBFUN xmlChar * XMLCALL
367 xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);
368 XMLPUBFUN xmlChar * XMLCALL
369 xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);
370 XMLPUBFUN void XMLCALL
371 xmlParseCharData (xmlParserCtxtPtr ctxt,
373 XMLPUBFUN xmlChar * XMLCALL
374 xmlParseExternalID (xmlParserCtxtPtr ctxt,
377 XMLPUBFUN void XMLCALL
378 xmlParseComment (xmlParserCtxtPtr ctxt);
379 XMLPUBFUN const xmlChar * XMLCALL
380 xmlParsePITarget (xmlParserCtxtPtr ctxt);
381 XMLPUBFUN void XMLCALL
382 xmlParsePI (xmlParserCtxtPtr ctxt);
383 XMLPUBFUN void XMLCALL
384 xmlParseNotationDecl (xmlParserCtxtPtr ctxt);
385 XMLPUBFUN void XMLCALL
386 xmlParseEntityDecl (xmlParserCtxtPtr ctxt);
387 XMLPUBFUN int XMLCALL
388 xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,
390 XMLPUBFUN xmlEnumerationPtr XMLCALL
391 xmlParseNotationType (xmlParserCtxtPtr ctxt);
392 XMLPUBFUN xmlEnumerationPtr XMLCALL
393 xmlParseEnumerationType (xmlParserCtxtPtr ctxt);
394 XMLPUBFUN int XMLCALL
395 xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,
396 xmlEnumerationPtr *tree);
397 XMLPUBFUN int XMLCALL
398 xmlParseAttributeType (xmlParserCtxtPtr ctxt,
399 xmlEnumerationPtr *tree);
400 XMLPUBFUN void XMLCALL
401 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
402 XMLPUBFUN xmlElementContentPtr XMLCALL
403 xmlParseElementMixedContentDecl
404 (xmlParserCtxtPtr ctxt,
406 XMLPUBFUN xmlElementContentPtr XMLCALL
407 xmlParseElementChildrenContentDecl
408 (xmlParserCtxtPtr ctxt,
410 XMLPUBFUN int XMLCALL
411 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
413 xmlElementContentPtr *result);
414 XMLPUBFUN int XMLCALL
415 xmlParseElementDecl (xmlParserCtxtPtr ctxt);
416 XMLPUBFUN void XMLCALL
417 xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);
418 XMLPUBFUN int XMLCALL
419 xmlParseCharRef (xmlParserCtxtPtr ctxt);
420 XMLPUBFUN xmlEntityPtr XMLCALL
421 xmlParseEntityRef (xmlParserCtxtPtr ctxt);
422 XMLPUBFUN void XMLCALL
423 xmlParseReference (xmlParserCtxtPtr ctxt);
424 XMLPUBFUN void XMLCALL
425 xmlParsePEReference (xmlParserCtxtPtr ctxt);
426 XMLPUBFUN void XMLCALL
427 xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);
428 XMLPUBFUN const xmlChar * XMLCALL
429 xmlParseAttribute (xmlParserCtxtPtr ctxt,
431 XMLPUBFUN const xmlChar * XMLCALL
432 xmlParseStartTag (xmlParserCtxtPtr ctxt);
433 XMLPUBFUN void XMLCALL
434 xmlParseEndTag (xmlParserCtxtPtr ctxt);
435 XMLPUBFUN void XMLCALL
436 xmlParseCDSect (xmlParserCtxtPtr ctxt);
437 XMLPUBFUN void XMLCALL
438 xmlParseContent (xmlParserCtxtPtr ctxt);
439 XMLPUBFUN void XMLCALL
440 xmlParseElement (xmlParserCtxtPtr ctxt);
441 XMLPUBFUN xmlChar * XMLCALL
442 xmlParseVersionNum (xmlParserCtxtPtr ctxt);
443 XMLPUBFUN xmlChar * XMLCALL
444 xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
445 XMLPUBFUN xmlChar * XMLCALL
446 xmlParseEncName (xmlParserCtxtPtr ctxt);
447 XMLPUBFUN const xmlChar * XMLCALL
448 xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
449 XMLPUBFUN int XMLCALL
450 xmlParseSDDecl (xmlParserCtxtPtr ctxt);
451 XMLPUBFUN void XMLCALL
452 xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
453 XMLPUBFUN void XMLCALL
454 xmlParseTextDecl (xmlParserCtxtPtr ctxt);
455 XMLPUBFUN void XMLCALL
456 xmlParseMisc (xmlParserCtxtPtr ctxt);
457 XMLPUBFUN void XMLCALL
458 xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
459 const xmlChar *ExternalID,
460 const xmlChar *SystemID);
462 * XML_SUBSTITUTE_NONE:
464 * If no entities need to be substituted.
466 #define XML_SUBSTITUTE_NONE 0
468 * XML_SUBSTITUTE_REF:
470 * Whether general entities need to be substituted.
472 #define XML_SUBSTITUTE_REF 1
474 * XML_SUBSTITUTE_PEREF:
476 * Whether parameter entities need to be substituted.
478 #define XML_SUBSTITUTE_PEREF 2
480 * XML_SUBSTITUTE_BOTH:
482 * Both general and parameter entities need to be substituted.
/* 3 equals XML_SUBSTITUTE_REF (1) | XML_SUBSTITUTE_PEREF (2).
 * NOTE(review): presumably these values are combined/tested as bit flags;
 * confirm against the callers of the xml*DecodeEntities functions. */
484 #define XML_SUBSTITUTE_BOTH 3
486 XMLPUBFUN xmlChar * XMLCALL
487 xmlDecodeEntities (xmlParserCtxtPtr ctxt,
493 XMLPUBFUN xmlChar * XMLCALL
494 xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
500 XMLPUBFUN xmlChar * XMLCALL
501 xmlStringLenDecodeEntities (xmlParserCtxtPtr ctxt,
510 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP.
512 XMLPUBFUN int XMLCALL nodePush (xmlParserCtxtPtr ctxt,
514 XMLPUBFUN xmlNodePtr XMLCALL nodePop (xmlParserCtxtPtr ctxt);
515 XMLPUBFUN int XMLCALL inputPush (xmlParserCtxtPtr ctxt,
516 xmlParserInputPtr value);
517 XMLPUBFUN xmlParserInputPtr XMLCALL inputPop (xmlParserCtxtPtr ctxt);
518 XMLPUBFUN const xmlChar * XMLCALL namePop (xmlParserCtxtPtr ctxt);
519 XMLPUBFUN int XMLCALL namePush (xmlParserCtxtPtr ctxt,
520 const xmlChar *value);
523 * other commodities shared between parser.c and parserInternals.
525 XMLPUBFUN int XMLCALL xmlSkipBlankChars (xmlParserCtxtPtr ctxt);
526 XMLPUBFUN int XMLCALL xmlStringCurrentChar (xmlParserCtxtPtr ctxt,
529 XMLPUBFUN void XMLCALL xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
530 XMLPUBFUN void XMLCALL xmlParserHandleReference(xmlParserCtxtPtr ctxt);
531 XMLPUBFUN int XMLCALL xmlCheckLanguageID (const xmlChar *lang);
534 * Really core function shared with HTML parser.
536 XMLPUBFUN int XMLCALL xmlCurrentChar (xmlParserCtxtPtr ctxt,
538 XMLPUBFUN int XMLCALL xmlCopyCharMultiByte (xmlChar *out,
540 XMLPUBFUN int XMLCALL xmlCopyChar (int len,
543 XMLPUBFUN void XMLCALL xmlNextChar (xmlParserCtxtPtr ctxt);
544 XMLPUBFUN void XMLCALL xmlParserInputShrink (xmlParserInputPtr in);
546 #ifdef LIBXML_HTML_ENABLED
548 * Actually comes from the HTML parser but launched from the init stuff.
550 XMLPUBFUN void XMLCALL htmlInitAutoClose (void);
551 //XMLPUBFUN htmlParserCtxtPtr XMLCALL htmlCreateFileParserCtxt(const char *filename,
552 // const char *encoding);
557 * Specific function to keep track of entities references
558 * and used by the XSLT debugger.
561 * xmlEntityReferenceFunc:
562 * @param ent the entity
563 * @param firstNode the first node in the chunk
564 * @param lastNode the last node in the chunk
566 * Callback function used when one needs to be able to track back the
567 * provenance of a chunk of nodes inherited from an entity replacement.
569 typedef void (*xmlEntityReferenceFunc) (xmlEntityPtr ent,
570 xmlNodePtr firstNode,
571 xmlNodePtr lastNode);
573 XMLPUBFUN void XMLCALL xmlSetEntityReferenceFunc (xmlEntityReferenceFunc func);
577 * Macros for identifying data nodes. Data nodes are kept as text nodes
578 * but some of the fields are reused to save memory.
580 * content <-> binary content, or RChunk, or RFile reference
581 * properties <-> size of binary data
582 * nsDef <-> offset of binary data in RChunk
583 * psvi <-> container type
585 * Added in S60 3.2 release.
/**
 * IS_DATA_NODE:
 * @param node  pointer to a node; appears twice in the expansion
 *
 * Evaluates to 1 when node->type is XML_TEXT_NODE and node->ns is
 * non-null, otherwise 0.
 *
 * The parameter and the whole expansion are parenthesized so the macro
 * composes correctly with !, ||, ?: and similar operators at the call
 * site (previously !IS_DATA_NODE(n) negated only n->type).
 */
#define IS_DATA_NODE(node) \
    (((node)->type == XML_TEXT_NODE) && ((node)->ns))
/**
 * IS_BINARY_NODE:
 * @param node  pointer to a node; appears twice in the expansion
 *
 * Evaluates to 1 when node->type is XML_TEXT_NODE and node->psvi,
 * converted to int, equals 30 (the container-type tag tested for binary
 * content), otherwise 0.
 *
 * The parameter and the whole expansion are parenthesized so the macro
 * can be negated or combined with other operators safely.
 * NOTE(review): psvi is narrowed with an (int) cast as in the original;
 * this assumes the stored tag fits in an int.
 */
#define IS_BINARY_NODE(node) \
    (((node)->type == XML_TEXT_NODE) && ((int)((node)->psvi) == 30))
/**
 * IS_RCHUNK_NODE:
 * @param node  pointer to a node; appears twice in the expansion
 *
 * Evaluates to 1 when node->type is XML_TEXT_NODE and node->psvi,
 * converted to int, equals 31 (the container-type tag tested for an
 * RChunk reference), otherwise 0.
 *
 * The parameter and the whole expansion are parenthesized so the macro
 * can be negated or combined with other operators safely.
 */
#define IS_RCHUNK_NODE(node) \
    (((node)->type == XML_TEXT_NODE) && ((int)((node)->psvi) == 31))
/**
 * IS_RFILE_NODE:
 * @param node  pointer to a node; appears twice in the expansion
 *
 * Evaluates to 1 when node->type is XML_TEXT_NODE and node->psvi,
 * converted to int, equals 32 (the container-type tag tested for an
 * RFile reference), otherwise 0.
 *
 * The parameter and the whole expansion are parenthesized so the macro
 * can be negated or combined with other operators safely.
 */
#define IS_RFILE_NODE(node) \
    (((node)->type == XML_TEXT_NODE) && ((int)((node)->psvi) == 32))
/**
 * IS_EXTERNAL_NODE:
 * @param node  pointer to a node; appears up to three times in the
 *              expansion
 *
 * Evaluates to 1 when node->type is XML_TEXT_NODE and node->psvi,
 * converted to int, equals 31 or 32 (the same tags tested by
 * IS_RCHUNK_NODE and IS_RFILE_NODE), otherwise 0.
 *
 * The parameter and the whole expansion are parenthesized so the macro
 * can be negated or combined with other operators safely.
 */
#define IS_EXTERNAL_NODE(node) \
    (((node)->type == XML_TEXT_NODE) && \
     (((int)((node)->psvi) == 31) || ((int)((node)->psvi) == 32)))
602 /** given text node, check if it represents data node **/
603 #define TEXT_IS_DATA(node) \
606 /** given text node, check if it represents binary node **/
/**
 * TEXT_IS_BINARY:
 * @param node  pointer to a node the caller already knows is a text node
 *
 * Evaluates to 1 when node->psvi, converted to int, equals 30, otherwise
 * 0. Unlike IS_BINARY_NODE, node->type is not checked.
 *
 * The parameter and the whole expansion are parenthesized; previously
 * !TEXT_IS_BINARY(n) compared (!psvi) with 30 and was always false.
 */
#define TEXT_IS_BINARY(node) \
    ((int)((node)->psvi) == 30)
610 /** given text node, check if it represents external node **/
/**
 * TEXT_IS_EXTERNAL:
 * @param node  pointer to a node the caller already knows is a text
 *              node; appears up to twice in the expansion
 *
 * Evaluates to 1 when node->psvi, converted to int, equals 31 or 32,
 * otherwise 0. Unlike IS_EXTERNAL_NODE, node->type is not checked.
 *
 * The parameter is parenthesized so that non-trivial argument
 * expressions (for example a conditional expression) bind correctly.
 */
#define TEXT_IS_EXTERNAL(node) \
    (((int)((node)->psvi) == 31) || ((int)((node)->psvi) == 32))
614 /** fetches data node cid **/
615 #define DATA_NODE_CID(node) \
621 #endif /* XML_PARSER_INTERNALS_H */