epoc32/include/stdapis/libxml2/libxml2_htmlparser.h
author William Roberts <williamr@symbian.org>
Wed, 31 Mar 2010 12:33:34 +0100
branchSymbian3
changeset 4 837f303aceeb
permissions -rw-r--r--
Current Symbian^3 public API header files (from PDK 3.0.h)
This is the epoc32/include tree with the "platform" subtrees removed, and
all but a selected few mbg and rsg files removed.
     1 /*
     2  * Summary: interface for an HTML 4.0 non-verifying parser
     3  * Description: this module implements an HTML 4.0 non-verifying parser
     4  *              with API compatible with the XML parser ones. It should
     5  *              be able to parse "real world" HTML, even if severely
     6  *              broken from a specification point of view.
     7  *
     8  * Copy: See Copyright for the status of this software.
     9  *
    10  * Author: Daniel Veillard
    11  * Portion Copyright © 2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
    12  */
    13 
    14 /** @file
    15 @publishedAll
    16 @released
    17 */
    18 
    19 #ifndef HTML_PARSER_H
    20 #define HTML_PARSER_H
    21 
    22 #include <stdapis/libxml2/libxml2_parser.h>
    23 
    24 #ifdef __cplusplus
    25 extern "C" {
    26 #endif
    27 
    28 /*
    29  * Most of the back-end structures are shared between XML and HTML: the html* names below are typedef aliases of the xml* types.
    30  */
    31 typedef xmlParserCtxt htmlParserCtxt;
    32 typedef xmlParserCtxtPtr htmlParserCtxtPtr;
    33 typedef xmlParserNodeInfo htmlParserNodeInfo;
    34 typedef xmlSAXHandler htmlSAXHandler;
    35 typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
    36 typedef xmlParserInput htmlParserInput;
    37 typedef xmlParserInputPtr htmlParserInputPtr;
    38 typedef xmlDocPtr htmlDocPtr;
    39 typedef xmlNodePtr htmlNodePtr;
    40 
    41 /*
    42  * Internal description of an HTML element, representing HTML 4.01
    43  * and XHTML 1.0 (which share the same structure).
    44  */
    45 typedef struct _htmlElemDesc htmlElemDesc;
    46 typedef htmlElemDesc *htmlElemDescPtr;
    47 struct _htmlElemDesc {
    48     const char *name;   /* The tag name */
    49     char startTag;      /* Whether the start tag can be implied */
    50     char endTag;        /* Whether the end tag can be implied */
    51     char saveEndTag;    /* Whether the end tag should be saved */
    52     char empty;         /* Is this an empty element? */
    53     char depr;          /* Is this a deprecated element? */
    54     char dtd;           /* 1: only in the Loose DTD, 2: only in the Frameset DTD */
    55     char isinline;      /* 0: block element, 1: inline element */
    56     const char *desc;   /* The description */
    57 
    58 /* NRK Jan.2003
    59  * New fields encapsulating HTML structure
    60  *
    61  * Bugs:
    62  *      This is a very limited representation.  It fails to tell us when
    63  *      an element *requires* subelements (we only have whether they're
    64  *      allowed or not), and it doesn't tell us where CDATA and PCDATA
    65  *      are allowed.  Some element relationships are not fully represented:
    66  *      these are flagged with the word MODIFIER.
    67  */
    68     const char** subelts;               /* Allowed sub-elements of this element */
    69     const char* defaultsubelt;  /* Sub-element suggested for auto-repair
    70                                            if necessary, or NULL */
    71     const char** attrs_opt;             /* Optional attributes */
    72     const char** attrs_depr;            /* Additional deprecated attributes */
    73     const char** attrs_req;             /* Required attributes */
    74 };
    75 
    76 /*
    77  * Internal description of an HTML entity.
    78  */
    79 typedef struct _htmlEntityDesc htmlEntityDesc;
    80 typedef htmlEntityDesc *htmlEntityDescPtr;
    81 struct _htmlEntityDesc {
    82     unsigned int value; /* The Unicode value for the character */
    83     const char *name;   /* The entity name */
    84     const char *desc;   /* The description */
    85 };
    86 
    87 #if defined(LIBXML_HTML_ENABLED) || defined(XMLENGINE_XSLT)
    88 /*
    89  * There are only a few public functions.
    90  */
    91 XMLPUBFUN const htmlElemDesc * XMLCALL
    92                         htmlTagLookup   (const xmlChar *tag);
    93 
    94 #endif /* LIBXML_HTML_ENABLED || XMLENGINE_XSLT */
    95 
    96 #ifdef LIBXML_HTML_ENABLED
    97 
    98 XMLPUBFUN const htmlEntityDesc * XMLCALL
    99                         htmlEntityLookup(const xmlChar *name);
   100 XMLPUBFUN const htmlEntityDesc * XMLCALL
   101                         htmlEntityValueLookup(unsigned int value);
   102 
   103 XMLPUBFUN int XMLCALL
   104                         htmlIsAutoClosed(htmlDocPtr doc,
   105                                          htmlNodePtr elem);
   106 XMLPUBFUN int XMLCALL
   107                         htmlAutoCloseTag(htmlDocPtr doc,
   108                                          const xmlChar *name,
   109                                          htmlNodePtr elem);
   110 XMLPUBFUN const htmlEntityDesc * XMLCALL
   111                         htmlParseEntityRef(htmlParserCtxtPtr ctxt,
   112                                          const xmlChar **str);
   113 XMLPUBFUN int XMLCALL
   114                         htmlParseCharRef(htmlParserCtxtPtr ctxt);
   115 XMLPUBFUN void XMLCALL
   116                         htmlParseElement(htmlParserCtxtPtr ctxt);
   117 
   118 XMLPUBFUN htmlParserCtxtPtr XMLCALL
   119                         htmlCreateMemoryParserCtxt(const char *buffer,
   120                                                    int size);
   121 
   122 XMLPUBFUN int XMLCALL
   123                         htmlParseDocument(htmlParserCtxtPtr ctxt);
   124 XMLPUBFUN htmlDocPtr XMLCALL
   125                         htmlSAXParseDoc (xmlChar *cur,
   126                                          const char *encoding,
   127                                          htmlSAXHandlerPtr sax,
   128                                          void *userData);
   129 XMLPUBFUN htmlDocPtr XMLCALL
   130                         htmlParseDoc    (xmlChar *cur,
   131                                          const char *encoding);
   132 XMLPUBFUN htmlDocPtr XMLCALL
   133                         htmlSAXParseFile(const char *filename,
   134                                          const char *encoding,
   135                                          htmlSAXHandlerPtr sax,
   136                                          void *userData);
   137 XMLPUBFUN htmlDocPtr XMLCALL
   138                         htmlParseFile   (const char *filename,
   139                                          const char *encoding);
   140 XMLPUBFUN int XMLCALL
   141                         UTF8ToHtml      (unsigned char *out,
   142                                          int *outlen,
   143                                          const unsigned char *in,
   144                                          int *inlen);
   145 XMLPUBFUN int XMLCALL
   146                         htmlEncodeEntities(unsigned char *out,
   147                                          int *outlen,
   148                                          const unsigned char *in,
   149                                          int *inlen, int quoteChar);
   150 XMLPUBFUN int XMLCALL
   151                         htmlIsScriptAttribute(const xmlChar *name);
   152 XMLPUBFUN int XMLCALL
   153                         htmlHandleOmittedElem(int val);
   154 
   155 #ifdef LIBXML_PUSH_ENABLED
   156 /**
   157  * Interfaces for the Push mode.
   158  */
   159 XMLPUBFUN htmlParserCtxtPtr XMLCALL
   160                         htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
   161                                                  void *user_data,
   162                                                  const char *chunk,
   163                                                  int size,
   164                                                  const char *filename,
   165                                                  xmlCharEncoding enc);
   166 XMLPUBFUN int XMLCALL
   167                         htmlParseChunk          (htmlParserCtxtPtr ctxt,
   168                                                  const char *chunk,
   169                                                  int size,
   170                                                  int terminate);
   171 #endif /* LIBXML_PUSH_ENABLED */
   172 
   173 XMLPUBFUN void XMLCALL
   174                         htmlFreeParserCtxt      (htmlParserCtxtPtr ctxt);
   175 
   176 /*
   177  * New set of simpler/more flexible APIs
   178  */
   179 /**
   180  * htmlParserOption:
   181  *
   182  * This is the set of HTML parser options that can be passed down
   183  * to the htmlReadDoc() and similar calls.
   184  */
   185 typedef enum {
   186     HTML_PARSE_NOERROR  = 1<<5, /* suppress error reports */
   187     HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
   188     HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
   189     HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
   190     HTML_PARSE_NONET    = 1<<11 /* forbid network access */
   191 } htmlParserOption;
   192 
   193 XMLPUBFUN void XMLCALL
   194                 htmlCtxtReset           (htmlParserCtxtPtr ctxt);
   195 XMLPUBFUN int XMLCALL
   196                 htmlCtxtUseOptions      (htmlParserCtxtPtr ctxt,
   197                                          int options);
   198 XMLPUBFUN htmlDocPtr XMLCALL
   199                 htmlReadDoc             (const xmlChar *cur,
   200                                          const char *URL,
   201                                          const char *encoding,
   202                                          int options);
   203 XMLPUBFUN htmlDocPtr XMLCALL
   204                 htmlReadFile            (const char *URL,
   205                                          const char *encoding,
   206                                          int options);
   207 XMLPUBFUN htmlDocPtr XMLCALL
   208                 htmlReadMemory          (const char *buffer,
   209                                          int size,
   210                                          const char *URL,
   211                                          const char *encoding,
   212                                          int options);
   213 XMLPUBFUN htmlDocPtr XMLCALL
   214                 htmlReadFd              (int fd,
   215                                          const char *URL,
   216                                          const char *encoding,
   217                                          int options);
   218 XMLPUBFUN htmlDocPtr XMLCALL
   219                 htmlReadIO              (xmlInputReadCallback ioread,
   220                                          xmlInputCloseCallback ioclose,
   221                                          void *ioctx,
   222                                          const char *URL,
   223                                          const char *encoding,
   224                                          int options);
   225 XMLPUBFUN htmlDocPtr XMLCALL
   226                 htmlCtxtReadDoc         (xmlParserCtxtPtr ctxt,
   227                                          const xmlChar *cur,
   228                                          const char *URL,
   229                                          const char *encoding,
   230                                          int options);
   231 XMLPUBFUN htmlDocPtr XMLCALL
   232                 htmlCtxtReadFile                (xmlParserCtxtPtr ctxt,
   233                                          const char *filename,
   234                                          const char *encoding,
   235                                          int options);
   236 XMLPUBFUN htmlDocPtr XMLCALL
   237                 htmlCtxtReadMemory              (xmlParserCtxtPtr ctxt,
   238                                          const char *buffer,
   239                                          int size,
   240                                          const char *URL,
   241                                          const char *encoding,
   242                                          int options);
   243 XMLPUBFUN htmlDocPtr XMLCALL
   244                 htmlCtxtReadFd          (xmlParserCtxtPtr ctxt,
   245                                          int fd,
   246                                          const char *URL,
   247                                          const char *encoding,
   248                                          int options);
   249 XMLPUBFUN htmlDocPtr XMLCALL
   250                 htmlCtxtReadIO          (xmlParserCtxtPtr ctxt,
   251                                          xmlInputReadCallback ioread,
   252                                          xmlInputCloseCallback ioclose,
   253                                          void *ioctx,
   254                                          const char *URL,
   255                                          const char *encoding,
   256                                          int options);
   257 
   258 /* NRK/Jan2003: further knowledge of HTML structure.
   259  * htmlStatus is the return type of htmlAttrAllowed(), htmlElementStatusHere() and htmlNodeStatus(). */
   260 typedef enum {
   261   HTML_NA = 0 ,         /* something we don't check at all */
   262   HTML_INVALID = 0x1 ,
   263   HTML_DEPRECATED = 0x2 ,
   264   HTML_VALID = 0x4 ,
   265   HTML_REQUIRED = 0xc /* 0x8 | HTML_VALID: VALID bit set so ( & HTML_VALID ) is TRUE */
   266 } htmlStatus ;
   267 
   268 /* Using htmlElemDesc rather than name here, to emphasise the fact
   269    that otherwise there's a lookup overhead
   270 */
   271 XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
   272 XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
   273 XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
   274 XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ;
   275 /**
   276  * htmlDefaultSubelement:
   277  * @param elt HTML element (pointer to an htmlElemDesc; any pointer expression is accepted)
   278  *
   279  * Returns the default subelement for this element (its defaultsubelt field, which may be NULL)
   280  */
   281 #define htmlDefaultSubelement(elt) ((elt)->defaultsubelt)
   282 /**
   283  * htmlElementAllowedHereDesc:
   284  * @param parent HTML parent element
   285  * @param elt HTML element
   286  *
   287  * Checks whether an HTML element description may be a direct child of
   288  * the specified element, by passing elt's name to htmlElementAllowedHere().
   289  *
   290  * Returns 1 if allowed; 0 otherwise.
   291  */
   292 #define htmlElementAllowedHereDesc(parent,elt) \
   293         htmlElementAllowedHere((parent), (elt)->name)
   294 /**
   295  * htmlRequiredAttrs:
   296  * @param elt HTML element (pointer to an htmlElemDesc)
   297  *
   298  * Returns the attributes required for the specified element (its attrs_req field).
   299  */
   300 #define htmlRequiredAttrs(elt) ((elt)->attrs_req)
   301 
   302 
   303 #endif /* LIBXML_HTML_ENABLED */
   304 
   305 #ifdef __cplusplus
   306 }
   307 #endif
   308 
   309 #endif /* HTML_PARSER_H */