epoc32/include/stdapis/libxml2/libxml2_htmlparser.h
branchSymbian3
changeset 4 837f303aceeb
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/epoc32/include/stdapis/libxml2/libxml2_htmlparser.h	Wed Mar 31 12:33:34 2010 +0100
     1.3 @@ -0,0 +1,309 @@
     1.4 +/*
     1.5 + * Summary: interface for an HTML 4.0 non-verifying parser
     1.6 + * Description: this module implements an HTML 4.0 non-verifying parser
     1.7 + *              with API compatible with the XML parser ones. It should
     1.8 + *              be able to parse "real world" HTML, even if severely
     1.9 + *              broken from a specification point of view.
    1.10 + *
    1.11 + * Copy: See Copyright for the status of this software.
    1.12 + *
    1.13 + * Author: Daniel Veillard
    1.14 + * Portion Copyright © 2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
    1.15 + */
    1.16 +
    1.17 +/** @file
    1.18 +@publishedAll
    1.19 +@released
    1.20 +*/
    1.21 +
    1.22 +#ifndef HTML_PARSER_H
    1.23 +#define HTML_PARSER_H
    1.24 +
    1.25 +#include <stdapis/libxml2/libxml2_parser.h>
    1.26 +
    1.27 +#ifdef __cplusplus
    1.28 +extern "C" {
    1.29 +#endif
    1.30 +
    1.31 +/*
    1.32 + * Most of the back-end structures are shared between XML and HTML: each html* name below is a typedef alias of the matching xml* type.
    1.33 + */
    1.34 +typedef xmlParserCtxt htmlParserCtxt;
    1.35 +typedef xmlParserCtxtPtr htmlParserCtxtPtr;
    1.36 +typedef xmlParserNodeInfo htmlParserNodeInfo;
    1.37 +typedef xmlSAXHandler htmlSAXHandler;
    1.38 +typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
    1.39 +typedef xmlParserInput htmlParserInput;
    1.40 +typedef xmlParserInputPtr htmlParserInputPtr;
    1.41 +typedef xmlDocPtr htmlDocPtr;
    1.42 +typedef xmlNodePtr htmlNodePtr;
    1.43 +
    1.44 +/*
    1.45 + * Internal description of an HTML element, representing HTML 4.01
    1.46 + * and XHTML 1.0 (which share the same structure).
    1.47 + */
    1.48 +typedef struct _htmlElemDesc htmlElemDesc;
    1.49 +typedef htmlElemDesc *htmlElemDescPtr;
    1.50 +struct _htmlElemDesc {
    1.51 +    const char *name;   /* The tag name */
    1.52 +    char startTag;      /* Whether the start tag can be implied */
    1.53 +    char endTag;        /* Whether the end tag can be implied */
    1.54 +    char saveEndTag;    /* Whether the end tag should be saved */
    1.55 +    char empty;         /* Whether this is an empty element */
    1.56 +    char depr;          /* Whether this is a deprecated element */
    1.57 +    char dtd;           /* 1: only in the Loose DTD, 2: only in the Frameset DTD */
    1.58 +    char isinline;      /* 0: block element, 1: inline element */
    1.59 +    const char *desc;   /* The description */
    1.60 +
    1.61 +/* NRK Jan.2003
    1.62 + * Fields added to describe HTML structure (allowed sub-elements and attributes).
    1.63 + *
    1.64 + * Known limitations:
    1.65 + *      This is a very limited representation.  It fails to tell us when
    1.66 + *      an element *requires* subelements (we only record whether they are
    1.67 + *      allowed or not), and it doesn't tell us where CDATA and PCDATA
    1.68 + *      are allowed.  Some element relationships are not fully represented:
    1.69 + *      these are flagged with the word MODIFIER.
    1.70 + */
    1.71 +    const char** subelts;               /* Allowed sub-elements of this element */
    1.72 +    const char* defaultsubelt;  /* Sub-element suggested for auto-repair
    1.73 +                                           when necessary, or NULL */
    1.74 +    const char** attrs_opt;             /* Optional attributes */
    1.75 +    const char** attrs_depr;            /* Additional deprecated attributes */
    1.76 +    const char** attrs_req;             /* Required attributes */
    1.77 +};
    1.78 +
    1.79 +/*
    1.80 + * Internal description of an HTML entity: its Unicode value, its name and a description.
    1.81 + */
    1.82 +typedef struct _htmlEntityDesc htmlEntityDesc;
    1.83 +typedef htmlEntityDesc *htmlEntityDescPtr;
    1.84 +struct _htmlEntityDesc {
    1.85 +    unsigned int value; /* The Unicode value for the character */
    1.86 +    const char *name;   /* The entity name */
    1.87 +    const char *desc;   /* The description */
    1.88 +};
    1.89 +
    1.90 +#if defined(LIBXML_HTML_ENABLED) || defined(XMLENGINE_XSLT)
    1.91 +/*
    1.92 + * There are only a few public functions.
    1.93 + */
    1.94 +XMLPUBFUN const htmlElemDesc * XMLCALL
    1.95 +                        htmlTagLookup   (const xmlChar *tag);
    1.96 +
    1.97 +#endif /* LIBXML_HTML_ENABLED || XMLENGINE_XSLT */
    1.98 +
    1.99 +#ifdef LIBXML_HTML_ENABLED
   1.100 +
   1.101 +XMLPUBFUN const htmlEntityDesc * XMLCALL
   1.102 +                        htmlEntityLookup(const xmlChar *name);
   1.103 +XMLPUBFUN const htmlEntityDesc * XMLCALL
   1.104 +                        htmlEntityValueLookup(unsigned int value);
   1.105 +
   1.106 +XMLPUBFUN int XMLCALL
   1.107 +                        htmlIsAutoClosed(htmlDocPtr doc,
   1.108 +                                         htmlNodePtr elem);
   1.109 +XMLPUBFUN int XMLCALL
   1.110 +                        htmlAutoCloseTag(htmlDocPtr doc,
   1.111 +                                         const xmlChar *name,
   1.112 +                                         htmlNodePtr elem);
   1.113 +XMLPUBFUN const htmlEntityDesc * XMLCALL
   1.114 +                        htmlParseEntityRef(htmlParserCtxtPtr ctxt,
   1.115 +                                         const xmlChar **str);
   1.116 +XMLPUBFUN int XMLCALL
   1.117 +                        htmlParseCharRef(htmlParserCtxtPtr ctxt);
   1.118 +XMLPUBFUN void XMLCALL
   1.119 +                        htmlParseElement(htmlParserCtxtPtr ctxt);
   1.120 +
   1.121 +XMLPUBFUN htmlParserCtxtPtr XMLCALL
   1.122 +                        htmlCreateMemoryParserCtxt(const char *buffer,
   1.123 +                                                   int size);
   1.124 +
   1.125 +XMLPUBFUN int XMLCALL
   1.126 +                        htmlParseDocument(htmlParserCtxtPtr ctxt);
   1.127 +XMLPUBFUN htmlDocPtr XMLCALL
   1.128 +                        htmlSAXParseDoc (xmlChar *cur,
   1.129 +                                         const char *encoding,
   1.130 +                                         htmlSAXHandlerPtr sax,
   1.131 +                                         void *userData);
   1.132 +XMLPUBFUN htmlDocPtr XMLCALL
   1.133 +                        htmlParseDoc    (xmlChar *cur,
   1.134 +                                         const char *encoding);
   1.135 +XMLPUBFUN htmlDocPtr XMLCALL
   1.136 +                        htmlSAXParseFile(const char *filename,
   1.137 +                                         const char *encoding,
   1.138 +                                         htmlSAXHandlerPtr sax,
   1.139 +                                         void *userData);
   1.140 +XMLPUBFUN htmlDocPtr XMLCALL
   1.141 +                        htmlParseFile   (const char *filename,
   1.142 +                                         const char *encoding);
   1.143 +XMLPUBFUN int XMLCALL
   1.144 +                        UTF8ToHtml      (unsigned char *out,
   1.145 +                                         int *outlen,
   1.146 +                                         const unsigned char *in,
   1.147 +                                         int *inlen);
   1.148 +XMLPUBFUN int XMLCALL
   1.149 +                        htmlEncodeEntities(unsigned char *out,
   1.150 +                                         int *outlen,
   1.151 +                                         const unsigned char *in,
   1.152 +                                         int *inlen, int quoteChar);
   1.153 +XMLPUBFUN int XMLCALL
   1.154 +                        htmlIsScriptAttribute(const xmlChar *name);
   1.155 +XMLPUBFUN int XMLCALL
   1.156 +                        htmlHandleOmittedElem(int val);
   1.157 +
   1.158 +#ifdef LIBXML_PUSH_ENABLED
   1.159 +/**
   1.160 + * Interfaces for the Push mode.
   1.161 + */
   1.162 +XMLPUBFUN htmlParserCtxtPtr XMLCALL
   1.163 +                        htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
   1.164 +                                                 void *user_data,
   1.165 +                                                 const char *chunk,
   1.166 +                                                 int size,
   1.167 +                                                 const char *filename,
   1.168 +                                                 xmlCharEncoding enc);
   1.169 +XMLPUBFUN int XMLCALL
   1.170 +                        htmlParseChunk          (htmlParserCtxtPtr ctxt,
   1.171 +                                                 const char *chunk,
   1.172 +                                                 int size,
   1.173 +                                                 int terminate);
   1.174 +#endif /* LIBXML_PUSH_ENABLED */
   1.175 +
   1.176 +XMLPUBFUN void XMLCALL
   1.177 +                        htmlFreeParserCtxt      (htmlParserCtxtPtr ctxt);
   1.178 +
   1.179 +/*
   1.180 + * New set of simpler/more flexible APIs
   1.181 + */
   1.182 +/**
   1.183 + * htmlParserOption:
   1.184 + *
   1.185 + * This is the set of HTML parser options that can be passed down
   1.186 + * to the htmlReadDoc() and similar calls. Each option is a distinct bit (1<<n).
   1.187 + */
   1.188 +typedef enum {
   1.189 +    HTML_PARSE_NOERROR  = 1<<5, /* suppress error reports */
   1.190 +    HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
   1.191 +    HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
   1.192 +    HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
   1.193 +    HTML_PARSE_NONET    = 1<<11 /* forbid network access */
   1.194 +} htmlParserOption;
   1.195 +
   1.196 +XMLPUBFUN void XMLCALL
   1.197 +                htmlCtxtReset           (htmlParserCtxtPtr ctxt);
   1.198 +XMLPUBFUN int XMLCALL
   1.199 +                htmlCtxtUseOptions      (htmlParserCtxtPtr ctxt,
   1.200 +                                         int options);
   1.201 +XMLPUBFUN htmlDocPtr XMLCALL
   1.202 +                htmlReadDoc             (const xmlChar *cur,
   1.203 +                                         const char *URL,
   1.204 +                                         const char *encoding,
   1.205 +                                         int options);
   1.206 +XMLPUBFUN htmlDocPtr XMLCALL
   1.207 +                htmlReadFile            (const char *URL,
   1.208 +                                         const char *encoding,
   1.209 +                                         int options);
   1.210 +XMLPUBFUN htmlDocPtr XMLCALL
   1.211 +                htmlReadMemory          (const char *buffer,
   1.212 +                                         int size,
   1.213 +                                         const char *URL,
   1.214 +                                         const char *encoding,
   1.215 +                                         int options);
   1.216 +XMLPUBFUN htmlDocPtr XMLCALL
   1.217 +                htmlReadFd              (int fd,
   1.218 +                                         const char *URL,
   1.219 +                                         const char *encoding,
   1.220 +                                         int options);
   1.221 +XMLPUBFUN htmlDocPtr XMLCALL
   1.222 +                htmlReadIO              (xmlInputReadCallback ioread,
   1.223 +                                         xmlInputCloseCallback ioclose,
   1.224 +                                         void *ioctx,
   1.225 +                                         const char *URL,
   1.226 +                                         const char *encoding,
   1.227 +                                         int options);
   1.228 +XMLPUBFUN htmlDocPtr XMLCALL
   1.229 +                htmlCtxtReadDoc         (xmlParserCtxtPtr ctxt,
   1.230 +                                         const xmlChar *cur,
   1.231 +                                         const char *URL,
   1.232 +                                         const char *encoding,
   1.233 +                                         int options);
   1.234 +XMLPUBFUN htmlDocPtr XMLCALL
   1.235 +                htmlCtxtReadFile                (xmlParserCtxtPtr ctxt,
   1.236 +                                         const char *filename,
   1.237 +                                         const char *encoding,
   1.238 +                                         int options);
   1.239 +XMLPUBFUN htmlDocPtr XMLCALL
   1.240 +                htmlCtxtReadMemory              (xmlParserCtxtPtr ctxt,
   1.241 +                                         const char *buffer,
   1.242 +                                         int size,
   1.243 +                                         const char *URL,
   1.244 +                                         const char *encoding,
   1.245 +                                         int options);
   1.246 +XMLPUBFUN htmlDocPtr XMLCALL
   1.247 +                htmlCtxtReadFd          (xmlParserCtxtPtr ctxt,
   1.248 +                                         int fd,
   1.249 +                                         const char *URL,
   1.250 +                                         const char *encoding,
   1.251 +                                         int options);
   1.252 +XMLPUBFUN htmlDocPtr XMLCALL
   1.253 +                htmlCtxtReadIO          (xmlParserCtxtPtr ctxt,
   1.254 +                                         xmlInputReadCallback ioread,
   1.255 +                                         xmlInputCloseCallback ioclose,
   1.256 +                                         void *ioctx,
   1.257 +                                         const char *URL,
   1.258 +                                         const char *encoding,
   1.259 +                                         int options);
   1.260 +
   1.261 +/* NRK/Jan2003: further knowledge of HTML structure; htmlStatus is the return type of htmlAttrAllowed(), htmlElementStatusHere() and htmlNodeStatus().
   1.262 + */
   1.263 +typedef enum {
   1.264 +  HTML_NA = 0 ,         /* something we don't check at all */
   1.265 +  HTML_INVALID = 0x1 ,
   1.266 +  HTML_DEPRECATED = 0x2 ,
   1.267 +  HTML_VALID = 0x4 ,
   1.268 +  HTML_REQUIRED = 0xc /* 0x8 | HTML_VALID: the VALID bit is set, so ( status & HTML_VALID ) is true */
   1.269 +} htmlStatus ;
   1.270 +
   1.271 +/* Using htmlElemDesc rather than name here, to emphasise the fact
   1.272 +   that otherwise there's a lookup overhead
   1.273 +*/
   1.274 +XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
   1.275 +XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
   1.276 +XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
   1.277 +XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ;
   1.278 +/**
   1.279 + * htmlDefaultSubelement:
   1.280 + * @param elt pointer to an HTML element description; may be any pointer expression
   1.281 + *            (the argument and the whole expansion are parenthesised, so `p + 1` or `c ? a : b` expand correctly)
   1.282 + * Returns the defaultsubelt field of elt, i.e. the default subelement for this element
   1.283 + */
   1.284 +#define htmlDefaultSubelement(elt) ((elt)->defaultsubelt)
   1.285 +/**
   1.286 + * htmlElementAllowedHereDesc:
   1.287 + * @param parent HTML parent element description, passed through to htmlElementAllowedHere()
   1.288 + * @param elt HTML element description of the candidate child; only its name field is used
   1.289 + *
   1.290 + * Checks whether an HTML element description may be a
   1.291 + * direct child of the specified element.
   1.292 + * Expands to htmlElementAllowedHere((parent), (elt)->name).
   1.293 + * Returns 1 if allowed; 0 otherwise.
   1.294 + */
   1.295 +#define htmlElementAllowedHereDesc(parent,elt) \
   1.296 +        htmlElementAllowedHere((parent), (elt)->name)
   1.297 +/**
   1.298 + * htmlRequiredAttrs:
   1.299 + * @param elt pointer to an HTML element description
   1.300 + *
   1.301 + * Returns the attrs_req field of elt, i.e. the attributes required for that element.
   1.302 + */
   1.303 +#define htmlRequiredAttrs(elt) ((elt)->attrs_req)
   1.304 +
   1.305 +
   1.306 +#endif /* LIBXML_HTML_ENABLED */
   1.307 +
   1.308 +#ifdef __cplusplus
   1.309 +}
   1.310 +#endif
   1.311 +
   1.312 +#endif /* HTML_PARSER_H */