1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/epoc32/include/stdapis/libxml2/libxml2_htmlparser.h Wed Mar 31 12:33:34 2010 +0100
1.3 @@ -0,0 +1,309 @@
1.4 +/*
1.5 + * Summary: interface for an HTML 4.0 non-verifying parser
1.6 + * Description: this module implements an HTML 4.0 non-verifying parser
1.7 + * with API compatible with the XML parser ones. It should
1.8 + * be able to parse "real world" HTML, even if severely
1.9 + * broken from a specification point of view.
1.10 + *
1.11 + * Copy: See Copyright for the status of this software.
1.12 + *
1.13 + * Author: Daniel Veillard
1.14 + * Portion Copyright © 2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
1.15 + */
1.16 +
1.17 +/** @file
1.18 +@publishedAll
1.19 +@released
1.20 +*/
1.21 +
1.22 +#ifndef HTML_PARSER_H
1.23 +#define HTML_PARSER_H
1.24 +
1.25 +#include <stdapis/libxml2/libxml2_parser.h>
1.26 +
1.27 +#ifdef __cplusplus
1.28 +extern "C" {
1.29 +#endif
1.30 +
1.31 +/*
1.32 + * Most of the back-end structures from XML and HTML are shared.
1.33 + */
1.34 +typedef xmlParserCtxt htmlParserCtxt;
1.35 +typedef xmlParserCtxtPtr htmlParserCtxtPtr;
1.36 +typedef xmlParserNodeInfo htmlParserNodeInfo;
1.37 +typedef xmlSAXHandler htmlSAXHandler;
1.38 +typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
1.39 +typedef xmlParserInput htmlParserInput;
1.40 +typedef xmlParserInputPtr htmlParserInputPtr;
1.41 +typedef xmlDocPtr htmlDocPtr;
1.42 +typedef xmlNodePtr htmlNodePtr;
1.43 +
1.44 +/*
1.45 + * Internal description of an HTML element, representing HTML 4.01
1.46 + * and XHTML 1.0 (which share the same structure).
1.47 + */
1.48 +typedef struct _htmlElemDesc htmlElemDesc;
1.49 +typedef htmlElemDesc *htmlElemDescPtr;
1.50 +struct _htmlElemDesc {
1.51 + const char *name; /* The tag name */
1.52 + char startTag; /* Whether the start tag can be implied */
1.53 + char endTag; /* Whether the end tag can be implied */
1.54 + char saveEndTag; /* Whether the end tag should be saved */
1.55 + char empty; /* Is this an empty element? */
1.56 + char depr; /* Is this a deprecated element? */
1.57 + char dtd; /* 1: only in the Loose DTD, 2: only in the Frameset DTD */
1.58 + char isinline; /* 0: block element, 1: inline element */
1.59 + const char *desc; /* The description */
1.60 +
1.61 +/* NRK Jan.2003
1.62 + * New fields encapsulating HTML structure.
1.63 + *
1.64 + * Bugs:
1.65 + * This is a very limited representation. It fails to tell us when
1.66 + * an element *requires* subelements (we only know whether they are
1.67 + * allowed or not), and it doesn't tell us where CDATA and PCDATA
1.68 + * are allowed. Some element relationships are not fully represented:
1.69 + * these are flagged with the word MODIFIER.
1.70 + */
1.71 + const char** subelts; /* Allowed sub-elements of this element */
1.72 + const char* defaultsubelt; /* Sub-element suggested for auto-repair
1.73 + if necessary, or NULL */
1.74 + const char** attrs_opt; /* Optional attributes */
1.75 + const char** attrs_depr; /* Additional deprecated attributes */
1.76 + const char** attrs_req; /* Required attributes */
1.77 +};
1.78 +
1.79 +/*
1.80 + * Internal description of an HTML entity.
1.81 + */
1.82 +typedef struct _htmlEntityDesc htmlEntityDesc;
1.83 +typedef htmlEntityDesc *htmlEntityDescPtr;
1.84 +struct _htmlEntityDesc {
1.85 + unsigned int value; /* The Unicode value of the character */
1.86 + const char *name; /* The entity name */
1.87 + const char *desc; /* The description */
1.88 +};
1.89 +
1.90 +#if defined(LIBXML_HTML_ENABLED) || defined(XMLENGINE_XSLT)
1.91 +/*
1.92 + * There are only a few public functions; this one is also exposed when only XMLENGINE_XSLT is defined.
1.93 + */
1.94 +XMLPUBFUN const htmlElemDesc * XMLCALL
1.95 + htmlTagLookup (const xmlChar *tag);
1.96 +
1.97 +#endif /* LIBXML_HTML_ENABLED || XMLENGINE_XSLT */
1.98 +
1.99 +#ifdef LIBXML_HTML_ENABLED
1.100 +
1.101 +XMLPUBFUN const htmlEntityDesc * XMLCALL
1.102 + htmlEntityLookup(const xmlChar *name);
1.103 +XMLPUBFUN const htmlEntityDesc * XMLCALL
1.104 + htmlEntityValueLookup(unsigned int value);
1.105 +
1.106 +XMLPUBFUN int XMLCALL
1.107 + htmlIsAutoClosed(htmlDocPtr doc,
1.108 + htmlNodePtr elem);
1.109 +XMLPUBFUN int XMLCALL
1.110 + htmlAutoCloseTag(htmlDocPtr doc,
1.111 + const xmlChar *name,
1.112 + htmlNodePtr elem);
1.113 +XMLPUBFUN const htmlEntityDesc * XMLCALL
1.114 + htmlParseEntityRef(htmlParserCtxtPtr ctxt,
1.115 + const xmlChar **str);
1.116 +XMLPUBFUN int XMLCALL
1.117 + htmlParseCharRef(htmlParserCtxtPtr ctxt);
1.118 +XMLPUBFUN void XMLCALL
1.119 + htmlParseElement(htmlParserCtxtPtr ctxt);
1.120 +
1.121 +XMLPUBFUN htmlParserCtxtPtr XMLCALL
1.122 + htmlCreateMemoryParserCtxt(const char *buffer,
1.123 + int size);
1.124 +
1.125 +XMLPUBFUN int XMLCALL
1.126 + htmlParseDocument(htmlParserCtxtPtr ctxt);
1.127 +XMLPUBFUN htmlDocPtr XMLCALL
1.128 + htmlSAXParseDoc (xmlChar *cur,
1.129 + const char *encoding,
1.130 + htmlSAXHandlerPtr sax,
1.131 + void *userData);
1.132 +XMLPUBFUN htmlDocPtr XMLCALL
1.133 + htmlParseDoc (xmlChar *cur,
1.134 + const char *encoding);
1.135 +XMLPUBFUN htmlDocPtr XMLCALL
1.136 + htmlSAXParseFile(const char *filename,
1.137 + const char *encoding,
1.138 + htmlSAXHandlerPtr sax,
1.139 + void *userData);
1.140 +XMLPUBFUN htmlDocPtr XMLCALL
1.141 + htmlParseFile (const char *filename,
1.142 + const char *encoding);
1.143 +XMLPUBFUN int XMLCALL
1.144 + UTF8ToHtml (unsigned char *out,
1.145 + int *outlen,
1.146 + const unsigned char *in,
1.147 + int *inlen);
1.148 +XMLPUBFUN int XMLCALL
1.149 + htmlEncodeEntities(unsigned char *out,
1.150 + int *outlen,
1.151 + const unsigned char *in,
1.152 + int *inlen, int quoteChar);
1.153 +XMLPUBFUN int XMLCALL
1.154 + htmlIsScriptAttribute(const xmlChar *name);
1.155 +XMLPUBFUN int XMLCALL
1.156 + htmlHandleOmittedElem(int val);
1.157 +
1.158 +#ifdef LIBXML_PUSH_ENABLED
1.159 +/**
1.160 + * Interfaces for the Push mode.
1.161 + */
1.162 +XMLPUBFUN htmlParserCtxtPtr XMLCALL
1.163 + htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
1.164 + void *user_data,
1.165 + const char *chunk,
1.166 + int size,
1.167 + const char *filename,
1.168 + xmlCharEncoding enc);
1.169 +XMLPUBFUN int XMLCALL
1.170 + htmlParseChunk (htmlParserCtxtPtr ctxt,
1.171 + const char *chunk,
1.172 + int size,
1.173 + int terminate);
1.174 +#endif /* LIBXML_PUSH_ENABLED */
1.175 +
1.176 +XMLPUBFUN void XMLCALL
1.177 + htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
1.178 +
1.179 +/*
1.180 + * New set of simpler/more flexible APIs
1.181 + */
1.182 +/**
1.183 + * htmlParserOption:
1.184 + *
1.185 + * This is the set of HTML parser options that can be passed down
1.186 + * to the htmlReadDoc() and similar calls.
1.187 + */
1.188 +typedef enum {
1.189 + HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
1.190 + HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
1.191 + HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
1.192 + HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
1.193 + HTML_PARSE_NONET = 1<<11 /* Forbid network access */
1.194 +} htmlParserOption;
1.195 +
1.196 +XMLPUBFUN void XMLCALL
1.197 + htmlCtxtReset (htmlParserCtxtPtr ctxt);
1.198 +XMLPUBFUN int XMLCALL
1.199 + htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
1.200 + int options);
1.201 +XMLPUBFUN htmlDocPtr XMLCALL
1.202 + htmlReadDoc (const xmlChar *cur,
1.203 + const char *URL,
1.204 + const char *encoding,
1.205 + int options);
1.206 +XMLPUBFUN htmlDocPtr XMLCALL
1.207 + htmlReadFile (const char *URL,
1.208 + const char *encoding,
1.209 + int options);
1.210 +XMLPUBFUN htmlDocPtr XMLCALL
1.211 + htmlReadMemory (const char *buffer,
1.212 + int size,
1.213 + const char *URL,
1.214 + const char *encoding,
1.215 + int options);
1.216 +XMLPUBFUN htmlDocPtr XMLCALL
1.217 + htmlReadFd (int fd,
1.218 + const char *URL,
1.219 + const char *encoding,
1.220 + int options);
1.221 +XMLPUBFUN htmlDocPtr XMLCALL
1.222 + htmlReadIO (xmlInputReadCallback ioread,
1.223 + xmlInputCloseCallback ioclose,
1.224 + void *ioctx,
1.225 + const char *URL,
1.226 + const char *encoding,
1.227 + int options);
1.228 +XMLPUBFUN htmlDocPtr XMLCALL
1.229 + htmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
1.230 + const xmlChar *cur,
1.231 + const char *URL,
1.232 + const char *encoding,
1.233 + int options);
1.234 +XMLPUBFUN htmlDocPtr XMLCALL
1.235 + htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
1.236 + const char *filename,
1.237 + const char *encoding,
1.238 + int options);
1.239 +XMLPUBFUN htmlDocPtr XMLCALL
1.240 + htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
1.241 + const char *buffer,
1.242 + int size,
1.243 + const char *URL,
1.244 + const char *encoding,
1.245 + int options);
1.246 +XMLPUBFUN htmlDocPtr XMLCALL
1.247 + htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
1.248 + int fd,
1.249 + const char *URL,
1.250 + const char *encoding,
1.251 + int options);
1.252 +XMLPUBFUN htmlDocPtr XMLCALL
1.253 + htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
1.254 + xmlInputReadCallback ioread,
1.255 + xmlInputCloseCallback ioclose,
1.256 + void *ioctx,
1.257 + const char *URL,
1.258 + const char *encoding,
1.259 + int options);
1.260 +
1.261 +/* NRK/Jan2003: further knowledge of HTML structure.
1.262 + * htmlStatus is returned by htmlAttrAllowed(), htmlElementStatusHere() and htmlNodeStatus(). */
1.263 +typedef enum {
1.264 + HTML_NA = 0 , /* something we don't check at all */
1.265 + HTML_INVALID = 0x1 ,
1.266 + HTML_DEPRECATED = 0x2 ,
1.267 + HTML_VALID = 0x4 ,
1.268 + HTML_REQUIRED = 0xc /* 0x8 | HTML_VALID: VALID bit set so ( & HTML_VALID ) is TRUE */
1.269 +} htmlStatus ;
1.270 +
1.271 +/* These take an htmlElemDesc rather than a tag name, to emphasise the fact
1.272 + that otherwise there's a lookup overhead. "parent"/"elt" are element descriptions;
1.273 + "legacy" is an int flag (NOTE(review): presumably allows deprecated items - confirm). */
1.274 +XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc* elt, const xmlChar* attr, int legacy) ;
1.275 +XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc* parent, const xmlChar* elt) ;
1.276 +XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc* parent, const htmlElemDesc* elt) ;
1.277 +XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr node, int legacy) ;
1.278 +/**
1.279 + * htmlDefaultSubelement:
1.280 + * @param elt HTML element (any expression yielding a pointer to an htmlElemDesc)
1.281 + *
1.282 + * Returns the default subelement for this element (its defaultsubelt field)
1.283 + */
1.284 +#define htmlDefaultSubelement(elt) ((elt)->defaultsubelt)
1.285 +/**
1.286 + * htmlElementAllowedHereDesc:
1.287 + * @param parent HTML parent element description
1.288 + * @param elt HTML element description
1.289 + *
1.290 + * Checks whether an HTML element description may be a direct child of
1.291 + * the specified element, by passing elt's name to htmlElementAllowedHere().
1.292 + *
1.293 + * Returns 1 if allowed; 0 otherwise.
1.294 + */
1.295 +#define htmlElementAllowedHereDesc(parent,elt) \
1.296 + htmlElementAllowedHere((parent), (elt)->name)
1.297 +/**
1.298 + * htmlRequiredAttrs:
1.299 + * @param elt HTML element description
1.300 + *
1.301 + * Returns the required attributes (attrs_req) of the specified element.
1.302 + */
1.303 +#define htmlRequiredAttrs(elt) ((elt)->attrs_req)
1.304 +
1.305 +
1.306 +#endif /* LIBXML_HTML_ENABLED */
1.307 +
1.308 +#ifdef __cplusplus
1.309 +}
1.310 +#endif
1.311 +
1.312 +#endif /* HTML_PARSER_H */