/* Part of SWI-Prolog Author: Jan Wielemaker E-mail: J.Wielemaker@vu.nl WWW: http://www.swi-prolog.org Copyright (c) 2000-2014, University of Amsterdam All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SGML_PARSER_H_INCLUDED #define SGML_PARSER_H_INCLUDED #include "util.h" #include /******************************* * CALL-BACK * *******************************/ /* sgml_attribute->flags */ #define SGML_AT_DEFAULT 0x1 typedef struct _sgml_attribute { struct /* so we can free members */ { wchar_t *textW; /* UCS textual value */ intptr_t number; /* numeric value/length */ } value; dtd_attr *definition; /* DTD definition */ unsigned flags; /* additional flags */ } sgml_attribute; typedef struct _dtd_parser *dtd_parser_p; typedef int (*sgml_begin_element_f)(dtd_parser_p parser, dtd_element *e, size_t argc, sgml_attribute *argv); typedef int (*sgml_end_element_f)(dtd_parser_p parser, dtd_element *e); typedef int (*sgml_data_f)(dtd_parser_p parser, data_type type, size_t len, const wchar_t *text); typedef int (*sgml_wdata_f)(dtd_parser_p parser, data_type type, size_t len, const wchar_t *text); typedef int (*sgml_entity_f)(dtd_parser_p parser, dtd_entity *entity, int chr); typedef int (*sgml_pi_f)(dtd_parser_p parser, const ichar *pi); typedef int (*sgml_error_f)(dtd_parser_p parser, dtd_error *error); typedef int (*sgml_decl_f)(dtd_parser_p parser, const ichar *decl); #ifdef XMLNS typedef int (*xmlns_f)(dtd_parser_p parser, dtd_symbol *ns, dtd_symbol *url); #endif /******************************* * PARSER AND STATES * *******************************/ #define SGML_PARSER_MAGIC (0x834ab663) typedef enum { S_PCDATA, /* between declarations */ #ifdef UTF8 S_UTF8, /* Loading UTF-8 character */ #endif S_CDATA, /* non-parsed data */ S_RCDATA, /* CDATA+entities */ S_MSCDATA, /* */ S_EMSCDATA1, /* Seen ] in S_MSCDATA */ S_EMSCDATA2, /* Seen ]] in S_MSCDATA */ S_ECDATA1, /* Seen < in CDATA */ S_ECDATA2, /* Seen */ } dtdstate; typedef enum { DCL_DTD, /* DTD Declaration */ DCL_BEGIN, /* begin-tag */ DCL_END /* end-tag */ } dcl_type; typedef enum { MS_IGNORE, /* ignore this data */ MS_INCLUDE, /* process normally */ MS_CDATA, /* pass literally */ MS_RCDATA /* replace 
entities */
} marktype;

/* Event classification (EV_*).
 * NOTE(review): text has been lost after the EV_SHORTTAG comment.  As it
 * stands this enum has no closing brace and no typedef name, and the word
 * "flags" reads like the tail of a separate comment that introduced the
 * SGML_PARSER_* defines below.  Restore the missing part from the upstream
 * header; it is deliberately not reconstructed here.
 */
typedef enum
{ EV_EXPLICIT,                          /* Explicit event */
  EV_OMITTED,                           /* Omitted tag event */
  EV_SHORTTAG,                          /* SHORTTAG event: flags */

/* Flag bits (presumably for a flags member of the parser -- TODO confirm) */
#define SGML_PARSER_NODEFS 0x01         /* don't handle default atts */
#define SGML_PARSER_QUALIFY_ATTS 0x02   /* qualify attributes in XML mode */

/* The parser object that dtd_parser_p points to.
 * NOTE(review): only the leading members are visible in this part of the
 * file; the definition continues beyond it.
 */
typedef struct _dtd_parser
{ unsigned long magic;                  /* SGML_PARSER_MAGIC */
  dtd *dtd;                             /* DTD we are building */
  dtdstate state;                       /* current state */
  dtdstate cdata_state;                 /* S_CDATA/S_RCDATA */
  dtd_marked *marked;                   /* marked section stack */
  marktype mark_state;                  /* processing mode */
  dtd_element *empty_element;           /* empty of seen (NOTE(review): comment text looks garbled) */
  sgml_environment *environments;       /* Open environments */
  data_mode dmode;                      /* How to handle characters */
  int first;                            /* Just seen (NOTE(review): remainder of comment lost) */
  int waiting_for_net;                  /* waiting for / in