Incorporate Expat 1.95.6.

This commit is contained in:
Martin v. Löwis 2003-01-25 22:41:29 +00:00
parent 5a772d32f4
commit fc03a94aac
11 changed files with 5443 additions and 3941 deletions

View file

@ -1,5 +1,4 @@
/* /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission. See the file COPYING for copying permission.
*/ */

View file

@ -1,16 +1,28 @@
/* /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
See the file COPYING for copying permission. See the file COPYING for copying permission.
*/ */
#ifndef XmlParse_INCLUDED #ifndef XmlParse_INCLUDED
#define XmlParse_INCLUDED 1 #define XmlParse_INCLUDED 1
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XML_SetProcessingInstructionHandler XML_SetProcessingInstrHandler
#define XML_SetUnparsedEntityDeclHandler XML_SetUnparsedEntDeclHandler
#define XML_SetStartNamespaceDeclHandler XML_SetStartNamespcDeclHandler
#define XML_SetExternalEntityRefHandlerArg XML_SetExternalEntRefHandlerArg
#endif
#include <stdlib.h> #include <stdlib.h>
#ifndef XMLPARSEAPI #ifndef XMLPARSEAPI
# if defined(__declspec) && !defined(__BEOS__) && !defined(__CYGWIN__) #if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
#ifdef XML_STATIC
#define XMLPARSEAPI(type) type __cdecl
#else
#define XMLPARSEAPI(type) __declspec(dllimport) type __cdecl #define XMLPARSEAPI(type) __declspec(dllimport) type __cdecl
#endif
#else #else
#define XMLPARSEAPI(type) type #define XMLPARSEAPI(type) type
#endif #endif
@ -20,11 +32,60 @@ See the file COPYING for copying permission.
extern "C" { extern "C" {
#endif #endif
typedef void *XML_Parser; #ifdef XML_UNICODE_WCHAR_T
#define XML_UNICODE
#endif
/* Information is UTF-8 encoded. */ struct XML_ParserStruct;
typedef struct XML_ParserStruct *XML_Parser;
#ifdef XML_UNICODE /* Information is UTF-16 encoded. */
#ifdef XML_UNICODE_WCHAR_T
typedef wchar_t XML_Char;
typedef wchar_t XML_LChar;
#else
typedef unsigned short XML_Char;
typedef char XML_LChar;
#endif /* XML_UNICODE_WCHAR_T */
#else /* Information is UTF-8 encoded. */
typedef char XML_Char; typedef char XML_Char;
typedef char XML_LChar; typedef char XML_LChar;
#endif /* XML_UNICODE */
/* Should this be defined using stdbool.h when C99 is available? */
typedef unsigned char XML_Bool;
#define XML_TRUE ((XML_Bool) 1)
#define XML_FALSE ((XML_Bool) 0)
enum XML_Error {
XML_ERROR_NONE,
XML_ERROR_NO_MEMORY,
XML_ERROR_SYNTAX,
XML_ERROR_NO_ELEMENTS,
XML_ERROR_INVALID_TOKEN,
XML_ERROR_UNCLOSED_TOKEN,
XML_ERROR_PARTIAL_CHAR,
XML_ERROR_TAG_MISMATCH,
XML_ERROR_DUPLICATE_ATTRIBUTE,
XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
XML_ERROR_PARAM_ENTITY_REF,
XML_ERROR_UNDEFINED_ENTITY,
XML_ERROR_RECURSIVE_ENTITY_REF,
XML_ERROR_ASYNC_ENTITY,
XML_ERROR_BAD_CHAR_REF,
XML_ERROR_BINARY_ENTITY_REF,
XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
XML_ERROR_MISPLACED_XML_PI,
XML_ERROR_UNKNOWN_ENCODING,
XML_ERROR_INCORRECT_ENCODING,
XML_ERROR_UNCLOSED_CDATA_SECTION,
XML_ERROR_EXTERNAL_ENTITY_HANDLING,
XML_ERROR_NOT_STANDALONE,
XML_ERROR_UNEXPECTED_STATE,
XML_ERROR_ENTITY_DECLARED_IN_PE,
XML_ERROR_FEATURE_REQUIRES_XML_DTD,
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
};
enum XML_Content_Type { enum XML_Content_Type {
XML_CTYPE_EMPTY = 1, XML_CTYPE_EMPTY = 1,
@ -75,7 +136,6 @@ struct XML_cp {
description of the model argument. It's the caller's responsibility description of the model argument. It's the caller's responsibility
to free model when finished with it. to free model when finished with it.
*/ */
typedef void (*XML_ElementDeclHandler) (void *userData, typedef void (*XML_ElementDeclHandler) (void *userData,
const XML_Char *name, const XML_Char *name,
XML_Content *model); XML_Content *model);
@ -84,16 +144,14 @@ XMLPARSEAPI(void)
XML_SetElementDeclHandler(XML_Parser parser, XML_SetElementDeclHandler(XML_Parser parser,
XML_ElementDeclHandler eldecl); XML_ElementDeclHandler eldecl);
/* /* The Attlist declaration handler is called for *each* attribute. So
The Attlist declaration handler is called for *each* attribute. So
a single Attlist declaration with multiple attributes declared will a single Attlist declaration with multiple attributes declared will
generate multiple calls to this handler. The "default" parameter generate multiple calls to this handler. The "default" parameter
may be NULL in the case of the "#IMPLIED" or "#REQUIRED" keyword. may be NULL in the case of the "#IMPLIED" or "#REQUIRED"
The "isrequired" parameter will be true and the default value will keyword. The "isrequired" parameter will be true and the default
be NULL in the case of "#REQUIRED". If "isrequired" is true and value will be NULL in the case of "#REQUIRED". If "isrequired" is
default is non-NULL, then this is a "#FIXED" default. true and default is non-NULL, then this is a "#FIXED" default.
*/ */
typedef void (*XML_AttlistDeclHandler) (void *userData, typedef void (*XML_AttlistDeclHandler) (void *userData,
const XML_Char *elname, const XML_Char *elname,
const XML_Char *attname, const XML_Char *attname,
@ -105,15 +163,14 @@ XMLPARSEAPI(void)
XML_SetAttlistDeclHandler(XML_Parser parser, XML_SetAttlistDeclHandler(XML_Parser parser,
XML_AttlistDeclHandler attdecl); XML_AttlistDeclHandler attdecl);
/* The XML declaration handler is called for *both* XML declarations
/* The XML declaration handler is called for *both* XML declarations and and text declarations. The way to distinguish is that the version
text declarations. The way to distinguish is that the version parameter parameter will be NULL for text declarations. The encoding
will be null for text declarations. The encoding parameter may be null parameter may be NULL for XML declarations. The standalone
for XML declarations. The standalone parameter will be -1, 0, or 1 parameter will be -1, 0, or 1 indicating respectively that there
indicating respectively that there was no standalone parameter in was no standalone parameter in the declaration, that it was given
the declaration, that it was given as no, or that it was given as yes. as no, or that it was given as yes.
*/ */
typedef void (*XML_XmlDeclHandler) (void *userData, typedef void (*XML_XmlDeclHandler) (void *userData,
const XML_Char *version, const XML_Char *version,
const XML_Char *encoding, const XML_Char *encoding,
@ -131,26 +188,27 @@ typedef struct {
} XML_Memory_Handling_Suite; } XML_Memory_Handling_Suite;
/* Constructs a new parser; encoding is the encoding specified by the /* Constructs a new parser; encoding is the encoding specified by the
external protocol or null if there is none specified. */ external protocol or NULL if there is none specified.
*/
XMLPARSEAPI(XML_Parser) XMLPARSEAPI(XML_Parser)
XML_ParserCreate(const XML_Char *encoding); XML_ParserCreate(const XML_Char *encoding);
/* Constructs a new parser and namespace processor. Element type /* Constructs a new parser and namespace processor. Element type
names and attribute names that belong to a namespace will be expanded; names and attribute names that belong to a namespace will be
unprefixed attribute names are never expanded; unprefixed element type expanded; unprefixed attribute names are never expanded; unprefixed
names are expanded only if there is a default namespace. The expanded element type names are expanded only if there is a default
name is the concatenation of the namespace URI, the namespace namespace. The expanded name is the concatenation of the namespace
separator character, and the local part of the name. If the namespace URI, the namespace separator character, and the local part of the
separator is '\0' then the namespace URI and the local part will be name. If the namespace separator is '\0' then the namespace URI
concatenated without any separator. When a namespace is not declared, and the local part will be concatenated without any separator.
the name and prefix will be passed through without expansion. */ When a namespace is not declared, the name and prefix will be
passed through without expansion.
*/
XMLPARSEAPI(XML_Parser) XMLPARSEAPI(XML_Parser)
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
/* Constructs a new parser using the memory management suit referred to /* Constructs a new parser using the memory management suite referred to
by memsuite. If memsuite is NULL, then use the standard library memory by memsuite. If memsuite is NULL, then use the standard library memory
suite. If namespaceSeparator is non-NULL it creates a parser with suite. If namespaceSeparator is non-NULL it creates a parser with
namespace processing as described above. The character pointed at namespace processing as described above. The character pointed at
@ -159,15 +217,26 @@ XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
All further memory operations used for the created parser will come from All further memory operations used for the created parser will come from
the given suite. the given suite.
*/ */
XMLPARSEAPI(XML_Parser) XMLPARSEAPI(XML_Parser)
XML_ParserCreate_MM(const XML_Char *encoding, XML_ParserCreate_MM(const XML_Char *encoding,
const XML_Memory_Handling_Suite *memsuite, const XML_Memory_Handling_Suite *memsuite,
const XML_Char *namespaceSeparator); const XML_Char *namespaceSeparator);
/* atts is array of name/value pairs, terminated by 0; /* Prepare a parser object to be re-used. This is particularly
names and values are 0 terminated. */ valuable when memory allocation overhead is disproportionately high,
such as when a large number of small documents need to be parsed.
All handlers are cleared from the parser, except for the
unknownEncodingHandler. The parser's external state is re-initialized
except for the values of ns and ns_triplets.
Added in Expat 1.95.3.
*/
XMLPARSEAPI(XML_Bool)
XML_ParserReset(XML_Parser parser, const XML_Char *encoding);
/* atts is array of name/value pairs, terminated by 0;
names and values are 0 terminated.
*/
typedef void (*XML_StartElementHandler)(void *userData, typedef void (*XML_StartElementHandler)(void *userData,
const XML_Char *name, const XML_Char *name,
const XML_Char **atts); const XML_Char **atts);
@ -192,26 +261,26 @@ typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data);
typedef void (*XML_StartCdataSectionHandler)(void *userData); typedef void (*XML_StartCdataSectionHandler)(void *userData);
typedef void (*XML_EndCdataSectionHandler)(void *userData); typedef void (*XML_EndCdataSectionHandler)(void *userData);
/* This is called for any characters in the XML document for /* This is called for any characters in the XML document for which
which there is no applicable handler. This includes both there is no applicable handler. This includes both characters that
characters that are part of markup which is of a kind that is are part of markup which is of a kind that is not reported
not reported (comments, markup declarations), or characters (comments, markup declarations), or characters that are part of a
that are part of a construct which could be reported but construct which could be reported but for which no handler has been
for which no handler has been supplied. The characters are passed supplied. The characters are passed exactly as they were in the XML
exactly as they were in the XML document except that document except that they will be encoded in UTF-8 or UTF-16.
they will be encoded in UTF-8. Line boundaries are not normalized. Line boundaries are not normalized. Note that a byte order mark
Note that a byte order mark character is not passed to the default handler. character is not passed to the default handler. There are no
There are no guarantees about how characters are divided between calls guarantees about how characters are divided between calls to the
to the default handler: for example, a comment might be split between default handler: for example, a comment might be split between
multiple calls. */ multiple calls.
*/
typedef void (*XML_DefaultHandler)(void *userData, typedef void (*XML_DefaultHandler)(void *userData,
const XML_Char *s, const XML_Char *s,
int len); int len);
/* This is called for the start of the DOCTYPE declaration, before /* This is called for the start of the DOCTYPE declaration, before
any DTD or internal subset is parsed. */ any DTD or internal subset is parsed.
*/
typedef void (*XML_StartDoctypeDeclHandler)(void *userData, typedef void (*XML_StartDoctypeDeclHandler)(void *userData,
const XML_Char *doctypeName, const XML_Char *doctypeName,
const XML_Char *sysid, const XML_Char *sysid,
@ -219,7 +288,9 @@ typedef void (*XML_StartDoctypeDeclHandler)(void *userData,
int has_internal_subset); int has_internal_subset);
/* This is called for the end of the DOCTYPE declaration when the /* This is called for the end of the DOCTYPE declaration when the
closing > is encountered, but after processing any external subset. */ closing > is encountered, but after processing any external
subset.
*/
typedef void (*XML_EndDoctypeDeclHandler)(void *userData); typedef void (*XML_EndDoctypeDeclHandler)(void *userData);
/* This is called for entity declarations. The is_parameter_entity /* This is called for entity declarations. The is_parameter_entity
@ -227,17 +298,19 @@ typedef void (*XML_EndDoctypeDeclHandler)(void *userData);
otherwise. otherwise.
For internal entities (<!ENTITY foo "bar">), value will For internal entities (<!ENTITY foo "bar">), value will
be non-null and systemId, publicID, and notationName will be null. be non-NULL and systemId, publicID, and notationName will be NULL.
The value string is NOT null terminated; the length is provided in The value string is NOT nul-terminated; the length is provided in
the value_length argument. Since it is legal to have zero-length the value_length argument. Since it is legal to have zero-length
values, do not use this argument to test for internal entities. values, do not use this argument to test for internal entities.
For external entities, value will be null and systemId will be non-null. For external entities, value will be NULL and systemId will be
The publicId argument will be null unless a public identifier was non-NULL. The publicId argument will be NULL unless a public
provided. The notationName argument will have a non-null value only identifier was provided. The notationName argument will have a
for unparsed entity declarations. non-NULL value only for unparsed entity declarations.
*/
Note that is_parameter_entity can't be changed to XML_Bool, since
that would break binary compatibility.
*/
typedef void (*XML_EntityDeclHandler) (void *userData, typedef void (*XML_EntityDeclHandler) (void *userData,
const XML_Char *entityName, const XML_Char *entityName,
int is_parameter_entity, int is_parameter_entity,
@ -255,11 +328,12 @@ XML_SetEntityDeclHandler(XML_Parser parser,
/* OBSOLETE -- OBSOLETE -- OBSOLETE /* OBSOLETE -- OBSOLETE -- OBSOLETE
This handler has been superseded by the EntityDeclHandler above. This handler has been superseded by the EntityDeclHandler above.
It is provided here for backward compatibility. It is provided here for backward compatibility.
This is called for a declaration of an unparsed (NDATA)
entity. The base argument is whatever was set by XML_SetBase.
The entityName, systemId and notationName arguments will never be null.
The other arguments may be. */
This is called for a declaration of an unparsed (NDATA) entity.
The base argument is whatever was set by XML_SetBase. The
entityName, systemId and notationName arguments will never be
NULL. The other arguments may be.
*/
typedef void (*XML_UnparsedEntityDeclHandler)(void *userData, typedef void (*XML_UnparsedEntityDeclHandler)(void *userData,
const XML_Char *entityName, const XML_Char *entityName,
const XML_Char *base, const XML_Char *base,
@ -267,10 +341,10 @@ typedef void (*XML_UnparsedEntityDeclHandler)(void *userData,
const XML_Char *publicId, const XML_Char *publicId,
const XML_Char *notationName); const XML_Char *notationName);
/* This is called for a declaration of notation. /* This is called for a declaration of notation. The base argument is
The base argument is whatever was set by XML_SetBase. whatever was set by XML_SetBase. The notationName will never be
The notationName will never be null. The other arguments can be. */ NULL. The other arguments can be.
*/
typedef void (*XML_NotationDeclHandler)(void *userData, typedef void (*XML_NotationDeclHandler)(void *userData,
const XML_Char *notationName, const XML_Char *notationName,
const XML_Char *base, const XML_Char *base,
@ -280,9 +354,9 @@ typedef void (*XML_NotationDeclHandler)(void *userData,
/* When namespace processing is enabled, these are called once for /* When namespace processing is enabled, these are called once for
each namespace declaration. The call to the start and end element each namespace declaration. The call to the start and end element
handlers occur between the calls to the start and end namespace handlers occur between the calls to the start and end namespace
declaration handlers. For an xmlns attribute, prefix will be null. declaration handlers. For an xmlns attribute, prefix will be
For an xmlns="" attribute, uri will be null. */ NULL. For an xmlns="" attribute, uri will be NULL.
*/
typedef void (*XML_StartNamespaceDeclHandler)(void *userData, typedef void (*XML_StartNamespaceDeclHandler)(void *userData,
const XML_Char *prefix, const XML_Char *prefix,
const XML_Char *uri); const XML_Char *uri);
@ -290,65 +364,101 @@ typedef void (*XML_StartNamespaceDeclHandler)(void *userData,
typedef void (*XML_EndNamespaceDeclHandler)(void *userData, typedef void (*XML_EndNamespaceDeclHandler)(void *userData,
const XML_Char *prefix); const XML_Char *prefix);
/* This is called if the document is not standalone (it has an /* This is called if the document is not standalone, that is, it has an
external subset or a reference to a parameter entity, but does not external subset or a reference to a parameter entity, but does not
have standalone="yes"). If this handler returns 0, then processing have standalone="yes". If this handler returns XML_STATUS_ERROR,
will not continue, and the parser will return a then processing will not continue, and the parser will return a
XML_ERROR_NOT_STANDALONE error. */ XML_ERROR_NOT_STANDALONE error.
If parameter entity parsing is enabled, then in addition to the
conditions above this handler will only be called if the referenced
entity was actually read.
*/
typedef int (*XML_NotStandaloneHandler)(void *userData); typedef int (*XML_NotStandaloneHandler)(void *userData);
/* This is called for a reference to an external parsed general entity. /* This is called for a reference to an external parsed general
The referenced entity is not automatically parsed. entity. The referenced entity is not automatically parsed. The
The application can parse it immediately or later using application can parse it immediately or later using
XML_ExternalEntityParserCreate. XML_ExternalEntityParserCreate.
The parser argument is the parser parsing the entity containing the reference;
it can be passed as the parser argument to XML_ExternalEntityParserCreate.
The systemId argument is the system identifier as specified in the entity
declaration; it will not be null.
The base argument is the system identifier that should be used as the base for
resolving systemId if systemId was relative; this is set by XML_SetBase;
it may be null.
The publicId argument is the public identifier as specified in the entity
declaration, or null if none was specified; the whitespace in the public
identifier will have been normalized as required by the XML spec.
The context argument specifies the parsing context in the format
expected by the context argument to
XML_ExternalEntityParserCreate; context is valid only until the handler
returns, so if the referenced entity is to be parsed later, it must be copied.
The handler should return 0 if processing should not continue because of
a fatal error in the handling of the external entity.
In this case the calling parser will return an
XML_ERROR_EXTERNAL_ENTITY_HANDLING error.
Note that unlike other handlers the first argument is the parser, not
userData. */
The parser argument is the parser parsing the entity containing the
reference; it can be passed as the parser argument to
XML_ExternalEntityParserCreate. The systemId argument is the
system identifier as specified in the entity declaration; it will
not be NULL.
The base argument is the system identifier that should be used as
the base for resolving systemId if systemId was relative; this is
set by XML_SetBase; it may be NULL.
The publicId argument is the public identifier as specified in the
entity declaration, or NULL if none was specified; the whitespace
in the public identifier will have been normalized as required by
the XML spec.
The context argument specifies the parsing context in the format
expected by the context argument to XML_ExternalEntityParserCreate;
context is valid only until the handler returns, so if the
referenced entity is to be parsed later, it must be copied.
context is NULL only when the entity is a parameter entity.
The handler should return XML_STATUS_ERROR if processing should not
continue because of a fatal error in the handling of the external
entity. In this case the calling parser will return an
XML_ERROR_EXTERNAL_ENTITY_HANDLING error.
Note that unlike other handlers the first argument is the parser,
not userData.
*/
typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser, typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser,
const XML_Char *context, const XML_Char *context,
const XML_Char *base, const XML_Char *base,
const XML_Char *systemId, const XML_Char *systemId,
const XML_Char *publicId); const XML_Char *publicId);
/* This structure is filled in by the XML_UnknownEncodingHandler /* This is called in two situations:
to provide information to the parser about encodings that are unknown 1) An entity reference is encountered for which no declaration
has been read *and* this is not an error.
2) An internal entity reference is read, but not expanded, because
XML_SetDefaultHandler has been called.
Note: skipped parameter entities in declarations and skipped general
entities in attribute values cannot be reported, because
the event would be out of sync with the reporting of the
declarations or attribute values.
*/
typedef void (*XML_SkippedEntityHandler)(void *userData,
const XML_Char *entityName,
int is_parameter_entity);
/* This structure is filled in by the XML_UnknownEncodingHandler to
provide information to the parser about encodings that are unknown
to the parser. to the parser.
The map[b] member gives information about byte sequences
whose first byte is b. The map[b] member gives information about byte sequences whose
If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar first byte is b.
value c.
If map[b] is c where c is >= 0, then b by itself encodes the
Unicode scalar value c.
If map[b] is -1, then the byte sequence is malformed. If map[b] is -1, then the byte sequence is malformed.
If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
sequence that encodes a single Unicode scalar value. If map[b] is -n, where n >= 2, then b is the first byte of an
The data member will be passed as the first argument to the convert function. n-byte sequence that encodes a single Unicode scalar value.
The convert function is used to convert multibyte sequences;
s will point to a n-byte sequence where map[(unsigned char)*s] == -n. The data member will be passed as the first argument to the convert
The convert function must return the Unicode scalar value function.
represented by this byte sequence or -1 if the byte sequence is malformed.
The convert function may be null if the encoding is a single-byte encoding, The convert function is used to convert multibyte sequences; s will
that is if map[b] >= -1 for all bytes b. point to an n-byte sequence where map[(unsigned char)*s] == -n. The
When the parser is finished with the encoding, then if release is not null, convert function must return the Unicode scalar value represented
it will call release passing it the data member; by this byte sequence or -1 if the byte sequence is malformed.
once release has been called, the convert function will not be called again.
The convert function may be NULL if the encoding is a single-byte
encoding, that is if map[b] >= -1 for all bytes b.
When the parser is finished with the encoding, then if release is
not NULL, it will call release passing it the data member; once
release has been called, the convert function will not be called
again.
Expat places certain restrictions on the encodings that are supported Expat places certain restrictions on the encodings that are supported
using this mechanism. using this mechanism.
@ -363,14 +473,14 @@ same byte that represents that character in ASCII.
2. No character may require more than 4 bytes to encode. 2. No character may require more than 4 bytes to encode.
3. All characters encoded must have Unicode scalar values <= 0xFFFF, (i.e., 3. All characters encoded must have Unicode scalar values <=
characters that would be encoded by surrogates in UTF-16 are not 0xFFFF, (i.e., characters that would be encoded by surrogates in
allowed). Note that this restriction doesn't apply to the built-in UTF-16 are not allowed). Note that this restriction doesn't
support for UTF-8 and UTF-16. apply to the built-in support for UTF-8 and UTF-16.
4. No Unicode character may be encoded by more than one distinct sequence
of bytes. */
4. No Unicode character may be encoded by more than one distinct
sequence of bytes.
*/
typedef struct { typedef struct {
int map[256]; int map[256];
void *data; void *data;
@ -379,16 +489,20 @@ typedef struct {
} XML_Encoding; } XML_Encoding;
/* This is called for an encoding that is unknown to the parser. /* This is called for an encoding that is unknown to the parser.
The encodingHandlerData argument is that which was passed as the The encodingHandlerData argument is that which was passed as the
second argument to XML_SetUnknownEncodingHandler. second argument to XML_SetUnknownEncodingHandler.
The name argument gives the name of the encoding as specified in The name argument gives the name of the encoding as specified in
the encoding declaration. the encoding declaration.
If the callback can provide information about the encoding,
it must fill in the XML_Encoding structure, and return 1.
Otherwise it must return 0.
If info does not describe a suitable encoding,
then the parser will return an XML_UNKNOWN_ENCODING error. */
If the callback can provide information about the encoding, it must
fill in the XML_Encoding structure, and return XML_STATUS_OK.
Otherwise it must return XML_STATUS_ERROR.
If info does not describe a suitable encoding, then the parser will
return an XML_ERROR_UNKNOWN_ENCODING error.
*/
typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData, typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData,
const XML_Char *name, const XML_Char *name,
XML_Encoding *info); XML_Encoding *info);
@ -429,17 +543,17 @@ XML_SetEndCdataSectionHandler(XML_Parser parser,
XML_EndCdataSectionHandler end); XML_EndCdataSectionHandler end);
/* This sets the default handler and also inhibits expansion of /* This sets the default handler and also inhibits expansion of
internal entities. The entity reference will be passed to the default internal entities. These entity references will be passed to the
handler. */ default handler, or to the skipped entity handler, if one is set.
*/
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_SetDefaultHandler(XML_Parser parser, XML_SetDefaultHandler(XML_Parser parser,
XML_DefaultHandler handler); XML_DefaultHandler handler);
/* This sets the default handler but does not inhibit expansion of /* This sets the default handler but does not inhibit expansion of
internal entities. The entity reference will not be passed to the internal entities. The entity reference will not be passed to the
default handler. */ default handler.
*/
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_SetDefaultHandlerExpand(XML_Parser parser, XML_SetDefaultHandlerExpand(XML_Parser parser,
XML_DefaultHandler handler); XML_DefaultHandler handler);
@ -486,32 +600,41 @@ XMLPARSEAPI(void)
XML_SetExternalEntityRefHandler(XML_Parser parser, XML_SetExternalEntityRefHandler(XML_Parser parser,
XML_ExternalEntityRefHandler handler); XML_ExternalEntityRefHandler handler);
/* If a non-null value for arg is specified here, then it will be passed /* If a non-NULL value for arg is specified here, then it will be
as the first argument to the external entity ref handler instead passed as the first argument to the external entity ref handler
of the parser object. */ instead of the parser object.
*/
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_SetExternalEntityRefHandlerArg(XML_Parser, void *arg); XML_SetExternalEntityRefHandlerArg(XML_Parser, void *arg);
XMLPARSEAPI(void)
XML_SetSkippedEntityHandler(XML_Parser parser,
XML_SkippedEntityHandler handler);
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_SetUnknownEncodingHandler(XML_Parser parser, XML_SetUnknownEncodingHandler(XML_Parser parser,
XML_UnknownEncodingHandler handler, XML_UnknownEncodingHandler handler,
void *encodingHandlerData); void *encodingHandlerData);
/* This can be called within a handler for a start element, end element, /* This can be called within a handler for a start element, end
processing instruction or character data. It causes the corresponding element, processing instruction or character data. It causes the
markup to be passed to the default handler. */ corresponding markup to be passed to the default handler.
*/
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_DefaultCurrent(XML_Parser parser); XML_DefaultCurrent(XML_Parser parser);
/* If do_nst is non-zero, and namespace processing is in effect, and /* If do_nst is non-zero, and namespace processing is in effect, and
a name has a prefix (i.e. an explicit namespace qualifier) then a name has a prefix (i.e. an explicit namespace qualifier) then
that name is returned as a triplet in a single that name is returned as a triplet in a single string separated by
string separated by the separator character specified when the parser the separator character specified when the parser was created: URI
was created: URI + sep + local_name + sep + prefix. + sep + local_name + sep + prefix.
If do_nst is zero, then namespace information is returned in the If do_nst is zero, then namespace information is returned in the
default manner (URI + sep + local_name) whether or not the names default manner (URI + sep + local_name) whether or not the name
has a prefix. has a prefix.
Note: Calling XML_SetReturnNSTriplet after XML_Parse or
XML_ParseBuffer has no effect.
*/ */
XMLPARSEAPI(void) XMLPARSEAPI(void)
@ -521,31 +644,53 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst);
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_SetUserData(XML_Parser parser, void *userData); XML_SetUserData(XML_Parser parser, void *userData);
/* Returns the last value set by XML_SetUserData or null. */ /* Returns the last value set by XML_SetUserData or NULL. */
#define XML_GetUserData(parser) (*(void **)(parser)) #define XML_GetUserData(parser) (*(void **)(parser))
/* This is equivalent to supplying an encoding argument /* This is equivalent to supplying an encoding argument to
to XML_ParserCreate. It must not be called after XML_Parse XML_ParserCreate. On success XML_SetEncoding returns non-zero,
or XML_ParseBuffer. */ zero otherwise.
Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer
XMLPARSEAPI(int) has no effect and returns XML_STATUS_ERROR.
*/
XMLPARSEAPI(enum XML_Status)
XML_SetEncoding(XML_Parser parser, const XML_Char *encoding); XML_SetEncoding(XML_Parser parser, const XML_Char *encoding);
/* If this function is called, then the parser will be passed /* If this function is called, then the parser will be passed as the
as the first argument to callbacks instead of userData. first argument to callbacks instead of userData. The userData will
The userData will still be accessible using XML_GetUserData. */ still be accessible using XML_GetUserData.
*/
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_UseParserAsHandlerArg(XML_Parser parser); XML_UseParserAsHandlerArg(XML_Parser parser);
/* Sets the base to be used for resolving relative URIs in system /* If useDTD == XML_TRUE is passed to this function, then the parser
identifiers in declarations. Resolving relative identifiers is left will assume that there is an external subset, even if none is
to the application: this value will be passed through as the base specified in the document. In such a case the parser will call the
argument to the XML_ExternalEntityRefHandler, XML_NotationDeclHandler externalEntityRefHandler with a value of NULL for the systemId
and XML_UnparsedEntityDeclHandler. The base argument will be copied. argument (the publicId and context arguments will be NULL as well).
Returns zero if out of memory, non-zero otherwise. */ Note: If this function is called, then this must be done before
the first call to XML_Parse or XML_ParseBuffer, since it will
have no effect after that. Returns
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING.
Note: If the document does not have a DOCTYPE declaration at all,
then startDoctypeDeclHandler and endDoctypeDeclHandler will not
be called, despite an external subset being parsed.
Note: If XML_DTD is not defined when Expat is compiled, returns
XML_ERROR_FEATURE_REQUIRES_XML_DTD.
*/
XMLPARSEAPI(enum XML_Error)
XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD);
XMLPARSEAPI(int)
/* Sets the base to be used for resolving relative URIs in system
identifiers in declarations. Resolving relative identifiers is
left to the application: this value will be passed through as the
base argument to the XML_ExternalEntityRefHandler,
XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base
argument will be copied. Returns XML_STATUS_ERROR if out of memory,
XML_STATUS_OK otherwise.
*/
XMLPARSEAPI(enum XML_Status)
XML_SetBase(XML_Parser parser, const XML_Char *base); XML_SetBase(XML_Parser parser, const XML_Char *base);
XMLPARSEAPI(const XML_Char *) XMLPARSEAPI(const XML_Char *)
@ -555,46 +700,70 @@ XML_GetBase(XML_Parser parser);
to the XML_StartElementHandler that were specified in the start-tag to the XML_StartElementHandler that were specified in the start-tag
rather than defaulted. Each attribute/value pair counts as 2; thus rather than defaulted. Each attribute/value pair counts as 2; thus
this corresponds to an index into the atts array passed to the this corresponds to an index into the atts array passed to the
XML_StartElementHandler. */ XML_StartElementHandler.
*/
XMLPARSEAPI(int) XMLPARSEAPI(int)
XML_GetSpecifiedAttributeCount(XML_Parser parser); XML_GetSpecifiedAttributeCount(XML_Parser parser);
/* Returns the index of the ID attribute passed in the last call to /* Returns the index of the ID attribute passed in the last call to
XML_StartElementHandler, or -1 if there is no ID attribute. Each XML_StartElementHandler, or -1 if there is no ID attribute. Each
attribute/value pair counts as 2; thus this corresponds to an index attribute/value pair counts as 2; thus this corresponds to an
into the atts array passed to the XML_StartElementHandler. */ index into the atts array passed to the XML_StartElementHandler.
*/
XMLPARSEAPI(int) XMLPARSEAPI(int)
XML_GetIdAttributeIndex(XML_Parser parser); XML_GetIdAttributeIndex(XML_Parser parser);
/* Parses some input. Returns 0 if a fatal error is detected. /* Parses some input. Returns XML_STATUS_ERROR if a fatal error is
The last call to XML_Parse must have isFinal true; detected. The last call to XML_Parse must have isFinal true; len
len may be zero for this call (or any other). */ may be zero for this call (or any other).
XMLPARSEAPI(int)
The XML_Status enum gives the possible return values for the
XML_Parse and XML_ParseBuffer functions. Though the return value
for these functions has always been described as a Boolean value,
the implementation, at least for the 1.95.x series, has always
returned exactly one of these values. The preprocessor #defines
are included so this stanza can be added to code that still needs
to support older versions of Expat 1.95.x:
#ifndef XML_STATUS_OK
#define XML_STATUS_OK 1
#define XML_STATUS_ERROR 0
#endif
Otherwise, the #define hackery is quite ugly and would have been dropped.
*/
enum XML_Status {
XML_STATUS_ERROR = 0,
#define XML_STATUS_ERROR XML_STATUS_ERROR
XML_STATUS_OK = 1
#define XML_STATUS_OK XML_STATUS_OK
};
XMLPARSEAPI(enum XML_Status)
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal); XML_Parse(XML_Parser parser, const char *s, int len, int isFinal);
XMLPARSEAPI(void *) XMLPARSEAPI(void *)
XML_GetBuffer(XML_Parser parser, int len); XML_GetBuffer(XML_Parser parser, int len);
XMLPARSEAPI(int) XMLPARSEAPI(enum XML_Status)
XML_ParseBuffer(XML_Parser parser, int len, int isFinal); XML_ParseBuffer(XML_Parser parser, int len, int isFinal);
/* Creates an XML_Parser object that can parse an external general /* Creates an XML_Parser object that can parse an external general
entity; context is a '\0'-terminated string specifying the parse entity; context is a '\0'-terminated string specifying the parse
context; encoding is a '\0'-terminated string giving the name of the context; encoding is a '\0'-terminated string giving the name of
externally specified encoding, or null if there is no externally the externally specified encoding, or NULL if there is no
specified encoding. The context string consists of a sequence of externally specified encoding. The context string consists of a
tokens separated by formfeeds (\f); a token consisting of a name sequence of tokens separated by formfeeds (\f); a token consisting
specifies that the general entity of the name is open; a token of the of a name specifies that the general entity of the name is open; a
form prefix=uri specifies the namespace for a particular prefix; a token of the form prefix=uri specifies the namespace for a
token of the form =uri specifies the default namespace. This can be particular prefix; a token of the form =uri specifies the default
called at any point after the first call to an namespace. This can be called at any point after the first call to
ExternalEntityRefHandler so long as the parser has not yet been an ExternalEntityRefHandler so long as the parser has not yet
freed. The new parser is completely independent and may safely be been freed. The new parser is completely independent and may
used in a separate thread. The handlers and userData are initialized safely be used in a separate thread. The handlers and userData are
from the parser argument. Returns 0 if out of memory. Otherwise initialized from the parser argument. Returns NULL if out of memory.
returns a new XML_Parser object. */ Otherwise returns a new XML_Parser object.
*/
XMLPARSEAPI(XML_Parser) XMLPARSEAPI(XML_Parser)
XML_ExternalEntityParserCreate(XML_Parser parser, XML_ExternalEntityParserCreate(XML_Parser parser,
const XML_Char *context, const XML_Char *context,
@ -607,76 +776,56 @@ enum XML_ParamEntityParsing {
}; };
/* Controls parsing of parameter entities (including the external DTD /* Controls parsing of parameter entities (including the external DTD
subset). If parsing of parameter entities is enabled, then references subset). If parsing of parameter entities is enabled, then
to external parameter entities (including the external DTD subset) references to external parameter entities (including the external
will be passed to the handler set with DTD subset) will be passed to the handler set with
XML_SetExternalEntityRefHandler. The context passed will be 0. XML_SetExternalEntityRefHandler. The context passed will be 0.
Unlike external general entities, external parameter entities can only
be parsed synchronously. If the external parameter entity is to be
parsed, it must be parsed during the call to the external entity ref
handler: the complete sequence of XML_ExternalEntityParserCreate,
XML_Parse/XML_ParseBuffer and XML_ParserFree calls must be made during
this call. After XML_ExternalEntityParserCreate has been called to
create the parser for the external parameter entity (context must be 0
for this call), it is illegal to make any calls on the old parser
until XML_ParserFree has been called on the newly created parser. If
the library has been compiled without support for parameter entity
parsing (ie without XML_DTD being defined), then
XML_SetParamEntityParsing will return 0 if parsing of parameter
entities is requested; otherwise it will return non-zero. */
Unlike external general entities, external parameter entities can
only be parsed synchronously. If the external parameter entity is
to be parsed, it must be parsed during the call to the external
entity ref handler: the complete sequence of
XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and
XML_ParserFree calls must be made during this call. After
XML_ExternalEntityParserCreate has been called to create the parser
for the external parameter entity (context must be 0 for this
call), it is illegal to make any calls on the old parser until
XML_ParserFree has been called on the newly created parser.
If the library has been compiled without support for parameter
entity parsing (ie without XML_DTD being defined), then
XML_SetParamEntityParsing will return 0 if parsing of parameter
entities is requested; otherwise it will return non-zero.
Note: If XML_SetParamEntityParsing is called after XML_Parse or
XML_ParseBuffer, then it has no effect and will always return 0.
*/
XMLPARSEAPI(int) XMLPARSEAPI(int)
XML_SetParamEntityParsing(XML_Parser parser, XML_SetParamEntityParsing(XML_Parser parser,
enum XML_ParamEntityParsing parsing); enum XML_ParamEntityParsing parsing);
enum XML_Error { /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then
XML_ERROR_NONE, XML_GetErrorCode returns information about the error.
XML_ERROR_NO_MEMORY, */
XML_ERROR_SYNTAX,
XML_ERROR_NO_ELEMENTS,
XML_ERROR_INVALID_TOKEN,
XML_ERROR_UNCLOSED_TOKEN,
XML_ERROR_PARTIAL_CHAR,
XML_ERROR_TAG_MISMATCH,
XML_ERROR_DUPLICATE_ATTRIBUTE,
XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
XML_ERROR_PARAM_ENTITY_REF,
XML_ERROR_UNDEFINED_ENTITY,
XML_ERROR_RECURSIVE_ENTITY_REF,
XML_ERROR_ASYNC_ENTITY,
XML_ERROR_BAD_CHAR_REF,
XML_ERROR_BINARY_ENTITY_REF,
XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
XML_ERROR_MISPLACED_XML_PI,
XML_ERROR_UNKNOWN_ENCODING,
XML_ERROR_INCORRECT_ENCODING,
XML_ERROR_UNCLOSED_CDATA_SECTION,
XML_ERROR_EXTERNAL_ENTITY_HANDLING,
XML_ERROR_NOT_STANDALONE,
XML_ERROR_UNEXPECTED_STATE
};
/* If XML_Parse or XML_ParseBuffer have returned 0, then XML_GetErrorCode
returns information about the error. */
XMLPARSEAPI(enum XML_Error) XMLPARSEAPI(enum XML_Error)
XML_GetErrorCode(XML_Parser parser); XML_GetErrorCode(XML_Parser parser);
/* These functions return information about the current parse location. /* These functions return information about the current parse
They may be called when XML_Parse or XML_ParseBuffer return 0; location. They may be called from any callback called to report
in this case the location is the location of the character at which some parse event; in this case the location is the location of
the error was detected. the first of the sequence of characters that generated the event.
They may also be called from any other callback called to report
some parse event; in this the location is the location of the first
of the sequence of characters that generated the event. */
They may also be called after returning from a call to XML_Parse
or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then
the location is the location of the character at which the error
was detected; otherwise the location is the location of the last
parse event, as described above.
*/
XMLPARSEAPI(int) XML_GetCurrentLineNumber(XML_Parser parser); XMLPARSEAPI(int) XML_GetCurrentLineNumber(XML_Parser parser);
XMLPARSEAPI(int) XML_GetCurrentColumnNumber(XML_Parser parser); XMLPARSEAPI(int) XML_GetCurrentColumnNumber(XML_Parser parser);
XMLPARSEAPI(long) XML_GetCurrentByteIndex(XML_Parser parser); XMLPARSEAPI(long) XML_GetCurrentByteIndex(XML_Parser parser);
/* Return the number of bytes in the current event. /* Return the number of bytes in the current event.
Returns 0 if the event is in an internal entity. */ Returns 0 if the event is in an internal entity.
*/
XMLPARSEAPI(int) XMLPARSEAPI(int)
XML_GetCurrentByteCount(XML_Parser parser); XML_GetCurrentByteCount(XML_Parser parser);
@ -684,12 +833,12 @@ XML_GetCurrentByteCount(XML_Parser parser);
the integer pointed to by offset to the offset within this buffer the integer pointed to by offset to the offset within this buffer
of the current parse position, and sets the integer pointed to by size of the current parse position, and sets the integer pointed to by size
to the size of this buffer (the number of input bytes). Otherwise to the size of this buffer (the number of input bytes). Otherwise
returns a null pointer. Also returns a null pointer if a parse isn't returns a NULL pointer. Also returns a NULL pointer if a parse isn't
active. active.
NOTE: The character pointer returned should not be used outside NOTE: The character pointer returned should not be used outside
the handler that makes the call. */ the handler that makes the call.
*/
XMLPARSEAPI(const char *) XMLPARSEAPI(const char *)
XML_GetInputContext(XML_Parser parser, XML_GetInputContext(XML_Parser parser,
int *offset, int *offset,
@ -700,13 +849,27 @@ XML_GetInputContext(XML_Parser parser,
#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber
#define XML_GetErrorByteIndex XML_GetCurrentByteIndex #define XML_GetErrorByteIndex XML_GetCurrentByteIndex
/* Frees the content model passed to the element declaration handler */
XMLPARSEAPI(void)
XML_FreeContentModel(XML_Parser parser, XML_Content *model);
/* Exposing the memory handling functions used in Expat */
XMLPARSEAPI(void *)
XML_MemMalloc(XML_Parser parser, size_t size);
XMLPARSEAPI(void *)
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
XMLPARSEAPI(void)
XML_MemFree(XML_Parser parser, void *ptr);
/* Frees memory used by the parser. */ /* Frees memory used by the parser. */
XMLPARSEAPI(void) XMLPARSEAPI(void)
XML_ParserFree(XML_Parser parser); XML_ParserFree(XML_Parser parser);
/* Returns a string describing the error. */ /* Returns a string describing the error. */
XMLPARSEAPI(const XML_LChar *) XMLPARSEAPI(const XML_LChar *)
XML_ErrorString(int code); XML_ErrorString(enum XML_Error code);
/* Return a string containing the version number of this expat */ /* Return a string containing the version number of this expat */
XMLPARSEAPI(const XML_LChar *) XMLPARSEAPI(const XML_LChar *)
@ -719,18 +882,42 @@ typedef struct {
} XML_Expat_Version; } XML_Expat_Version;
/* Return an XML_Expat_Version structure containing numeric version /* Return an XML_Expat_Version structure containing numeric version
number information for this version of expat */ number information for this version of expat.
*/
XMLPARSEAPI(XML_Expat_Version) XMLPARSEAPI(XML_Expat_Version)
XML_ExpatVersionInfo(void); XML_ExpatVersionInfo(void);
/* VERSION is not defined in expat.h.in, but it really belongs here, /* Added in Expat 1.95.5. */
and defining it on the command line gives difficulties with MSVC. */ enum XML_FeatureEnum {
#define VERSION "1.95.2" XML_FEATURE_END = 0,
XML_FEATURE_UNICODE,
XML_FEATURE_UNICODE_WCHAR_T,
XML_FEATURE_DTD,
XML_FEATURE_CONTEXT_BYTES,
XML_FEATURE_MIN_SIZE,
XML_FEATURE_SIZEOF_XML_CHAR,
XML_FEATURE_SIZEOF_XML_LCHAR
/* Additional features must be added to the end of this enum. */
};
typedef struct {
enum XML_FeatureEnum feature;
const XML_LChar *name;
long int value;
} XML_Feature;
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
/* Expat follows the GNU/Linux convention of odd number minor version for
beta/development releases and even number minor version for stable
releases. Micro is bumped with each release, and set to 0 with each
change to major or minor version.
*/
#define XML_MAJOR_VERSION 1 #define XML_MAJOR_VERSION 1
#define XML_MINOR_VERSION 95 #define XML_MINOR_VERSION 95
#define XML_MICRO_VERSION 2 #define XML_MICRO_VERSION 6
#ifdef __cplusplus #ifdef __cplusplus
} }

73
Modules/expat/internal.h Normal file
View file

@ -0,0 +1,73 @@
/* internal.h
Internal definitions used by Expat. This is not needed to compile
client code.
The following calling convention macros are defined for frequently
called functions:
FASTCALL - Used for those internal functions that have a simple
body and a low number of arguments and local variables.
PTRCALL - Used for functions called through function pointers.
PTRFASTCALL - Like PTRCALL, but for low number of arguments.
inline - Used for selected internal functions for which inlining
may improve performance on some platforms.
Note: Use of these macros is based on judgement, not hard rules,
and therefore subject to change.
*/
/* Step 1: pick compiler-specific definitions for the three
   calling-convention macros.  Only GCC-compatible compilers and WIN32
   builds are handled explicitly here.
*/
#if defined(__GNUC__)
/* Instability reported with egcs on a RedHat Linux 7.3.
Let's comment it out:
#define FASTCALL __attribute__((stdcall, regparm(3)))
and let's try this:
*/
/* NOTE(review): this branch is taken for every __GNUC__ target, not
   only x86; regparm is presumably an x86-specific attribute, so other
   targets may warn about or ignore it -- confirm before relying on it.
*/
#define FASTCALL __attribute__((regparm(3)))
/* PTRCALL is deliberately left empty for GCC: no attribute is applied
   to functions called through pointers.
*/
#define PTRCALL
#define PTRFASTCALL __attribute__((regparm(3)))
#elif defined(WIN32)
/* Using __fastcall seems to have an unexpected negative effect under
MS VC++, especially for function pointers, so we won't use it for
now on that platform. It may be reconsidered for a future release
if it can be made more effective.
Likely reason: __fastcall on Windows is like stdcall, therefore
the compiler cannot perform stack optimizations for call clusters.
*/
#define FASTCALL
#define PTRCALL
#define PTRFASTCALL
#endif
/* Step 2: fallbacks.  On any compiler not matched above, make each
   macro expand to nothing so declarations that use them still compile.
*/
#ifndef FASTCALL
#define FASTCALL
#endif
#ifndef PTRCALL
#define PTRCALL
#endif
#ifndef PTRFASTCALL
#define PTRFASTCALL
#endif
/* Step 3: map "inline".  Unless XML_MIN_SIZE is defined, a GCC build
   compiled as C (and with no existing "inline" macro) maps "inline" to
   the __inline keyword.
*/
#ifndef XML_MIN_SIZE
#if !defined(__cplusplus) && !defined(inline)
#ifdef __GNUC__
#define inline __inline
#endif /* __GNUC__ */
#endif
#endif /* XML_MIN_SIZE */
/* Step 4: guarantee that "inline" is always a defined macro.  In C++
   it expands to the keyword itself; in C, if nothing above defined it
   (including whenever XML_MIN_SIZE is set), it expands to nothing and
   the functions are compiled as ordinary functions.
*/
#ifdef __cplusplus
#define inline inline
#else
#ifndef inline
#define inline
#endif
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,11 +1,16 @@
/* /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission. See the file COPYING for copying permission.
*/ */
#ifndef XmlRole_INCLUDED #ifndef XmlRole_INCLUDED
#define XmlRole_INCLUDED 1 #define XmlRole_INCLUDED 1
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
#define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt
#endif
#include "xmltok.h" #include "xmltok.h"
#ifdef __cplusplus #ifdef __cplusplus
@ -17,6 +22,7 @@ enum {
XML_ROLE_NONE = 0, XML_ROLE_NONE = 0,
XML_ROLE_XML_DECL, XML_ROLE_XML_DECL,
XML_ROLE_INSTANCE_START, XML_ROLE_INSTANCE_START,
XML_ROLE_DOCTYPE_NONE,
XML_ROLE_DOCTYPE_NAME, XML_ROLE_DOCTYPE_NAME,
XML_ROLE_DOCTYPE_SYSTEM_ID, XML_ROLE_DOCTYPE_SYSTEM_ID,
XML_ROLE_DOCTYPE_PUBLIC_ID, XML_ROLE_DOCTYPE_PUBLIC_ID,
@ -24,11 +30,13 @@ enum {
XML_ROLE_DOCTYPE_CLOSE, XML_ROLE_DOCTYPE_CLOSE,
XML_ROLE_GENERAL_ENTITY_NAME, XML_ROLE_GENERAL_ENTITY_NAME,
XML_ROLE_PARAM_ENTITY_NAME, XML_ROLE_PARAM_ENTITY_NAME,
XML_ROLE_ENTITY_NONE,
XML_ROLE_ENTITY_VALUE, XML_ROLE_ENTITY_VALUE,
XML_ROLE_ENTITY_SYSTEM_ID, XML_ROLE_ENTITY_SYSTEM_ID,
XML_ROLE_ENTITY_PUBLIC_ID, XML_ROLE_ENTITY_PUBLIC_ID,
XML_ROLE_ENTITY_COMPLETE, XML_ROLE_ENTITY_COMPLETE,
XML_ROLE_ENTITY_NOTATION_NAME, XML_ROLE_ENTITY_NOTATION_NAME,
XML_ROLE_NOTATION_NONE,
XML_ROLE_NOTATION_NAME, XML_ROLE_NOTATION_NAME,
XML_ROLE_NOTATION_SYSTEM_ID, XML_ROLE_NOTATION_SYSTEM_ID,
XML_ROLE_NOTATION_NO_SYSTEM_ID, XML_ROLE_NOTATION_NO_SYSTEM_ID,
@ -44,11 +52,13 @@ enum {
XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS, XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS,
XML_ROLE_ATTRIBUTE_ENUM_VALUE, XML_ROLE_ATTRIBUTE_ENUM_VALUE,
XML_ROLE_ATTRIBUTE_NOTATION_VALUE, XML_ROLE_ATTRIBUTE_NOTATION_VALUE,
XML_ROLE_ATTLIST_NONE,
XML_ROLE_ATTLIST_ELEMENT_NAME, XML_ROLE_ATTLIST_ELEMENT_NAME,
XML_ROLE_IMPLIED_ATTRIBUTE_VALUE, XML_ROLE_IMPLIED_ATTRIBUTE_VALUE,
XML_ROLE_REQUIRED_ATTRIBUTE_VALUE, XML_ROLE_REQUIRED_ATTRIBUTE_VALUE,
XML_ROLE_DEFAULT_ATTRIBUTE_VALUE, XML_ROLE_DEFAULT_ATTRIBUTE_VALUE,
XML_ROLE_FIXED_ATTRIBUTE_VALUE, XML_ROLE_FIXED_ATTRIBUTE_VALUE,
XML_ROLE_ELEMENT_NONE,
XML_ROLE_ELEMENT_NAME, XML_ROLE_ELEMENT_NAME,
XML_ROLE_CONTENT_ANY, XML_ROLE_CONTENT_ANY,
XML_ROLE_CONTENT_EMPTY, XML_ROLE_CONTENT_EMPTY,
@ -64,6 +74,8 @@ enum {
XML_ROLE_CONTENT_ELEMENT_REP, XML_ROLE_CONTENT_ELEMENT_REP,
XML_ROLE_CONTENT_ELEMENT_OPT, XML_ROLE_CONTENT_ELEMENT_OPT,
XML_ROLE_CONTENT_ELEMENT_PLUS, XML_ROLE_CONTENT_ELEMENT_PLUS,
XML_ROLE_PI,
XML_ROLE_COMMENT,
#ifdef XML_DTD #ifdef XML_DTD
XML_ROLE_TEXT_DECL, XML_ROLE_TEXT_DECL,
XML_ROLE_IGNORE_SECT, XML_ROLE_IGNORE_SECT,
@ -73,15 +85,17 @@ enum {
}; };
typedef struct prolog_state { typedef struct prolog_state {
int (*handler)(struct prolog_state *state, int (PTRCALL *handler) (struct prolog_state *state,
int tok, int tok,
const char *ptr, const char *ptr,
const char *end, const char *end,
const ENCODING *enc); const ENCODING *enc);
unsigned level; unsigned level;
int role_none;
#ifdef XML_DTD #ifdef XML_DTD
unsigned includeLevel; unsigned includeLevel;
int documentEntity; int documentEntity;
int inEntityValue;
#endif /* XML_DTD */ #endif /* XML_DTD */
} PROLOG_STATE; } PROLOG_STATE;

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,4 @@
/* /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission. See the file COPYING for copying permission.
*/ */
@ -11,9 +10,11 @@ extern "C" {
#endif #endif
/* The following token may be returned by XmlContentTok */ /* The following token may be returned by XmlContentTok */
#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be start of #define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be
illegal ]]> sequence */ start of illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ /* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_NONE -4 /* The string to be scanned is empty */ #define XML_TOK_NONE -4 /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; #define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan;
might be part of CRLF sequence */ might be part of CRLF sequence */
@ -22,8 +23,8 @@ extern "C" {
#define XML_TOK_INVALID 0 #define XML_TOK_INVALID 0
/* The following tokens are returned by XmlContentTok; some are also /* The following tokens are returned by XmlContentTok; some are also
returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */ returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok.
*/
#define XML_TOK_START_TAG_WITH_ATTS 1 #define XML_TOK_START_TAG_WITH_ATTS 1
#define XML_TOK_START_TAG_NO_ATTS 2 #define XML_TOK_START_TAG_NO_ATTS 2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */ #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
@ -35,7 +36,9 @@ extern "C" {
#define XML_TOK_ENTITY_REF 9 #define XML_TOK_ENTITY_REF 9
#define XML_TOK_CHAR_REF 10 /* numeric character reference */ #define XML_TOK_CHAR_REF 10 /* numeric character reference */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ /* The following tokens may be returned by both XmlPrologTok and
XmlContentTok.
*/
#define XML_TOK_PI 11 /* processing instruction */ #define XML_TOK_PI 11 /* processing instruction */
#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ #define XML_TOK_XML_DECL 12 /* XML decl or text decl */
#define XML_TOK_COMMENT 13 #define XML_TOK_COMMENT 13
@ -75,8 +78,9 @@ extern "C" {
/* The following token is returned only by XmlCdataSectionTok */ /* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE 40 #define XML_TOK_CDATA_SECT_CLOSE 40
/* With namespace processing this is returned by XmlPrologTok /* With namespace processing this is returned by XmlPrologTok for a
for a name with a colon. */ name with a colon.
*/
#define XML_TOK_PREFIXED_NAME 41 #define XML_TOK_PREFIXED_NAME 41
#ifdef XML_DTD #ifdef XML_DTD
@ -121,37 +125,45 @@ typedef struct {
struct encoding; struct encoding;
typedef struct encoding ENCODING; typedef struct encoding ENCODING;
typedef int (PTRCALL *SCANNER)(const ENCODING *,
const char *,
const char *,
const char **);
struct encoding { struct encoding {
int (*scanners[XML_N_STATES])(const ENCODING *, SCANNER scanners[XML_N_STATES];
SCANNER literalScanners[XML_N_LITERAL_TYPES];
int (PTRCALL *sameName)(const ENCODING *,
const char *,
const char *);
int (PTRCALL *nameMatchesAscii)(const ENCODING *,
const char *, const char *,
const char *, const char *,
const char **); const char *);
int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *, int (PTRFASTCALL *nameLength)(const ENCODING *, const char *);
const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *);
int (PTRCALL *getAtts)(const ENCODING *enc,
const char *ptr,
int attsMax,
ATTRIBUTE *atts);
int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr);
int (PTRCALL *predefinedEntityName)(const ENCODING *,
const char *, const char *,
const char *, const char *);
const char **); void (PTRCALL *updatePosition)(const ENCODING *,
int (*sameName)(const ENCODING *,
const char *, const char *);
int (*nameMatchesAscii)(const ENCODING *,
const char *, const char *, const char *);
int (*nameLength)(const ENCODING *, const char *);
const char *(*skipS)(const ENCODING *, const char *);
int (*getAtts)(const ENCODING *enc, const char *ptr,
int attsMax, ATTRIBUTE *atts);
int (*charRefNumber)(const ENCODING *enc, const char *ptr);
int (*predefinedEntityName)(const ENCODING *, const char *, const char *);
void (*updatePosition)(const ENCODING *,
const char *ptr, const char *ptr,
const char *end, const char *end,
POSITION *); POSITION *);
int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end, int (PTRCALL *isPublicId)(const ENCODING *enc,
const char *ptr,
const char *end,
const char **badPtr); const char **badPtr);
void (*utf8Convert)(const ENCODING *enc, void (PTRCALL *utf8Convert)(const ENCODING *enc,
const char **fromP, const char **fromP,
const char *fromLim, const char *fromLim,
char **toP, char **toP,
const char *toLim); const char *toLim);
void (*utf16Convert)(const ENCODING *enc, void (PTRCALL *utf16Convert)(const ENCODING *enc,
const char **fromP, const char **fromP,
const char *fromLim, const char *fromLim,
unsigned short **toP, unsigned short **toP,
@ -161,24 +173,25 @@ struct encoding {
char isUtf16; char isUtf16;
}; };
/* /* Scan the string starting at ptr until the end of the next complete
Scan the string starting at ptr until the end of the next complete token, token, but do not scan past eptr. Return an integer giving the
but do not scan past eptr. Return an integer giving the type of token. type of token.
Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.
Return XML_TOK_PARTIAL when the string does not contain a complete token; Return XML_TOK_PARTIAL when the string does not contain a complete
nextTokPtr will not be set. token; nextTokPtr will not be set.
Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr Return XML_TOK_INVALID when the string does not start a valid
will be set to point to the character which made the token invalid. token; nextTokPtr will be set to point to the character which made
the token invalid.
Otherwise the string starts with a valid token; nextTokPtr will be set to point Otherwise the string starts with a valid token; nextTokPtr will be
to the character following the end of that token. set to point to the character following the end of that token.
Each data character counts as a single token, but adjacent data characters Each data character counts as a single token, but adjacent data
may be returned together. Similarly for characters in the prolog outside characters may be returned together. Similarly for characters in
literals, comments and processing instructions. the prolog outside literals, comments and processing instructions.
*/ */
@ -201,9 +214,9 @@ literals, comments and processing instructions.
#endif /* XML_DTD */ #endif /* XML_DTD */
/* This is used for performing a 2nd-level tokenization on /* This is used for performing a 2nd-level tokenization on the content
the content of a literal that has already been returned by XmlTok. */ of a literal that has already been returned by XmlTok.
*/
#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
(((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
@ -264,14 +277,16 @@ int XmlParseXmlDecl(int isGeneralTextEntity,
int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncoding(void); const ENCODING *XmlGetUtf8InternalEncoding(void);
const ENCODING *XmlGetUtf16InternalEncoding(void); const ENCODING *XmlGetUtf16InternalEncoding(void);
int XmlUtf8Encode(int charNumber, char *buf); int FASTCALL XmlUtf8Encode(int charNumber, char *buf);
int XmlUtf16Encode(int charNumber, unsigned short *buf); int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf);
int XmlSizeOfUnknownEncoding(void); int XmlSizeOfUnknownEncoding(void);
typedef int (*CONVERTER)(void *userData, const char *p);
ENCODING * ENCODING *
XmlInitUnknownEncoding(void *mem, XmlInitUnknownEncoding(void *mem,
int *table, int *table,
int (*conv)(void *userData, const char *p), CONVERTER convert,
void *userData); void *userData);
int XmlParseXmlDeclNS(int isGeneralTextEntity, int XmlParseXmlDeclNS(int isGeneralTextEntity,
@ -284,13 +299,14 @@ int XmlParseXmlDeclNS(int isGeneralTextEntity,
const char **encodingNamePtr, const char **encodingNamePtr,
const ENCODING **namedEncodingPtr, const ENCODING **namedEncodingPtr,
int *standalonePtr); int *standalonePtr);
int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
const ENCODING *XmlGetUtf8InternalEncodingNS(void); const ENCODING *XmlGetUtf8InternalEncodingNS(void);
const ENCODING *XmlGetUtf16InternalEncodingNS(void); const ENCODING *XmlGetUtf16InternalEncodingNS(void);
ENCODING * ENCODING *
XmlInitUnknownEncodingNS(void *mem, XmlInitUnknownEncodingNS(void *mem,
int *table, int *table,
int (*conv)(void *userData, const char *p), CONVERTER convert,
void *userData); void *userData);
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -1,5 +1,4 @@
/* /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission. See the file COPYING for copying permission.
*/ */
@ -87,9 +86,9 @@ See the file COPYING for copying permission.
/* ptr points to character following "<!-" */ /* ptr points to character following "<!-" */
static static int PTRCALL
int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (ptr != end) {
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
@ -125,9 +124,9 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<!" */ /* ptr points to character following "<!" */
static static int PTRCALL
int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
@ -172,8 +171,9 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
} }
static static int PTRCALL
int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
const char *end, int *tokPtr)
{ {
int upper = 0; int upper = 0;
*tokPtr = XML_TOK_PI; *tokPtr = XML_TOK_PI;
@ -216,9 +216,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<?" */ /* ptr points to character following "<?" */
static static int PTRCALL
int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
int tok; int tok;
const char *target = ptr; const char *target = ptr;
@ -278,12 +278,12 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
} }
static int PTRCALL
static PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, const char *end, const char **nextTokPtr)
const char **nextTokPtr)
{ {
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
ASCII_T, ASCII_A, ASCII_LSQB };
int i; int i;
/* CDATA[ */ /* CDATA[ */
if (end - ptr < 6 * MINBPC(enc)) if (end - ptr < 6 * MINBPC(enc))
@ -298,9 +298,9 @@ int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *e
return XML_TOK_CDATA_SECT_OPEN; return XML_TOK_CDATA_SECT_OPEN;
} }
static static int PTRCALL
int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
return XML_TOK_NONE; return XML_TOK_NONE;
@ -376,9 +376,9 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en
/* ptr points to character following "</" */ /* ptr points to character following "</" */
static static int PTRCALL
int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
@ -407,7 +407,8 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
#ifdef XML_NS #ifdef XML_NS
case BT_COLON: case BT_COLON:
/* no need to check qname syntax here, since end-tag must match exactly */ /* no need to check qname syntax here,
since end-tag must match exactly */
ptr += MINBPC(enc); ptr += MINBPC(enc);
break; break;
#endif #endif
@ -424,9 +425,9 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "&#X" */ /* ptr points to character following "&#X" */
static static int PTRCALL
int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
@ -456,9 +457,9 @@ int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end
/* ptr points to character following "&#" */ /* ptr points to character following "&#" */
static static int PTRCALL
int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (ptr != end) {
if (CHAR_MATCHES(enc, ptr, ASCII_x)) if (CHAR_MATCHES(enc, ptr, ASCII_x))
@ -488,8 +489,8 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "&" */ /* ptr points to character following "&" */
static static int PTRCALL
int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
@ -518,8 +519,8 @@ int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following first character of attribute name */ /* ptr points to character following first character of attribute name */
static static int PTRCALL
int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
#ifdef XML_NS #ifdef XML_NS
@ -574,7 +575,6 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
hadColon = 0; hadColon = 0;
#endif #endif
for (;;) { for (;;) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (ptr == end)
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
@ -678,8 +678,8 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "<" */ /* ptr points to character following "<" */
static static int PTRCALL
int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
#ifdef XML_NS #ifdef XML_NS
@ -696,7 +696,8 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_MINUS: case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_LSQB: case BT_LSQB:
return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
end, nextTokPtr);
} }
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
@ -777,8 +778,8 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
} }
static static int PTRCALL
int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
@ -876,8 +877,8 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to character following "%" */ /* ptr points to character following "%" */
static static int PTRCALL
int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
@ -905,8 +906,8 @@ int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
} }
static static int PTRCALL
int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) if (ptr == end)
@ -932,8 +933,8 @@ int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
return -XML_TOK_POUND_NAME; return -XML_TOK_POUND_NAME;
} }
static static int PTRCALL
int PREFIX(scanLit)(int open, const ENCODING *enc, PREFIX(scanLit)(int open, const ENCODING *enc,
const char *ptr, const char *end, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
@ -964,8 +965,8 @@ int PREFIX(scanLit)(int open, const ENCODING *enc,
return XML_TOK_PARTIAL; return XML_TOK_PARTIAL;
} }
static static int PTRCALL
int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
int tok; int tok;
@ -1008,8 +1009,11 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
case BT_CR: case BT_CR:
if (ptr + MINBPC(enc) == end) if (ptr + MINBPC(enc) == end) {
*nextTokPtr = end;
/* indicate that this might be part of a CR/LF pair */
return -XML_TOK_PROLOG_S; return -XML_TOK_PROLOG_S;
}
/* fall through */ /* fall through */
case BT_S: case BT_LF: case BT_S: case BT_LF:
for (;;) { for (;;) {
@ -1192,9 +1196,9 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return -tok; return -tok;
} }
static static int PTRCALL
int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
const char *start; const char *start;
if (ptr == end) if (ptr == end)
@ -1250,9 +1254,9 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *
return XML_TOK_DATA_CHARS; return XML_TOK_DATA_CHARS;
} }
static static int PTRCALL
int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
const char *start; const char *start;
if (ptr == end) if (ptr == end)
@ -1307,9 +1311,9 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end
#ifdef XML_DTD #ifdef XML_DTD
static static int PTRCALL
int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
int level = 0; int level = 0;
if (MINBPC(enc) > 1) { if (MINBPC(enc) > 1) {
@ -1360,8 +1364,8 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e
#endif /* XML_DTD */ #endif /* XML_DTD */
static static int PTRCALL
int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
const char **badPtr) const char **badPtr)
{ {
ptr += MINBPC(enc); ptr += MINBPC(enc);
@ -1415,12 +1419,13 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
return 1; return 1;
} }
/* This must only be called for a well-formed start-tag or empty element tag. /* This must only be called for a well-formed start-tag or empty
Returns the number of attributes. Pointers to the first attsMax attributes element tag. Returns the number of attributes. Pointers to the
are stored in atts. */ first attsMax attributes are stored in atts.
*/
static static int PTRCALL
int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
int attsMax, ATTRIBUTE *atts) int attsMax, ATTRIBUTE *atts)
{ {
enum { other, inName, inValue } state = inName; enum { other, inName, inValue } state = inName;
@ -1512,14 +1517,16 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
/* not reached */ /* not reached */
} }
static static int PTRFASTCALL
int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
{ {
int result = 0; int result = 0;
/* skip &# */ /* skip &# */
ptr += 2*MINBPC(enc); ptr += 2*MINBPC(enc);
if (CHAR_MATCHES(enc, ptr, ASCII_x)) { if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { for (ptr += MINBPC(enc);
!CHAR_MATCHES(enc, ptr, ASCII_SEMI);
ptr += MINBPC(enc)) {
int c = BYTE_TO_ASCII(enc, ptr); int c = BYTE_TO_ASCII(enc, ptr);
switch (c) { switch (c) {
case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
@ -1527,11 +1534,13 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
result <<= 4; result <<= 4;
result |= (c - ASCII_0); result |= (c - ASCII_0);
break; break;
case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: case ASCII_A: case ASCII_B: case ASCII_C:
case ASCII_D: case ASCII_E: case ASCII_F:
result <<= 4; result <<= 4;
result += 10 + (c - ASCII_A); result += 10 + (c - ASCII_A);
break; break;
case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: case ASCII_a: case ASCII_b: case ASCII_c:
case ASCII_d: case ASCII_e: case ASCII_f:
result <<= 4; result <<= 4;
result += 10 + (c - ASCII_a); result += 10 + (c - ASCII_a);
break; break;
@ -1552,8 +1561,9 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
return checkCharRefNumber(result); return checkCharRefNumber(result);
} }
static static int PTRCALL
int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
const char *end)
{ {
switch ((end - ptr)/MINBPC(enc)) { switch ((end - ptr)/MINBPC(enc)) {
case 2: case 2:
@ -1605,8 +1615,8 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha
return 0; return 0;
} }
static static int PTRCALL
int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
{ {
for (;;) { for (;;) {
switch (BYTE_TYPE(enc, ptr1)) { switch (BYTE_TYPE(enc, ptr1)) {
@ -1669,8 +1679,8 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
/* not reached */ /* not reached */
} }
static static int PTRCALL
int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
const char *end1, const char *ptr2) const char *end1, const char *ptr2)
{ {
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
@ -1682,8 +1692,8 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
return ptr1 == end1; return ptr1 == end1;
} }
static static int PTRFASTCALL
int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
{ {
const char *start = ptr; const char *start = ptr;
for (;;) { for (;;) {
@ -1709,8 +1719,8 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
} }
} }
static static const char * PTRFASTCALL
const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) PREFIX(skipS)(const ENCODING *enc, const char *ptr)
{ {
for (;;) { for (;;) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
@ -1725,8 +1735,8 @@ const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
} }
} }
static static void PTRCALL
void PREFIX(updatePosition)(const ENCODING *enc, PREFIX(updatePosition)(const ENCODING *enc,
const char *ptr, const char *ptr,
const char *end, const char *end,
POSITION *pos) POSITION *pos)
@ -1766,3 +1776,4 @@ void PREFIX(updatePosition)(const ENCODING *enc,
#undef CHECK_NAME_CASES #undef CHECK_NAME_CASES
#undef CHECK_NMSTRT_CASE #undef CHECK_NMSTRT_CASE
#undef CHECK_NMSTRT_CASES #undef CHECK_NMSTRT_CASES

View file

@ -1,22 +1,25 @@
const ENCODING *NS(XmlGetUtf8InternalEncoding)(void) const ENCODING *
NS(XmlGetUtf8InternalEncoding)(void)
{ {
return &ns(internal_utf8_encoding).enc; return &ns(internal_utf8_encoding).enc;
} }
const ENCODING *NS(XmlGetUtf16InternalEncoding)(void) const ENCODING *
NS(XmlGetUtf16InternalEncoding)(void)
{ {
#if XML_BYTE_ORDER == 12 #if BYTEORDER == 1234
return &ns(internal_little2_encoding).enc; return &ns(internal_little2_encoding).enc;
#elif XML_BYTE_ORDER == 21 #elif BYTEORDER == 4321
return &ns(internal_big2_encoding).enc; return &ns(internal_big2_encoding).enc;
#else #else
const short n = 1; const short n = 1;
return *(const char *)&n ? &ns(internal_little2_encoding).enc : &ns(internal_big2_encoding).enc; return (*(const char *)&n
? &ns(internal_little2_encoding).enc
: &ns(internal_big2_encoding).enc);
#endif #endif
} }
static static const ENCODING *NS(encodings)[] = {
const ENCODING *NS(encodings)[] = {
&ns(latin1_encoding).enc, &ns(latin1_encoding).enc,
&ns(ascii_encoding).enc, &ns(ascii_encoding).enc,
&ns(utf8_encoding).enc, &ns(utf8_encoding).enc,
@ -26,21 +29,25 @@ const ENCODING *NS(encodings)[] = {
&ns(utf8_encoding).enc /* NO_ENC */ &ns(utf8_encoding).enc /* NO_ENC */
}; };
static static int PTRCALL
int NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, ptr, end, nextTokPtr); return initScan(NS(encodings), (const INIT_ENCODING *)enc,
XML_PROLOG_STATE, ptr, end, nextTokPtr);
} }
static static int PTRCALL
int NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, ptr, end, nextTokPtr); return initScan(NS(encodings), (const INIT_ENCODING *)enc,
XML_CONTENT_STATE, ptr, end, nextTokPtr);
} }
int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *name) int
NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr,
const char *name)
{ {
int i = getEncodingIndex(name); int i = getEncodingIndex(name);
if (i == UNKNOWN_ENC) if (i == UNKNOWN_ENC)
@ -54,8 +61,8 @@ int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *n
return 1; return 1;
} }
static static const ENCODING *
const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end)
{ {
#define ENCODING_MAX 128 #define ENCODING_MAX 128
char buf[ENCODING_MAX]; char buf[ENCODING_MAX];
@ -73,7 +80,8 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha
return NS(encodings)[i]; return NS(encodings)[i];
} }
int NS(XmlParseXmlDecl)(int isGeneralTextEntity, int
NS(XmlParseXmlDecl)(int isGeneralTextEntity,
const ENCODING *enc, const ENCODING *enc,
const char *ptr, const char *ptr,
const char *end, const char *end,

View file

@ -690,9 +690,9 @@ class PyBuildExt(build_ext):
# More information on Expat can be found at www.libexpat.org. # More information on Expat can be found at www.libexpat.org.
# #
if sys.byteorder == "little": if sys.byteorder == "little":
xmlbo = "12" xmlbo = "1234"
else: else:
xmlbo = "21" xmlbo = "4321"
expatinc = os.path.join(os.getcwd(), srcdir, 'Modules', 'expat') expatinc = os.path.join(os.getcwd(), srcdir, 'Modules', 'expat')
exts.append(Extension('pyexpat', exts.append(Extension('pyexpat',
sources = [ sources = [
@ -702,10 +702,9 @@ class PyBuildExt(build_ext):
'expat/xmltok.c', 'expat/xmltok.c',
], ],
define_macros = [ define_macros = [
('HAVE_EXPAT_H',None),
('XML_NS', '1'), ('XML_NS', '1'),
('XML_DTD', '1'), ('XML_DTD', '1'),
('XML_BYTE_ORDER', xmlbo), ('BYTEORDER', xmlbo),
('XML_CONTEXT_BYTES','1024'), ('XML_CONTEXT_BYTES','1024'),
], ],
include_dirs = [expatinc] include_dirs = [expatinc]