bpo-29591: Upgrade Modules/expat to libexpat 2.2 (#2164) (#2200)

* bpo-29591: Upgrade Modules/expat to libexpat 2.2

* bpo-29591: Restore Python changes on expat

* bpo-29591: Remove expat config of unsupported platforms

Remove the configuration (Modules/expat/*config.h) of unsupported
platforms:

* Amiga
* MacOS Classic on PPC32
* Open Watcom

* bpo-29591: Remove useless XML_HAS_SET_HASH_SALT

The XML_HAS_SET_HASH_SALT define of Modules/expat/expat.h became
useless since our local expat copy was upgraded to expat 2.1 (it's now
expat 2.2.0).

(cherry picked from commit 23ec4b57e1)
This commit is contained in:
Victor Stinner 2017-06-15 01:26:16 +02:00 committed by GitHub
parent 8a39af9457
commit 86b95370c4
14 changed files with 527 additions and 476 deletions

View file

@ -51,6 +51,10 @@ Core and Builtins
Library Library
------- -------
- [Security] bpo-29591: Update expat copy from 2.1.1 to 2.2.0 to get fixes
of CVE-2016-0718 and CVE-2016-4472. See
https://sourceforge.net/p/expat/bugs/537/ for more information.
- bpo-24484: Avoid race condition in multiprocessing cleanup (#2159) - bpo-24484: Avoid race condition in multiprocessing cleanup (#2159)
- bpo-28994: The traceback no longer displayed for SystemExit raised in - bpo-28994: The traceback no longer displayed for SystemExit raised in

View file

@ -1,6 +1,5 @@
Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper
and Clark Cooper Copyright (c) 2001-2016 Expat maintainers
Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -1,32 +0,0 @@
#ifndef AMIGACONFIG_H
#define AMIGACONFIG_H
/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */
#define BYTEORDER 4321
/* Define to 1 if you have the `bcopy' function. */
#define HAVE_BCOPY 1
/* Define to 1 if you have the <check.h> header file. */
#undef HAVE_CHECK_H
/* Define to 1 if you have the `memmove' function. */
#define HAVE_MEMMOVE 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* whether byteorder is bigendian */
#define WORDS_BIGENDIAN
/* Define to specify how much context to retain around the current parse
point. */
#define XML_CONTEXT_BYTES 1024
/* Define to make parameter entity parsing functionality available. */
#define XML_DTD
/* Define to make XML Namespaces functionality available. */
#define XML_NS
#endif /* AMIGACONFIG_H */

View file

@ -342,7 +342,7 @@ XML_SetEntityDeclHandler(XML_Parser parser,
XML_EntityDeclHandler handler); XML_EntityDeclHandler handler);
/* OBSOLETE -- OBSOLETE -- OBSOLETE /* OBSOLETE -- OBSOLETE -- OBSOLETE
This handler has been superceded by the EntityDeclHandler above. This handler has been superseded by the EntityDeclHandler above.
It is provided here for backward compatibility. It is provided here for backward compatibility.
This is called for a declaration of an unparsed (NDATA) entity. This is called for a declaration of an unparsed (NDATA) entity.
@ -915,8 +915,6 @@ XMLPARSEAPI(int)
XML_SetHashSalt(XML_Parser parser, XML_SetHashSalt(XML_Parser parser,
unsigned long hash_salt); unsigned long hash_salt);
#define XML_HAS_SET_HASH_SALT /* Python Only: Defined for pyexpat.c. */
/* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then
XML_GetErrorCode returns information about the error. XML_GetErrorCode returns information about the error.
*/ */
@ -975,9 +973,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model);
/* Exposing the memory handling functions used in Expat */ /* Exposing the memory handling functions used in Expat */
XMLPARSEAPI(void *) XMLPARSEAPI(void *)
XML_ATTR_MALLOC
XML_ATTR_ALLOC_SIZE(2)
XML_MemMalloc(XML_Parser parser, size_t size); XML_MemMalloc(XML_Parser parser, size_t size);
XMLPARSEAPI(void *) XMLPARSEAPI(void *)
XML_ATTR_ALLOC_SIZE(3)
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
XMLPARSEAPI(void) XMLPARSEAPI(void)
@ -1033,14 +1034,12 @@ XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void); XML_GetFeatureList(void);
/* Expat follows the GNU/Linux convention of odd number minor version for /* Expat follows the semantic versioning convention.
beta/development releases and even number minor version for stable See http://semver.org.
releases. Micro is bumped with each release, and set to 0 with each
change to major or minor version.
*/ */
#define XML_MAJOR_VERSION 2 #define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 1 #define XML_MINOR_VERSION 2
#define XML_MICRO_VERSION 1 #define XML_MICRO_VERSION 0
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -69,12 +69,26 @@
#endif #endif
#endif /* not defined XML_STATIC */ #endif /* not defined XML_STATIC */
#if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4)
#define XMLIMPORT __attribute__ ((visibility ("default")))
#endif
/* If we didn't define it above, define it away: */ /* If we didn't define it above, define it away: */
#ifndef XMLIMPORT #ifndef XMLIMPORT
#define XMLIMPORT #define XMLIMPORT
#endif #endif
#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
#define XML_ATTR_MALLOC __attribute__((__malloc__))
#else
#define XML_ATTR_MALLOC
#endif
#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))
#else
#define XML_ATTR_ALLOC_SIZE(x)
#endif
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL #define XMLPARSEAPI(type) XMLIMPORT type XMLCALL

View file

@ -71,3 +71,25 @@
#define inline #define inline
#endif #endif
#endif #endif
#ifndef UNUSED_P
# ifdef __GNUC__
# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__))
# else
# define UNUSED_P(p) UNUSED_ ## p
# endif
#endif
#ifdef __cplusplus
extern "C" {
#endif
void
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef);
#ifdef __cplusplus
}
#endif

View file

@ -1,53 +0,0 @@
/*================================================================
** Copyright 2000, Clark Cooper
** All rights reserved.
**
** This is free software. You are permitted to copy, distribute, or modify
** it under the terms of the MIT/X license (contained in the COPYING file
** with this distribution.)
**
*/
#ifndef MACCONFIG_H
#define MACCONFIG_H
/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */
#define BYTEORDER 4321
/* Define to 1 if you have the `bcopy' function. */
#undef HAVE_BCOPY
/* Define to 1 if you have the `memmove' function. */
#define HAVE_MEMMOVE
/* Define to 1 if you have a working `mmap' system call. */
#undef HAVE_MMAP
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* whether byteorder is bigendian */
#define WORDS_BIGENDIAN
/* Define to specify how much context to retain around the current parse
point. */
#undef XML_CONTEXT_BYTES
/* Define to make parameter entity parsing functionality available. */
#define XML_DTD
/* Define to make XML Namespaces functionality available. */
#define XML_NS
/* Define to empty if `const' does not conform to ANSI C. */
#undef const
/* Define to `long' if <sys/types.h> does not define. */
#define off_t long
/* Define to `unsigned' if <sys/types.h> does not define. */
#undef size_t
#endif /* ifndef MACCONFIG_H */

View file

@ -1,47 +0,0 @@
/* expat_config.h for use with Open Watcom 1.5 and above. */
#ifndef WATCOMCONFIG_H
#define WATCOMCONFIG_H
#ifdef __NT__
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#undef WIN32_LEAN_AND_MEAN
#endif
/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */
#define BYTEORDER 1234
/* Define to 1 if you have the `memmove' function. */
#define HAVE_MEMMOVE 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "expat-bugs@mail.libexpat.org"
/* Define to the full name of this package. */
#define PACKAGE_NAME "expat"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "expat 2.0.0"
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the version of this package. */
#define PACKAGE_VERSION "2.0.0"
/* Define to specify how much context to retain around the current parse
point. */
#define XML_CONTEXT_BYTES 1024
/* Define to make parameter entity parsing functionality available. */
#define XML_DTD 1
/* Define to make XML Namespaces functionality available. */
#define XML_NS 1
#endif

View file

@ -2,9 +2,22 @@
See the file COPYING for copying permission. See the file COPYING for copying permission.
*/ */
#include <stddef.h>
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
#ifdef WIN32
#define getpid GetCurrentProcessId
#else
#include <sys/time.h> /* gettimeofday() */
#include <sys/types.h> /* getpid() */
#include <unistd.h> /* getpid() */
#endif
#define XML_BUILDING_EXPAT 1 #define XML_BUILDING_EXPAT 1
#ifdef COMPILED_FROM_DSP #ifdef WIN32
#include "winconfig.h" #include "winconfig.h"
#elif defined(MACOS_CLASSIC) #elif defined(MACOS_CLASSIC)
#include "macconfig.h" #include "macconfig.h"
@ -14,13 +27,7 @@
#include "watcomconfig.h" #include "watcomconfig.h"
#elif defined(HAVE_EXPAT_CONFIG_H) #elif defined(HAVE_EXPAT_CONFIG_H)
#include <expat_config.h> #include <expat_config.h>
#endif /* ndef COMPILED_FROM_DSP */ #endif /* ndef WIN32 */
#include <stddef.h>
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
#include <time.h> /* time() */
#include "ascii.h" #include "ascii.h"
#include "expat.h" #include "expat.h"
@ -432,7 +439,7 @@ static ELEMENT_TYPE *
getElementType(XML_Parser parser, const ENCODING *enc, getElementType(XML_Parser parser, const ENCODING *enc,
const char *ptr, const char *end); const char *ptr, const char *end);
static unsigned long generate_hash_secret_salt(void); static unsigned long generate_hash_secret_salt(XML_Parser parser);
static XML_Bool startParsing(XML_Parser parser); static XML_Bool startParsing(XML_Parser parser);
static XML_Parser static XML_Parser
@ -691,11 +698,38 @@ static const XML_Char implicitContext[] = {
}; };
static unsigned long static unsigned long
generate_hash_secret_salt(void) gather_time_entropy(void)
{ {
unsigned int seed = time(NULL) % UINT_MAX; #ifdef WIN32
srand(seed); FILETIME ft;
return rand(); GetSystemTimeAsFileTime(&ft); /* never fails */
return ft.dwHighDateTime ^ ft.dwLowDateTime;
#else
struct timeval tv;
int gettimeofday_res;
gettimeofday_res = gettimeofday(&tv, NULL);
assert (gettimeofday_res == 0);
/* Microseconds time is <20 bits entropy */
return tv.tv_usec;
#endif
}
static unsigned long
generate_hash_secret_salt(XML_Parser parser)
{
/* Process ID is 0 bits entropy if attacker has local access
* XML_Parser address is few bits of entropy if attacker has local access */
const unsigned long entropy =
gather_time_entropy() ^ getpid() ^ (unsigned long)parser;
/* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
if (sizeof(unsigned long) == 4) {
return entropy * 2147483647;
} else {
return entropy * (unsigned long)2305843009213693951;
}
} }
static XML_Bool /* only valid for root parser */ static XML_Bool /* only valid for root parser */
@ -703,7 +737,7 @@ startParsing(XML_Parser parser)
{ {
/* hash functions must be initialized before setContext() is called */ /* hash functions must be initialized before setContext() is called */
if (hash_secret_salt == 0) if (hash_secret_salt == 0)
hash_secret_salt = generate_hash_secret_salt(); hash_secret_salt = generate_hash_secret_salt(parser);
if (ns) { if (ns) {
/* implicit context only set for root parser, since child /* implicit context only set for root parser, since child
parsers (i.e. external entity parsers) will inherit it parsers (i.e. external entity parsers) will inherit it
@ -1695,15 +1729,15 @@ XML_GetBuffer(XML_Parser parser, int len)
if (len > bufferLim - bufferEnd) { if (len > bufferLim - bufferEnd) {
#ifdef XML_CONTEXT_BYTES #ifdef XML_CONTEXT_BYTES
int keep; int keep;
#endif #endif /* defined XML_CONTEXT_BYTES */
int neededSize = len + (int)(bufferEnd - bufferPtr); /* Do not invoke signed arithmetic overflow: */
int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
if (neededSize < 0) { if (neededSize < 0) {
errorCode = XML_ERROR_NO_MEMORY; errorCode = XML_ERROR_NO_MEMORY;
return NULL; return NULL;
} }
#ifdef XML_CONTEXT_BYTES #ifdef XML_CONTEXT_BYTES
keep = (int)(bufferPtr - buffer); keep = (int)(bufferPtr - buffer);
if (keep > XML_CONTEXT_BYTES) if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES; keep = XML_CONTEXT_BYTES;
neededSize += keep; neededSize += keep;
@ -1728,7 +1762,8 @@ XML_GetBuffer(XML_Parser parser, int len)
if (bufferSize == 0) if (bufferSize == 0)
bufferSize = INIT_BUFFER_SIZE; bufferSize = INIT_BUFFER_SIZE;
do { do {
bufferSize *= 2; /* Do not invoke signed arithmetic overflow: */
bufferSize = (int) (2U * (unsigned) bufferSize);
} while (bufferSize < neededSize && bufferSize > 0); } while (bufferSize < neededSize && bufferSize > 0);
if (bufferSize <= 0) { if (bufferSize <= 0) {
errorCode = XML_ERROR_NO_MEMORY; errorCode = XML_ERROR_NO_MEMORY;
@ -1855,7 +1890,7 @@ XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser) XML_GetCurrentByteIndex(XML_Parser parser)
{ {
if (eventPtr) if (eventPtr)
return parseEndByteIndex - (parseEndPtr - eventPtr); return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
return -1; return -1;
} }
@ -2429,11 +2464,11 @@ doContent(XML_Parser parser,
for (;;) { for (;;) {
int bufSize; int bufSize;
int convLen; int convLen;
XmlConvert(enc, const enum XML_Convert_Result convert_res = XmlConvert(enc,
&fromPtr, rawNameEnd, &fromPtr, rawNameEnd,
(ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
convLen = (int)(toPtr - (XML_Char *)tag->buf); convLen = (int)(toPtr - (XML_Char *)tag->buf);
if (fromPtr == rawNameEnd) { if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
tag->name.strLen = convLen; tag->name.strLen = convLen;
break; break;
} }
@ -2654,11 +2689,11 @@ doContent(XML_Parser parser,
if (MUST_CONVERT(enc, s)) { if (MUST_CONVERT(enc, s)) {
for (;;) { for (;;) {
ICHAR *dataPtr = (ICHAR *)dataBuf; ICHAR *dataPtr = (ICHAR *)dataBuf;
XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = s; *eventEndPP = s;
charDataHandler(handlerArg, dataBuf, charDataHandler(handlerArg, dataBuf,
(int)(dataPtr - (ICHAR *)dataBuf)); (int)(dataPtr - (ICHAR *)dataBuf));
if (s == next) if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break; break;
*eventPP = s; *eventPP = s;
} }
@ -3264,11 +3299,11 @@ doCdataSection(XML_Parser parser,
if (MUST_CONVERT(enc, s)) { if (MUST_CONVERT(enc, s)) {
for (;;) { for (;;) {
ICHAR *dataPtr = (ICHAR *)dataBuf; ICHAR *dataPtr = (ICHAR *)dataBuf;
XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = next; *eventEndPP = next;
charDataHandler(handlerArg, dataBuf, charDataHandler(handlerArg, dataBuf,
(int)(dataPtr - (ICHAR *)dataBuf)); (int)(dataPtr - (ICHAR *)dataBuf));
if (s == next) if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break; break;
*eventPP = s; *eventPP = s;
} }
@ -4927,9 +4962,9 @@ internalEntityProcessor(XML_Parser parser,
static enum XML_Error PTRCALL static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser, errorProcessor(XML_Parser parser,
const char *s, const char *UNUSED_P(s),
const char *end, const char *UNUSED_P(end),
const char **nextPtr) const char **UNUSED_P(nextPtr))
{ {
return errorCode; return errorCode;
} }
@ -5345,6 +5380,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
const char *s, const char *end) const char *s, const char *end)
{ {
if (MUST_CONVERT(enc, s)) { if (MUST_CONVERT(enc, s)) {
enum XML_Convert_Result convert_res;
const char **eventPP; const char **eventPP;
const char **eventEndPP; const char **eventEndPP;
if (enc == encoding) { if (enc == encoding) {
@ -5357,11 +5393,11 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
} }
do { do {
ICHAR *dataPtr = (ICHAR *)dataBuf; ICHAR *dataPtr = (ICHAR *)dataBuf;
XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = s; *eventEndPP = s;
defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
*eventPP = s; *eventPP = s;
} while (s != end); } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
} }
else else
defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
@ -6166,8 +6202,8 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc,
if (!pool->ptr && !poolGrow(pool)) if (!pool->ptr && !poolGrow(pool))
return NULL; return NULL;
for (;;) { for (;;) {
XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
if (ptr == end) if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break; break;
if (!poolGrow(pool)) if (!poolGrow(pool))
return NULL; return NULL;
@ -6251,8 +6287,13 @@ poolGrow(STRING_POOL *pool)
} }
} }
if (pool->blocks && pool->start == pool->blocks->s) { if (pool->blocks && pool->start == pool->blocks->s) {
int blockSize = (int)(pool->end - pool->start)*2; BLOCK *temp;
BLOCK *temp = (BLOCK *) int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
if (blockSize < 0)
return XML_FALSE;
temp = (BLOCK *)
pool->mem->realloc_fcn(pool->blocks, pool->mem->realloc_fcn(pool->blocks,
(offsetof(BLOCK, s) (offsetof(BLOCK, s)
+ blockSize * sizeof(XML_Char))); + blockSize * sizeof(XML_Char)));
@ -6267,6 +6308,10 @@ poolGrow(STRING_POOL *pool)
else { else {
BLOCK *tem; BLOCK *tem;
int blockSize = (int)(pool->end - pool->start); int blockSize = (int)(pool->end - pool->start);
if (blockSize < 0)
return XML_FALSE;
if (blockSize < INIT_BLOCK_SIZE) if (blockSize < INIT_BLOCK_SIZE)
blockSize = INIT_BLOCK_SIZE; blockSize = INIT_BLOCK_SIZE;
else else

View file

@ -4,7 +4,7 @@
#include <stddef.h> #include <stddef.h>
#ifdef COMPILED_FROM_DSP #ifdef WIN32
#include "winconfig.h" #include "winconfig.h"
#elif defined(MACOS_CLASSIC) #elif defined(MACOS_CLASSIC)
#include "macconfig.h" #include "macconfig.h"
@ -16,7 +16,7 @@
#ifdef HAVE_EXPAT_CONFIG_H #ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h> #include <expat_config.h>
#endif #endif
#endif /* ndef COMPILED_FROM_DSP */ #endif /* ndef WIN32 */
#include "expat_external.h" #include "expat_external.h"
#include "internal.h" #include "internal.h"
@ -195,9 +195,9 @@ prolog1(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
prolog2(PROLOG_STATE *state, prolog2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -216,9 +216,9 @@ prolog2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
doctype0(PROLOG_STATE *state, doctype0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -264,9 +264,9 @@ doctype1(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
doctype2(PROLOG_STATE *state, doctype2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -281,9 +281,9 @@ doctype2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
doctype3(PROLOG_STATE *state, doctype3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -298,9 +298,9 @@ doctype3(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
doctype4(PROLOG_STATE *state, doctype4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -318,9 +318,9 @@ doctype4(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
doctype5(PROLOG_STATE *state, doctype5(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -437,9 +437,9 @@ externalSubset1(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity0(PROLOG_STATE *state, entity0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -457,9 +457,9 @@ entity0(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity1(PROLOG_STATE *state, entity1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -502,9 +502,9 @@ entity2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity3(PROLOG_STATE *state, entity3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -519,9 +519,9 @@ entity3(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity4(PROLOG_STATE *state, entity4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -559,9 +559,9 @@ entity5(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity6(PROLOG_STATE *state, entity6(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -605,9 +605,9 @@ entity7(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity8(PROLOG_STATE *state, entity8(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -622,9 +622,9 @@ entity8(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity9(PROLOG_STATE *state, entity9(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -639,9 +639,9 @@ entity9(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
entity10(PROLOG_STATE *state, entity10(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -656,9 +656,9 @@ entity10(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
notation0(PROLOG_STATE *state, notation0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -697,9 +697,9 @@ notation1(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
notation2(PROLOG_STATE *state, notation2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -714,9 +714,9 @@ notation2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
notation3(PROLOG_STATE *state, notation3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -732,9 +732,9 @@ notation3(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
notation4(PROLOG_STATE *state, notation4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -753,9 +753,9 @@ notation4(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist0(PROLOG_STATE *state, attlist0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -771,9 +771,9 @@ attlist0(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist1(PROLOG_STATE *state, attlist1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -833,9 +833,9 @@ attlist2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist3(PROLOG_STATE *state, attlist3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -852,9 +852,9 @@ attlist3(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist4(PROLOG_STATE *state, attlist4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -872,9 +872,9 @@ attlist4(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist5(PROLOG_STATE *state, attlist5(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -889,9 +889,9 @@ attlist5(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist6(PROLOG_STATE *state, attlist6(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -906,9 +906,9 @@ attlist6(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist7(PROLOG_STATE *state, attlist7(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -967,9 +967,9 @@ attlist8(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
attlist9(PROLOG_STATE *state, attlist9(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -984,9 +984,9 @@ attlist9(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element0(PROLOG_STATE *state, element0(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -1072,9 +1072,9 @@ element2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element3(PROLOG_STATE *state, element3(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -1097,9 +1097,9 @@ element3(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element4(PROLOG_STATE *state, element4(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -1115,9 +1115,9 @@ element4(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element5(PROLOG_STATE *state, element5(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -1136,9 +1136,9 @@ element5(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element6(PROLOG_STATE *state, element6(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -1166,9 +1166,9 @@ element6(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
element7(PROLOG_STATE *state, element7(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -1240,9 +1240,9 @@ condSect0(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
condSect1(PROLOG_STATE *state, condSect1(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -1258,9 +1258,9 @@ condSect1(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
condSect2(PROLOG_STATE *state, condSect2(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -1277,9 +1277,9 @@ condSect2(PROLOG_STATE *state,
static int PTRCALL static int PTRCALL
declClose(PROLOG_STATE *state, declClose(PROLOG_STATE *state,
int tok, int tok,
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
switch (tok) { switch (tok) {
case XML_TOK_PROLOG_S: case XML_TOK_PROLOG_S:
@ -1292,11 +1292,11 @@ declClose(PROLOG_STATE *state,
} }
static int PTRCALL static int PTRCALL
error(PROLOG_STATE *state, error(PROLOG_STATE *UNUSED_P(state),
int tok, int UNUSED_P(tok),
const char *ptr, const char *UNUSED_P(ptr),
const char *end, const char *UNUSED_P(end),
const ENCODING *enc) const ENCODING *UNUSED_P(enc))
{ {
return XML_ROLE_NONE; return XML_ROLE_NONE;
} }

View file

@ -4,7 +4,7 @@
#include <stddef.h> #include <stddef.h>
#ifdef COMPILED_FROM_DSP #ifdef WIN32
#include "winconfig.h" #include "winconfig.h"
#elif defined(MACOS_CLASSIC) #elif defined(MACOS_CLASSIC)
#include "macconfig.h" #include "macconfig.h"
@ -16,7 +16,7 @@
#ifdef HAVE_EXPAT_CONFIG_H #ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h> #include <expat_config.h>
#endif #endif
#endif /* ndef COMPILED_FROM_DSP */ #endif /* ndef WIN32 */
#include "expat_external.h" #include "expat_external.h"
#include "internal.h" #include "internal.h"
@ -46,7 +46,7 @@
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
#define UCS2_GET_NAMING(pages, hi, lo) \ #define UCS2_GET_NAMING(pages, hi, lo) \
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits between /* A 2 byte UTF-8 representation splits the characters 11 bits between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
@ -56,7 +56,7 @@
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) \ + ((((byte)[0]) & 3) << 1) \
+ ((((byte)[1]) >> 5) & 1)] \ + ((((byte)[1]) >> 5) & 1)] \
& (1 << (((byte)[1]) & 0x1F))) & (1u << (((byte)[1]) & 0x1F)))
/* A 3 byte UTF-8 representation splits the characters 16 bits between /* A 3 byte UTF-8 representation splits the characters 16 bits between
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
@ -69,7 +69,7 @@
<< 3) \ << 3) \
+ ((((byte)[1]) & 3) << 1) \ + ((((byte)[1]) & 3) << 1) \
+ ((((byte)[2]) >> 5) & 1)] \ + ((((byte)[2]) >> 5) & 1)] \
& (1 << (((byte)[2]) & 0x1F))) & (1u << (((byte)[2]) & 0x1F)))
#define UTF8_GET_NAMING(pages, p, n) \ #define UTF8_GET_NAMING(pages, p, n) \
((n) == 2 \ ((n) == 2 \
@ -122,19 +122,19 @@
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
static int PTRFASTCALL static int PTRFASTCALL
isNever(const ENCODING *enc, const char *p) isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
{ {
return 0; return 0;
} }
static int PTRFASTCALL static int PTRFASTCALL
utf8_isName2(const ENCODING *enc, const char *p) utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
} }
static int PTRFASTCALL static int PTRFASTCALL
utf8_isName3(const ENCODING *enc, const char *p) utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
} }
@ -142,13 +142,13 @@ utf8_isName3(const ENCODING *enc, const char *p)
#define utf8_isName4 isNever #define utf8_isName4 isNever
static int PTRFASTCALL static int PTRFASTCALL
utf8_isNmstrt2(const ENCODING *enc, const char *p) utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
} }
static int PTRFASTCALL static int PTRFASTCALL
utf8_isNmstrt3(const ENCODING *enc, const char *p) utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
} }
@ -156,19 +156,19 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p)
#define utf8_isNmstrt4 isNever #define utf8_isNmstrt4 isNever
static int PTRFASTCALL static int PTRFASTCALL
utf8_isInvalid2(const ENCODING *enc, const char *p) utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_INVALID2((const unsigned char *)p); return UTF8_INVALID2((const unsigned char *)p);
} }
static int PTRFASTCALL static int PTRFASTCALL
utf8_isInvalid3(const ENCODING *enc, const char *p) utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_INVALID3((const unsigned char *)p); return UTF8_INVALID3((const unsigned char *)p);
} }
static int PTRFASTCALL static int PTRFASTCALL
utf8_isInvalid4(const ENCODING *enc, const char *p) utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
{ {
return UTF8_INVALID4((const unsigned char *)p); return UTF8_INVALID4((const unsigned char *)p);
} }
@ -222,6 +222,17 @@ struct normal_encoding {
E ## isInvalid3, \ E ## isInvalid3, \
E ## isInvalid4 E ## isInvalid4
#define NULL_VTABLE \
/* isName2 */ NULL, \
/* isName3 */ NULL, \
/* isName4 */ NULL, \
/* isNmstrt2 */ NULL, \
/* isNmstrt3 */ NULL, \
/* isNmstrt4 */ NULL, \
/* isInvalid2 */ NULL, \
/* isInvalid3 */ NULL, \
/* isInvalid4 */ NULL
static int FASTCALL checkCharRefNumber(int); static int FASTCALL checkCharRefNumber(int);
#include "xmltok_impl.h" #include "xmltok_impl.h"
@ -318,39 +329,89 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval4 = 0xf0 UTF8_cval4 = 0xf0
}; };
static void PTRCALL void
utf8_toUtf8(const ENCODING *enc, align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
{
const char * fromLim = *fromLimRef;
size_t walked = 0;
for (; fromLim > from; fromLim--, walked++) {
const unsigned char prev = (unsigned char)fromLim[-1];
if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
if (walked + 1 >= 4) {
fromLim += 4 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
if (walked + 1 >= 3) {
fromLim += 3 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
if (walked + 1 >= 2) {
fromLim += 2 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
break;
}
}
*fromLimRef = fromLim;
}
static enum XML_Convert_Result PTRCALL
utf8_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
{ {
enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
char *to; char *to;
const char *from; const char *from;
if (fromLim - *fromP > toLim - *toP) { if (fromLim - *fromP > toLim - *toP) {
/* Avoid copying partial characters. */ /* Avoid copying partial characters. */
for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) res = XML_CONVERT_OUTPUT_EXHAUSTED;
if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) fromLim = *fromP + (toLim - *toP);
break; align_limit_to_full_utf8_characters(*fromP, &fromLim);
} }
for (to = *toP, from = *fromP; from != fromLim; from++, to++) for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
*to = *from; *to = *from;
*fromP = from; *fromP = from;
*toP = to; *toP = to;
if ((to == toLim) && (from < fromLim))
return XML_CONVERT_OUTPUT_EXHAUSTED;
else
return res;
} }
static void PTRCALL static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING *enc, utf8_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) unsigned short **toP, const unsigned short *toLim)
{ {
enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
unsigned short *to = *toP; unsigned short *to = *toP;
const char *from = *fromP; const char *from = *fromP;
while (from != fromLim && to != toLim) { while (from < fromLim && to < toLim) {
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
case BT_LEAD2: case BT_LEAD2:
if (fromLim - from < 2) {
res = XML_CONVERT_INPUT_INCOMPLETE;
break;
}
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
from += 2; from += 2;
break; break;
case BT_LEAD3: case BT_LEAD3:
if (fromLim - from < 3) {
res = XML_CONVERT_INPUT_INCOMPLETE;
break;
}
*to++ = (unsigned short)(((from[0] & 0xf) << 12) *to++ = (unsigned short)(((from[0] & 0xf) << 12)
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
from += 3; from += 3;
@ -358,8 +419,14 @@ utf8_toUtf16(const ENCODING *enc,
case BT_LEAD4: case BT_LEAD4:
{ {
unsigned long n; unsigned long n;
if (to + 1 == toLim) if (toLim - to < 2) {
res = XML_CONVERT_OUTPUT_EXHAUSTED;
goto after; goto after;
}
if (fromLim - from < 4) {
res = XML_CONVERT_INPUT_INCOMPLETE;
goto after;
}
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
n -= 0x10000; n -= 0x10000;
@ -377,6 +444,7 @@ utf8_toUtf16(const ENCODING *enc,
after: after:
*fromP = from; *fromP = from;
*toP = to; *toP = to;
return res;
} }
#ifdef XML_NS #ifdef XML_NS
@ -425,38 +493,43 @@ static const struct normal_encoding internal_utf8_encoding = {
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
}; };
static void PTRCALL static enum XML_Convert_Result PTRCALL
latin1_toUtf8(const ENCODING *enc, latin1_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
{ {
for (;;) { for (;;) {
unsigned char c; unsigned char c;
if (*fromP == fromLim) if (*fromP == fromLim)
break; return XML_CONVERT_COMPLETED;
c = (unsigned char)**fromP; c = (unsigned char)**fromP;
if (c & 0x80) { if (c & 0x80) {
if (toLim - *toP < 2) if (toLim - *toP < 2)
break; return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = (char)((c >> 6) | UTF8_cval2); *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
*(*toP)++ = (char)((c & 0x3f) | 0x80); *(*toP)++ = (char)((c & 0x3f) | 0x80);
(*fromP)++; (*fromP)++;
} }
else { else {
if (*toP == toLim) if (*toP == toLim)
break; return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = *(*fromP)++; *(*toP)++ = *(*fromP)++;
} }
} }
} }
static void PTRCALL static enum XML_Convert_Result PTRCALL
latin1_toUtf16(const ENCODING *enc, latin1_toUtf16(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) unsigned short **toP, const unsigned short *toLim)
{ {
while (*fromP != fromLim && *toP != toLim) while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = (unsigned char)*(*fromP)++; *(*toP)++ = (unsigned char)*(*fromP)++;
if ((*toP == toLim) && (*fromP < fromLim))
return XML_CONVERT_OUTPUT_EXHAUSTED;
else
return XML_CONVERT_COMPLETED;
} }
#ifdef XML_NS #ifdef XML_NS
@ -467,7 +540,7 @@ static const struct normal_encoding latin1_encoding_ns = {
#include "asciitab.h" #include "asciitab.h"
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(sb_) STANDARD_VTABLE(sb_) NULL_VTABLE
}; };
#endif #endif
@ -480,16 +553,21 @@ static const struct normal_encoding latin1_encoding = {
#undef BT_COLON #undef BT_COLON
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(sb_) STANDARD_VTABLE(sb_) NULL_VTABLE
}; };
static void PTRCALL static enum XML_Convert_Result PTRCALL
ascii_toUtf8(const ENCODING *enc, ascii_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
{ {
while (*fromP != fromLim && *toP != toLim) while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = *(*fromP)++; *(*toP)++ = *(*fromP)++;
if ((*toP == toLim) && (*fromP < fromLim))
return XML_CONVERT_OUTPUT_EXHAUSTED;
else
return XML_CONVERT_COMPLETED;
} }
#ifdef XML_NS #ifdef XML_NS
@ -500,7 +578,7 @@ static const struct normal_encoding ascii_encoding_ns = {
#include "asciitab.h" #include "asciitab.h"
/* BT_NONXML == 0 */ /* BT_NONXML == 0 */
}, },
STANDARD_VTABLE(sb_) STANDARD_VTABLE(sb_) NULL_VTABLE
}; };
#endif #endif
@ -513,7 +591,7 @@ static const struct normal_encoding ascii_encoding = {
#undef BT_COLON #undef BT_COLON
/* BT_NONXML == 0 */ /* BT_NONXML == 0 */
}, },
STANDARD_VTABLE(sb_) STANDARD_VTABLE(sb_) NULL_VTABLE
}; };
static int PTRFASTCALL static int PTRFASTCALL
@ -536,13 +614,14 @@ unicode_byte_type(char hi, char lo)
} }
#define DEFINE_UTF16_TO_UTF8(E) \ #define DEFINE_UTF16_TO_UTF8(E) \
static void PTRCALL \ static enum XML_Convert_Result PTRCALL \
E ## toUtf8(const ENCODING *enc, \ E ## toUtf8(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \ const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \ char **toP, const char *toLim) \
{ \ { \
const char *from; \ const char *from = *fromP; \
for (from = *fromP; from != fromLim; from += 2) { \ fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
for (; from < fromLim; from += 2) { \
int plane; \ int plane; \
unsigned char lo2; \ unsigned char lo2; \
unsigned char lo = GET_LO(from); \ unsigned char lo = GET_LO(from); \
@ -552,7 +631,7 @@ E ## toUtf8(const ENCODING *enc, \
if (lo < 0x80) { \ if (lo < 0x80) { \
if (*toP == toLim) { \ if (*toP == toLim) { \
*fromP = from; \ *fromP = from; \
return; \ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \ } \
*(*toP)++ = lo; \ *(*toP)++ = lo; \
break; \ break; \
@ -562,7 +641,7 @@ E ## toUtf8(const ENCODING *enc, \
case 0x4: case 0x5: case 0x6: case 0x7: \ case 0x4: case 0x5: case 0x6: case 0x7: \
if (toLim - *toP < 2) { \ if (toLim - *toP < 2) { \
*fromP = from; \ *fromP = from; \
return; \ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \ } \
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \
@ -570,7 +649,7 @@ E ## toUtf8(const ENCODING *enc, \
default: \ default: \
if (toLim - *toP < 3) { \ if (toLim - *toP < 3) { \
*fromP = from; \ *fromP = from; \
return; \ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \ } \
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \ *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
@ -580,7 +659,11 @@ E ## toUtf8(const ENCODING *enc, \
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
if (toLim - *toP < 4) { \ if (toLim - *toP < 4) { \
*fromP = from; \ *fromP = from; \
return; \ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
if (fromLim - from < 4) { \
*fromP = from; \
return XML_CONVERT_INPUT_INCOMPLETE; \
} \ } \
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \ *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
@ -596,20 +679,32 @@ E ## toUtf8(const ENCODING *enc, \
} \ } \
} \ } \
*fromP = from; \ *fromP = from; \
if (from < fromLim) \
return XML_CONVERT_INPUT_INCOMPLETE; \
else \
return XML_CONVERT_COMPLETED; \
} }
#define DEFINE_UTF16_TO_UTF16(E) \ #define DEFINE_UTF16_TO_UTF16(E) \
static void PTRCALL \ static enum XML_Convert_Result PTRCALL \
E ## toUtf16(const ENCODING *enc, \ E ## toUtf16(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \ const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \ unsigned short **toP, const unsigned short *toLim) \
{ \ { \
enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
/* Avoid copying first half only of surrogate */ \ /* Avoid copying first half only of surrogate */ \
if (fromLim - *fromP > ((toLim - *toP) << 1) \ if (fromLim - *fromP > ((toLim - *toP) << 1) \
&& (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
fromLim -= 2; \ fromLim -= 2; \
for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ res = XML_CONVERT_INPUT_INCOMPLETE; \
} \
for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
if ((*toP == toLim) && (*fromP < fromLim)) \
return XML_CONVERT_OUTPUT_EXHAUSTED; \
else \
return res; \
} }
#define SET2(ptr, ch) \ #define SET2(ptr, ch) \
@ -726,7 +821,7 @@ static const struct normal_encoding little2_encoding_ns = {
#include "asciitab.h" #include "asciitab.h"
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(little2_) STANDARD_VTABLE(little2_) NULL_VTABLE
}; };
#endif #endif
@ -745,7 +840,7 @@ static const struct normal_encoding little2_encoding = {
#undef BT_COLON #undef BT_COLON
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(little2_) STANDARD_VTABLE(little2_) NULL_VTABLE
}; };
#if BYTEORDER != 4321 #if BYTEORDER != 4321
@ -758,7 +853,7 @@ static const struct normal_encoding internal_little2_encoding_ns = {
#include "iasciitab.h" #include "iasciitab.h"
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(little2_) STANDARD_VTABLE(little2_) NULL_VTABLE
}; };
#endif #endif
@ -771,7 +866,7 @@ static const struct normal_encoding internal_little2_encoding = {
#undef BT_COLON #undef BT_COLON
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(little2_) STANDARD_VTABLE(little2_) NULL_VTABLE
}; };
#endif #endif
@ -867,7 +962,7 @@ static const struct normal_encoding big2_encoding_ns = {
#include "asciitab.h" #include "asciitab.h"
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(big2_) STANDARD_VTABLE(big2_) NULL_VTABLE
}; };
#endif #endif
@ -886,7 +981,7 @@ static const struct normal_encoding big2_encoding = {
#undef BT_COLON #undef BT_COLON
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(big2_) STANDARD_VTABLE(big2_) NULL_VTABLE
}; };
#if BYTEORDER != 1234 #if BYTEORDER != 1234
@ -899,7 +994,7 @@ static const struct normal_encoding internal_big2_encoding_ns = {
#include "iasciitab.h" #include "iasciitab.h"
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(big2_) STANDARD_VTABLE(big2_) NULL_VTABLE
}; };
#endif #endif
@ -912,7 +1007,7 @@ static const struct normal_encoding internal_big2_encoding = {
#undef BT_COLON #undef BT_COLON
#include "latin1tab.h" #include "latin1tab.h"
}, },
STANDARD_VTABLE(big2_) STANDARD_VTABLE(big2_) NULL_VTABLE
}; };
#endif #endif
@ -938,7 +1033,7 @@ streqci(const char *s1, const char *s2)
} }
static void PTRCALL static void PTRCALL
initUpdatePosition(const ENCODING *enc, const char *ptr, initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, POSITION *pos) const char *end, POSITION *pos)
{ {
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
@ -1288,7 +1383,7 @@ unknown_isInvalid(const ENCODING *enc, const char *p)
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
} }
static void PTRCALL static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING *enc, unknown_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
char **toP, const char *toLim) char **toP, const char *toLim)
@ -1299,21 +1394,21 @@ unknown_toUtf8(const ENCODING *enc,
const char *utf8; const char *utf8;
int n; int n;
if (*fromP == fromLim) if (*fromP == fromLim)
break; return XML_CONVERT_COMPLETED;
utf8 = uenc->utf8[(unsigned char)**fromP]; utf8 = uenc->utf8[(unsigned char)**fromP];
n = *utf8++; n = *utf8++;
if (n == 0) { if (n == 0) {
int c = uenc->convert(uenc->userData, *fromP); int c = uenc->convert(uenc->userData, *fromP);
n = XmlUtf8Encode(c, buf); n = XmlUtf8Encode(c, buf);
if (n > toLim - *toP) if (n > toLim - *toP)
break; return XML_CONVERT_OUTPUT_EXHAUSTED;
utf8 = buf; utf8 = buf;
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2)); - (BT_LEAD2 - 2));
} }
else { else {
if (n > toLim - *toP) if (n > toLim - *toP)
break; return XML_CONVERT_OUTPUT_EXHAUSTED;
(*fromP)++; (*fromP)++;
} }
do { do {
@ -1322,13 +1417,13 @@ unknown_toUtf8(const ENCODING *enc,
} }
} }
static void PTRCALL static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING *enc, unknown_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim, const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim) unsigned short **toP, const unsigned short *toLim)
{ {
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
while (*fromP != fromLim && *toP != toLim) { while (*fromP < fromLim && *toP < toLim) {
unsigned short c = uenc->utf16[(unsigned char)**fromP]; unsigned short c = uenc->utf16[(unsigned char)**fromP];
if (c == 0) { if (c == 0) {
c = (unsigned short) c = (unsigned short)
@ -1340,6 +1435,11 @@ unknown_toUtf16(const ENCODING *enc,
(*fromP)++; (*fromP)++;
*(*toP)++ = c; *(*toP)++ = c;
} }
if ((*toP == toLim) && (*fromP < fromLim))
return XML_CONVERT_OUTPUT_EXHAUSTED;
else
return XML_CONVERT_COMPLETED;
} }
ENCODING * ENCODING *
@ -1503,7 +1603,7 @@ initScan(const ENCODING * const *encodingTable,
{ {
const ENCODING **encPtr; const ENCODING **encPtr;
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
encPtr = enc->encPtr; encPtr = enc->encPtr;
if (ptr + 1 == end) { if (ptr + 1 == end) {

View file

@ -130,6 +130,12 @@ typedef int (PTRCALL *SCANNER)(const ENCODING *,
const char *, const char *,
const char **); const char **);
enum XML_Convert_Result {
XML_CONVERT_COMPLETED = 0,
XML_CONVERT_INPUT_INCOMPLETE = 1,
XML_CONVERT_OUTPUT_EXHAUSTED = 2 /* and therefore potentially input remaining as well */
};
struct encoding { struct encoding {
SCANNER scanners[XML_N_STATES]; SCANNER scanners[XML_N_STATES];
SCANNER literalScanners[XML_N_LITERAL_TYPES]; SCANNER literalScanners[XML_N_LITERAL_TYPES];
@ -158,12 +164,12 @@ struct encoding {
const char *ptr, const char *ptr,
const char *end, const char *end,
const char **badPtr); const char **badPtr);
void (PTRCALL *utf8Convert)(const ENCODING *enc, enum XML_Convert_Result (PTRCALL *utf8Convert)(const ENCODING *enc,
const char **fromP, const char **fromP,
const char *fromLim, const char *fromLim,
char **toP, char **toP,
const char *toLim); const char *toLim);
void (PTRCALL *utf16Convert)(const ENCODING *enc, enum XML_Convert_Result (PTRCALL *utf16Convert)(const ENCODING *enc,
const char **fromP, const char **fromP,
const char *fromLim, const char *fromLim,
unsigned short **toP, unsigned short **toP,

View file

@ -87,27 +87,45 @@
#define PREFIX(ident) ident #define PREFIX(ident) ident
#endif #endif
#define HAS_CHARS(enc, ptr, end, count) \
(end - ptr >= count * MINBPC(enc))
#define HAS_CHAR(enc, ptr, end) \
HAS_CHARS(enc, ptr, end, 1)
#define REQUIRE_CHARS(enc, ptr, end, count) \
{ \
if (! HAS_CHARS(enc, ptr, end, count)) { \
return XML_TOK_PARTIAL; \
} \
}
#define REQUIRE_CHAR(enc, ptr, end) \
REQUIRE_CHARS(enc, ptr, end, 1)
/* ptr points to character following "<!-" */ /* ptr points to character following "<!-" */
static int PTRCALL static int PTRCALL
PREFIX(scanComment)(const ENCODING *enc, const char *ptr, PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (HAS_CHAR(enc, ptr, end)) {
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
ptr += MINBPC(enc); ptr += MINBPC(enc);
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr) INVALID_CASES(ptr, nextTokPtr)
case BT_MINUS: case BT_MINUS:
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
@ -131,8 +149,7 @@ static int PTRCALL
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS: case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@ -147,11 +164,10 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_PERCNT: case BT_PERCNT:
if (ptr + MINBPC(enc) == end) REQUIRE_CHARS(enc, ptr, end, 2);
return XML_TOK_PARTIAL;
/* don't allow <!ENTITY% foo "whatever"> */ /* don't allow <!ENTITY% foo "whatever"> */
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
@ -175,7 +191,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
} }
static int PTRCALL static int PTRCALL
PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, int *tokPtr) const char *end, int *tokPtr)
{ {
int upper = 0; int upper = 0;
@ -225,15 +241,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
{ {
int tok; int tok;
const char *target = ptr; const char *target = ptr;
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default: default:
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF:
@ -242,13 +257,12 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
ptr += MINBPC(enc); ptr += MINBPC(enc);
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr) INVALID_CASES(ptr, nextTokPtr)
case BT_QUEST: case BT_QUEST:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc); *nextTokPtr = ptr + MINBPC(enc);
return tok; return tok;
@ -266,8 +280,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc); *nextTokPtr = ptr + MINBPC(enc);
return tok; return tok;
@ -282,15 +295,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
} }
static int PTRCALL static int PTRCALL
PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
ASCII_T, ASCII_A, ASCII_LSQB }; ASCII_T, ASCII_A, ASCII_LSQB };
int i; int i;
/* CDATA[ */ /* CDATA[ */
if (end - ptr < 6 * MINBPC(enc)) REQUIRE_CHARS(enc, ptr, end, 6);
return XML_TOK_PARTIAL;
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
*nextTokPtr = ptr; *nextTokPtr = ptr;
@ -305,7 +317,7 @@ static int PTRCALL
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
if (MINBPC(enc) > 1) { if (MINBPC(enc) > 1) {
size_t n = end - ptr; size_t n = end - ptr;
@ -319,13 +331,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_RSQB: case BT_RSQB:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break; break;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc); ptr -= MINBPC(enc);
break; break;
@ -334,8 +344,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
return XML_TOK_CDATA_SECT_CLOSE; return XML_TOK_CDATA_SECT_CLOSE;
case BT_CR: case BT_CR:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (BYTE_TYPE(enc, ptr) == BT_LF) if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc); ptr += MINBPC(enc);
*nextTokPtr = ptr; *nextTokPtr = ptr;
@ -348,7 +357,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
ptr += MINBPC(enc); ptr += MINBPC(enc);
break; break;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \ #define LEAD_CASE(n) \
case BT_LEAD ## n: \ case BT_LEAD ## n: \
@ -383,19 +392,18 @@ static int PTRCALL
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default: default:
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF:
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF:
break; break;
@ -432,7 +440,7 @@ static int PTRCALL
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT: case BT_DIGIT:
case BT_HEX: case BT_HEX:
@ -441,7 +449,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT: case BT_DIGIT:
case BT_HEX: case BT_HEX:
@ -464,7 +472,7 @@ static int PTRCALL
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
if (ptr != end) { if (HAS_CHAR(enc, ptr, end)) {
if (CHAR_MATCHES(enc, ptr, ASCII_x)) if (CHAR_MATCHES(enc, ptr, ASCII_x))
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
@ -474,7 +482,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT: case BT_DIGIT:
break; break;
@ -496,8 +504,7 @@ static int PTRCALL
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_NUM: case BT_NUM:
@ -506,7 +513,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI: case BT_SEMI:
@ -529,7 +536,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS #ifdef XML_NS
int hadColon = 0; int hadColon = 0;
#endif #endif
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS #ifdef XML_NS
@ -540,8 +547,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
} }
hadColon = 1; hadColon = 1;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default: default:
@ -555,8 +561,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
int t; int t;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
t = BYTE_TYPE(enc, ptr); t = BYTE_TYPE(enc, ptr);
if (t == BT_EQUALS) if (t == BT_EQUALS)
break; break;
@ -579,8 +584,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#endif #endif
for (;;) { for (;;) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
open = BYTE_TYPE(enc, ptr); open = BYTE_TYPE(enc, ptr);
if (open == BT_QUOT || open == BT_APOS) if (open == BT_QUOT || open == BT_APOS)
break; break;
@ -598,8 +602,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* in attribute value */ /* in attribute value */
for (;;) { for (;;) {
int t; int t;
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
t = BYTE_TYPE(enc, ptr); t = BYTE_TYPE(enc, ptr);
if (t == open) if (t == open)
break; break;
@ -624,8 +627,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
} }
} }
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_S:
case BT_CR: case BT_CR:
@ -642,8 +644,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to closing quote */ /* ptr points to closing quote */
for (;;) { for (;;) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF:
@ -655,8 +656,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
case BT_SOL: case BT_SOL:
sol: sol:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
@ -688,13 +688,12 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS #ifdef XML_NS
int hadColon; int hadColon;
#endif #endif
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_EXCL: case BT_EXCL:
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS: case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@ -716,7 +715,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
hadColon = 0; hadColon = 0;
#endif #endif
/* we have a start-tag */ /* we have a start-tag */
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS #ifdef XML_NS
@ -727,8 +726,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
} }
hadColon = 1; hadColon = 1;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default: default:
@ -740,7 +738,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF:
{ {
ptr += MINBPC(enc); ptr += MINBPC(enc);
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_GT: case BT_GT:
@ -765,8 +763,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_SOL: case BT_SOL:
sol: sol:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
@ -785,7 +782,7 @@ static int PTRCALL
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
if (MINBPC(enc) > 1) { if (MINBPC(enc) > 1) {
size_t n = end - ptr; size_t n = end - ptr;
@ -803,7 +800,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_CR: case BT_CR:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR; return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF) if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc); ptr += MINBPC(enc);
@ -814,12 +811,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_DATA_NEWLINE; return XML_TOK_DATA_NEWLINE;
case BT_RSQB: case BT_RSQB:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB; return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break; break;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB; return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc); ptr -= MINBPC(enc);
@ -832,7 +829,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc); ptr += MINBPC(enc);
break; break;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \ #define LEAD_CASE(n) \
case BT_LEAD ## n: \ case BT_LEAD ## n: \
@ -845,12 +842,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE #undef LEAD_CASE
case BT_RSQB: case BT_RSQB:
if (ptr + MINBPC(enc) != end) { if (HAS_CHARS(enc, ptr, end, 2)) {
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
break; break;
} }
if (ptr + 2*MINBPC(enc) != end) { if (HAS_CHARS(enc, ptr, end, 3)) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
break; break;
@ -884,8 +881,7 @@ static int PTRCALL
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
@ -895,7 +891,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI: case BT_SEMI:
@ -913,15 +909,14 @@ static int PTRCALL
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default: default:
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_CR: case BT_LF: case BT_S: case BT_CR: case BT_LF: case BT_S:
@ -941,7 +936,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
const char *ptr, const char *end, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
int t = BYTE_TYPE(enc, ptr); int t = BYTE_TYPE(enc, ptr);
switch (t) { switch (t) {
INVALID_CASES(ptr, nextTokPtr) INVALID_CASES(ptr, nextTokPtr)
@ -950,7 +945,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (t != open) if (t != open)
break; break;
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_LITERAL; return -XML_TOK_LITERAL;
*nextTokPtr = ptr; *nextTokPtr = ptr;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
@ -973,7 +968,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr) const char **nextTokPtr)
{ {
int tok; int tok;
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
if (MINBPC(enc) > 1) { if (MINBPC(enc) > 1) {
size_t n = end - ptr; size_t n = end - ptr;
@ -992,8 +987,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_LT: case BT_LT:
{ {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_EXCL: case BT_EXCL:
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@ -1021,7 +1015,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_LF: case BT_S: case BT_LF:
for (;;) { for (;;) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
break; break;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_LF: case BT_S: case BT_LF:
@ -1048,11 +1042,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_OPEN_BRACKET; return XML_TOK_OPEN_BRACKET;
case BT_RSQB: case BT_RSQB:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_CLOSE_BRACKET; return -XML_TOK_CLOSE_BRACKET;
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
if (ptr + MINBPC(enc) == end) REQUIRE_CHARS(enc, ptr, end, 2);
return XML_TOK_PARTIAL;
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
*nextTokPtr = ptr + 2*MINBPC(enc); *nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_COND_SECT_CLOSE; return XML_TOK_COND_SECT_CLOSE;
@ -1065,7 +1058,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_OPEN_PAREN; return XML_TOK_OPEN_PAREN;
case BT_RPAR: case BT_RPAR:
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_CLOSE_PAREN; return -XML_TOK_CLOSE_PAREN;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_AST: case BT_AST:
@ -1141,7 +1134,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr; *nextTokPtr = ptr;
return XML_TOK_INVALID; return XML_TOK_INVALID;
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_GT: case BT_RPAR: case BT_COMMA: case BT_GT: case BT_RPAR: case BT_COMMA:
@ -1154,8 +1147,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc); ptr += MINBPC(enc);
switch (tok) { switch (tok) {
case XML_TOK_NAME: case XML_TOK_NAME:
if (ptr == end) REQUIRE_CHAR(enc, ptr, end);
return XML_TOK_PARTIAL;
tok = XML_TOK_PREFIXED_NAME; tok = XML_TOK_PREFIXED_NAME;
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
@ -1204,10 +1196,12 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
const char *start; const char *start;
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
else if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_PARTIAL;
start = ptr; start = ptr;
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \ #define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break; case BT_LEAD ## n: ptr += n; break;
@ -1232,7 +1226,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
case BT_CR: case BT_CR:
if (ptr == start) { if (ptr == start) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR; return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF) if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc); ptr += MINBPC(enc);
@ -1262,10 +1256,12 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr) const char *end, const char **nextTokPtr)
{ {
const char *start; const char *start;
if (ptr == end) if (ptr >= end)
return XML_TOK_NONE; return XML_TOK_NONE;
else if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_PARTIAL;
start = ptr; start = ptr;
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \ #define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break; case BT_LEAD ## n: ptr += n; break;
@ -1294,7 +1290,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
case BT_CR: case BT_CR:
if (ptr == start) { if (ptr == start) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr == end) if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR; return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF) if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc); ptr += MINBPC(enc);
@ -1326,15 +1322,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
end = ptr + n; end = ptr + n;
} }
} }
while (ptr != end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr) INVALID_CASES(ptr, nextTokPtr)
case BT_LT: case BT_LT:
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
++level; ++level;
ptr += MINBPC(enc); ptr += MINBPC(enc);
@ -1342,11 +1338,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
} }
break; break;
case BT_RSQB: case BT_RSQB:
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
if ((ptr += MINBPC(enc)) == end) ptr += MINBPC(enc);
return XML_TOK_PARTIAL; REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (level == 0) { if (level == 0) {
@ -1373,7 +1369,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
{ {
ptr += MINBPC(enc); ptr += MINBPC(enc);
end -= MINBPC(enc); end -= MINBPC(enc);
for (; ptr != end; ptr += MINBPC(enc)) { for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT: case BT_DIGIT:
case BT_HEX: case BT_HEX:
@ -1521,7 +1517,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
} }
static int PTRFASTCALL static int PTRFASTCALL
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
{ {
int result = 0; int result = 0;
/* skip &# */ /* skip &# */
@ -1565,7 +1561,7 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
} }
static int PTRCALL static int PTRCALL
PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end) const char *end)
{ {
switch ((end - ptr)/MINBPC(enc)) { switch ((end - ptr)/MINBPC(enc)) {
@ -1683,11 +1679,11 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
} }
static int PTRCALL static int PTRCALL
PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
const char *end1, const char *ptr2) const char *end1, const char *ptr2)
{ {
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
if (ptr1 == end1) if (end1 - ptr1 < MINBPC(enc))
return 0; return 0;
if (!CHAR_MATCHES(enc, ptr1, *ptr2)) if (!CHAR_MATCHES(enc, ptr1, *ptr2))
return 0; return 0;
@ -1744,7 +1740,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
const char *end, const char *end,
POSITION *pos) POSITION *pos)
{ {
while (ptr < end) { while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) { switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \ #define LEAD_CASE(n) \
case BT_LEAD ## n: \ case BT_LEAD ## n: \
@ -1760,7 +1756,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
case BT_CR: case BT_CR:
pos->lineNumber++; pos->lineNumber++;
ptr += MINBPC(enc); ptr += MINBPC(enc);
if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc); ptr += MINBPC(enc);
pos->columnNumber = (XML_Size)-1; pos->columnNumber = (XML_Size)-1;
break; break;

View file

@ -1191,10 +1191,8 @@ newxmlparseobject(const char *encoding, const char *namespace_separator, PyObjec
Py_DECREF(self); Py_DECREF(self);
return NULL; return NULL;
} }
#if XML_COMBINED_VERSION >= 20100 || defined(XML_HAS_SET_HASH_SALT) #if XML_COMBINED_VERSION >= 20100
/* This feature was added upstream in libexpat 2.1.0. Our expat copy /* This feature was added upstream in libexpat 2.1.0. */
* has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
* to indicate that we can still use it. */
XML_SetHashSalt(self->itself, XML_SetHashSalt(self->itself,
(unsigned long)_Py_HashSecret.expat.hashsalt); (unsigned long)_Py_HashSecret.expat.hashsalt);
#endif #endif