Support using UCS-4 as the Py_UNICODE type:

Add configure option --enable-unicode.
Add config.h macros Py_USING_UNICODE, PY_UNICODE_TYPE, Py_UNICODE_SIZE,
                    SIZEOF_WCHAR_T.
Define Py_UCS2.
Encode and decode large UTF-8 characters into single Py_UNICODE values
for wide Unicode types; likewise for UTF-16.
Remove the test of whether sizeof(Py_UNICODE) is two.
This commit is contained in:
Martin v. Löwis 2001-06-26 22:22:37 +00:00
parent ff1cc902fe
commit 0ba70cc3c8
7 changed files with 667 additions and 473 deletions

View file

@ -60,16 +60,9 @@ Copyright (c) Corporation for National Research Initiatives.
/* experimental UCS-4 support. enable at your own risk! */
#undef USE_UCS4_STORAGE
/*
* Use this typedef when you need to represent a UTF-16 surrogate pair
* as a single unsigned integer.
*/
#if SIZEOF_INT >= 4
typedef unsigned int Py_UCS4;
#elif SIZEOF_LONG >= 4
typedef unsigned long Py_UCS4;
#endif
#if Py_UNICODE_SIZE == 4
#define USE_UCS4_STORAGE
#endif
/* Set these flags if the platform has "wchar.h", "wctype.h" and the
wchar_t type is a 16-bit unsigned type */
@ -77,11 +70,16 @@ typedef unsigned long Py_UCS4;
/* #define HAVE_USABLE_WCHAR_T */
/* Defaults for various platforms */
#ifndef HAVE_USABLE_WCHAR_T
#ifndef PY_UNICODE_TYPE
/* Windows has a usable wchar_t type (unless we're using UCS-4) */
# if defined(MS_WIN32) && !defined(USE_UCS4_STORAGE)
# define HAVE_USABLE_WCHAR_T
# define PY_UNICODE_TYPE wchar_t
# endif
# if defined(USE_UCS4_STORAGE)
# define PY_UNICODE_TYPE Py_UCS4
# endif
#endif
@ -104,28 +102,23 @@ typedef unsigned long Py_UCS4;
# include "wchar.h"
#endif
#ifdef HAVE_USABLE_WCHAR_T
/* If the compiler defines wchar_t as a 16-bit unsigned type we can
use the compiler type directly. Works fine with all modern Windows
platforms. */
typedef wchar_t Py_UNICODE;
#else
/* Use if you have a standard ANSI compiler, without wchar_t support.
If a short is not 16 bits on your platform, you have to fix the
typedef below, or the module initialization code will complain. */
#ifdef USE_UCS4_STORAGE
typedef Py_UCS4 Py_UNICODE;
#else
typedef unsigned short Py_UNICODE;
/*
* Use this typedef when you need to represent a UTF-16 surrogate pair
* as a single unsigned integer.
*/
#if SIZEOF_INT >= 4
typedef unsigned int Py_UCS4;
#elif SIZEOF_LONG >= 4
typedef unsigned long Py_UCS4;
#endif
#endif
#if SIZEOF_SHORT == 2
typedef unsigned short Py_UCS2;
#else
#error Cannot find a two-byte type
#endif
typedef PY_UNICODE_TYPE Py_UNICODE;
/* --- Internal Unicode Operations ---------------------------------------- */