Marc-Andre Lemburg:

The attached patch set includes a workaround to get Python with Unicode to compile on BSDI 4.x (courtesy of Thomas Wouters; the cause is a bug in the BSDI wchar.h header file) and Python interfaces for the MBCS codec donated by Mark Hammond. Also included are some minor corrections with respect to the docs of the new "es" and "es#" parser markers (use PyMem_Free() instead of free(); thanks to Mark Hammond for finding these). The unicodedata tests are now in a separate file (test_unicodedata.py) to avoid problems if the module cannot be found.
2025-10-08 16:11:51 +00:00 · 2000-03-28 20:29:59 +00:00 · 2000-03-28 20:29:59 +00:00 · 24bdb0474f
commit 24bdb0474f
parent 66d4513975
9 changed files with 116 additions and 56 deletions
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@ -82,6 +82,10 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
 #endif
 #ifdef HAVE_WCHAR_H
 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
 # ifdef _HAVE_BSDI
 #  include <time.h>
 # endif
 # include "wchar.h"
 #endif
@ -562,7 +566,9 @@ extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
    );
 #ifdef MS_WIN32
 /* --- MBCS codecs for Windows -------------------------------------------- */
 extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
    const char *string,         /* MBCS encoded string */
    int length,                 /* size of string */
@ -579,8 +585,8 @@ extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
    const char *errors          /* error handling */
    );
 #endif /* MS_WIN32 */
 /* --- Methods & Slots ----------------------------------------------------
   These are capable of handling Unicode objects and strings on input
--- a/Lib/encodings/mbcs.py
+++ b/Lib/encodings/mbcs.py
@ -34,4 +34,3 @@ class StreamConverter(StreamWriter,StreamReader):
 def getregentry():
    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
--- a/Lib/test/output/test_unicode
+++ b/Lib/test/output/test_unicode
@ -1,4 +1,5 @@
 test_unicode
 Testing Unicode comparisons... done.
 Testing Unicode contains method... done.
 Testing Unicode formatting strings... done.
-Testing unicodedata module... done.
+Testing builtin codecs... done.
--- a/Lib/test/output/test_unicodedata
+++ b/Lib/test/output/test_unicodedata
@ -0,0 +1,2 @@
 test_unicodedata
 Testing unicodedata module... done.
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@ -1,6 +1,5 @@
 """ Test script for the Unicode implementation.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
@ -250,50 +249,6 @@ assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
 assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
 print 'done.'
 # Test Unicode database APIs
 try:
    import unicodedata
 except ImportError:
    pass
 else:
    print 'Testing unicodedata module...',
    assert unicodedata.digit(u'A',None) is None
    assert unicodedata.digit(u'9') == 9
    assert unicodedata.digit(u'\u215b',None) is None
    assert unicodedata.digit(u'\u2468') == 9
    assert unicodedata.numeric(u'A',None) is None
    assert unicodedata.numeric(u'9') == 9
    assert unicodedata.numeric(u'\u215b') == 0.125
    assert unicodedata.numeric(u'\u2468') == 9.0
    assert unicodedata.decimal(u'A',None) is None
    assert unicodedata.decimal(u'9') == 9
    assert unicodedata.decimal(u'\u215b',None) is None
    assert unicodedata.decimal(u'\u2468',None) is None
    assert unicodedata.category(u'\uFFFE') == 'Cn'
    assert unicodedata.category(u'a') == 'Ll'
    assert unicodedata.category(u'A') == 'Lu'
    assert unicodedata.bidirectional(u'\uFFFE') == ''
    assert unicodedata.bidirectional(u' ') == 'WS'
    assert unicodedata.bidirectional(u'A') == 'L'
    assert unicodedata.decomposition(u'\uFFFE') == ''
    assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
    assert unicodedata.mirrored(u'\uFFFE') == 0
    assert unicodedata.mirrored(u'a') == 0
    assert unicodedata.mirrored(u'\u2201') == 1
    assert unicodedata.combining(u'\uFFFE') == 0
    assert unicodedata.combining(u'a') == 0
    assert unicodedata.combining(u'\u20e1') == 230
    print 'done.'
 # Test builtin codecs
 print 'Testing builtin codecs...',
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@ -0,0 +1,50 @@
 """ Test script for the unicodedata module.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 """#"
 from test_support import verbose
 import sys
 # Test Unicode database APIs
 import unicodedata
 # Smoke test for the unicodedata lookup functions.  The trailing comma
 # on this print keeps the final 'done.' on the same output line.
 print 'Testing unicodedata module...',
 # digit(): the optional second argument is returned when the character
 # has no digit value.
 assert unicodedata.digit(u'A',None) is None
 assert unicodedata.digit(u'9') == 9
 assert unicodedata.digit(u'\u215b',None) is None
 assert unicodedata.digit(u'\u2468') == 9
 # numeric(): may yield a fractional value (0.125 for u'\u215b').
 assert unicodedata.numeric(u'A',None) is None
 assert unicodedata.numeric(u'9') == 9
 assert unicodedata.numeric(u'\u215b') == 0.125
 assert unicodedata.numeric(u'\u2468') == 9.0
 # decimal(): u'\u2468' has a digit value (see above) but no decimal value.
 assert unicodedata.decimal(u'A',None) is None
 assert unicodedata.decimal(u'9') == 9
 assert unicodedata.decimal(u'\u215b',None) is None
 assert unicodedata.decimal(u'\u2468',None) is None
 # category(): returns a two-letter category code as a string.
 assert unicodedata.category(u'\uFFFE') == 'Cn'
 assert unicodedata.category(u'a') == 'Ll'
 assert unicodedata.category(u'A') == 'Lu'
 # bidirectional(): returns a class string; empty for u'\uFFFE'.
 assert unicodedata.bidirectional(u'\uFFFE') == ''
 assert unicodedata.bidirectional(u' ') == 'WS'
 assert unicodedata.bidirectional(u'A') == 'L'
 # decomposition(): returns the mapping as a string; empty for u'\uFFFE'.
 assert unicodedata.decomposition(u'\uFFFE') == ''
 assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
 # mirrored(): returns 1 or 0.
 assert unicodedata.mirrored(u'\uFFFE') == 0
 assert unicodedata.mirrored(u'a') == 0
 assert unicodedata.mirrored(u'\u2201') == 1
 # combining(): returns an integer combining class (0 for u'a').
 assert unicodedata.combining(u'\uFFFE') == 0
 assert unicodedata.combining(u'a') == 0
 assert unicodedata.combining(u'\u20e1') == 230
 print 'done.'
--- a/Misc/unicode.txt
+++ b/Misc/unicode.txt
@ -740,8 +740,8 @@ These markers are used by the PyArg_ParseTuple() APIs:
 	On output, a buffer of the needed size is allocated and
 	returned through *buffer as NULL-terminated string.
 	The encoded string may not contain embedded NULL characters.
-	The caller is responsible for free()ing the allocated *buffer
+	The caller is responsible for calling PyMem_Free()
-	after usage.
+	to free the allocated *buffer after usage.
  "es#":
 	Takes three parameters: encoding (const char *),
@ -755,8 +755,9 @@ These markers are used by the PyArg_ParseTuple() APIs:
 	If *buffer is NULL, a buffer of the needed size is
 	allocated and output copied into it. *buffer is then
-	updated to point to the allocated memory area. The caller
+	updated to point to the allocated memory area.
-	is responsible for free()ing *buffer after usage.
+	The caller is responsible for calling PyMem_Free()
 	to free the allocated *buffer after usage.
 	In both cases *buffer_len is updated to the number of
 	characters written (excluding the trailing NULL-byte).
@ -784,7 +785,7 @@ Using "es#" with auto-allocation:
 	    return NULL;
 	}
 	str = PyString_FromStringAndSize(buffer, buffer_len);
-	free(buffer);
+	PyMem_Free(buffer);
 	return str;
    }
@ -807,7 +808,7 @@ Using "es" with auto-allocation returning a NULL-terminated string:
 	    return NULL;
 	}
 	str = PyString_FromString(buffer);
-	free(buffer);
+	PyMem_Free(buffer);
 	return str;
    }
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@ -286,6 +286,26 @@ charmap_decode(PyObject *self,
 		       size);
 }
 #ifdef MS_WIN32
 /* Module-level wrapper exposing PyUnicode_DecodeMBCS() to Python.

    Parses two arguments from args: the input data as a buffer with its
    length ("t#") and an optional errors string ("z"; stays NULL when
    omitted).  Returns NULL if argument parsing fails; otherwise returns
    whatever codec_tuple() produces for the decoded object and the
    input size. */
 static PyObject *
 mbcs_decode(PyObject *self,
 	    PyObject *args)
 {
    const char *data;
    int size;
    const char *errors = NULL;
    if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
 			  &data, &size, &errors))
 	return NULL;
    /* The second codec_tuple() argument is the size of the input data. */
    return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
 		       size);
 }
 #endif /* MS_WIN32 */
 /* --- Encoder ------------------------------------------------------------ */
 static PyObject *
@ -491,6 +511,28 @@ charmap_encode(PyObject *self,
 		       PyUnicode_GET_SIZE(str));
 }
 #ifdef MS_WIN32
 /* Module-level wrapper exposing PyUnicode_EncodeMBCS() to Python.

    Parses two arguments from args: a Unicode object ("U") and an
    optional errors string ("z"; stays NULL when omitted).  Returns NULL
    if argument parsing fails; otherwise returns whatever codec_tuple()
    produces for the encoded object and the size of the Unicode input. */
 static PyObject *
 mbcs_encode(PyObject *self,
 	    PyObject *args)
 {
    PyObject *str;
    const char *errors = NULL;
    if (!PyArg_ParseTuple(args, "U|z:mbcs_encode",
 			  &str, &errors))
 	return NULL;
    /* Hand the raw Py_UNICODE buffer and its length to the encoder. */
    return codec_tuple(PyUnicode_EncodeMBCS(
 			       PyUnicode_AS_UNICODE(str), 
 			       PyUnicode_GET_SIZE(str),
 			       errors),
 		       PyUnicode_GET_SIZE(str));
 }
 #endif /* MS_WIN32 */
 /* --- Module API --------------------------------------------------------- */
 static PyMethodDef _codecs_functions[] = {
@ -519,6 +561,10 @@ static PyMethodDef _codecs_functions[] = {
    {"charmap_decode", 		charmap_decode,			1},
    {"readbuffer_encode",	readbuffer_encode,		1},
    {"charbuffer_encode",	charbuffer_encode,		1},
 #ifdef MS_WIN32
    {"mbcs_encode", 		mbcs_encode,			1},
    {"mbcs_decode", 		mbcs_decode,			1},
 #endif
    {NULL, NULL}		/* sentinel */
 };
--- a/Python/getargs.c
+++ b/Python/getargs.c
@ -704,7 +704,7 @@ convertsimple1(arg, p_format, p_va)
 				   the data copied into it; *buffer is
 				   updated to point to the new buffer;
 				   the caller is responsible for
-				   free()ing it after usage
+				   PyMem_Free()ing it after usage
 				   - if *buffer is not NULL, the data
 				   is copied to *buffer; *buffer_len
@ -752,7 +752,7 @@ convertsimple1(arg, p_format, p_va)
 				   is allocated and the data copied
 				   into it; *buffer is updated to
 				   point to the new buffer; the caller
-				   is responsible for free()ing it
+				   is responsible for PyMem_Free()ing it
 				   after usage
 				 */
`@ -34,4 +34,3 @@ class StreamConverter(StreamWriter,StreamReader):`
	`def getregentry():`	`def getregentry():`

	`return (Codec.encode,Codec.decode,StreamReader,StreamWriter)`	`return (Codec.encode,Codec.decode,StreamReader,StreamWriter)`
		`@ -0,0 +1,2 @@`
							`test_unicodedata`
							`Testing unicodedata module... done.`