Issue 4474: On platforms with sizeof(wchar_t) == 4 and sizeof(Py_UNICODE) == 2, PyUnicode_FromWideChar now converts each character outside the BMP to the appropriate surrogate pair. Thanks to Victor Stinner for the patch.
2025-12-15 21:44:50 +00:00 · 2009-03-18 14:47:41 +00:00 · 2009-03-18 14:47:41 +00:00 · 081dfee4f1
commit 081dfee4f1
parent ecdfd513a2
3 changed files with 113 additions and 0 deletions
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -707,6 +707,50 @@ test_Z_code(PyObject *self)
 	Py_RETURN_NONE;
 }

+static PyObject *
+test_widechar(PyObject *self)	/* returns None on success; NULL (or raiseTestError()'s result) on failure */
+{	/* Checks that PyUnicode_FromWideChar() and PyUnicode_FromString() produce equal strings for U+10ABCD. */
+#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
+	const wchar_t wtext[2] = {(wchar_t)0x10ABCDu};	/* one 4-byte unit holds the code point; the spare element is zero */
+	size_t wtextlen = 1;
+#else
+	const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu};	/* UTF-16 surrogate pair for U+10ABCD; the spare element is zero */
+	size_t wtextlen = 2;
+#endif
+	PyObject *wide, *utf8;
+
+	wide = PyUnicode_FromWideChar(wtext, wtextlen);	/* the conversion under test */
+	if (wide == NULL)
+		return NULL;
+
+	utf8 = PyUnicode_FromString("\xf4\x8a\xaf\x8d");	/* reference value: the same code point spelled as UTF-8 bytes */
+	if (utf8 == NULL) {
+		Py_DECREF(wide);	/* release the first object before bailing out */
+		return NULL;
+	}
+
+	if (PyUnicode_GET_SIZE(wide) != PyUnicode_GET_SIZE(utf8)) {	/* length is checked first so a size mismatch gets its own message */
+		Py_DECREF(wide);
+		Py_DECREF(utf8);
+		return raiseTestError("test_widechar",
+				      "wide string and utf8 string "
+				      "have different length");
+	}
+	if (PyUnicode_Compare(wide, utf8)) {	/* nonzero: the strings differ, or the comparison itself failed */
+		Py_DECREF(wide);
+		Py_DECREF(utf8);
+		if (PyErr_Occurred())	/* the comparison raised: propagate that exception unchanged */
+			return NULL;
+		return raiseTestError("test_widechar",
+				      "wide string and utf8 string "
+				      "are different");
+	}
+
+	Py_DECREF(wide);
+	Py_DECREF(utf8);
+	Py_RETURN_NONE;
+}
+
 static PyObject *
 test_empty_argparse(PyObject *self)
 {
@@ -1206,6 +1250,7 @@ static PyMethodDef TestMethods[] = {
 	{"test_s_code",		(PyCFunction)test_s_code,	 METH_NOARGS},
 	{"test_u_code",		(PyCFunction)test_u_code,	 METH_NOARGS},
 	{"test_Z_code",		(PyCFunction)test_Z_code,	 METH_NOARGS},
+ 	{"test_widechar",	(PyCFunction)test_widechar,	 METH_NOARGS},
 #ifdef WITH_THREAD
 	{"_test_thread_state",  test_thread_state, 		 METH_VARARGS},
 	{"_pending_threadfunc",	pending_threadfunc,		 METH_VARARGS},