bpo-43667: Fix broken Unicode encoding in non-UTF locales on Solaris (GH-25096)

This commit is contained in:
Jakub Kulík 2021-04-30 15:21:42 +02:00 committed by GitHub
parent 4908fae3d5
commit 9032cf5cb1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 194 additions and 0 deletions

View file

@ -57,6 +57,10 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <windows.h>
#endif
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
#include "pycore_fileutils.h" // _Py_LocaleUsesNonUnicodeWchar()
#endif
/* Uncomment to display statistics on interned strings at exit
in _PyUnicode_ClearInterned(). */
/* #define INTERNED_STATS 1 */
@ -2217,6 +2221,20 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size)
if (size == 0)
_Py_RETURN_UNICODE_EMPTY();
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
    /* On Oracle Solaris the internal wchar_t form is not Unicode when a
       non-Unicode locale is active, so the input has to be converted to
       UCS-4 before a string object can be built from it. */
    if (_Py_LocaleUsesNonUnicodeWchar()) {
        wchar_t *ucs4_copy = _Py_DecodeNonUnicodeWchar(u, size);
        if (ucs4_copy == NULL) {
            /* Conversion failed: hand the failure back to the caller. */
            return NULL;
        }
        /* Build the result from the converted copy, then release the copy
           unconditionally; the result is returned as-is. */
        PyObject *result = _PyUnicode_FromUCS4(ucs4_copy, size);
        PyMem_Free(ucs4_copy);
        return result;
    }
#endif
/* Single character Unicode objects in the Latin-1 range are
shared when using this constructor */
if (size == 1 && (Py_UCS4)*u < 256)
@ -3295,6 +3313,17 @@ PyUnicode_AsWideChar(PyObject *unicode,
res = size;
}
unicode_copy_as_widechar(unicode, w, size);
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales, so the characters just copied into w are
       converted in place to that form.
       The guard is #ifdef (not #if) to match the #ifdef guards used for
       this macro elsewhere in this file: it only tests whether the macro
       is defined and does not evaluate its value. */
    if (_Py_LocaleUsesNonUnicodeWchar()) {
        if (_Py_EncodeNonUnicodeWchar_InPlace(w, size) < 0) {
            /* In-place conversion failed: report -1 to the caller. */
            return -1;
        }
    }
#endif
return res;
}
@ -3321,6 +3350,17 @@ PyUnicode_AsWideCharString(PyObject *unicode,
return NULL;
}
unicode_copy_as_widechar(unicode, buffer, buflen + 1);
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales, so the buflen + 1 wide characters just copied
       into buffer are converted in place to that form.
       The guard is #ifdef (not #if) to match the #ifdef guards used for
       this macro elsewhere in this file: it only tests whether the macro
       is defined and does not evaluate its value. */
    if (_Py_LocaleUsesNonUnicodeWchar()) {
        if (_Py_EncodeNonUnicodeWchar_InPlace(buffer, (buflen + 1)) < 0) {
            /* buffer is not handed to the caller on this path, so release
               it here instead of leaking it.
               NOTE(review): assumes buffer was obtained from the PyMem
               allocator, as the documented contract of
               PyUnicode_AsWideCharString states -- confirm against the
               allocation earlier in this function. */
            PyMem_Free(buffer);
            return NULL;
        }
    }
#endif
if (size != NULL) {
*size = buflen;
}