mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
[3.9] bpo-43667: Fix broken Unicode encoding in non-UTF locales on Solaris (GH-25096) (GH-25847)
(cherry picked from commit 9032cf5cb1)
Co-authored-by: Jakub Kulík <Kulikjak@gmail.com>
This commit is contained in:
parent
0593ae84af
commit
d3cc68900d
6 changed files with 194 additions and 0 deletions
|
@ -56,6 +56,10 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
|
||||
#include "pycore_fileutils.h" // _Py_LocaleUsesNonUnicodeWchar()
|
||||
#endif
|
||||
|
||||
/* Uncomment to display statistics on interned strings at exit when
|
||||
using Valgrind or Insure++. */
|
||||
/* #define INTERNED_STATS 1 */
|
||||
|
@ -2211,6 +2215,20 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size)
|
|||
if (size == 0)
|
||||
_Py_RETURN_UNICODE_EMPTY();
|
||||
|
||||
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
|
||||
/* Oracle Solaris uses non-Unicode internal wchar_t form for
|
||||
non-Unicode locales and hence needs conversion to UCS-4 first. */
|
||||
if (_Py_LocaleUsesNonUnicodeWchar()) {
|
||||
wchar_t* converted = _Py_DecodeNonUnicodeWchar(u, size);
|
||||
if (!converted) {
|
||||
return NULL;
|
||||
}
|
||||
PyObject *unicode = _PyUnicode_FromUCS4(converted, size);
|
||||
PyMem_Free(converted);
|
||||
return unicode;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Single character Unicode objects in the Latin-1 range are
|
||||
shared when using this constructor */
|
||||
if (size == 1 && (Py_UCS4)*u < 256)
|
||||
|
@ -3223,6 +3241,17 @@ PyUnicode_AsWideChar(PyObject *unicode,
|
|||
res = size;
|
||||
}
|
||||
unicode_copy_as_widechar(unicode, w, size);
|
||||
|
||||
#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
|
||||
/* Oracle Solaris uses non-Unicode internal wchar_t form for
|
||||
non-Unicode locales and hence needs conversion first. */
|
||||
if (_Py_LocaleUsesNonUnicodeWchar()) {
|
||||
if (_Py_EncodeNonUnicodeWchar_InPlace(w, size) < 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -3249,6 +3278,17 @@ PyUnicode_AsWideCharString(PyObject *unicode,
|
|||
return NULL;
|
||||
}
|
||||
unicode_copy_as_widechar(unicode, buffer, buflen + 1);
|
||||
|
||||
#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
|
||||
/* Oracle Solaris uses non-Unicode internal wchar_t form for
|
||||
non-Unicode locales and hence needs conversion first. */
|
||||
if (_Py_LocaleUsesNonUnicodeWchar()) {
|
||||
if (_Py_EncodeNonUnicodeWchar_InPlace(buffer, (buflen + 1)) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (size != NULL) {
|
||||
*size = buflen;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue