gh-129569: The function unicodedata.normalize() always returns built-in str (#129570)

Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
Hizuru 2025-02-21 22:51:13 +09:00 committed by GitHub
parent 9bf73c032f
commit c359fcd2f5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 29 additions and 5 deletions

View file

@ -467,6 +467,29 @@ class NormalizationTest(unittest.TestCase):
# Check for bug 834676
unicodedata.normalize('NFC', '\ud55c\uae00')
def test_normalize_return_type(self):
# gh-129569: normalize() return type must always be str
normalize = unicodedata.normalize
class MyStr(str):
pass
normalization_forms = ("NFC", "NFKC", "NFD", "NFKD")
input_strings = (
# normalized strings
"",
"ascii",
# unnormalized strings
"\u1e0b\u0323",
"\u0071\u0307\u0323",
)
for form in normalization_forms:
for input_str in input_strings:
with self.subTest(form=form, input_str=input_str):
self.assertIs(type(normalize(form, input_str)), str)
self.assertIs(type(normalize(form, MyStr(input_str))), str)
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1 @@
Fix :func:`unicodedata.normalize` to always return a built-in :class:`str` object when the input is an instance of a :class:`str` subclass, regardless of whether the string is already normalized.

View file

@ -933,34 +933,34 @@ unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
if (PyUnicode_GET_LENGTH(input) == 0) {
/* Special case empty input strings, since resizing
them later would cause internal errors. */
return Py_NewRef(input);
return PyUnicode_FromObject(input);
}
if (PyUnicode_CompareWithASCIIString(form, "NFC") == 0) {
if (is_normalized_quickcheck(self, input,
true, false, true) == YES) {
return Py_NewRef(input);
return PyUnicode_FromObject(input);
}
return nfc_nfkc(self, input, 0);
}
if (PyUnicode_CompareWithASCIIString(form, "NFKC") == 0) {
if (is_normalized_quickcheck(self, input,
true, true, true) == YES) {
return Py_NewRef(input);
return PyUnicode_FromObject(input);
}
return nfc_nfkc(self, input, 1);
}
if (PyUnicode_CompareWithASCIIString(form, "NFD") == 0) {
if (is_normalized_quickcheck(self, input,
false, false, true) == YES) {
return Py_NewRef(input);
return PyUnicode_FromObject(input);
}
return nfd_nfkd(self, input, 0);
}
if (PyUnicode_CompareWithASCIIString(form, "NFKD") == 0) {
if (is_normalized_quickcheck(self, input,
false, true, true) == YES) {
return Py_NewRef(input);
return PyUnicode_FromObject(input);
}
return nfd_nfkd(self, input, 1);
}