mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00
gh-129569: The function unicodedata.normalize() always returns built-in str (#129570)
Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
parent
9bf73c032f
commit
c359fcd2f5
3 changed files with 29 additions and 5 deletions
|
@ -467,6 +467,29 @@ class NormalizationTest(unittest.TestCase):
|
|||
# Check for bug 834676
|
||||
unicodedata.normalize('NFC', '\ud55c\uae00')
|
||||
|
||||
def test_normalize_return_type(self):
|
||||
# gh-129569: normalize() return type must always be str
# Checks every normalization form against both plain str input and
# str-subclass input, covering already-normalized and unnormalized
# strings, and requires the result type to be exactly the built-in str.
|
||||
normalize = unicodedata.normalize
|
||||
|
||||
# Minimal str subclass: used to verify that the subclass type does not
# carry over into the value returned by normalize().
class MyStr(str):
|
||||
pass
|
||||
|
||||
normalization_forms = ("NFC", "NFKC", "NFD", "NFKD")
|
||||
input_strings = (
|
||||
# normalized strings
|
||||
"",
|
||||
"ascii",
|
||||
# unnormalized strings
|
||||
"\u1e0b\u0323",
|
||||
"\u0071\u0307\u0323",
|
||||
)
|
||||
|
||||
# Exercise every (form, input) combination; subTest() labels each
# combination with its form and input string.
for form in normalization_forms:
|
||||
for input_str in input_strings:
|
||||
with self.subTest(form=form, input_str=input_str):
|
||||
# Plain str input: the result type must be exactly str.
self.assertIs(type(normalize(form, input_str)), str)
|
||||
# str-subclass input: the result must still be exactly str,
# not MyStr (assertIs on the type rejects subclasses).
self.assertIs(type(normalize(form, MyStr(input_str))), str)
|
||||
|
||||
|
||||
# Run the tests through unittest when this file is executed directly
# rather than imported.
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
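Fix :func:`unicodedata.normalize` to always return a built-in :class:`str` object when given an instance of a :class:`str` subclass, regardless of whether the string is already normalized. Previously, an empty or already-normalized input was returned as-is, so the result kept the subclass type.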
|
|
@ -933,34 +933,34 @@ unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
|
|||
if (PyUnicode_GET_LENGTH(input) == 0) {
|
||||
/* Special case empty input strings, since resizing
|
||||
them later would cause internal errors. */
|
||||
return Py_NewRef(input);
|
||||
return PyUnicode_FromObject(input);
|
||||
}
|
||||
|
||||
if (PyUnicode_CompareWithASCIIString(form, "NFC") == 0) {
|
||||
if (is_normalized_quickcheck(self, input,
|
||||
true, false, true) == YES) {
|
||||
return Py_NewRef(input);
|
||||
return PyUnicode_FromObject(input);
|
||||
}
|
||||
return nfc_nfkc(self, input, 0);
|
||||
}
|
||||
if (PyUnicode_CompareWithASCIIString(form, "NFKC") == 0) {
|
||||
if (is_normalized_quickcheck(self, input,
|
||||
true, true, true) == YES) {
|
||||
return Py_NewRef(input);
|
||||
return PyUnicode_FromObject(input);
|
||||
}
|
||||
return nfc_nfkc(self, input, 1);
|
||||
}
|
||||
if (PyUnicode_CompareWithASCIIString(form, "NFD") == 0) {
|
||||
if (is_normalized_quickcheck(self, input,
|
||||
false, false, true) == YES) {
|
||||
return Py_NewRef(input);
|
||||
return PyUnicode_FromObject(input);
|
||||
}
|
||||
return nfd_nfkd(self, input, 0);
|
||||
}
|
||||
if (PyUnicode_CompareWithASCIIString(form, "NFKD") == 0) {
|
||||
if (is_normalized_quickcheck(self, input,
|
||||
false, true, true) == YES) {
|
||||
return Py_NewRef(input);
|
||||
return PyUnicode_FromObject(input);
|
||||
}
|
||||
return nfd_nfkd(self, input, 1);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue