gh-99593: Add tests for Unicode C API (part 1) (GH-99651)

Add tests for functions corresponding to the str class methods.
2025-11-25 04:34:37 +00:00 · 2022-11-29 09:59:56 +02:00 · 2022-11-29 09:59:56 +02:00 · deaa8dee48
commit deaa8dee48
parent 8f2fb7dfe7
3 changed files with 777 additions and 47 deletions
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@ -9,10 +9,35 @@ except ImportError:
    _testcapi = None


+NULL = None
+
+class Str(str):
+    pass
+
+
 class CAPITest(unittest.TestCase):

-    # Test PyUnicode_FromFormat()
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_fromobject(self):
+        """Test PyUnicode_FromObject()"""
+        from _testcapi import unicode_fromobject as fromobject
+
+        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
+                  'a\ud800b\udfffc', '\ud834\udd1e']:
+            self.assertEqual(fromobject(s), s)
+            o = Str(s)
+            s2 = fromobject(o)
+            self.assertEqual(s2, s)
+            self.assertIs(type(s2), str)
+            self.assertIsNot(s2, s)
+
+        self.assertRaises(TypeError, fromobject, b'abc')
+        self.assertRaises(TypeError, fromobject, [])
+        # CRASHES fromobject(NULL)
+
    def test_from_format(self):
+        """Test PyUnicode_FromFormat()"""
        import_helper.import_module('ctypes')
        from ctypes import (
            c_char_p,
@ -268,10 +293,10 @@ class CAPITest(unittest.TestCase):
        self.assertRaisesRegex(SystemError, 'invalid format string',
            PyUnicode_FromFormat, b'%+i', c_int(10))

-    # Test PyUnicode_AsWideChar()
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_aswidechar(self):
+        """Test PyUnicode_AsWideChar()"""
        from _testcapi import unicode_aswidechar
        import_helper.import_module('ctypes')
        from ctypes import c_wchar, sizeof
@ -307,10 +332,10 @@ class CAPITest(unittest.TestCase):
        self.assertEqual(size, nchar)
        self.assertEqual(wchar, nonbmp + '\0')

-    # Test PyUnicode_AsWideCharString()
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_aswidecharstring(self):
+        """Test PyUnicode_AsWideCharString()"""
        from _testcapi import unicode_aswidecharstring
        import_helper.import_module('ctypes')
        from ctypes import c_wchar, sizeof
@ -332,10 +357,10 @@ class CAPITest(unittest.TestCase):
        self.assertEqual(size, nchar)
        self.assertEqual(wchar, nonbmp + '\0')

-    # Test PyUnicode_AsUCS4()
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_asucs4(self):
+        """Test PyUnicode_AsUCS4()"""
        from _testcapi import unicode_asucs4
        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
                  'a\ud800b\udfffc', '\ud834\udd1e']:
@ -350,10 +375,10 @@ class CAPITest(unittest.TestCase):
            self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0')
            self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff')

-    # Test PyUnicode_AsUTF8()
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_asutf8(self):
+        """Test PyUnicode_AsUTF8()"""
        from _testcapi import unicode_asutf8

        bmp = '\u0100'
@ -365,10 +390,10 @@ class CAPITest(unittest.TestCase):
        self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf')
        self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc')

-    # Test PyUnicode_AsUTF8AndSize()
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_asutf8andsize(self):
+        """Test PyUnicode_AsUTF8AndSize()"""
        from _testcapi import unicode_asutf8andsize

        bmp = '\u0100'
@ -380,54 +405,275 @@ class CAPITest(unittest.TestCase):
        self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
        self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')

-    # Test PyUnicode_Count()
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_concat(self):
+        """Test PyUnicode_Concat()"""
+        from _testcapi import unicode_concat as concat
+
+        self.assertEqual(concat('abc', 'def'), 'abcdef')
+        self.assertEqual(concat('abc', 'где'), 'abcгде')
+        self.assertEqual(concat('абв', 'def'), 'абвdef')
+        self.assertEqual(concat('абв', 'где'), 'абвгде')
+        self.assertEqual(concat('a\0b', 'c\0d'), 'a\0bc\0d')
+
+        self.assertRaises(TypeError, concat, b'abc', 'def')
+        self.assertRaises(TypeError, concat, 'abc', b'def')
+        self.assertRaises(TypeError, concat, b'abc', b'def')
+        self.assertRaises(TypeError, concat, [], 'def')
+        self.assertRaises(TypeError, concat, 'abc', [])
+        self.assertRaises(TypeError, concat, [], [])
+        # CRASHES concat(NULL, 'def')
+        # CRASHES concat('abc', NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_split(self):
+        """Test PyUnicode_Split()"""
+        from _testcapi import unicode_split as split
+
+        self.assertEqual(split('a|b|c|d', '|'), ['a', 'b', 'c', 'd'])
+        self.assertEqual(split('a|b|c|d', '|', 2), ['a', 'b', 'c|d'])
+        self.assertEqual(split('a|b|c|d', '\u20ac'), ['a|b|c|d'])
+        self.assertEqual(split('a||b|c||d', '||'), ['a', 'b|c', 'd'])
+        self.assertEqual(split('а|б|в|г', '|'), ['а', 'б', 'в', 'г'])
+        self.assertEqual(split('абабагаламага', 'а'),
+                         ['', 'б', 'б', 'г', 'л', 'м', 'г', ''])
+        self.assertEqual(split(' a\tb\nc\rd\ve\f', NULL),
+                         ['a', 'b', 'c', 'd', 'e'])
+        self.assertEqual(split('a\x85b\xa0c\u1680d\u2000e', NULL),
+                         ['a', 'b', 'c', 'd', 'e'])
+
+        self.assertRaises(ValueError, split, 'a|b|c|d', '')
+        self.assertRaises(TypeError, split, 'a|b|c|d', ord('|'))
+        self.assertRaises(TypeError, split, [], '|')
+        # CRASHES split(NULL, '|')
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_rsplit(self):
+        """Test PyUnicode_RSplit()"""
+        from _testcapi import unicode_rsplit as rsplit
+
+        self.assertEqual(rsplit('a|b|c|d', '|'), ['a', 'b', 'c', 'd'])
+        self.assertEqual(rsplit('a|b|c|d', '|', 2), ['a|b', 'c', 'd'])
+        self.assertEqual(rsplit('a|b|c|d', '\u20ac'), ['a|b|c|d'])
+        self.assertEqual(rsplit('a||b|c||d', '||'), ['a', 'b|c', 'd'])
+        self.assertEqual(rsplit('а|б|в|г', '|'), ['а', 'б', 'в', 'г'])
+        self.assertEqual(rsplit('абабагаламага', 'а'),
+                         ['', 'б', 'б', 'г', 'л', 'м', 'г', ''])
+        self.assertEqual(rsplit('aжbжcжd', 'ж'), ['a', 'b', 'c', 'd'])
+        self.assertEqual(rsplit(' a\tb\nc\rd\ve\f', NULL),
+                         ['a', 'b', 'c', 'd', 'e'])
+        self.assertEqual(rsplit('a\x85b\xa0c\u1680d\u2000e', NULL),
+                         ['a', 'b', 'c', 'd', 'e'])
+
+        self.assertRaises(ValueError, rsplit, 'a|b|c|d', '')
+        self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|'))
+        self.assertRaises(TypeError, rsplit, [], '|')
+        # CRASHES rsplit(NULL, '|')
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_partition(self):
+        """Test PyUnicode_Partition()"""
+        from _testcapi import unicode_partition as partition
+
+        self.assertEqual(partition('a|b|c', '|'), ('a', '|', 'b|c'))
+        self.assertEqual(partition('a||b||c', '||'), ('a', '||', 'b||c'))
+        self.assertEqual(partition('а|б|в', '|'), ('а', '|', 'б|в'))
+        self.assertEqual(partition('кабан', 'а'), ('к', 'а', 'бан'))
+        self.assertEqual(partition('aжbжc', 'ж'), ('a', 'ж', 'bжc'))
+
+        self.assertRaises(ValueError, partition, 'a|b|c', '')
+        self.assertRaises(TypeError, partition, b'a|b|c', '|')
+        self.assertRaises(TypeError, partition, 'a|b|c', b'|')
+        self.assertRaises(TypeError, partition, 'a|b|c', ord('|'))
+        self.assertRaises(TypeError, partition, [], '|')
+        # CRASHES partition(NULL, '|')
+        # CRASHES partition('a|b|c', NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_rpartition(self):
+        """Test PyUnicode_RPartition()"""
+        from _testcapi import unicode_rpartition as rpartition
+
+        self.assertEqual(rpartition('a|b|c', '|'), ('a|b', '|', 'c'))
+        self.assertEqual(rpartition('a||b||c', '||'), ('a||b', '||', 'c'))
+        self.assertEqual(rpartition('а|б|в', '|'), ('а|б', '|', 'в'))
+        self.assertEqual(rpartition('кабан', 'а'), ('каб', 'а', 'н'))
+        self.assertEqual(rpartition('aжbжc', 'ж'), ('aжb', 'ж', 'c'))
+
+        self.assertRaises(ValueError, rpartition, 'a|b|c', '')
+        self.assertRaises(TypeError, rpartition, b'a|b|c', '|')
+        self.assertRaises(TypeError, rpartition, 'a|b|c', b'|')
+        self.assertRaises(TypeError, rpartition, 'a|b|c', ord('|'))
+        self.assertRaises(TypeError, rpartition, [], '|')
+        # CRASHES rpartition(NULL, '|')
+        # CRASHES rpartition('a|b|c', NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_splitlines(self):
+        """Test PyUnicode_Splitlines()"""
+        from _testcapi import unicode_splitlines as splitlines
+
+        self.assertEqual(splitlines('a\nb\rc\r\nd'), ['a', 'b', 'c', 'd'])
+        self.assertEqual(splitlines('a\nb\rc\r\nd', True),
+                         ['a\n', 'b\r', 'c\r\n', 'd'])
+        self.assertEqual(splitlines('a\x85b\u2028c\u2029d'),
+                         ['a', 'b', 'c', 'd'])
+        self.assertEqual(splitlines('a\x85b\u2028c\u2029d', True),
+                         ['a\x85', 'b\u2028', 'c\u2029', 'd'])
+        self.assertEqual(splitlines('а\nб\rв\r\nг'), ['а', 'б', 'в', 'г'])
+
+        self.assertRaises(TypeError, splitlines, b'a\nb\rc\r\nd')
+        # CRASHES splitlines(NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_translate(self):
+        """Test PyUnicode_Translate()"""
+        from _testcapi import unicode_translate as translate
+
+        self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d')
+        self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г')
+        self.assertEqual(translate('abc', {}), 'abc')
+        self.assertEqual(translate('abc', []), 'abc')
+        self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None})
+        self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict')
+        self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo')
+        self.assertEqual(translate('abc', {ord('b'): None}, 'ignore'), 'ac')
+        self.assertEqual(translate('abc', {ord('b'): None}, 'replace'), 'a\ufffdc')
+        self.assertEqual(translate('abc', {ord('b'): None}, 'backslashreplace'), r'a\x62c')
+        # XXX Other error handlers do not support UnicodeTranslateError
+        self.assertRaises(TypeError, translate, b'abc', [])
+        self.assertRaises(TypeError, translate, 123, [])
+        self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'})
+        self.assertRaises(TypeError, translate, 'abc', 123)
+        self.assertRaises(TypeError, translate, 'abc', NULL)
+        self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo')
+        # CRASHES translate(NULL, [])
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_join(self):
+        """Test PyUnicode_Join()"""
+        from _testcapi import unicode_join as join
+        self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c')
+        self.assertEqual(join('|', ['a', '', 'c']), 'a||c')
+        self.assertEqual(join('', ['a', 'b', 'c']), 'abc')
+        self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c')
+        self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в')
+        self.assertEqual(join('ж', ['а', 'б', 'в']), 'ажбжв')
+        self.assertRaises(TypeError, join, b'|', ['a', 'b', 'c'])
+        self.assertRaises(TypeError, join, '|', [b'a', b'b', b'c'])
+        self.assertRaises(TypeError, join, NULL, [b'a', b'b', b'c'])
+        self.assertRaises(TypeError, join, '|', b'123')
+        self.assertRaises(TypeError, join, '|', 123)
+        self.assertRaises(SystemError, join, '|', NULL)
+
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_count(self):
+        """Test PyUnicode_Count()"""
        from _testcapi import unicode_count

-        st = 'abcabd'
-        self.assertEqual(unicode_count(st, 'a', 0, len(st)), 2)
-        self.assertEqual(unicode_count(st, 'ab', 0, len(st)), 2)
-        self.assertEqual(unicode_count(st, 'abc', 0, len(st)), 1)
-        self.assertEqual(unicode_count(st, 'а', 0, len(st)), 0)  # cyrillic "a"
+        for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+            for i, ch in enumerate(str):
+                self.assertEqual(unicode_count(str, ch, 0, len(str)), 1)
+
+        str = "!>_<!"
+        self.assertEqual(unicode_count(str, 'z', 0, len(str)), 0)
+        self.assertEqual(unicode_count(str, '', 0, len(str)), len(str)+1)
        # start < end
-        self.assertEqual(unicode_count(st, 'a', 3, len(st)), 1)
-        self.assertEqual(unicode_count(st, 'a', 4, len(st)), 0)
-        self.assertEqual(unicode_count(st, 'a', 0, sys.maxsize), 2)
+        self.assertEqual(unicode_count(str, '!', 1, len(str)+1), 1)
        # start >= end
-        self.assertEqual(unicode_count(st, 'abc', 0, 0), 0)
-        self.assertEqual(unicode_count(st, 'a', 3, 2), 0)
-        self.assertEqual(unicode_count(st, 'a', sys.maxsize, 5), 0)
+        self.assertEqual(unicode_count(str, '!', 0, 0), 0)
+        self.assertEqual(unicode_count(str, '!', len(str), 0), 0)
        # negative
-        self.assertEqual(unicode_count(st, 'ab', -len(st), -1), 2)
-        self.assertEqual(unicode_count(st, 'a', -len(st), -3), 1)
-        # wrong args
-        self.assertRaises(TypeError, unicode_count, 'a', 'a')
-        self.assertRaises(TypeError, unicode_count, 'a', 'a', 1)
-        self.assertRaises(TypeError, unicode_count, 1, 'a', 0, 1)
-        self.assertRaises(TypeError, unicode_count, 'a', 1, 0, 1)
-        # empty string
-        self.assertEqual(unicode_count('abc', '', 0, 3), 4)
-        self.assertEqual(unicode_count('abc', '', 1, 3), 3)
-        self.assertEqual(unicode_count('', '', 0, 1), 1)
-        self.assertEqual(unicode_count('', 'a', 0, 1), 0)
-        # different unicode kinds
-        for uni in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
-            for ch in uni:
-                self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1)
-                self.assertEqual(unicode_count(st, ch, 0, len(st)), 0)
+        self.assertEqual(unicode_count(str, '!', -len(str), -1), 1)
+        # bad arguments (four args each, so PyUnicode_Count itself must raise)
+        self.assertRaises(TypeError, unicode_count, str, b'!', 0, len(str))
+        self.assertRaises(TypeError, unicode_count, b"!>_<!", '!', 0, len(str))
+        self.assertRaises(TypeError, unicode_count, str, ord('!'), 0, len(str))
+        self.assertRaises(TypeError, unicode_count, [], '!', 0, len(str))
+        # CRASHES unicode_count(NULL, '!', 0, len(str))
+        # CRASHES unicode_count(str, NULL, 0, len(str))

-        # subclasses should still work
-        class MyStr(str):
-            pass
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_tailmatch(self):
+        """Test PyUnicode_Tailmatch()"""
+        from _testcapi import unicode_tailmatch as tailmatch

-        self.assertEqual(unicode_count(MyStr('aab'), 'a', 0, 3), 2)
+        str = 'ababahalamaha'
+        self.assertEqual(tailmatch(str, 'aba', 0, len(str), -1), 1)
+        self.assertEqual(tailmatch(str, 'aha', 0, len(str), 1), 1)
+
+        self.assertEqual(tailmatch(str, 'aba', 0, sys.maxsize, -1), 1)
+        self.assertEqual(tailmatch(str, 'aba', -len(str), sys.maxsize, -1), 1)
+        self.assertEqual(tailmatch(str, 'aba', -sys.maxsize-1, len(str), -1), 1)
+        self.assertEqual(tailmatch(str, 'aha', 0, sys.maxsize, 1), 1)
+        self.assertEqual(tailmatch(str, 'aha', -sys.maxsize-1, len(str), 1), 1)
+
+        self.assertEqual(tailmatch(str, 'z', 0, len(str), 1), 0)
+        self.assertEqual(tailmatch(str, 'z', 0, len(str), -1), 0)
+        self.assertEqual(tailmatch(str, '', 0, len(str), 1), 1)
+        self.assertEqual(tailmatch(str, '', 0, len(str), -1), 1)
+
+        self.assertEqual(tailmatch(str, 'ba', 0, len(str)-1, -1), 0)
+        self.assertEqual(tailmatch(str, 'ba', 1, len(str)-1, -1), 1)
+        self.assertEqual(tailmatch(str, 'aba', 1, len(str)-1, -1), 0)
+        self.assertEqual(tailmatch(str, 'ba', -len(str)+1, -1, -1), 1)
+        self.assertEqual(tailmatch(str, 'ah', 0, len(str), 1), 0)
+        self.assertEqual(tailmatch(str, 'ah', 0, len(str)-1, 1), 1)
+        self.assertEqual(tailmatch(str, 'ah', -len(str), -1, 1), 1)
+
+        # bad arguments
+        self.assertRaises(TypeError, tailmatch, str, ('aba', 'aha'), 0, len(str), -1)
+        self.assertRaises(TypeError, tailmatch, str, ('aba', 'aha'), 0, len(str), 1)
+        # CRASHES tailmatch(NULL, 'aba', 0, len(str), -1)
+        # CRASHES tailmatch(str, NULL, 0, len(str), -1)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_find(self):
+        """Test PyUnicode_Find()"""
+        from _testcapi import unicode_find as find
+
+        for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+            for i, ch in enumerate(str):
+                self.assertEqual(find(str, ch, 0, len(str), 1), i)
+                self.assertEqual(find(str, ch, 0, len(str), -1), i)
+
+        str = "!>_<!"
+        self.assertEqual(find(str, 'z', 0, len(str), 1), -1)
+        self.assertEqual(find(str, 'z', 0, len(str), -1), -1)
+        self.assertEqual(find(str, '', 0, len(str), 1), 0)
+        self.assertEqual(find(str, '', 0, len(str), -1), len(str))
+        # start < end
+        self.assertEqual(find(str, '!', 1, len(str)+1, 1), 4)
+        self.assertEqual(find(str, '!', 1, len(str)+1, -1), 4)
+        # start >= end
+        self.assertEqual(find(str, '!', 0, 0, 1), -1)
+        self.assertEqual(find(str, '!', len(str), 0, 1), -1)
+        # negative
+        self.assertEqual(find(str, '!', -len(str), -1, 1), 0)
+        self.assertEqual(find(str, '!', -len(str), -1, -1), 0)
+        # bad arguments
+        self.assertRaises(TypeError, find, str, b'!', 0, len(str), 1)
+        self.assertRaises(TypeError, find, b"!>_<!", '!', 0, len(str), 1)
+        self.assertRaises(TypeError, find, str, ord('!'), 0, len(str), 1)
+        self.assertRaises(TypeError, find, [], '!', 0, len(str), 1)
+        # CRASHES find(NULL, '!', 0, len(str), 1)
+        # CRASHES find(str, NULL, 0, len(str), 1)

-    # Test PyUnicode_FindChar()
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_findchar(self):
+        """Test PyUnicode_FindChar()"""
        from _testcapi import unicode_findchar

        for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
@ -447,11 +693,168 @@ class CAPITest(unittest.TestCase):
        # negative
        self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0)
        self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0)
+        # bad arguments
+        # CRASHES unicode_findchar(b"!>_<!", ord('!'), 0, len(str), 1)
+        # CRASHES unicode_findchar([], ord('!'), 0, len(str), 1)
+        # CRASHES unicode_findchar(NULL, ord('!'), 0, len(str), 1)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_replace(self):
+        """Test PyUnicode_Replace()"""
+        from _testcapi import unicode_replace as replace
+
+        str = 'abracadabra'
+        self.assertEqual(replace(str, 'a', '='), '=br=c=d=br=')
+        self.assertEqual(replace(str, 'a', '<>'), '<>br<>c<>d<>br<>')
+        self.assertEqual(replace(str, 'abra', '='), '=cad=')
+        self.assertEqual(replace(str, 'a', '=', 2), '=br=cadabra')
+        self.assertEqual(replace(str, 'a', '=', 0), str)
+        self.assertEqual(replace(str, 'a', '=', sys.maxsize), '=br=c=d=br=')
+        self.assertEqual(replace(str, 'z', '='), str)
+        self.assertEqual(replace(str, '', '='), '=a=b=r=a=c=a=d=a=b=r=a=')
+        self.assertEqual(replace(str, 'a', 'ж'), 'жbrжcжdжbrж')
+        self.assertEqual(replace('абабагаламага', 'а', '='), '=б=б=г=л=м=г=')
+        self.assertEqual(replace('Баден-Баден', 'Баден', 'Baden'), 'Baden-Baden')
+        # bad arguments
+        self.assertRaises(TypeError, replace, 'a', 'a', b'=')
+        self.assertRaises(TypeError, replace, 'a', b'a', '=')
+        self.assertRaises(TypeError, replace, b'a', 'a', '=')
+        self.assertRaises(TypeError, replace, 'a', 'a', ord('='))
+        self.assertRaises(TypeError, replace, 'a', ord('a'), '=')
+        self.assertRaises(TypeError, replace, [], 'a', '=')
+        # CRASHES replace('a', 'a', NULL)
+        # CRASHES replace('a', NULL, '=')
+        # CRASHES replace(NULL, 'a', '=')
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_compare(self):
+        """Test PyUnicode_Compare()"""
+        from _testcapi import unicode_compare as compare
+
+        self.assertEqual(compare('abc', 'abc'), 0)
+        self.assertEqual(compare('abc', 'def'), -1)
+        self.assertEqual(compare('def', 'abc'), 1)
+        self.assertEqual(compare('abc', 'abc\0def'), -1)
+        self.assertEqual(compare('abc\0def', 'abc\0def'), 0)
+        self.assertEqual(compare('абв', 'abc'), 1)
+
+        self.assertRaises(TypeError, compare, b'abc', 'abc')
+        self.assertRaises(TypeError, compare, 'abc', b'abc')
+        self.assertRaises(TypeError, compare, b'abc', b'abc')
+        self.assertRaises(TypeError, compare, [], 'abc')
+        self.assertRaises(TypeError, compare, 'abc', [])
+        self.assertRaises(TypeError, compare, [], [])
+        # CRASHES compare(NULL, 'abc')
+        # CRASHES compare('abc', NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_comparewithasciistring(self):
+        """Test PyUnicode_CompareWithASCIIString()"""
+        from _testcapi import unicode_comparewithasciistring as comparewithasciistring
+
+        self.assertEqual(comparewithasciistring('abc', b'abc'), 0)
+        self.assertEqual(comparewithasciistring('abc', b'def'), -1)
+        self.assertEqual(comparewithasciistring('def', b'abc'), 1)
+        self.assertEqual(comparewithasciistring('abc', b'abc\0def'), 0)
+        self.assertEqual(comparewithasciistring('abc\0def', b'abc\0def'), 1)
+        self.assertEqual(comparewithasciistring('абв', b'abc'), 1)
+
+        # CRASHES comparewithasciistring(b'abc', b'abc')
+        # CRASHES comparewithasciistring([], b'abc')
+        # CRASHES comparewithasciistring(NULL, b'abc')
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_richcompare(self):
+        """Test PyUnicode_RichCompare()"""
+        from _testcapi import unicode_richcompare as richcompare
+
+        LT, LE, EQ, NE, GT, GE = range(6)
+        strings = ('abc', 'абв', '\U0001f600', 'abc\0')
+        for s1 in strings:
+            for s2 in strings:
+                self.assertIs(richcompare(s1, s2, LT), s1 < s2)
+                self.assertIs(richcompare(s1, s2, LE), s1 <= s2)
+                self.assertIs(richcompare(s1, s2, EQ), s1 == s2)
+                self.assertIs(richcompare(s1, s2, NE), s1 != s2)
+                self.assertIs(richcompare(s1, s2, GT), s1 > s2)
+                self.assertIs(richcompare(s1, s2, GE), s1 >= s2)
+
+        for op in LT, LE, EQ, NE, GT, GE:
+            self.assertIs(richcompare(b'abc', 'abc', op), NotImplemented)
+            self.assertIs(richcompare('abc', b'abc', op), NotImplemented)
+            self.assertIs(richcompare(b'abc', b'abc', op), NotImplemented)
+            self.assertIs(richcompare([], 'abc', op), NotImplemented)
+            self.assertIs(richcompare('abc', [], op), NotImplemented)
+            self.assertIs(richcompare([], [], op), NotImplemented)
+
+            # CRASHES richcompare(NULL, 'abc', op)
+            # CRASHES richcompare('abc', NULL, op)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_format(self):
+        """Test PyUnicode_Format()"""
+        from _testcapi import unicode_format as format
+
+        self.assertEqual(format('x=%d!', 42), 'x=42!')
+        self.assertEqual(format('x=%d!', (42,)), 'x=42!')
+        self.assertEqual(format('x=%d y=%s!', (42, [])), 'x=42 y=[]!')
+
+        self.assertRaises(SystemError, format, 'x=%d!', NULL)
+        self.assertRaises(SystemError, format, NULL, 42)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_contains(self):
+        """Test PyUnicode_Contains()"""
+        from _testcapi import unicode_contains as contains
+
+        self.assertEqual(contains('abcd', ''), 1)
+        self.assertEqual(contains('abcd', 'b'), 1)
+        self.assertEqual(contains('abcd', 'x'), 0)
+        self.assertEqual(contains('abcd', 'ж'), 0)
+        self.assertEqual(contains('abcd', '\0'), 0)
+        self.assertEqual(contains('abc\0def', '\0'), 1)
+        self.assertEqual(contains('abcd', 'bc'), 1)
+
+        self.assertRaises(TypeError, contains, b'abcd', 'b')
+        self.assertRaises(TypeError, contains, 'abcd', b'b')
+        self.assertRaises(TypeError, contains, b'abcd', b'b')
+        self.assertRaises(TypeError, contains, [], 'b')
+        self.assertRaises(TypeError, contains, 'abcd', ord('b'))
+        # CRASHES contains(NULL, 'b')
+        # CRASHES contains('abcd', NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_isidentifier(self):
+        """Test PyUnicode_IsIdentifier()"""
+        from _testcapi import unicode_isidentifier as isidentifier
+
+        self.assertEqual(isidentifier("a"), 1)
+        self.assertEqual(isidentifier("b0"), 1)
+        self.assertEqual(isidentifier("µ"), 1)
+        self.assertEqual(isidentifier("𝔘𝔫𝔦𝔠𝔬𝔡𝔢"), 1)
+
+        self.assertEqual(isidentifier(""), 0)
+        self.assertEqual(isidentifier(" "), 0)
+        self.assertEqual(isidentifier("["), 0)
+        self.assertEqual(isidentifier("©"), 0)
+        self.assertEqual(isidentifier("0"), 0)
+        self.assertEqual(isidentifier("32M"), 0)
+
+        # CRASHES isidentifier(b"a")
+        # CRASHES isidentifier([])
+        # CRASHES isidentifier(NULL)

-    # Test PyUnicode_CopyCharacters()
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_copycharacters(self):
+        """Test PyUnicode_CopyCharacters()"""
        from _testcapi import unicode_copycharacters

        strings = [
--- a/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst
+++ b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst
@ -0,0 +1 @@
+Cover the Unicode C API with tests.
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@ -1,3 +1,4 @@
+#define PY_SSIZE_T_CLEAN
 #include "parts.h"

 static struct PyModuleDef *_testcapimodule = NULL;  // set at initialization
@ -99,6 +100,17 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
    Py_RETURN_NONE;
 }

+#define NULLABLE(x) do { if ((x) == Py_None) (x) = NULL; } while (0)  /* None -> NULL; caller supplies the ';' */
+
+/* Test PyUnicode_FromObject() */
+static PyObject *
+unicode_fromobject(PyObject *self, PyObject *arg)
+{
+    NULLABLE(arg);
+    return PyUnicode_FromObject(arg);
+}
+
+/* Test PyUnicode_AsWideChar() */
 static PyObject *
 unicode_aswidechar(PyObject *self, PyObject *args)
 {
@ -130,6 +142,7 @@ unicode_aswidechar(PyObject *self, PyObject *args)
    return Py_BuildValue("(Nn)", result, size);
 }

+/* Test PyUnicode_AsWideCharString() */
 static PyObject *
 unicode_aswidecharstring(PyObject *self, PyObject *args)
 {
@ -151,6 +164,7 @@ unicode_aswidecharstring(PyObject *self, PyObject *args)
    return Py_BuildValue("(Nn)", result, size);
 }

+/* Test PyUnicode_AsUCS4() */
 static PyObject *
 unicode_asucs4(PyObject *self, PyObject *args)
 {
@ -181,6 +195,7 @@ unicode_asucs4(PyObject *self, PyObject *args)
    return result;
 }

+/* Test PyUnicode_AsUTF8() */
 static PyObject *
 unicode_asutf8(PyObject *self, PyObject *args)
 {
@ -199,6 +214,7 @@ unicode_asutf8(PyObject *self, PyObject *args)
    return PyBytes_FromString(buffer);
 }

+/* Test PyUnicode_AsUTF8AndSize() */
 static PyObject *
 unicode_asutf8andsize(PyObject *self, PyObject *args)
 {
@ -223,26 +239,194 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
    return Py_BuildValue("(Nn)", result, utf8_len);
 }

+/* Test PyUnicode_Concat() */
+static PyObject *
+unicode_concat(PyObject *self, PyObject *args)
+{
+    PyObject *left;
+    PyObject *right;
+
+    if (!PyArg_ParseTuple(args, "OO", &left, &right))
+        return NULL;
+
+    NULLABLE(left);
+    NULLABLE(right);
+    return PyUnicode_Concat(left, right);
+}
+
+/* Test PyUnicode_Split() */
+static PyObject *
+unicode_split(PyObject *self, PyObject *args)
+{
+    PyObject *s;
+    PyObject *sep;
+    Py_ssize_t maxsplit = -1;
+
+    if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit))
+        return NULL;
+
+    NULLABLE(s);
+    NULLABLE(sep);
+    return PyUnicode_Split(s, sep, maxsplit);
+}
+
+/* Test PyUnicode_RSplit() */
+static PyObject *
+unicode_rsplit(PyObject *self, PyObject *args)
+{
+    PyObject *s;
+    PyObject *sep;
+    Py_ssize_t maxsplit = -1;
+
+    if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit))
+        return NULL;
+
+    NULLABLE(s);
+    NULLABLE(sep);
+    return PyUnicode_RSplit(s, sep, maxsplit);
+}
+
+/* Test PyUnicode_Splitlines() */
+static PyObject *
+unicode_splitlines(PyObject *self, PyObject *args)
+{
+    PyObject *s;
+    int keepends = 0;
+
+    if (!PyArg_ParseTuple(args, "O|i", &s, &keepends))
+        return NULL;
+
+    NULLABLE(s);
+    return PyUnicode_Splitlines(s, keepends);
+}
+
+/* Test PyUnicode_Partition() */
+static PyObject *
+unicode_partition(PyObject *self, PyObject *args)
+{
+    PyObject *s;
+    PyObject *sep;
+
+    if (!PyArg_ParseTuple(args, "OO", &s, &sep))
+        return NULL;
+
+    NULLABLE(s);
+    NULLABLE(sep);
+    return PyUnicode_Partition(s, sep);
+}
+
+/* Test PyUnicode_RPartition() */
+static PyObject *
+unicode_rpartition(PyObject *self, PyObject *args)
+{
+    PyObject *s;
+    PyObject *sep;
+
+    if (!PyArg_ParseTuple(args, "OO", &s, &sep))
+        return NULL;
+
+    NULLABLE(s);
+    NULLABLE(sep);
+    return PyUnicode_RPartition(s, sep);
+}
+
+/* Test PyUnicode_Translate() */
+static PyObject *
+unicode_translate(PyObject *self, PyObject *args)
+{
+    PyObject *obj;
+    PyObject *table;
+    const char *errors = NULL;
+
+    if (!PyArg_ParseTuple(args, "OO|z", &obj, &table, &errors))
+        return NULL;
+
+    NULLABLE(obj);
+    NULLABLE(table);
+    return PyUnicode_Translate(obj, table, errors);
+}
+
+/* Test PyUnicode_Join() */
+static PyObject *
+unicode_join(PyObject *self, PyObject *args)
+{
+    PyObject *sep;
+    PyObject *seq;
+
+    if (!PyArg_ParseTuple(args, "OO", &sep, &seq))
+        return NULL;
+
+    NULLABLE(sep);
+    NULLABLE(seq);
+    return PyUnicode_Join(sep, seq);
+}
+
+/* Test PyUnicode_Count() */
 static PyObject *
 unicode_count(PyObject *self, PyObject *args)
 {
    PyObject *str;
    PyObject *substr;
+    Py_ssize_t start;
+    Py_ssize_t end;
    Py_ssize_t result;
-    Py_ssize_t start, end;

-    if (!PyArg_ParseTuple(args, "UUnn:unicode_count", &str, &substr,
-                          &start, &end)) {
+    if (!PyArg_ParseTuple(args, "OOnn", &str, &substr, &start, &end))
        return NULL;
-    }

+    NULLABLE(str);
+    NULLABLE(substr);
    result = PyUnicode_Count(str, substr, start, end);
    if (result == -1)
        return NULL;
-    else
-        return PyLong_FromSsize_t(result);
+    return PyLong_FromSsize_t(result);
 }

+/* Test PyUnicode_Find() */
+static PyObject *
+unicode_find(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    PyObject *substr;
+    Py_ssize_t start;
+    Py_ssize_t end;
+    int direction;
+    Py_ssize_t result;
+
+    if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction))
+        return NULL;
+
+    NULLABLE(str);
+    NULLABLE(substr);
+    result = PyUnicode_Find(str, substr, start, end, direction);
+    if (result == -2)
+        return NULL;
+    return PyLong_FromSsize_t(result);
+}
+
+/* Test PyUnicode_Tailmatch() */
+static PyObject *
+unicode_tailmatch(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    PyObject *substr;
+    Py_ssize_t start;
+    Py_ssize_t end;
+    int direction;
+    Py_ssize_t result;
+
+    if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction))
+        return NULL;
+
+    NULLABLE(str);
+    NULLABLE(substr);
+    result = PyUnicode_Tailmatch(str, substr, start, end, direction);
+    if (result == -1)
+        return NULL;
+    return PyLong_FromSsize_t(result);
+}
+
+/* Test PyUnicode_FindChar() */
 static PyObject *
 unicode_findchar(PyObject *self, PyObject *args)
 {
@ -264,6 +448,130 @@ unicode_findchar(PyObject *self, PyObject *args)
        return PyLong_FromSsize_t(result);
 }

+/* Test PyUnicode_Replace().  Takes (str, substr, replstr[, maxcount]);
+   maxcount keeps its initial value of -1 when the argument is omitted.
+   The result of PyUnicode_Replace() is returned unchanged. */
+static PyObject *
+unicode_replace(PyObject *self, PyObject *args)
+{
+    Py_ssize_t limit = -1;
+    PyObject *text, *pattern, *repl;
+
+    if (!PyArg_ParseTuple(args, "OOO|n", &text, &pattern, &repl, &limit)) {
+        return NULL;
+    }
+    NULLABLE(text);
+    NULLABLE(pattern);
+    NULLABLE(repl);
+    return PyUnicode_Replace(text, pattern, repl, limit);
+}
+
+/* Test PyUnicode_Compare() with (left, right).  A -1 result is reported
+   as an error only if an exception is pending; else it is returned. */
+static PyObject *
+unicode_compare(PyObject *self, PyObject *args)
+{
+    PyObject *lhs, *rhs;
+    int cmp;
+
+    if (!PyArg_ParseTuple(args, "OO", &lhs, &rhs)) {
+        return NULL;
+    }
+    NULLABLE(lhs);
+    NULLABLE(rhs);
+    cmp = PyUnicode_Compare(lhs, rhs);
+    if (cmp != -1 || !PyErr_Occurred()) {
+        return PyLong_FromLong(cmp);
+    }
+    return NULL;
+}
+
+/* Test PyUnicode_CompareWithASCIIString() with (left[, right]) where
+   right is parsed with the "y#" format.  If right is omitted, the C
+   pointer stays NULL and is passed to the API unchanged. */
+static PyObject *
+unicode_comparewithasciistring(PyObject *self, PyObject *args)
+{
+    PyObject *lhs;
+    const char *ascii = NULL;
+    Py_ssize_t ascii_len;  /* written by "y#"; not used afterwards */
+    int cmp;
+
+    if (!PyArg_ParseTuple(args, "O|y#", &lhs, &ascii, &ascii_len)) {
+        return NULL;
+    }
+    NULLABLE(lhs);
+    cmp = PyUnicode_CompareWithASCIIString(lhs, ascii);
+    /* -1 counts as failure only while an exception is pending. */
+    return (cmp == -1 && PyErr_Occurred()) ? NULL : PyLong_FromLong(cmp);
+}
+
+/* Test PyUnicode_RichCompare() with (left, right, op); op is parsed as a
+   C int and forwarded unchanged.  Returns the API's result directly. */
+static PyObject *
+unicode_richcompare(PyObject *self, PyObject *args)
+{
+    int opcode;
+    PyObject *lhs, *rhs;
+
+    if (!PyArg_ParseTuple(args, "OOi", &lhs, &rhs, &opcode)) {
+        return NULL;
+    }
+    NULLABLE(lhs);
+    NULLABLE(rhs);
+    return PyUnicode_RichCompare(lhs, rhs, opcode);
+}
+
+/* Test PyUnicode_Format() with (format, args); returns its result. */
+static PyObject *
+unicode_format(PyObject *self, PyObject *args)
+{
+    PyObject *fmt, *values;
+
+    if (!PyArg_ParseTuple(args, "OO", &fmt, &values)) {
+        return NULL;
+    }
+    /* NULLABLE is defined outside this chunk; presumably None -> NULL. */
+    NULLABLE(fmt);
+    NULLABLE(values);
+    return PyUnicode_Format(fmt, values);
+}
+
+/* Test PyUnicode_Contains() with (container, element).  The int result
+   is returned as a Python int unless it is -1 with an exception set. */
+static PyObject *
+unicode_contains(PyObject *self, PyObject *args)
+{
+    PyObject *haystack, *needle;
+    int found;
+
+    if (!PyArg_ParseTuple(args, "OO", &haystack, &needle)) {
+        return NULL;
+    }
+    NULLABLE(haystack);
+    NULLABLE(needle);
+    found = PyUnicode_Contains(haystack, needle);
+    if (found != -1 || !PyErr_Occurred()) {
+        return PyLong_FromLong(found);
+    }
+    return NULL;
+}
+
+/* Test PyUnicode_IsIdentifier() on the single object argument.
+   Returns the int result as a Python int, or NULL when the result is -1
+   and an exception is pending. */
+static PyObject *
+unicode_isidentifier(PyObject *self, PyObject *arg)
+{
+    int verdict;
+
+    NULLABLE(arg);
+    verdict = PyUnicode_IsIdentifier(arg);
+    return (verdict == -1 && PyErr_Occurred())
+        ? NULL : PyLong_FromLong(verdict);
+}
+
+/* Test PyUnicode_CopyCharacters() */
 static PyObject *
 unicode_copycharacters(PyObject *self, PyObject *args)
 {
@ -711,13 +1019,31 @@ static PyMethodDef TestMethods[] = {
     test_unicode_compare_with_ascii,                            METH_NOARGS},
    {"test_string_from_format",  test_string_from_format,        METH_NOARGS},
    {"test_widechar",            test_widechar,                  METH_NOARGS},
+    {"unicode_fromobject",       unicode_fromobject,             METH_O},
    {"unicode_aswidechar",       unicode_aswidechar,             METH_VARARGS},
    {"unicode_aswidecharstring", unicode_aswidecharstring,       METH_VARARGS},
    {"unicode_asucs4",           unicode_asucs4,                 METH_VARARGS},
    {"unicode_asutf8",           unicode_asutf8,                 METH_VARARGS},
    {"unicode_asutf8andsize",    unicode_asutf8andsize,          METH_VARARGS},
+    {"unicode_concat",           unicode_concat,                 METH_VARARGS},
+    {"unicode_splitlines",       unicode_splitlines,             METH_VARARGS},
+    {"unicode_split",            unicode_split,                  METH_VARARGS},
+    {"unicode_rsplit",           unicode_rsplit,                 METH_VARARGS},
+    {"unicode_partition",        unicode_partition,              METH_VARARGS},
+    {"unicode_rpartition",       unicode_rpartition,             METH_VARARGS},
+    {"unicode_translate",        unicode_translate,              METH_VARARGS},
+    {"unicode_join",             unicode_join,                   METH_VARARGS},
    {"unicode_count",            unicode_count,                  METH_VARARGS},
+    {"unicode_tailmatch",        unicode_tailmatch,              METH_VARARGS},
+    {"unicode_find",             unicode_find,                   METH_VARARGS},
    {"unicode_findchar",         unicode_findchar,               METH_VARARGS},
+    {"unicode_replace",          unicode_replace,                METH_VARARGS},
+    {"unicode_compare",          unicode_compare,                METH_VARARGS},
+    {"unicode_comparewithasciistring",unicode_comparewithasciistring,METH_VARARGS},
+    {"unicode_richcompare",      unicode_richcompare,            METH_VARARGS},
+    {"unicode_format",           unicode_format,                 METH_VARARGS},
+    {"unicode_contains",         unicode_contains,               METH_VARARGS},
+    {"unicode_isidentifier",     unicode_isidentifier,           METH_O},
    {"unicode_copycharacters",   unicode_copycharacters,         METH_VARARGS},
    {NULL},
 };