gh-133583: Add support for fixed size unsigned integers in argument parsing (GH-133584)

* Add Argument Clinic converters: uint8, uint16, uint32, uint64.
* Add private C API: _PyLong_UInt8_Converter(), _PyLong_UInt16_Converter(), _PyLong_UInt32_Converter(), _PyLong_UInt64_Converter().
2025-07-07 11:25:30 +00:00 · 2025-05-08 12:27:50 +03:00 · 2025-05-08 12:27:50 +03:00 · 4c914e7a36
commit 4c914e7a36
parent 3224b99872
7 changed files with 87 additions and 93 deletions
--- a/Include/internal/pycore_long.h
+++ b/Include/internal/pycore_long.h
@@ -158,6 +158,11 @@ PyAPI_FUNC(int) _PyLong_UnsignedLongLong_Converter(PyObject *, void *);
 // Export for '_testclinic' shared extension (Argument Clinic code)
 PyAPI_FUNC(int) _PyLong_Size_t_Converter(PyObject *, void *);

+PyAPI_FUNC(int) _PyLong_UInt8_Converter(PyObject *, void *);
+PyAPI_FUNC(int) _PyLong_UInt16_Converter(PyObject *, void *);
+PyAPI_FUNC(int) _PyLong_UInt32_Converter(PyObject *, void *);
+PyAPI_FUNC(int) _PyLong_UInt64_Converter(PyObject *, void *);
+
 /* Long value tag bits:
 * 0-1: Sign bits value = (1-sign), ie. negative=2, positive=0, zero=1.
 * 2: Set to 1 for the small ints
--- a/Lib/test/test_clinic.py
+++ b/Lib/test/test_clinic.py
@@ -2835,6 +2835,10 @@ class ClinicExternalTest(TestCase):
            "size_t",
            "slice_index",
            "str",
+            "uint16",
+            "uint32",
+            "uint64",
+            "uint8",
            "unicode",
            "unsigned_char",
            "unsigned_int",
--- a/Modules/_lzmamodule.c
+++ b/Modules/_lzmamodule.c
@@ -17,6 +17,7 @@

 #include <lzma.h>

+#include "pycore_long.h"          // _PyLong_UInt32_Converter()
 // Blocks output buffer wrappers
 #include "pycore_blocks_output_buffer.h"

@@ -223,8 +224,6 @@ FUNCNAME(PyObject *obj, void *ptr)                                  \
    return 1;                                                       \
 }

-INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
-INT_TYPE_CONVERTER_FUNC(uint64_t, uint64_converter)
 INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
 INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
 INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
@@ -254,7 +253,7 @@ parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
        return NULL;
    }
    if (preset_obj != NULL) {
-        int ok = uint32_converter(preset_obj, &preset);
+        int ok = _PyLong_UInt32_Converter(preset_obj, &preset);
        Py_DECREF(preset_obj);
        if (!ok) {
            return NULL;
@@ -275,14 +274,14 @@ parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
    if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
                                     "|OOO&O&O&O&O&O&O&O&", optnames,
                                     &id, &preset_obj,
-                                     uint32_converter, &options->dict_size,
-                                     uint32_converter, &options->lc,
-                                     uint32_converter, &options->lp,
-                                     uint32_converter, &options->pb,
+                                     _PyLong_UInt32_Converter, &options->dict_size,
+                                     _PyLong_UInt32_Converter, &options->lc,
+                                     _PyLong_UInt32_Converter, &options->lp,
+                                     _PyLong_UInt32_Converter, &options->pb,
                                     lzma_mode_converter, &options->mode,
-                                     uint32_converter, &options->nice_len,
+                                     _PyLong_UInt32_Converter, &options->nice_len,
                                     lzma_mf_converter, &options->mf,
-                                     uint32_converter, &options->depth)) {
+                                     _PyLong_UInt32_Converter, &options->depth)) {
        PyErr_SetString(PyExc_ValueError,
                        "Invalid filter specifier for LZMA filter");
        PyMem_Free(options);
@@ -301,7 +300,7 @@ parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
    lzma_options_delta *options;

    if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
-                                     &id, uint32_converter, &dist)) {
+                                     &id, _PyLong_UInt32_Converter, &dist)) {
        PyErr_SetString(PyExc_ValueError,
                        "Invalid filter specifier for delta filter");
        return NULL;
@@ -325,7 +324,7 @@ parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
    lzma_options_bcj *options;

    if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
-                                     &id, uint32_converter, &start_offset)) {
+                                     &id, _PyLong_UInt32_Converter, &start_offset)) {
        PyErr_SetString(PyExc_ValueError,
                        "Invalid filter specifier for BCJ filter");
        return NULL;
@@ -806,7 +805,7 @@ Compressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
        return NULL;
    }

-    if (preset_obj != Py_None && !uint32_converter(preset_obj, &preset)) {
+    if (preset_obj != Py_None && !_PyLong_UInt32_Converter(preset_obj, &preset)) {
        return NULL;
    }

@@ -1226,7 +1225,7 @@ _lzma_LZMADecompressor_impl(PyTypeObject *type, int format,
                            "Cannot specify memory limit with FORMAT_RAW");
            return NULL;
        }
-        if (!uint64_converter(memlimit, &memlimit_)) {
+        if (!_PyLong_UInt64_Converter(memlimit, &memlimit_)) {
            return NULL;
        }
    }
--- a/Modules/clinic/socketmodule.c.h
+++ b/Modules/clinic/socketmodule.c.h
@@ -6,6 +6,7 @@ preserve
 #  include "pycore_gc.h"          // PyGC_Head
 #  include "pycore_runtime.h"     // _Py_ID()
 #endif
+#include "pycore_long.h"          // _PyLong_UInt16_Converter()
 #include "pycore_modsupport.h"    // _PyArg_UnpackKeywords()

 PyDoc_STRVAR(_socket_socket_close__doc__,
@@ -369,4 +370,4 @@ exit:
 #ifndef _SOCKET_IF_INDEXTONAME_METHODDEF
    #define _SOCKET_IF_INDEXTONAME_METHODDEF
 #endif /* !defined(_SOCKET_IF_INDEXTONAME_METHODDEF) */
-/*[clinic end generated code: output=c971b79d2193b426 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=07776dd21d1e3b56 input=a9049054013a1b77]*/
--- a/Modules/socketmodule.c
+++ b/Modules/socketmodule.c
@@ -638,33 +638,22 @@ _PyLong_##NAME##_Converter(PyObject *obj, void *ptr)                \
    return 1;                                                       \
 }

-UNSIGNED_INT_CONVERTER(UInt16, uint16_t)
-UNSIGNED_INT_CONVERTER(UInt32, uint32_t)
-
 #if defined(HAVE_IF_NAMEINDEX) || defined(MS_WINDOWS)
 # ifdef MS_WINDOWS
    UNSIGNED_INT_CONVERTER(NetIfindex, NET_IFINDEX)
 # else
-    UNSIGNED_INT_CONVERTER(NetIfindex, unsigned int)
+#   define _PyLong_NetIfindex_Converter _PyLong_UnsignedInt_Converter
 #   define NET_IFINDEX unsigned int
 # endif
 #endif // defined(HAVE_IF_NAMEINDEX) || defined(MS_WINDOWS)

 /*[python input]
-class uint16_converter(CConverter):
-    type = "uint16_t"
-    converter = '_PyLong_UInt16_Converter'
-
-class uint32_converter(CConverter):
-    type = "uint32_t"
-    converter = '_PyLong_UInt32_Converter'
-
 class NET_IFINDEX_converter(CConverter):
    type = "NET_IFINDEX"
    converter = '_PyLong_NetIfindex_Converter'

 [python start generated code]*/
-/*[python end generated code: output=da39a3ee5e6b4b0d input=3de2e4a03fbf83b8]*/
+/*[python end generated code: output=da39a3ee5e6b4b0d input=1cf809c40a407c34]*/

 /*[clinic input]
 module _socket
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1760,6 +1760,10 @@ UNSIGNED_INT_CONVERTER(UnsignedInt, unsigned int)
 UNSIGNED_INT_CONVERTER(UnsignedLong, unsigned long)
 UNSIGNED_INT_CONVERTER(UnsignedLongLong, unsigned long long)
 UNSIGNED_INT_CONVERTER(Size_t, size_t)
+UNSIGNED_INT_CONVERTER(UInt8, uint8_t)
+UNSIGNED_INT_CONVERTER(UInt16, uint16_t)
+UNSIGNED_INT_CONVERTER(UInt32, uint32_t)
+UNSIGNED_INT_CONVERTER(UInt64, uint64_t)


 #define CHECK_BINOP(v,w)                                \
--- a/Tools/clinic/libclinic/converters.py
+++ b/Tools/clinic/libclinic/converters.py
@@ -17,6 +17,54 @@ from libclinic.converter import (
 TypeSet = set[bltns.type[object]]


+class BaseUnsignedIntConverter(CConverter):
+
+    def use_converter(self) -> None:
+        if self.converter:
+            self.add_include('pycore_long.h',
+                             f'{self.converter}()')
+
+    def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
+        if not limited_capi:
+            return super().parse_arg(argname, displayname, limited_capi=limited_capi)
+        return self.format_code("""
+            {{{{
+                Py_ssize_t _bytes = PyLong_AsNativeBytes({argname}, &{paramname}, sizeof({type}),
+                        Py_ASNATIVEBYTES_NATIVE_ENDIAN |
+                        Py_ASNATIVEBYTES_ALLOW_INDEX |
+                        Py_ASNATIVEBYTES_REJECT_NEGATIVE |
+                        Py_ASNATIVEBYTES_UNSIGNED_BUFFER);
+                if (_bytes < 0) {{{{
+                    goto exit;
+                }}}}
+                if ((size_t)_bytes > sizeof({type})) {{{{
+                    PyErr_SetString(PyExc_OverflowError,
+                                    "Python int too large for C {type}");
+                    goto exit;
+                }}}}
+            }}}}
+            """,
+            argname=argname,
+            type=self.type)
+
+
+class uint8_converter(BaseUnsignedIntConverter):
+    type = "uint8_t"
+    converter = '_PyLong_UInt8_Converter'
+
+class uint16_converter(BaseUnsignedIntConverter):
+    type = "uint16_t"
+    converter = '_PyLong_UInt16_Converter'
+
+class uint32_converter(BaseUnsignedIntConverter):
+    type = "uint32_t"
+    converter = '_PyLong_UInt32_Converter'
+
+class uint64_converter(BaseUnsignedIntConverter):
+    type = "uint64_t"
+    converter = '_PyLong_UInt64_Converter'
+
+
 class bool_converter(CConverter):
    type = 'int'
    default_type = bool
@@ -211,29 +259,7 @@ class short_converter(CConverter):
        return super().parse_arg(argname, displayname, limited_capi=limited_capi)


-def format_inline_unsigned_int_converter(self: CConverter, argname: str) -> str:
-    return self.format_code("""
-        {{{{
-            Py_ssize_t _bytes = PyLong_AsNativeBytes({argname}, &{paramname}, sizeof({type}),
-                    Py_ASNATIVEBYTES_NATIVE_ENDIAN |
-                    Py_ASNATIVEBYTES_ALLOW_INDEX |
-                    Py_ASNATIVEBYTES_REJECT_NEGATIVE |
-                    Py_ASNATIVEBYTES_UNSIGNED_BUFFER);
-            if (_bytes < 0) {{{{
-                goto exit;
-            }}}}
-            if ((size_t)_bytes > sizeof({type})) {{{{
-                PyErr_SetString(PyExc_OverflowError,
-                                "Python int too large for C {type}");
-                goto exit;
-            }}}}
-        }}}}
-        """,
-        argname=argname,
-        type=self.type)
-
-
-class unsigned_short_converter(CConverter):
+class unsigned_short_converter(BaseUnsignedIntConverter):
    type = 'unsigned short'
    default_type = int
    c_ignored_default = "0"
@@ -244,11 +270,6 @@ class unsigned_short_converter(CConverter):
        else:
            self.converter = '_PyLong_UnsignedShort_Converter'

-    def use_converter(self) -> None:
-        if self.converter == '_PyLong_UnsignedShort_Converter':
-            self.add_include('pycore_long.h',
-                             '_PyLong_UnsignedShort_Converter()')
-
    def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
        if self.format_unit == 'H':
            return self.format_code("""
@@ -258,9 +279,7 @@ class unsigned_short_converter(CConverter):
                }}}}
                """,
                argname=argname)
-        if not limited_capi:
-            return super().parse_arg(argname, displayname, limited_capi=limited_capi)
-        return format_inline_unsigned_int_converter(self, argname)
+        return super().parse_arg(argname, displayname, limited_capi=limited_capi)


@add_legacy_c_converter('C', accept={str})
@@ -311,7 +330,7 @@ class int_converter(CConverter):
        return super().parse_arg(argname, displayname, limited_capi=limited_capi)


-class unsigned_int_converter(CConverter):
+class unsigned_int_converter(BaseUnsignedIntConverter):
    type = 'unsigned int'
    default_type = int
    c_ignored_default = "0"
@@ -322,11 +341,6 @@ class unsigned_int_converter(CConverter):
        else:
            self.converter = '_PyLong_UnsignedInt_Converter'

-    def use_converter(self) -> None:
-        if self.converter == '_PyLong_UnsignedInt_Converter':
-            self.add_include('pycore_long.h',
-                             '_PyLong_UnsignedInt_Converter()')
-
    def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
        if self.format_unit == 'I':
            return self.format_code("""
@@ -336,9 +350,7 @@ class unsigned_int_converter(CConverter):
                }}}}
                """,
                argname=argname)
-        if not limited_capi:
-            return super().parse_arg(argname, displayname, limited_capi=limited_capi)
-        return format_inline_unsigned_int_converter(self, argname)
+        return super().parse_arg(argname, displayname, limited_capi=limited_capi)


 class long_converter(CConverter):
@@ -359,7 +371,7 @@ class long_converter(CConverter):
        return super().parse_arg(argname, displayname, limited_capi=limited_capi)


-class unsigned_long_converter(CConverter):
+class unsigned_long_converter(BaseUnsignedIntConverter):
    type = 'unsigned long'
    default_type = int
    c_ignored_default = "0"
@@ -370,11 +382,6 @@ class unsigned_long_converter(CConverter):
        else:
            self.converter = '_PyLong_UnsignedLong_Converter'

-    def use_converter(self) -> None:
-        if self.converter == '_PyLong_UnsignedLong_Converter':
-            self.add_include('pycore_long.h',
-                             '_PyLong_UnsignedLong_Converter()')
-
    def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
        if self.format_unit == 'k':
            return self.format_code("""
@@ -387,9 +394,7 @@ class unsigned_long_converter(CConverter):
                argname=argname,
                bad_argument=self.bad_argument(displayname, 'int', limited_capi=limited_capi),
            )
-        if not limited_capi:
-            return super().parse_arg(argname, displayname, limited_capi=limited_capi)
-        return format_inline_unsigned_int_converter(self, argname)
+        return super().parse_arg(argname, displayname, limited_capi=limited_capi)


 class long_long_converter(CConverter):
@@ -410,7 +415,7 @@ class long_long_converter(CConverter):
        return super().parse_arg(argname, displayname, limited_capi=limited_capi)


-class unsigned_long_long_converter(CConverter):
+class unsigned_long_long_converter(BaseUnsignedIntConverter):
    type = 'unsigned long long'
    default_type = int
    c_ignored_default = "0"
@@ -421,11 +426,6 @@ class unsigned_long_long_converter(CConverter):
        else:
            self.converter = '_PyLong_UnsignedLongLong_Converter'

-    def use_converter(self) -> None:
-        if self.converter == '_PyLong_UnsignedLongLong_Converter':
-            self.add_include('pycore_long.h',
-                             '_PyLong_UnsignedLongLong_Converter()')
-
    def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
        if self.format_unit == 'K':
            return self.format_code("""
@@ -438,9 +438,7 @@ class unsigned_long_long_converter(CConverter):
                argname=argname,
                bad_argument=self.bad_argument(displayname, 'int', limited_capi=limited_capi),
            )
-        if not limited_capi:
-            return super().parse_arg(argname, displayname, limited_capi=limited_capi)
-        return format_inline_unsigned_int_converter(self, argname)
+        return super().parse_arg(argname, displayname, limited_capi=limited_capi)


 class Py_ssize_t_converter(CConverter):
@@ -557,15 +555,11 @@ class slice_index_converter(CConverter):
                argname=argname)


-class size_t_converter(CConverter):
+class size_t_converter(BaseUnsignedIntConverter):
    type = 'size_t'
    converter = '_PyLong_Size_t_Converter'
    c_ignored_default = "0"

-    def use_converter(self) -> None:
-        self.add_include('pycore_long.h',
-                         '_PyLong_Size_t_Converter()')
-
    def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None:
        if self.format_unit == 'n':
            return self.format_code("""
@@ -575,9 +569,7 @@ class size_t_converter(CConverter):
                }}}}
                """,
                argname=argname)
-        if not limited_capi:
-            return super().parse_arg(argname, displayname, limited_capi=limited_capi)
-        return format_inline_unsigned_int_converter(self, argname)
+        return super().parse_arg(argname, displayname, limited_capi=limited_capi)


 class fildes_converter(CConverter):