Issue #3163: The struct module gets new format characters 'n' and 'N'

supporting C integer types `ssize_t` and `size_t`, respectively.
2025-09-26 18:29:57 +00:00 · 2011-10-06 15:27:40 +02:00 · 2011-10-06 15:27:40 +02:00 · 45d9c91d4b
commit 45d9c91d4b
parent 15a66cf134
4 changed files with 150 additions and 30 deletions
--- a/Doc/library/struct.rst
+++ b/Doc/library/struct.rst
@ -187,17 +187,24 @@ platform-dependent.
 | ``Q``  | :c:type:`unsigned long   | integer            | 8              | \(2), \(3) |
 |        | long`                    |                    |                |            |
 +--------+--------------------------+--------------------+----------------+------------+
-| ``f``  | :c:type:`float`          | float              | 4              | \(4)       |
+| ``n``  | :c:type:`ssize_t`        | integer            |                | \(4)       |
 +--------+--------------------------+--------------------+----------------+------------+
-| ``d``  | :c:type:`double`         | float              | 8              | \(4)       |
+| ``N``  | :c:type:`size_t`         | integer            |                | \(4)       |
 +--------+--------------------------+--------------------+----------------+------------+
 | ``f``  | :c:type:`float`          | float              | 4              | \(5)       |
 +--------+--------------------------+--------------------+----------------+------------+
 | ``d``  | :c:type:`double`         | float              | 8              | \(5)       |
 +--------+--------------------------+--------------------+----------------+------------+
 | ``s``  | :c:type:`char[]`         | bytes              |                |            |
 +--------+--------------------------+--------------------+----------------+------------+
 | ``p``  | :c:type:`char[]`         | bytes              |                |            |
 +--------+--------------------------+--------------------+----------------+------------+
-| ``P``  | :c:type:`void \*`        | integer            |                | \(5)       |
+| ``P``  | :c:type:`void \*`        | integer            |                | \(6)       |
 +--------+--------------------------+--------------------+----------------+------------+
 .. versionchanged:: 3.3
   Added support for the ``'n'`` and ``'N'`` formats.
 Notes:
 (1)
@ -219,11 +226,17 @@ Notes:
      Use of the :meth:`__index__` method for non-integers is new in 3.2.
 (4)
   The ``'n'`` and ``'N'`` conversion codes are only available for the native
   size (selected as the default or with the ``'@'`` byte order character).
   For the standard size, you can use whichever of the other integer formats
   fits your application.
 (5)
   For the ``'f'`` and ``'d'`` conversion codes, the packed representation uses
   the IEEE 754 binary32 (for ``'f'``) or binary64 (for ``'d'``) format,
   regardless of the floating-point format used by the platform.
-(5)
+(6)
   The ``'P'`` format character is only available for the native byte ordering
   (selected as the default or with the ``'@'`` byte order character). The byte
   order character ``'='`` chooses to use little- or big-endian ordering based
--- a/Lib/test/test_struct.py
+++ b/Lib/test/test_struct.py
@ -8,9 +8,19 @@ from test.support import run_unittest
 ISBIGENDIAN = sys.byteorder == "big"
 IS32BIT = sys.maxsize == 0x7fffffff
-integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q'
+integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q', 'n', 'N'
 byteorders = '', '@', '=', '<', '>', '!'
 def iter_integer_formats(byteorders=byteorders):
    for code in integer_codes:
        for byteorder in byteorders:
            if (byteorder in ('', '@') and code in ('q', 'Q') and
                not HAVE_LONG_LONG):
                continue
            if (byteorder not in ('', '@') and code in ('n', 'N')):
                continue
            yield code, byteorder
 # Native 'q' packing isn't available on systems that don't have the C
 # long long type.
 try:
@ -141,14 +151,13 @@ class StructTest(unittest.TestCase):
            }
        # standard integer sizes
-        for code in integer_codes:
+        for code, byteorder in iter_integer_formats(('=', '<', '>', '!')):
            for byteorder in '=', '<', '>', '!':
            format = byteorder+code
            size = struct.calcsize(format)
            self.assertEqual(size, expected_size[code])
        # native integer sizes
-        native_pairs = 'bB', 'hH', 'iI', 'lL'
+        native_pairs = 'bB', 'hH', 'iI', 'lL', 'nN'
        if HAVE_LONG_LONG:
            native_pairs += 'qQ',
        for format_pair in native_pairs:
@ -166,9 +175,11 @@ class StructTest(unittest.TestCase):
        if HAVE_LONG_LONG:
            self.assertLessEqual(8, struct.calcsize('q'))
            self.assertLessEqual(struct.calcsize('l'), struct.calcsize('q'))
        self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('i'))
        self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('P'))
    def test_integers(self):
-        # Integer tests (bBhHiIlLqQ).
+        # Integer tests (bBhHiIlLqQnN).
        import binascii
        class IntTester(unittest.TestCase):
@ -182,11 +193,11 @@ class StructTest(unittest.TestCase):
                                     self.byteorder)
                self.bytesize = struct.calcsize(format)
                self.bitsize = self.bytesize * 8
-                if self.code in tuple('bhilq'):
+                if self.code in tuple('bhilqn'):
                    self.signed = True
                    self.min_value = -(2**(self.bitsize-1))
                    self.max_value = 2**(self.bitsize-1) - 1
-                elif self.code in tuple('BHILQ'):
+                elif self.code in tuple('BHILQN'):
                    self.signed = False
                    self.min_value = 0
                    self.max_value = 2**self.bitsize - 1
@ -316,15 +327,24 @@ class StructTest(unittest.TestCase):
                                      struct.pack, self.format,
                                      obj)
-        for code in integer_codes:
+        for code, byteorder in iter_integer_formats():
            for byteorder in byteorders:
                if (byteorder in ('', '@') and code in ('q', 'Q') and
                    not HAVE_LONG_LONG):
                    continue
            format = byteorder+code
            t = IntTester(format)
            t.run()
    def test_nN_code(self):
        # n and N don't exist in standard sizes
        def assertStructError(func, *args, **kwargs):
            with self.assertRaises(struct.error) as cm:
                func(*args, **kwargs)
            self.assertIn("bad char in struct format", str(cm.exception))
        for code in 'nN':
            for byteorder in ('=', '<', '>', '!'):
                format = byteorder+code
                assertStructError(struct.calcsize, format)
                assertStructError(struct.pack, format, 0)
                assertStructError(struct.unpack, format, b"")
    def test_p_code(self):
        # Test p ("Pascal string") code.
        for code, input, expected, expectedback in [
@ -377,11 +397,7 @@ class StructTest(unittest.TestCase):
        self.assertRaises(OverflowError, struct.pack, ">f", big)
    def test_1530559(self):
-        for byteorder in '', '@', '=', '<', '>', '!':
+        for code, byteorder in iter_integer_formats():
            for code in integer_codes:
                if (byteorder in ('', '@') and code in ('q', 'Q') and
                    not HAVE_LONG_LONG):
                    continue
            format = byteorder + code
            self.assertRaises(struct.error, struct.pack, format, 1.0)
            self.assertRaises(struct.error, struct.pack, format, 1.5)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -294,6 +294,9 @@ Core and Builtins
 Library
 -------
 - Issue #3163: The struct module gets new format characters 'n' and 'N'
  supporting C integer types ``ssize_t`` and ``size_t``, respectively.
 - Issue #13099: Fix sqlite3.Cursor.lastrowid under a Turkish locale.
  Reported and diagnosed by Thomas Kluyver.
--- a/Modules/_struct.c
+++ b/Modules/_struct.c
@ -58,6 +58,7 @@ typedef struct { char c; long x; } st_long;
 typedef struct { char c; float x; } st_float;
 typedef struct { char c; double x; } st_double;
 typedef struct { char c; void *x; } st_void_p;
 typedef struct { char c; size_t x; } st_size_t;
 #define SHORT_ALIGN (sizeof(st_short) - sizeof(short))
 #define INT_ALIGN (sizeof(st_int) - sizeof(int))
@ -65,6 +66,7 @@ typedef struct { char c; void *x; } st_void_p;
 #define FLOAT_ALIGN (sizeof(st_float) - sizeof(float))
 #define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double))
 #define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *))
 #define SIZE_T_ALIGN (sizeof(st_size_t) - sizeof(size_t))
 /* We can't support q and Q in native mode unless the compiler does;
   in std mode, they're 8 bytes on all platforms. */
@ -213,6 +215,52 @@ get_ulonglong(PyObject *v, unsigned PY_LONG_LONG *p)
 #endif
 /* Convert the Python object v to a C Py_ssize_t and store it in *p.

    v is first passed through get_pylong(); the reference it returns is
    released here once the value has been extracted.

    Returns 0 on success.  Returns -1 with an exception set on failure; an
    OverflowError raised by the conversion is replaced by StructError
    ("argument out of range"), any other exception is left untouched. */
 static int
 get_ssize_t(PyObject *v, Py_ssize_t *p)
 {
    Py_ssize_t value;
    PyObject *as_long = get_pylong(v);

    if (as_long == NULL)
        return -1;
    assert(PyLong_Check(as_long));
    value = PyLong_AsSsize_t(as_long);
    Py_DECREF(as_long);

    /* -1 is only an error indicator when an exception is actually set. */
    if (value != (Py_ssize_t)-1 || !PyErr_Occurred()) {
        *p = value;
        return 0;
    }
    if (PyErr_ExceptionMatches(PyExc_OverflowError))
        PyErr_SetString(StructError, "argument out of range");
    return -1;
 }
 /* Convert the Python object v to a C size_t and store it in *p.

    v is first passed through get_pylong(); the reference it returns is
    released here once the value has been extracted.

    Returns 0 on success.  Returns -1 with an exception set on failure; an
    OverflowError raised by the conversion is replaced by StructError
    ("argument out of range"), any other exception is left untouched. */
 static int
 get_size_t(PyObject *v, size_t *p)
 {
    size_t value;
    PyObject *as_long = get_pylong(v);

    if (as_long == NULL)
        return -1;
    assert(PyLong_Check(as_long));
    value = PyLong_AsSize_t(as_long);
    Py_DECREF(as_long);

    /* (size_t)-1 is only an error indicator when an exception is set. */
    if (value != (size_t)-1 || !PyErr_Occurred()) {
        *p = value;
        return 0;
    }
    if (PyErr_ExceptionMatches(PyExc_OverflowError))
        PyErr_SetString(StructError, "argument out of range");
    return -1;
 }
 #define RANGE_ERROR(x, f, flag, mask) return _range_error(f, flag)
@ -369,6 +417,23 @@ nu_ulong(const char *p, const formatdef *f)
    return PyLong_FromUnsignedLong(x);
 }
 /* Native unpack for 'n': read sizeof(Py_ssize_t) bytes from p and return
    them as a Python int.  The formatdef argument f is not used. */
 static PyObject *
 nu_ssize_t(const char *p, const formatdef *f)
 {
    Py_ssize_t value;

    /* Copy byte-wise instead of dereferencing a cast pointer (presumably
       because p may not be suitably aligned). */
    memcpy(&value, p, sizeof(value));
    return PyLong_FromSsize_t(value);
 }
 /* Native unpack for 'N': read sizeof(size_t) bytes from p and return them
    as a Python int.  The formatdef argument f is not used. */
 static PyObject *
 nu_size_t(const char *p, const formatdef *f)
 {
    size_t value;

    /* Copy byte-wise instead of dereferencing a cast pointer (presumably
       because p may not be suitably aligned). */
    memcpy(&value, p, sizeof(value));
    return PyLong_FromSize_t(value);
 }
 /* Native mode doesn't support q or Q unless the platform C supports
   long long (or, on Windows, __int64). */
@ -558,6 +623,26 @@ np_ulong(char *p, PyObject *v, const formatdef *f)
    return 0;
 }
 /* Native pack for 'n': convert v with get_ssize_t() and write the
    resulting sizeof(Py_ssize_t) bytes to p.  Returns 0 on success, or -1
    when the conversion fails (nothing is written to p in that case).
    The formatdef argument f is not used. */
 static int
 np_ssize_t(char *p, PyObject *v, const formatdef *f)
 {
    Py_ssize_t value;

    if (get_ssize_t(v, &value) < 0)
        return -1;
    memcpy(p, &value, sizeof(value));
    return 0;
 }
 /* Native pack for 'N': convert v with get_size_t() and write the
    resulting sizeof(size_t) bytes to p.  Returns 0 on success, or -1 when
    the conversion fails (nothing is written to p in that case).
    The formatdef argument f is not used. */
 static int
 np_size_t(char *p, PyObject *v, const formatdef *f)
 {
    size_t value;

    if (get_size_t(v, &value) < 0)
        return -1;
    memcpy(p, &value, sizeof(value));
    return 0;
 }
 #ifdef HAVE_LONG_LONG
 static int
@ -651,6 +736,8 @@ static formatdef native_table[] = {
    {'I',       sizeof(int),    INT_ALIGN,      nu_uint,        np_uint},
    {'l',       sizeof(long),   LONG_ALIGN,     nu_long,        np_long},
    {'L',       sizeof(long),   LONG_ALIGN,     nu_ulong,       np_ulong},
    {'n',       sizeof(size_t), SIZE_T_ALIGN,   nu_ssize_t,     np_ssize_t},
    {'N',       sizeof(size_t), SIZE_T_ALIGN,   nu_size_t,      np_size_t},
 #ifdef HAVE_LONG_LONG
    {'q',       sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_longlong, np_longlong},
    {'Q',       sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_ulonglong,np_ulonglong},
@ -1951,7 +2038,8 @@ these can be preceded by a decimal repeat count:\n\
  l:long; L:unsigned long; f:float; d:double.\n\
 Special cases (preceding decimal count indicates length):\n\
  s:string (array of char); p: pascal string (with count byte).\n\
-Special case (only available in native format):\n\
+Special cases (only available in native format):\n\
  n:ssize_t; N:size_t;\n\
  P:an integer type that is wide enough to hold a pointer.\n\
 Special case (not in native mode unless 'long long' in platform C):\n\
  q:long long; Q:unsigned long long\n\