mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 11:49:12 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			197 lines
		
	
	
	
		
			7.8 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			197 lines
		
	
	
	
		
			7.8 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
\section{\module{struct} ---
 | 
						|
         Interpret strings as packed binary data}
 | 
						|
\declaremodule{builtin}{struct}
 | 
						|
 | 
						|
\modulesynopsis{Interpret strings as packed binary data.}
 | 
						|
 | 
						|
\indexii{C}{structures}
 | 
						|
\indexiii{packing}{binary}{data}
 | 
						|
 | 
						|
This module performs conversions between Python values and C
 | 
						|
structs represented as Python strings.  It uses \dfn{format strings}
 | 
						|
(explained below) as compact descriptions of the layout of the C
 | 
						|
structs and the intended conversion to/from Python values.  This can
 | 
						|
be used in handling binary data stored in files or from network
 | 
						|
connections, among other sources.
 | 
						|
 | 
						|
The module defines the following exception and functions:
 | 
						|
 | 
						|
 | 
						|
\begin{excdesc}{error}
 | 
						|
  Exception raised on various occasions; argument is a string
 | 
						|
  describing what is wrong.
 | 
						|
\end{excdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{pack}{fmt, v1, v2, \textrm{\ldots}}
 | 
						|
  Return a string containing the values
 | 
						|
  \code{\var{v1}, \var{v2}, \textrm{\ldots}} packed according to the given
 | 
						|
  format.  The arguments must match the values required by the format
 | 
						|
  exactly.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{unpack}{fmt, string}
 | 
						|
  Unpack the string (presumably packed by \code{pack(\var{fmt},
 | 
						|
  \textrm{\ldots})}) according to the given format.  The result is a
 | 
						|
  tuple even if it contains exactly one item.  The string must contain
 | 
						|
  exactly the amount of data required by the format
 | 
						|
  (\code{len(\var{string})} must equal \code{calcsize(\var{fmt})}).
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{calcsize}{fmt}
 | 
						|
  Return the size of the struct (and hence of the string)
 | 
						|
  corresponding to the given format.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
Format characters have the following meaning; the conversion between
 | 
						|
C and Python values should be obvious given their types:
 | 
						|
 | 
						|
\begin{tableiv}{c|l|l|c}{samp}{Format}{C Type}{Python}{Notes}
 | 
						|
  \lineiv{x}{pad byte}{no value}{}
 | 
						|
  \lineiv{c}{\ctype{char}}{string of length 1}{}
 | 
						|
  \lineiv{b}{\ctype{signed char}}{integer}{}
 | 
						|
  \lineiv{B}{\ctype{unsigned char}}{integer}{}
 | 
						|
  \lineiv{h}{\ctype{short}}{integer}{}
 | 
						|
  \lineiv{H}{\ctype{unsigned short}}{integer}{}
 | 
						|
  \lineiv{i}{\ctype{int}}{integer}{}
 | 
						|
  \lineiv{I}{\ctype{unsigned int}}{long}{}
 | 
						|
  \lineiv{l}{\ctype{long}}{integer}{}
 | 
						|
  \lineiv{L}{\ctype{unsigned long}}{long}{}
 | 
						|
  \lineiv{q}{\ctype{long long}}{long}{(1)}
 | 
						|
  \lineiv{Q}{\ctype{unsigned long long}}{long}{(1)}
 | 
						|
  \lineiv{f}{\ctype{float}}{float}{}
 | 
						|
  \lineiv{d}{\ctype{double}}{float}{}
 | 
						|
  \lineiv{s}{\ctype{char[]}}{string}{}
 | 
						|
  \lineiv{p}{\ctype{char[]}}{string}{}
 | 
						|
  \lineiv{P}{\ctype{void *}}{integer}{}
 | 
						|
\end{tableiv}
 | 
						|
 | 
						|
\noindent
 | 
						|
Notes:
 | 
						|
 | 
						|
\begin{description}
 | 
						|
\item[(1)]
 | 
						|
  The \character{q} and \character{Q} conversion codes are available in
 | 
						|
  native mode only if the platform C compiler supports C \ctype{long long},
 | 
						|
  or, on Windows, \ctype{__int64}.  They are always available in standard
 | 
						|
  modes.
 | 
						|
  \versionadded{2.2}
 | 
						|
\end{description}
 | 
						|
 | 
						|
 | 
						|
A format character may be preceded by an integral repeat count.  For
 | 
						|
example, the format string \code{'4h'} means exactly the same as
 | 
						|
\code{'hhhh'}.
 | 
						|
 | 
						|
Whitespace characters between formats are ignored; a count and its
 | 
						|
format must not be separated by whitespace, though.
 | 
						|
 | 
						|
For the \character{s} format character, the count is interpreted as the
 | 
						|
size of the string, not a repeat count as for the other format
 | 
						|
characters; for example, \code{'10s'} means a single 10-byte string, while
 | 
						|
\code{'10c'} means 10 characters.  For packing, the string is
 | 
						|
truncated or padded with null bytes as appropriate to make it fit.
 | 
						|
For unpacking, the resulting string always has exactly the specified
 | 
						|
number of bytes.  As a special case, \code{'0s'} means a single, empty
 | 
						|
string (while \code{'0c'} means 0 characters).
 | 
						|
 | 
						|
The \character{p} format character encodes a ``Pascal string'', meaning
 | 
						|
a short variable-length string stored in a fixed number of bytes.
 | 
						|
The count is the total number of bytes stored.  The first byte stored is
 | 
						|
the length of the string, or 255, whichever is smaller.  The bytes
 | 
						|
of the string follow.  If the string passed in to \function{pack()} is too
 | 
						|
long (longer than the count minus 1), only the leading count-1 bytes of the
 | 
						|
string are stored.  If the string is shorter than count-1, it is padded
 | 
						|
with null bytes so that exactly count bytes in all are used.  Note that
 | 
						|
for \function{unpack()}, the \character{p} format character consumes count
 | 
						|
bytes, but that the string returned can never contain more than 255
 | 
						|
characters.
 | 
						|
 | 
						|
For the \character{I}, \character{L}, \character{q} and \character{Q}
 | 
						|
format characters, the return value is a Python long integer.
 | 
						|
 | 
						|
For the \character{P} format character, the return value is a Python
 | 
						|
integer or long integer, depending on the size needed to hold a
 | 
						|
pointer when it has been cast to an integer type.  A \NULL{} pointer will
 | 
						|
always be returned as the Python integer \code{0}. When packing pointer-sized
 | 
						|
values, Python integer or long integer objects may be used.  For
 | 
						|
example, the Alpha and Merced processors use 64-bit pointer values,
 | 
						|
meaning a Python long integer will be used to hold the pointer; other
 | 
						|
platforms use 32-bit pointers and will use a Python integer.
 | 
						|
 | 
						|
By default, C numbers are represented in the machine's native format
 | 
						|
and byte order, and properly aligned by skipping pad bytes if
 | 
						|
necessary (according to the rules used by the C compiler).
 | 
						|
 | 
						|
Alternatively, the first character of the format string can be used to
 | 
						|
indicate the byte order, size and alignment of the packed data,
 | 
						|
according to the following table:
 | 
						|
 | 
						|
\begin{tableiii}{c|l|l}{samp}{Character}{Byte order}{Size and alignment}
 | 
						|
  \lineiii{@}{native}{native}
 | 
						|
  \lineiii{=}{native}{standard}
 | 
						|
  \lineiii{<}{little-endian}{standard}
 | 
						|
  \lineiii{>}{big-endian}{standard}
 | 
						|
  \lineiii{!}{network (= big-endian)}{standard}
 | 
						|
\end{tableiii}
 | 
						|
 | 
						|
If the first character is not one of these, \character{@} is assumed.
 | 
						|
 | 
						|
Native byte order is big-endian or little-endian, depending on the
 | 
						|
host system.  For example, Motorola and Sun processors are big-endian;
 | 
						|
Intel and DEC processors are little-endian.
 | 
						|
 | 
						|
Native size and alignment are determined using the C compiler's
 | 
						|
\keyword{sizeof} expression.  This is always combined with native byte
 | 
						|
order.
 | 
						|
 | 
						|
Standard size and alignment are as follows: no alignment is required
 | 
						|
for any type (so you have to use pad bytes);
 | 
						|
\ctype{short} is 2 bytes;
 | 
						|
\ctype{int} and \ctype{long} are 4 bytes;
 | 
						|
\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes;
 | 
						|
\ctype{float} and \ctype{double} are 32-bit and 64-bit
 | 
						|
IEEE floating point numbers, respectively.
 | 
						|
 | 
						|
Note the difference between \character{@} and \character{=}: both use
 | 
						|
native byte order, but the size and alignment of the latter are
 | 
						|
standardized.
 | 
						|
 | 
						|
The form \character{!} is available for those poor souls who claim they
 | 
						|
can't remember whether network byte order is big-endian or
 | 
						|
little-endian.
 | 
						|
 | 
						|
There is no way to indicate non-native byte order (force
 | 
						|
byte-swapping); use the appropriate choice of \character{<} or
 | 
						|
\character{>}.
 | 
						|
 | 
						|
The \character{P} format character is only available for the native
 | 
						|
byte ordering (selected as the default or with the \character{@} byte
 | 
						|
order character). The byte order character \character{=} chooses to
 | 
						|
use little- or big-endian ordering based on the host system. The
 | 
						|
struct module does not interpret this as native ordering, so the
 | 
						|
\character{P} format is not available.
 | 
						|
 | 
						|
Examples (all using native byte order, size and alignment, on a
 | 
						|
big-endian machine):
 | 
						|
 | 
						|
\begin{verbatim}
 | 
						|
>>> from struct import *
 | 
						|
>>> pack('hhl', 1, 2, 3)
 | 
						|
'\x00\x01\x00\x02\x00\x00\x00\x03'
 | 
						|
>>> unpack('hhl', '\x00\x01\x00\x02\x00\x00\x00\x03')
 | 
						|
(1, 2, 3)
 | 
						|
>>> calcsize('hhl')
 | 
						|
8
 | 
						|
\end{verbatim}
 | 
						|
 | 
						|
Hint: to align the end of a structure to the alignment requirement of
 | 
						|
a particular type, end the format with the code for that type with a
 | 
						|
repeat count of zero.  For example, the format \code{'llh0l'}
 | 
						|
specifies two pad bytes at the end, assuming longs are aligned on
 | 
						|
4-byte boundaries.  This only works when native size and alignment are
 | 
						|
in effect; standard size and alignment do not enforce any alignment.
 | 
						|
 | 
						|
\begin{seealso}
 | 
						|
  \seemodule{array}{Packed binary storage of homogeneous data.}
 | 
						|
  \seemodule{xdrlib}{Packing and unpacking of XDR data.}
 | 
						|
\end{seealso}
 |