Added docs for the new Unicode and string APIs.

2025-11-13 23:46:24 +00:00 · 2000-07-07 15:47:06 +00:00 · 2000-07-07 15:47:06 +00:00 · 5a20b21fb1
commit 5a20b21fb1
parent 71f36983b9
1 changed files with 52 additions and 7 deletions
--- a/Doc/api/api.tex
+++ b/Doc/api/api.tex
@ -1923,6 +1923,40 @@ that has been interned, or a new (``owned'') reference to an earlier
 interned string object with the same value.
 \end{cfuncdesc}
 \begin{cfuncdesc}{PyObject*}{PyString_Decode}{const char *s,
                                               int size,
                                               const char *encoding,
                                               const char *errors}
 Create a string object by decoding \var{size} bytes of the encoded
 buffer \var{s}. \var{encoding} and \var{errors} have the same meaning
 as the parameters of the same name in the unicode() builtin
 function. The codec to be used is looked up using the Python codec
 registry. Returns \NULL{} in case an exception was raised by the
 codec.
 \end{cfuncdesc}
 \begin{cfuncdesc}{PyObject*}{PyString_Encode}{const char *s,
                                               int size,
                                               const char *encoding,
                                               const char *errors}
 Encodes the \ctype{char} buffer of the given size and returns a
 Python string object. \var{encoding} and \var{errors} have the same
 meaning as the parameters of the same name in the string .encode()
 method. The codec to be used is looked up using the Python codec
 registry. Returns \NULL{} in case an exception was raised by the
 codec.
 \end{cfuncdesc}
 \begin{cfuncdesc}{PyObject*}{PyString_AsEncodedString}{PyObject *str,
                                               const char *encoding,
                                               const char *errors}
 Encodes a string object and returns the result as a Python string
 object. \var{encoding} and \var{errors} have the same meaning as the
 parameters of the same name in the string .encode() method. The codec
 to be used is looked up using the Python codec registry. Returns
 \NULL{} in case an exception was raised by the codec.
 \end{cfuncdesc}
 \subsection{Unicode Objects \label{unicodeObjects}}
 \sectionauthor{Marc-Andre Lemburg}{mal@lemburg.com}
@ -2076,26 +2110,37 @@ Return a read-only pointer to the Unicode object's internal
 Return the length of the Unicode object.
 \end{cfuncdesc}
-\begin{cfuncdesc}{PyObject*}{PyUnicode_FromObject}{PyObject *obj}
+\begin{cfuncdesc}{PyObject*}{PyUnicode_FromEncodedObject}{PyObject *obj,
                                                      const char *encoding,
                                                      const char *errors}
-Coerce obj to an Unicode object and return a reference with
+Coerce an encoded object \var{obj} to a Unicode object and return a
-incremented refcount.
+reference with incremented refcount.
 Coercion is done in the following way:
 \begin{enumerate}
 \item  Unicode objects are passed back as-is with incremented
-      refcount.
+      refcount. Note: these cannot be decoded; passing a non-\NULL{}
      value for \var{encoding} will result in a TypeError.
 \item String and other char buffer compatible objects are decoded
-      under the assumptions that they contain UTF-8 data. Decoding
+      according to the given \var{encoding} and using the error handling
-      is done in "strict" mode.
+      defined by \var{errors}. Both can be \NULL{} to have the interface use
      the default values (see the next section for details).
-\item All other objects raise an exception.
+\item All other objects cause an exception.
 \end{enumerate}
 The API returns \NULL{} in case of an error. The caller is responsible
 for decref'ing the returned objects.
 \end{cfuncdesc}
 \begin{cfuncdesc}{PyObject*}{PyUnicode_FromObject}{PyObject *obj}
 Shortcut for \code{PyUnicode_FromEncodedObject(obj, NULL, "strict")}
 which is used throughout the interpreter whenever coercion to
 Unicode is needed.
 \end{cfuncdesc}
 % --- wchar_t support for platforms which support it ---------------------
 If the platform supports \ctype{wchar_t} and provides a header file