cpython/Include/internal/pycore_unicodeobject.h
Victor Stinner 506cfdf141
gh-106320: Remove more private _PyUnicode C API functions (#106382)
Remove more private _PyUnicode C API functions:
move them to the internal C API (pycore_unicodeobject.h).

No longer export most pycore_unicodeobject.h functions.
2023-07-03 22:35:46 +00:00

279 lines
9.1 KiB
C

#ifndef Py_INTERNAL_UNICODEOBJECT_H
#define Py_INTERNAL_UNICODEOBJECT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif
#include "pycore_fileutils.h" // _Py_error_handler
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
void _PyUnicode_ExactDealloc(PyObject *op);
Py_ssize_t _PyUnicode_InternedSize(void);
/* Get a copy of a Unicode string. */
PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
PyObject *unicode
);
/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
if parameters are invalid (e.g. if length is longer than the string). */
extern void _PyUnicode_FastFill(
PyObject *unicode,
Py_ssize_t start,
Py_ssize_t length,
Py_UCS4 fill_char
);
/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
may crash if parameters are invalid (e.g. if the output string
is too short). */
extern void _PyUnicode_FastCopyCharacters(
PyObject *to,
Py_ssize_t to_start,
PyObject *from,
Py_ssize_t from_start,
Py_ssize_t how_many
);
/* Create a new string from a buffer of ASCII characters.
WARNING: Don't check if the string contains any non-ASCII character. */
extern PyObject* _PyUnicode_FromASCII(
const char *buffer,
Py_ssize_t size);
/* Compute the maximum character of the substring unicode[start:end].
Return 127 for an empty string. */
extern Py_UCS4 _PyUnicode_FindMaxChar (
PyObject *unicode,
Py_ssize_t start,
Py_ssize_t end);
/* --- _PyUnicodeWriter API ----------------------------------------------- */
typedef struct {
PyObject *buffer;
void *data;
int kind;
Py_UCS4 maxchar;
Py_ssize_t size;
Py_ssize_t pos;
/* minimum number of allocated characters (default: 0) */
Py_ssize_t min_length;
/* minimum character (default: 127, ASCII) */
Py_UCS4 min_char;
/* If non-zero, overallocate the buffer (default: 0). */
unsigned char overallocate;
/* If readonly is 1, buffer is a shared string (cannot be modified)
and size is set to 0. */
unsigned char readonly;
} _PyUnicodeWriter ;
/* Initialize a Unicode writer.
*
* By default, the minimum buffer size is 0 character and overallocation is
* disabled. Set min_length, min_char and overallocate attributes to control
* the allocation of the buffer. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
/* Prepare the buffer to write 'length' characters
with the specified maximum character.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
(((MAXCHAR) <= (WRITER)->maxchar \
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
? 0 \
: (((LENGTH) == 0) \
? 0 \
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);
/* Prepare the buffer to have at least the kind KIND.
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
support characters in range U+000-U+FFFF.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
((KIND) <= (WRITER)->kind \
? 0 \
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
macro instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
int kind);
/* Append a Unicode character.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
Py_UCS4 ch
);
/* Append a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
PyObject *str /* Unicode string */
);
/* Append a substring of a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
PyObject *str, /* Unicode string */
Py_ssize_t start,
Py_ssize_t end
);
/* Append an ASCII-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
const char *str, /* ASCII-encoded byte string */
Py_ssize_t len /* number of bytes, or -1 if unknown */
);
/* Append a latin1-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
const char *str, /* latin1-encoded byte string */
Py_ssize_t len /* length in bytes */
);
/* Get the value of the writer as a Unicode string. Clear the
buffer of the writer. Raise an exception and return NULL
on error. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
/* --- Methods & Slots ---------------------------------------------------- */
extern PyObject* _PyUnicode_JoinArray(
PyObject *separator,
PyObject *const *items,
Py_ssize_t seqlen
);
/* Test whether a unicode is equal to ASCII identifier. Return 1 if true,
0 otherwise. The right argument must be ASCII identifier.
Any error occurs inside will be cleared before return. */
extern int _PyUnicode_EqualToASCIIId(
PyObject *left, /* Left string */
_Py_Identifier *right /* Right identifier */
);
/* Test whether a unicode is equal to ASCII string. Return 1 if true,
0 otherwise. The right argument must be ASCII-encoded string.
Any error occurs inside will be cleared before return. */
PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
PyObject *left,
const char *right /* ASCII-encoded string */
);
/* Externally visible for str.strip(unicode) */
extern PyObject* _PyUnicode_XStrip(
PyObject *self,
int striptype,
PyObject *sepobj
);
/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
_PyUnicodeWriter *writer,
Py_ssize_t n_buffer,
PyObject *digits,
Py_ssize_t d_pos,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
PyObject *thousands_sep,
Py_UCS4 *maxchar);
/* --- Runtime lifecycle -------------------------------------------------- */
extern void _PyUnicode_InitState(PyInterpreterState *);
extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
extern void _PyUnicode_Fini(PyInterpreterState *);
extern void _PyUnicode_FiniTypes(PyInterpreterState *);
extern PyTypeObject _PyUnicodeASCIIIter_Type;
/* --- Other API ---------------------------------------------------------- */
struct _Py_unicode_runtime_ids {
PyThread_type_lock lock;
// next_index value must be preserved when Py_Initialize()/Py_Finalize()
// is called multiple times: see _PyUnicode_FromId() implementation.
Py_ssize_t next_index;
};
struct _Py_unicode_runtime_state {
struct _Py_unicode_runtime_ids ids;
};
/* fs_codec.encoding is initialized to NULL.
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
struct _Py_unicode_fs_codec {
char *encoding; // Filesystem encoding (encoded to UTF-8)
int utf8; // encoding=="utf-8"?
char *errors; // Filesystem errors (encoded to UTF-8)
_Py_error_handler error_handler;
};
struct _Py_unicode_ids {
Py_ssize_t size;
PyObject **array;
};
struct _Py_unicode_state {
struct _Py_unicode_fs_codec fs_codec;
_PyUnicode_Name_CAPI *ucnhash_capi;
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
struct _Py_unicode_ids ids;
};
extern void _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p);
extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_UNICODEOBJECT_H */