diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index a69757be0d2..e6355142b4d 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -195,6 +195,27 @@ These APIs can be used for fast direct character conversions: possible. This macro does not raise exceptions. +These APIs can be used to work with surrogates: + +.. c:macro:: Py_UNICODE_IS_SURROGATE(ch) + + Check if *ch* is a surrogate (``0xD800 <= ch <= 0xDFFF``). + +.. c:macro:: Py_UNICODE_IS_HIGH_SURROGATE(ch) + + Check if *ch* is a high surrogate (``0xD800 <= ch <= 0xDBFF``). + +.. c:macro:: Py_UNICODE_IS_LOW_SURROGATE(ch) + + Check if *ch* is a low surrogate (``0xDC00 <= ch <= 0xDFFF``). + +.. c:macro:: Py_UNICODE_JOIN_SURROGATES(high, low) + + Join two surrogate characters and return a single Py_UCS4 value. + *high* and *low* are respectively the leading and trailing surrogates in a + surrogate pair. + + Plain Py_UNICODE """""""""""""""" diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 44c1775441c..68298b05e65 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -362,6 +362,15 @@ typedef PY_UNICODE_TYPE Py_UNICODE; for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\ } while (0) +/* macros to work with surrogates (the IS_* checks evaluate ch twice) */ +#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF) +#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF) +#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF) +/* Join two surrogate characters and return a single Py_UCS4 value. */ +#define Py_UNICODE_JOIN_SURROGATES(high, low) \ + (((((Py_UCS4)(high) & 0x03FF) << 10) | \ + ((Py_UCS4)(low) & 0x03FF)) + 0x10000) + /* Check if substring matches at given offset. The offset must be valid, and the substring must not be empty. 
*/ diff --git a/Misc/NEWS b/Misc/NEWS index 28a32f6eb52..a2302f38210 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -1395,6 +1395,10 @@ Tests C-API ----- +- Issue #10542: Add 4 macros to work with surrogates: Py_UNICODE_IS_SURROGATE, + Py_UNICODE_IS_HIGH_SURROGATE, Py_UNICODE_IS_LOW_SURROGATE, + Py_UNICODE_JOIN_SURROGATES. + - Issue #12724: Add Py_RETURN_NOTIMPLEMENTED macro for returning NotImplemented. - PY_PATCHLEVEL_REVISION has been removed, since it's meaningless with