mirror of
https://github.com/python/cpython.git
synced 2025-10-17 12:18:23 +00:00
bpo-30277: Replace _sre.getlower() with _sre.ascii_tolower() and _sre.unicode_tolower(). (#1468)
This commit is contained in:
parent
76a3e51a40
commit
7186cc29be
4 changed files with 82 additions and 43 deletions
|
@ -69,13 +69,14 @@ def _compile(code, pattern, flags):
|
||||||
REPEATING_CODES = _REPEATING_CODES
|
REPEATING_CODES = _REPEATING_CODES
|
||||||
SUCCESS_CODES = _SUCCESS_CODES
|
SUCCESS_CODES = _SUCCESS_CODES
|
||||||
ASSERT_CODES = _ASSERT_CODES
|
ASSERT_CODES = _ASSERT_CODES
|
||||||
if (flags & SRE_FLAG_IGNORECASE and
|
tolower = None
|
||||||
not (flags & SRE_FLAG_LOCALE) and
|
fixes = None
|
||||||
flags & SRE_FLAG_UNICODE and
|
if flags & SRE_FLAG_IGNORECASE and not flags & SRE_FLAG_LOCALE:
|
||||||
not (flags & SRE_FLAG_ASCII)):
|
if flags & SRE_FLAG_UNICODE and not flags & SRE_FLAG_ASCII:
|
||||||
fixes = _ignorecase_fixes
|
tolower = _sre.unicode_tolower
|
||||||
else:
|
fixes = _ignorecase_fixes
|
||||||
fixes = None
|
else:
|
||||||
|
tolower = _sre.ascii_tolower
|
||||||
for op, av in pattern:
|
for op, av in pattern:
|
||||||
if op in LITERAL_CODES:
|
if op in LITERAL_CODES:
|
||||||
if not flags & SRE_FLAG_IGNORECASE:
|
if not flags & SRE_FLAG_IGNORECASE:
|
||||||
|
@ -85,7 +86,7 @@ def _compile(code, pattern, flags):
|
||||||
emit(OP_LOC_IGNORE[op])
|
emit(OP_LOC_IGNORE[op])
|
||||||
emit(av)
|
emit(av)
|
||||||
else:
|
else:
|
||||||
lo = _sre.getlower(av, flags)
|
lo = tolower(av)
|
||||||
if fixes and lo in fixes:
|
if fixes and lo in fixes:
|
||||||
emit(IN_IGNORE)
|
emit(IN_IGNORE)
|
||||||
skip = _len(code); emit(0)
|
skip = _len(code); emit(0)
|
||||||
|
@ -102,16 +103,12 @@ def _compile(code, pattern, flags):
|
||||||
elif op is IN:
|
elif op is IN:
|
||||||
if not flags & SRE_FLAG_IGNORECASE:
|
if not flags & SRE_FLAG_IGNORECASE:
|
||||||
emit(op)
|
emit(op)
|
||||||
fixup = None
|
|
||||||
elif flags & SRE_FLAG_LOCALE:
|
elif flags & SRE_FLAG_LOCALE:
|
||||||
emit(IN_LOC_IGNORE)
|
emit(IN_LOC_IGNORE)
|
||||||
fixup = None
|
|
||||||
else:
|
else:
|
||||||
emit(IN_IGNORE)
|
emit(IN_IGNORE)
|
||||||
def fixup(literal, flags=flags):
|
|
||||||
return _sre.getlower(literal, flags)
|
|
||||||
skip = _len(code); emit(0)
|
skip = _len(code); emit(0)
|
||||||
_compile_charset(av, flags, code, fixup, fixes)
|
_compile_charset(av, flags, code, tolower, fixes)
|
||||||
code[skip] = _len(code) - skip
|
code[skip] = _len(code) - skip
|
||||||
elif op is ANY:
|
elif op is ANY:
|
||||||
if flags & SRE_FLAG_DOTALL:
|
if flags & SRE_FLAG_DOTALL:
|
||||||
|
|
|
@ -883,17 +883,23 @@ class ReTests(unittest.TestCase):
|
||||||
def test_category(self):
|
def test_category(self):
|
||||||
self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
|
self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
|
||||||
|
|
||||||
def test_getlower(self):
|
@cpython_only
|
||||||
|
def test_case_helpers(self):
|
||||||
import _sre
|
import _sre
|
||||||
self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
|
for i in range(128):
|
||||||
self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
|
c = chr(i)
|
||||||
self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
|
lo = ord(c.lower())
|
||||||
self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
|
self.assertEqual(_sre.ascii_tolower(i), lo)
|
||||||
|
self.assertEqual(_sre.unicode_tolower(i), lo)
|
||||||
|
|
||||||
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
|
for i in list(range(128, 0x1000)) + [0x10400, 0x10428]:
|
||||||
self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
|
c = chr(i)
|
||||||
self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
|
self.assertEqual(_sre.ascii_tolower(i), i)
|
||||||
self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
|
if i != 0x0130:
|
||||||
|
self.assertEqual(_sre.unicode_tolower(i), ord(c.lower()))
|
||||||
|
|
||||||
|
self.assertEqual(_sre.ascii_tolower(0x0130), 0x0130)
|
||||||
|
self.assertEqual(_sre.unicode_tolower(0x0130), ord('i'))
|
||||||
|
|
||||||
def test_not_literal(self):
|
def test_not_literal(self):
|
||||||
self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
|
self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
|
||||||
|
|
|
@ -274,25 +274,35 @@ _sre_getcodesize_impl(PyObject *module)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
_sre.getlower -> int
|
_sre.ascii_tolower -> int
|
||||||
|
|
||||||
character: int
|
character: int
|
||||||
flags: int
|
|
||||||
/
|
/
|
||||||
|
|
||||||
[clinic start generated code]*/
|
[clinic start generated code]*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
_sre_getlower_impl(PyObject *module, int character, int flags)
|
_sre_ascii_tolower_impl(PyObject *module, int character)
|
||||||
/*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/
|
/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
|
||||||
{
|
{
|
||||||
if (flags & SRE_FLAG_LOCALE)
|
|
||||||
return sre_lower_locale(character);
|
|
||||||
if (flags & SRE_FLAG_UNICODE)
|
|
||||||
return sre_lower_unicode(character);
|
|
||||||
return sre_lower(character);
|
return sre_lower(character);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*[clinic input]
|
||||||
|
_sre.unicode_tolower -> int
|
||||||
|
|
||||||
|
character: int
|
||||||
|
/
|
||||||
|
|
||||||
|
[clinic start generated code]*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
_sre_unicode_tolower_impl(PyObject *module, int character)
|
||||||
|
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
|
||||||
|
{
|
||||||
|
return sre_lower_unicode(character);
|
||||||
|
}
|
||||||
|
|
||||||
LOCAL(void)
|
LOCAL(void)
|
||||||
state_reset(SRE_STATE* state)
|
state_reset(SRE_STATE* state)
|
||||||
{
|
{
|
||||||
|
@ -2740,7 +2750,8 @@ static PyTypeObject Scanner_Type = {
|
||||||
static PyMethodDef _functions[] = {
|
static PyMethodDef _functions[] = {
|
||||||
_SRE_COMPILE_METHODDEF
|
_SRE_COMPILE_METHODDEF
|
||||||
_SRE_GETCODESIZE_METHODDEF
|
_SRE_GETCODESIZE_METHODDEF
|
||||||
_SRE_GETLOWER_METHODDEF
|
_SRE_ASCII_TOLOWER_METHODDEF
|
||||||
|
_SRE_UNICODE_TOLOWER_METHODDEF
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -29,34 +29,59 @@ exit:
|
||||||
return return_value;
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(_sre_getlower__doc__,
|
PyDoc_STRVAR(_sre_ascii_tolower__doc__,
|
||||||
"getlower($module, character, flags, /)\n"
|
"ascii_tolower($module, character, /)\n"
|
||||||
"--\n"
|
"--\n"
|
||||||
"\n");
|
"\n");
|
||||||
|
|
||||||
#define _SRE_GETLOWER_METHODDEF \
|
#define _SRE_ASCII_TOLOWER_METHODDEF \
|
||||||
{"getlower", (PyCFunction)_sre_getlower, METH_FASTCALL, _sre_getlower__doc__},
|
{"ascii_tolower", (PyCFunction)_sre_ascii_tolower, METH_O, _sre_ascii_tolower__doc__},
|
||||||
|
|
||||||
static int
|
static int
|
||||||
_sre_getlower_impl(PyObject *module, int character, int flags);
|
_sre_ascii_tolower_impl(PyObject *module, int character);
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
_sre_getlower(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
|
_sre_ascii_tolower(PyObject *module, PyObject *arg)
|
||||||
{
|
{
|
||||||
PyObject *return_value = NULL;
|
PyObject *return_value = NULL;
|
||||||
int character;
|
int character;
|
||||||
int flags;
|
|
||||||
int _return_value;
|
int _return_value;
|
||||||
|
|
||||||
if (!_PyArg_ParseStack(args, nargs, "ii:getlower",
|
if (!PyArg_Parse(arg, "i:ascii_tolower", &character)) {
|
||||||
&character, &flags)) {
|
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
_return_value = _sre_ascii_tolower_impl(module, character);
|
||||||
|
if ((_return_value == -1) && PyErr_Occurred()) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
return_value = PyLong_FromLong((long)_return_value);
|
||||||
|
|
||||||
if (!_PyArg_NoStackKeywords("getlower", kwnames)) {
|
exit:
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR(_sre_unicode_tolower__doc__,
|
||||||
|
"unicode_tolower($module, character, /)\n"
|
||||||
|
"--\n"
|
||||||
|
"\n");
|
||||||
|
|
||||||
|
#define _SRE_UNICODE_TOLOWER_METHODDEF \
|
||||||
|
{"unicode_tolower", (PyCFunction)_sre_unicode_tolower, METH_O, _sre_unicode_tolower__doc__},
|
||||||
|
|
||||||
|
static int
|
||||||
|
_sre_unicode_tolower_impl(PyObject *module, int character);
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
_sre_unicode_tolower(PyObject *module, PyObject *arg)
|
||||||
|
{
|
||||||
|
PyObject *return_value = NULL;
|
||||||
|
int character;
|
||||||
|
int _return_value;
|
||||||
|
|
||||||
|
if (!PyArg_Parse(arg, "i:unicode_tolower", &character)) {
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
_return_value = _sre_getlower_impl(module, character, flags);
|
_return_value = _sre_unicode_tolower_impl(module, character);
|
||||||
if ((_return_value == -1) && PyErr_Occurred()) {
|
if ((_return_value == -1) && PyErr_Occurred()) {
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
@ -690,4 +715,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
|
||||||
{
|
{
|
||||||
return _sre_SRE_Scanner_search_impl(self);
|
return _sre_SRE_Scanner_search_impl(self);
|
||||||
}
|
}
|
||||||
/*[clinic end generated code: output=e6dab3ba8864da9e input=a9049054013a1b77]*/
|
/*[clinic end generated code: output=811e67d7f8f5052e input=a9049054013a1b77]*/
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue