gh-111259: Optimize complementary character sets in RE (GH-120742)

Patterns like "[\s\S]" or "\s|\S" which match any character are now compiled
to the same effective code as a dot with the DOTALL modifier ("(?s:.)").
This commit is contained in:
Serhiy Storchaka 2024-06-20 10:19:32 +03:00 committed by GitHub
parent 3846fcfb92
commit 8bc76ae45f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 50 additions and 13 deletions

View file

@@ -206,6 +206,8 @@ CH_UNICODE = {
CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}
# Two-way lookup between paired category codes: every even-indexed entry of
# CHCODES is mapped to the odd-indexed entry that follows it, and every
# odd-indexed entry is mapped back to the even-indexed one before it.
# NOTE(review): presumably CHCODES lists each category immediately followed by
# its negated counterpart -- confirm against the CHCODES definition.
CH_NEGATE = {
    code: counterpart
    for code, counterpart in zip(
        CHCODES[::2] + CHCODES[1::2],
        CHCODES[1::2] + CHCODES[::2],
    )
}
# flags
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale