mirror of
https://github.com/python/cpython.git
synced 2025-08-04 17:08:35 +00:00
gh-118761: Improve the import time of `gettext` (#128898)
``gettext`` is often imported in programs that may not end up translating anything. In fact, the ``struct`` module already has a delayed import when parsing ``GNUTranslations`` to speed up the case where no ``.mo`` files are loaded. The ``re`` module is also used in the same situation, but behind a function chain only called by ``GNUTranslations``. Cache the compiled regex globally the first time it is used. The ``re.finditer()`` function call is converted to a method call on the compiled pattern object, which is slightly more efficient and is necessary for the delayed ``re`` import.
This commit is contained in:
parent
bbeb219354
commit
c9c9fcb8fc
2 changed files with 21 additions and 15 deletions
|
@@ -48,7 +48,6 @@ internationalized, to the local language and cultural habits.
|
|||
|
||||
import operator
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
||||
|
@@ -70,22 +69,26 @@ _default_localedir = os.path.join(sys.base_prefix, 'share', 'locale')
|
|||
# https://www.gnu.org/software/gettext/manual/gettext.html#Plural-forms
|
||||
# http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/plural.y
|
||||
|
||||
_token_pattern = re.compile(r"""
|
||||
(?P<WHITESPACES>[ \t]+) | # spaces and horizontal tabs
|
||||
(?P<NUMBER>[0-9]+\b) | # decimal integer
|
||||
(?P<NAME>n\b) | # only n is allowed
|
||||
(?P<PARENTHESIS>[()]) |
|
||||
(?P<OPERATOR>[-*/%+?:]|[><!]=?|==|&&|\|\|) | # !, *, /, %, +, -, <, >,
|
||||
# <=, >=, ==, !=, &&, ||,
|
||||
# ? :
|
||||
# unary and bitwise ops
|
||||
# not allowed
|
||||
(?P<INVALID>\w+|.) # invalid token
|
||||
""", re.VERBOSE|re.DOTALL)
|
||||
|
||||
_token_pattern = None
|
||||
|
||||
def _tokenize(plural):
|
||||
for mo in re.finditer(_token_pattern, plural):
|
||||
global _token_pattern
|
||||
if _token_pattern is None:
|
||||
import re
|
||||
_token_pattern = re.compile(r"""
|
||||
(?P<WHITESPACES>[ \t]+) | # spaces and horizontal tabs
|
||||
(?P<NUMBER>[0-9]+\b) | # decimal integer
|
||||
(?P<NAME>n\b) | # only n is allowed
|
||||
(?P<PARENTHESIS>[()]) |
|
||||
(?P<OPERATOR>[-*/%+?:]|[><!]=?|==|&&|\|\|) | # !, *, /, %, +, -, <, >,
|
||||
# <=, >=, ==, !=, &&, ||,
|
||||
# ? :
|
||||
# unary and bitwise ops
|
||||
# not allowed
|
||||
(?P<INVALID>\w+|.) # invalid token
|
||||
""", re.VERBOSE|re.DOTALL)
|
||||
|
||||
for mo in _token_pattern.finditer(plural):
|
||||
kind = mo.lastgroup
|
||||
if kind == 'WHITESPACES':
|
||||
continue
|
||||
|
|
|
@@ -0,0 +1,3 @@
|
|||
Reduce import time of :mod:`gettext` by up to ten times, by importing
|
||||
:mod:`re` on demand. In particular, ``re`` is no longer implicitly
|
||||
exposed as ``gettext.re``. Patch by Eli Schwartz.
|
Loading…
Add table
Add a link
Reference in a new issue