gh-118761: Optimise import time for `shlex` (#132036)

This commit is contained in:
Adam Turner 2025-04-24 16:10:46 +01:00 committed by GitHub
parent 984a314b9f
commit 06a26fda60
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 17 additions and 7 deletions

View file

@ -7,11 +7,7 @@
# iterator interface by Gustavo Niemeyer, April 2003. # iterator interface by Gustavo Niemeyer, April 2003.
# changes to tokenize more like Posix shells by Vinay Sajip, July 2016. # changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
import os
import re
import sys import sys
from collections import deque
from io import StringIO from io import StringIO
__all__ = ["shlex", "split", "quote", "join"] __all__ = ["shlex", "split", "quote", "join"]
@ -20,6 +16,8 @@ class shlex:
"A lexical analyzer class for simple shell-like syntaxes." "A lexical analyzer class for simple shell-like syntaxes."
def __init__(self, instream=None, infile=None, posix=False, def __init__(self, instream=None, infile=None, posix=False,
punctuation_chars=False): punctuation_chars=False):
from collections import deque # deferred import for performance
if isinstance(instream, str): if isinstance(instream, str):
instream = StringIO(instream) instream = StringIO(instream)
if instream is not None: if instream is not None:
@ -278,6 +276,7 @@ class shlex:
def sourcehook(self, newfile): def sourcehook(self, newfile):
"Hook called on a filename to be sourced." "Hook called on a filename to be sourced."
import os.path
if newfile[0] == '"': if newfile[0] == '"':
newfile = newfile[1:-1] newfile = newfile[1:-1]
# This implements cpp-like semantics for relative-path inclusion. # This implements cpp-like semantics for relative-path inclusion.
@ -318,13 +317,17 @@ def join(split_command):
return ' '.join(quote(arg) for arg in split_command) return ' '.join(quote(arg) for arg in split_command)
_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search
def quote(s): def quote(s):
"""Return a shell-escaped version of the string *s*.""" """Return a shell-escaped version of the string *s*."""
if not s: if not s:
return "''" return "''"
if _find_unsafe(s) is None:
# Use bytes.translate() for performance
safe_chars = (b'%+,-./0123456789:=@'
b'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
b'abcdefghijklmnopqrstuvwxyz')
# No quoting is needed if `s` is an ASCII string consisting only of `safe_chars`
if s.isascii() and not s.encode().translate(None, delete=safe_chars):
return s return s
# use single quotes, and put single quotes into double quotes # use single quotes, and put single quotes into double quotes

View file

@ -3,6 +3,7 @@ import itertools
import shlex import shlex
import string import string
import unittest import unittest
from test.support import import_helper
# The original test data set was from shellwords, by Hartmut Goebel. # The original test data set was from shellwords, by Hartmut Goebel.
@ -363,6 +364,9 @@ class ShlexTest(unittest.TestCase):
with self.assertRaises(AttributeError): with self.assertRaises(AttributeError):
shlex_instance.punctuation_chars = False shlex_instance.punctuation_chars = False
def test_lazy_imports(self):
import_helper.ensure_lazy_imports('shlex', {'collections', 're', 'os'})
# Allow this test to be used with old shlex.py # Allow this test to be used with old shlex.py
if not getattr(shlex, "split", None): if not getattr(shlex, "split", None):

View file

@ -0,0 +1,3 @@
Make importing the :mod:`shlex` module up to 33x faster,
and make :func:`shlex.quote` up to 12x faster.
Patch by Adam Turner.