gh-130167: Improve `difflib.IS_LINE_JUNK` performance by using string methods (#130170)

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
Co-authored-by: Tim Peters <tim.peters@gmail.com>
This commit is contained in:
Semyon Moroz 2025-05-01 08:11:36 +04:00 committed by GitHub
parent b99d970bcd
commit bce45bcaf8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 8 additions and 5 deletions

View file

@@ -1038,11 +1038,9 @@ class Differ:
# remaining is that perhaps it was really the case that " volatile" # remaining is that perhaps it was really the case that " volatile"
# was inserted after "private". I can live with that <wink>. # was inserted after "private". I can live with that <wink>.
import re def IS_LINE_JUNK(line, pat=None):
def IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match):
r""" r"""
Return True for ignorable line: iff `line` is blank or contains a single '#'. Return True for ignorable line: if `line` is blank or contains a single '#'.
Examples: Examples:
@@ -1054,6 +1052,11 @@ def IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match):
False False
""" """
if pat is None:
# Default: match '#' or the empty string
return line.strip() in '#'
# Previous versions used the undocumented parameter 'pat' as a
# match function. Retain this behaviour for compatibility.
return pat(line) is not None return pat(line) is not None
def IS_CHARACTER_JUNK(ch, ws=" \t"): def IS_CHARACTER_JUNK(ch, ws=" \t"):
@@ -2027,7 +2030,6 @@ class HtmlDiff(object):
replace('\1','</span>'). \ replace('\1','</span>'). \
replace('\t','&nbsp;') replace('\t','&nbsp;')
del re
def restore(delta, which): def restore(delta, which):
r""" r"""

View file

@@ -0,0 +1 @@
Improve speed of :func:`difflib.IS_LINE_JUNK`. Patch by Semyon Moroz.