mirror of
https://github.com/python/cpython.git
synced 2025-10-10 00:43:41 +00:00
gh-103285: Rewrite _splitlines_no_ff to improve performance (#103307)
This commit is contained in:
parent
f0ed293f6a
commit
36860134a9
3 changed files with 20 additions and 18 deletions
26
Lib/ast.py
26
Lib/ast.py
|
@ -25,6 +25,7 @@
|
||||||
:license: Python License.
|
:license: Python License.
|
||||||
"""
|
"""
|
||||||
import sys
|
import sys
|
||||||
|
import re
|
||||||
from _ast import *
|
from _ast import *
|
||||||
from contextlib import contextmanager, nullcontext
|
from contextlib import contextmanager, nullcontext
|
||||||
from enum import IntEnum, auto, _simple_enum
|
from enum import IntEnum, auto, _simple_enum
|
||||||
|
@ -305,28 +306,17 @@ def get_docstring(node, clean=True):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def _splitlines_no_ff(source):
|
_line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))")
|
||||||
|
def _splitlines_no_ff(source, maxlines=None):
|
||||||
"""Split a string into lines ignoring form feed and other chars.
|
"""Split a string into lines ignoring form feed and other chars.
|
||||||
|
|
||||||
This mimics how the Python parser splits source code.
|
This mimics how the Python parser splits source code.
|
||||||
"""
|
"""
|
||||||
idx = 0
|
|
||||||
lines = []
|
lines = []
|
||||||
next_line = ''
|
for lineno, match in enumerate(_line_pattern.finditer(source), 1):
|
||||||
while idx < len(source):
|
if maxlines is not None and lineno > maxlines:
|
||||||
c = source[idx]
|
break
|
||||||
next_line += c
|
lines.append(match[0])
|
||||||
idx += 1
|
|
||||||
# Keep \r\n together
|
|
||||||
if c == '\r' and idx < len(source) and source[idx] == '\n':
|
|
||||||
next_line += '\n'
|
|
||||||
idx += 1
|
|
||||||
if c in '\r\n':
|
|
||||||
lines.append(next_line)
|
|
||||||
next_line = ''
|
|
||||||
|
|
||||||
if next_line:
|
|
||||||
lines.append(next_line)
|
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
|
|
||||||
|
@ -360,7 +350,7 @@ def get_source_segment(source, node, *, padded=False):
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
lines = _splitlines_no_ff(source)
|
lines = _splitlines_no_ff(source, maxlines=end_lineno+1)
|
||||||
if end_lineno == lineno:
|
if end_lineno == lineno:
|
||||||
return lines[lineno].encode()[col_offset:end_col_offset].decode()
|
return lines[lineno].encode()[col_offset:end_col_offset].decode()
|
||||||
|
|
||||||
|
|
|
@ -2293,6 +2293,17 @@ class EndPositionTests(unittest.TestCase):
|
||||||
cdef = ast.parse(s).body[0]
|
cdef = ast.parse(s).body[0]
|
||||||
self.assertEqual(ast.get_source_segment(s, cdef.body[0], padded=True), s_method)
|
self.assertEqual(ast.get_source_segment(s, cdef.body[0], padded=True), s_method)
|
||||||
|
|
||||||
|
def test_source_segment_newlines(self):
|
||||||
|
s = 'def f():\n pass\ndef g():\r pass\r\ndef h():\r\n pass\r\n'
|
||||||
|
f, g, h = ast.parse(s).body
|
||||||
|
self._check_content(s, f, 'def f():\n pass')
|
||||||
|
self._check_content(s, g, 'def g():\r pass')
|
||||||
|
self._check_content(s, h, 'def h():\r\n pass')
|
||||||
|
|
||||||
|
s = 'def f():\n a = 1\r b = 2\r\n c = 3\n'
|
||||||
|
f = ast.parse(s).body[0]
|
||||||
|
self._check_content(s, f, s.rstrip())
|
||||||
|
|
||||||
def test_source_segment_missing_info(self):
|
def test_source_segment_missing_info(self):
|
||||||
s = 'v = 1\r\nw = 1\nx = 1\n\ry = 1\r\n'
|
s = 'v = 1\r\nw = 1\nx = 1\n\ry = 1\r\n'
|
||||||
v, w, x, y = ast.parse(s).body
|
v, w, x, y = ast.parse(s).body
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Improve performance of :func:`ast.get_source_segment`.
|
Loading…
Add table
Add a link
Reference in a new issue