mirror of
https://github.com/python/cpython.git
synced 2025-10-10 00:43:41 +00:00
GH-72904: Add glob.translate()
function (#106703)
Add `glob.translate()` function that converts a pathname with shell wildcards to a regular expression. The regular expression is used by pathlib to implement `match()` and `glob()`. This function differs from `fnmatch.translate()` in that wildcards do not match path separators by default, and that a `*` pattern segment matches precisely one path segment. When *recursive* is set to true, `**` pattern segments match any number of path segments, and `**` cannot appear outside its own segment. In pathlib, this change speeds up directory walking (because `_make_child_relpath()` does less work), makes path objects smaller (they don't need a `_lines` slot), and removes the need for some gnarly code. Co-authored-by: Jason R. Coombs <jaraco@jaraco.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
This commit is contained in:
parent
babb787047
commit
cf67ebfb31
7 changed files with 229 additions and 106 deletions
60
Lib/glob.py
60
Lib/glob.py
|
@ -249,3 +249,63 @@ def escape(pathname):
|
|||
|
||||
|
||||
# Read-only open flags, OR-ed with os.O_DIRECTORY when the os module defines
# it (falls back to 0, i.e. no extra flag, where it does not).
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
|
||||
|
||||
|
||||
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Return a regular expression string equivalent to the glob pattern *pat*.

    The pattern is split into segments on the separator characters and each
    segment is translated on its own, so ordinary wildcards never match a
    separator.

    recursive: when true, a segment consisting solely of '**' matches any
        number of path segments, and a '**' embedded in a longer segment
        raises ValueError. When false, '**' gets no special treatment.
    include_hidden: when false (the default), a '*' segment, and any segment
        starting with '*' or '?', will not match a name that begins with a
        dot ('.').
    seps: sequence of separator characters used both to split the pattern
        and to match separators in paths. When empty or omitted, os.path.sep
        is used, together with os.path.altsep if that is set.
    """
    if not seps:
        seps = (os.path.sep, os.path.altsep) if os.path.altsep else os.path.sep

    # Regex building blocks for "a separator" and "anything but a separator".
    sep_chars = ''.join(re.escape(sep) for sep in seps)
    if len(seps) > 1:
        any_sep = f'[{sep_chars}]'
    else:
        any_sep = sep_chars
    not_sep = f'[^{sep_chars}]'

    # Regex fragments for the '*' and '**' segments. The "final" variants are
    # used when the segment is the last one and so has no trailing separator.
    if include_hidden:
        final_segment_re = f'{not_sep}+'
        segment_re = f'{final_segment_re}{any_sep}'
        segments_re = f'(?:.+{any_sep})?'
        final_segments_re = '.*'
    else:
        # The first character of every matched segment must not be a dot.
        final_segment_re = f'[^{sep_chars}.]{not_sep}*'
        segment_re = f'{final_segment_re}{any_sep}'
        segments_re = f'(?:{segment_re})*'
        final_segments_re = f'{segments_re}(?:{final_segment_re})?'

    pieces = []
    segments = re.split(any_sep, pat)
    final_index = len(segments) - 1
    for position, segment in enumerate(segments):
        is_last = position == final_index
        if segment == '*':
            # These fragments already include the trailing separator.
            pieces.append(final_segment_re if is_last else segment_re)
        elif recursive and segment == '**':
            if is_last:
                pieces.append(final_segments_re)
            elif segments[position + 1] != '**':
                # Consecutive '**' segments collapse into the last of the run.
                pieces.append(segments_re)
        else:
            if recursive and '**' in segment:
                raise ValueError("Invalid pattern: '**' can only be an entire path component")
            if segment:
                if segment[0] in '*?' and not include_hidden:
                    # Forbid a leading wildcard from matching a leading dot.
                    pieces.append(r'(?!\.)')
                pieces.extend(fnmatch._translate(segment, f'{not_sep}*', not_sep))
            if not is_last:
                pieces.append(any_sep)

    body = ''.join(pieces)
    return fr'(?s:{body})\Z'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue