GH-72904: Add glob.translate() function (#106703)

Add `glob.translate()` function that converts a pathname with shell wildcards to a regular expression. The regular expression is used by pathlib to implement `match()` and `glob()`.

This function differs from `fnmatch.translate()` in that wildcards do not match path separators by default, and that a `*` pattern segment matches precisely one path segment. When *recursive* is set to true, `**` pattern segments match any number of path segments, and `**` cannot appear outside its own segment.

In pathlib, this change speeds up directory walking (because `_make_child_relpath()` does less work), makes path objects smaller (they don't need a `_lines` slot), and removes the need for some gnarly code.

Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
This commit is contained in:
Barney Gale 2023-11-13 17:15:56 +00:00 committed by GitHub
parent babb787047
commit cf67ebfb31
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 229 additions and 106 deletions

View file

@ -1,5 +1,6 @@
import glob
import os
import re
import shutil
import sys
import unittest
@ -349,6 +350,96 @@ class GlobTests(unittest.TestCase):
for it in iters:
self.assertEqual(next(it), p)
def test_translate_matching(self):
match = re.compile(glob.translate('*')).match
self.assertIsNotNone(match('foo'))
self.assertIsNotNone(match('foo.bar'))
self.assertIsNone(match('.foo'))
match = re.compile(glob.translate('.*')).match
self.assertIsNotNone(match('.foo'))
match = re.compile(glob.translate('**', recursive=True)).match
self.assertIsNotNone(match('foo'))
self.assertIsNone(match('.foo'))
self.assertIsNotNone(match(os.path.join('foo', 'bar')))
self.assertIsNone(match(os.path.join('foo', '.bar')))
self.assertIsNone(match(os.path.join('.foo', 'bar')))
self.assertIsNone(match(os.path.join('.foo', '.bar')))
match = re.compile(glob.translate('**/*', recursive=True)).match
self.assertIsNotNone(match(os.path.join('foo', 'bar')))
self.assertIsNone(match(os.path.join('foo', '.bar')))
self.assertIsNone(match(os.path.join('.foo', 'bar')))
self.assertIsNone(match(os.path.join('.foo', '.bar')))
match = re.compile(glob.translate('*/**', recursive=True)).match
self.assertIsNotNone(match(os.path.join('foo', 'bar')))
self.assertIsNone(match(os.path.join('foo', '.bar')))
self.assertIsNone(match(os.path.join('.foo', 'bar')))
self.assertIsNone(match(os.path.join('.foo', '.bar')))
match = re.compile(glob.translate('**/.bar', recursive=True)).match
self.assertIsNotNone(match(os.path.join('foo', '.bar')))
self.assertIsNone(match(os.path.join('.foo', '.bar')))
match = re.compile(glob.translate('**/*.*', recursive=True)).match
self.assertIsNone(match(os.path.join('foo', 'bar')))
self.assertIsNone(match(os.path.join('foo', '.bar')))
self.assertIsNotNone(match(os.path.join('foo', 'bar.txt')))
self.assertIsNone(match(os.path.join('foo', '.bar.txt')))
def test_translate(self):
def fn(pat):
return glob.translate(pat, seps='/')
self.assertEqual(fn('foo'), r'(?s:foo)\Z')
self.assertEqual(fn('foo/bar'), r'(?s:foo/bar)\Z')
self.assertEqual(fn('*'), r'(?s:[^/.][^/]*)\Z')
self.assertEqual(fn('?'), r'(?s:(?!\.)[^/])\Z')
self.assertEqual(fn('a*'), r'(?s:a[^/]*)\Z')
self.assertEqual(fn('*a'), r'(?s:(?!\.)[^/]*a)\Z')
self.assertEqual(fn('.*'), r'(?s:\.[^/]*)\Z')
self.assertEqual(fn('?aa'), r'(?s:(?!\.)[^/]aa)\Z')
self.assertEqual(fn('aa?'), r'(?s:aa[^/])\Z')
self.assertEqual(fn('aa[ab]'), r'(?s:aa[ab])\Z')
self.assertEqual(fn('**'), r'(?s:(?!\.)[^/]*)\Z')
self.assertEqual(fn('***'), r'(?s:(?!\.)[^/]*)\Z')
self.assertEqual(fn('a**'), r'(?s:a[^/]*)\Z')
self.assertEqual(fn('**b'), r'(?s:(?!\.)[^/]*b)\Z')
self.assertEqual(fn('/**/*/*.*/**'),
r'(?s:/(?!\.)[^/]*/[^/.][^/]*/(?!\.)[^/]*\.[^/]*/(?!\.)[^/]*)\Z')
def test_translate_include_hidden(self):
def fn(pat):
return glob.translate(pat, include_hidden=True, seps='/')
self.assertEqual(fn('foo'), r'(?s:foo)\Z')
self.assertEqual(fn('foo/bar'), r'(?s:foo/bar)\Z')
self.assertEqual(fn('*'), r'(?s:[^/]+)\Z')
self.assertEqual(fn('?'), r'(?s:[^/])\Z')
self.assertEqual(fn('a*'), r'(?s:a[^/]*)\Z')
self.assertEqual(fn('*a'), r'(?s:[^/]*a)\Z')
self.assertEqual(fn('.*'), r'(?s:\.[^/]*)\Z')
self.assertEqual(fn('?aa'), r'(?s:[^/]aa)\Z')
self.assertEqual(fn('aa?'), r'(?s:aa[^/])\Z')
self.assertEqual(fn('aa[ab]'), r'(?s:aa[ab])\Z')
self.assertEqual(fn('**'), r'(?s:[^/]*)\Z')
self.assertEqual(fn('***'), r'(?s:[^/]*)\Z')
self.assertEqual(fn('a**'), r'(?s:a[^/]*)\Z')
self.assertEqual(fn('**b'), r'(?s:[^/]*b)\Z')
self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/[^/]*/[^/]+/[^/]*\.[^/]*/[^/]*)\Z')
def test_translate_recursive(self):
def fn(pat):
return glob.translate(pat, recursive=True, include_hidden=True, seps='/')
self.assertEqual(fn('*'), r'(?s:[^/]+)\Z')
self.assertEqual(fn('?'), r'(?s:[^/])\Z')
self.assertEqual(fn('**'), r'(?s:.*)\Z')
self.assertEqual(fn('**/**'), r'(?s:.*)\Z')
self.assertRaises(ValueError, fn, '***')
self.assertRaises(ValueError, fn, 'a**')
self.assertRaises(ValueError, fn, '**b')
self.assertEqual(fn('/**/*/*.*/**'), r'(?s:/(?:.+/)?[^/]+/[^/]*\.[^/]*/.*)\Z')
def test_translate_seps(self):
def fn(pat):
return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\'])
self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z')
self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z')
@skip_unless_symlink
class SymlinkLoopGlobTests(unittest.TestCase):