Move portable glob parsing to struct (#13311)

Refactoring to make fixing #13280 easier.
This commit is contained in:
konsti 2025-05-06 13:22:54 +02:00 committed by GitHub
parent 3218e364ae
commit 9071e0eeac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 129 additions and 115 deletions

View file

@ -12,7 +12,7 @@ use version_ranges::Ranges;
use walkdir::WalkDir; use walkdir::WalkDir;
use uv_fs::Simplified; use uv_fs::Simplified;
use uv_globfilter::{parse_portable_glob, GlobDirFilter}; use uv_globfilter::{GlobDirFilter, PortableGlobParser};
use uv_normalize::{ExtraName, PackageName}; use uv_normalize::{ExtraName, PackageName};
use uv_pep440::{Version, VersionSpecifiers}; use uv_pep440::{Version, VersionSpecifiers};
use uv_pep508::{ use uv_pep508::{
@ -395,11 +395,12 @@ impl PyProjectToml {
let mut license_files = Vec::new(); let mut license_files = Vec::new();
let mut license_globs_parsed = Vec::new(); let mut license_globs_parsed = Vec::new();
for license_glob in license_globs { for license_glob in license_globs {
let pep639_glob = let pep639_glob = PortableGlobParser.parse(license_glob).map_err(|err| {
parse_portable_glob(license_glob).map_err(|err| Error::PortableGlob { Error::PortableGlob {
field: license_glob.to_string(), field: license_glob.to_string(),
source: err, source: err,
})?; }
})?;
license_globs_parsed.push(pep639_glob); license_globs_parsed.push(pep639_glob);
} }
let license_globs = let license_globs =

View file

@ -15,7 +15,7 @@ use tar::{EntryType, Header};
use tracing::{debug, trace}; use tracing::{debug, trace};
use uv_distribution_filename::{SourceDistExtension, SourceDistFilename}; use uv_distribution_filename::{SourceDistExtension, SourceDistFilename};
use uv_fs::Simplified; use uv_fs::Simplified;
use uv_globfilter::{parse_portable_glob, GlobDirFilter}; use uv_globfilter::{GlobDirFilter, PortableGlobParser};
use uv_pypi_types::Identifier; use uv_pypi_types::Identifier;
use uv_warnings::warn_user_once; use uv_warnings::warn_user_once;
use walkdir::WalkDir; use walkdir::WalkDir;
@ -88,10 +88,12 @@ fn source_dist_matcher(
.to_string(); .to_string();
includes.push(format!("{}/**", globset::escape(import_path))); includes.push(format!("{}/**", globset::escape(import_path)));
for include in includes { for include in includes {
let glob = parse_portable_glob(&include).map_err(|err| Error::PortableGlob { let glob = PortableGlobParser
field: "tool.uv.build-backend.source-include".to_string(), .parse(&include)
source: err, .map_err(|err| Error::PortableGlob {
})?; field: "tool.uv.build-backend.source-include".to_string(),
source: err,
})?;
include_globs.push(glob.clone()); include_globs.push(glob.clone());
} }
@ -111,21 +113,22 @@ fn source_dist_matcher(
// Include the license files // Include the license files
for license_files in pyproject_toml.license_files_source_dist() { for license_files in pyproject_toml.license_files_source_dist() {
trace!("Including license files at: `{license_files}`"); trace!("Including license files at: `{license_files}`");
let glob = parse_portable_glob(license_files).map_err(|err| Error::PortableGlob { let glob = PortableGlobParser
field: "project.license-files".to_string(), .parse(license_files)
source: err, .map_err(|err| Error::PortableGlob {
})?; field: "project.license-files".to_string(),
source: err,
})?;
include_globs.push(glob); include_globs.push(glob);
} }
// Include the data files // Include the data files
for (name, directory) in settings.data.iter() { for (name, directory) in settings.data.iter() {
let glob = let glob = PortableGlobParser
parse_portable_glob(&format!("{}/**", globset::escape(directory))).map_err(|err| { .parse(&format!("{}/**", globset::escape(directory)))
Error::PortableGlob { .map_err(|err| Error::PortableGlob {
field: format!("tool.uv.build-backend.data.{name}"), field: format!("tool.uv.build-backend.data.{name}"),
source: err, source: err,
}
})?; })?;
trace!("Including data ({name}) at: `{directory}`"); trace!("Including data ({name}) at: `{directory}`");
include_globs.push(glob); include_globs.push(glob);

View file

@ -12,7 +12,7 @@ use zip::{CompressionMethod, ZipWriter};
use uv_distribution_filename::WheelFilename; use uv_distribution_filename::WheelFilename;
use uv_fs::Simplified; use uv_fs::Simplified;
use uv_globfilter::{parse_portable_glob, GlobDirFilter}; use uv_globfilter::{GlobDirFilter, PortableGlobParser};
use uv_platform_tags::{AbiTag, LanguageTag, PlatformTag}; use uv_platform_tags::{AbiTag, LanguageTag, PlatformTag};
use uv_pypi_types::Identifier; use uv_pypi_types::Identifier;
use uv_warnings::warn_user_once; use uv_warnings::warn_user_once;
@ -432,10 +432,12 @@ pub(crate) fn build_exclude_matcher(
} else { } else {
format!("**/{exclude}").to_string() format!("**/{exclude}").to_string()
}; };
let glob = parse_portable_glob(&exclude).map_err(|err| Error::PortableGlob { let glob = PortableGlobParser
field: "tool.uv.build-backend.*-exclude".to_string(), .parse(&exclude)
source: err, .map_err(|err| Error::PortableGlob {
})?; field: "tool.uv.build-backend.*-exclude".to_string(),
source: err,
})?;
exclude_builder.add(glob); exclude_builder.add(glob);
} }
let exclude_matcher = exclude_builder let exclude_matcher = exclude_builder
@ -467,7 +469,7 @@ fn wheel_subdir_from_globs(
src.user_display(), src.user_display(),
license_files license_files
); );
parse_portable_glob(license_files) PortableGlobParser.parse(license_files)
}) })
.collect::<Result<_, _>>() .collect::<Result<_, _>>()
.map_err(|err| Error::PortableGlob { .map_err(|err| Error::PortableGlob {

View file

@ -124,7 +124,7 @@ impl GlobDirFilter {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::glob_dir_filter::GlobDirFilter; use crate::glob_dir_filter::GlobDirFilter;
use crate::portable_glob::parse_portable_glob; use crate::portable_glob::PortableGlobParser;
use std::path::{Path, MAIN_SEPARATOR}; use std::path::{Path, MAIN_SEPARATOR};
use tempfile::tempdir; use tempfile::tempdir;
use walkdir::WalkDir; use walkdir::WalkDir;
@ -152,7 +152,7 @@ mod tests {
#[test] #[test]
fn match_directory() { fn match_directory() {
let patterns = PATTERNS.map(|pattern| parse_portable_glob(pattern).unwrap()); let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap());
let matcher = GlobDirFilter::from_globs(&patterns).unwrap(); let matcher = GlobDirFilter::from_globs(&patterns).unwrap();
assert!(matcher.match_directory(&Path::new("path1").join("dir1"))); assert!(matcher.match_directory(&Path::new("path1").join("dir1")));
assert!(matcher.match_directory(&Path::new("path2").join("dir2"))); assert!(matcher.match_directory(&Path::new("path2").join("dir2")));
@ -170,7 +170,7 @@ mod tests {
fs_err::create_dir_all(file.parent().unwrap()).unwrap(); fs_err::create_dir_all(file.parent().unwrap()).unwrap();
fs_err::File::create(file).unwrap(); fs_err::File::create(file).unwrap();
} }
let patterns = PATTERNS.map(|pattern| parse_portable_glob(pattern).unwrap()); let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap());
let matcher = GlobDirFilter::from_globs(&patterns).unwrap(); let matcher = GlobDirFilter::from_globs(&patterns).unwrap();
// Test the prefix filtering // Test the prefix filtering
@ -228,7 +228,7 @@ mod tests {
fs_err::create_dir_all(file.parent().unwrap()).unwrap(); fs_err::create_dir_all(file.parent().unwrap()).unwrap();
fs_err::File::create(file).unwrap(); fs_err::File::create(file).unwrap();
} }
let patterns = PATTERNS.map(|pattern| parse_portable_glob(pattern).unwrap()); let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap());
let include_matcher = GlobDirFilter::from_globs(&patterns).unwrap(); let include_matcher = GlobDirFilter::from_globs(&patterns).unwrap();

View file

@ -7,4 +7,4 @@ mod glob_dir_filter;
mod portable_glob; mod portable_glob;
pub use glob_dir_filter::GlobDirFilter; pub use glob_dir_filter::GlobDirFilter;
pub use portable_glob::{check_portable_glob, parse_portable_glob, PortableGlobError}; pub use portable_glob::{PortableGlobError, PortableGlobParser};

View file

@ -3,7 +3,7 @@
use globset::GlobSetBuilder; use globset::GlobSetBuilder;
use std::env::args; use std::env::args;
use tracing::trace; use tracing::trace;
use uv_globfilter::{parse_portable_glob, GlobDirFilter}; use uv_globfilter::{GlobDirFilter, PortableGlobParser};
use walkdir::WalkDir; use walkdir::WalkDir;
fn main() { fn main() {
@ -12,7 +12,7 @@ fn main() {
let mut include_globs = Vec::new(); let mut include_globs = Vec::new();
for include in includes { for include in includes {
let glob = parse_portable_glob(include).unwrap(); let glob = PortableGlobParser.parse(include).unwrap();
include_globs.push(glob.clone()); include_globs.push(glob.clone());
} }
let include_matcher = GlobDirFilter::from_globs(&include_globs).unwrap(); let include_matcher = GlobDirFilter::from_globs(&include_globs).unwrap();
@ -25,7 +25,7 @@ fn main() {
} else { } else {
format!("**/{exclude}").to_string() format!("**/{exclude}").to_string()
}; };
let glob = parse_portable_glob(&exclude).unwrap(); let glob = PortableGlobParser.parse(&exclude).unwrap();
exclude_builder.add(glob); exclude_builder.add(glob);
} }
// https://github.com/BurntSushi/ripgrep/discussions/2927 // https://github.com/BurntSushi/ripgrep/discussions/2927

View file

@ -1,4 +1,5 @@
//! Cross-language glob syntax from [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/). //! Cross-language glob syntax from
//! [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/).
use globset::{Glob, GlobBuilder}; use globset::{Glob, GlobBuilder};
use thiserror::Error; use thiserror::Error;
@ -28,99 +29,106 @@ pub enum PortableGlobError {
TooManyStars { glob: String, pos: usize }, TooManyStars { glob: String, pos: usize },
} }
/// Parse cross-language glob syntax from [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/): /// Cross-language glob parser with the glob syntax from
/// /// [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/).
/// - Alphanumeric characters, underscores (`_`), hyphens (`-`) and dots (`.`) are matched verbatim. #[derive(Debug, PartialEq, Eq, Clone, Copy)]
/// - The special glob characters are: pub struct PortableGlobParser;
/// - `*`: Matches any number of characters except path separators
/// - `?`: Matches a single character except the path separator
/// - `**`: Matches any number of characters including path separators
/// - `[]`, containing only the verbatim matched characters: Matches a single one of the characters contained. Within
/// `[...]`, the hyphen indicates a locale-agnostic range (e.g. `a-z`, order based on Unicode code points). Hyphens at
/// the start or end are matched literally.
/// - The path separator is the forward slash character (`/`). Patterns are relative to the given directory, a leading slash
/// character for absolute paths is not supported.
/// - Parent directory indicators (`..`) are not allowed.
///
/// These rules mean that matching the backslash (`\`) is forbidden, which avoids collisions with the Windows path separator.
pub fn parse_portable_glob(glob: &str) -> Result<Glob, PortableGlobError> {
check_portable_glob(glob)?;
Ok(GlobBuilder::new(glob).literal_separator(true).build()?)
}
/// See [`parse_portable_glob`]. impl PortableGlobParser {
pub fn check_portable_glob(glob: &str) -> Result<(), PortableGlobError> { /// Parse cross-language glob syntax from [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/):
let mut chars = glob.chars().enumerate().peekable(); ///
// A `..` is only a parent directory indicator at the start of the string or after a directory ///
// separator. /// - The special glob characters are:
let mut start_or_slash = true; /// - `*`: Matches any number of characters except path separators
// The number of consecutive stars before the current character. /// - `?`: Matches a single character except the path separator
while let Some((pos, c)) = chars.next() { /// - `**`: Matches any number of characters including path separators
// `***` or `**literals` can be correctly represented with fewer stars. They are banned by /// - `[]`, containing only the verbatim matched characters: Matches a single one of the characters contained. Within
// `glob`, they are allowed by `globset` and PEP 639 is ambiguous, so we're filtering them /// `[...]`, the hyphen indicates a locale-agnostic range (e.g. `a-z`, order based on Unicode code points). Hyphens at
// out. /// the start or end are matched literally.
if c == '*' { /// - The path separator is the forward slash character (`/`). Patterns are relative to the given directory, a leading slash
let mut star_run = 1; /// character for absolute paths is not supported.
while let Some((_, c)) = chars.peek() { /// - Parent directory indicators (`..`) are not allowed.
if *c == '*' { ///
star_run += 1; /// These rules mean that matching the backslash (`\`) is forbidden, which avoids collisions with the Windows path separator.
chars.next(); pub fn parse(&self, glob: &str) -> Result<Glob, PortableGlobError> {
} else { self.check(glob)?;
break; Ok(GlobBuilder::new(glob).literal_separator(true).build()?)
}
/// See [`Self::parse`].
pub fn check(&self, glob: &str) -> Result<(), PortableGlobError> {
let mut chars = glob.chars().enumerate().peekable();
// A `..` is only a parent directory indicator at the start of the string or after a directory
// separator.
let mut start_or_slash = true;
// The number of consecutive stars before the current character.
while let Some((pos, c)) = chars.next() {
// `***` or `**literals` can be correctly represented with fewer stars. They are banned by
// `glob`, they are allowed by `globset` and PEP 639 is ambiguous, so we're filtering them
// out.
if c == '*' {
let mut star_run = 1;
while let Some((_, c)) = chars.peek() {
if *c == '*' {
star_run += 1;
chars.next();
} else {
break;
}
} }
} if star_run >= 3 {
if star_run >= 3 {
return Err(PortableGlobError::TooManyStars {
glob: glob.to_string(),
// We don't update pos for the stars.
pos,
});
} else if star_run == 2 {
if chars.peek().is_some_and(|(_, c)| *c != '/') {
return Err(PortableGlobError::TooManyStars { return Err(PortableGlobError::TooManyStars {
glob: glob.to_string(), glob: glob.to_string(),
// We don't update pos for the stars. // We don't update pos for the stars.
pos, pos,
}); });
} else if star_run == 2 {
if chars.peek().is_some_and(|(_, c)| *c != '/') {
return Err(PortableGlobError::TooManyStars {
glob: glob.to_string(),
// We don't update pos for the stars.
pos,
});
}
} }
} start_or_slash = false;
start_or_slash = false; } else if c.is_alphanumeric() || matches!(c, '_' | '-' | '?') {
} else if c.is_alphanumeric() || matches!(c, '_' | '-' | '?') { start_or_slash = false;
start_or_slash = false; } else if c == '.' {
} else if c == '.' { if start_or_slash && matches!(chars.peek(), Some((_, '.'))) {
if start_or_slash && matches!(chars.peek(), Some((_, '.'))) { return Err(PortableGlobError::ParentDirectory {
return Err(PortableGlobError::ParentDirectory {
pos,
glob: glob.to_string(),
});
}
start_or_slash = false;
} else if c == '/' {
start_or_slash = true;
} else if c == '[' {
for (pos, c) in chars.by_ref() {
if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') {
// Allowed.
} else if c == ']' {
break;
} else {
return Err(PortableGlobError::InvalidCharacterRange {
glob: glob.to_string(),
pos, pos,
invalid: c, glob: glob.to_string(),
}); });
} }
start_or_slash = false;
} else if c == '/' {
start_or_slash = true;
} else if c == '[' {
for (pos, c) in chars.by_ref() {
if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') {
// Allowed.
} else if c == ']' {
break;
} else {
return Err(PortableGlobError::InvalidCharacterRange {
glob: glob.to_string(),
pos,
invalid: c,
});
}
}
start_or_slash = false;
} else {
return Err(PortableGlobError::InvalidCharacter {
glob: glob.to_string(),
pos,
invalid: c,
});
} }
start_or_slash = false;
} else {
return Err(PortableGlobError::InvalidCharacter {
glob: glob.to_string(),
pos,
invalid: c,
});
} }
Ok(())
} }
Ok(())
} }
#[cfg(test)] #[cfg(test)]
@ -130,7 +138,7 @@ mod tests {
#[test] #[test]
fn test_error() { fn test_error() {
let parse_err = |glob| parse_portable_glob(glob).unwrap_err().to_string(); let parse_err = |glob| PortableGlobParser.parse(glob).unwrap_err().to_string();
assert_snapshot!( assert_snapshot!(
parse_err(".."), parse_err(".."),
@"The parent directory operator (`..`) at position 0 is not allowed in glob: `..`" @"The parent directory operator (`..`) at position 0 is not allowed in glob: `..`"
@ -188,7 +196,7 @@ mod tests {
"src/**", "src/**",
]; ];
for case in cases { for case in cases {
parse_portable_glob(case).unwrap(); PortableGlobParser.parse(case).unwrap();
} }
} }
} }