Support PEP 723 scripts in uv add and uv remove (#5995)

## Summary

Resolves https://github.com/astral-sh/uv/issues/4667

## Test Plan

`cargo test`
This commit is contained in:
Ahmed Ilyas 2024-08-11 03:40:59 +02:00 committed by GitHub
parent 9b8c07bf18
commit 2d53e35e39
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 1215 additions and 289 deletions

View file

@ -1,9 +1,11 @@
use std::collections::BTreeMap;
use std::io;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::LazyLock;
use memchr::memmem::Finder;
use pep440_rs::VersionSpecifiers;
use serde::Deserialize;
use thiserror::Error;
@ -17,8 +19,14 @@ static FINDER: LazyLock<Finder> = LazyLock::new(|| Finder::new(b"# /// script"))
/// A PEP 723 script, including its [`Pep723Metadata`].
///
/// The file is held as three parts: [`prelude`](Self::prelude), the inline metadata block
/// (kept in parsed form as [`metadata`](Self::metadata)), and [`raw`](Self::raw). Keeping the
/// surrounding text allows the file to be written back with only the metadata block replaced.
#[derive(Debug)]
pub struct Pep723Script {
/// The path to the Python script.
pub path: PathBuf,
/// The parsed [`Pep723Metadata`] table from the script.
pub metadata: Pep723Metadata,
/// The content of the script after the metadata table.
pub raw: String,
/// The content of the script before the metadata table (for example, a shebang line).
/// Empty when nothing precedes the table.
pub prelude: String,
}
impl Pep723Script {
@ -26,12 +34,76 @@ impl Pep723Script {
///
/// Returns `Ok(None)` when the file does not exist, or when `ScriptTag::parse` finds no
/// `script` block in it. Any other read failure, and any failure while extracting the block
/// or parsing its TOML, is returned as an error.
///
/// See: <https://peps.python.org/pep-0723/>
pub async fn read(file: impl AsRef<Path>) -> Result<Option<Self>, Pep723Error> {
// NOTE(review): the next two lines look like the superseded implementation retained by the
// diff view (`metadata` is bound again further down and the closure is never closed) —
// confirm they are not part of the current body.
let metadata = Pep723Metadata::read(&file).await?;
Ok(metadata.map(|metadata| Self {
// Read the raw bytes; a missing file is reported as "no script" rather than as an error.
let contents = match fs_err::tokio::read(&file).await {
Ok(contents) => contents,
Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
Err(err) => return Err(err.into()),
};
// Extract the `script` tag.
let Some(script_tag) = ScriptTag::parse(&contents)? else {
return Ok(None);
};
// Parse the metadata.
let metadata = Pep723Metadata::from_str(&script_tag.metadata)?;
// Keep the text surrounding the block next to the parsed table.
Ok(Some(Self {
path: file.as_ref().to_path_buf(),
metadata,
raw: script_tag.script,
prelude: script_tag.prelude,
}))
}
/// Reads a Python script and generates a default PEP 723 metadata table.
///
/// See: <https://peps.python.org/pep-0723/>
pub async fn create(
file: impl AsRef<Path>,
requires_python: &VersionSpecifiers,
) -> Result<Self, Pep723Error> {
let contents = match fs_err::tokio::read(&file).await {
Ok(contents) => contents,
Err(err) => return Err(err.into()),
};
// Extract the `script` tag.
let default_metadata = indoc::formatdoc! {r#"
requires-python = "{requires_python}"
dependencies = []
"#,
requires_python = requires_python,
};
let (prelude, raw) = extract_shebang(&contents)?;
// Parse the metadata.
let metadata = Pep723Metadata::from_str(&default_metadata)?;
Ok(Self {
path: file.as_ref().to_path_buf(),
prelude: prelude.unwrap_or_default(),
metadata,
raw,
})
}
/// Replace the existing metadata in the file with new metadata and write the updated content.
///
/// `metadata` is the TOML document to embed. It is wrapped into a `# /// script` comment block by
/// `serialize_metadata` and written to `self.path` between the stored prelude and the stored
/// script body.
///
/// # Errors
///
/// Returns an error if the file cannot be written.
pub async fn write(&self, metadata: &str) -> Result<(), Pep723Error> {
    // The opening pragma has to start on its own line. A prelude obtained by parsing an existing
    // block already ends with its line terminator, whereas a bare shebang line does not. Only add
    // a newline when one is missing; unconditionally adding it would insert one more blank line
    // above the metadata block on every rewrite.
    let separator = if self.prelude.is_empty() || self.prelude.ends_with(['\n', '\r']) {
        ""
    } else {
        "\n"
    };

    let content = format!(
        "{}{}{}{}",
        self.prelude,
        separator,
        serialize_metadata(metadata),
        self.raw
    );

    Ok(fs_err::tokio::write(&self.path, content).await?)
}
}
/// PEP 723 metadata as parsed from a `script` comment block.
@ -41,30 +113,23 @@ impl Pep723Script {
#[serde(rename_all = "kebab-case")]
pub struct Pep723Metadata {
/// The `dependencies` entry of the metadata table, if present.
pub dependencies: Option<Vec<pep508_rs::Requirement<VerbatimParsedUrl>>>,
// NOTE(review): this declaration and the next one both define `requires_python`; the first
// spells the type with its full `pep440_rs::` path and looks like the superseded line retained
// by the diff view — confirm only one of them is part of the struct.
pub requires_python: Option<pep440_rs::VersionSpecifiers>,
/// The `requires-python` entry of the metadata table (kebab-case through `rename_all`), if
/// present.
pub requires_python: Option<VersionSpecifiers>,
/// The `tool` entry of the metadata table, if present.
pub tool: Option<Tool>,
/// The raw unserialized document.
///
/// Skipped by serde; the `FromStr` implementation stores the exact input string here.
#[serde(skip)]
pub raw: String,
}
impl Pep723Metadata {
/// Read the PEP 723 `script` metadata from a Python file, if it exists.
///
/// See: <https://peps.python.org/pep-0723/>
pub async fn read(file: impl AsRef<Path>) -> Result<Option<Self>, Pep723Error> {
let contents = match fs_err::tokio::read(file).await {
Ok(contents) => contents,
Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
Err(err) => return Err(err.into()),
};
impl FromStr for Pep723Metadata {
type Err = Pep723Error;
// NOTE(review): from here down to `Ok(Some(metadata))` the lines appear to belong to the
// superseded `Pep723Metadata::read` body retained by the diff view — confirm they are not part
// of this `impl`.
// Extract the `script` tag.
let Some(contents) = extract_script_tag(&contents)? else {
return Ok(None);
};
// Parse the metadata.
let metadata = toml::from_str(&contents)?;
Ok(Some(metadata))
/// Parse `Pep723Metadata` from a raw TOML string.
///
/// The typed fields are deserialized with `toml::from_str`; the returned value additionally
/// carries an owned copy of the input in its `raw` field. A deserialization failure is
/// returned as the error.
fn from_str(raw: &str) -> Result<Self, Self::Err> {
// `raw` is marked `#[serde(skip)]` on the struct, so it is filled in explicitly below.
let metadata = toml::from_str(raw)?;
// Keep the original text and take every other field from the deserialized value.
Ok(Pep723Metadata {
raw: raw.to_string(),
..metadata
})
}
}
@ -94,120 +159,193 @@ pub enum Pep723Error {
Toml(#[from] toml::de::Error),
}
/// Read the PEP 723 `script` metadata from a Python file, if it exists.
///
/// See: <https://peps.python.org/pep-0723/>
pub async fn read_pep723_metadata(
file: impl AsRef<Path>,
) -> Result<Option<Pep723Metadata>, Pep723Error> {
let contents = match fs_err::tokio::read(file).await {
Ok(contents) => contents,
Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
Err(err) => return Err(err.into()),
};
// Extract the `script` tag.
let Some(contents) = extract_script_tag(&contents)? else {
return Ok(None);
};
// Parse the metadata.
let metadata = toml::from_str(&contents)?;
Ok(Some(metadata))
/// The parts of a Python file that contains a PEP 723 `script` block, as produced by
/// `ScriptTag::parse`.
#[derive(Debug, Clone, Eq, PartialEq)]
struct ScriptTag {
/// The content of the script before the metadata block, i.e. everything that precedes the
/// opening `# /// script` line. Empty when the block starts the file.
prelude: String,
/// The metadata block, with the comment prefixes and the surrounding pragma lines removed;
/// newline-terminated.
metadata: String,
/// The content of the script after the metadata block; newline-terminated.
script: String,
}
/// Given the contents of a Python file, extract the `script` metadata block, with leading comment
/// hashes removed.
///
/// See: <https://peps.python.org/pep-0723/>
fn extract_script_tag(contents: &[u8]) -> Result<Option<String>, Pep723Error> {
// Identify the opening pragma.
let Some(index) = FINDER.find(contents) else {
return Ok(None);
};
impl ScriptTag {
/// Given the contents of a Python file, extract the `script` metadata block with leading
/// comment hashes removed, any preceding shebang or content (prelude), and the remaining Python
/// script.
///
/// Given the following input string representing the contents of a Python script:
///
/// ```python
/// #!/usr/bin/env python3
/// # /// script
/// # requires-python = '>=3.11'
/// # dependencies = [
/// # 'requests<3',
/// # 'rich',
/// # ]
/// # ///
///
/// import requests
///
/// print("Hello, World!")
/// ```
///
/// This function would return:
///
/// - Preamble: `#!/usr/bin/env python3\n`
/// - Metadata: `requires-python = '>=3.11'\ndependencies = [\n 'requests<3',\n 'rich',\n]`
/// - Script: `import requests\n\nprint("Hello, World!")\n`
///
/// See: <https://peps.python.org/pep-0723/>
fn parse(contents: &[u8]) -> Result<Option<Self>, Pep723Error> {
// Identify the opening pragma.
let Some(index) = FINDER.find(contents) else {
return Ok(None);
};
// The opening pragma must be the first line, or immediately preceded by a newline.
if !(index == 0 || matches!(contents[index - 1], b'\r' | b'\n')) {
return Ok(None);
// The opening pragma must be the first line, or immediately preceded by a newline.
if !(index == 0 || matches!(contents[index - 1], b'\r' | b'\n')) {
return Ok(None);
}
// Extract the preceding content.
let prelude = std::str::from_utf8(&contents[..index])?;
// Decode as UTF-8.
let contents = &contents[index..];
let contents = std::str::from_utf8(contents)?;
let mut lines = contents.lines();
// Ensure that the first line is exactly `# /// script`.
if !lines.next().is_some_and(|line| line == "# /// script") {
return Ok(None);
}
// > Every line between these two lines (# /// TYPE and # ///) MUST be a comment starting
// > with #. If there are characters after the # then the first character MUST be a space. The
// > embedded content is formed by taking away the first two characters of each line if the
// > second character is a space, otherwise just the first character (which means the line
// > consists of only a single #).
let mut toml = vec![];
// Extract the content that follows the metadata block.
let mut python_script = vec![];
while let Some(line) = lines.next() {
// Remove the leading `#`.
let Some(line) = line.strip_prefix('#') else {
python_script.push(line);
python_script.extend(lines);
break;
};
// If the line is empty, continue.
if line.is_empty() {
toml.push("");
continue;
}
// Otherwise, the line _must_ start with ` `.
let Some(line) = line.strip_prefix(' ') else {
python_script.push(line);
python_script.extend(lines);
break;
};
toml.push(line);
}
// Find the closing `# ///`. The precedence is such that we need to identify the _last_ such
// line.
//
// For example, given:
// ```python
// # /// script
// #
// # ///
// #
// # ///
// ```
//
// The latter `///` is the closing pragma
let Some(index) = toml.iter().rev().position(|line| *line == "///") else {
return Ok(None);
};
let index = toml.len() - index;
// Discard any lines after the closing `# ///`.
//
// For example, given:
// ```python
// # /// script
// #
// # ///
// #
// #
// ```
//
// We need to discard the last two lines.
toml.truncate(index - 1);
// Join the lines into a single string.
let prelude = prelude.to_string();
let metadata = toml.join("\n") + "\n";
let script = python_script.join("\n") + "\n";
Ok(Some(Self {
prelude,
metadata,
script,
}))
}
}
// Decode as UTF-8.
let contents = &contents[index..];
/// Extracts the shebang line from the given file contents and returns it along with the remaining
/// content.
fn extract_shebang(contents: &[u8]) -> Result<(Option<String>, String), Pep723Error> {
let contents = std::str::from_utf8(contents)?;
let mut lines = contents.lines();
// Ensure that the first line is exactly `# /// script`.
if !lines.next().is_some_and(|line| line == "# /// script") {
return Ok(None);
}
// > Every line between these two lines (# /// TYPE and # ///) MUST be a comment starting
// > with #. If there are characters after the # then the first character MUST be a space. The
// > embedded content is formed by taking away the first two characters of each line if the
// > second character is a space, otherwise just the first character (which means the line
// > consists of only a single #).
let mut toml = vec![];
for line in lines {
// Remove the leading `#`.
let Some(line) = line.strip_prefix('#') else {
break;
};
// If the line is empty, continue.
if line.is_empty() {
toml.push("");
continue;
// Check the first line for a shebang
if let Some(first_line) = lines.next() {
if first_line.starts_with("#!") {
let shebang = first_line.to_string();
let remaining_content: String = lines.collect::<Vec<&str>>().join("\n");
return Ok((Some(shebang), remaining_content));
}
// Otherwise, the line _must_ start with ` `.
let Some(line) = line.strip_prefix(' ') else {
break;
};
toml.push(line);
}
// Find the closing `# ///`. The precedence is such that we need to identify the _last_ such
// line.
//
// For example, given:
// ```python
// # /// script
// #
// # ///
// #
// # ///
// ```
//
// The latter `///` is the closing pragma
let Some(index) = toml.iter().rev().position(|line| *line == "///") else {
return Ok(None);
};
let index = toml.len() - index;
Ok((None, contents.to_string()))
}
// Discard any lines after the closing `# ///`.
//
// For example, given:
// ```python
// # /// script
// #
// # ///
// #
// #
// ```
//
// We need to discard the last two lines.
toml.truncate(index - 1);
/// Formats the provided metadata by prefixing each line with `#` and wrapping it with script markers.
///
/// Non-empty lines are written as `# <line>` and empty lines as a lone `#`, so that every line
/// between the `# /// script` and `# ///` markers is a comment, as PEP 723 requires. The result
/// always ends with a newline.
fn serialize_metadata(metadata: &str) -> String {
    // Room for the two marker lines; the per-line prefixes are left to the string's growth.
    let mut output = String::with_capacity(metadata.len() + 32);

    output.push_str("# /// script\n");

    for line in metadata.lines() {
        // A blank line must still be a comment: an actual empty line would terminate the block
        // before its closing marker.
        output.push('#');
        if !line.is_empty() {
            output.push(' ');
            output.push_str(line);
        }
        output.push('\n');
    }

    output.push_str("# ///\n");
    output
}
#[cfg(test)]
mod tests {
use crate::{serialize_metadata, ScriptTag};
#[test]
fn missing_space() {
let contents = indoc::indoc! {r"
@ -216,10 +354,7 @@ mod tests {
# ///
"};
assert_eq!(
super::extract_script_tag(contents.as_bytes()).unwrap(),
None
);
assert_eq!(ScriptTag::parse(contents.as_bytes()).unwrap(), None);
}
#[test]
@ -233,10 +368,7 @@ mod tests {
# ]
"};
assert_eq!(
super::extract_script_tag(contents.as_bytes()).unwrap(),
None
);
assert_eq!(ScriptTag::parse(contents.as_bytes()).unwrap(), None);
}
#[test]
@ -253,10 +385,7 @@ mod tests {
#
"};
assert_eq!(
super::extract_script_tag(contents.as_bytes()).unwrap(),
None
);
assert_eq!(ScriptTag::parse(contents.as_bytes()).unwrap(), None);
}
#[test]
@ -269,9 +398,15 @@ mod tests {
# 'rich',
# ]
# ///
import requests
from rich.pretty import pprint
resp = requests.get('https://peps.python.org/api/peps.json')
data = resp.json()
"};
let expected = indoc::indoc! {r"
let expected_metadata = indoc::indoc! {r"
requires-python = '>=3.11'
dependencies = [
'requests<3',
@ -279,13 +414,64 @@ mod tests {
]
"};
let actual = super::extract_script_tag(contents.as_bytes())
.unwrap()
.unwrap();
let expected_data = indoc::indoc! {r"
assert_eq!(actual, expected);
import requests
from rich.pretty import pprint
resp = requests.get('https://peps.python.org/api/peps.json')
data = resp.json()
"};
let actual = ScriptTag::parse(contents.as_bytes()).unwrap().unwrap();
assert_eq!(actual.prelude, String::new());
assert_eq!(actual.metadata, expected_metadata);
assert_eq!(actual.script, expected_data);
}
// A shebang line ahead of the opening pragma must come back as the prelude (line terminator
// included), while the metadata and the script that follows are extracted as usual.
#[test]
fn simple_with_shebang() {
let contents = indoc::indoc! {r"
#!/usr/bin/env python3
# /// script
# requires-python = '>=3.11'
# dependencies = [
# 'requests<3',
# 'rich',
# ]
# ///
import requests
from rich.pretty import pprint
resp = requests.get('https://peps.python.org/api/peps.json')
data = resp.json()
"};
// The block content with the comment prefixes removed.
let expected_metadata = indoc::indoc! {r"
requires-python = '>=3.11'
dependencies = [
'requests<3',
'rich',
]
"};
// The Python source that follows the block.
let expected_data = indoc::indoc! {r"
import requests
from rich.pretty import pprint
resp = requests.get('https://peps.python.org/api/peps.json')
data = resp.json()
"};
let actual = ScriptTag::parse(contents.as_bytes()).unwrap().unwrap();
assert_eq!(actual.prelude, "#!/usr/bin/env python3\n".to_string());
assert_eq!(actual.metadata, expected_metadata);
assert_eq!(actual.script, expected_data);
}
#[test]
fn embedded_comment() {
let contents = indoc::indoc! {r"
@ -310,9 +496,10 @@ mod tests {
'''
"};
let actual = super::extract_script_tag(contents.as_bytes())
let actual = ScriptTag::parse(contents.as_bytes())
.unwrap()
.unwrap();
.unwrap()
.metadata;
assert_eq!(actual, expected);
}
@ -339,10 +526,44 @@ mod tests {
]
"};
let actual = super::extract_script_tag(contents.as_bytes())
let actual = ScriptTag::parse(contents.as_bytes())
.unwrap()
.unwrap();
.unwrap()
.metadata;
assert_eq!(actual, expected);
}
// Each metadata line must be emitted with a `# ` prefix, between the `# /// script` and `# ///`
// marker lines.
#[test]
fn test_serialize_metadata_formatting() {
let metadata = indoc::indoc! {r"
requires-python = '>=3.11'
dependencies = [
'requests<3',
'rich',
]
"};
let expected_output = indoc::indoc! {r"
# /// script
# requires-python = '>=3.11'
# dependencies = [
# 'requests<3',
# 'rich',
# ]
# ///
"};
let result = serialize_metadata(metadata);
assert_eq!(result, expected_output);
}
// Empty metadata must still produce a well-formed block: just the two marker lines.
#[test]
fn test_serialize_metadata_empty() {
let metadata = "";
let expected_output = "# /// script\n# ///\n";
let result = serialize_metadata(metadata);
assert_eq!(result, expected_output);
}
}