Remove RFC2047 decoder (#967)

## Summary

- This was inherited from
d719988323/src/metadata.rs (LL78C2-L91C26)
- ...which introduced this code here:
9cd1d43f7c
- ...with the originating issue here:
https://github.com/PyO3/maturin/issues/612
- ...and the upstream issue here:
https://github.com/staktrace/mailparse/issues/50

It seems like the goal was to support Unicode in certain header fields,
but I don't think this is necessary for us. We only use
`get_first_value` for `Requires-Python`, which has to be ASCII, doesn't
it?

In my testing, it seems like the `charset` hack can also be removed. The
tests I copied over actually work without it, which makes me a bit
skeptical.

The main benefit here is that we get to a remove a _big_ dependency
stack, including Chumsky and Stacker and psm which have limited
cross-platform support.
This commit is contained in:
Charlie Marsh 2024-01-18 15:09:45 -05:00 committed by GitHub
parent f17bad0a75
commit 96a61fb351
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 59 additions and 106 deletions

View file

@ -21,7 +21,6 @@ chrono = { workspace = true, features = ["serde"] }
mailparse = { workspace = true }
once_cell = { workspace = true }
regex = { workspace = true }
rfc2047-decoder = { workspace = true }
serde = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }

View file

@ -75,24 +75,16 @@ pub enum Error {
impl Metadata21 {
/// Parse distribution metadata from metadata bytes
pub fn parse(content: &[u8]) -> Result<Self, Error> {
// HACK: trick mailparse to parse as UTF-8 instead of ASCII
let mut mail = b"Content-Type: text/plain; charset=utf-8\n".to_vec();
mail.extend_from_slice(content);
let msg = mailparse::parse_mail(&mail)?;
let msg = mailparse::parse_mail(content)?;
let headers = msg.get_headers();
let get_first_value = |name| {
headers.get_first_header(name).and_then(|header| {
match rfc2047_decoder::decode(header.get_value_raw()) {
Ok(value) => {
if value == "UNKNOWN" {
None
} else {
Some(value)
}
}
Err(_) => None,
let value = header.get_value();
if value == "UNKNOWN" {
None
} else {
Some(value)
}
})
};
@ -141,3 +133,55 @@ impl Metadata21 {
})
}
}
impl FromStr for Metadata21 {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Metadata21::parse(s.as_bytes())
}
}
#[cfg(test)]
mod tests {
use std::str::FromStr;
use pep440_rs::Version;
use puffin_normalize::PackageName;
use crate::Error;
use super::Metadata21;
#[test]
fn test_parse_from_str() {
let s = "Metadata-Version: 1.0";
let meta: Result<Metadata21, Error> = s.parse();
assert!(matches!(meta, Err(Error::FieldNotFound("Name"))));
let s = "Metadata-Version: 1.0\nName: asdf";
let meta = Metadata21::parse(s.as_bytes());
assert!(matches!(meta, Err(Error::FieldNotFound("Version"))));
let s = "Metadata-Version: 1.0\nName: asdf\nVersion: 1.0";
let meta = Metadata21::parse(s.as_bytes()).unwrap();
assert_eq!(meta.metadata_version, "1.0");
assert_eq!(meta.name, PackageName::from_str("asdf").unwrap());
assert_eq!(meta.version, Version::new([1, 0]));
let s = "Metadata-Version: 1.0\nName: asdf\nVersion: 1.0\nAuthor: 中文\n\n一个 Python 包";
let meta = Metadata21::parse(s.as_bytes()).unwrap();
assert_eq!(meta.metadata_version, "1.0");
assert_eq!(meta.name, PackageName::from_str("asdf").unwrap());
assert_eq!(meta.version, Version::new([1, 0]));
let s = "Metadata-Version: 1.0\nName: =?utf-8?q?foobar?=\nVersion: 1.0";
let meta = Metadata21::parse(s.as_bytes()).unwrap();
assert_eq!(meta.metadata_version, "1.0");
assert_eq!(meta.name, PackageName::from_str("foobar").unwrap());
assert_eq!(meta.version, Version::new([1, 0]));
let s = "Metadata-Version: 1.0\nName: =?utf-8?q?=C3=A4_space?= <x@y.org>\nVersion: 1.0";
let meta = Metadata21::parse(s.as_bytes());
assert!(matches!(meta, Err(Error::InvalidName(_))));
}
}