Remove RFC2047 decoder (#967)

## Summary

- This was inherited from
d719988323/src/metadata.rs (LL78C2-L91C26)
- ...which introduced this code here:
9cd1d43f7c
- ...with the originating issue here:
https://github.com/PyO3/maturin/issues/612
- ...and the upstream issue here:
https://github.com/staktrace/mailparse/issues/50

It seems like the goal was to support Unicode in certain header fields,
but I don't think this is necessary for us. We only use
`get_first_value` for `Requires-Python`, which has to be ASCII, doesn't
it?

In my testing, it seems like the `charset` hack can also be removed. The
tests I copied over actually work without it, which makes me a bit
skeptical.

The main benefit here is that we get to remove a _big_ dependency
stack, including Chumsky and Stacker and psm which have limited
cross-platform support.
This commit is contained in:
Charlie Marsh 2024-01-18 15:09:45 -05:00 committed by GitHub
parent f17bad0a75
commit 96a61fb351
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 59 additions and 106 deletions

91
Cargo.lock generated
View file

@ -26,18 +26,6 @@ dependencies = [
"const-random",
]
[[package]]
name = "ahash"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01"
dependencies = [
"cfg-if 1.0.0",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.2"
@ -62,12 +50,6 @@ dependencies = [
"alloc-no-stdlib",
]
[[package]]
name = "allocator-api2"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
[[package]]
name = "android-tzdata"
version = "0.1.1"
@ -480,16 +462,6 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "chumsky"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
dependencies = [
"hashbrown 0.14.3",
"stacker",
]
[[package]]
name = "ciborium"
version = "0.2.1"
@ -747,7 +719,7 @@ version = "3.11.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f260e2fc850179ef410018660006951c1b55b79e8087e87111a2c388994b9b5"
dependencies = [
"ahash 0.3.8",
"ahash",
"cfg-if 0.1.10",
"num_cpus",
]
@ -1233,10 +1205,6 @@ name = "hashbrown"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
dependencies = [
"ahash 0.8.7",
"allocator-api2",
]
[[package]]
name = "heck"
@ -2272,15 +2240,6 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]]
name = "pubgrub"
version = "0.2.1"
@ -2842,7 +2801,6 @@ dependencies = [
"pep508_rs",
"puffin-normalize",
"regex",
"rfc2047-decoder",
"serde",
"serde_json",
"tempfile",
@ -3139,20 +3097,6 @@ dependencies = [
"rand",
]
[[package]]
name = "rfc2047-decoder"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e372613f15fc5171f9052b0c1fbafca5b1e5b0ba86aa13c9c39fd91ca1f7955"
dependencies = [
"base64 0.21.7",
"charset",
"chumsky",
"memchr",
"quoted_printable",
"thiserror",
]
[[package]]
name = "ring"
version = "0.17.7"
@ -3438,19 +3382,6 @@ version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if 1.0.0",
"libc",
"psm",
"winapi",
]
[[package]]
name = "strsim"
version = "0.10.0"
@ -4506,26 +4437,6 @@ dependencies = [
"linked-hash-map",
]
[[package]]
name = "zerocopy"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.48",
]
[[package]]
name = "zip"
version = "0.6.6"

View file

@ -67,7 +67,6 @@ regex = { version = "1.10.2" }
reqwest = { version = "0.11.23", default-features = false, features = ["json", "gzip", "brotli", "stream", "rustls-tls"] }
reqwest-middleware = { version = "0.2.4" }
reqwest-retry = { version = "0.3.0" }
rfc2047-decoder = { version = "1.0.2" }
rmp-serde = { version = "1.1.2" }
rustc-hash = { version = "1.1.0" }
seahash = { version = "4.1.0" }

View file

@ -21,7 +21,6 @@ chrono = { workspace = true, features = ["serde"] }
mailparse = { workspace = true }
once_cell = { workspace = true }
regex = { workspace = true }
rfc2047-decoder = { workspace = true }
serde = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }

View file

@ -75,25 +75,17 @@ pub enum Error {
impl Metadata21 {
/// Parse distribution metadata from metadata bytes
pub fn parse(content: &[u8]) -> Result<Self, Error> {
// HACK: trick mailparse to parse as UTF-8 instead of ASCII
let mut mail = b"Content-Type: text/plain; charset=utf-8\n".to_vec();
mail.extend_from_slice(content);
let msg = mailparse::parse_mail(&mail)?;
let msg = mailparse::parse_mail(content)?;
let headers = msg.get_headers();
let get_first_value = |name| {
headers.get_first_header(name).and_then(|header| {
match rfc2047_decoder::decode(header.get_value_raw()) {
Ok(value) => {
let value = header.get_value();
if value == "UNKNOWN" {
None
} else {
Some(value)
}
}
Err(_) => None,
}
})
};
let get_all_values = |name| {
@ -141,3 +133,55 @@ impl Metadata21 {
})
}
}
impl FromStr for Metadata21 {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Metadata21::parse(s.as_bytes())
}
}
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use pep440_rs::Version;
    use puffin_normalize::PackageName;

    use super::Metadata21;
    use crate::Error;

    /// Exercises `Metadata21` parsing: missing required fields, a minimal valid
    /// document, a document containing non-ASCII text, and RFC 2047
    /// encoded-word values in the `Name` header.
    #[test]
    fn test_parse_from_str() {
        // No `Name` header; this case goes through the `FromStr` impl.
        let without_name = "Metadata-Version: 1.0".parse::<Metadata21>();
        assert!(matches!(without_name, Err(Error::FieldNotFound("Name"))));

        // `Name` is present but `Version` is not.
        let without_version = Metadata21::parse("Metadata-Version: 1.0\nName: asdf".as_bytes());
        assert!(matches!(
            without_version,
            Err(Error::FieldNotFound("Version"))
        ));

        // The smallest document the assertions below accept as valid.
        let minimal_text = "Metadata-Version: 1.0\nName: asdf\nVersion: 1.0";
        let minimal = Metadata21::parse(minimal_text.as_bytes()).unwrap();
        assert_eq!(minimal.metadata_version, "1.0");
        assert_eq!(minimal.name, PackageName::from_str("asdf").unwrap());
        assert_eq!(minimal.version, Version::new([1, 0]));

        // Non-ASCII text in a header and in the body must not break parsing.
        let unicode_text =
            "Metadata-Version: 1.0\nName: asdf\nVersion: 1.0\nAuthor: 中文\n\n一个 Python 包";
        let unicode = Metadata21::parse(unicode_text.as_bytes()).unwrap();
        assert_eq!(unicode.metadata_version, "1.0");
        assert_eq!(unicode.name, PackageName::from_str("asdf").unwrap());
        assert_eq!(unicode.version, Version::new([1, 0]));

        // An RFC 2047 encoded-word in `Name` is expected to come out as `foobar`.
        let encoded_text = "Metadata-Version: 1.0\nName: =?utf-8?q?foobar?=\nVersion: 1.0";
        let encoded = Metadata21::parse(encoded_text.as_bytes()).unwrap();
        assert_eq!(encoded.metadata_version, "1.0");
        assert_eq!(encoded.name, PackageName::from_str("foobar").unwrap());
        assert_eq!(encoded.version, Version::new([1, 0]));

        // An encoded-word followed by an address-like suffix is rejected as a name.
        let bad_name_text =
            "Metadata-Version: 1.0\nName: =?utf-8?q?=C3=A4_space?= <x@y.org>\nVersion: 1.0";
        let bad_name = Metadata21::parse(bad_name_text.as_bytes());
        assert!(matches!(bad_name, Err(Error::InvalidName(_))));
    }
}