From d20a48a5b46b1676f67fff7636c699401a875c78 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 15 Jan 2025 11:39:25 -0500 Subject: [PATCH] Use `memchr` for wheel parsing (#10620) ## Summary Before: ``` wheelname_parsing/numpy-compatible time: [106.90 ns 107.86 ns 108.86 ns] thrpt: [402.97 MiB/s 406.71 MiB/s 410.39 MiB/s] change: time: [-4.5360% -3.7694% -2.9179%] (p = 0.00 < 0.05) thrpt: [+3.0056% +3.9170% +4.7515%] Performance has improved. wheelname_parsing/flyte-short-incompatible time: [81.439 ns 82.209 ns 83.015 ns] thrpt: [390.59 MiB/s 394.42 MiB/s 398.15 MiB/s] change: time: [+6.2344% +7.5385% +8.8928%] (p = 0.00 < 0.05) thrpt: [-8.1666% -7.0101% -5.8685%] Performance has regressed. wheelname_parsing/flyte-short-compatible time: [78.909 ns 79.456 ns 80.031 ns] thrpt: [357.49 MiB/s 360.08 MiB/s 362.57 MiB/s] change: time: [+3.2653% +4.1733% +5.1062%] (p = 0.00 < 0.05) thrpt: [-4.8582% -4.0061% -3.1620%] Performance has regressed. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high mild wheelname_parsing/flyte-long-incompatible time: [353.35 ns 357.49 ns 361.77 ns] thrpt: [340.06 MiB/s 344.13 MiB/s 348.17 MiB/s] change: time: [+1.4846% +2.0228% +2.6504%] (p = 0.00 < 0.05) thrpt: [-2.5820% -1.9827% -1.4629%] Performance has regressed. Found 10 outliers among 100 measurements (10.00%) 5 (5.00%) high mild 5 (5.00%) high severe wheelname_parsing/flyte-long-compatible time: [256.47 ns 258.12 ns 260.17 ns] thrpt: [417.88 MiB/s 421.20 MiB/s 423.90 MiB/s] change: time: [+0.4079% +1.8252% +3.6270%] (p = 0.02 < 0.05) thrpt: [-3.5001% -1.7925% -0.4063%] Change within noise threshold. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high severe ``` After: ``` wheelname_parsing_fastest/numpy-compatible time: [61.500 ns 61.904 ns 62.350 ns] thrpt: [703.60 MiB/s 708.66 MiB/s 713.32 MiB/s] change: time: [+0.9879% +1.4542% +1.9311%] (p = 0.00 < 0.05) thrpt: [-1.8945% -1.4334% -0.9782%] Change within noise threshold. Found 8 outliers among 100 measurements (8.00%) 6 (6.00%) high mild 2 (2.00%) high severe wheelname_parsing_fastest/flyte-short-incompatible time: [49.341 ns 49.538 ns 49.769 ns] thrpt: [651.50 MiB/s 654.54 MiB/s 657.16 MiB/s] change: time: [+5.8750% +6.3656% +6.8338%] (p = 0.00 < 0.05) thrpt: [-6.3967% -5.9847% -5.5490%] Performance has regressed. Found 17 outliers among 100 measurements (17.00%) 5 (5.00%) low severe 1 (1.00%) low mild 6 (6.00%) high mild 5 (5.00%) high severe wheelname_parsing_fastest/flyte-short-compatible time: [49.425 ns 49.789 ns 50.193 ns] thrpt: [570.01 MiB/s 574.63 MiB/s 578.86 MiB/s] change: time: [+5.1267% +5.7418% +6.3476%] (p = 0.00 < 0.05) thrpt: [-5.9687% -5.4300% -4.8767%] Performance has regressed. Found 7 outliers among 100 measurements (7.00%) 7 (7.00%) high mild wheelname_parsing_fastest/flyte-long-incompatible time: [295.81 ns 298.01 ns 301.04 ns] thrpt: [408.66 MiB/s 412.82 MiB/s 415.89 MiB/s] change: time: [+0.5553% +1.0842% +1.7059%] (p = 0.00 < 0.05) thrpt: [-1.6772% -1.0726% -0.5523%] Change within noise threshold. Found 4 outliers among 100 measurements (4.00%) 3 (3.00%) high mild 1 (1.00%) high severe wheelname_parsing_fastest/flyte-long-compatible time: [214.80 ns 216.10 ns 217.50 ns] thrpt: [499.87 MiB/s 503.10 MiB/s 506.15 MiB/s] change: time: [+0.9003% +1.3207% +1.8268%] (p = 0.00 < 0.05) thrpt: [-1.7940% -1.3035% -0.8923%] Change within noise threshold. Found 8 outliers among 100 measurements (8.00%) 7 (7.00%) high mild 1 (1.00%) high severe ``` (Ignore the percent changes; they're not relative to one another.) So it's like >40% faster for the small case and >15% faster for the large case. --- Cargo.lock | 1 + crates/uv-distribution-filename/Cargo.toml | 1 + crates/uv-distribution-filename/src/lib.rs | 1 + .../uv-distribution-filename/src/splitter.rs | 50 ++++++++++++++ crates/uv-distribution-filename/src/wheel.rs | 65 +++++++++---------- 5 files changed, 84 insertions(+), 34 deletions(-) create mode 100644 crates/uv-distribution-filename/src/splitter.rs diff --git a/Cargo.lock b/Cargo.lock index cac33e552..9ef407a99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5002,6 +5002,7 @@ name = "uv-distribution-filename" version = "0.0.1" dependencies = [ "insta", + "memchr", "rkyv", "serde", "smallvec", diff --git a/crates/uv-distribution-filename/Cargo.toml b/crates/uv-distribution-filename/Cargo.toml index 430b99346..ee2eca251 100644 --- a/crates/uv-distribution-filename/Cargo.toml +++ b/crates/uv-distribution-filename/Cargo.toml @@ -21,6 +21,7 @@ uv-pep440 = { workspace = true } uv-platform-tags = { workspace = true } uv-small-str = { workspace = true } +memchr = { workspace = true } rkyv = { workspace = true, features = ["smallvec-1"] } serde = { workspace = true } smallvec = { workspace = true } diff --git a/crates/uv-distribution-filename/src/lib.rs b/crates/uv-distribution-filename/src/lib.rs index 39c8ac0f8..540947e5d 100644 --- a/crates/uv-distribution-filename/src/lib.rs +++ b/crates/uv-distribution-filename/src/lib.rs @@ -13,6 +13,7 @@ mod build_tag; mod egg; mod extension; mod source_dist; +mod splitter; mod wheel; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] diff --git a/crates/uv-distribution-filename/src/splitter.rs b/crates/uv-distribution-filename/src/splitter.rs new file mode 100644 index 000000000..1e6177d23 --- /dev/null +++ b/crates/uv-distribution-filename/src/splitter.rs @@ -0,0 +1,50 @@ +/// A simple splitter that uses `memchr` to find the next delimiter. +pub(crate) struct MemchrSplitter<'a> { + memchr: memchr::Memchr<'a>, + haystack: &'a str, + offset: usize, +} + +impl<'a> MemchrSplitter<'a> { + #[inline] + pub(crate) fn split(haystack: &'a str, delimiter: u8) -> Self { + Self { + memchr: memchr::Memchr::new(delimiter, haystack.as_bytes()), + haystack, + offset: 0, + } + } +} + +impl<'a> Iterator for MemchrSplitter<'a> { + type Item = &'a str; + + #[inline(always)] + #[allow(clippy::inline_always)] + fn next(&mut self) -> Option { + match self.memchr.next() { + Some(index) => { + let start = self.offset; + self.offset = index + 1; + Some(&self.haystack[start..index]) + } + None if self.offset < self.haystack.len() => { + let start = self.offset; + self.offset = self.haystack.len(); + Some(&self.haystack[start..]) + } + None => None, + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + // We know we'll return at least one item if there's remaining text. + let min = usize::from(self.offset < self.haystack.len()); + + // Maximum possible splits is remaining length divided by 2 (minimum one char between delimiters). + let max = (self.haystack.len() - self.offset + 1) / 2 + min; + + (min, Some(max)) + } +} diff --git a/crates/uv-distribution-filename/src/wheel.rs b/crates/uv-distribution-filename/src/wheel.rs index 81bc33d33..e5e735d6e 100644 --- a/crates/uv-distribution-filename/src/wheel.rs +++ b/crates/uv-distribution-filename/src/wheel.rs @@ -1,5 +1,6 @@ use std::fmt::{Display, Formatter}; +use memchr::memchr; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; use std::str::FromStr; use thiserror::Error; @@ -12,6 +13,7 @@ use uv_platform_tags::{ PlatformTag, TagCompatibility, Tags, }; +use crate::splitter::MemchrSplitter; use crate::{BuildTag, BuildTagError}; #[derive( @@ -145,64 +147,66 @@ impl WheelFilename { // The wheel filename should contain either five or six entries. If six, then the third // entry is the build tag. If five, then the third entry is the Python tag. // https://www.python.org/dev/peps/pep-0427/#file-name-convention - let mut parts = stem.split('-'); + let mut splitter = memchr::Memchr::new(b'-', stem.as_bytes()); - let name = parts - .next() - .expect("split always yields 1 or more elements"); - - let Some(version) = parts.next() else { + let Some(version) = splitter.next() else { return Err(WheelFilenameError::InvalidWheelFileName( filename.to_string(), "Must have a version".to_string(), )); }; - let Some(build_tag_or_python_tag) = parts.next() else { + let Some(build_tag_or_python_tag) = splitter.next() else { return Err(WheelFilenameError::InvalidWheelFileName( filename.to_string(), "Must have a Python tag".to_string(), )); }; - let Some(python_tag_or_abi_tag) = parts.next() else { + let Some(python_tag_or_abi_tag) = splitter.next() else { return Err(WheelFilenameError::InvalidWheelFileName( filename.to_string(), "Must have an ABI tag".to_string(), )); }; - let Some(abi_tag_or_platform_tag) = parts.next() else { + let Some(abi_tag_or_platform_tag) = splitter.next() else { return Err(WheelFilenameError::InvalidWheelFileName( filename.to_string(), "Must have a platform tag".to_string(), )); }; - let (name, version, build_tag, python_tag, abi_tag, platform_tag) = - if let Some(platform_tag) = parts.next() { - if parts.next().is_some() { + let (name, version, build_tag, python_tag, abi_tag, platform_tag, is_large) = + if let Some(platform_tag) = splitter.next() { + if splitter.next().is_some() { return Err(WheelFilenameError::InvalidWheelFileName( filename.to_string(), "Must have 5 or 6 components, but has more".to_string(), )); } ( - name, - version, - Some(build_tag_or_python_tag), - python_tag_or_abi_tag, - abi_tag_or_platform_tag, - platform_tag, + &stem[..version], + &stem[version + 1..build_tag_or_python_tag], + Some(&stem[build_tag_or_python_tag + 1..python_tag_or_abi_tag]), + &stem[python_tag_or_abi_tag + 1..abi_tag_or_platform_tag], + &stem[abi_tag_or_platform_tag + 1..platform_tag], + &stem[platform_tag + 1..], + // Always take the slow path if a build tag is present. + true, ) } else { ( - name, - version, + &stem[..version], + &stem[version + 1..build_tag_or_python_tag], None, - build_tag_or_python_tag, - python_tag_or_abi_tag, - abi_tag_or_platform_tag, + &stem[build_tag_or_python_tag + 1..python_tag_or_abi_tag], + &stem[python_tag_or_abi_tag + 1..abi_tag_or_platform_tag], + &stem[abi_tag_or_platform_tag + 1..], + // Determine whether any of the tag types contain a period, which would indicate + // that at least one of the tag types includes multiple tags (which in turn + // necessitates taking the slow path). + memchr(b'.', stem[build_tag_or_python_tag..].as_bytes()).is_some(), ) }; @@ -217,30 +221,23 @@ impl WheelFilename { }) .transpose()?; - let tags = if build_tag.is_some() - || python_tag.contains('.') - || abi_tag.contains('.') - || platform_tag.contains('.') - { + let tags = if is_large { WheelTag::Large { large: Box::new(WheelTagLarge { build_tag, - python_tag: python_tag - .split('.') + python_tag: MemchrSplitter::split(python_tag, b'.') .map(LanguageTag::from_str) .collect::>() .map_err(|err| { WheelFilenameError::InvalidLanguageTag(filename.to_string(), err) })?, - abi_tag: abi_tag - .split('.') + abi_tag: MemchrSplitter::split(abi_tag, b'.') .map(AbiTag::from_str) .collect::>() .map_err(|err| { WheelFilenameError::InvalidAbiTag(filename.to_string(), err) })?, - platform_tag: platform_tag - .split('.') + platform_tag: MemchrSplitter::split(platform_tag, b'.') .map(PlatformTag::from_str) .collect::>() .map_err(|err| {