mirror of
https://github.com/astral-sh/uv.git
synced 2025-10-28 10:50:29 +00:00
Use memchr for wheel parsing (#10620)
## Summary
Before:
```
wheelname_parsing/numpy-compatible
time: [106.90 ns 107.86 ns 108.86 ns]
thrpt: [402.97 MiB/s 406.71 MiB/s 410.39 MiB/s]
change:
time: [-4.5360% -3.7694% -2.9179%] (p = 0.00 < 0.05)
thrpt: [+3.0056% +3.9170% +4.7515%]
Performance has improved.
wheelname_parsing/flyte-short-incompatible
time: [81.439 ns 82.209 ns 83.015 ns]
thrpt: [390.59 MiB/s 394.42 MiB/s 398.15 MiB/s]
change:
time: [+6.2344% +7.5385% +8.8928%] (p = 0.00 < 0.05)
thrpt: [-8.1666% -7.0101% -5.8685%]
Performance has regressed.
wheelname_parsing/flyte-short-compatible
time: [78.909 ns 79.456 ns 80.031 ns]
thrpt: [357.49 MiB/s 360.08 MiB/s 362.57 MiB/s]
change:
time: [+3.2653% +4.1733% +5.1062%] (p = 0.00 < 0.05)
thrpt: [-4.8582% -4.0061% -3.1620%]
Performance has regressed.
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild
wheelname_parsing/flyte-long-incompatible
time: [353.35 ns 357.49 ns 361.77 ns]
thrpt: [340.06 MiB/s 344.13 MiB/s 348.17 MiB/s]
change:
time: [+1.4846% +2.0228% +2.6504%] (p = 0.00 < 0.05)
thrpt: [-2.5820% -1.9827% -1.4629%]
Performance has regressed.
Found 10 outliers among 100 measurements (10.00%)
5 (5.00%) high mild
5 (5.00%) high severe
wheelname_parsing/flyte-long-compatible
time: [256.47 ns 258.12 ns 260.17 ns]
thrpt: [417.88 MiB/s 421.20 MiB/s 423.90 MiB/s]
change:
time: [+0.4079% +1.8252% +3.6270%] (p = 0.02 < 0.05)
thrpt: [-3.5001% -1.7925% -0.4063%]
Change within noise threshold.
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high severe
```
After:
```
wheelname_parsing_fastest/numpy-compatible
time: [61.500 ns 61.904 ns 62.350 ns]
thrpt: [703.60 MiB/s 708.66 MiB/s 713.32 MiB/s]
change:
time: [+0.9879% +1.4542% +1.9311%] (p = 0.00 < 0.05)
thrpt: [-1.8945% -1.4334% -0.9782%]
Change within noise threshold.
Found 8 outliers among 100 measurements (8.00%)
6 (6.00%) high mild
2 (2.00%) high severe
wheelname_parsing_fastest/flyte-short-incompatible
time: [49.341 ns 49.538 ns 49.769 ns]
thrpt: [651.50 MiB/s 654.54 MiB/s 657.16 MiB/s]
change:
time: [+5.8750% +6.3656% +6.8338%] (p = 0.00 < 0.05)
thrpt: [-6.3967% -5.9847% -5.5490%]
Performance has regressed.
Found 17 outliers among 100 measurements (17.00%)
5 (5.00%) low severe
1 (1.00%) low mild
6 (6.00%) high mild
5 (5.00%) high severe
wheelname_parsing_fastest/flyte-short-compatible
time: [49.425 ns 49.789 ns 50.193 ns]
thrpt: [570.01 MiB/s 574.63 MiB/s 578.86 MiB/s]
change:
time: [+5.1267% +5.7418% +6.3476%] (p = 0.00 < 0.05)
thrpt: [-5.9687% -5.4300% -4.8767%]
Performance has regressed.
Found 7 outliers among 100 measurements (7.00%)
7 (7.00%) high mild
wheelname_parsing_fastest/flyte-long-incompatible
time: [295.81 ns 298.01 ns 301.04 ns]
thrpt: [408.66 MiB/s 412.82 MiB/s 415.89 MiB/s]
change:
time: [+0.5553% +1.0842% +1.7059%] (p = 0.00 < 0.05)
thrpt: [-1.6772% -1.0726% -0.5523%]
Change within noise threshold.
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe
wheelname_parsing_fastest/flyte-long-compatible
time: [214.80 ns 216.10 ns 217.50 ns]
thrpt: [499.87 MiB/s 503.10 MiB/s 506.15 MiB/s]
change:
time: [+0.9003% +1.3207% +1.8268%] (p = 0.00 < 0.05)
thrpt: [-1.7940% -1.3035% -0.8923%]
Change within noise threshold.
Found 8 outliers among 100 measurements (8.00%)
7 (7.00%) high mild
1 (1.00%) high severe
```
(Ignore the percent changes; they're not relative to one another.)
So it's like >40% faster for the small case and >15% faster for the
large case.
This commit is contained in:
parent
82c08b83d9
commit
d20a48a5b4
5 changed files with 84 additions and 34 deletions
|
|
@ -21,6 +21,7 @@ uv-pep440 = { workspace = true }
|
|||
uv-platform-tags = { workspace = true }
|
||||
uv-small-str = { workspace = true }
|
||||
|
||||
memchr = { workspace = true }
|
||||
rkyv = { workspace = true, features = ["smallvec-1"] }
|
||||
serde = { workspace = true }
|
||||
smallvec = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ mod build_tag;
|
|||
mod egg;
|
||||
mod extension;
|
||||
mod source_dist;
|
||||
mod splitter;
|
||||
mod wheel;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
||||
|
|
|
|||
50
crates/uv-distribution-filename/src/splitter.rs
Normal file
50
crates/uv-distribution-filename/src/splitter.rs
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/// A simple splitter that uses `memchr` to find the next delimiter.
|
||||
pub(crate) struct MemchrSplitter<'a> {
|
||||
memchr: memchr::Memchr<'a>,
|
||||
haystack: &'a str,
|
||||
offset: usize,
|
||||
}
|
||||
|
||||
impl<'a> MemchrSplitter<'a> {
|
||||
#[inline]
|
||||
pub(crate) fn split(haystack: &'a str, delimiter: u8) -> Self {
|
||||
Self {
|
||||
memchr: memchr::Memchr::new(delimiter, haystack.as_bytes()),
|
||||
haystack,
|
||||
offset: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for MemchrSplitter<'a> {
|
||||
type Item = &'a str;
|
||||
|
||||
#[inline(always)]
|
||||
#[allow(clippy::inline_always)]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.memchr.next() {
|
||||
Some(index) => {
|
||||
let start = self.offset;
|
||||
self.offset = index + 1;
|
||||
Some(&self.haystack[start..index])
|
||||
}
|
||||
None if self.offset < self.haystack.len() => {
|
||||
let start = self.offset;
|
||||
self.offset = self.haystack.len();
|
||||
Some(&self.haystack[start..])
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
// We know we'll return at least one item if there's remaining text.
|
||||
let min = usize::from(self.offset < self.haystack.len());
|
||||
|
||||
// Maximum possible splits is remaining length divided by 2 (minimum one char between delimiters).
|
||||
let max = (self.haystack.len() - self.offset + 1) / 2 + min;
|
||||
|
||||
(min, Some(max))
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use memchr::memchr;
|
||||
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
|
||||
use std::str::FromStr;
|
||||
use thiserror::Error;
|
||||
|
|
@ -12,6 +13,7 @@ use uv_platform_tags::{
|
|||
PlatformTag, TagCompatibility, Tags,
|
||||
};
|
||||
|
||||
use crate::splitter::MemchrSplitter;
|
||||
use crate::{BuildTag, BuildTagError};
|
||||
|
||||
#[derive(
|
||||
|
|
@ -145,64 +147,66 @@ impl WheelFilename {
|
|||
// The wheel filename should contain either five or six entries. If six, then the third
|
||||
// entry is the build tag. If five, then the third entry is the Python tag.
|
||||
// https://www.python.org/dev/peps/pep-0427/#file-name-convention
|
||||
let mut parts = stem.split('-');
|
||||
let mut splitter = memchr::Memchr::new(b'-', stem.as_bytes());
|
||||
|
||||
let name = parts
|
||||
.next()
|
||||
.expect("split always yields 1 or more elements");
|
||||
|
||||
let Some(version) = parts.next() else {
|
||||
let Some(version) = splitter.next() else {
|
||||
return Err(WheelFilenameError::InvalidWheelFileName(
|
||||
filename.to_string(),
|
||||
"Must have a version".to_string(),
|
||||
));
|
||||
};
|
||||
|
||||
let Some(build_tag_or_python_tag) = parts.next() else {
|
||||
let Some(build_tag_or_python_tag) = splitter.next() else {
|
||||
return Err(WheelFilenameError::InvalidWheelFileName(
|
||||
filename.to_string(),
|
||||
"Must have a Python tag".to_string(),
|
||||
));
|
||||
};
|
||||
|
||||
let Some(python_tag_or_abi_tag) = parts.next() else {
|
||||
let Some(python_tag_or_abi_tag) = splitter.next() else {
|
||||
return Err(WheelFilenameError::InvalidWheelFileName(
|
||||
filename.to_string(),
|
||||
"Must have an ABI tag".to_string(),
|
||||
));
|
||||
};
|
||||
|
||||
let Some(abi_tag_or_platform_tag) = parts.next() else {
|
||||
let Some(abi_tag_or_platform_tag) = splitter.next() else {
|
||||
return Err(WheelFilenameError::InvalidWheelFileName(
|
||||
filename.to_string(),
|
||||
"Must have a platform tag".to_string(),
|
||||
));
|
||||
};
|
||||
|
||||
let (name, version, build_tag, python_tag, abi_tag, platform_tag) =
|
||||
if let Some(platform_tag) = parts.next() {
|
||||
if parts.next().is_some() {
|
||||
let (name, version, build_tag, python_tag, abi_tag, platform_tag, is_large) =
|
||||
if let Some(platform_tag) = splitter.next() {
|
||||
if splitter.next().is_some() {
|
||||
return Err(WheelFilenameError::InvalidWheelFileName(
|
||||
filename.to_string(),
|
||||
"Must have 5 or 6 components, but has more".to_string(),
|
||||
));
|
||||
}
|
||||
(
|
||||
name,
|
||||
version,
|
||||
Some(build_tag_or_python_tag),
|
||||
python_tag_or_abi_tag,
|
||||
abi_tag_or_platform_tag,
|
||||
platform_tag,
|
||||
&stem[..version],
|
||||
&stem[version + 1..build_tag_or_python_tag],
|
||||
Some(&stem[build_tag_or_python_tag + 1..python_tag_or_abi_tag]),
|
||||
&stem[python_tag_or_abi_tag + 1..abi_tag_or_platform_tag],
|
||||
&stem[abi_tag_or_platform_tag + 1..platform_tag],
|
||||
&stem[platform_tag + 1..],
|
||||
// Always take the slow path if a build tag is present.
|
||||
true,
|
||||
)
|
||||
} else {
|
||||
(
|
||||
name,
|
||||
version,
|
||||
&stem[..version],
|
||||
&stem[version + 1..build_tag_or_python_tag],
|
||||
None,
|
||||
build_tag_or_python_tag,
|
||||
python_tag_or_abi_tag,
|
||||
abi_tag_or_platform_tag,
|
||||
&stem[build_tag_or_python_tag + 1..python_tag_or_abi_tag],
|
||||
&stem[python_tag_or_abi_tag + 1..abi_tag_or_platform_tag],
|
||||
&stem[abi_tag_or_platform_tag + 1..],
|
||||
// Determine whether any of the tag types contain a period, which would indicate
|
||||
// that at least one of the tag types includes multiple tags (which in turn
|
||||
// necessitates taking the slow path).
|
||||
memchr(b'.', stem[build_tag_or_python_tag..].as_bytes()).is_some(),
|
||||
)
|
||||
};
|
||||
|
||||
|
|
@ -217,30 +221,23 @@ impl WheelFilename {
|
|||
})
|
||||
.transpose()?;
|
||||
|
||||
let tags = if build_tag.is_some()
|
||||
|| python_tag.contains('.')
|
||||
|| abi_tag.contains('.')
|
||||
|| platform_tag.contains('.')
|
||||
{
|
||||
let tags = if is_large {
|
||||
WheelTag::Large {
|
||||
large: Box::new(WheelTagLarge {
|
||||
build_tag,
|
||||
python_tag: python_tag
|
||||
.split('.')
|
||||
python_tag: MemchrSplitter::split(python_tag, b'.')
|
||||
.map(LanguageTag::from_str)
|
||||
.collect::<Result<_, _>>()
|
||||
.map_err(|err| {
|
||||
WheelFilenameError::InvalidLanguageTag(filename.to_string(), err)
|
||||
})?,
|
||||
abi_tag: abi_tag
|
||||
.split('.')
|
||||
abi_tag: MemchrSplitter::split(abi_tag, b'.')
|
||||
.map(AbiTag::from_str)
|
||||
.collect::<Result<_, _>>()
|
||||
.map_err(|err| {
|
||||
WheelFilenameError::InvalidAbiTag(filename.to_string(), err)
|
||||
})?,
|
||||
platform_tag: platform_tag
|
||||
.split('.')
|
||||
platform_tag: MemchrSplitter::split(platform_tag, b'.')
|
||||
.map(PlatformTag::from_str)
|
||||
.collect::<Result<_, _>>()
|
||||
.map_err(|err| {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue