mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 10:58:28 +00:00
Use `arcstr` for package, extra, and group names (#10475)
## Summary

This appears to be a consistent 1% performance improvement and should also reduce memory usage quite a bit. We've also decided to use these for markers, so it's nice to use the same optimization here.

```
❯ hyperfine "./uv pip compile --universal scripts/requirements/airflow.in" "./arcstr pip compile --universal scripts/requirements/airflow.in" --min-runs 50 --warmup 20
Benchmark 1: ./uv pip compile --universal scripts/requirements/airflow.in
  Time (mean ± σ): 136.3 ms ± 4.0 ms [User: 139.1 ms, System: 241.9 ms]
  Range (min … max): 131.5 ms … 149.5 ms 50 runs

Benchmark 2: ./arcstr pip compile --universal scripts/requirements/airflow.in
  Time (mean ± σ): 134.9 ms ± 3.2 ms [User: 137.6 ms, System: 239.0 ms]
  Range (min … max): 130.1 ms … 151.8 ms 50 runs

Summary
  ./arcstr pip compile --universal scripts/requirements/airflow.in ran
    1.01 ± 0.04 times faster than ./uv pip compile --universal scripts/requirements/airflow.in
```
This commit is contained in:
parent
503f9a97af
commit
b3d7beb1a0
9 changed files with 166 additions and 16 deletions
|
@ -4,6 +4,7 @@ use std::str::FromStr;
|
|||
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
|
||||
use crate::small_string::SmallString;
|
||||
use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNameError};
|
||||
|
||||
/// The normalized name of an extra dependency.
|
||||
|
@ -14,9 +15,9 @@ use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNam
|
|||
/// See:
|
||||
/// - <https://peps.python.org/pep-0685/#specification/>
|
||||
/// - <https://packaging.python.org/en/latest/specifications/name-normalization/>
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
|
||||
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
pub struct ExtraName(String);
|
||||
pub struct ExtraName(SmallString);
|
||||
|
||||
impl ExtraName {
|
||||
/// Create a validated, normalized extra name.
|
||||
|
|
|
@ -5,6 +5,7 @@ use std::sync::LazyLock;
|
|||
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
use crate::small_string::SmallString;
|
||||
use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNameError};
|
||||
|
||||
/// The normalized name of a dependency group.
|
||||
|
@ -12,9 +13,9 @@ use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNam
|
|||
/// See:
|
||||
/// - <https://peps.python.org/pep-0735/>
|
||||
/// - <https://packaging.python.org/en/latest/specifications/name-normalization/>
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
pub struct GroupName(String);
|
||||
pub struct GroupName(SmallString);
|
||||
|
||||
impl GroupName {
|
||||
/// Create a validated, normalized group name.
|
||||
|
|
|
@ -5,26 +5,37 @@ pub use dist_info_name::DistInfoName;
|
|||
pub use extra_name::ExtraName;
|
||||
pub use group_name::{GroupName, DEV_DEPENDENCIES};
|
||||
pub use package_name::PackageName;
|
||||
use small_string::SmallString;
|
||||
|
||||
mod dist_info_name;
|
||||
mod extra_name;
|
||||
mod group_name;
|
||||
mod package_name;
|
||||
mod small_string;
|
||||
|
||||
/// Validate and normalize an owned package or extra name.
|
||||
pub(crate) fn validate_and_normalize_owned(name: String) -> Result<String, InvalidNameError> {
|
||||
pub(crate) fn validate_and_normalize_owned(name: String) -> Result<SmallString, InvalidNameError> {
|
||||
if is_normalized(&name)? {
|
||||
Ok(name)
|
||||
Ok(SmallString::from(name))
|
||||
} else {
|
||||
validate_and_normalize_ref(name)
|
||||
Ok(SmallString::from(normalize(&name)?))
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate and normalize an unowned package or extra name.
|
||||
pub(crate) fn validate_and_normalize_ref(
|
||||
name: impl AsRef<str>,
|
||||
) -> Result<String, InvalidNameError> {
|
||||
) -> Result<SmallString, InvalidNameError> {
|
||||
let name = name.as_ref();
|
||||
if is_normalized(name)? {
|
||||
Ok(SmallString::from(name))
|
||||
} else {
|
||||
Ok(SmallString::from(normalize(name)?))
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalize an unowned package or extra name.
|
||||
fn normalize(name: &str) -> Result<String, InvalidNameError> {
|
||||
let mut normalized = String::with_capacity(name.len());
|
||||
|
||||
let mut last = None;
|
||||
|
@ -136,9 +147,14 @@ mod tests {
|
|||
"FrIeNdLy-._.-bArD",
|
||||
];
|
||||
for input in inputs {
|
||||
assert_eq!(validate_and_normalize_ref(input).unwrap(), "friendly-bard");
|
||||
assert_eq!(
|
||||
validate_and_normalize_owned(input.to_string()).unwrap(),
|
||||
validate_and_normalize_ref(input).unwrap().as_ref(),
|
||||
"friendly-bard"
|
||||
);
|
||||
assert_eq!(
|
||||
validate_and_normalize_owned(input.to_string())
|
||||
.unwrap()
|
||||
.as_ref(),
|
||||
"friendly-bard"
|
||||
);
|
||||
}
|
||||
|
@ -169,9 +185,11 @@ mod tests {
|
|||
// Unchanged
|
||||
let unchanged = ["friendly-bard", "1okay", "okay2"];
|
||||
for input in unchanged {
|
||||
assert_eq!(validate_and_normalize_ref(input).unwrap(), input);
|
||||
assert_eq!(validate_and_normalize_ref(input).unwrap().as_ref(), input);
|
||||
assert_eq!(
|
||||
validate_and_normalize_owned(input.to_string()).unwrap(),
|
||||
validate_and_normalize_owned(input.to_string())
|
||||
.unwrap()
|
||||
.as_ref(),
|
||||
input
|
||||
);
|
||||
assert!(is_normalized(input).unwrap());
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
use std::borrow::Cow;
|
||||
use std::cmp::PartialEq;
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
|
||||
use crate::small_string::SmallString;
|
||||
use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNameError};
|
||||
|
||||
/// The normalized name of a package.
|
||||
|
@ -13,7 +15,6 @@ use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNam
|
|||
/// See: <https://packaging.python.org/en/latest/specifications/name-normalization/>
|
||||
#[derive(
|
||||
Debug,
|
||||
Default,
|
||||
Clone,
|
||||
PartialEq,
|
||||
Eq,
|
||||
|
@ -27,7 +28,7 @@ use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNam
|
|||
)]
|
||||
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
#[rkyv(derive(Debug))]
|
||||
pub struct PackageName(String);
|
||||
pub struct PackageName(SmallString);
|
||||
|
||||
impl PackageName {
|
||||
/// Create a validated, normalized package name.
|
||||
|
@ -56,7 +57,7 @@ impl PackageName {
|
|||
|
||||
Cow::Owned(owned_string)
|
||||
} else {
|
||||
Cow::Borrowed(self.0.as_str())
|
||||
Cow::Borrowed(self.0.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
119
crates/uv-normalize/src/small_string.rs
Normal file
119
crates/uv-normalize/src/small_string.rs
Normal file
|
@ -0,0 +1,119 @@
|
|||
use std::cmp::PartialEq;
|
||||
use std::ops::Deref;
|
||||
|
||||
/// An optimized small string type for short identifiers, like package names.
|
||||
///
|
||||
/// Represented as an [`arcstr::ArcStr`] internally.
|
||||
#[derive(Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub(crate) struct SmallString(arcstr::ArcStr);
|
||||
|
||||
impl From<&str> for SmallString {
|
||||
#[inline]
|
||||
fn from(s: &str) -> Self {
|
||||
Self(s.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for SmallString {
|
||||
#[inline]
|
||||
fn from(s: String) -> Self {
|
||||
Self(s.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<str> for SmallString {
|
||||
#[inline]
|
||||
fn as_ref(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for SmallString {
|
||||
type Target = str;
|
||||
|
||||
#[inline]
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl core::fmt::Debug for SmallString {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
core::fmt::Debug::fmt(&self.0, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl core::fmt::Display for SmallString {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
core::fmt::Display::fmt(&self.0, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// A [`serde::Serialize`] implementation for [`SmallString`].
|
||||
impl serde::Serialize for SmallString {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
self.0.serialize(serializer)
|
||||
}
|
||||
}
|
||||
|
||||
/// An [`rkyv`] implementation for [`SmallString`].
|
||||
impl rkyv::Archive for SmallString {
|
||||
type Archived = rkyv::string::ArchivedString;
|
||||
type Resolver = rkyv::string::StringResolver;
|
||||
|
||||
#[inline]
|
||||
fn resolve(&self, resolver: Self::Resolver, out: rkyv::Place<Self::Archived>) {
|
||||
rkyv::string::ArchivedString::resolve_from_str(&self.0, resolver, out);
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> rkyv::Serialize<S> for SmallString
|
||||
where
|
||||
S: rkyv::rancor::Fallible + rkyv::ser::Allocator + rkyv::ser::Writer + ?Sized,
|
||||
S::Error: rkyv::rancor::Source,
|
||||
{
|
||||
fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
|
||||
rkyv::string::ArchivedString::serialize_from_str(&self.0, serializer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: rkyv::rancor::Fallible + ?Sized> rkyv::Deserialize<SmallString, D>
|
||||
for rkyv::string::ArchivedString
|
||||
{
|
||||
fn deserialize(&self, _deserializer: &mut D) -> Result<SmallString, D::Error> {
|
||||
Ok(SmallString::from(self.as_str()))
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<SmallString> for rkyv::string::ArchivedString {
|
||||
fn eq(&self, other: &SmallString) -> bool {
|
||||
**other == **self
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd<SmallString> for rkyv::string::ArchivedString {
|
||||
fn partial_cmp(&self, other: &SmallString) -> Option<::core::cmp::Ordering> {
|
||||
Some(self.as_str().cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
/// A [`schemars::JsonSchema`] implementation for [`SmallString`].
|
||||
#[cfg(feature = "schemars")]
|
||||
impl schemars::JsonSchema for SmallString {
|
||||
fn is_referenceable() -> bool {
|
||||
String::is_referenceable()
|
||||
}
|
||||
|
||||
fn schema_name() -> String {
|
||||
String::schema_name()
|
||||
}
|
||||
|
||||
fn json_schema(_gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
|
||||
String::json_schema(_gen)
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue