mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-30 22:01:47 +00:00
red-knot: infer multiplication for strings and integers (#13117)
## Summary

The resulting type when multiplying a string literal by an integer literal is one of two types:

- `StringLiteral`, in the case where it is a reasonably small resulting string (arbitrarily bounded here to 4096 bytes, roughly a page on many operating systems), including the fully expanded string.
- `LiteralString`, matching Pyright etc., for strings larger than that.

Additionally:

- Switch to using `Box<str>` instead of `String` for the internal value of `StringLiteral`, saving some non-trivial byte overhead (and keeping the total number of allocations the same).
- Be clearer and more accurate about which types we ought to defer to in `StringLiteral` and `LiteralString` member lookup.

## Test Plan

Added a test case covering multiplication by integers: positive, negative, zero, and in and out of bounds.

---------

Co-authored-by: Alex Waygood <alex.waygood@gmail.com>
Co-authored-by: Carl Meyer <carl@astral.sh>
This commit is contained in:
parent
96b42b0c8f
commit
aba1802828
3 changed files with 88 additions and 8 deletions
|
@ -183,6 +183,9 @@ pub enum Type<'db> {
|
||||||
BooleanLiteral(bool),
|
BooleanLiteral(bool),
|
||||||
/// A string literal
|
/// A string literal
|
||||||
StringLiteral(StringLiteralType<'db>),
|
StringLiteral(StringLiteralType<'db>),
|
||||||
|
/// A string known to originate only from literal values, but whose value is not known (unlike
|
||||||
|
/// `StringLiteral` above).
|
||||||
|
LiteralString,
|
||||||
/// A bytes literal
|
/// A bytes literal
|
||||||
BytesLiteral(BytesLiteralType<'db>),
|
BytesLiteral(BytesLiteralType<'db>),
|
||||||
// TODO protocols, callable types, overloads, generics, type vars
|
// TODO protocols, callable types, overloads, generics, type vars
|
||||||
|
@ -281,7 +284,13 @@ impl<'db> Type<'db> {
|
||||||
}
|
}
|
||||||
Type::BooleanLiteral(_) => Type::Unknown,
|
Type::BooleanLiteral(_) => Type::Unknown,
|
||||||
Type::StringLiteral(_) => {
|
Type::StringLiteral(_) => {
|
||||||
// TODO defer to Type::Instance(<str from typeshed>).member
|
// TODO defer to `typing.LiteralString`/`builtins.str` methods
|
||||||
|
// from typeshed's stubs
|
||||||
|
Type::Unknown
|
||||||
|
}
|
||||||
|
Type::LiteralString => {
|
||||||
|
// TODO defer to `typing.LiteralString`/`builtins.str` methods
|
||||||
|
// from typeshed's stubs
|
||||||
Type::Unknown
|
Type::Unknown
|
||||||
}
|
}
|
||||||
Type::BytesLiteral(_) => {
|
Type::BytesLiteral(_) => {
|
||||||
|
@ -387,7 +396,7 @@ pub struct IntersectionType<'db> {
|
||||||
#[salsa::interned]
|
#[salsa::interned]
|
||||||
pub struct StringLiteralType<'db> {
|
pub struct StringLiteralType<'db> {
|
||||||
#[return_ref]
|
#[return_ref]
|
||||||
value: String,
|
value: Box<str>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[salsa::interned]
|
#[salsa::interned]
|
||||||
|
|
|
@ -46,6 +46,7 @@ impl Display for DisplayType<'_> {
|
||||||
r#"Literal["{}"]"#,
|
r#"Literal["{}"]"#,
|
||||||
string.value(self.db).replace('"', r#"\""#)
|
string.value(self.db).replace('"', r#"\""#)
|
||||||
),
|
),
|
||||||
|
Type::LiteralString => write!(f, "LiteralString"),
|
||||||
Type::BytesLiteral(bytes) => {
|
Type::BytesLiteral(bytes) => {
|
||||||
let escape =
|
let escape =
|
||||||
AsciiEscape::with_preferred_quote(bytes.value(self.db).as_ref(), Quote::Double);
|
AsciiEscape::with_preferred_quote(bytes.value(self.db).as_ref(), Quote::Double);
|
||||||
|
|
|
@ -217,6 +217,12 @@ struct TypeInferenceBuilder<'db> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'db> TypeInferenceBuilder<'db> {
|
impl<'db> TypeInferenceBuilder<'db> {
|
||||||
|
/// How big a string do we build before bailing?
|
||||||
|
///
|
||||||
|
/// This is a fairly arbitrary number. It should be *far* more than enough
|
||||||
|
/// for most use cases, but we can reevaluate it later if useful.
|
||||||
|
const MAX_STRING_LITERAL_SIZE: usize = 4096;
|
||||||
|
|
||||||
/// Creates a new builder for inferring types in a region.
|
/// Creates a new builder for inferring types in a region.
|
||||||
pub(super) fn new(
|
pub(super) fn new(
|
||||||
db: &'db dyn Db,
|
db: &'db dyn Db,
|
||||||
|
@ -1259,12 +1265,16 @@ impl<'db> TypeInferenceBuilder<'db> {
|
||||||
Type::BooleanLiteral(*value)
|
Type::BooleanLiteral(*value)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::unused_self)]
|
|
||||||
fn infer_string_literal_expression(&mut self, literal: &ast::ExprStringLiteral) -> Type<'db> {
|
fn infer_string_literal_expression(&mut self, literal: &ast::ExprStringLiteral) -> Type<'db> {
|
||||||
Type::StringLiteral(StringLiteralType::new(self.db, literal.value.to_string()))
|
let value = if literal.value.len() <= Self::MAX_STRING_LITERAL_SIZE {
|
||||||
|
literal.value.to_str().into()
|
||||||
|
} else {
|
||||||
|
Box::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
Type::StringLiteral(StringLiteralType::new(self.db, value))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::unused_self)]
|
|
||||||
fn infer_bytes_literal_expression(&mut self, literal: &ast::ExprBytesLiteral) -> Type<'db> {
|
fn infer_bytes_literal_expression(&mut self, literal: &ast::ExprBytesLiteral) -> Type<'db> {
|
||||||
// TODO: ignoring r/R prefixes for now, should normalize bytes values
|
// TODO: ignoring r/R prefixes for now, should normalize bytes values
|
||||||
Type::BytesLiteral(BytesLiteralType::new(
|
Type::BytesLiteral(BytesLiteralType::new(
|
||||||
|
@ -1787,11 +1797,30 @@ impl<'db> TypeInferenceBuilder<'db> {
|
||||||
|
|
||||||
(Type::StringLiteral(lhs), Type::StringLiteral(rhs), ast::Operator::Add) => {
|
(Type::StringLiteral(lhs), Type::StringLiteral(rhs), ast::Operator::Add) => {
|
||||||
Type::StringLiteral(StringLiteralType::new(self.db, {
|
Type::StringLiteral(StringLiteralType::new(self.db, {
|
||||||
let lhs_value = lhs.value(self.db);
|
let lhs_value = lhs.value(self.db).to_string();
|
||||||
let rhs_value = rhs.value(self.db);
|
let rhs_value = rhs.value(self.db).as_ref();
|
||||||
lhs_value.clone() + rhs_value
|
(lhs_value + rhs_value).into()
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(Type::StringLiteral(s), Type::IntLiteral(n), ast::Operator::Mult)
|
||||||
|
| (Type::IntLiteral(n), Type::StringLiteral(s), ast::Operator::Mult) => {
|
||||||
|
if n < 1 {
|
||||||
|
Type::StringLiteral(StringLiteralType::new(self.db, Box::default()))
|
||||||
|
} else if let Ok(n) = usize::try_from(n) {
|
||||||
|
if n.checked_mul(s.value(self.db).len())
|
||||||
|
.is_some_and(|new_length| new_length <= Self::MAX_STRING_LITERAL_SIZE)
|
||||||
|
{
|
||||||
|
let new_literal = s.value(self.db).repeat(n);
|
||||||
|
Type::StringLiteral(StringLiteralType::new(self.db, new_literal.into()))
|
||||||
|
} else {
|
||||||
|
Type::LiteralString
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Type::LiteralString
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
_ => Type::Unknown, // TODO
|
_ => Type::Unknown, // TODO
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1951,6 +1980,7 @@ enum ModuleNameResolutionError {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
|
|
||||||
use ruff_db::files::{system_path_to_file, File};
|
use ruff_db::files::{system_path_to_file, File};
|
||||||
|
@ -1969,6 +1999,8 @@ mod tests {
|
||||||
use crate::types::{global_symbol_ty_by_name, infer_definition_types, symbol_ty_by_name, Type};
|
use crate::types::{global_symbol_ty_by_name, infer_definition_types, symbol_ty_by_name, Type};
|
||||||
use crate::{HasTy, ProgramSettings, SemanticModel};
|
use crate::{HasTy, ProgramSettings, SemanticModel};
|
||||||
|
|
||||||
|
use super::TypeInferenceBuilder;
|
||||||
|
|
||||||
fn setup_db() -> TestDb {
|
fn setup_db() -> TestDb {
|
||||||
let db = TestDb::new();
|
let db = TestDb::new();
|
||||||
|
|
||||||
|
@ -2378,6 +2410,44 @@ mod tests {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn multiplied_string() -> anyhow::Result<()> {
|
||||||
|
let mut db = setup_db();
|
||||||
|
|
||||||
|
db.write_dedented(
|
||||||
|
"src/a.py",
|
||||||
|
&format!(
|
||||||
|
r#"
|
||||||
|
w = 2 * "hello"
|
||||||
|
x = "goodbye" * 3
|
||||||
|
y = "a" * {y}
|
||||||
|
z = {z} * "b"
|
||||||
|
a = 0 * "hello"
|
||||||
|
b = -3 * "hello"
|
||||||
|
"#,
|
||||||
|
y = TypeInferenceBuilder::MAX_STRING_LITERAL_SIZE,
|
||||||
|
z = TypeInferenceBuilder::MAX_STRING_LITERAL_SIZE + 1
|
||||||
|
),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
assert_public_ty(&db, "src/a.py", "w", r#"Literal["hellohello"]"#);
|
||||||
|
assert_public_ty(&db, "src/a.py", "x", r#"Literal["goodbyegoodbyegoodbye"]"#);
|
||||||
|
assert_public_ty(
|
||||||
|
&db,
|
||||||
|
"src/a.py",
|
||||||
|
"y",
|
||||||
|
&format!(
|
||||||
|
r#"Literal["{}"]"#,
|
||||||
|
"a".repeat(TypeInferenceBuilder::MAX_STRING_LITERAL_SIZE)
|
||||||
|
),
|
||||||
|
);
|
||||||
|
assert_public_ty(&db, "src/a.py", "z", "LiteralString");
|
||||||
|
assert_public_ty(&db, "src/a.py", "a", r#"Literal[""]"#);
|
||||||
|
assert_public_ty(&db, "src/a.py", "b", r#"Literal[""]"#);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bytes_type() -> anyhow::Result<()> {
|
fn bytes_type() -> anyhow::Result<()> {
|
||||||
let mut db = setup_db();
|
let mut db = setup_db();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue