red-knot: infer string literal types (#13113)

## Summary

Introduce a `StringLiteralType` with a corresponding `Display` case and a
relatively basic test that the resulting representation is as expected.

Note: we currently always allocate for `StringLiteral` types. This may
end up being a perf issue later, at which point we may want to look at
other ways of representing `value` here, e.g. with some kind of smarter
string structure which can reuse types. That is most likely to show up
with operations such as concatenation.

Contributes to #12701.

## Test Plan

Added a test for individual strings with both single and double quotes
as well as concatenated strings with both forms, and a second test for
strings that contain nested double quotes.
This commit is contained in:
Chris Krycho 2024-08-26 12:42:34 -06:00 committed by GitHub
parent ab3648c4c5
commit c4d628cc4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 73 additions and 4 deletions

View file

@ -181,6 +181,8 @@ pub enum Type<'db> {
IntLiteral(i64), IntLiteral(i64),
/// A boolean literal, either `True` or `False`. /// A boolean literal, either `True` or `False`.
BooleanLiteral(bool), BooleanLiteral(bool),
/// A string literal
StringLiteral(StringLiteralType<'db>),
/// A bytes literal /// A bytes literal
BytesLiteral(BytesLiteralType<'db>), BytesLiteral(BytesLiteralType<'db>),
// TODO protocols, callable types, overloads, generics, type vars // TODO protocols, callable types, overloads, generics, type vars
@ -278,6 +280,10 @@ impl<'db> Type<'db> {
Type::Unknown Type::Unknown
} }
Type::BooleanLiteral(_) => Type::Unknown, Type::BooleanLiteral(_) => Type::Unknown,
Type::StringLiteral(_) => {
// TODO defer to Type::Instance(<str from typeshed>).member
Type::Unknown
}
Type::BytesLiteral(_) => { Type::BytesLiteral(_) => {
// TODO defer to Type::Instance(<bytes from typeshed>).member // TODO defer to Type::Instance(<bytes from typeshed>).member
Type::Unknown Type::Unknown
@ -378,6 +384,12 @@ pub struct IntersectionType<'db> {
negative: FxOrderSet<Type<'db>>, negative: FxOrderSet<Type<'db>>,
} }
/// The payload of `Type::StringLiteral`: the text of a string literal.
///
/// Declared with `#[salsa::interned]`, mirroring `BytesLiteralType`. Instances are
/// created with `StringLiteralType::new(db, String)` and the text is read back with
/// `.value(db)`.
#[salsa::interned]
pub struct StringLiteralType<'db> {
// The literal's text as an owned `String`. Callers in this change treat the
// result of `.value(db)` as a borrowed string (they `.clone()` it before
// concatenating) — presumably the effect of `#[return_ref]`; confirm against salsa.
#[return_ref]
value: String,
}
#[salsa::interned] #[salsa::interned]
pub struct BytesLiteralType<'db> { pub struct BytesLiteralType<'db> {
#[return_ref] #[return_ref]

View file

@ -41,6 +41,11 @@ impl Display for DisplayType<'_> {
Type::BooleanLiteral(boolean) => { Type::BooleanLiteral(boolean) => {
write!(f, "Literal[{}]", if *boolean { "True" } else { "False" }) write!(f, "Literal[{}]", if *boolean { "True" } else { "False" })
} }
Type::StringLiteral(string) => write!(
f,
r#"Literal["{}"]"#,
string.value(self.db).replace('"', r#"\""#)
),
Type::BytesLiteral(bytes) => { Type::BytesLiteral(bytes) => {
let escape = let escape =
AsciiEscape::with_preferred_quote(bytes.value(self.db).as_ref(), Quote::Double); AsciiEscape::with_preferred_quote(bytes.value(self.db).as_ref(), Quote::Double);

View file

@ -44,7 +44,7 @@ use crate::semantic_index::SemanticIndex;
use crate::types::diagnostic::{TypeCheckDiagnostic, TypeCheckDiagnostics}; use crate::types::diagnostic::{TypeCheckDiagnostic, TypeCheckDiagnostics};
use crate::types::{ use crate::types::{
builtins_symbol_ty_by_name, definitions_ty, global_symbol_ty_by_name, BytesLiteralType, builtins_symbol_ty_by_name, definitions_ty, global_symbol_ty_by_name, BytesLiteralType,
ClassType, FunctionType, Name, Type, UnionBuilder, ClassType, FunctionType, Name, StringLiteralType, Type, UnionBuilder,
}; };
use crate::Db; use crate::Db;
@ -1243,9 +1243,8 @@ impl<'db> TypeInferenceBuilder<'db> {
} }
#[allow(clippy::unused_self)] #[allow(clippy::unused_self)]
fn infer_string_literal_expression(&mut self, _literal: &ast::ExprStringLiteral) -> Type<'db> { fn infer_string_literal_expression(&mut self, literal: &ast::ExprStringLiteral) -> Type<'db> {
// TODO Literal["..."] or str Type::StringLiteral(StringLiteralType::new(self.db, literal.value.to_string()))
Type::Unknown
} }
#[allow(clippy::unused_self)] #[allow(clippy::unused_self)]
@ -1785,6 +1784,17 @@ impl<'db> TypeInferenceBuilder<'db> {
_ => Type::Unknown, // TODO _ => Type::Unknown, // TODO
} }
} }
Type::StringLiteral(lhs) => match right_ty {
Type::StringLiteral(rhs) => match op {
ast::Operator::Add => Type::StringLiteral(StringLiteralType::new(self.db, {
let lhs_value = lhs.value(self.db);
let rhs_value = rhs.value(self.db);
lhs_value.clone() + rhs_value
})),
_ => Type::Unknown, // TODO
},
_ => Type::Unknown, // TODO
},
_ => Type::Unknown, // TODO _ => Type::Unknown, // TODO
} }
} }
@ -2298,6 +2308,48 @@ mod tests {
Ok(()) Ok(())
} }
/// Writes a small module with plain and `+`-concatenated string literals (in both
/// quote styles) and checks the expected `Literal["..."]` rendering for each symbol.
#[test]
fn string_type() -> anyhow::Result<()> {
    let mut db = setup_db();

    // Python source under test; both quote styles, alone and concatenated.
    let source = r#"
w = "Hello"
x = 'world'
y = "Guten " + 'tag'
z = 'bon ' + "jour"
"#;
    db.write_dedented("src/a.py", source)?;

    // (symbol, expected display), checked in the same order as the assignments above.
    let expectations = [
        ("w", r#"Literal["Hello"]"#),
        ("x", r#"Literal["world"]"#),
        ("y", r#"Literal["Guten tag"]"#),
        ("z", r#"Literal["bon jour"]"#),
    ];
    for (symbol, expected) in expectations {
        assert_public_ty(&db, "src/a.py", symbol, expected);
    }

    Ok(())
}
/// Writes a module whose string literals contain double quotes (balanced, escaped,
/// and unbalanced) and checks that each expected rendering shows them as `\"`.
#[test]
fn string_type_with_nested_quotes() -> anyhow::Result<()> {
    let mut db = setup_db();

    // Python source under test; every literal embeds at least one `"` character.
    let source = r#"
x = 'I say "hello" to you'
y = "You say \"hey\" back"
z = 'No "closure here'
"#;
    db.write_dedented("src/a.py", source)?;

    // (symbol, expected display), checked in the same order as the assignments above.
    let expectations = [
        ("x", r#"Literal["I say \"hello\" to you"]"#),
        ("y", r#"Literal["You say \"hey\" back"]"#),
        ("z", r#"Literal["No \"closure here"]"#),
    ];
    for (symbol, expected) in expectations {
        assert_public_ty(&db, "src/a.py", symbol, expected);
    }

    Ok(())
}
#[test] #[test]
fn bytes_type() -> anyhow::Result<()> { fn bytes_type() -> anyhow::Result<()> {
let mut db = setup_db(); let mut db = setup_db();