mirror of
https://github.com/astral-sh/ruff.git
synced 2025-10-01 06:11:21 +00:00
Track t-strings and f-strings for token-based rules and suppression comments (#20357)
Our token-based rules and `noqa` extraction used an `Indexer` that kept track of f-string ranges but not t-strings. We've updated the `Indexer` and downstream uses thereof to handle both f-strings and t-strings. Most of the diff is renaming and adding tests. Note that much of the "new" logic gets to be naive because the lexer has already ensured that f and t-string "starts" are paired with their respective "ends", even amidst nesting and so on. Finally: one could imagine wanting to know if a given interpolated string range corresponds to an f-string or a t-string, but I didn't find a place where we actually needed this. Closes #20310
This commit is contained in:
parent
ec863bcde7
commit
b6bd32d9dc
18 changed files with 519 additions and 89 deletions
|
@ -210,6 +210,58 @@ pub const SINGLE_QUOTE_BYTE_PREFIXES: &[&str] = &[
|
|||
"b'",
|
||||
];
|
||||
|
||||
/// Every triple-quoted template-string (t-string) prefix: all orders and all
/// casings of `t` and `rt`, for both the `"""` and `'''` quote styles.
///
/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
#[rustfmt::skip]
pub const TRIPLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
    // Combined `t` + `r` prefixes, double-quoted.
    r#"TR""""#,
    r#"Tr""""#,
    r#"tR""""#,
    r#"tr""""#,
    r#"RT""""#,
    r#"Rt""""#,
    r#"rT""""#,
    r#"rt""""#,
    // Combined `t` + `r` prefixes, single-quoted.
    "TR'''",
    "Tr'''",
    "tR'''",
    "tr'''",
    "RT'''",
    "Rt'''",
    "rT'''",
    "rt'''",
    // Bare `t` prefixes.
    r#"T""""#,
    r#"t""""#,
    "T'''",
    "t'''",
];
|
||||
|
||||
/// Every single-quoted (i.e., non-triple-quoted) template-string (t-string)
/// prefix: all orders and all casings of `t` and `rt`, for both the `"` and
/// `'` quote characters.
#[rustfmt::skip]
pub const SINGLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
    // Combined `t` + `r` prefixes, double-quoted.
    r#"TR""#,
    r#"Tr""#,
    r#"tR""#,
    r#"tr""#,
    r#"RT""#,
    r#"Rt""#,
    r#"rT""#,
    r#"rt""#,
    // Combined `t` + `r` prefixes, single-quoted.
    "TR'",
    "Tr'",
    "tR'",
    "tr'",
    "RT'",
    "Rt'",
    "rT'",
    "rt'",
    // Bare `t` prefixes.
    r#"T""#,
    r#"t""#,
    "T'",
    "t'",
];
|
||||
|
||||
/// Strip the leading and trailing quotes from a string.
|
||||
/// Assumes that the string is a valid string literal, but does not verify that the string
|
||||
/// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
|
||||
|
@ -229,7 +281,7 @@ pub fn raw_contents_range(contents: &str) -> Option<TextRange> {
|
|||
))
|
||||
}
|
||||
|
||||
/// An [`AhoCorasick`] matcher for string and byte literal prefixes.
|
||||
/// An [`AhoCorasick`] matcher for string, template, and bytes literal prefixes.
|
||||
static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
|
||||
AhoCorasick::builder()
|
||||
.start_kind(StartKind::Anchored)
|
||||
|
@ -239,19 +291,21 @@ static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
|
|||
TRIPLE_QUOTE_STR_PREFIXES
|
||||
.iter()
|
||||
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
|
||||
.chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_STR_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_BYTE_PREFIXES),
|
||||
.chain(SINGLE_QUOTE_BYTE_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_TEMPLATE_PREFIXES),
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
/// Return the leading quote for a string or byte literal (e.g., `"""`).
|
||||
/// Return the leading quote for a string, template, or bytes literal (e.g., `"""`).
|
||||
pub fn leading_quote(content: &str) -> Option<&str> {
|
||||
let mat = PREFIX_MATCHER.find(Input::new(content).anchored(Anchored::Yes))?;
|
||||
Some(&content[mat.start()..mat.end()])
|
||||
}
|
||||
|
||||
/// Return the trailing quote string for a string or byte literal (e.g., `"""`).
|
||||
/// Return the trailing quote string for a string, template, or bytes literal (e.g., `"""`).
|
||||
pub fn trailing_quote(content: &str) -> Option<&str> {
|
||||
if content.ends_with("'''") {
|
||||
Some("'''")
|
||||
|
@ -268,14 +322,16 @@ pub fn trailing_quote(content: &str) -> Option<&str> {
|
|||
|
||||
/// Return `true` if the string is a triple-quote string or byte prefix.
|
||||
pub fn is_triple_quote(content: &str) -> bool {
|
||||
TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
|
||||
TRIPLE_QUOTE_STR_PREFIXES.contains(&content)
|
||||
|| TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
|
||||
|| TRIPLE_QUOTE_TEMPLATE_PREFIXES.contains(&content)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{
|
||||
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
|
||||
TRIPLE_QUOTE_STR_PREFIXES,
|
||||
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, SINGLE_QUOTE_TEMPLATE_PREFIXES,
|
||||
TRIPLE_QUOTE_BYTE_PREFIXES, TRIPLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_TEMPLATE_PREFIXES,
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
@ -283,8 +339,10 @@ mod tests {
|
|||
let prefixes = TRIPLE_QUOTE_STR_PREFIXES
|
||||
.iter()
|
||||
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
|
||||
.chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_STR_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_BYTE_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_TEMPLATE_PREFIXES)
|
||||
.collect::<Vec<_>>();
|
||||
for (i, prefix_i) in prefixes.iter().enumerate() {
|
||||
for (j, prefix_j) in prefixes.iter().enumerate() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue