mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 02:38:25 +00:00
Avoid panics for implicitly-concatenated docstrings (#3584)
## Summary In the rare event that a docstring contains an implicit string concatenation, we currently have the potential to panic, because we assume that if a string starts with triple quotes, it _ends_ with triple quotes. But with implicit concatenation, that's not the case: a single `Expr` could start and end with different quote styles, because it can contain multiple string tokens. Supporting these "properly" is pretty hard. In some cases it's hard to even know what the "right" behavior is. So for now, I'm just detecting and warning, which is better than a panic. Closes #3543. Closes #3585.
This commit is contained in:
parent
a5494b8541
commit
4892167217
7 changed files with 135 additions and 25 deletions
|
@ -67,15 +67,71 @@ pub fn is_triple_quote(content: &str) -> bool {
|
|||
TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
|
||||
}
|
||||
|
||||
/// Return `true` if the string expression is an implicit concatenation.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// use ruff_python_ast::str::is_implicit_concatenation;
|
||||
///
|
||||
/// assert!(is_implicit_concatenation(r#"'abc' 'def'"#));
|
||||
/// assert!(!is_implicit_concatenation(r#"'abcdef'"#));
|
||||
/// ```
|
||||
pub fn is_implicit_concatenation(content: &str) -> bool {
|
||||
let Some(leading_quote_str) = leading_quote(content) else {
|
||||
return false;
|
||||
};
|
||||
let Some(trailing_quote_str) = trailing_quote(content) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
// If the trailing quote doesn't match the _expected_ trailing quote, then the string is
|
||||
// implicitly concatenated.
|
||||
//
|
||||
// For example, given:
|
||||
// ```python
|
||||
// u"""abc""" 'def'
|
||||
// ```
|
||||
//
|
||||
// The leading quote would be `u"""`, and the trailing quote would be `'`, but the _expected_
|
||||
// trailing quote would be `"""`. Since `'` does not equal `"""`, we'd return `true`.
|
||||
if trailing_quote_str != trailing_quote(leading_quote_str).unwrap() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Search for any trailing quotes _before_ the end of the string.
|
||||
let mut rest = &content[leading_quote_str.len()..content.len() - trailing_quote_str.len()];
|
||||
while let Some(index) = rest.find(trailing_quote_str) {
|
||||
let mut chars = rest[..index].chars().rev();
|
||||
if let Some('\\') = chars.next() {
|
||||
// If the quote is double-escaped, then it's _not_ escaped, so the string is
|
||||
// implicitly concatenated.
|
||||
if let Some('\\') = chars.next() {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
// If the quote is _not_ escaped, then it's implicitly concatenated.
|
||||
return true;
|
||||
}
|
||||
rest = &rest[index + trailing_quote_str.len()..];
|
||||
}
|
||||
|
||||
// Otherwise, we know the string ends with the expected trailing quote, so it's not implicitly
|
||||
// concatenated.
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::str::is_implicit_concatenation;
|
||||
|
||||
use super::{
|
||||
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
|
||||
TRIPLE_QUOTE_STR_PREFIXES,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_prefixes() {
|
||||
fn prefix_uniqueness() {
|
||||
let prefixes = TRIPLE_QUOTE_STR_PREFIXES
|
||||
.iter()
|
||||
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
|
||||
|
@ -93,4 +149,38 @@ mod tests {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn implicit_concatenation() {
    // Positive cases: two string tokens, separated by whitespace or directly adjacent, using
    // every combination of single- and triple-quoted delimiters.
    assert!(is_implicit_concatenation(r#""abc" "def""#));
    assert!(is_implicit_concatenation(r#""abc" 'def'"#));
    assert!(is_implicit_concatenation(r#""""abc""" "def""#));
    assert!(is_implicit_concatenation(r#"'''abc''' 'def'"#));
    assert!(is_implicit_concatenation(r#""""abc""" 'def'"#));
    assert!(is_implicit_concatenation(r#"'''abc''' "def""#));
    assert!(is_implicit_concatenation(r#""""abc""""def""#));
    assert!(is_implicit_concatenation(r#"'''abc''''def'"#));
    assert!(is_implicit_concatenation(r#""""abc"""'def'"#));
    assert!(is_implicit_concatenation(r#"'''abc'''"def""#));

    // Negative cases: a single string token, including triple-quoted strings that contain
    // other quote characters in their body.
    assert!(!is_implicit_concatenation(r#""abc""#));
    assert!(!is_implicit_concatenation(r#"'abc'"#));
    assert!(!is_implicit_concatenation(r#""""abc""""#));
    assert!(!is_implicit_concatenation(r#"'''abc'''"#));
    assert!(!is_implicit_concatenation(r#""""ab"c""""#));
    assert!(!is_implicit_concatenation(r#"'''ab'c'''"#));
    assert!(!is_implicit_concatenation(r#""""ab'c""""#));
    assert!(!is_implicit_concatenation(r#"'''ab"c'''"#));
    assert!(!is_implicit_concatenation(r#""""ab'''c""""#));
    assert!(!is_implicit_concatenation(r#"'''ab"""c'''"#));

    // Positive cases with escaped quotes: the backslash itself is escaped, so the quote that
    // follows it really does close the first string. Covers both quote characters.
    assert!(is_implicit_concatenation(r#""abc\\""def""#));
    assert!(is_implicit_concatenation(r#"'abc\\''def'"#));

    // Negative cases with escaped quotes: the quote is escaped, so it is part of the string.
    assert!(!is_implicit_concatenation(r#""abc\"def""#));
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue