Remove escaped mac/windows eol from AST string value (#7724)

## Summary

This PR fixes a bug where the value of a string node included the escaped
Mac (`\r`) or Windows (`\r\n`) newline sequence.

Note that the token value still includes these escape sequences; they are
only removed when parsing the string content.

## Test Plan

Add new test cases for the string node type to check that the escapes
aren't being included in the string value.

fixes: #7723
This commit is contained in:
Dhruv Manilawala 2023-10-01 07:37:59 +05:30 committed by GitHub
parent 488ec54d21
commit e72d617f4b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 103 additions and 1 deletions

View file

@ -0,0 +1,23 @@
---
source: crates/ruff_python_parser/src/string.rs
expression: parse_ast
---
[
Expr(
StmtExpr {
range: 0..18,
value: Constant(
ExprConstant {
range: 0..18,
value: Str(
StringConstant {
value: "text more text",
unicode: false,
implicit_concatenated: false,
},
),
},
),
},
),
]

View file

@ -0,0 +1,23 @@
---
source: crates/ruff_python_parser/src/string.rs
expression: parse_ast
---
[
Expr(
StmtExpr {
range: 0..18,
value: Constant(
ExprConstant {
range: 0..18,
value: Str(
StringConstant {
value: "text more text",
unicode: false,
implicit_concatenated: false,
},
),
},
),
},
),
]

View file

@ -0,0 +1,23 @@
---
source: crates/ruff_python_parser/src/string.rs
expression: parse_ast
---
[
Expr(
StmtExpr {
range: 0..19,
value: Constant(
ExprConstant {
range: 0..19,
value: Str(
StringConstant {
value: "text more text",
unicode: false,
implicit_concatenated: false,
},
),
},
),
},
),
]

View file

@ -178,6 +178,12 @@ impl<'a> StringParser<'a> {
'N' if !self.kind.is_any_bytes() => self.parse_unicode_name()?,
// Special cases where the escape sequence is not a single character
'\n' => return Ok(String::new()),
'\r' => {
if self.peek() == Some('\n') {
self.next_char();
}
return Ok(String::new());
}
c => {
if self.kind.is_any_bytes() && !c.is_ascii() {
return Err(LexicalError {
@ -558,10 +564,37 @@ impl From<FStringError> for crate::parser::LalrpopError<TextSize, Tok, LexicalEr
mod tests {
use crate::lexer::LexicalErrorType;
use crate::parser::parse_suite;
use crate::ParseErrorType;
use crate::{ParseErrorType, Suite};
use super::*;
// End-of-line sequences that the escaped-EOL tests place directly after a
// backslash inside a string literal.
// Carriage return followed by line feed.
const WINDOWS_EOL: &str = "\r\n";
// Lone carriage return.
const MAC_EOL: &str = "\r";
// Lone line feed.
const UNIX_EOL: &str = "\n";
/// Builds the source `'text \<eol>more text'` (a single-quoted string whose
/// backslash is immediately followed by `eol`) and parses it as a suite.
///
/// Panics if parsing fails, which is the desired outcome inside a test.
fn string_parser_escaped_eol(eol: &str) -> Suite {
    // Raw format string: the backslash is kept literally and `{eol}` is
    // interpolated right after it.
    parse_suite(&format!(r"'text \{eol}more text'"), "<test>").unwrap()
}
// Snapshot test: parses a string literal containing a backslash followed by
// "\n" and compares the debug output of the resulting AST with the stored snapshot.
#[test]
fn test_string_parser_escaped_unix_eol() {
// The binding name is recorded in the snapshot header (`expression: parse_ast`),
// so keep it as-is.
let parse_ast = string_parser_escaped_eol(UNIX_EOL);
insta::assert_debug_snapshot!(parse_ast);
}
// Snapshot test: parses a string literal containing a backslash followed by
// a lone "\r" and compares the debug output of the resulting AST with the
// stored snapshot.
#[test]
fn test_string_parser_escaped_mac_eol() {
// The binding name is recorded in the snapshot header (`expression: parse_ast`),
// so keep it as-is.
let parse_ast = string_parser_escaped_eol(MAC_EOL);
insta::assert_debug_snapshot!(parse_ast);
}
// Snapshot test: parses a string literal containing a backslash followed by
// "\r\n" and compares the debug output of the resulting AST with the stored
// snapshot.
#[test]
fn test_string_parser_escaped_windows_eol() {
// The binding name is recorded in the snapshot header (`expression: parse_ast`),
// so keep it as-is.
let parse_ast = string_parser_escaped_eol(WINDOWS_EOL);
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring() {
let source = r#"f"{a}{ b }{{foo}}""#;