Use Jupyter mode while parsing Notebook files (#5552)

## Summary

Enable using the new `Mode::Jupyter` for the tokenizer/parser to parse
Jupyter line magic tokens.

Individual calls to the lexer (i.e., `lex_starts_at`) made by various
rules should take the context of the source code into account (is this
content from a Jupyter Notebook?). Thus, a new field `source_type` (of
type `PySourceType`) is added to `Checker` and passed as an argument to
the relevant functions. It is then used to determine the `Mode` for the
lexer.

## Test Plan

Add new test cases to make sure that the magic statement is considered
while generating the diagnostic and autofix:
* For `I001`, if there's a magic statement in between two import blocks,
the two blocks should be sorted independently

fixes: #6090
This commit is contained in:
Dhruv Manilawala 2023-08-05 06:02:07 +05:30 committed by GitHub
parent d788957ec4
commit 32fa05765a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
52 changed files with 652 additions and 196 deletions

View file

@ -114,7 +114,7 @@ pub use parser::{
parse, parse_expression, parse_expression_starts_at, parse_program, parse_starts_at,
parse_suite, parse_tokens, ParseError, ParseErrorType,
};
use ruff_python_ast::{CmpOp, Expr, Mod, Ranged, Suite};
use ruff_python_ast::{CmpOp, Expr, Mod, PySourceType, Ranged, Suite};
use ruff_text_size::{TextRange, TextSize};
pub use string::FStringErrorType;
pub use token::{StringKind, Tok, TokenKind};
@ -130,9 +130,9 @@ mod token;
pub mod typing;
/// Collect tokens up to and including the first error.
pub fn tokenize(contents: &str) -> Vec<LexResult> {
pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
let mut tokens: Vec<LexResult> = vec![];
for tok in lexer::lex(contents, Mode::Module) {
for tok in lexer::lex(contents, mode) {
let is_err = tok.is_err();
tokens.push(tok);
if is_err {
@ -146,17 +146,32 @@ pub fn tokenize(contents: &str) -> Vec<LexResult> {
// Parses an already-lexed token stream and returns the body of the resulting
// module; a `ParseError` from `parse_tokens` is propagated via `?`.
// NOTE(review): this listing appears to be a rendered diff without +/- markers,
// so both the old `parse_tokens(lxr, Mode::Module, ..)` call and its replacement
// `parse_tokens(lxr, mode, ..)` are shown below.
pub fn parse_program_tokens(
lxr: Vec<LexResult>,
source_path: &str,
is_jupyter_notebook: bool,
) -> anyhow::Result<Suite, ParseError> {
match parse_tokens(lxr, Mode::Module, source_path)? {
// Select the parser mode from the notebook flag.
let mode = if is_jupyter_notebook {
Mode::Jupyter
} else {
Mode::Module
};
match parse_tokens(lxr, mode, source_path)? {
Mod::Module(m) => Ok(m.body),
// NOTE(review): the panic message names only `Mode::Module`, but `mode` can
// now also be `Mode::Jupyter`; consider updating the message to match.
Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
}
}
/// Return the `Range` of the first `Tok::Colon` token in a `Range`.
pub fn first_colon_range(range: TextRange, source: &str) -> Option<TextRange> {
pub fn first_colon_range(
range: TextRange,
source: &str,
is_jupyter_notebook: bool,
) -> Option<TextRange> {
let contents = &source[range];
let range = lexer::lex_starts_at(contents, Mode::Module, range.start())
let mode = if is_jupyter_notebook {
Mode::Jupyter
} else {
Mode::Module
};
let range = lexer::lex_starts_at(contents, mode, range.start())
.flatten()
.find(|(tok, _)| tok.is_colon())
.map(|(_, range)| range);
@ -308,6 +323,19 @@ impl std::str::FromStr for Mode {
}
}
/// Conversion from a value describing a kind of source to the [`Mode`] to use for it.
pub trait AsMode {
/// Returns the [`Mode`] that corresponds to `self`.
fn as_mode(&self) -> Mode;
}
/// Maps a [`PySourceType`] to a [`Mode`]: `Python` and `Stub` sources use
/// `Mode::Module`, while `Jupyter` sources use `Mode::Jupyter`.
impl AsMode for PySourceType {
fn as_mode(&self) -> Mode {
// Exhaustive match (no catch-all arm), so a new `PySourceType` variant
// must be handled here explicitly.
match self {
PySourceType::Python | PySourceType::Stub => Mode::Module,
PySourceType::Jupyter => Mode::Jupyter,
}
}
}
/// Returned when a given mode is not valid.
// NOTE(review): presumably the error type of the `FromStr` impl for `Mode`
// that precedes this item — confirm against the full file.
#[derive(Debug)]
pub struct ModeParseError;
@ -357,6 +385,7 @@ mod tests {
let range = first_colon_range(
TextRange::new(TextSize::from(0), contents.text_len()),
contents,
false,
)
.unwrap();
assert_eq!(&contents[range], ":");