Add hidden --extension to override inference of source type from file extension (#8373)

## Summary

This PR addresses the incompatibility with `jupyterlab-lsp` +
`python-lsp-ruff` arising from the inference of source type from file
extension, raised in #6847.

In particular it follows the suggestion in
https://github.com/astral-sh/ruff/issues/6847#issuecomment-1765724679 to
specify a mapping from file extension to source type.

The source types are

- python
- pyi
- ipynb

Usage:

```sh
ruff check --no-cache --stdin-filename Untitled.ipynb --extension ipynb:python
```

Unlike the original suggestion, `:` instead of `=` is used to associate
file extensions to language since that is what is used with
`--per-file-ignores` which is an existing option that accepts a mapping.

## Test Plan

2 tests added to `integration_test.rs` to ensure the override works as
expected

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
This commit is contained in:
Felix Williams 2023-11-08 02:32:40 +00:00 committed by GitHub
parent 71e93a9fa4
commit 7391f74cbc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 296 additions and 34 deletions

View file

@ -8,8 +8,8 @@ use ruff_linter::line_width::LineLength;
use ruff_linter::logging::LogLevel; use ruff_linter::logging::LogLevel;
use ruff_linter::registry::Rule; use ruff_linter::registry::Rule;
use ruff_linter::settings::types::{ use ruff_linter::settings::types::{
FilePattern, PatternPrefixPair, PerFileIgnore, PreviewMode, PythonVersion, SerializationFormat, ExtensionPair, FilePattern, PatternPrefixPair, PerFileIgnore, PreviewMode, PythonVersion,
UnsafeFixes, SerializationFormat, UnsafeFixes,
}; };
use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser}; use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser};
use ruff_workspace::configuration::{Configuration, RuleSelection}; use ruff_workspace::configuration::{Configuration, RuleSelection};
@ -351,6 +351,9 @@ pub struct CheckCommand {
conflicts_with = "watch", conflicts_with = "watch",
)] )]
pub show_settings: bool, pub show_settings: bool,
/// List of mappings from file extension to language (one of ["python", "ipynb", "pyi"]).
#[arg(long, value_delimiter = ',', hide = true)]
pub extension: Option<Vec<ExtensionPair>>,
/// Dev-only argument to show fixes /// Dev-only argument to show fixes
#[arg(long, hide = true)] #[arg(long, hide = true)]
pub ecosystem_ci: bool, pub ecosystem_ci: bool,
@ -535,6 +538,7 @@ impl CheckCommand {
force_exclude: resolve_bool_arg(self.force_exclude, self.no_force_exclude), force_exclude: resolve_bool_arg(self.force_exclude, self.no_force_exclude),
output_format: self.output_format, output_format: self.output_format,
show_fixes: resolve_bool_arg(self.show_fixes, self.no_show_fixes), show_fixes: resolve_bool_arg(self.show_fixes, self.no_show_fixes),
extension: self.extension,
}, },
) )
} }
@ -647,6 +651,7 @@ pub struct CliOverrides {
pub force_exclude: Option<bool>, pub force_exclude: Option<bool>,
pub output_format: Option<SerializationFormat>, pub output_format: Option<SerializationFormat>,
pub show_fixes: Option<bool>, pub show_fixes: Option<bool>,
pub extension: Option<Vec<ExtensionPair>>,
} }
impl ConfigurationTransformer for CliOverrides { impl ConfigurationTransformer for CliOverrides {
@ -731,6 +736,9 @@ impl ConfigurationTransformer for CliOverrides {
if let Some(target_version) = &self.target_version { if let Some(target_version) = &self.target_version {
config.target_version = Some(*target_version); config.target_version = Some(*target_version);
} }
if let Some(extension) = &self.extension {
config.lint.extension = Some(extension.clone().into_iter().collect());
}
config config
} }

View file

@ -30,6 +30,7 @@ use crate::diagnostics::Diagnostics;
use crate::panic::catch_unwind; use crate::panic::catch_unwind;
/// Run the linter over a collection of files. /// Run the linter over a collection of files.
#[allow(clippy::too_many_arguments)]
pub(crate) fn check( pub(crate) fn check(
files: &[PathBuf], files: &[PathBuf],
pyproject_config: &PyprojectConfig, pyproject_config: &PyprojectConfig,
@ -184,6 +185,7 @@ pub(crate) fn check(
/// Wraps [`lint_path`](crate::diagnostics::lint_path) in a [`catch_unwind`](std::panic::catch_unwind) and emits /// Wraps [`lint_path`](crate::diagnostics::lint_path) in a [`catch_unwind`](std::panic::catch_unwind) and emits
/// a diagnostic if the linting the file panics. /// a diagnostic if the linting the file panics.
#[allow(clippy::too_many_arguments)]
fn lint_path( fn lint_path(
path: &Path, path: &Path,
package: Option<&Path>, package: Option<&Path>,

View file

@ -17,13 +17,13 @@ use ruff_linter::logging::DisplayParseError;
use ruff_linter::message::Message; use ruff_linter::message::Message;
use ruff_linter::pyproject_toml::lint_pyproject_toml; use ruff_linter::pyproject_toml::lint_pyproject_toml;
use ruff_linter::registry::AsRule; use ruff_linter::registry::AsRule;
use ruff_linter::settings::types::UnsafeFixes; use ruff_linter::settings::types::{ExtensionMapping, UnsafeFixes};
use ruff_linter::settings::{flags, LinterSettings}; use ruff_linter::settings::{flags, LinterSettings};
use ruff_linter::source_kind::{SourceError, SourceKind}; use ruff_linter::source_kind::{SourceError, SourceKind};
use ruff_linter::{fs, IOError, SyntaxError}; use ruff_linter::{fs, IOError, SyntaxError};
use ruff_notebook::{Notebook, NotebookError, NotebookIndex}; use ruff_notebook::{Notebook, NotebookError, NotebookIndex};
use ruff_python_ast::imports::ImportMap; use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::{SourceType, TomlSourceType}; use ruff_python_ast::{PySourceType, SourceType, TomlSourceType};
use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder}; use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder};
use ruff_text_size::{TextRange, TextSize}; use ruff_text_size::{TextRange, TextSize};
use ruff_workspace::Settings; use ruff_workspace::Settings;
@ -177,6 +177,11 @@ impl AddAssign for FixMap {
} }
} }
fn override_source_type(path: Option<&Path>, extension: &ExtensionMapping) -> Option<PySourceType> {
let ext = path?.extension()?.to_str()?;
extension.get(ext).map(PySourceType::from)
}
/// Lint the source code at the given `Path`. /// Lint the source code at the given `Path`.
pub(crate) fn lint_path( pub(crate) fn lint_path(
path: &Path, path: &Path,
@ -221,31 +226,35 @@ pub(crate) fn lint_path(
debug!("Checking: {}", path.display()); debug!("Checking: {}", path.display());
let source_type = match SourceType::from(path) { let source_type = match override_source_type(Some(path), &settings.extension) {
SourceType::Toml(TomlSourceType::Pyproject) => { Some(source_type) => source_type,
let messages = if settings None => match SourceType::from(path) {
.rules SourceType::Toml(TomlSourceType::Pyproject) => {
.iter_enabled() let messages = if settings
.any(|rule_code| rule_code.lint_source().is_pyproject_toml()) .rules
{ .iter_enabled()
let contents = match std::fs::read_to_string(path).map_err(SourceError::from) { .any(|rule_code| rule_code.lint_source().is_pyproject_toml())
Ok(contents) => contents, {
Err(err) => { let contents = match std::fs::read_to_string(path).map_err(SourceError::from) {
return Ok(Diagnostics::from_source_error(&err, Some(path), settings)); Ok(contents) => contents,
} Err(err) => {
return Ok(Diagnostics::from_source_error(&err, Some(path), settings));
}
};
let source_file =
SourceFileBuilder::new(path.to_string_lossy(), contents).finish();
lint_pyproject_toml(source_file, settings)
} else {
vec![]
}; };
let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish(); return Ok(Diagnostics {
lint_pyproject_toml(source_file, settings) messages,
} else { ..Diagnostics::default()
vec![] });
}; }
return Ok(Diagnostics { SourceType::Toml(_) => return Ok(Diagnostics::default()),
messages, SourceType::Python(source_type) => source_type,
..Diagnostics::default() },
});
}
SourceType::Toml(_) => return Ok(Diagnostics::default()),
SourceType::Python(source_type) => source_type,
}; };
// Extract the sources from the file. // Extract the sources from the file.
@ -370,8 +379,15 @@ pub(crate) fn lint_stdin(
fix_mode: flags::FixMode, fix_mode: flags::FixMode,
) -> Result<Diagnostics> { ) -> Result<Diagnostics> {
// TODO(charlie): Support `pyproject.toml`. // TODO(charlie): Support `pyproject.toml`.
let SourceType::Python(source_type) = path.map(SourceType::from).unwrap_or_default() else { let source_type = if let Some(source_type) =
return Ok(Diagnostics::default()); override_source_type(path, &settings.linter.extension)
{
source_type
} else {
let SourceType::Python(source_type) = path.map(SourceType::from).unwrap_or_default() else {
return Ok(Diagnostics::default());
};
source_type
}; };
// Extract the sources from the file. // Extract the sources from the file.

View file

@ -320,6 +320,119 @@ fn stdin_fix_jupyter() {
Found 2 errors (2 fixed, 0 remaining). Found 2 errors (2 fixed, 0 remaining).
"###); "###);
} }
#[test]
fn stdin_override_parser_ipynb() {
let args = ["--extension", "py:ipynb", "--stdin-filename", "Jupyter.py"];
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.args(STDIN_BASE_OPTIONS)
.args(args)
.pass_stdin(r#"{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "dccc687c-96e2-4604-b957-a8a89b5bec06",
"metadata": {},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "markdown",
"id": "19e1b029-f516-4662-a9b9-623b93edac1a",
"metadata": {},
"source": [
"Foo"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cdce7b92-b0fb-4c02-86f6-e233b26fa84f",
"metadata": {},
"outputs": [],
"source": [
"import sys"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e40b33d2-7fe4-46c5-bdf0-8802f3052565",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n"
]
}
],
"source": [
"print(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1899bc8-d46f-4ec0-b1d1-e1ca0f04bf60",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}"#), @r###"
success: false
exit_code: 1
----- stdout -----
Jupyter.py:cell 1:1:8: F401 [*] `os` imported but unused
Jupyter.py:cell 3:1:8: F401 [*] `sys` imported but unused
Found 2 errors.
[*] 2 fixable with the `--fix` option.
----- stderr -----
"###);
}
#[test]
fn stdin_override_parser_py() {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.args(STDIN_BASE_OPTIONS)
.args(["--extension", "ipynb:python", "--stdin-filename", "F401.ipynb"])
.pass_stdin("import os\n"), @r###"
success: false
exit_code: 1
----- stdout -----
F401.ipynb:1:8: F401 [*] `os` imported but unused
Found 1 error.
[*] 1 fixable with the `--fix` option.
----- stderr -----
"###);
}
#[test] #[test]
fn stdin_fix_when_not_fixable_should_still_print_contents() { fn stdin_fix_when_not_fixable_should_still_print_contents() {

View file

@ -23,7 +23,7 @@ use crate::rules::{
flake8_tidy_imports, flake8_type_checking, flake8_unused_arguments, isort, mccabe, pep8_naming, flake8_tidy_imports, flake8_type_checking, flake8_unused_arguments, isort, mccabe, pep8_naming,
pycodestyle, pydocstyle, pyflakes, pylint, pyupgrade, pycodestyle, pydocstyle, pyflakes, pylint, pyupgrade,
}; };
use crate::settings::types::{FilePatternSet, PerFileIgnore, PythonVersion}; use crate::settings::types::{ExtensionMapping, FilePatternSet, PerFileIgnore, PythonVersion};
use crate::{codes, RuleSelector}; use crate::{codes, RuleSelector};
use super::line_width::IndentWidth; use super::line_width::IndentWidth;
@ -50,6 +50,7 @@ pub struct LinterSettings {
pub target_version: PythonVersion, pub target_version: PythonVersion,
pub preview: PreviewMode, pub preview: PreviewMode,
pub explicit_preview_rules: bool, pub explicit_preview_rules: bool,
pub extension: ExtensionMapping,
// Rule-specific settings // Rule-specific settings
pub allowed_confusables: FxHashSet<char>, pub allowed_confusables: FxHashSet<char>,
@ -187,6 +188,7 @@ impl LinterSettings {
pyupgrade: pyupgrade::settings::Settings::default(), pyupgrade: pyupgrade::settings::Settings::default(),
preview: PreviewMode::default(), preview: PreviewMode::default(),
explicit_preview_rules: false, explicit_preview_rules: false,
extension: ExtensionMapping::default(),
} }
} }

View file

@ -7,13 +7,15 @@ use std::string::ToString;
use anyhow::{bail, Result}; use anyhow::{bail, Result};
use globset::{Glob, GlobSet, GlobSetBuilder}; use globset::{Glob, GlobSet, GlobSetBuilder};
use pep440_rs::{Version as Pep440Version, VersionSpecifiers}; use pep440_rs::{Version as Pep440Version, VersionSpecifiers};
use ruff_diagnostics::Applicability; use rustc_hash::FxHashMap;
use serde::{de, Deserialize, Deserializer, Serialize}; use serde::{de, Deserialize, Deserializer, Serialize};
use strum::IntoEnumIterator; use strum::IntoEnumIterator;
use strum_macros::EnumIter; use strum_macros::EnumIter;
use ruff_cache::{CacheKey, CacheKeyHasher}; use ruff_cache::{CacheKey, CacheKeyHasher};
use ruff_diagnostics::Applicability;
use ruff_macros::CacheKey; use ruff_macros::CacheKey;
use ruff_python_ast::PySourceType;
use crate::fs; use crate::fs;
use crate::registry::RuleSet; use crate::registry::RuleSet;
@ -289,6 +291,119 @@ impl FromStr for PatternPrefixPair {
} }
} }
#[derive(
Clone,
Copy,
Debug,
PartialOrd,
Ord,
PartialEq,
Eq,
Default,
Serialize,
Deserialize,
CacheKey,
EnumIter,
)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "clap", derive(clap::ValueEnum))]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub enum Language {
#[default]
Python,
Pyi,
Ipynb,
}
impl FromStr for Language {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_ascii_lowercase().as_str() {
"python" => Ok(Self::Python),
"pyi" => Ok(Self::Pyi),
"ipynb" => Ok(Self::Ipynb),
_ => {
bail!("Unrecognized language: `{s}`. Expected one of `python`, `pyi`, or `ipynb`.")
}
}
}
}
impl From<Language> for PySourceType {
fn from(value: Language) -> Self {
match value {
Language::Python => Self::Python,
Language::Ipynb => Self::Ipynb,
Language::Pyi => Self::Stub,
}
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ExtensionPair {
pub extension: String,
pub language: Language,
}
impl ExtensionPair {
const EXPECTED_PATTERN: &'static str = "<Extension>:<LanguageCode> pattern";
}
impl FromStr for ExtensionPair {
type Err = anyhow::Error;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
let (extension_str, language_str) = {
let tokens = s.split(':').collect::<Vec<_>>();
if tokens.len() != 2 {
bail!("Expected {}", Self::EXPECTED_PATTERN);
}
(tokens[0].trim(), tokens[1].trim())
};
let extension = extension_str.into();
let language = Language::from_str(language_str)?;
Ok(Self {
extension,
language,
})
}
}
impl From<ExtensionPair> for (String, Language) {
fn from(value: ExtensionPair) -> Self {
(value.extension, value.language)
}
}
#[derive(Debug, Clone, Default, CacheKey)]
pub struct ExtensionMapping {
mapping: FxHashMap<String, Language>,
}
impl ExtensionMapping {
/// Return the [`Language`] for the given extension.
pub fn get(&self, extension: &str) -> Option<Language> {
self.mapping.get(extension).copied()
}
}
impl From<FxHashMap<String, Language>> for ExtensionMapping {
fn from(value: FxHashMap<String, Language>) -> Self {
Self { mapping: value }
}
}
impl FromIterator<ExtensionPair> for ExtensionMapping {
fn from_iter<T: IntoIterator<Item = ExtensionPair>>(iter: T) -> Self {
Self {
mapping: iter
.into_iter()
.map(|pair| (pair.extension, pair.language))
.collect(),
}
}
}
#[derive(Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Debug, Hash)] #[derive(Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Debug, Hash)]
#[cfg_attr(feature = "clap", derive(clap::ValueEnum))] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))]
#[serde(rename_all = "kebab-case")] #[serde(rename_all = "kebab-case")]

View file

@ -25,8 +25,8 @@ use ruff_linter::rule_selector::{PreviewOptions, Specificity};
use ruff_linter::rules::pycodestyle; use ruff_linter::rules::pycodestyle;
use ruff_linter::settings::rule_table::RuleTable; use ruff_linter::settings::rule_table::RuleTable;
use ruff_linter::settings::types::{ use ruff_linter::settings::types::{
FilePattern, FilePatternSet, PerFileIgnore, PreviewMode, PythonVersion, SerializationFormat, ExtensionMapping, FilePattern, FilePatternSet, PerFileIgnore, PreviewMode, PythonVersion,
UnsafeFixes, Version, SerializationFormat, UnsafeFixes, Version,
}; };
use ruff_linter::settings::{ use ruff_linter::settings::{
resolve_per_file_ignores, LinterSettings, DEFAULT_SELECTORS, DUMMY_VARIABLE_RGX, TASK_TAGS, resolve_per_file_ignores, LinterSettings, DEFAULT_SELECTORS, DUMMY_VARIABLE_RGX, TASK_TAGS,
@ -216,6 +216,7 @@ impl Configuration {
linter: LinterSettings { linter: LinterSettings {
rules: lint.as_rule_table(lint_preview), rules: lint.as_rule_table(lint_preview),
exclude: FilePatternSet::try_from_iter(lint.exclude.unwrap_or_default())?, exclude: FilePatternSet::try_from_iter(lint.exclude.unwrap_or_default())?,
extension: lint.extension.unwrap_or_default(),
preview: lint_preview, preview: lint_preview,
target_version, target_version,
project_root: project_root.to_path_buf(), project_root: project_root.to_path_buf(),
@ -523,6 +524,7 @@ impl Configuration {
pub struct LintConfiguration { pub struct LintConfiguration {
pub exclude: Option<Vec<FilePattern>>, pub exclude: Option<Vec<FilePattern>>,
pub preview: Option<PreviewMode>, pub preview: Option<PreviewMode>,
pub extension: Option<ExtensionMapping>,
// Rule selection // Rule selection
pub extend_per_file_ignores: Vec<PerFileIgnore>, pub extend_per_file_ignores: Vec<PerFileIgnore>,
@ -589,6 +591,9 @@ impl LintConfiguration {
.chain(options.common.extend_unfixable.into_iter().flatten()) .chain(options.common.extend_unfixable.into_iter().flatten())
.collect(); .collect();
Ok(LintConfiguration { Ok(LintConfiguration {
// `--extension` is a hidden command-line argument that isn't supported in configuration
// files at present.
extension: None,
exclude: options.exclude.map(|paths| { exclude: options.exclude.map(|paths| {
paths paths
.into_iter() .into_iter()
@ -905,6 +910,7 @@ impl LintConfiguration {
Self { Self {
exclude: self.exclude.or(config.exclude), exclude: self.exclude.or(config.exclude),
preview: self.preview.or(config.preview), preview: self.preview.or(config.preview),
extension: self.extension.or(config.extension),
rule_selections: config rule_selections: config
.rule_selections .rule_selections
.into_iter() .into_iter()