[ruff] Adds an allowlist for unsafe-markup-use (RUF035) (#15076)
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / ecosystem (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / benchmarks (push) Blocked by required conditions

Closes: #14523

## Summary

Adds a whitelist of calls allowed to be used within a
`markupsafe.Markup` call.

## Test Plan

`cargo nextest run`
This commit is contained in:
David Salvisberg 2024-12-20 10:36:12 +01:00 committed by GitHub
parent 913bce3cd5
commit 089a98e904
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 120 additions and 10 deletions

View file

@ -375,6 +375,7 @@ linter.pylint.max_locals = 15
linter.pyupgrade.keep_runtime_typing = false
linter.ruff.parenthesize_tuple_in_subscript = false
linter.ruff.extend_markup_names = []
linter.ruff.allowed_markup_calls = []
# Formatter Settings
formatter.exclude = []

View file

@ -0,0 +1,9 @@
from bleach import clean
from markupsafe import Markup
content = "<script>alert('Hello, world!')</script>"
Markup(clean(content))
# indirect assignments are currently not supported
cleaned = clean(content)
Markup(cleaned) # RUF035

View file

@ -91,6 +91,7 @@ mod tests {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec![],
allowed_markup_calls: vec![],
},
..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
},
@ -107,6 +108,7 @@ mod tests {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: false,
extend_markup_names: vec![],
allowed_markup_calls: vec![],
},
target_version: PythonVersion::Py310,
..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
@ -447,6 +449,30 @@ mod tests {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec!["webhelpers.html.literal".to_string()],
allowed_markup_calls: vec![],
},
preview: PreviewMode::Enabled,
..LinterSettings::for_rule(rule_code)
},
)?;
assert_messages!(snapshot, diagnostics);
Ok(())
}
#[test_case(Rule::UnsafeMarkupUse, Path::new("RUF035_whitelisted_markup_calls.py"))]
fn whitelisted_markup_calls(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"whitelisted_markup_calls__{}_{}",
rule_code.noqa_code(),
path.to_string_lossy()
);
let diagnostics = test_path(
Path::new("ruff").join(path).as_path(),
&LinterSettings {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec![],
allowed_markup_calls: vec!["bleach.clean".to_string()],
},
preview: PreviewMode::Enabled,
..LinterSettings::for_rule(rule_code)

View file

@ -1,9 +1,9 @@
use ruff_python_ast::ExprCall;
use ruff_python_ast::{Expr, ExprCall};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_ast::name::QualifiedName;
use ruff_python_semantic::Modules;
use ruff_python_semantic::{Modules, SemanticModel};
use ruff_text_size::Ranged;
use crate::{checkers::ast::Checker, settings::LinterSettings};
@ -22,7 +22,9 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
/// treated like [`markupsafe.Markup`].
///
/// This rule was originally inspired by [flake8-markupsafe] but doesn't carve
/// out any exceptions for i18n related calls.
/// out any exceptions for i18n related calls by default.
///
/// You can use [`lint.ruff.allowed-markup-calls`] to specify exceptions.
///
/// ## Example
/// Given:
@ -64,6 +66,7 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
/// ```
/// ## Options
/// - `lint.ruff.extend-markup-names`
/// - `lint.ruff.allowed-markup-calls`
///
/// ## References
/// - [MarkupSafe](https://pypi.org/project/MarkupSafe/)
@ -95,7 +98,7 @@ pub(crate) fn unsafe_markup_call(checker: &mut Checker, call: &ExprCall) {
return;
}
if !is_unsafe_call(call) {
if !is_unsafe_call(call, checker.semantic(), checker.settings) {
return;
}
@ -127,12 +130,29 @@ fn is_markup_call(qualified_name: &QualifiedName, settings: &LinterSettings) ->
.any(|target| *qualified_name == target)
}
fn is_unsafe_call(call: &ExprCall) -> bool {
fn is_unsafe_call(call: &ExprCall, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
// technically this could be circumvented by using a keyword argument
// but without type-inference we can't really know which keyword argument
// corresponds to the first positional argument and either way it is
// unlikely that someone will actually use a keyword argument here
// TODO: Eventually we may want to allow dynamic values, as long as they
// have a __html__ attribute, since that is part of the API
matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr())
matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr() && !is_whitelisted_call(first, semantic, settings))
}
fn is_whitelisted_call(expr: &Expr, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
let Expr::Call(ExprCall { func, .. }) = expr else {
return false;
};
let Some(qualified_name) = semantic.resolve_qualified_name(func) else {
return false;
};
settings
.ruff
.allowed_markup_calls
.iter()
.map(|target| QualifiedName::from_dotted_name(target))
.any(|target| qualified_name == target)
}

View file

@ -8,6 +8,7 @@ use std::fmt;
pub struct Settings {
pub parenthesize_tuple_in_subscript: bool,
pub extend_markup_names: Vec<String>,
pub allowed_markup_calls: Vec<String>,
}
impl fmt::Display for Settings {
@ -18,6 +19,7 @@ impl fmt::Display for Settings {
fields = [
self.parenthesize_tuple_in_subscript,
self.extend_markup_names | array,
self.allowed_markup_calls | array,
]
}
Ok(())

View file

@ -0,0 +1,10 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
---
RUF035_whitelisted_markup_calls.py:9:1: RUF035 Unsafe use of `markupsafe.Markup` detected
|
7 | # indirect assignments are currently not supported
8 | cleaned = clean(content)
9 | Markup(cleaned) # RUF035
| ^^^^^^^^^^^^^^^ RUF035
|

View file

@ -3079,18 +3079,49 @@ pub struct RuffOptions {
)]
pub parenthesize_tuple_in_subscript: Option<bool>,
/// A list of additional callable names that behave like [`markupsafe.Markup`].
/// A list of additional callable names that behave like
/// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).
///
/// Expects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than
/// `literal`).
///
/// [markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup
#[option(
default = "[]",
value_type = "list[str]",
example = "extend-markup-names = [\"webhelpers.html.literal\", \"my_package.Markup\"]"
)]
pub extend_markup_names: Option<Vec<String>>,
/// A list of callable names, whose result may be safely passed into
/// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).
///
/// Expects to receive a list of fully-qualified names (e.g., `bleach.clean`, rather than `clean`).
///
/// This setting helps you avoid false positives in code like:
///
/// ```python
/// from bleach import clean
/// from markupsafe import Markup
///
/// cleaned_markup = Markup(clean(some_user_input))
/// ```
///
/// Where the use of [`bleach.clean`](https://bleach.readthedocs.io/en/latest/clean.html)
/// usually ensures that there's no XSS vulnerability.
///
/// Although it is not recommended, you may also use this setting to whitelist other
/// kinds of calls, e.g. calls to i18n translation functions, where how safe that is
/// will depend on the implementation and how well the translations are audited.
///
/// Another common use-case is to wrap the output of functions that generate markup
/// like [`xml.etree.ElementTree.tostring`](https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.tostring)
/// or template rendering engines where sanitization of potential user input is either
/// already baked in or has to happen before rendering.
#[option(
default = "[]",
value_type = "list[str]",
example = "allowed-markup-calls = [\"bleach.clean\", \"my_package.sanitize\"]"
)]
pub allowed_markup_calls: Option<Vec<String>>,
}
impl RuffOptions {
@ -3100,6 +3131,7 @@ impl RuffOptions {
.parenthesize_tuple_in_subscript
.unwrap_or_default(),
extend_markup_names: self.extend_markup_names.unwrap_or_default(),
allowed_markup_calls: self.allowed_markup_calls.unwrap_or_default(),
}
}
}