[ruff] Adds an allowlist for unsafe-markup-use (RUF035) (#15076)
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / cargo fmt (push) Waiting to run
CI / cargo clippy (push) Blocked by required conditions
CI / cargo test (linux) (push) Blocked by required conditions
CI / cargo test (linux, release) (push) Blocked by required conditions
CI / cargo test (windows) (push) Blocked by required conditions
CI / cargo test (wasm) (push) Blocked by required conditions
CI / cargo build (release) (push) Waiting to run
CI / cargo build (msrv) (push) Blocked by required conditions
CI / cargo fuzz build (push) Blocked by required conditions
CI / fuzz parser (push) Blocked by required conditions
CI / test scripts (push) Blocked by required conditions
CI / ecosystem (push) Blocked by required conditions
CI / cargo shear (push) Blocked by required conditions
CI / python package (push) Waiting to run
CI / pre-commit (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / formatter instabilities and black similarity (push) Blocked by required conditions
CI / test ruff-lsp (push) Blocked by required conditions
CI / benchmarks (push) Blocked by required conditions

Closes: #14523

## Summary

Adds a whitelist of calls allowed to be used within a
`markupsafe.Markup` call.

## Test Plan

`cargo nextest run`
This commit is contained in:
David Salvisberg 2024-12-20 10:36:12 +01:00 committed by GitHub
parent 913bce3cd5
commit 089a98e904
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 120 additions and 10 deletions

View file

@ -375,6 +375,7 @@ linter.pylint.max_locals = 15
linter.pyupgrade.keep_runtime_typing = false
linter.ruff.parenthesize_tuple_in_subscript = false
linter.ruff.extend_markup_names = []
linter.ruff.allowed_markup_calls = []
# Formatter Settings
formatter.exclude = []

View file

@ -0,0 +1,9 @@
from bleach import clean
from markupsafe import Markup
content = "<script>alert('Hello, world!')</script>"
Markup(clean(content))
# indirect assignments are currently not supported
cleaned = clean(content)
Markup(cleaned) # RUF035

View file

@ -91,6 +91,7 @@ mod tests {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec![],
allowed_markup_calls: vec![],
},
..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
},
@ -107,6 +108,7 @@ mod tests {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: false,
extend_markup_names: vec![],
allowed_markup_calls: vec![],
},
target_version: PythonVersion::Py310,
..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
@ -447,6 +449,30 @@ mod tests {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec!["webhelpers.html.literal".to_string()],
allowed_markup_calls: vec![],
},
preview: PreviewMode::Enabled,
..LinterSettings::for_rule(rule_code)
},
)?;
assert_messages!(snapshot, diagnostics);
Ok(())
}
#[test_case(Rule::UnsafeMarkupUse, Path::new("RUF035_whitelisted_markup_calls.py"))]
fn whitelisted_markup_calls(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"whitelisted_markup_calls__{}_{}",
rule_code.noqa_code(),
path.to_string_lossy()
);
let diagnostics = test_path(
Path::new("ruff").join(path).as_path(),
&LinterSettings {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec![],
allowed_markup_calls: vec!["bleach.clean".to_string()],
},
preview: PreviewMode::Enabled,
..LinterSettings::for_rule(rule_code)

View file

@ -1,9 +1,9 @@
use ruff_python_ast::ExprCall;
use ruff_python_ast::{Expr, ExprCall};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_ast::name::QualifiedName;
use ruff_python_semantic::Modules;
use ruff_python_semantic::{Modules, SemanticModel};
use ruff_text_size::Ranged;
use crate::{checkers::ast::Checker, settings::LinterSettings};
@ -22,7 +22,9 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
/// treated like [`markupsafe.Markup`].
///
/// This rule was originally inspired by [flake8-markupsafe] but doesn't carve
/// out any exceptions for i18n related calls.
/// out any exceptions for i18n related calls by default.
///
/// You can use [`lint.ruff.allowed-markup-calls`] to specify exceptions.
///
/// ## Example
/// Given:
@ -64,6 +66,7 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
/// ```
/// ## Options
/// - `lint.ruff.extend-markup-names`
/// - `lint.ruff.allowed-markup-calls`
///
/// ## References
/// - [MarkupSafe](https://pypi.org/project/MarkupSafe/)
@ -95,7 +98,7 @@ pub(crate) fn unsafe_markup_call(checker: &mut Checker, call: &ExprCall) {
return;
}
if !is_unsafe_call(call) {
if !is_unsafe_call(call, checker.semantic(), checker.settings) {
return;
}
@ -127,12 +130,29 @@ fn is_markup_call(qualified_name: &QualifiedName, settings: &LinterSettings) ->
.any(|target| *qualified_name == target)
}
fn is_unsafe_call(call: &ExprCall) -> bool {
fn is_unsafe_call(call: &ExprCall, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
// technically this could be circumvented by using a keyword argument
// but without type-inference we can't really know which keyword argument
// corresponds to the first positional argument and either way it is
// unlikely that someone will actually use a keyword argument here
// TODO: Eventually we may want to allow dynamic values, as long as they
// have a __html__ attribute, since that is part of the API
matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr())
matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr() && !is_whitelisted_call(first, semantic, settings))
}
fn is_whitelisted_call(expr: &Expr, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
let Expr::Call(ExprCall { func, .. }) = expr else {
return false;
};
let Some(qualified_name) = semantic.resolve_qualified_name(func) else {
return false;
};
settings
.ruff
.allowed_markup_calls
.iter()
.map(|target| QualifiedName::from_dotted_name(target))
.any(|target| qualified_name == target)
}

View file

@ -8,6 +8,7 @@ use std::fmt;
pub struct Settings {
pub parenthesize_tuple_in_subscript: bool,
pub extend_markup_names: Vec<String>,
pub allowed_markup_calls: Vec<String>,
}
impl fmt::Display for Settings {
@ -18,6 +19,7 @@ impl fmt::Display for Settings {
fields = [
self.parenthesize_tuple_in_subscript,
self.extend_markup_names | array,
self.allowed_markup_calls | array,
]
}
Ok(())

View file

@ -0,0 +1,10 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
---
RUF035_whitelisted_markup_calls.py:9:1: RUF035 Unsafe use of `markupsafe.Markup` detected
|
7 | # indirect assignments are currently not supported
8 | cleaned = clean(content)
9 | Markup(cleaned) # RUF035
| ^^^^^^^^^^^^^^^ RUF035
|

View file

@ -3079,18 +3079,49 @@ pub struct RuffOptions {
)]
pub parenthesize_tuple_in_subscript: Option<bool>,
/// A list of additional callable names that behave like [`markupsafe.Markup`].
/// A list of additional callable names that behave like
/// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).
///
/// Expects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than
/// `literal`).
///
/// [markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup
#[option(
default = "[]",
value_type = "list[str]",
example = "extend-markup-names = [\"webhelpers.html.literal\", \"my_package.Markup\"]"
)]
pub extend_markup_names: Option<Vec<String>>,
/// A list of callable names, whose result may be safely passed into
/// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).
///
/// Expects to receive a list of fully-qualified names (e.g., `bleach.clean`, rather than `clean`).
///
/// This setting helps you avoid false positives in code like:
///
/// ```python
/// from bleach import clean
/// from markupsafe import Markup
///
/// cleaned_markup = Markup(clean(some_user_input))
/// ```
///
/// Where the use of [`bleach.clean`](https://bleach.readthedocs.io/en/latest/clean.html)
/// usually ensures that there's no XSS vulnerability.
///
/// Although it is not recommended, you may also use this setting to whitelist other
/// kinds of calls, e.g. calls to i18n translation functions, where how safe that is
/// will depend on the implementation and how well the translations are audited.
///
/// Another common use-case is to wrap the output of functions that generate markup
/// like [`xml.etree.ElementTree.tostring`](https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.tostring)
/// or template rendering engines where sanitization of potential user input is either
/// already baked in or has to happen before rendering.
#[option(
default = "[]",
value_type = "list[str]",
example = "allowed-markup-calls = [\"bleach.clean\", \"my_package.sanitize\"]"
)]
pub allowed_markup_calls: Option<Vec<String>>,
}
impl RuffOptions {
@ -3100,6 +3131,7 @@ impl RuffOptions {
.parenthesize_tuple_in_subscript
.unwrap_or_default(),
extend_markup_names: self.extend_markup_names.unwrap_or_default(),
allowed_markup_calls: self.allowed_markup_calls.unwrap_or_default(),
}
}
}

12
ruff.schema.json generated
View file

@ -2758,8 +2758,18 @@
"RuffOptions": {
"type": "object",
"properties": {
"allowed-markup-calls": {
"description": "A list of callable names, whose result may be safely passed into [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).\n\nExpects to receive a list of fully-qualified names (e.g., `bleach.clean`, rather than `clean`).\n\nThis setting helps you avoid false positives in code like:\n\n```python from bleach import clean from markupsafe import Markup\n\ncleaned_markup = Markup(clean(some_user_input)) ```\n\nWhere the use of [`bleach.clean`](https://bleach.readthedocs.io/en/latest/clean.html) usually ensures that there's no XSS vulnerability.\n\nAlthough it is not recommended, you may also use this setting to whitelist other kinds of calls, e.g. calls to i18n translation functions, where how safe that is will depend on the implementation and how well the translations are audited.\n\nAnother common use-case is to wrap the output of functions that generate markup like [`xml.etree.ElementTree.tostring`](https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.tostring) or template rendering engines where sanitization of potential user input is either already baked in or has to happen before rendering.",
"type": [
"array",
"null"
],
"items": {
"type": "string"
}
},
"extend-markup-names": {
"description": "A list of additional callable names that behave like [`markupsafe.Markup`].\n\nExpects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than `literal`).\n\n[markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup",
"description": "A list of additional callable names that behave like [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).\n\nExpects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than `literal`).",
"type": [
"array",
"null"