[ruff] Adds an allowlist for unsafe-markup-use (RUF035) (#15076)

Closes: #14523 ## Summary Adds a whitelist of calls allowed to be used within a `markupsafe.Markup` call. ## Test Plan `cargo nextest run`
2025-08-03 10:22:24 +00:00 · 2024-12-20 10:36:12 +01:00 · 2024-12-20 10:36:12 +01:00 · 089a98e904
commit 089a98e904
parent 913bce3cd5
8 changed files with 120 additions and 10 deletions
--- a/crates/ruff/tests/snapshots/show_settings__display_default_settings.snap
+++ b/crates/ruff/tests/snapshots/show_settings__display_default_settings.snap
@ -375,6 +375,7 @@ linter.pylint.max_locals = 15
 linter.pyupgrade.keep_runtime_typing = false
 linter.ruff.parenthesize_tuple_in_subscript = false
 linter.ruff.extend_markup_names = []
+linter.ruff.allowed_markup_calls = []

 # Formatter Settings
 formatter.exclude = []
--- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF035_whitelisted_markup_calls.py
+++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF035_whitelisted_markup_calls.py
@ -0,0 +1,9 @@
+from bleach import clean
+from markupsafe import Markup
+
+content = "<script>alert('Hello, world!')</script>"
+Markup(clean(content))
+
+# indirect assignments are currently not supported
+cleaned = clean(content)
+Markup(cleaned)  # RUF035
--- a/crates/ruff_linter/src/rules/ruff/mod.rs
+++ b/crates/ruff_linter/src/rules/ruff/mod.rs
@ -91,6 +91,7 @@ mod tests {
                ruff: super::settings::Settings {
                    parenthesize_tuple_in_subscript: true,
                    extend_markup_names: vec![],
+                    allowed_markup_calls: vec![],
                },
                ..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
            },
@ -107,6 +108,7 @@ mod tests {
                ruff: super::settings::Settings {
                    parenthesize_tuple_in_subscript: false,
                    extend_markup_names: vec![],
+                    allowed_markup_calls: vec![],
                },
                target_version: PythonVersion::Py310,
                ..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
@ -447,6 +449,30 @@ mod tests {
                ruff: super::settings::Settings {
                    parenthesize_tuple_in_subscript: true,
                    extend_markup_names: vec!["webhelpers.html.literal".to_string()],
+                    allowed_markup_calls: vec![],
+                },
+                preview: PreviewMode::Enabled,
+                ..LinterSettings::for_rule(rule_code)
+            },
+        )?;
+        assert_messages!(snapshot, diagnostics);
+        Ok(())
+    }
+
+    #[test_case(Rule::UnsafeMarkupUse, Path::new("RUF035_whitelisted_markup_calls.py"))]
+    fn whitelisted_markup_calls(rule_code: Rule, path: &Path) -> Result<()> {
+        let snapshot = format!(
+            "whitelisted_markup_calls__{}_{}",
+            rule_code.noqa_code(),
+            path.to_string_lossy()
+        );
+        let diagnostics = test_path(
+            Path::new("ruff").join(path).as_path(),
+            &LinterSettings {
+                ruff: super::settings::Settings {
+                    parenthesize_tuple_in_subscript: true,
+                    extend_markup_names: vec![],
+                    allowed_markup_calls: vec!["bleach.clean".to_string()],
                },
                preview: PreviewMode::Enabled,
                ..LinterSettings::for_rule(rule_code)
--- a/crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs
+++ b/crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs
@ -1,9 +1,9 @@
-use ruff_python_ast::ExprCall;
+use ruff_python_ast::{Expr, ExprCall};

 use ruff_diagnostics::{Diagnostic, Violation};
 use ruff_macros::{derive_message_formats, ViolationMetadata};
 use ruff_python_ast::name::QualifiedName;
-use ruff_python_semantic::Modules;
+use ruff_python_semantic::{Modules, SemanticModel};
 use ruff_text_size::Ranged;

 use crate::{checkers::ast::Checker, settings::LinterSettings};
@ -22,7 +22,9 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
 /// treated like [`markupsafe.Markup`].
 ///
 /// This rule was originally inspired by [flake8-markupsafe] but doesn't carve
-/// out any exceptions for i18n related calls.
+/// out any exceptions for i18n related calls by default.
+///
+/// You can use [`lint.ruff.allowed-markup-calls`] to specify exceptions.
 ///
 /// ## Example
 /// Given:
@ -64,6 +66,7 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
 /// ```
 /// ## Options
 /// - `lint.ruff.extend-markup-names`
+/// - `lint.ruff.allowed-markup-calls`
 ///
 /// ## References
 /// - [MarkupSafe](https://pypi.org/project/MarkupSafe/)
@ -95,7 +98,7 @@ pub(crate) fn unsafe_markup_call(checker: &mut Checker, call: &ExprCall) {
        return;
    }

-    if !is_unsafe_call(call) {
+    if !is_unsafe_call(call, checker.semantic(), checker.settings) {
        return;
    }

@ -127,12 +130,29 @@ fn is_markup_call(qualified_name: &QualifiedName, settings: &LinterSettings) ->
        .any(|target| *qualified_name == target)
 }

-fn is_unsafe_call(call: &ExprCall) -> bool {
+fn is_unsafe_call(call: &ExprCall, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
    // technically this could be circumvented by using a keyword argument
    // but without type-inference we can't really know which keyword argument
    // corresponds to the first positional argument and either way it is
    // unlikely that someone will actually use a keyword argument here
    // TODO: Eventually we may want to allow dynamic values, as long as they
    //       have a __html__ attribute, since that is part of the API
-    matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr())
+    matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr() && !is_whitelisted_call(first, semantic, settings))
+}
+
+fn is_whitelisted_call(expr: &Expr, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
+    let Expr::Call(ExprCall { func, .. }) = expr else {
+        return false;
+    };
+
+    let Some(qualified_name) = semantic.resolve_qualified_name(func) else {
+        return false;
+    };
+
+    settings
+        .ruff
+        .allowed_markup_calls
+        .iter()
+        .map(|target| QualifiedName::from_dotted_name(target))
+        .any(|target| qualified_name == target)
 }
--- a/crates/ruff_linter/src/rules/ruff/settings.rs
+++ b/crates/ruff_linter/src/rules/ruff/settings.rs
@ -8,6 +8,7 @@ use std::fmt;
 pub struct Settings {
    pub parenthesize_tuple_in_subscript: bool,
    pub extend_markup_names: Vec<String>,
+    pub allowed_markup_calls: Vec<String>,
 }

 impl fmt::Display for Settings {
@ -18,6 +19,7 @@ impl fmt::Display for Settings {
            fields = [
                self.parenthesize_tuple_in_subscript,
                self.extend_markup_names | array,
+                self.allowed_markup_calls | array,
            ]
        }
        Ok(())
--- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linterrulesrufftestswhitelisted_markup_calls__RUF035_RUF035_whitelisted_markup_calls.py.snap
+++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linterrulesrufftestswhitelisted_markup_calls__RUF035_RUF035_whitelisted_markup_calls.py.snap
@ -0,0 +1,10 @@
+---
+source: crates/ruff_linter/src/rules/ruff/mod.rs
+---
+RUF035_whitelisted_markup_calls.py:9:1: RUF035 Unsafe use of `markupsafe.Markup` detected
+  |
+7 | # indirect assignments are currently not supported
+8 | cleaned = clean(content)
+9 | Markup(cleaned)  # RUF035
+  | ^^^^^^^^^^^^^^^ RUF035
+  |
--- a/crates/ruff_workspace/src/options.rs
+++ b/crates/ruff_workspace/src/options.rs
@ -3079,18 +3079,49 @@ pub struct RuffOptions {
    )]
    pub parenthesize_tuple_in_subscript: Option<bool>,

-    /// A list of additional callable names that behave like [`markupsafe.Markup`].
+    /// A list of additional callable names that behave like
+    /// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).
    ///
    /// Expects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than
    /// `literal`).
-    ///
-    /// [markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup
    #[option(
        default = "[]",
        value_type = "list[str]",
        example = "extend-markup-names = [\"webhelpers.html.literal\", \"my_package.Markup\"]"
    )]
    pub extend_markup_names: Option<Vec<String>>,
+
+    /// A list of callable names, whose result may be safely passed into
+    /// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).
+    ///
+    /// Expects to receive a list of fully-qualified names (e.g., `bleach.clean`, rather than `clean`).
+    ///
+    /// This setting helps you avoid false positives in code like:
+    ///
+    /// ```python
+    /// from bleach import clean
+    /// from markupsafe import Markup
+    ///
+    /// cleaned_markup = Markup(clean(some_user_input))
+    /// ```
+    ///
+    /// Where the use of [`bleach.clean`](https://bleach.readthedocs.io/en/latest/clean.html)
+    /// usually ensures that there's no XSS vulnerability.
+    ///
+    /// Although it is not recommended, you may also use this setting to whitelist other
+    /// kinds of calls, e.g. calls to i18n translation functions, where how safe that is
+    /// will depend on the implementation and how well the translations are audited.
+    ///
+    /// Another common use-case is to wrap the output of functions that generate markup
+    /// like [`xml.etree.ElementTree.tostring`](https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.tostring)
+    /// or template rendering engines where sanitization of potential user input is either
+    /// already baked in or has to happen before rendering.
+    #[option(
+        default = "[]",
+        value_type = "list[str]",
+        example = "allowed-markup-calls = [\"bleach.clean\", \"my_package.sanitize\"]"
+    )]
+    pub allowed_markup_calls: Option<Vec<String>>,
 }

 impl RuffOptions {
@ -3100,6 +3131,7 @@ impl RuffOptions {
                .parenthesize_tuple_in_subscript
                .unwrap_or_default(),
            extend_markup_names: self.extend_markup_names.unwrap_or_default(),
+            allowed_markup_calls: self.allowed_markup_calls.unwrap_or_default(),
        }
    }
 }