[ruff] Adds an allowlist for unsafe-markup-use (RUF035) (#15076)

Closes: #14523 ## Summary Adds a whitelist of calls allowed to be used within a `markupsafe.Markup` call. ## Test Plan `cargo nextest run`
2025-08-04 18:58:04 +00:00 · 2024-12-20 10:36:12 +01:00 · 2024-12-20 10:36:12 +01:00 · 089a98e904
commit 089a98e904
parent 913bce3cd5
8 changed files with 120 additions and 10 deletions
--- a/crates/ruff/tests/snapshots/show_settings__display_default_settings.snap
+++ b/crates/ruff/tests/snapshots/show_settings__display_default_settings.snap
@ -375,6 +375,7 @@ linter.pylint.max_locals = 15
 linter.pyupgrade.keep_runtime_typing = false
 linter.ruff.parenthesize_tuple_in_subscript = false
 linter.ruff.extend_markup_names = []
+linter.ruff.allowed_markup_calls = []

 # Formatter Settings
 formatter.exclude = []
--- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF035_whitelisted_markup_calls.py
+++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF035_whitelisted_markup_calls.py
@ -0,0 +1,9 @@
+from bleach import clean
+from markupsafe import Markup
+
+content = "<script>alert('Hello, world!')</script>"
+Markup(clean(content))
+
+# indirect assignments are currently not supported
+cleaned = clean(content)
+Markup(cleaned)  # RUF035
--- a/crates/ruff_linter/src/rules/ruff/mod.rs
+++ b/crates/ruff_linter/src/rules/ruff/mod.rs
@ -91,6 +91,7 @@ mod tests {
                ruff: super::settings::Settings {
                    parenthesize_tuple_in_subscript: true,
                    extend_markup_names: vec![],
+                    allowed_markup_calls: vec![],
                },
                ..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
            },
@ -107,6 +108,7 @@ mod tests {
                ruff: super::settings::Settings {
                    parenthesize_tuple_in_subscript: false,
                    extend_markup_names: vec![],
+                    allowed_markup_calls: vec![],
                },
                target_version: PythonVersion::Py310,
                ..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
@ -447,6 +449,30 @@ mod tests {
                ruff: super::settings::Settings {
                    parenthesize_tuple_in_subscript: true,
                    extend_markup_names: vec!["webhelpers.html.literal".to_string()],
+                    allowed_markup_calls: vec![],
+                },
+                preview: PreviewMode::Enabled,
+                ..LinterSettings::for_rule(rule_code)
+            },
+        )?;
+        assert_messages!(snapshot, diagnostics);
+        Ok(())
+    }
+
+    #[test_case(Rule::UnsafeMarkupUse, Path::new("RUF035_whitelisted_markup_calls.py"))]
+    fn whitelisted_markup_calls(rule_code: Rule, path: &Path) -> Result<()> {
+        let snapshot = format!(
+            "whitelisted_markup_calls__{}_{}",
+            rule_code.noqa_code(),
+            path.to_string_lossy()
+        );
+        let diagnostics = test_path(
+            Path::new("ruff").join(path).as_path(),
+            &LinterSettings {
+                ruff: super::settings::Settings {
+                    parenthesize_tuple_in_subscript: true,
+                    extend_markup_names: vec![],
+                    allowed_markup_calls: vec!["bleach.clean".to_string()],
                },
                preview: PreviewMode::Enabled,
                ..LinterSettings::for_rule(rule_code)
--- a/crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs
+++ b/crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs
@ -1,9 +1,9 @@
-use ruff_python_ast::ExprCall;
+use ruff_python_ast::{Expr, ExprCall};

 use ruff_diagnostics::{Diagnostic, Violation};
 use ruff_macros::{derive_message_formats, ViolationMetadata};
 use ruff_python_ast::name::QualifiedName;
-use ruff_python_semantic::Modules;
+use ruff_python_semantic::{Modules, SemanticModel};
 use ruff_text_size::Ranged;

 use crate::{checkers::ast::Checker, settings::LinterSettings};
@ -22,7 +22,9 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
 /// treated like [`markupsafe.Markup`].
 ///
 /// This rule was originally inspired by [flake8-markupsafe] but doesn't carve
-/// out any exceptions for i18n related calls.
+/// out any exceptions for i18n related calls by default.
+///
+/// You can use [`lint.ruff.allowed-markup-calls`] to specify exceptions.
 ///
 /// ## Example
 /// Given:
@ -64,6 +66,7 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
 /// ```
 /// ## Options
 /// - `lint.ruff.extend-markup-names`
+/// - `lint.ruff.allowed-markup-calls`
 ///
 /// ## References
 /// - [MarkupSafe](https://pypi.org/project/MarkupSafe/)
@ -95,7 +98,7 @@ pub(crate) fn unsafe_markup_call(checker: &mut Checker, call: &ExprCall) {
        return;
    }

-    if !is_unsafe_call(call) {
+    if !is_unsafe_call(call, checker.semantic(), checker.settings) {
        return;
    }

@ -127,12 +130,29 @@ fn is_markup_call(qualified_name: &QualifiedName, settings: &LinterSettings) ->
        .any(|target| *qualified_name == target)
 }

-fn is_unsafe_call(call: &ExprCall) -> bool {
+fn is_unsafe_call(call: &ExprCall, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
    // technically this could be circumvented by using a keyword argument
    // but without type-inference we can't really know which keyword argument
    // corresponds to the first positional argument and either way it is
    // unlikely that someone will actually use a keyword argument here
    // TODO: Eventually we may want to allow dynamic values, as long as they
    //       have a __html__ attribute, since that is part of the API
-    matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr())
+    matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr() && !is_whitelisted_call(first, semantic, settings))
+}
+
+fn is_whitelisted_call(expr: &Expr, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
+    let Expr::Call(ExprCall { func, .. }) = expr else {
+        return false;
+    };
+
+    let Some(qualified_name) = semantic.resolve_qualified_name(func) else {
+        return false;
+    };
+
+    settings
+        .ruff
+        .allowed_markup_calls
+        .iter()
+        .map(|target| QualifiedName::from_dotted_name(target))
+        .any(|target| qualified_name == target)
 }
--- a/crates/ruff_linter/src/rules/ruff/settings.rs
+++ b/crates/ruff_linter/src/rules/ruff/settings.rs
@ -8,6 +8,7 @@ use std::fmt;
 pub struct Settings {
    pub parenthesize_tuple_in_subscript: bool,
    pub extend_markup_names: Vec<String>,
+    pub allowed_markup_calls: Vec<String>,
 }

 impl fmt::Display for Settings {
@ -18,6 +19,7 @@ impl fmt::Display for Settings {
            fields = [
                self.parenthesize_tuple_in_subscript,
                self.extend_markup_names | array,
+                self.allowed_markup_calls | array,
            ]
        }
        Ok(())
--- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linterrulesrufftestswhitelisted_markup_calls__RUF035_RUF035_whitelisted_markup_calls.py.snap
+++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linterrulesrufftestswhitelisted_markup_calls__RUF035_RUF035_whitelisted_markup_calls.py.snap
@ -0,0 +1,10 @@
+---
+source: crates/ruff_linter/src/rules/ruff/mod.rs
+---
+RUF035_whitelisted_markup_calls.py:9:1: RUF035 Unsafe use of `markupsafe.Markup` detected
+  |
+7 | # indirect assignments are currently not supported
+8 | cleaned = clean(content)
+9 | Markup(cleaned)  # RUF035
+  | ^^^^^^^^^^^^^^^ RUF035
+  |
--- a/crates/ruff_workspace/src/options.rs
+++ b/crates/ruff_workspace/src/options.rs
@ -3079,18 +3079,49 @@ pub struct RuffOptions {
    )]
    pub parenthesize_tuple_in_subscript: Option<bool>,

-    /// A list of additional callable names that behave like [`markupsafe.Markup`].
+    /// A list of additional callable names that behave like
+    /// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).
    ///
    /// Expects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than
    /// `literal`).
-    ///
-    /// [markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup
    #[option(
        default = "[]",
        value_type = "list[str]",
        example = "extend-markup-names = [\"webhelpers.html.literal\", \"my_package.Markup\"]"
    )]
    pub extend_markup_names: Option<Vec<String>>,
+
+    /// A list of callable names, whose result may be safely passed into
+    /// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).
+    ///
+    /// Expects to receive a list of fully-qualified names (e.g., `bleach.clean`, rather than `clean`).
+    ///
+    /// This setting helps you avoid false positives in code like:
+    ///
+    /// ```python
+    /// from bleach import clean
+    /// from markupsafe import Markup
+    ///
+    /// cleaned_markup = Markup(clean(some_user_input))
+    /// ```
+    ///
+    /// Where the use of [`bleach.clean`](https://bleach.readthedocs.io/en/latest/clean.html)
+    /// usually ensures that there's no XSS vulnerability.
+    ///
+    /// Although it is not recommended, you may also use this setting to whitelist other
+    /// kinds of calls, e.g. calls to i18n translation functions, where how safe that is
+    /// will depend on the implementation and how well the translations are audited.
+    ///
+    /// Another common use-case is to wrap the output of functions that generate markup
+    /// like [`xml.etree.ElementTree.tostring`](https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.tostring)
+    /// or template rendering engines where sanitization of potential user input is either
+    /// already baked in or has to happen before rendering.
+    #[option(
+        default = "[]",
+        value_type = "list[str]",
+        example = "allowed-markup-calls = [\"bleach.clean\", \"my_package.sanitize\"]"
+    )]
+    pub allowed_markup_calls: Option<Vec<String>>,
 }

 impl RuffOptions {
@ -3100,6 +3131,7 @@ impl RuffOptions {
                .parenthesize_tuple_in_subscript
                .unwrap_or_default(),
            extend_markup_names: self.extend_markup_names.unwrap_or_default(),
+            allowed_markup_calls: self.allowed_markup_calls.unwrap_or_default(),
        }
    }
 }
--- a/ruff.schema.json
+++ b/ruff.schema.json
@ -2758,8 +2758,18 @@
    "RuffOptions": {
      "type": "object",
      "properties": {
+        "allowed-markup-calls": {
+          "description": "A list of callable names, whose result may be safely passed into [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).\n\nExpects to receive a list of fully-qualified names (e.g., `bleach.clean`, rather than `clean`).\n\nThis setting helps you avoid false positives in code like:\n\n```python from bleach import clean from markupsafe import Markup\n\ncleaned_markup = Markup(clean(some_user_input)) ```\n\nWhere the use of [`bleach.clean`](https://bleach.readthedocs.io/en/latest/clean.html) usually ensures that there's no XSS vulnerability.\n\nAlthough it is not recommended, you may also use this setting to whitelist other kinds of calls, e.g. calls to i18n translation functions, where how safe that is will depend on the implementation and how well the translations are audited.\n\nAnother common use-case is to wrap the output of functions that generate markup like [`xml.etree.ElementTree.tostring`](https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.tostring) or template rendering engines where sanitization of potential user input is either already baked in or has to happen before rendering.",
+          "type": [
+            "array",
+            "null"
+          ],
+          "items": {
+            "type": "string"
+          }
+        },
        "extend-markup-names": {
-          "description": "A list of additional callable names that behave like [`markupsafe.Markup`].\n\nExpects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than `literal`).\n\n[markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup",
+          "description": "A list of additional callable names that behave like [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).\n\nExpects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than `literal`).",
          "type": [
            "array",
            "null"