[flake8-markupsafe] Adds Implementation for MS001 via RUF035 (#14224)

Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
David Salvisberg 2024-11-11 19:30:03 +01:00 committed by GitHub
parent b8a65182dd
commit f82ee8ea59
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 321 additions and 1 deletions

View file

@ -375,6 +375,7 @@ linter.pylint.max_public_methods = 20
linter.pylint.max_locals = 15
linter.pyupgrade.keep_runtime_typing = false
linter.ruff.parenthesize_tuple_in_subscript = false
linter.ruff.extend_markup_names = []
# Formatter Settings
formatter.exclude = []

View file

@ -0,0 +1,18 @@
import flask
from markupsafe import Markup, escape
content = "<script>alert('Hello, world!')</script>"
Markup(f"unsafe {content}") # RUF035
flask.Markup("unsafe {}".format(content)) # RUF035
Markup("safe {}").format(content)
flask.Markup(b"safe {}", encoding='utf-8').format(content)
escape(content)
Markup(content) # RUF035
flask.Markup("unsafe %s" % content) # RUF035
Markup(object="safe")
Markup(object="unsafe {}".format(content)) # Not currently detected
# NOTE: We may be able to get rid of these false positives with red-knot
# if it includes comprehensive constant expression detection/evaluation.
Markup("*" * 8) # RUF035 (false positive)
flask.Markup("hello {}".format("world")) # RUF035 (false positive)

View file

@ -0,0 +1,6 @@
from markupsafe import Markup
from webhelpers.html import literal
content = "<script>alert('Hello, world!')</script>"
Markup(f"unsafe {content}") # RUF035
literal(f"unsafe {content}") # RUF035

View file

@ -0,0 +1,7 @@
from webhelpers.html import literal
# NOTE: This test case exists to make sure our optimization doesn't cause
# additional markup names to be skipped if we don't import either
# markupsafe or flask first.
content = "<script>alert('Hello, world!')</script>"
literal(f"unsafe {content}") # RUF035

View file

@ -1030,6 +1030,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::IntOnSlicedStr) {
refurb::rules::int_on_sliced_str(checker, call);
}
if checker.enabled(Rule::UnsafeMarkupUse) {
ruff::rules::unsafe_markup_call(checker, call);
}
}
Expr::Dict(dict) => {
if checker.any_enabled(&[

View file

@ -966,6 +966,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "032") => (RuleGroup::Preview, rules::ruff::rules::DecimalFromFloatLiteral),
(Ruff, "033") => (RuleGroup::Preview, rules::ruff::rules::PostInitDefault),
(Ruff, "034") => (RuleGroup::Preview, rules::ruff::rules::UselessIfElse),
(Ruff, "035") => (RuleGroup::Preview, rules::ruff::rules::UnsafeMarkupUse),
(Ruff, "100") => (RuleGroup::Stable, rules::ruff::rules::UnusedNOQA),
(Ruff, "101") => (RuleGroup::Stable, rules::ruff::rules::RedirectedNOQA),

View file

@ -79,6 +79,7 @@ mod tests {
&LinterSettings {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec![],
},
..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
},
@ -94,6 +95,7 @@ mod tests {
&LinterSettings {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: false,
extend_markup_names: vec![],
},
target_version: PythonVersion::Py310,
..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
@ -385,6 +387,7 @@ mod tests {
}
#[test_case(Rule::ZipInsteadOfPairwise, Path::new("RUF007.py"))]
#[test_case(Rule::UnsafeMarkupUse, Path::new("RUF035.py"))]
fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"preview__{}_{}",
@ -401,4 +404,27 @@ mod tests {
assert_messages!(snapshot, diagnostics);
Ok(())
}
#[test_case(Rule::UnsafeMarkupUse, Path::new("RUF035_extend_markup_names.py"))]
#[test_case(Rule::UnsafeMarkupUse, Path::new("RUF035_skip_early_out.py"))]
fn extend_allowed_callable(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"extend_allow_callables__{}_{}",
rule_code.noqa_code(),
path.to_string_lossy()
);
let diagnostics = test_path(
Path::new("ruff").join(path).as_path(),
&LinterSettings {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec!["webhelpers.html.literal".to_string()],
},
preview: PreviewMode::Enabled,
..LinterSettings::for_rule(rule_code)
},
)?;
assert_messages!(snapshot, diagnostics);
Ok(())
}
}

View file

@ -28,6 +28,7 @@ pub(crate) use static_key_dict_comprehension::*;
pub(crate) use test_rules::*;
pub(crate) use unnecessary_iterable_allocation_for_first_element::*;
pub(crate) use unnecessary_key_check::*;
pub(crate) use unsafe_markup_use::*;
pub(crate) use unused_async::*;
pub(crate) use unused_noqa::*;
pub(crate) use useless_if_else::*;
@ -67,6 +68,7 @@ mod suppression_comment_visitor;
pub(crate) mod test_rules;
mod unnecessary_iterable_allocation_for_first_element;
mod unnecessary_key_check;
mod unsafe_markup_use;
mod unused_async;
mod unused_noqa;
mod useless_if_else;

View file

@ -0,0 +1,138 @@
use ruff_python_ast::ExprCall;
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::name::QualifiedName;
use ruff_python_semantic::Modules;
use ruff_text_size::Ranged;
use crate::{checkers::ast::Checker, settings::LinterSettings};
/// ## What it does
/// Checks for non-literal strings being passed to [`markupsafe.Markup`].
///
/// ## Why is this bad?
/// [`markupsafe.Markup`] does not perform any escaping, so passing dynamic
/// content, like f-strings, variables or interpolated strings will potentially
/// lead to XSS vulnerabilities.
///
/// Instead you should interpolate the [`markupsafe.Markup`] object.
///
/// Using [`lint.ruff.extend-markup-names`] additional objects can be
/// treated like [`markupsafe.Markup`].
///
/// This rule was originally inspired by [flake8-markupsafe] but doesn't carve
/// out any exceptions for i18n related calls.
///
/// ## Example
/// Given:
/// ```python
/// from markupsafe import Markup
///
/// content = "<script>alert('Hello, world!')</script>"
/// html = Markup(f"<b>{content}</b>") # XSS
/// ```
///
/// Use instead:
/// ```python
/// from markupsafe import Markup
///
/// content = "<script>alert('Hello, world!')</script>"
/// html = Markup("<b>{}</b>").format(content) # Safe
/// ```
///
/// Given:
/// ```python
/// from markupsafe import Markup
///
/// lines = [
/// Markup("<b>heading</b>"),
/// "<script>alert('XSS attempt')</script>",
/// ]
/// html = Markup("<br>".join(lines)) # XSS
/// ```
///
/// Use instead:
/// ```python
/// from markupsafe import Markup
///
/// lines = [
/// Markup("<b>heading</b>"),
/// "<script>alert('XSS attempt')</script>",
/// ]
/// html = Markup("<br>").join(lines) # Safe
/// ```
/// ## Options
/// - `lint.ruff.extend-markup-names`
///
/// ## References
/// - [MarkupSafe](https://pypi.org/project/MarkupSafe/)
/// - [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup)
///
/// [markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup
/// [flake8-markupsafe]: https://github.com/vmagamedov/flake8-markupsafe
#[violation]
pub struct UnsafeMarkupUse {
name: String,
}
impl Violation for UnsafeMarkupUse {
#[derive_message_formats]
fn message(&self) -> String {
let UnsafeMarkupUse { name } = self;
format!("Unsafe use of `{name}` detected")
}
}
/// Checks for unsafe calls to `[markupsafe.Markup]`.
///
/// [markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup
pub(crate) fn unsafe_markup_call(checker: &mut Checker, call: &ExprCall) {
if checker.settings.ruff.extend_markup_names.is_empty()
&& !(checker.semantic().seen_module(Modules::MARKUPSAFE)
|| checker.semantic().seen_module(Modules::FLASK))
{
return;
}
if !is_unsafe_call(call) {
return;
}
let Some(qualified_name) = checker.semantic().resolve_qualified_name(&call.func) else {
return;
};
if !is_markup_call(&qualified_name, checker.settings) {
return;
}
checker.diagnostics.push(Diagnostic::new(
UnsafeMarkupUse {
name: qualified_name.to_string(),
},
call.range(),
));
}
fn is_markup_call(qualified_name: &QualifiedName, settings: &LinterSettings) -> bool {
matches!(
qualified_name.segments(),
["markupsafe" | "flask", "Markup"]
) || settings
.ruff
.extend_markup_names
.iter()
.map(|target| QualifiedName::from_dotted_name(target))
.any(|target| *qualified_name == target)
}
fn is_unsafe_call(call: &ExprCall) -> bool {
// technically this could be circumvented by using a keyword argument
// but without type-inference we can't really know which keyword argument
// corresponds to the first positional argument and either way it is
// unlikely that someone will actually use a keyword argument here
// TODO: Eventually we may want to allow dynamic values, as long as they
// have a __html__ attribute, since that is part of the API
matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr())
}

View file

@ -7,6 +7,7 @@ use std::fmt;
#[derive(Debug, Clone, CacheKey, Default)]
pub struct Settings {
pub parenthesize_tuple_in_subscript: bool,
pub extend_markup_names: Vec<String>,
}
impl fmt::Display for Settings {
@ -15,7 +16,8 @@ impl fmt::Display for Settings {
formatter = f,
namespace = "linter.ruff",
fields = [
self.parenthesize_tuple_in_subscript
self.parenthesize_tuple_in_subscript,
self.extend_markup_names | array,
]
}
Ok(())

View file

@ -0,0 +1,18 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
---
RUF035_extend_markup_names.py:5:1: RUF035 Unsafe use of `markupsafe.Markup` detected
|
4 | content = "<script>alert('Hello, world!')</script>"
5 | Markup(f"unsafe {content}") # RUF035
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF035
6 | literal(f"unsafe {content}") # RUF035
|
RUF035_extend_markup_names.py:6:1: RUF035 Unsafe use of `webhelpers.html.literal` detected
|
4 | content = "<script>alert('Hello, world!')</script>"
5 | Markup(f"unsafe {content}") # RUF035
6 | literal(f"unsafe {content}") # RUF035
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF035
|

View file

@ -0,0 +1,10 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
---
RUF035_skip_early_out.py:7:1: RUF035 Unsafe use of `webhelpers.html.literal` detected
|
5 | # markupsafe or flask first.
6 | content = "<script>alert('Hello, world!')</script>"
7 | literal(f"unsafe {content}") # RUF035
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF035
|

View file

@ -0,0 +1,58 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
---
RUF035.py:5:1: RUF035 Unsafe use of `markupsafe.Markup` detected
|
4 | content = "<script>alert('Hello, world!')</script>"
5 | Markup(f"unsafe {content}") # RUF035
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF035
6 | flask.Markup("unsafe {}".format(content)) # RUF035
7 | Markup("safe {}").format(content)
|
RUF035.py:6:1: RUF035 Unsafe use of `flask.Markup` detected
|
4 | content = "<script>alert('Hello, world!')</script>"
5 | Markup(f"unsafe {content}") # RUF035
6 | flask.Markup("unsafe {}".format(content)) # RUF035
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF035
7 | Markup("safe {}").format(content)
8 | flask.Markup(b"safe {}", encoding='utf-8').format(content)
|
RUF035.py:10:1: RUF035 Unsafe use of `markupsafe.Markup` detected
|
8 | flask.Markup(b"safe {}", encoding='utf-8').format(content)
9 | escape(content)
10 | Markup(content) # RUF035
| ^^^^^^^^^^^^^^^ RUF035
11 | flask.Markup("unsafe %s" % content) # RUF035
12 | Markup(object="safe")
|
RUF035.py:11:1: RUF035 Unsafe use of `flask.Markup` detected
|
9 | escape(content)
10 | Markup(content) # RUF035
11 | flask.Markup("unsafe %s" % content) # RUF035
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF035
12 | Markup(object="safe")
13 | Markup(object="unsafe {}".format(content)) # Not currently detected
|
RUF035.py:17:1: RUF035 Unsafe use of `markupsafe.Markup` detected
|
15 | # NOTE: We may be able to get rid of these false positives with red-knot
16 | # if it includes comprehensive constant expression detection/evaluation.
17 | Markup("*" * 8) # RUF035 (false positive)
| ^^^^^^^^^^^^^^^ RUF035
18 | flask.Markup("hello {}".format("world")) # RUF035 (false positive)
|
RUF035.py:18:1: RUF035 Unsafe use of `flask.Markup` detected
|
16 | # if it includes comprehensive constant expression detection/evaluation.
17 | Markup("*" * 8) # RUF035 (false positive)
18 | flask.Markup("hello {}".format("world")) # RUF035 (false positive)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF035
|

View file

@ -1272,7 +1272,9 @@ impl<'a> SemanticModel<'a> {
"datetime" => self.seen.insert(Modules::DATETIME),
"django" => self.seen.insert(Modules::DJANGO),
"fastapi" => self.seen.insert(Modules::FASTAPI),
"flask" => self.seen.insert(Modules::FLASK),
"logging" => self.seen.insert(Modules::LOGGING),
"markupsafe" => self.seen.insert(Modules::MARKUPSAFE),
"mock" => self.seen.insert(Modules::MOCK),
"numpy" => self.seen.insert(Modules::NUMPY),
"os" => self.seen.insert(Modules::OS),
@ -1858,6 +1860,8 @@ bitflags! {
const ANYIO = 1 << 20;
const FASTAPI = 1 << 21;
const COPY = 1 << 22;
const MARKUPSAFE = 1 << 23;
const FLASK = 1 << 24;
}
}

View file

@ -1387,6 +1387,7 @@ impl Flake8ImportConventionsOptions {
}
}
}
#[derive(
Clone, Debug, PartialEq, Eq, Default, Serialize, Deserialize, OptionsMetadata, CombineOptions,
)]
@ -3005,6 +3006,19 @@ pub struct RuffOptions {
"#
)]
pub parenthesize_tuple_in_subscript: Option<bool>,
/// A list of additional callable names that behave like [`markupsafe.Markup`].
///
/// Expects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than
/// `literal`).
///
/// [markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup
#[option(
default = "[]",
value_type = "list[str]",
example = "extend-markup-names = [\"webhelpers.html.literal\", \"my_package.Markup\"]"
)]
pub extend_markup_names: Option<Vec<String>>,
}
impl RuffOptions {
@ -3013,6 +3027,7 @@ impl RuffOptions {
parenthesize_tuple_in_subscript: self
.parenthesize_tuple_in_subscript
.unwrap_or_default(),
extend_markup_names: self.extend_markup_names.unwrap_or_default(),
}
}
}

11
ruff.schema.json generated
View file

@ -2755,6 +2755,16 @@
"RuffOptions": {
"type": "object",
"properties": {
"extend-markup-names": {
"description": "A list of additional callable names that behave like [`markupsafe.Markup`].\n\nExpects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than `literal`).\n\n[markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup",
"type": [
"array",
"null"
],
"items": {
"type": "string"
}
},
"parenthesize-tuple-in-subscript": {
"description": "Whether to prefer accessing items keyed by tuples with parentheses around the tuple (see `RUF031`).",
"type": [
@ -3817,6 +3827,7 @@
"RUF032",
"RUF033",
"RUF034",
"RUF035",
"RUF1",
"RUF10",
"RUF100",