diff --git a/crates/ruff_linter/resources/test/fixtures/pandas_vet/PD011.py b/crates/ruff_linter/resources/test/fixtures/pandas_vet/PD011.py new file mode 100644 index 0000000000..bd9be741a3 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pandas_vet/PD011.py @@ -0,0 +1,38 @@ +import pandas as pd +import numpy as np + + +def test_numpy_unique_inverse(): + unique = np.unique_inverse([1, 2, 3, 2, 1]) + result = unique.values + + +def test_numpy_unique_all(): + unique = np.unique_all([1, 2, 3, 2, 1]) + result = unique.values + + +def test_numpy_unique_counts(): + unique = np.unique_counts([1, 2, 3, 2, 1]) + result = unique.values + + +def test_numpy_typed_unique_inverse(): + from typing import TYPE_CHECKING + if TYPE_CHECKING: + from numpy.lib._arraysetops_impl import UniqueInverseResult + unique: UniqueInverseResult[np.uint64] = np.unique_inverse([1, 2, 3, 2, 1]) + result = unique.values + + +def test_simple_non_pandas(): + p = 1 + result = p.values + + +def test_pandas_dataframe_values(): + """This should trigger PD011 - pandas DataFrame .values usage""" + import pandas as pd + x = pd.DataFrame() + result = x.values + diff --git a/crates/ruff_linter/src/rules/pandas_vet/helpers.rs b/crates/ruff_linter/src/rules/pandas_vet/helpers.rs index 3b42617ce7..45d7871392 100644 --- a/crates/ruff_linter/src/rules/pandas_vet/helpers.rs +++ b/crates/ruff_linter/src/rules/pandas_vet/helpers.rs @@ -18,6 +18,7 @@ pub(super) enum Resolution { /// Test an [`Expr`] for relevance to Pandas-related operations. pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resolution { match expr { + // Literals in the expression itself are definitely not pandas-related Expr::StringLiteral(_) | Expr::BytesLiteral(_) | Expr::NumberLiteral(_) @@ -43,6 +44,7 @@ pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resoluti if matches!(name.id.as_str(), "self" | "cls") { Resolution::IrrelevantBinding } else { + // Function arguments are treated as relevant unless proven otherwise Resolution::RelevantLocal } } @@ -52,72 +54,62 @@ pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resoluti | BindingKind::LoopVar | BindingKind::Global(_) | BindingKind::Nonlocal(_, _) => { - // Check if this binding comes from pandas or another relevant source + // Check if this binding comes from a definitively non-pandas source if let Some(assigned_value) = find_binding_value(binding, semantic) { - // Check if the assigned value comes from pandas - if is_pandas_related_value(assigned_value, semantic) { - Resolution::RelevantLocal - } else { - // This is a non-pandas binding (e.g., literal, numpy, etc.) - Resolution::IrrelevantBinding + // Recurse to check the assigned value + match test_expression(assigned_value, semantic) { + // If the assigned value is definitively not pandas (literals, etc.) + Resolution::IrrelevantExpression => { + Resolution::IrrelevantBinding + } + // If it's clearly pandas-related, treat as relevant + Resolution::RelevantLocal | Resolution::PandasModule => { + Resolution::RelevantLocal + } + // If we got IrrelevantBinding, it means we traced it back to a + // non-pandas source (e.g., numpy import), so keep it as irrelevant + Resolution::IrrelevantBinding => Resolution::IrrelevantBinding, } } else { - // If we can't determine the source, be conservative and treat as relevant + // If we can't determine the source, be liberal and treat as relevant + // to avoid false negatives (e.g., function parameters with annotations) Resolution::RelevantLocal } } - BindingKind::Import(import) - if matches!(import.qualified_name().segments(), ["pandas"]) => - { - Resolution::PandasModule + BindingKind::Import(import) => { + let segments = import.qualified_name().segments(); + if matches!(segments, ["pandas"]) { + Resolution::PandasModule + } else if matches!(segments, ["numpy"]) { + // Explicitly exclude numpy imports + Resolution::IrrelevantBinding + } else { + Resolution::IrrelevantBinding + } } _ => Resolution::IrrelevantBinding, } }) } + // Recurse for attribute access (e.g., df.values -> check df) + Expr::Attribute(attr) => test_expression(attr.value.as_ref(), semantic), + // Recurse for call expressions (e.g., pd.DataFrame() -> check pd) + Expr::Call(call) => { + // Check if this is a pandas function call + if let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) { + let segments = qualified_name.segments(); + if segments.starts_with(&["pandas"]) { + return Resolution::RelevantLocal; + } + // Explicitly exclude numpy function calls + if segments.starts_with(&["numpy"]) || segments.starts_with(&["np"]) { + return Resolution::IrrelevantBinding; + } + } + // For other calls, recurse on the function expression + test_expression(&call.func, semantic) + } + // For other expressions, default to relevant to avoid false negatives _ => Resolution::RelevantLocal, } } - -/// Check if an expression value is related to pandas (e.g., comes from pandas module or operations). -fn is_pandas_related_value(expr: &Expr, semantic: &SemanticModel) -> bool { - match expr { - // Literals are definitely not pandas-related - Expr::StringLiteral(_) - | Expr::BytesLiteral(_) - | Expr::NumberLiteral(_) - | Expr::BooleanLiteral(_) - | Expr::NoneLiteral(_) - | Expr::EllipsisLiteral(_) - | Expr::Tuple(_) - | Expr::List(_) - | Expr::Set(_) - | Expr::Dict(_) => false, - - // Direct pandas module access - Expr::Name(name) => { - if let Some(binding_id) = semantic.resolve_name(name) { - let binding = semantic.binding(binding_id); - if let BindingKind::Import(import) = &binding.kind { - return matches!(import.qualified_name().segments(), ["pandas"]); - } - } - false - } - // Method calls on pandas objects - Expr::Attribute(attr) => { - // Check if the object being accessed is pandas-related - is_pandas_related_value(attr.value.as_ref(), semantic) - } - // Function calls - check if they're pandas functions - Expr::Call(call) => { - if let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) { - return qualified_name.segments().starts_with(&["pandas"]); - } - false - } - // For other expressions, we can't easily determine if they're pandas-related - // so we return false to be conservative (treat as non-pandas) - _ => false, - } -} diff --git a/crates/ruff_linter/src/rules/pandas_vet/mod.rs b/crates/ruff_linter/src/rules/pandas_vet/mod.rs index 46c1f439f9..7e6582d30c 100644 --- a/crates/ruff_linter/src/rules/pandas_vet/mod.rs +++ b/crates/ruff_linter/src/rules/pandas_vet/mod.rs @@ -269,53 +269,6 @@ mod tests { ", "PD011_pass_node_name" )] - #[test_case( - r" - import pandas as pd - import numpy as np - unique = np.unique_inverse([1, 2, 3, 2, 1]) - result = unique.values - ", - "PD011_pass_numpy_unique_inverse" - )] - #[test_case( - r" - import pandas as pd - import numpy as np - unique = np.unique_all([1, 2, 3, 2, 1]) - result = unique.values - ", - "PD011_pass_numpy_unique_all" - )] - #[test_case( - r" - import pandas as pd - import numpy as np - unique = np.unique_counts([1, 2, 3, 2, 1]) - result = unique.values - ", - "PD011_pass_numpy_unique_counts" - )] - #[test_case( - r" - import pandas as pd - from typing import TYPE_CHECKING - if TYPE_CHECKING: - from numpy.lib._arraysetops_impl import UniqueInverseResult - import numpy as np - unique: UniqueInverseResult[np.uint64] = np.unique_inverse([1, 2, 3, 2, 1]) - result = unique.values - ", - "PD011_pass_numpy_typed_unique_inverse" - )] - #[test_case( - r" - import pandas as pd - p = 1 - result = p.values - ", - "PD011_pass_simple_non_pandas" - )] #[test_case( r#" import pandas as pd @@ -426,6 +379,7 @@ mod tests { )] #[test_case(Rule::PandasUseOfInplaceArgument, Path::new("PD002.py"))] #[test_case(Rule::PandasNuniqueConstantSeriesCheck, Path::new("PD101.py"))] + #[test_case(Rule::PandasUseOfDotValues, Path::new("PD011.py"))] fn paths(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let diagnostics = test_path( diff --git a/crates/ruff_linter/src/rules/pandas_vet/snapshots/ruff_linter__rules__pandas_vet__tests__PD011_PD011.py.snap b/crates/ruff_linter/src/rules/pandas_vet/snapshots/ruff_linter__rules__pandas_vet__tests__PD011_PD011.py.snap new file mode 100644 index 0000000000..ccd01f2903 --- /dev/null +++ b/crates/ruff_linter/src/rules/pandas_vet/snapshots/ruff_linter__rules__pandas_vet__tests__PD011_PD011.py.snap @@ -0,0 +1,11 @@ +--- +source: crates/ruff_linter/src/rules/pandas_vet/mod.rs +--- +PD011 Use `.to_numpy()` instead of `.values` + --> PD011.py:37:14 + | +35 | import pandas as pd +36 | x = pd.DataFrame() +37 | result = x.values + | ^^^^^^^^ + |