mirror of
https://github.com/zizmorcore/zizmor.git
synced 2025-12-23 08:47:33 +00:00
feat: track anchors in yamlpath (#1263)
This commit is contained in:
parent
32fdf28173
commit
c46558e9a3
9 changed files with 325 additions and 12 deletions
1
.github/workflows/release-support-crate.yml
vendored
1
.github/workflows/release-support-crate.yml
vendored
|
|
@ -9,6 +9,7 @@ on:
|
|||
- "subfeature/v*"
|
||||
- "yamlpath/v*"
|
||||
- "yamlpatch/v*"
|
||||
- "tree-sitter-iter/v*"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
package-name:
|
||||
|
|
|
|||
16
Cargo.lock
generated
16
Cargo.lock
generated
|
|
@ -2367,6 +2367,12 @@ version = "1.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "self_cell"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f7d95a54511e0c7be3f51e8867aa8cf35148d7b9445d44de2f943e2b206e749"
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.27"
|
||||
|
|
@ -3165,6 +3171,14 @@ dependencies = [
|
|||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-iter"
|
||||
version = "0.0.2"
|
||||
dependencies = [
|
||||
"tree-sitter",
|
||||
"tree-sitter-yaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-language"
|
||||
version = "0.1.5"
|
||||
|
|
@ -3857,10 +3871,12 @@ name = "yamlpath"
|
|||
version = "0.25.0"
|
||||
dependencies = [
|
||||
"line-index",
|
||||
"self_cell",
|
||||
"serde",
|
||||
"serde_yaml",
|
||||
"thiserror 2.0.17",
|
||||
"tree-sitter",
|
||||
"tree-sitter-iter",
|
||||
"tree-sitter-yaml",
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ members = [
|
|||
"crates/github-actions-expressions",
|
||||
"crates/github-actions-models",
|
||||
"crates/subfeature",
|
||||
"crates/tree-sitter-iter",
|
||||
"crates/yamlpatch",
|
||||
"crates/yamlpath",
|
||||
"crates/zizmor",
|
||||
|
|
@ -53,6 +54,7 @@ owo-colors = "4.2.3"
|
|||
regex = "1.11.3"
|
||||
reqwest = { version = "0.12.23", default-features = false }
|
||||
reqwest-middleware = "0.4.2"
|
||||
self_cell = "1"
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde-sarif = "0.8.0"
|
||||
serde_json = "1.0.145"
|
||||
|
|
@ -69,6 +71,7 @@ tracing-indicatif = "0.3.13"
|
|||
tracing-subscriber = "0.3.20"
|
||||
tree-sitter = "0.25.10"
|
||||
tree-sitter-bash = "0.23.3"
|
||||
tree-sitter-iter = { path = "crates/tree-sitter-iter", version = "0.0.2" }
|
||||
tree-sitter-powershell = "0.25.9"
|
||||
yamlpath = { path = "crates/yamlpath", version = "0.25.0" }
|
||||
yamlpatch = { path = "crates/yamlpatch", version = "0.3.0" }
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ See the table and each subdirectory for more details on each crate.
|
|||
| [`yamlpatch`][yamlpath-dir] | [][yamlpath-crates] | [][yamlpath-docs] | Comment and format-preserving YAML patch operations. |
|
||||
| [`github-actions-models`][github-actions-models-dir] | [][github-actions-models-crates] | [][github-actions-models-docs] | Unofficial, high-quality data models for GitHub Actions workflows, actions, and related components. |
|
||||
| [`github-actions-expressions`][github-actions-expressions-dir] | [][github-actions-expressions-crates] | [][github-actions-expressions-docs] | Parser and library for GitHub Actions expressions. |
|
||||
| [`tree-sitter-iter`][tree-sitter-iter-dir] | [][tree-sitter-iter-crates] | [][tree-sitter-iter-docs] | A very simple pre-order iterator for tree-sitter CSTs. |
|
||||
|
||||
[zizmor-dir]: ./zizmor
|
||||
[zizmor-crates]: https://crates.io/crates/zizmor
|
||||
|
|
@ -35,3 +36,7 @@ See the table and each subdirectory for more details on each crate.
|
|||
[github-actions-expressions-dir]: ./github-actions-expressions
|
||||
[github-actions-expressions-crates]: https://crates.io/crates/github-actions-expressions
|
||||
[github-actions-expressions-docs]: https://docs.rs/github-actions-expressions
|
||||
|
||||
[tree-sitter-iter-dir]: ./tree-sitter-iter
|
||||
[tree-sitter-iter-crates]: https://crates.io/crates/tree-sitter-iter
|
||||
[tree-sitter-iter-docs]: https://docs.rs/tree-sitter-iter
|
||||
|
|
|
|||
18
crates/tree-sitter-iter/Cargo.toml
Normal file
18
crates/tree-sitter-iter/Cargo.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
[package]
|
||||
name = "tree-sitter-iter"
|
||||
description = "A very simple pre-order iterator for tree-sitter CSTs"
|
||||
version = "0.0.2"
|
||||
authors.workspace = true
|
||||
homepage.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tree-sitter.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
tree-sitter-yaml = { workspace = true }
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
49
crates/tree-sitter-iter/README.md
Normal file
49
crates/tree-sitter-iter/README.md
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# tree-sitter-iter
|
||||
|
||||
[](https://zizmor.sh/)
|
||||
[](https://github.com/zizmorcore/zizmor/actions/workflows/ci.yml)
|
||||
[](https://crates.io/crates/tree-sitter-iter)
|
||||
[](https://docs.rs/tree-sitter-iter)
|
||||
[](https://github.com/sponsors/woodruffw)
|
||||
[](https://discord.com/invite/PGU3zGZuGG)
|
||||
|
||||
A very simple pre-order iterator for tree-sitter CSTs.
|
||||
|
||||
This library is part of [zizmor].
|
||||
|
||||
## Usage
|
||||
|
||||
Given a `tree_sitter::Tree`, you can create a `TreeIter` to iterate
|
||||
over its nodes in pre-order:
|
||||
|
||||
```rust
|
||||
use tree_sitter_iter::TreeIter;
|
||||
|
||||
let tree: tree_sitter::Tree = parse(); // Your parsing logic here.
|
||||
|
||||
for node in TreeIter::new(&tree) {
|
||||
println!("Node kind: {}", node.kind());
|
||||
}
|
||||
```
|
||||
|
||||
`TreeIter` implements the standard `Iterator` trait, meaning that
|
||||
you can use any of the normal iterator combinators. For example, to
|
||||
filter only to nodes of a specific kind:
|
||||
|
||||
```rust
|
||||
for node in TreeIter::new(&tree).filter(|n| n.kind() == "call") {
|
||||
// Do something with each "call" node.
|
||||
}
|
||||
```
|
||||
|
||||
`tree-sitter-iter`'s space and time performance is equivalent to a
|
||||
walk of the tree using the `TreeCursor` APIs. In other words, it's
|
||||
exactly the same as using a `TreeCursor` manually, but with a more ergonomic
|
||||
iterator interface.
|
||||
|
||||
|
||||
See the [documentation] for more details.
|
||||
|
||||
[documentation]: https://docs.rs/tree-sitter-iter
|
||||
[zizmor]: https://zizmor.sh
|
||||
|
||||
102
crates/tree-sitter-iter/src/lib.rs
Normal file
102
crates/tree-sitter-iter/src/lib.rs
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
//! A very simple pre-order iterator for tree-sitter CSTs.
|
||||
|
||||
#![deny(rustdoc::broken_intra_doc_links)]
|
||||
#![deny(missing_docs)]
|
||||
#![allow(clippy::redundant_field_names)]
|
||||
#![forbid(unsafe_code)]
|
||||
|
||||
use tree_sitter::{Node, Tree, TreeCursor};
|
||||
|
||||
/// A pre-order iterator over the nodes of a tree-sitter syntax tree.
|
||||
pub struct TreeIter<'tree> {
|
||||
cursor: Option<TreeCursor<'tree>>,
|
||||
}
|
||||
|
||||
impl<'tree> TreeIter<'tree> {
|
||||
/// Creates a new `TreeSitterIter` for the given syntax tree.
|
||||
pub fn new(tree: &'tree Tree) -> Self {
|
||||
Self {
|
||||
cursor: Some(tree.root_node().walk()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'tree> Iterator for TreeIter<'tree> {
|
||||
type Item = Node<'tree>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let cursor = match &mut self.cursor {
|
||||
Some(cursor) => cursor,
|
||||
None => return None,
|
||||
};
|
||||
|
||||
let node = cursor.node();
|
||||
|
||||
if cursor.goto_first_child() || cursor.goto_next_sibling() {
|
||||
return Some(node);
|
||||
}
|
||||
|
||||
loop {
|
||||
if !cursor.goto_parent() {
|
||||
// If we can't go to the parent, the walk will be
|
||||
// complete *after* the current node.
|
||||
self.cursor = None;
|
||||
break;
|
||||
}
|
||||
|
||||
if cursor.goto_next_sibling() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Some(node)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
/// Checks that `TreeIter` yields exactly as many nodes as tree-sitter
/// itself reports for each parsed YAML sample.
#[test]
|
||||
fn test_iter_is_total() {
|
||||
let anchors = r#"
|
||||
jobs:
|
||||
job1:
|
||||
env: &env_vars # Define the anchor on first use
|
||||
NODE_ENV: production
|
||||
DATABASE_URL: ${{ secrets.DATABASE_URL }}
|
||||
steps:
|
||||
- run: echo "Using production settings"
|
||||
|
||||
job2:
|
||||
env: *env_vars # Reuse the environment variables
|
||||
steps:
|
||||
- run: echo "Same environment variables here"
|
||||
"#;
|
||||
|
||||
// NOTE(ww): These node counts will probably change if
|
||||
// tree-sitter-yaml changes its node structure. Hopefully
|
||||
// that doesn't happen often.
|
||||
// Each entry pairs a YAML source with its expected total node count.
let testcases = &[
|
||||
("foo:", 9),
|
||||
("foo: # comment", 10),
|
||||
("foo: bar", 12),
|
||||
("foo: bar # comment", 13),
|
||||
("foo: []", 13),
|
||||
("foo: [] # comment", 14),
|
||||
(anchors, 100),
|
||||
];
|
||||
|
||||
for (src, expected_count) in testcases {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter_yaml::LANGUAGE.into())
|
||||
.expect("Error loading YAML grammar");
|
||||
let tree = parser.parse(src, None).expect("Failed to parse source");
|
||||
|
||||
// Reference count as reported by tree-sitter for the root node.
let node_count = tree.root_node().descendant_count();
|
||||
// Number of nodes actually produced by the iterator under test.
let iter_count = super::TreeIter::new(&tree).count();
|
||||
|
||||
// Pin the reference count first, then require the iterator to match it.
assert_eq!(node_count, *expected_count);
|
||||
assert_eq!(node_count, iter_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -16,9 +16,11 @@ workspace = true
|
|||
|
||||
[dependencies]
|
||||
line-index.workspace = true
|
||||
self_cell.workspace = true
|
||||
serde.workspace = true
|
||||
thiserror.workspace = true
|
||||
tree-sitter.workspace = true
|
||||
tree-sitter-iter = { workspace = true }
|
||||
tree-sitter-yaml = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
|||
|
|
@ -13,10 +13,13 @@
|
|||
#![allow(clippy::redundant_field_names)]
|
||||
#![forbid(unsafe_code)]
|
||||
|
||||
use std::{collections::HashMap, ops::Deref};
|
||||
|
||||
use line_index::LineIndex;
|
||||
use serde::Serialize;
|
||||
use thiserror::Error;
|
||||
use tree_sitter::{Language, Node, Parser, Tree};
|
||||
use tree_sitter::{Language, Node, Parser};
|
||||
use tree_sitter_iter::TreeIter;
|
||||
|
||||
/// Possible errors when performing YAML path routes.
|
||||
#[derive(Error, Debug)]
|
||||
|
|
@ -49,6 +52,11 @@ pub enum QueryError {
|
|||
/// the given field name.
|
||||
#[error("syntax node `{0}` is missing child field `{1}`")]
|
||||
MissingChildField(String, &'static str),
|
||||
/// The input contains a duplicate YAML anchor.
|
||||
/// This is valid YAML, but we intentionally forbid it for now
|
||||
/// for simplicity's sake.
|
||||
#[error("input contains duplicate YAML anchor: `{0}`")]
|
||||
DuplicateAnchor(String),
|
||||
/// Any other route error that doesn't fit cleanly above.
|
||||
#[error("route error: {0}")]
|
||||
Other(String),
|
||||
|
|
@ -272,10 +280,80 @@ enum QueryMode {
|
|||
Exact,
|
||||
}
|
||||
|
||||
/// A holder type so that we can associate both source and node references
|
||||
/// with the same lifetime for [`self_cell`].
|
||||
#[derive(Clone)]
|
||||
struct SourceTree {
|
||||
source: String,
|
||||
tree: tree_sitter::Tree,
|
||||
}
|
||||
|
||||
impl Deref for SourceTree {
|
||||
type Target = tree_sitter::Tree;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.tree
|
||||
}
|
||||
}
|
||||
|
||||
/// Maps each anchor's name (without its leading sigil character) to the
/// parent node of the anchor; both borrow from the owning tree and source.
type AnchorMap<'tree> = HashMap<&'tree str, Node<'tree>>;
|
||||
|
||||
self_cell::self_cell!(
|
||||
/// A wrapper for a [`SourceTree`] that also contains a computed
|
||||
/// anchor map.
///
/// The anchor map is the dependent half of the cell: its keys and
/// nodes borrow from the owned [`SourceTree`].
|
||||
struct Tree {
|
||||
owner: SourceTree,
|
||||
|
||||
#[covariant]
|
||||
dependent: AnchorMap,
|
||||
}
|
||||
);
|
||||
|
||||
impl Tree {
|
||||
fn build(inner: SourceTree) -> Result<Self, QueryError> {
|
||||
Tree::try_new(SourceTree::clone(&inner), |tree| {
|
||||
let mut anchor_map = HashMap::new();
|
||||
|
||||
for anchor in TreeIter::new(tree).filter(|n| n.kind() == "anchor") {
|
||||
let anchor_name = anchor.utf8_text(tree.source.as_bytes()).unwrap();
|
||||
|
||||
// Only insert if the anchor name is unique.
|
||||
if anchor_map.contains_key(&anchor_name[1..]) {
|
||||
return Err(QueryError::DuplicateAnchor(anchor_name[1..].to_string()));
|
||||
}
|
||||
|
||||
// NOTE(ww): We could poke into the `anchor_name` child
|
||||
// instead of slicing, but this is simpler.
|
||||
anchor_map.insert(&anchor_name[1..], anchor.parent().unwrap());
|
||||
}
|
||||
|
||||
Ok(anchor_map)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Tree {
|
||||
fn clone(&self) -> Self {
|
||||
// Cloning is mildly annoying: we can clone the tree itself,
|
||||
// but we need to reconstruct the anchor map from scratch since
|
||||
// it borrows from the tree.
|
||||
// TODO: Can we do better here?
|
||||
// Unwrap safety: we're cloning from an existing valid owner.
|
||||
Self::build(self.borrow_owner().clone()).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Tree {
|
||||
type Target = tree_sitter::Tree;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.borrow_owner().tree
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a queryable YAML document.
|
||||
#[derive(Clone)]
|
||||
pub struct Document {
|
||||
source: String,
|
||||
tree: Tree,
|
||||
line_index: LineIndex,
|
||||
document_id: u16,
|
||||
|
|
@ -313,9 +391,16 @@ impl Document {
|
|||
|
||||
let line_index = LineIndex::new(&source);
|
||||
|
||||
Ok(Self {
|
||||
source,
|
||||
let source_tree = SourceTree {
|
||||
source: source,
|
||||
tree,
|
||||
};
|
||||
|
||||
// let anchor_id = language.id_for_node_kind("anchor", true);
|
||||
// let alias_id = language.id_for_node_kind("alias", true);
|
||||
|
||||
Ok(Self {
|
||||
tree: Tree::build(source_tree)?,
|
||||
line_index,
|
||||
document_id: language.id_for_node_kind("document", true),
|
||||
block_node_id: language.id_for_node_kind("block_node", true),
|
||||
|
|
@ -340,7 +425,7 @@ impl Document {
|
|||
/// Return a view of the original YAML source that this document was
|
||||
/// loaded from.
|
||||
pub fn source(&self) -> &str {
|
||||
&self.source
|
||||
&self.tree.borrow_owner().source
|
||||
}
|
||||
|
||||
/// Returns a [`Feature`] for the topmost semantic object in this document.
|
||||
|
|
@ -443,7 +528,7 @@ impl Document {
|
|||
///
|
||||
/// Panics if the feature's span is invalid.
|
||||
pub fn extract(&self, feature: &Feature) -> &str {
|
||||
&self.source[feature.location.byte_span.0..feature.location.byte_span.1]
|
||||
&self.source()[feature.location.byte_span.0..feature.location.byte_span.1]
|
||||
}
|
||||
|
||||
/// Returns a string slice of the original document corresponding to the given
|
||||
|
|
@ -458,11 +543,11 @@ impl Document {
|
|||
/// Panics if the feature's span is invalid.
|
||||
pub fn extract_with_leading_whitespace<'a>(&'a self, feature: &Feature) -> &'a str {
|
||||
let mut start_idx = feature.location.byte_span.0;
|
||||
let pre_slice = &self.source[0..start_idx];
|
||||
let pre_slice = &self.source()[0..start_idx];
|
||||
if let Some(last_newline) = pre_slice.rfind('\n') {
|
||||
// If everything between the last newline and the start_index
|
||||
// is ASCII spaces, then we include it.
|
||||
if self.source[last_newline + 1..start_idx]
|
||||
if self.source()[last_newline + 1..start_idx]
|
||||
.bytes()
|
||||
.all(|b| b == b' ')
|
||||
{
|
||||
|
|
@ -470,7 +555,7 @@ impl Document {
|
|||
}
|
||||
}
|
||||
|
||||
&self.source[start_idx..feature.location.byte_span.1]
|
||||
&self.source()[start_idx..feature.location.byte_span.1]
|
||||
}
|
||||
|
||||
/// Given a [`Feature`], return all comments that span the same range
|
||||
|
|
@ -541,6 +626,11 @@ impl Document {
|
|||
)
|
||||
}
|
||||
|
||||
/// Returns whether this document contains any YAML anchors.
|
||||
pub fn has_anchors(&self) -> bool {
|
||||
!self.tree.borrow_dependent().is_empty()
|
||||
}
|
||||
|
||||
/// Returns the topmost semantic object in the YAML document,
|
||||
/// i.e. the node corresponding to the first block or flow feature.
|
||||
fn top_object(&self) -> Result<Node<'_>, QueryError> {
|
||||
|
|
@ -697,7 +787,7 @@ impl Document {
|
|||
// NOTE: text unwraps are infallible, since our document is UTF-8.
|
||||
let key_value = match key.named_child(0) {
|
||||
Some(scalar) => {
|
||||
let key_value = scalar.utf8_text(self.source.as_bytes()).unwrap();
|
||||
let key_value = scalar.utf8_text(self.source().as_bytes()).unwrap();
|
||||
|
||||
match scalar.kind() {
|
||||
"single_quote_scalar" | "double_quote_scalar" => {
|
||||
|
|
@ -709,7 +799,7 @@ impl Document {
|
|||
_ => key_value,
|
||||
}
|
||||
}
|
||||
None => key.utf8_text(self.source.as_bytes()).unwrap(),
|
||||
None => key.utf8_text(self.source().as_bytes()).unwrap(),
|
||||
};
|
||||
|
||||
if key_value == expected {
|
||||
|
|
@ -768,7 +858,7 @@ impl Document {
|
|||
mod tests {
|
||||
use std::vec;
|
||||
|
||||
use crate::{Component, Document, FeatureKind, Route};
|
||||
use crate::{Component, Document, FeatureKind, QueryError, Route};
|
||||
|
||||
#[test]
|
||||
fn test_query_parent() {
|
||||
|
|
@ -1068,4 +1158,31 @@ nested:
|
|||
assert_eq!(feature.kind(), *expected_kind);
|
||||
}
|
||||
}
|
||||
|
||||
/// A document that defines the same anchor name twice must be rejected
/// by `Document::new` with `QueryError::DuplicateAnchor`.
#[test]
|
||||
fn test_reject_duplicate_anchors() {
|
||||
let anchors = r#"
|
||||
foo: &dup-anchor bar
|
||||
baz: &dup-anchor quux
|
||||
"#;
|
||||
|
||||
let result = Document::new(anchors);
|
||||
assert!(matches!(result, Err(QueryError::DuplicateAnchor(_))));
|
||||
}
|
||||
|
||||
/// Each uniquely named anchor must appear in the document's anchor map,
/// keyed by its bare name and mapped to a `block_node`.
#[test]
|
||||
fn test_anchor_map() {
|
||||
let anchors = r#"
|
||||
foo: &foo-anchor
|
||||
bar: &bar-anchor
|
||||
baz: quux
|
||||
"#;
|
||||
|
||||
let doc = Document::new(anchors).unwrap();
|
||||
// The dependent half of the cell is the computed anchor map.
let anchor_map = doc.tree.borrow_dependent();
|
||||
|
||||
assert_eq!(anchor_map.len(), 2);
|
||||
assert_eq!(anchor_map["foo-anchor"].kind(), "block_node");
|
||||
assert_eq!(anchor_map["bar-anchor"].kind(), "block_node");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue