diff --git a/Cargo.lock b/Cargo.lock
index c9a244bc..479ff0b1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2224,6 +2224,18 @@ dependencies = [
  "slab",
 ]
 
+[[package]]
+name = "fuzz"
+version = "0.0.0"
+dependencies = [
+ "harper-comments",
+ "harper-core",
+ "harper-html",
+ "harper-literate-haskell",
+ "harper-typst",
+ "libfuzzer-sys",
+]
+
 [[package]]
 name = "gemm"
 version = "0.17.1"
@@ -3370,6 +3382,16 @@ version = "0.2.174"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
 
+[[package]]
+name = "libfuzzer-sys"
+version = "0.4.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5037190e1f70cbeef565bd267599242926f724d3b8a9f510fd7e0b540cfa4404"
+dependencies = [
+ "arbitrary",
+ "cc",
+]
+
 [[package]]
 name = "libloading"
 version = "0.8.8"
diff --git a/Cargo.toml b/Cargo.toml
index 75ff6eea..1ac05367 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
-members = ["harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst", "harper-stats", "harper-pos-utils", "harper-brill", "harper-ink", "harper-python", "harper-jjdescription"]
+members = ["harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst", "harper-stats", "harper-pos-utils", "harper-brill", "harper-ink", "harper-python", "harper-jjdescription", "fuzz"]
 resolver = "2"
 
 [profile.test]
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
new file mode 100644
index 00000000..1a45eee7
--- /dev/null
+++ b/fuzz/.gitignore
@@ -0,0 +1,4 @@
+target
+corpus
+artifacts
+coverage
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
new file mode 100644
index 00000000..c3631bba
--- /dev/null
+++ b/fuzz/Cargo.toml
@@ -0,0 +1,52 @@
+[package]
+name = "fuzz"
+version = "0.0.0"
+publish = false
+edition = "2024"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+
+harper-core = { path = "../harper-core" }
+harper-typst = { path = "../harper-typst" }
+harper-literate-haskell = { path = "../harper-literate-haskell" }
+harper-html = { path = "../harper-html" }
+harper-comments = { path = "../harper-comments" }
+
+[[bin]]
+name = "fuzz_harper_typst"
+path = "fuzz_targets/fuzz_harper_typst.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "fuzz_harper_literate_haskell"
+path = "fuzz_targets/fuzz_harper_literate_haskell.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "fuzz_harper_html"
+path = "fuzz_targets/fuzz_harper_html.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "fuzz_harper_comment"
+path = "fuzz_targets/fuzz_harper_comment.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "fuzz_harper_core_markdown"
+path = "fuzz_targets/fuzz_harper_core_markdown.rs"
+test = false
+doc = false
+bench = false
diff --git a/fuzz/README.md b/fuzz/README.md
new file mode 100644
index 00000000..b62cf57e
--- /dev/null
+++ b/fuzz/README.md
@@ -0,0 +1,34 @@
+# cargo-fuzz targets
+
+## Setup
+
+Follow the rust-fuzz [setup guide](https://rust-fuzz.github.io/book/cargo-fuzz/setup.html).
+You need a nightly toolchain and the cargo-fuzz plugin.
+
+Simple installation steps:
+
+- `rustup install nightly`
+- `cargo install cargo-fuzz`
+
+## Adding a new fuzzing target
+
+To add a new target, run `cargo fuzz add $TARGET_NAME`
+
+## Doing a fuzzing run
+
+If possible, prefill the `fuzz/corpus/$TARGET_NAME` directory with appropriate examples to speed up fuzzing.
+The fuzzer should be coverage aware, so providing a well-formed input document to fuzzing targets only expecting a string as input can speed things up a lot.
+
+Then, run `cargo +nightly fuzz run $TARGET_NAME -- -timeout=$TIMEOUT`
+
+The timeout flag accepts a timeout in seconds, after which a long-running test case will be aborted.
+This should be set to a low number to quickly report endless loops / deep recursion in parsers.
+
+The normal fuzzing run will continue until a crash is found.
+
+Alternatively, if you want to run all the fuzzing targets at once: `cargo +nightly fuzz list | parallel -j0 cargo +nightly fuzz run {} -- -timeout=$TIMEOUT`
+
+## Minifying a test case
+
+Once the fuzzer finds a crash, we probably want to minify the result.
+This can be done with `CARGO_PROFILE_RELEASE_LTO=false cargo +nightly fuzz tmin $TARGET_NAME $TEST_CASE_PATH`
diff --git a/fuzz/fuzz_targets/fuzz_harper_comment.rs b/fuzz/fuzz_targets/fuzz_harper_comment.rs
new file mode 100644
index 00000000..b5904579
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_harper_comment.rs
@@ -0,0 +1,80 @@
+//! Fuzz target for the `harper_comments` comment parser.
+//!
+//! Each input pairs a language ID picked from `LANGUAGES` with an arbitrary string, which is
+//! parsed with the `CommentParser` created for that language ID.
+
+#![no_main]
+
+use harper_core::parsers::{MarkdownOptions, StrParser};
+use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured};
+use libfuzzer_sys::fuzz_target;
+
+/// A language ID chosen from `LANGUAGES`.
+#[derive(Debug)]
+struct Language(&'static str);
+
+/// Language IDs passed to `CommentParser::new_from_language_id`.
+///
+/// Each ID is listed exactly once so that no language is over-represented when one is chosen
+/// with `Unstructured::choose`.
+const LANGUAGES: [&str; 25] = [
+    "cmake",
+    "cpp",
+    "csharp",
+    "c",
+    "dart",
+    "go",
+    "haskell",
+    "javascriptreact",
+    "javascript",
+    "java",
+    "kotlin",
+    "lua",
+    "nix",
+    "php",
+    "python",
+    "ruby",
+    "rust",
+    "scala",
+    "shellscript",
+    "solidity",
+    "swift",
+    "toml",
+    "typescriptreact",
+    "typescript",
+    "clojure",
+];
+
+impl<'a> Arbitrary<'a> for Language {
+    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
+        u.choose(&LANGUAGES).map(|&lang| Language(lang))
+    }
+}
+
+/// One fuzz case: the language ID used to select the parser, and the text to parse.
+#[derive(Debug)]
+struct Input {
+    language: Language,
+    text: String,
+}
+
+impl<'a> Arbitrary<'a> for Input {
+    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
+        let (language, text) = Arbitrary::arbitrary(u)?;
+        Ok(Input { language, text })
+    }
+
+    fn arbitrary_take_rest(u: Unstructured<'a>) -> Result<Self> {
+        let (language, text) = Arbitrary::arbitrary_take_rest(u)?;
+        Ok(Input { language, text })
+    }
+}
+
+fuzz_target!(|data: Input| {
+    let opts = MarkdownOptions::default();
+    let parser = harper_comments::CommentParser::new_from_language_id(data.language.0, opts);
+    // `new_from_language_id` returns an `Option`; inputs for which it yields `None` are skipped.
+    if let Some(parser) = parser {
+        let _res = parser.parse_str(&data.text);
+    }
+});
diff --git a/fuzz/fuzz_targets/fuzz_harper_core_markdown.rs b/fuzz/fuzz_targets/fuzz_harper_core_markdown.rs
new file mode 100644
index 00000000..596a269d
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_harper_core_markdown.rs
@@ -0,0 +1,12 @@
+//! Fuzz target that parses arbitrary strings with `Markdown` built from default options.
+
+#![no_main]
+
+use harper_core::parsers::{Markdown, MarkdownOptions, StrParser};
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|data: &str| {
+    let opts = MarkdownOptions::default();
+    let parser = Markdown::new(opts);
+    let _res = parser.parse_str(data);
+});
diff --git a/fuzz/fuzz_targets/fuzz_harper_html.rs b/fuzz/fuzz_targets/fuzz_harper_html.rs
new file mode 100644
index 00000000..b0f11cf6
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_harper_html.rs
@@ -0,0 +1,11 @@
+//! Fuzz target that parses arbitrary strings with a default `harper_html::HtmlParser`.
+
+#![no_main]
+
+use harper_core::parsers::StrParser;
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|data: &str| {
+    let parser = harper_html::HtmlParser::default();
+    let _res = parser.parse_str(data);
+});
diff --git a/fuzz/fuzz_targets/fuzz_harper_literate_haskell.rs b/fuzz/fuzz_targets/fuzz_harper_literate_haskell.rs
new file mode 100644
index 00000000..ea3df937
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_harper_literate_haskell.rs
@@ -0,0 +1,11 @@
+//! Placeholder fuzz target for `harper_literate_haskell`; it currently ignores its input.
+
+#![no_main]
+
+// use harper_core::parsers::StrParser;
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|_data: &str| {
+    // TODO: figure out how to create a literate haskell parser
+    // let _res = parser.parse_str(_data);
+});
diff --git a/fuzz/fuzz_targets/fuzz_harper_typst.rs b/fuzz/fuzz_targets/fuzz_harper_typst.rs
new file mode 100644
index 00000000..166282c6
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_harper_typst.rs
@@ -0,0 +1,11 @@
+//! Fuzz target that parses arbitrary strings with `harper_typst::Typst`.
+
+#![no_main]
+
+use harper_core::parsers::StrParser;
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|data: &str| {
+    let typst = harper_typst::Typst;
+    let _res = typst.parse_str(data);
+});