fix: convert html to markdown in docs (#66)

Documentation for builtin starlark methods contains HTML tags, while LSP
clients handle markdown much easier. Used htmd crate to convert HTML
tags to markdown, and additionally change links from relative to
pointing to bazel website.

Fixes https://github.com/cameron-martin/bazel-lsp/issues/65
This commit is contained in:
Grzegorz Lukasik 2024-12-13 00:30:07 +01:00 committed by GitHub
parent 860f9240e4
commit bf9d1c7de2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 299 additions and 22 deletions

1
BUILD
View file

@ -20,6 +20,7 @@ rust_binary(
"@crates//:prost",
"@crates//:either",
"@crates//:hex",
"@crates//:htmd",
"@crates//:lsp-types",
"@crates//:ring",
"@crates//:serde_json",

160
Cargo.lock generated
View file

@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "Inflector"
@ -262,6 +262,7 @@ dependencies = [
"clap",
"either",
"hex",
"htmd",
"itertools 0.12.1",
"lsp-types",
"prost 0.12.4",
@ -637,6 +638,16 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "futures-channel"
version = "0.3.30"
@ -770,6 +781,30 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "htmd"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad1642def6e8e4dc182941f35454f7d2af917787f91f3f5133300030b41006d0"
dependencies = [
"html5ever",
"markup5ever_rcdom",
]
[[package]]
name = "html5ever"
version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
dependencies = [
"log",
"mac",
"markup5ever",
"proc-macro2",
"quote",
"syn 2.0.59",
]
[[package]]
name = "http"
version = "0.2.12"
@ -1049,12 +1084,44 @@ dependencies = [
"url",
]
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "markup5ever"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
dependencies = [
"log",
"phf",
"phf_codegen",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]]
name = "markup5ever_rcdom"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18"
dependencies = [
"html5ever",
"markup5ever",
"tendril",
"xml5ever",
]
[[package]]
name = "matchit"
version = "0.7.3"
@ -1224,6 +1291,45 @@ dependencies = [
"indexmap 2.2.6",
]
[[package]]
name = "phf"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_shared 0.11.2",
]
[[package]]
name = "phf_codegen"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
dependencies = [
"phf_generator 0.11.2",
"phf_shared 0.11.2",
]
[[package]]
name = "phf_generator"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
"rand",
]
[[package]]
name = "phf_generator"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
dependencies = [
"phf_shared 0.11.2",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
@ -1233,6 +1339,15 @@ dependencies = [
"siphasher",
]
[[package]]
name = "phf_shared"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b"
dependencies = [
"siphasher",
]
[[package]]
name = "pin-project"
version = "1.1.5"
@ -1867,8 +1982,21 @@ dependencies = [
"new_debug_unreachable",
"once_cell",
"parking_lot",
"phf_shared",
"phf_shared 0.10.0",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
dependencies = [
"phf_generator 0.10.0",
"phf_shared 0.10.0",
"proc-macro2",
"quote",
]
[[package]]
@ -1923,6 +2051,17 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "term"
version = "0.7.0"
@ -2203,6 +2342,12 @@ dependencies = [
"serde",
]
[[package]]
name = "utf-8"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8parse"
version = "0.2.1"
@ -2403,6 +2548,17 @@ version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
[[package]]
name = "xml5ever"
version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bbb26405d8e919bc1547a5aa9abc95cbfa438f04844f5fdd9dc7596b748bf69"
dependencies = [
"log",
"mac",
"markup5ever",
]
[[package]]
name = "zerocopy"
version = "0.7.32"

View file

@ -11,6 +11,7 @@ cc = "1.1.13"
clap = { version = "4.4.18", features = ["derive"] }
either = "1.9.0"
hex = "0.4.3"
htmd = "0.1.6"
itertools = "0.12.0"
lsp-types = "0.94.1"
prost = "0.12.3"

View file

@ -1103,28 +1103,85 @@ mod tests {
Ok(())
}
#[test]
fn test_environment_function_arguments() -> anyhow::Result<()> {
let fixture = TestFixture::new("simple")?;
let context = fixture.context()?;
fn get_function_doc(file_path: &str, function_name: &str) -> DocFunction {
let fixture = TestFixture::new("simple").unwrap();
let context = fixture.context().unwrap();
let module = context.get_environment(&LspUrl::File(PathBuf::from("/foo/bar/BUILD")));
let module = context.get_environment(&LspUrl::File(PathBuf::from(file_path)));
fn doc_function<'a>(module: &'a DocModule, func_name: &str) -> &'a DocFunction {
let (_, glob_member) = module
let (_, function) = module
.members
.iter()
.find(|(member, _)| *member == func_name)
.find(|(member, _)| *member == function_name)
.unwrap();
match glob_member {
DocItem::Member(DocMember::Function(f)) => f,
match function {
DocItem::Member(DocMember::Function(f)) => f.clone(),
_ => panic!(),
}
}
#[test]
fn test_function_doc_code_block() -> anyhow::Result<()> {
let doc = get_function_doc("/foo/bar/sample.bzl", "hasattr");
assert_eq!(
doc_function(&module, "glob").params.pos_or_named,
doc.docs.clone().unwrap().summary,
"Returns True if the object `x` has an attribute or method of the given `name`, otherwise False. Example: \n```python\nhasattr(ctx.attr, \"myattr\")\n```"
);
Ok(())
}
#[test]
fn test_function_doc_links() -> anyhow::Result<()> {
// select doc contains both page relative links (#) and absolute links (/).
let select_doc = get_function_doc("sample.bzl", "select");
assert_eq!(
select_doc.docs.clone().unwrap().summary,
"`select()` is the helper function that makes a rule attribute configurable. See [build encyclopedia](https://bazel.build/reference/be/functions#select) for details."
);
assert_eq!(
select_doc.params.pos_or_named,
vec![
DocParam {
name: "x".into(),
default_value: None,
docs: Some(DocString {
summary: "A dict that maps configuration conditions to values. Each key is a [Label](https://bazel.build/rules/lib/builtins/Label.html) or a label string that identifies a config\\_setting or constraint\\_value instance. See the [documentation on macros](https://bazel.build/rules/macros#label-resolution) for when to use a Label instead of a string.".into(),
details: None,
}),
typ: Ty::any(),
},
DocParam {
name: "no_match_error".into(),
default_value: Some("''".into()),
docs: Some(DocString {
summary: "Optional custom error to report if no condition matches.".into(),
details: None,
}),
typ: Ty::any(),
},
]
);
// aspect contains absolute link.
let aspect_doc = get_function_doc("sample.bzl", "aspect");
assert_eq!(
aspect_doc.docs.clone().unwrap().summary,
"Creates a new aspect. The result of this function must be stored in a global value. Please see the [introduction to Aspects](https://bazel.build/rules/aspects) for more details."
);
Ok(())
}
#[test]
fn test_function_doc_params() -> anyhow::Result<()> {
let glob_doc = get_function_doc("/foo/bar/BUILD", "glob");
assert_eq!(
glob_doc.params.pos_or_named,
vec![
DocParam {
name: "include".into(),
@ -1166,8 +1223,13 @@ mod tests {
]
);
Ok(())
}
#[test]
fn test_function_doc_args_kwargs() -> anyhow::Result<()> {
assert_eq!(
doc_function(&module, "max").params.args,
get_function_doc("BUILD", "max").params.args,
Some(DocParam {
name: "args".into(),
default_value: None,
@ -1180,7 +1242,7 @@ mod tests {
);
assert_eq!(
doc_function(&module, "dict").params.kwargs,
get_function_doc("BUILD", "dict").params.kwargs,
Some(DocParam {
name: "kwargs".into(),
default_value: None,

View file

@ -1,5 +1,8 @@
use std::sync::LazyLock;
pub use build_proto::blaze_query::*;
pub use builtin_proto::builtin::*;
use htmd::{Element, HtmlToMarkdown};
use starlark::{
docs::{DocFunction, DocMember, DocParam, DocParams, DocProperty, DocString},
typing::Ty,
@ -46,6 +49,45 @@ pub static MISSING_GLOBALS: &'static [&'static str] = &[
"distribs",
];
static HTML_CONVERTER: LazyLock<htmd::HtmlToMarkdown> = LazyLock::new(|| {
HtmlToMarkdown::builder()
.add_handler(vec!["pre"], |element: Element| {
for attr in element.attrs {
if &attr.name.local == "class" && attr.value.to_string() == "language-python" {
return Some(format!("\n```python\n{}\n```\n", element.content));
}
}
Some(element.content.to_string())
})
.add_handler(vec!["a"], |element: Element| {
for attr in element.attrs {
if &attr.name.local == "href" {
// For local links, just remove link altogether.
if attr.value.starts_with("#") {
return Some(element.content.to_string());
}
// For relative links, guess the page it points to.
let link = if attr.value.starts_with("/") {
format!("https://bazel.build{}", attr.value.to_string())
} else if attr.value.starts_with("../") {
format!(
"https://bazel.build/rules/lib/{}",
attr.value.strip_prefix("../").unwrap()
)
} else {
// For absolute links, just use the link.
attr.value.to_string()
};
return Some(format!("[{}]({})", element.content.to_string(), link));
}
}
Some(element.content.to_string())
})
.build()
});
pub fn build_language_to_doc_members<'a>(
build_language: &'a BuildLanguage,
) -> impl Iterator<Item = (String, DocMember)> + 'a {
@ -98,7 +140,7 @@ pub fn builtins_to_doc_members<'a>(
}
fn value_to_doc_member(value: &Value) -> DocMember {
let docs = create_docstring(&value.doc);
let docs = create_docstring_for_possible_html(&value.doc);
if let Some(callable) = &value.callable {
let mut params = DocParams {
@ -117,7 +159,7 @@ fn value_to_doc_member(value: &Value) -> DocMember {
let doc_param = DocParam {
name,
docs: create_docstring(&param.doc),
docs: create_docstring_for_possible_html(&param.doc),
typ: Ty::any(),
default_value: if param.is_mandatory {
None
@ -163,3 +205,18 @@ fn create_docstring(summary: &str) -> Option<DocString> {
})
}
}
fn create_docstring_for_possible_html(html: &str) -> Option<DocString> {
// Some documentation is using markdown, use simple heuristic to check whether need to convert
// from html to markdown.
let markdown = if str::contains(html, "<") {
match HTML_CONVERTER.convert(html).ok() {
Some(markdown) => markdown,
None => html.to_string(),
}
} else {
html.to_string()
};
create_docstring(&markdown)
}