Move text-edit into ide-db

This commit is contained in:
Lukas Wirth 2024-10-27 12:23:10 +01:00
parent 80e9d014be
commit 64f56f458f
63 changed files with 684 additions and 707 deletions

View file

@ -27,7 +27,6 @@ ra-ap-rustc_lexer.workspace = true
parser.workspace = true
stdx.workspace = true
text-edit.workspace = true
[dev-dependencies]
rayon.workspace = true

View file

@ -1,11 +1,6 @@
//! Collection of assorted algorithms for syntax trees.
use std::hash::BuildHasherDefault;
use indexmap::IndexMap;
use itertools::Itertools;
use rustc_hash::FxHashMap;
use text_edit::TextEditBuilder;
use crate::{
AstNode, Direction, NodeOrToken, SyntaxElement, SyntaxKind, SyntaxNode, SyntaxToken, TextRange,
@ -101,559 +96,3 @@ pub fn neighbor<T: AstNode>(me: &T, direction: Direction) -> Option<T> {
pub fn has_errors(node: &SyntaxNode) -> bool {
node.children().any(|it| it.kind() == SyntaxKind::ERROR)
}
type FxIndexMap<K, V> = IndexMap<K, V, BuildHasherDefault<rustc_hash::FxHasher>>;
/// Anchor describing where a run of inserted elements goes, relative to an element of the
/// original tree. Used as the key of `TreeDiff::insertions`.
#[derive(Debug, Hash, PartialEq, Eq)]
enum TreeDiffInsertPos {
/// Insert at the end of the anchor element's text range.
After(SyntaxElement),
/// Insert at the start of the anchor element's text range.
AsFirstChild(SyntaxElement),
}
/// Result of [`diff`]: the replacements, deletions and insertions that were computed for a
/// pair of syntax trees.
#[derive(Debug)]
pub struct TreeDiff {
// maps an element of the original tree to the element whose text replaces it
replacements: FxHashMap<SyntaxElement, SyntaxElement>,
// elements of the original tree whose text ranges are deleted
deletions: Vec<SyntaxElement>,
// the vec as well as the indexmap are both here to preserve order
insertions: FxIndexMap<TreeDiffInsertPos, Vec<SyntaxElement>>,
}
impl TreeDiff {
    /// Records this diff on `builder` as text edits.
    ///
    /// Insertions are emitted first, then replacements, then deletions. Despite the `into_`
    /// prefix, the diff is only borrowed.
    pub fn into_text_edit(&self, builder: &mut TextEditBuilder) {
        let _p = tracing::info_span!("into_text_edit").entered();

        for (anchor, elements) in &self.insertions {
            // Every element of one entry is inserted at the same offset, in stored order.
            let offset = match anchor {
                TreeDiffInsertPos::After(it) => it.text_range().end(),
                TreeDiffInsertPos::AsFirstChild(it) => it.text_range().start(),
            };
            for element in elements {
                builder.insert(offset, element.to_string());
            }
        }

        for (old, new) in &self.replacements {
            builder.replace(old.text_range(), new.to_string());
        }

        for removed in &self.deletions {
            builder.delete(removed.text_range());
        }
    }

    /// Returns `true` when the diff holds no replacements, deletions or insertions.
    pub fn is_empty(&self) -> bool {
        let Self { replacements, deletions, insertions } = self;
        replacements.is_empty() && deletions.is_empty() && insertions.is_empty()
    }
}
/// Computes a (potentially minimal) diff which, applied to `from`, produces `to`.
///
/// The returned [`TreeDiff`] consists of replacements, insertions and deletions that are all
/// anchored at elements of `from`.
///
/// The algorithm tries to keep the diff fine-grained rather than replacing whole subtrees.
pub fn diff(from: &SyntaxNode, to: &SyntaxNode) -> TreeDiff {
    let _p = tracing::info_span!("diff").entered();

    let mut diff = TreeDiff {
        replacements: FxHashMap::default(),
        insertions: FxIndexMap::default(),
        deletions: Vec::new(),
    };

    let from: SyntaxElement = from.clone().into();
    let to: SyntaxElement = to.clone().into();
    if !syntax_element_eq(&from, &to) {
        go(&mut diff, from, to);
    }
    return diff;

    /// Two elements are equal when kind, text length and text all agree (nodes that compare
    /// equal directly skip the text comparison). A node never equals a token.
    fn syntax_element_eq(lhs: &SyntaxElement, rhs: &SyntaxElement) -> bool {
        if lhs.kind() != rhs.kind() || lhs.text_range().len() != rhs.text_range().len() {
            return false;
        }
        match (lhs, rhs) {
            (NodeOrToken::Node(l), NodeOrToken::Node(r)) => l == r || l.text() == r.text(),
            (NodeOrToken::Token(l), NodeOrToken::Token(r)) => l.text() == r.text(),
            _ => false,
        }
    }

    // FIXME: this is horribly inefficient. I bet there's a cool algorithm to diff trees properly.
    fn go(diff: &mut TreeDiff, lhs: SyntaxElement, rhs: SyntaxElement) {
        // Unless both sides are nodes there are no children to pair up: replace wholesale.
        let Some((lhs_node, rhs_node)) = lhs.as_node().zip(rhs.as_node()) else {
            cov_mark::hit!(diff_node_token_replace);
            diff.replacements.insert(lhs, rhs);
            return;
        };

        let mut pending = Vec::default();
        let mut rhs_iter = rhs_node.children_with_tokens();
        let mut lhs_iter = lhs_node.children_with_tokens();
        // Most recent left-hand child seen so far; insertions are anchored after it.
        let mut prev_lhs: Option<SyntaxElement> = None;

        loop {
            let lhs_child = lhs_iter.next();
            let rhs_child = rhs_iter.next();
            match (lhs_child.clone(), rhs_child) {
                (None, None) => break,
                (None, Some(added)) => {
                    let anchor = if let Some(prev) = prev_lhs.clone() {
                        cov_mark::hit!(diff_insert);
                        TreeDiffInsertPos::After(prev)
                    } else {
                        // first iteration, insert into our parent as the first child
                        cov_mark::hit!(diff_insert_as_first_child);
                        TreeDiffInsertPos::AsFirstChild(lhs_node.clone().into())
                    };
                    diff.insertions.entry(anchor).or_default().push(added);
                }
                (Some(removed), None) => {
                    cov_mark::hit!(diff_delete);
                    diff.deletions.push(removed);
                }
                (Some(lhs_ele), Some(rhs_ele)) => {
                    if !syntax_element_eq(&lhs_ele, &rhs_ele) {
                        // The elements differ. Look for `lhs_ele` further along in rhs; if
                        // it's found, everything before it can be recorded as insertions.
                        // This keeps the diff minimal with regard to insertions, which
                        // matters for `use` insertions: we do not want to replace the entire
                        // module node.
                        pending.push(rhs_ele.clone());
                        let mut lookahead = rhs_iter.clone();
                        let mut found = false;
                        for candidate in lookahead.by_ref() {
                            if syntax_element_eq(&lhs_ele, &candidate) {
                                cov_mark::hit!(diff_insertions);
                                found = true;
                                break;
                            }
                            pending.push(candidate);
                        }

                        if found {
                            let anchor = match prev_lhs.clone() {
                                Some(prev) => TreeDiffInsertPos::After(prev),
                                None => {
                                    cov_mark::hit!(insert_first_child);
                                    TreeDiffInsertPos::AsFirstChild(lhs_node.clone().into())
                                }
                            };
                            diff.insertions.entry(anchor).or_default().extend(pending.drain(..));
                            // Resume after the rhs element that matched `lhs_ele`.
                            rhs_iter = lookahead;
                        } else {
                            // Nothing matched: discard the scratch list and diff the pair.
                            pending.clear();
                            go(diff, lhs_ele, rhs_ele);
                        }
                    }
                }
            }
            prev_lhs = lhs_child.or(prev_lhs);
        }
    }
}
#[cfg(test)]
mod tests {
use expect_test::{expect, Expect};
use itertools::Itertools;
use parser::{Edition, SyntaxKind};
use text_edit::TextEdit;
use crate::{AstNode, SyntaxElement};
// Diffs `use node;` against `ident`: the snapshot records the `use` keyword being replaced by
// `ident` and the remaining children being deleted. Expects the `diff_node_token_replace`
// coverage mark to be hit.
#[test]
fn replace_node_token() {
cov_mark::check!(diff_node_token_replace);
check_diff(
r#"use node;"#,
r#"ident"#,
expect![[r#"
insertions:
replacements:
Line 0: Token(USE_KW@0..3 "use") -> ident
deletions:
Line 1: " "
Line 1: node
Line 1: ;
"#]],
);
}
// Diffs an empty source against a single `use` item: the item is inserted as the first child
// of the source file. Expects the `diff_insert_as_first_child` coverage mark to be hit.
#[test]
fn replace_parent() {
cov_mark::check!(diff_insert_as_first_child);
check_diff(
r#""#,
r#"use foo::bar;"#,
expect![[r#"
insertions:
Line 0: AsFirstChild(Node(SOURCE_FILE@0..0))
-> use foo::bar;
replacements:
deletions:
"#]],
);
}
// Appending a trailing `use` item yields a single insertion anchored after the last existing
// `USE` node. Expects the `diff_insert` coverage mark to be hit.
#[test]
fn insert_last() {
cov_mark::check!(diff_insert);
check_diff(
r#"
use foo;
use bar;"#,
r#"
use foo;
use bar;
use baz;"#,
expect![[r#"
insertions:
Line 2: After(Node(USE@10..18))
-> "\n"
-> use baz;
replacements:
deletions:
"#]],
);
}
// Adding a `use` item between two existing ones yields a single insertion anchored after a
// whitespace token, with no replacements or deletions.
#[test]
fn insert_middle() {
check_diff(
r#"
use foo;
use baz;"#,
r#"
use foo;
use bar;
use baz;"#,
expect![[r#"
insertions:
Line 2: After(Token(WHITESPACE@9..10 "\n"))
-> use bar;
-> "\n"
replacements:
deletions:
"#]],
)
}
// Adding a `use` item before the existing ones yields a single insertion anchored after the
// leading whitespace token.
#[test]
fn insert_first() {
check_diff(
r#"
use bar;
use baz;"#,
r#"
use foo;
use bar;
use baz;"#,
expect![[r#"
insertions:
Line 0: After(Token(WHITESPACE@0..1 "\n"))
-> use foo;
-> "\n"
replacements:
deletions:
"#]],
)
}
// Prepending a `use` item to a file that starts directly with a function: the look-ahead finds
// the function later in the new tree and inserts the new elements as the first child of the
// source file. Expects the `insert_first_child` coverage mark to be hit.
#[test]
fn first_child_insertion() {
cov_mark::check!(insert_first_child);
check_diff(
r#"fn main() {
stdi
}"#,
r#"use foo::bar;
fn main() {
stdi
}"#,
expect![[r#"
insertions:
Line 0: AsFirstChild(Node(SOURCE_FILE@0..30))
-> use foo::bar;
-> "\n\n "
replacements:
deletions:
"#]],
);
}
// Removing the trailing `use` item yields deletions only (the whitespace and the item).
// Expects the `diff_delete` coverage mark to be hit.
#[test]
fn delete_last() {
cov_mark::check!(diff_delete);
check_diff(
r#"use foo;
use bar;"#,
r#"use foo;"#,
expect![[r#"
insertions:
replacements:
deletions:
Line 1: "\n "
Line 2: use bar;
"#]],
);
}
// Removing the middle `use` item. The snapshot pins the current result: the kept trailing item
// is inserted after the first `USE` node while the original remaining children are deleted.
// Expects the `diff_insertions` coverage mark to be hit.
#[test]
fn delete_middle() {
cov_mark::check!(diff_insertions);
check_diff(
r#"
use expect_test::{expect, Expect};
use text_edit::TextEdit;
use crate::AstNode;
"#,
r#"
use expect_test::{expect, Expect};
use crate::AstNode;
"#,
expect![[r#"
insertions:
Line 1: After(Node(USE@1..35))
-> "\n\n"
-> use crate::AstNode;
replacements:
deletions:
Line 2: use text_edit::TextEdit;
Line 3: "\n\n"
Line 4: use crate::AstNode;
Line 5: "\n"
"#]],
)
}
// Removing the first `use` item. The snapshot pins the current result: the first item's tokens
// are replaced so that it reads like the second one, and the original second item is deleted.
#[test]
fn delete_first() {
check_diff(
r#"
use text_edit::TextEdit;
use crate::AstNode;
"#,
r#"
use crate::AstNode;
"#,
expect![[r#"
insertions:
replacements:
Line 2: Token(IDENT@5..14 "text_edit") -> crate
Line 2: Token(IDENT@16..24 "TextEdit") -> AstNode
Line 2: Token(WHITESPACE@25..27 "\n\n") -> "\n"
deletions:
Line 3: use crate::AstNode;
Line 4: "\n"
"#]],
)
}
// Snapshot of the diff between one nested `use std::{...}` tree and the equivalent three
// separate `use` items; the result mixes insertions, a replacement and deletions.
#[test]
fn merge_use() {
check_diff(
r#"
use std::{
fmt,
hash::BuildHasherDefault,
ops::{self, RangeInclusive},
};
"#,
r#"
use std::fmt;
use std::hash::BuildHasherDefault;
use std::ops::{self, RangeInclusive};
"#,
expect![[r#"
insertions:
Line 2: After(Node(PATH_SEGMENT@5..8))
-> ::
-> fmt
Line 6: After(Token(WHITESPACE@86..87 "\n"))
-> use std::hash::BuildHasherDefault;
-> "\n"
-> use std::ops::{self, RangeInclusive};
-> "\n"
replacements:
Line 2: Token(IDENT@5..8 "std") -> std
deletions:
Line 2: ::
Line 2: {
fmt,
hash::BuildHasherDefault,
ops::{self, RangeInclusive},
}
"#]],
)
}
// Snapshot of the diff for rewriting an `if let` block into a `let ... = match ...` with an
// early return followed by the former block body.
#[test]
fn early_return_assist() {
check_diff(
r#"
fn main() {
if let Ok(x) = Err(92) {
foo(x);
}
}
"#,
r#"
fn main() {
let x = match Err(92) {
Ok(it) => it,
_ => return,
};
foo(x);
}
"#,
expect![[r#"
insertions:
Line 3: After(Node(BLOCK_EXPR@40..63))
-> " "
-> match Err(92) {
Ok(it) => it,
_ => return,
}
-> ;
Line 3: After(Node(IF_EXPR@17..63))
-> "\n "
-> foo(x);
replacements:
Line 3: Token(IF_KW@17..19 "if") -> let
Line 3: Token(LET_KW@20..23 "let") -> x
Line 3: Node(BLOCK_EXPR@40..63) -> =
deletions:
Line 3: " "
Line 3: Ok(x)
Line 3: " "
Line 3: =
Line 3: " "
Line 3: Err(92)
"#]],
)
}
// Test driver: diffs `from` against `to`, compares a textual rendering of the diff with
// `expected_diff`, then applies the diff to `from` and asserts that the result equals `to`.
fn check_diff(from: &str, to: &str, expected_diff: Expect) {
    let parse =
        |text: &str| crate::SourceFile::parse(text, Edition::CURRENT).tree().syntax().clone();
    let from_node = parse(from);
    let to_node = parse(to);
    let diff = super::diff(&from_node, &to_node);

    // Number of lines of `from` that lie before the start of the element.
    let line_number = |syn: &SyntaxElement| {
        let start: usize = syn.text_range().start().into();
        from[..start].lines().count()
    };

    // Whitespace is rendered in debug form so that it stays visible in the snapshot.
    let fmt_syntax = |syn: &SyntaxElement| {
        if syn.kind() == SyntaxKind::WHITESPACE {
            format!("{:?}", syn.to_string())
        } else {
            format!("{syn}")
        }
    };

    let insertions = diff.insertions.iter().format_with(
        "\n",
        |(pos, elements), f| -> Result<(), std::fmt::Error> {
            let anchor = match pos {
                super::TreeDiffInsertPos::After(syn)
                | super::TreeDiffInsertPos::AsFirstChild(syn) => syn,
            };
            let inserted = elements.iter().format_with("\n-> ", |el, f| f(&fmt_syntax(el)));
            f(&format!("Line {}: {:?}\n-> {}", line_number(anchor), pos, inserted))
        },
    );

    // The replacement map is unordered, so sort by position for a stable snapshot.
    let replacements = diff
        .replacements
        .iter()
        .sorted_by_key(|(syntax, _)| syntax.text_range().start())
        .format_with("\n", |(old, new), f| {
            f(&format!("Line {}: {old:?} -> {}", line_number(old), fmt_syntax(new)))
        });

    let deletions = diff.deletions.iter().format_with("\n", |removed, f| {
        f(&format!("Line {}: {}", line_number(removed), fmt_syntax(removed)))
    });

    let actual = format!(
        "insertions:\n\n{insertions}\n\nreplacements:\n\n{replacements}\n\ndeletions:\n\n{deletions}\n"
    );
    expected_diff.assert_eq(&actual);

    // Round trip: the diff applied to `from` has to reproduce `to` exactly.
    let mut patched = from.to_owned();
    let mut builder = TextEdit::builder();
    diff.into_text_edit(&mut builder);
    builder.finish().apply(&mut patched);
    assert_eq!(&*patched, to, "diff did not turn `from` to `to`");
}
}

View file

@ -5,7 +5,6 @@
use std::str::{self, FromStr};
use parser::Edition;
use text_edit::Indel;
use crate::{validation, AstNode, SourceFile, TextRange};
@ -22,7 +21,8 @@ pub fn check_parser(text: &str) {
#[derive(Debug, Clone)]
pub struct CheckReparse {
text: String,
edit: Indel,
delete: TextRange,
insert: String,
edited_text: String,
}
@ -43,14 +43,13 @@ impl CheckReparse {
TextRange::at(delete_start.try_into().unwrap(), delete_len.try_into().unwrap());
let edited_text =
format!("{}{}{}", &text[..delete_start], &insert, &text[delete_start + delete_len..]);
let edit = Indel { insert, delete };
Some(CheckReparse { text, edit, edited_text })
Some(CheckReparse { text, insert, delete, edited_text })
}
#[allow(clippy::print_stderr)]
pub fn run(&self) {
let parse = SourceFile::parse(&self.text, Edition::CURRENT);
let new_parse = parse.reparse(&self.edit, Edition::CURRENT);
let new_parse = parse.reparse(self.delete, &self.insert, Edition::CURRENT);
check_file_invariants(&new_parse.tree());
assert_eq!(&new_parse.tree().syntax().text().to_string(), &self.edited_text);
let full_reparse = SourceFile::parse(&self.edited_text, Edition::CURRENT);

View file

@ -44,10 +44,9 @@ pub mod syntax_editor;
pub mod ted;
pub mod utils;
use std::marker::PhantomData;
use std::{marker::PhantomData, ops::Range};
use stdx::format_to;
use text_edit::Indel;
use triomphe::Arc;
pub use crate::{
@ -150,16 +149,22 @@ impl Parse<SourceFile> {
buf
}
pub fn reparse(&self, indel: &Indel, edition: Edition) -> Parse<SourceFile> {
self.incremental_reparse(indel, edition)
.unwrap_or_else(|| self.full_reparse(indel, edition))
pub fn reparse(&self, delete: TextRange, insert: &str, edition: Edition) -> Parse<SourceFile> {
self.incremental_reparse(delete, insert, edition)
.unwrap_or_else(|| self.full_reparse(delete, insert, edition))
}
fn incremental_reparse(&self, indel: &Indel, edition: Edition) -> Option<Parse<SourceFile>> {
fn incremental_reparse(
&self,
delete: TextRange,
insert: &str,
edition: Edition,
) -> Option<Parse<SourceFile>> {
// FIXME: validation errors are not handled here
parsing::incremental_reparse(
self.tree().syntax(),
indel,
delete,
insert,
self.errors.as_deref().unwrap_or_default().iter().cloned(),
edition,
)
@ -170,9 +175,9 @@ impl Parse<SourceFile> {
})
}
fn full_reparse(&self, indel: &Indel, edition: Edition) -> Parse<SourceFile> {
fn full_reparse(&self, delete: TextRange, insert: &str, edition: Edition) -> Parse<SourceFile> {
let mut text = self.tree().syntax().text().to_string();
indel.apply(&mut text);
text.replace_range(Range::<usize>::from(delete), insert);
SourceFile::parse(&text, edition)
}
}

View file

@ -6,8 +6,9 @@
//! - otherwise, we search for the nearest `{}` block which contains the edit
//! and try to parse only this block.
use std::ops::Range;
use parser::{Edition, Reparser};
use text_edit::Indel;
use crate::{
parsing::build_tree,
@ -19,38 +20,48 @@ use crate::{
pub(crate) fn incremental_reparse(
node: &SyntaxNode,
edit: &Indel,
delete: TextRange,
insert: &str,
errors: impl IntoIterator<Item = SyntaxError>,
edition: Edition,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
if let Some((green, new_errors, old_range)) = reparse_token(node, edit, edition) {
return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
if let Some((green, new_errors, old_range)) = reparse_token(node, delete, insert, edition) {
return Some((
green,
merge_errors(errors, new_errors, old_range, delete, insert),
old_range,
));
}
if let Some((green, new_errors, old_range)) = reparse_block(node, edit, edition) {
return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
if let Some((green, new_errors, old_range)) = reparse_block(node, delete, insert, edition) {
return Some((
green,
merge_errors(errors, new_errors, old_range, delete, insert),
old_range,
));
}
None
}
fn reparse_token(
root: &SyntaxNode,
edit: &Indel,
delete: TextRange,
insert: &str,
edition: Edition,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
let prev_token = root.covering_element(edit.delete).as_token()?.clone();
let prev_token = root.covering_element(delete).as_token()?.clone();
let prev_token_kind = prev_token.kind();
match prev_token_kind {
WHITESPACE | COMMENT | IDENT | STRING | BYTE_STRING | C_STRING => {
if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
// removing a new line may extend the previous token
let deleted_range = edit.delete - prev_token.text_range().start();
let deleted_range = delete - prev_token.text_range().start();
if prev_token.text()[deleted_range].contains('\n') {
return None;
}
}
let mut new_text = get_text_after_edit(prev_token.clone().into(), edit);
let mut new_text = get_text_after_edit(prev_token.clone().into(), delete, insert);
let (new_token_kind, new_err) = parser::LexedStr::single_token(edition, &new_text)?;
if new_token_kind != prev_token_kind
@ -85,11 +96,12 @@ fn reparse_token(
fn reparse_block(
root: &SyntaxNode,
edit: &Indel,
delete: TextRange,
insert: &str,
edition: parser::Edition,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
let (node, reparser) = find_reparsable_node(root, edit.delete)?;
let text = get_text_after_edit(node.clone().into(), edit);
let (node, reparser) = find_reparsable_node(root, delete)?;
let text = get_text_after_edit(node.clone().into(), delete, insert);
let lexed = parser::LexedStr::new(edition, text.as_str());
let parser_input = lexed.to_input(edition);
@ -104,14 +116,14 @@ fn reparse_block(
Some((node.replace_with(green), new_parser_errors, node.text_range()))
}
fn get_text_after_edit(element: SyntaxElement, edit: &Indel) -> String {
let edit = Indel::replace(edit.delete - element.text_range().start(), edit.insert.clone());
fn get_text_after_edit(element: SyntaxElement, mut delete: TextRange, insert: &str) -> String {
delete -= element.text_range().start();
let mut text = match element {
NodeOrToken::Token(token) => token.text().to_owned(),
NodeOrToken::Node(node) => node.text().to_string(),
};
edit.apply(&mut text);
text.replace_range(Range::<usize>::from(delete), insert);
text
}
@ -153,7 +165,8 @@ fn merge_errors(
old_errors: impl IntoIterator<Item = SyntaxError>,
new_errors: Vec<SyntaxError>,
range_before_reparse: TextRange,
edit: &Indel,
delete: TextRange,
insert: &str,
) -> Vec<SyntaxError> {
let mut res = Vec::new();
@ -162,8 +175,8 @@ fn merge_errors(
if old_err_range.end() <= range_before_reparse.start() {
res.push(old_err);
} else if old_err_range.start() >= range_before_reparse.end() {
let inserted_len = TextSize::of(&edit.insert);
res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
let inserted_len = TextSize::of(insert);
res.push(old_err.with_range((old_err_range + inserted_len) - delete.len()));
// Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
}
}
@ -177,6 +190,8 @@ fn merge_errors(
#[cfg(test)]
mod tests {
use std::ops::Range;
use parser::Edition;
use test_utils::{assert_eq_text, extract_range};
@ -185,10 +200,9 @@ mod tests {
fn do_check(before: &str, replace_with: &str, reparsed_len: u32) {
let (range, before) = extract_range(before);
let edit = Indel::replace(range, replace_with.to_owned());
let after = {
let mut after = before.clone();
edit.apply(&mut after);
after.replace_range(Range::<usize>::from(range), replace_with);
after
};
@ -197,7 +211,8 @@ mod tests {
let before = SourceFile::parse(&before, Edition::CURRENT);
let (green, new_errors, range) = incremental_reparse(
before.tree().syntax(),
&edit,
range,
replace_with,
before.errors.as_deref().unwrap_or_default().iter().cloned(),
Edition::CURRENT,
)