merged_tree: move directory->file handling in diff to stream adapter

This refactors `MergedTree::diff_stream()` so `TreeDiffIterator` and
`TreeDiffStreamImpl` no longer handle directory->file transitions
specially. That's instead handled by a stream adapter afterwards. To
allow the adapter to handle such transitions, the inner streams now
include tree entries if the other side of the diff is a non-tree. I
think this makes the code easier to follow. It also makes it easier to
add a version of `diff_stream()` that doesn't emit directories after
files. I think that's a more natural order for most use cases.

I timed this patch by running `jj diff --ignore-working-copy -s --from
v5.0 --to v6.0` in the Linux repo. I didn't see any significant
difference.
This commit is contained in:
Martin von Zweigbergk 2025-05-29 22:46:41 -07:00
parent 470a6e65c6
commit 1b1edc7a90

View file

@ -16,13 +16,13 @@
use std::borrow::Borrow;
use std::cmp::max;
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::collections::VecDeque;
use std::iter;
use std::iter::zip;
use std::pin::Pin;
use std::sync::Arc;
use std::task::ready;
use std::task::Context;
use std::task::Poll;
use std::vec;
@ -300,7 +300,7 @@ impl MergedTree {
matcher: &'matcher dyn Matcher,
) -> TreeDiffStream<'matcher> {
let concurrency = self.store().concurrency();
if concurrency <= 1 {
let inner: TreeDiffStream<'matcher> = if concurrency <= 1 {
Box::pin(futures::stream::iter(TreeDiffIterator::new(
&self.trees,
&other.trees,
@ -313,7 +313,8 @@ impl MergedTree {
matcher,
concurrency,
))
}
};
Box::pin(DiffStreamForFileSystem::new(inner))
}
/// Like `diff_stream()` but takes the given copy records into account.
@ -693,35 +694,27 @@ impl Iterator for ConflictIterator {
}
/// Iterator over the differences between two trees.
///
/// Tree entries (`MergedTreeValue::is_tree()`) are included only if the other
/// side is present and not a tree.
pub struct TreeDiffIterator<'matcher> {
store: Arc<Store>,
stack: Vec<TreeDiffItem>,
stack: Vec<TreeDiffDir>,
matcher: &'matcher dyn Matcher,
}
struct TreeDiffDirItem {
struct TreeDiffDir {
entries: Vec<(RepoPathBuf, MergedTreeValue, MergedTreeValue)>,
}
enum TreeDiffItem {
Dir(TreeDiffDirItem),
// This is used for making sure that when a directory gets replaced by a file, we
// yield the value for the addition of the file after we yield the values
// for removing files in the directory.
File(RepoPathBuf, MergedTreeValue, MergedTreeValue),
}
impl<'matcher> TreeDiffIterator<'matcher> {
/// Creates a iterator over the differences between two trees. Generally
/// prefer `MergedTree::diff()` of calling this directly.
/// Creates a iterator over the differences between two trees.
pub fn new(trees1: &Merge<Tree>, trees2: &Merge<Tree>, matcher: &'matcher dyn Matcher) -> Self {
assert!(Arc::ptr_eq(trees1.first().store(), trees2.first().store()));
let root_dir = RepoPath::root();
let mut stack = Vec::new();
if !matcher.visit(root_dir).is_nothing() {
stack.push(TreeDiffItem::Dir(TreeDiffDirItem::from_trees(
root_dir, trees1, trees2, matcher,
)));
stack.push(TreeDiffDir::from_trees(root_dir, trees1, trees2, matcher));
};
Self {
store: trees1.first().store().clone(),
@ -744,7 +737,7 @@ impl<'matcher> TreeDiffIterator<'matcher> {
}
}
impl TreeDiffDirItem {
impl TreeDiffDir {
fn from_trees(
dir: &RepoPath,
trees1: &Merge<Tree>,
@ -787,29 +780,15 @@ impl Iterator for TreeDiffIterator<'_> {
fn next(&mut self) -> Option<Self::Item> {
while let Some(top) = self.stack.last_mut() {
let (path, before, after) = match top {
TreeDiffItem::Dir(dir) => match dir.entries.pop() {
Some(entry) => entry,
None => {
self.stack.pop().unwrap();
continue;
}
},
TreeDiffItem::File(..) => {
if let TreeDiffItem::File(path, before, after) = self.stack.pop().unwrap() {
return Some(TreeDiffEntry {
path,
values: Ok((before, after)),
});
} else {
unreachable!();
}
let (path, before, after) = match top.entries.pop() {
Some(entry) => entry,
None => {
self.stack.pop().unwrap();
continue;
}
};
let tree_before = before.is_tree();
let tree_after = after.is_tree();
let post_subdir = if tree_before || tree_after {
if before.is_tree() || after.is_tree() {
let (before_tree, after_tree) = match (
Self::trees(&self.store, &path, &before),
Self::trees(&self.store, &path, &after),
@ -829,27 +808,10 @@ impl Iterator for TreeDiffIterator<'_> {
}
};
let subdir =
TreeDiffDirItem::from_trees(&path, &before_tree, &after_tree, self.matcher);
self.stack.push(TreeDiffItem::Dir(subdir));
self.stack.len() - 1
} else {
self.stack.len()
TreeDiffDir::from_trees(&path, &before_tree, &after_tree, self.matcher);
self.stack.push(subdir);
};
if !tree_before && tree_after {
if before.is_present() {
return Some(TreeDiffEntry {
path,
values: Ok((before, Merge::absent())),
});
}
} else if tree_before && !tree_after {
if after.is_present() {
self.stack.insert(
post_subdir,
TreeDiffItem::File(path, Merge::absent(), after),
);
}
} else if !tree_before && !tree_after {
if before.is_file_like() || after.is_file_like() {
return Some(TreeDiffEntry {
path,
values: Ok((before, after)),
@ -861,13 +823,16 @@ impl Iterator for TreeDiffIterator<'_> {
}
/// Stream of differences between two trees.
///
/// Tree entries (`MergedTreeValue::is_tree()`) are included only if the other
/// side is present and not a tree.
pub struct TreeDiffStreamImpl<'matcher> {
store: Arc<Store>,
matcher: &'matcher dyn Matcher,
/// Pairs of tree values that may or may not be ready to emit, sorted in the
/// order we want to emit them. If either side is a tree, there will be
/// a corresponding entry in `pending_trees`.
items: BTreeMap<DiffStreamKey, BackendResult<(MergedTreeValue, MergedTreeValue)>>,
items: BTreeMap<RepoPathBuf, BackendResult<(MergedTreeValue, MergedTreeValue)>>,
// TODO: Is it better to combine this and `items` into a single map?
#[expect(clippy::type_complexity)]
pending_trees: VecDeque<(
@ -887,50 +852,6 @@ pub struct TreeDiffStreamImpl<'matcher> {
max_queued_items: usize,
}
/// A wrapper around `RepoPath` that allows us to optionally sort files after
/// directories that have the file as a prefix.
#[derive(PartialEq, Eq, Clone, Debug)]
struct DiffStreamKey {
path: RepoPathBuf,
file_after_dir: bool,
}
impl DiffStreamKey {
fn normal(path: RepoPathBuf) -> Self {
DiffStreamKey {
path,
file_after_dir: false,
}
}
fn file_after_dir(path: RepoPathBuf) -> Self {
DiffStreamKey {
path,
file_after_dir: true,
}
}
}
impl PartialOrd for DiffStreamKey {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for DiffStreamKey {
fn cmp(&self, other: &Self) -> Ordering {
if self == other {
Ordering::Equal
} else if self.file_after_dir && other.path.starts_with(&self.path) {
Ordering::Greater
} else if other.file_after_dir && self.path.starts_with(&other.path) {
Ordering::Less
} else {
self.path.cmp(&other.path)
}
}
}
impl<'matcher> TreeDiffStreamImpl<'matcher> {
/// Creates a iterator over the differences between two trees. Generally
/// prefer `MergedTree::diff_stream()` of calling this directly.
@ -1017,10 +938,10 @@ impl<'matcher> TreeDiffStreamImpl<'matcher> {
.push_back((path.clone(), Box::pin(both_trees_future)));
}
self.items.insert(
DiffStreamKey::normal(path),
Ok((before.cloned(), after.cloned())),
);
if before.is_file_like() || after.is_file_like() {
self.items
.insert(path, Ok((before.cloned(), after.cloned())));
}
}
}
@ -1033,27 +954,12 @@ impl<'matcher> TreeDiffStreamImpl<'matcher> {
let (_, future) = &mut self.pending_trees[pending_index];
if let Poll::Ready(tree_diff) = future.as_mut().poll(cx) {
let (dir, _) = self.pending_trees.remove(pending_index).unwrap();
let key = DiffStreamKey::normal(dir);
// Whenever we add an entry to `self.pending_trees`, we also add an Ok() entry
// to `self.items`.
let (before, after) = self.items.remove(&key).unwrap().unwrap();
// If this was a transition from file to tree or vice versa, add back an item
// for just the removal/addition of the file.
if before.is_file_like() {
self.items
.insert(key.clone(), Ok((before, Merge::absent())));
} else if after.is_file_like() {
self.items.insert(
DiffStreamKey::file_after_dir(key.path.clone()),
Ok((Merge::absent(), after)),
);
}
match tree_diff {
Ok((trees1, trees2)) => {
self.add_dir_diff_items(&key.path, &trees1, &trees2);
self.add_dir_diff_items(&dir, &trees1, &trees2);
}
Err(err) => {
self.items.insert(DiffStreamKey::normal(key.path), Err(err));
self.items.insert(dir, Err(err));
}
}
} else {
@ -1075,45 +981,103 @@ impl Stream for TreeDiffStreamImpl<'_> {
match entry.get() {
Err(_) => {
// File or tree with error
let (key, result) = entry.remove_entry();
let (path, result) = entry.remove_entry();
Poll::Ready(Some(match result {
Err(err) => TreeDiffEntry {
path: key.path,
path,
values: Err(err),
},
Ok((before, after)) => TreeDiffEntry {
path: key.path,
path,
values: Ok((before, after)),
},
}))
}
Ok((before, after)) if !before.is_tree() && !after.is_tree() => {
// A diff with no trees involved
let (key, result) = entry.remove_entry();
Ok(_) => {
// A diff with a file on at least one side
let (path, result) = entry.remove_entry();
Poll::Ready(Some(match result {
Err(err) => TreeDiffEntry {
path: key.path,
path,
values: Err(err),
},
Ok((before, after)) => TreeDiffEntry {
path: key.path,
path,
values: Ok((before, after)),
},
}))
}
_ => {
// The first entry has a tree on at least one side (before or after). We need to
// wait for that future to complete.
assert!(!self.pending_trees.is_empty());
Poll::Pending
}
}
} else {
} else if self.pending_trees.is_empty() {
Poll::Ready(None)
} else {
Poll::Pending
}
}
}
/// Adapts a `TreeDiffStream` to emit a added file at a given path after a
/// removed directory at the same path.
struct DiffStreamForFileSystem<'a> {
inner: TreeDiffStream<'a>,
next_item: Option<TreeDiffEntry>,
held_file: Option<TreeDiffEntry>,
}
impl<'a> DiffStreamForFileSystem<'a> {
fn new(inner: TreeDiffStream<'a>) -> Self {
Self {
inner,
next_item: None,
held_file: None,
}
}
}
impl<'a> Stream for DiffStreamForFileSystem<'a> {
type Item = TreeDiffEntry;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
while let Some(next) = match self.next_item.take() {
Some(next) => Some(next),
None => ready!(self.inner.as_mut().poll_next(cx)),
} {
// If there's a held file "foo" and the next item to emit is not "foo/...", then
// we must be done with the "foo/" directory and it's time to emit "foo" as a
// removed file.
if let Some(held_entry) = self
.held_file
.take_if(|held_entry| !next.path.starts_with(&held_entry.path))
{
self.next_item = Some(next);
return Poll::Ready(Some(held_entry));
}
match next.values {
Ok((before, after)) if before.is_tree() => {
assert!(after.is_present());
assert!(self.held_file.is_none());
self.held_file = Some(TreeDiffEntry {
path: next.path,
values: Ok((Merge::absent(), after)),
});
}
Ok((before, after)) if after.is_tree() => {
assert!(before.is_present());
return Poll::Ready(Some(TreeDiffEntry {
path: next.path,
values: Ok((before, Merge::absent())),
}));
}
_ => {
return Poll::Ready(Some(next));
}
}
}
Poll::Ready(self.held_file.take())
}
}
/// Helper for writing trees with conflicts.
///
/// You start by creating an instance of this type with one or more