mirror of
https://github.com/jj-vcs/jj.git
synced 2025-12-23 06:01:01 +00:00
CommitRef now stores raw author/committer headers and parses them when needed. Since parsing errors would have been detected at .try_to_commit_ref(), this patch makes new decode errors propagate as before. Fixes #8350, #8214
496 lines
16 KiB
Rust
496 lines
16 KiB
Rust
// Copyright 2024 The Jujutsu Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
use std::collections::HashMap;
|
|
use std::collections::HashSet;
|
|
use std::path::Path;
|
|
use std::sync::Arc;
|
|
use std::time::Duration;
|
|
use std::time::SystemTime;
|
|
|
|
use futures::executor::block_on_stream;
|
|
use itertools::Itertools as _;
|
|
use jj_lib::backend::CommitId;
|
|
use jj_lib::backend::CopyRecord;
|
|
use jj_lib::commit::Commit;
|
|
use jj_lib::conflict_labels::ConflictLabels;
|
|
use jj_lib::git_backend::GitBackend;
|
|
use jj_lib::git_backend::JJ_TREES_COMMIT_HEADER;
|
|
use jj_lib::merge::Merge;
|
|
use jj_lib::merged_tree::MergedTree;
|
|
use jj_lib::object_id::ObjectId as _;
|
|
use jj_lib::repo::ReadonlyRepo;
|
|
use jj_lib::repo::Repo as _;
|
|
use jj_lib::repo_path::RepoPath;
|
|
use jj_lib::repo_path::RepoPathBuf;
|
|
use jj_lib::stacked_table::TableSegment as _;
|
|
use jj_lib::stacked_table::TableStore;
|
|
use jj_lib::store::Store;
|
|
use jj_lib::transaction::Transaction;
|
|
use maplit::hashmap;
|
|
use maplit::hashset;
|
|
use testutils::TestRepo;
|
|
use testutils::TestRepoBackend;
|
|
use testutils::assert_tree_eq;
|
|
use testutils::commit_with_tree;
|
|
use testutils::create_random_commit;
|
|
use testutils::create_single_tree;
|
|
use testutils::create_tree;
|
|
use testutils::is_external_tool_installed;
|
|
use testutils::repo_path;
|
|
use testutils::repo_path_buf;
|
|
use testutils::write_random_commit;
|
|
use testutils::write_random_commit_with_parents;
|
|
|
|
/// Returns the `GitBackend` that backs `repo`'s store.
///
/// Panics if `backend_impl()` returns `None` for `GitBackend`.
fn get_git_backend(repo: &Arc<ReadonlyRepo>) -> &GitBackend {
    let backend: Option<&GitBackend> = repo.store().backend_impl();
    backend.unwrap()
}
|
|
|
|
fn collect_no_gc_refs(git_repo_path: &Path) -> HashSet<CommitId> {
|
|
// Load fresh git repo to isolate from false caching issue. Here we want to
|
|
// ensure that the underlying data is correct. We could test the in-memory
|
|
// data as well, but we don't have any special handling in our code.
|
|
let git_repo = gix::open(git_repo_path).unwrap();
|
|
let git_refs = git_repo.references().unwrap();
|
|
let no_gc_refs_iter = git_refs.prefixed("refs/jj/keep/").unwrap();
|
|
no_gc_refs_iter
|
|
.map(|git_ref| CommitId::from_bytes(git_ref.unwrap().id().as_bytes()))
|
|
.collect()
|
|
}
|
|
|
|
fn get_copy_records(
|
|
store: &Store,
|
|
paths: Option<&[RepoPathBuf]>,
|
|
a: &Commit,
|
|
b: &Commit,
|
|
) -> HashMap<String, String> {
|
|
let stream = store.get_copy_records(paths, a.id(), b.id()).unwrap();
|
|
let mut res: HashMap<String, String> = HashMap::new();
|
|
for CopyRecord { target, source, .. } in block_on_stream(stream).filter_map(|r| r.ok()) {
|
|
res.insert(
|
|
target.as_internal_file_string().into(),
|
|
source.as_internal_file_string().into(),
|
|
);
|
|
}
|
|
res
|
|
}
|
|
|
|
fn make_commit(
|
|
tx: &mut Transaction,
|
|
parents: Vec<CommitId>,
|
|
content: &[(&RepoPath, &str)],
|
|
) -> Commit {
|
|
let tree = create_tree(tx.base_repo(), content);
|
|
tx.repo_mut().new_commit(parents, tree).write().unwrap()
|
|
}
|
|
|
|
/// Returns the names of the entries directly inside `dir`, in ascending order.
///
/// Panics if the directory or one of its entries cannot be read, or if an
/// entry name is not valid UTF-8.
fn list_dir(dir: &Path) -> Vec<String> {
    let mut names = Vec::new();
    for entry in std::fs::read_dir(dir).unwrap() {
        let file_name = entry.unwrap().file_name();
        names.push(file_name.to_str().unwrap().to_owned());
    }
    names.sort();
    names
}
|
|
|
|
/// Runs `Store::gc()` against progressively smaller indexes and checks which
/// `refs/jj/keep/` refs remain in the Git repository after each run.
#[test]
fn test_gc() {
    // TODO: Better way to disable the test if git command couldn't be executed
    if !is_external_tool_installed("git") {
        eprintln!("Skipping because git command might fail to run");
        return;
    }

    let test_repo = TestRepo::init_with_backend(TestRepoBackend::Git);
    let repo = test_repo.repo;
    let git_repo_path = get_git_backend(&repo).git_repo_path();
    let base_index = repo.readonly_index();

    // Builds a set of commit ids from a list of commits.
    let ids_of = |commits: &[&Commit]| -> HashSet<CommitId> {
        commits.iter().map(|commit| commit.id().clone()).collect()
    };

    // Set up commits:
    //
    //     H (predecessor: D)
    //   G |
    //   |\|
    //   | F
    //   E |
    // D | |
    // C |/
    // |/
    // B
    // A
    let mut tx = repo.start_transaction();
    let commit_a = write_random_commit(tx.repo_mut());
    let commit_b = write_random_commit_with_parents(tx.repo_mut(), &[&commit_a]);
    let commit_c = write_random_commit_with_parents(tx.repo_mut(), &[&commit_b]);
    let commit_d = write_random_commit_with_parents(tx.repo_mut(), &[&commit_c]);
    let commit_e = write_random_commit_with_parents(tx.repo_mut(), &[&commit_b]);
    let commit_f = write_random_commit_with_parents(tx.repo_mut(), &[&commit_b]);
    let commit_g = write_random_commit_with_parents(tx.repo_mut(), &[&commit_e, &commit_f]);
    let commit_h = create_random_commit(tx.repo_mut())
        .set_parents(vec![commit_f.id().clone()])
        .set_predecessors(vec![commit_d.id().clone()])
        .write()
        .unwrap();
    let repo = tx.commit("test").unwrap();

    let head_ids = ids_of(&[&commit_d, &commit_g, &commit_h]);
    assert_eq!(*repo.view().heads(), head_ids);

    // At first, all commits have no-gc refs
    let all_ids = ids_of(&[
        &commit_a, &commit_b, &commit_c, &commit_d, &commit_e, &commit_f, &commit_g, &commit_h,
    ]);
    assert_eq!(collect_no_gc_refs(git_repo_path), all_ids);

    // Empty index, but all kept by file modification time
    // (Beware that this invokes "git gc" and refs will be packed.)
    repo.store()
        .gc(base_index.as_index(), SystemTime::UNIX_EPOCH)
        .unwrap();
    assert_eq!(collect_no_gc_refs(git_repo_path), all_ids);

    // Don't rely on the exact system time because file modification time might
    // have lower precision for example.
    let now = || SystemTime::now() + Duration::from_secs(1);

    // Runs gc against an index that extends the base index with exactly the
    // given commits, added in the given order.
    let gc_with_indexed = |commits: &[&Commit]| {
        let mut mut_index = base_index.start_modification();
        for &commit in commits {
            mut_index.add_commit(commit).unwrap();
        }
        repo.store().gc(mut_index.as_index(), now()).unwrap();
    };

    // All reachable: redundant no-gc refs will be removed
    repo.store().gc(repo.index(), now()).unwrap();
    assert_eq!(collect_no_gc_refs(git_repo_path), head_ids);

    // G is no longer reachable
    gc_with_indexed(&[
        &commit_a, &commit_b, &commit_c, &commit_d, &commit_e, &commit_f, &commit_h,
    ]);
    assert_eq!(
        collect_no_gc_refs(git_repo_path),
        ids_of(&[&commit_d, &commit_e, &commit_h]),
    );

    // D|E|H are no longer reachable
    gc_with_indexed(&[&commit_a, &commit_b, &commit_c, &commit_f]);
    assert_eq!(
        collect_no_gc_refs(git_repo_path),
        ids_of(&[&commit_c, &commit_f]),
    );

    // B|C|F are no longer reachable
    gc_with_indexed(&[&commit_a]);
    assert_eq!(collect_no_gc_refs(git_repo_path), ids_of(&[&commit_a]));

    // All unreachable
    repo.store().gc(base_index.as_index(), now()).unwrap();
    assert_eq!(collect_no_gc_refs(git_repo_path), hashset! {});
}
|
|
|
|
/// Checks how `Store::gc()` treats the table segments stored under
/// `store/extra`: they are kept when the keep threshold is the Unix epoch and
/// pruned when the threshold is in the future.
#[test]
fn test_gc_extra_table() {
    if !is_external_tool_installed("git") {
        eprintln!("Skipping because git command might fail to run");
        return;
    }

    let test_repo = TestRepo::init_with_backend(TestRepoBackend::Git);
    let extra_path = test_repo.repo_path().join("store").join("extra");
    let extra_key_size = test_repo.repo.store().root_commit_id().as_bytes().len();

    let load_repo = || {
        test_repo
            .env
            .load_repo_at_head(test_repo.repo.settings(), test_repo.repo_path())
    };
    let load_extra_table = || {
        TableStore::load(extra_path.clone(), extra_key_size)
            .get_head()
            .unwrap()
    };
    // Entry counts per segment, listed in the reverse of `ancestor_segments()`
    // order.
    let segment_sizes = || {
        let head = load_extra_table();
        let mut sizes = head
            .ancestor_segments()
            .map(|segment| segment.segment_num_entries())
            .collect_vec();
        sizes.reverse();
        sizes
    };
    // Number of directory entries in the extra table directory.
    let count_extra_files = || list_dir(&extra_path).len();

    // Sanity check for the initial state
    assert_eq!(segment_sizes(), [0]);
    assert_eq!(count_extra_files(), 1 + 1); // empty segment + "heads"

    // Write 4 commits
    let mut tx = test_repo.repo.start_transaction();
    for _ in 0..4 {
        write_random_commit(tx.repo_mut());
    }
    tx.commit("test").unwrap();
    // The first 3 will be squashed into one table segment
    assert_eq!(segment_sizes(), [3, 1]);
    assert_eq!(count_extra_files(), 5 + 1);

    // Reload repo to invalidate cache in TableStore
    let repo = load_repo();
    let index = repo.readonly_index().as_index();

    // All segments should be kept by modification time
    repo.store().gc(index, SystemTime::UNIX_EPOCH).unwrap();
    assert_eq!(segment_sizes(), [3, 1]);
    assert_eq!(count_extra_files(), 5 + 1);

    // All unreachable segments should be removed
    let now = SystemTime::now() + Duration::from_secs(1);
    repo.store().gc(index, now).unwrap();
    assert_eq!(segment_sizes(), [3, 1]);
    assert_eq!(count_extra_files(), 2 + 1);

    // Ensure that repo is still loadable
    load_repo();
}
|
|
|
|
/// Moves the same content through `file0` -> `file1` -> `file2` over three
/// commits and checks the copy records reported between pairs of them.
#[test]
fn test_copy_detection() {
    let test_repo = TestRepo::init_with_backend(TestRepoBackend::Git);
    let repo = &test_repo.repo;
    let store = repo.store();
    let root_id = store.root_commit_id().clone();

    let paths = &[
        repo_path_buf("file0"),
        repo_path_buf("file1"),
        repo_path_buf("file2"),
    ];

    let mut tx = repo.start_transaction();
    let commit_a = make_commit(&mut tx, vec![root_id], &[(&paths[0], "content")]);
    let commit_b = make_commit(
        &mut tx,
        vec![commit_a.id().clone()],
        &[(&paths[1], "content")],
    );
    let commit_c = make_commit(
        &mut tx,
        vec![commit_b.id().clone()],
        &[(&paths[2], "content")],
    );

    // Expected result consisting of a single `target <- source` record.
    let copied = |target: &str, source: &str| -> HashMap<String, String> {
        HashMap::from([(target.to_string(), source.to_string())])
    };
    let no_records: HashMap<String, String> = HashMap::default();

    // Adjacent commits
    assert_eq!(
        get_copy_records(store, Some(paths), &commit_a, &commit_b),
        copied("file1", "file0")
    );
    assert_eq!(
        get_copy_records(store, Some(paths), &commit_b, &commit_c),
        copied("file2", "file1")
    );
    // Across the intermediate commit, with and without a path filter
    assert_eq!(
        get_copy_records(store, Some(paths), &commit_a, &commit_c),
        copied("file2", "file0")
    );
    assert_eq!(
        get_copy_records(store, None, &commit_a, &commit_c),
        copied("file2", "file0")
    );
    // Path filter containing only `file1`
    assert_eq!(
        get_copy_records(
            store,
            Some(std::slice::from_ref(&paths[1])),
            &commit_a,
            &commit_c
        ),
        no_records,
    );
    // Same commit on both sides
    assert_eq!(
        get_copy_records(store, Some(paths), &commit_c, &commit_c),
        no_records,
    );
}
|
|
|
|
/// Checks copy records when a file and a directory swap names and when a file
/// is replaced by a directory containing a file with the same content.
#[test]
fn test_copy_detection_file_and_dir() {
    let test_repo = TestRepo::init_with_backend(TestRepoBackend::Git);
    let repo = &test_repo.repo;
    let root_id = repo.store().root_commit_id().clone();

    // a -> b (file)
    // b -> a (dir)
    // c -> c/file (file)
    let before = [
        (repo_path("a"), "content1"),
        (repo_path("b/file"), "content2"),
        (repo_path("c"), "content3"),
    ];
    let after = [
        (repo_path("a/file"), "content2"),
        (repo_path("b"), "content1"),
        (repo_path("c/file"), "content3"),
    ];

    let mut tx = repo.start_transaction();
    let commit_a = make_commit(&mut tx, vec![root_id], &before);
    let commit_b = make_commit(&mut tx, vec![commit_a.id().clone()], &after);

    let actual = get_copy_records(repo.store(), None, &commit_a, &commit_b);
    let expected = hashmap! {
        "b".to_owned() => "a".to_owned(),
        "a/file".to_owned() => "b/file".to_owned(),
        "c/file".to_owned() => "c".to_owned(),
    };
    assert_eq!(actual, expected);
}
|
|
|
|
/// Writes a Git commit whose `jj:trees` header names a single tree that
/// differs from the commit's own tree, and checks that importing it fails.
#[test]
fn test_jj_trees_header_with_one_tree() {
    let test_repo = TestRepo::init_with_backend(TestRepoBackend::Git);
    let repo = test_repo.repo;
    let git_backend = get_git_backend(&repo);
    let git_repo = git_backend.git_repo();

    let tree_1 = create_single_tree(&repo, &[(repo_path("file"), "aaa")]);
    let tree_2 = create_single_tree(&repo, &[(repo_path("file"), "bbb")]);

    // Create a normal commit with tree 1
    let resolved_tree = MergedTree::resolved(repo.store().clone(), tree_1.id().clone());
    let commit = commit_with_tree(repo.store(), resolved_tree);
    let git_oid = gix::ObjectId::from_bytes_or_panic(commit.id().as_bytes());
    let git_commit = git_repo.find_commit(git_oid).unwrap();

    // Add `jj:trees` with a single tree which is different from the Git commit tree
    let mut new_commit: gix::objs::Commit = git_commit.decode().unwrap().try_into().unwrap();
    let trees_header = (
        JJ_TREES_COMMIT_HEADER.into(),
        tree_2.id().to_string().into(),
    );
    new_commit.extra_headers = vec![trees_header];
    let written_oid = git_repo.write_object(&new_commit).unwrap();
    let new_commit_id = CommitId::from_bytes(written_oid.as_bytes());

    // Import new commit into `jj` repo. This should fail, because allowing a
    // non-conflicted commit to have a different tree in `jj` than in Git could be
    // used to hide malicious code.
    let import_result = git_backend.import_head_commits(std::slice::from_ref(&new_commit_id));
    insta::assert_debug_snapshot!(import_result, @r#"
    Err(
        ReadObject {
            object_type: "commit",
            hash: "87df728a30166ce1de0bf883948dd66b74cf25a0",
            source: "Invalid jj:trees header",
        },
    )
    "#);
}
|
|
|
|
/// Writes a commit with a seven-term conflicted tree and partly empty conflict
/// labels, then re-reads it and checks that the tree compares equal.
#[test]
fn test_conflict_headers_roundtrip() {
    let test_repo = TestRepo::init_with_backend(TestRepoBackend::Git);
    let repo = test_repo.repo;

    // One single-file tree per conflict term, created in this order.
    let tree_ids = ["aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg"].map(|content| {
        create_single_tree(&repo, &[(repo_path("file"), content)])
            .id()
            .clone()
    });

    // This creates a Git commit header with leading and trailing newlines to ensure
    // that it can still be parsed correctly. The resulting `jj:conflict-labels`
    // header value will look like `\nbase 1\nside 2\n\nside 3\n\n\n`.
    let labels = ConflictLabels::from_vec(vec![
        "".into(),
        "base 1".into(),
        "side 2".into(),
        "".into(),
        "side 3".into(),
        "".into(),
        "".into(),
    ]);
    let merged_tree = MergedTree::new(
        repo.store().clone(),
        Merge::from_vec(Vec::from(tree_ids)),
        labels,
    );

    // Create a commit with the conflicted tree.
    let commit = commit_with_tree(repo.store(), merged_tree.clone());
    // Clear cached commit to ensure it is re-read.
    repo.store().clear_caches();
    // Conflict trees and labels should be preserved on read.
    let reread_commit = repo.store().get_commit(commit.id()).unwrap();
    assert_tree_eq!(reread_commit.tree(), merged_tree);
}
|