working_copy: do not traverse symlinks when checking reserved names

Since we test new paths by using temporary (normal) files, we didn't have this
problem when creating new symlinks. However, the created symlinks pointing to
.git/.jj couldn't be deleted because a same_file::Handle is created for an open
file object, which inherently means symlinks are resolved.

This patch reimplements Unix version of same_file::Handle. Our implementation
uses lstat(path) instead of fstat(fd). Windows version is still broken. I think
we can use symlink_metadata() on both platforms and extract platform-specific
fields, but I don't know when Windows MetatdataExt functions will be stabilized.

Fixes #8348
This commit is contained in:
Yuya Nishihara 2024-10-22 19:20:54 +09:00
parent 361d8d6a21
commit 8ff56deb7d
5 changed files with 233 additions and 29 deletions

View file

@ -80,6 +80,10 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
* `jj gerrit upload` now correctly handles mixed explicit and implicit
Change-Ids in chains of commits ([#8219](https://github.com/jj-vcs/jj/pull/8219))
* Fixed checkout of symlinks pointing to themselves or `.git`/`.jj` on Unix. The
problem would still remain on Windows if symlinks are enabled.
[#8348](https://github.com/jj-vcs/jj/issues/8348)
## [0.36.0] - 2025-12-03
### Release highlights

View file

@ -61,7 +61,6 @@ rand_chacha = { workspace = true }
rayon = { workspace = true }
ref-cast = { workspace = true }
regex = { workspace = true }
same-file = { workspace = true }
serde = { workspace = true }
smallvec = { workspace = true }
strsim = { workspace = true }
@ -76,6 +75,7 @@ watchman_client = { workspace = true, optional = true }
rustix = { workspace = true }
[target.'cfg(windows)'.dependencies]
same-file = { workspace = true }
winreg = { workspace = true }
[dev-dependencies]

View file

@ -257,6 +257,29 @@ pub fn persist_content_addressed_temp_file<P: AsRef<Path>>(
}
}
/// Opaque value that can be tested to know whether file or directory paths
/// point to the same filesystem entity.
///
/// The primary use case is to detect file name aliases on case-insensitive
/// filesystem. On Unix, device and inode numbers are compared.
#[derive(Debug, Eq, Hash, PartialEq)]
pub struct FileIdentity(platform::FileIdentity);
impl FileIdentity {
/// Queries file identity without following symlinks.
///
/// BUG: On Windows, symbolic links would be followed.
pub fn from_symlink_path(path: impl AsRef<Path>) -> io::Result<Self> {
platform::file_identity_from_symlink_path(path.as_ref()).map(Self)
}
/// Queries file identity of the given `file`.
// TODO: do not consume file object
pub fn from_file(file: File) -> io::Result<Self> {
platform::file_identity_from_file(file).map(Self)
}
}
/// Reads from an async source and writes to a sync destination. Does not spawn
/// a task, so writes will block.
pub async fn copy_async_to_sync<R: AsyncRead, W: Write + ?Sized>(
@ -307,8 +330,11 @@ impl<R: Read + Unpin> AsyncRead for BlockingAsyncReader<R> {
mod platform {
use std::convert::Infallible;
use std::ffi::OsStr;
use std::fs;
use std::fs::File;
use std::io;
use std::os::unix::ffi::OsStrExt as _;
use std::os::unix::fs::MetadataExt as _;
use std::os::unix::fs::PermissionsExt;
use std::os::unix::fs::symlink;
use std::path::Path;
@ -351,10 +377,35 @@ mod platform {
pub fn try_symlink<P: AsRef<Path>, Q: AsRef<Path>>(original: P, link: Q) -> io::Result<()> {
symlink(original, link)
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct FileIdentity {
// https://github.com/BurntSushi/same-file/blob/1.0.6/src/unix.rs#L30
dev: u64,
ino: u64,
}
impl FileIdentity {
fn from_metadata(metadata: fs::Metadata) -> Self {
Self {
dev: metadata.dev(),
ino: metadata.ino(),
}
}
}
pub fn file_identity_from_symlink_path(path: &Path) -> io::Result<FileIdentity> {
path.symlink_metadata().map(FileIdentity::from_metadata)
}
pub fn file_identity_from_file(file: File) -> io::Result<FileIdentity> {
file.metadata().map(FileIdentity::from_metadata)
}
}
#[cfg(windows)]
mod platform {
use std::fs::File;
use std::io;
use std::os::windows::fs::symlink_file;
use std::path::Path;
@ -385,6 +436,22 @@ mod platform {
symlink_file(original, link)
}
pub type FileIdentity = same_file::Handle;
// FIXME: This shouldn't follow symlinks when querying file identity.
// Perhaps, we need to open file with FILE_FLAG_BACKUP_SEMANTICS and
// FILE_FLAG_OPEN_REPARSE_POINT, then pass it to from_file(). Alternatively,
// maybe we can use symlink_metadata(), volume_serial_number(), and
// file_index() when they get stabilized. See the same-file crate and std
// lstat() implementation. https://github.com/rust-lang/rust/issues/63010
pub fn file_identity_from_symlink_path(path: &Path) -> io::Result<FileIdentity> {
same_file::Handle::from_path(path)
}
pub fn file_identity_from_file(file: File) -> io::Result<FileIdentity> {
same_file::Handle::from_file(file)
}
}
#[cfg_attr(unix, expect(dead_code))]
@ -513,6 +580,88 @@ mod tests {
assert!(persist_content_addressed_temp_file(temp_file, &target).is_ok());
}
#[test]
fn test_file_identity_hard_link() {
let temp_dir = new_temp_dir();
let file_path = temp_dir.path().join("file");
let other_file_path = temp_dir.path().join("other_file");
let link_path = temp_dir.path().join("link");
fs::write(&file_path, "").unwrap();
fs::write(&other_file_path, "").unwrap();
fs::hard_link(&file_path, &link_path).unwrap();
assert_eq!(
FileIdentity::from_symlink_path(&file_path).unwrap(),
FileIdentity::from_symlink_path(&link_path).unwrap()
);
assert_ne!(
FileIdentity::from_symlink_path(&other_file_path).unwrap(),
FileIdentity::from_symlink_path(&link_path).unwrap()
);
assert_eq!(
FileIdentity::from_symlink_path(&file_path).unwrap(),
FileIdentity::from_file(File::open(&link_path).unwrap()).unwrap()
);
}
#[cfg(unix)]
#[test]
fn test_file_identity_unix_symlink_dir() {
let temp_dir = new_temp_dir();
let dir_path = temp_dir.path().join("dir");
let symlink_path = temp_dir.path().join("symlink");
fs::create_dir(&dir_path).unwrap();
std::os::unix::fs::symlink("dir", &symlink_path).unwrap();
// symlink should be identical to itself
assert_eq!(
FileIdentity::from_symlink_path(&symlink_path).unwrap(),
FileIdentity::from_symlink_path(&symlink_path).unwrap()
);
// symlink should be different from the target directory
assert_ne!(
FileIdentity::from_symlink_path(&dir_path).unwrap(),
FileIdentity::from_symlink_path(&symlink_path).unwrap()
);
// File::open() follows symlinks
assert_eq!(
FileIdentity::from_symlink_path(&dir_path).unwrap(),
FileIdentity::from_file(File::open(&symlink_path).unwrap()).unwrap()
);
assert_ne!(
FileIdentity::from_symlink_path(&symlink_path).unwrap(),
FileIdentity::from_file(File::open(&symlink_path).unwrap()).unwrap()
);
}
#[cfg(unix)]
#[test]
fn test_file_identity_unix_symlink_loop() {
let temp_dir = new_temp_dir();
let lower_file_path = temp_dir.path().join("file");
let upper_file_path = temp_dir.path().join("FILE");
let lower_symlink_path = temp_dir.path().join("symlink");
let upper_symlink_path = temp_dir.path().join("SYMLINK");
fs::write(&lower_file_path, "").unwrap();
std::os::unix::fs::symlink("symlink", &lower_symlink_path).unwrap();
let is_icase_fs = upper_file_path.try_exists().unwrap();
// symlink should be identical to itself
assert_eq!(
FileIdentity::from_symlink_path(&lower_symlink_path).unwrap(),
FileIdentity::from_symlink_path(&lower_symlink_path).unwrap()
);
assert_ne!(
FileIdentity::from_symlink_path(&lower_symlink_path).unwrap(),
FileIdentity::from_symlink_path(&lower_file_path).unwrap()
);
if is_icase_fs {
assert_eq!(
FileIdentity::from_symlink_path(&lower_symlink_path).unwrap(),
FileIdentity::from_symlink_path(&upper_symlink_path).unwrap()
);
} else {
assert!(FileIdentity::from_symlink_path(&upper_symlink_path).is_err());
}
}
#[test]
fn test_copy_async_to_sync_small() {
let input = b"hello";

View file

@ -80,6 +80,7 @@ use crate::conflicts::materialize_tree_value;
pub use crate::eol::EolConversionMode;
use crate::eol::TargetEolStrategy;
use crate::file_util::BlockingAsyncReader;
use crate::file_util::FileIdentity;
use crate::file_util::check_symlink_support;
use crate::file_util::copy_async_to_sync;
use crate::file_util::persist_temp_file;
@ -790,86 +791,88 @@ fn can_create_new_file(disk_path: &Path) -> Result<bool, CheckoutError> {
const RESERVED_DIR_NAMES: &[&str] = &[".git", ".jj"];
fn same_file_handle_from_path(disk_path: &Path) -> io::Result<Option<same_file::Handle>> {
match same_file::Handle::from_path(disk_path) {
Ok(handle) => Ok(Some(handle)),
fn file_identity_from_symlink_path(disk_path: &Path) -> io::Result<Option<FileIdentity>> {
match FileIdentity::from_symlink_path(disk_path) {
Ok(identity) => Ok(Some(identity)),
Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(err),
}
}
/// Wrapper for [`reject_reserved_existing_handle`] which avoids a syscall
/// by converting the provided `file` to a `same_file::Handle` via its
/// Wrapper for [`reject_reserved_existing_file_identity`] which avoids a
/// syscall by converting the provided `file` to a `FileIdentity` via its
/// file descriptor.
///
/// See [`reject_reserved_existing_handle`] for more info.
/// See [`reject_reserved_existing_file_identity`] for more info.
fn reject_reserved_existing_file(file: File, disk_path: &Path) -> Result<(), CheckoutError> {
// Note: since the file is open, we don't expect that it's possible for
// `io::ErrorKind::NotFound` to be a possible error returned here.
let file_handle = same_file::Handle::from_file(file).map_err(|err| CheckoutError::Other {
let file_identity = FileIdentity::from_file(file).map_err(|err| CheckoutError::Other {
message: format!("Failed to validate path {}", disk_path.display()),
err: err.into(),
})?;
reject_reserved_existing_handle(file_handle, disk_path)
reject_reserved_existing_file_identity(file_identity, disk_path)
}
/// Wrapper for [`reject_reserved_existing_handle`] which converts
/// the provided `disk_path` to a `same_file::Handle`.
/// Wrapper for [`reject_reserved_existing_file_identity`] which converts
/// the provided `disk_path` to a `FileIdentity`.
///
/// See [`reject_reserved_existing_handle`] for more info.
/// See [`reject_reserved_existing_file_identity`] for more info.
///
/// # Remarks
///
/// Incurs an additional syscall cost to open and close the file
/// descriptor/`HANDLE` for `disk_path`.
/// On Windows, this incurs an additional syscall cost to open and close the
/// file `HANDLE` for `disk_path`. On Unix, `lstat()` is used.
fn reject_reserved_existing_path(disk_path: &Path) -> Result<(), CheckoutError> {
let Some(disk_handle) =
same_file_handle_from_path(disk_path).map_err(|err| CheckoutError::Other {
let Some(disk_identity) =
file_identity_from_symlink_path(disk_path).map_err(|err| CheckoutError::Other {
message: format!("Failed to validate path {}", disk_path.display()),
err: err.into(),
})?
else {
// If the existing disk_path pointed to the reserved path, we would have
// gotten a handle back. Since we got nothing, the file does not exist
// gotten an identity back. Since we got nothing, the file does not exist
// and cannot be a reserved path name.
return Ok(());
};
reject_reserved_existing_handle(disk_handle, disk_path)
reject_reserved_existing_file_identity(disk_identity, disk_path)
}
/// Suppose the `disk_path` exists, checks if the last component points to
/// ".git" or ".jj" in the same parent directory.
///
/// `disk_handle` is expected to be a handle to the file described by
/// `disk_identity` is expected to be an identity of the file described by
/// `disk_path`.
///
/// # Remarks
///
/// Incurs a syscall cost to open and close a file descriptor/`HANDLE` for
/// each filename in `RESERVED_DIR_NAMES`.
fn reject_reserved_existing_handle(
disk_handle: same_file::Handle,
/// On Windows, this incurs a syscall cost to open and close a file `HANDLE` for
/// each filename in `RESERVED_DIR_NAMES`. On Unix, `lstat()` is used.
fn reject_reserved_existing_file_identity(
disk_identity: FileIdentity,
disk_path: &Path,
) -> Result<(), CheckoutError> {
let parent_dir_path = disk_path.parent().expect("content path shouldn't be root");
for name in RESERVED_DIR_NAMES {
let reserved_path = parent_dir_path.join(name);
let Some(reserved_handle) =
same_file_handle_from_path(&reserved_path).map_err(|err| CheckoutError::Other {
message: format!("Failed to validate path {}", disk_path.display()),
err: err.into(),
let Some(reserved_identity) =
file_identity_from_symlink_path(&reserved_path).map_err(|err| {
CheckoutError::Other {
message: format!("Failed to validate path {}", disk_path.display()),
err: err.into(),
}
})?
else {
// If the existing disk_path pointed to the reserved path, we would have
// gotten a handle back. Since we got nothing, the file does not exist
// gotten an identity back. Since we got nothing, the file does not exist
// and cannot be a reserved path name.
continue;
};
if disk_handle == reserved_handle {
if disk_identity == reserved_identity {
return Err(CheckoutError::ReservedPathComponent {
path: disk_path.to_owned(),
name,

View file

@ -13,6 +13,7 @@
// limitations under the License.
use std::fs::File;
use std::io;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt as _;
use std::path::Component;
@ -1713,6 +1714,53 @@ fn test_check_out_file_removal_over_existing_directory_symlink() {
assert!(victim_file_path.exists());
}
#[test_case(".git"; "reserved .git dir")]
#[test_case(".jj"; "reserved .jj dir")]
#[test_case("symlink"; "looped")]
#[test_case("unknown"; "dead")]
#[cfg_attr(windows, ignore = "Windows impl follows symlink")] // FIXME
fn test_check_out_symlink_unusual_target(link_target: &str) {
if !check_symlink_support().unwrap() {
eprintln!("Skipping test because symlink isn't supported");
return;
}
let mut test_workspace = TestWorkspace::init();
let repo = &test_workspace.repo;
let workspace_root = test_workspace.workspace.workspace_root().to_owned();
std::fs::create_dir(workspace_root.join(".git")).unwrap();
let symlink_path = repo_path("symlink");
let symlink_disk_path = symlink_path.to_fs_path_unchecked(&workspace_root);
let tree1 = create_tree_with(repo, |builder| {
builder.symlink(symlink_path, link_target);
});
let tree2 = create_tree(repo, &[]);
let commit1 = commit_with_tree(repo.store(), tree1);
let commit2 = commit_with_tree(repo.store(), tree2);
// Check out tree containing symlink
let ws = &mut test_workspace.workspace;
let stats = ws.check_out(repo.op_id().clone(), None, &commit1).unwrap();
assert_eq!(stats.added_files, 1);
// Symlink should be created
assert_eq!(
symlink_disk_path.read_link().unwrap().as_os_str(),
link_target
);
// Check out empty tree
let stats = ws.check_out(repo.op_id().clone(), None, &commit2).unwrap();
assert_eq!(stats.removed_files, 1);
// Symlink should be deleted
assert_matches!(
symlink_disk_path.symlink_metadata().map_err(|e| e.kind()),
Err(io::ErrorKind::NotFound)
);
}
#[test_case("../pwned"; "escape from root")]
#[test_case("sub/../../pwned"; "escape from sub dir")]
fn test_check_out_malformed_file_path(file_path_str: &str) {