Add support for captured groups in Find & Replace (#222)

Closes #111

Co-authored-by: Leonard Hecker <leonard@hecker.io>
This commit is contained in:
viyic 2025-06-18 06:51:29 +07:00 committed by GitHub
parent 91a9a5f808
commit 70f5b73878
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 179 additions and 20 deletions

View file

@ -181,12 +181,12 @@ pub fn search_execute(ctx: &mut Context, state: &mut State, action: SearchAction
SearchAction::Replace => doc.buffer.borrow_mut().find_and_replace(
&state.search_needle,
state.search_options,
&state.search_replacement,
state.search_replacement.as_bytes(),
),
SearchAction::ReplaceAll => doc.buffer.borrow_mut().find_and_replace_all(
&state.search_needle,
state.search_options,
&state.search_replacement,
state.search_replacement.as_bytes(),
),
}
.is_ok();

View file

@ -36,7 +36,7 @@ use std::str;
pub use gap_buffer::GapBuffer;
use crate::arena::{ArenaString, scratch_arena};
use crate::arena::{Arena, ArenaString, scratch_arena};
use crate::cell::SemiRefCell;
use crate::clipboard::Clipboard;
use crate::document::{ReadableDocument, WriteableDocument};
@ -136,6 +136,11 @@ pub struct SearchOptions {
pub use_regex: bool,
}
enum RegexReplacement<'a> {
Group(i32),
Text(Vec<u8, &'a Arena>),
}
/// Caches the start and length of the active edit line for a single edit.
/// This helps us avoid having to remeasure the buffer after an edit.
struct ActiveEditLineInfo {
@ -1078,13 +1083,18 @@ impl TextBuffer {
&mut self,
pattern: &str,
options: SearchOptions,
replacement: &str,
replacement: &[u8],
) -> apperr::Result<()> {
// Editors traditionally replace the previous search hit, not the next possible one.
if let (Some(search), Some(..)) = (&mut self.search, &self.selection) {
let search = search.get_mut();
if let (Some(search), Some(..)) = (&self.search, &self.selection) {
let search = unsafe { &mut *search.get() };
if search.selection_generation == self.selection_generation {
self.write(replacement.as_bytes(), self.cursor, true);
let scratch = scratch_arena(None);
let parsed_replacements =
Self::find_parse_replacement(&scratch, &mut *search, replacement);
let replacement =
self.find_fill_replacement(&mut *search, replacement, &parsed_replacements);
self.write(&replacement, self.cursor, true);
}
}
@ -1096,18 +1106,22 @@ impl TextBuffer {
&mut self,
pattern: &str,
options: SearchOptions,
replacement: &str,
replacement: &[u8],
) -> apperr::Result<()> {
let replacement = replacement.as_bytes();
let scratch = scratch_arena(None);
let mut search = self.find_construct_search(pattern, options)?;
let mut offset = 0;
let parsed_replacements = Self::find_parse_replacement(&scratch, &mut search, replacement);
loop {
self.find_select_next(&mut search, offset, false);
if !self.has_selection() {
break;
}
self.write(replacement, self.cursor, true);
let replacement =
self.find_fill_replacement(&mut search, replacement, &parsed_replacements);
self.write(&replacement, self.cursor, true);
offset = self.cursor.offset;
}
@ -1215,6 +1229,130 @@ impl TextBuffer {
};
}
fn find_parse_replacement<'a>(
arena: &'a Arena,
search: &mut ActiveSearch,
replacement: &[u8],
) -> Vec<RegexReplacement<'a>, &'a Arena> {
let mut res = Vec::new_in(arena);
if !search.options.use_regex {
return res;
}
let group_count = search.regex.group_count();
let mut text = Vec::new_in(arena);
let mut text_beg = 0;
loop {
let mut off = memchr2(b'$', b'\\', replacement, text_beg);
// Push the raw, unescaped text, if any.
if text_beg < off {
text.extend_from_slice(&replacement[text_beg..off]);
}
// Unescape any escaped characters.
while off < replacement.len() && replacement[off] == b'\\' {
off += 2;
// If this backslash is the last character (e.g. because
// `replacement` is just 1 byte long, holding just b"\\"),
// we can't unescape it. In that case, we map it to `b'\\'` here.
// This results in us appending a literal backslash to the text.
let ch = replacement.get(off - 1).map_or(b'\\', |&c| c);
// Unescape and append the character.
text.push(match ch {
b'n' => b'\n',
b'r' => b'\r',
b't' => b'\t',
ch => ch,
});
}
// Parse out a group number, if any.
let mut group = -1;
if off < replacement.len() && replacement[off] == b'$' {
let mut beg = off;
let mut end = off + 1;
let mut acc = 0i32;
let mut acc_bad = true;
if end < replacement.len() {
let ch = replacement[end];
if ch == b'$' {
// Translate "$$" to "$".
beg += 1;
end += 1;
} else if ch.is_ascii_digit() {
// Parse "$1234" into 1234i32.
// If the number is larger than the group count,
// we flag `acc_bad` which causes us to treat it as text.
acc_bad = false;
while {
acc =
acc.wrapping_mul(10).wrapping_add((replacement[end] - b'0') as i32);
acc_bad |= acc > group_count;
end += 1;
end < replacement.len() && replacement[end].is_ascii_digit()
} {}
}
}
if !acc_bad {
group = acc;
} else {
text.extend_from_slice(&replacement[beg..end]);
}
off = end;
}
if !text.is_empty() {
res.push(RegexReplacement::Text(text));
text = Vec::new_in(arena);
}
if group >= 0 {
res.push(RegexReplacement::Group(group));
}
text_beg = off;
if text_beg >= replacement.len() {
break;
}
}
res
}
fn find_fill_replacement<'a>(
&self,
search: &mut ActiveSearch,
replacement: &'a [u8],
parsed_replacements: &[RegexReplacement],
) -> Cow<'a, [u8]> {
if !search.options.use_regex {
Cow::Borrowed(replacement)
} else {
let mut res = Vec::new();
for replacement in parsed_replacements {
match replacement {
RegexReplacement::Text(text) => res.extend_from_slice(text),
RegexReplacement::Group(group) => {
if let Some(range) = search.regex.group(*group) {
self.buffer.extract_raw(range, &mut res, usize::MAX);
}
}
}
}
Cow::Owned(res)
}
}
fn measurement_config(&self) -> MeasurementConfig<'_> {
MeasurementConfig::new(&self.buffer)
.with_word_wrap_column(self.word_wrap_column)

View file

@ -677,6 +677,31 @@ impl Regex {
let mut status = icu_ffi::U_ZERO_ERROR;
unsafe { (f.uregex_reset64)(self.0, offset as i64, &mut status) };
}
/// Gets captured group count.
pub fn group_count(&mut self) -> i32 {
let f = assume_loaded();
let mut status = icu_ffi::U_ZERO_ERROR;
let count = unsafe { (f.uregex_groupCount)(self.0, &mut status) };
if status.is_failure() { 0 } else { count }
}
/// Gets the text range of a captured group by index.
pub fn group(&mut self, group: i32) -> Option<Range<usize>> {
let f = assume_loaded();
let mut status = icu_ffi::U_ZERO_ERROR;
let start = unsafe { (f.uregex_start64)(self.0, group, &mut status) };
let end = unsafe { (f.uregex_end64)(self.0, group, &mut status) };
if status.is_failure() {
None
} else {
let start = start.max(0);
let end = end.max(start);
Some(start as usize..end as usize)
}
}
}
impl Iterator for Regex {
@ -691,15 +716,7 @@ impl Iterator for Regex {
return None;
}
let start = unsafe { (f.uregex_start64)(self.0, 0, &mut status) };
let end = unsafe { (f.uregex_end64)(self.0, 0, &mut status) };
if status.is_failure() {
return None;
}
let start = start.max(0);
let end = end.max(start);
Some(start as usize..end as usize)
self.group(0)
}
}
@ -900,6 +917,7 @@ struct LibraryFunctions {
uregex_setUText: icu_ffi::uregex_setUText,
uregex_reset64: icu_ffi::uregex_reset64,
uregex_findNext: icu_ffi::uregex_findNext,
uregex_groupCount: icu_ffi::uregex_groupCount,
uregex_start64: icu_ffi::uregex_start64,
uregex_end64: icu_ffi::uregex_end64,
}
@ -919,7 +937,7 @@ const LIBICUUC_PROC_NAMES: [&CStr; 10] = [
];
// Found in libicui18n.so on UNIX, icuin.dll/icu.dll on Windows.
const LIBICUI18N_PROC_NAMES: [&CStr; 10] = [
const LIBICUI18N_PROC_NAMES: [&CStr; 11] = [
c"ucol_open",
c"ucol_strcollUTF8",
c"uregex_open",
@ -928,6 +946,7 @@ const LIBICUI18N_PROC_NAMES: [&CStr; 10] = [
c"uregex_setUText",
c"uregex_reset64",
c"uregex_findNext",
c"uregex_groupCount",
c"uregex_start64",
c"uregex_end64",
];
@ -1277,6 +1296,8 @@ mod icu_ffi {
unsafe extern "C" fn(regexp: *mut URegularExpression, index: i64, status: &mut UErrorCode);
pub type uregex_findNext =
unsafe extern "C" fn(regexp: *mut URegularExpression, status: &mut UErrorCode) -> bool;
pub type uregex_groupCount =
unsafe extern "C" fn(regexp: *mut URegularExpression, status: &mut UErrorCode) -> i32;
pub type uregex_start64 = unsafe extern "C" fn(
regexp: *mut URegularExpression,
group_num: i32,