mirror of
https://github.com/microsoft/edit.git
synced 2025-07-07 21:35:16 +00:00
Add support for captured groups in Find & Replace (#222)
Closes #111

Co-authored-by: Leonard Hecker <leonard@hecker.io>
This commit is contained in:
parent
91a9a5f808
commit
70f5b73878
3 changed files with 179 additions and 20 deletions
|
@ -181,12 +181,12 @@ pub fn search_execute(ctx: &mut Context, state: &mut State, action: SearchAction
|
|||
SearchAction::Replace => doc.buffer.borrow_mut().find_and_replace(
|
||||
&state.search_needle,
|
||||
state.search_options,
|
||||
&state.search_replacement,
|
||||
state.search_replacement.as_bytes(),
|
||||
),
|
||||
SearchAction::ReplaceAll => doc.buffer.borrow_mut().find_and_replace_all(
|
||||
&state.search_needle,
|
||||
state.search_options,
|
||||
&state.search_replacement,
|
||||
state.search_replacement.as_bytes(),
|
||||
),
|
||||
}
|
||||
.is_ok();
|
||||
|
|
|
@ -36,7 +36,7 @@ use std::str;
|
|||
|
||||
pub use gap_buffer::GapBuffer;
|
||||
|
||||
use crate::arena::{ArenaString, scratch_arena};
|
||||
use crate::arena::{Arena, ArenaString, scratch_arena};
|
||||
use crate::cell::SemiRefCell;
|
||||
use crate::clipboard::Clipboard;
|
||||
use crate::document::{ReadableDocument, WriteableDocument};
|
||||
|
@ -136,6 +136,11 @@ pub struct SearchOptions {
|
|||
pub use_regex: bool,
|
||||
}
|
||||
|
||||
enum RegexReplacement<'a> {
|
||||
Group(i32),
|
||||
Text(Vec<u8, &'a Arena>),
|
||||
}
|
||||
|
||||
/// Caches the start and length of the active edit line for a single edit.
|
||||
/// This helps us avoid having to remeasure the buffer after an edit.
|
||||
struct ActiveEditLineInfo {
|
||||
|
@ -1078,13 +1083,18 @@ impl TextBuffer {
|
|||
&mut self,
|
||||
pattern: &str,
|
||||
options: SearchOptions,
|
||||
replacement: &str,
|
||||
replacement: &[u8],
|
||||
) -> apperr::Result<()> {
|
||||
// Editors traditionally replace the previous search hit, not the next possible one.
|
||||
if let (Some(search), Some(..)) = (&mut self.search, &self.selection) {
|
||||
let search = search.get_mut();
|
||||
if let (Some(search), Some(..)) = (&self.search, &self.selection) {
|
||||
let search = unsafe { &mut *search.get() };
|
||||
if search.selection_generation == self.selection_generation {
|
||||
self.write(replacement.as_bytes(), self.cursor, true);
|
||||
let scratch = scratch_arena(None);
|
||||
let parsed_replacements =
|
||||
Self::find_parse_replacement(&scratch, &mut *search, replacement);
|
||||
let replacement =
|
||||
self.find_fill_replacement(&mut *search, replacement, &parsed_replacements);
|
||||
self.write(&replacement, self.cursor, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1096,18 +1106,22 @@ impl TextBuffer {
|
|||
&mut self,
|
||||
pattern: &str,
|
||||
options: SearchOptions,
|
||||
replacement: &str,
|
||||
replacement: &[u8],
|
||||
) -> apperr::Result<()> {
|
||||
let replacement = replacement.as_bytes();
|
||||
let scratch = scratch_arena(None);
|
||||
let mut search = self.find_construct_search(pattern, options)?;
|
||||
let mut offset = 0;
|
||||
let parsed_replacements = Self::find_parse_replacement(&scratch, &mut search, replacement);
|
||||
|
||||
loop {
|
||||
self.find_select_next(&mut search, offset, false);
|
||||
if !self.has_selection() {
|
||||
break;
|
||||
}
|
||||
self.write(replacement, self.cursor, true);
|
||||
|
||||
let replacement =
|
||||
self.find_fill_replacement(&mut search, replacement, &parsed_replacements);
|
||||
self.write(&replacement, self.cursor, true);
|
||||
offset = self.cursor.offset;
|
||||
}
|
||||
|
||||
|
@ -1215,6 +1229,130 @@ impl TextBuffer {
|
|||
};
|
||||
}
|
||||
|
||||
fn find_parse_replacement<'a>(
|
||||
arena: &'a Arena,
|
||||
search: &mut ActiveSearch,
|
||||
replacement: &[u8],
|
||||
) -> Vec<RegexReplacement<'a>, &'a Arena> {
|
||||
let mut res = Vec::new_in(arena);
|
||||
|
||||
if !search.options.use_regex {
|
||||
return res;
|
||||
}
|
||||
|
||||
let group_count = search.regex.group_count();
|
||||
let mut text = Vec::new_in(arena);
|
||||
let mut text_beg = 0;
|
||||
|
||||
loop {
|
||||
let mut off = memchr2(b'$', b'\\', replacement, text_beg);
|
||||
|
||||
// Push the raw, unescaped text, if any.
|
||||
if text_beg < off {
|
||||
text.extend_from_slice(&replacement[text_beg..off]);
|
||||
}
|
||||
|
||||
// Unescape any escaped characters.
|
||||
while off < replacement.len() && replacement[off] == b'\\' {
|
||||
off += 2;
|
||||
|
||||
// If this backslash is the last character (e.g. because
|
||||
// `replacement` is just 1 byte long, holding just b"\\"),
|
||||
// we can't unescape it. In that case, we map it to `b'\\'` here.
|
||||
// This results in us appending a literal backslash to the text.
|
||||
let ch = replacement.get(off - 1).map_or(b'\\', |&c| c);
|
||||
|
||||
// Unescape and append the character.
|
||||
text.push(match ch {
|
||||
b'n' => b'\n',
|
||||
b'r' => b'\r',
|
||||
b't' => b'\t',
|
||||
ch => ch,
|
||||
});
|
||||
}
|
||||
|
||||
// Parse out a group number, if any.
|
||||
let mut group = -1;
|
||||
if off < replacement.len() && replacement[off] == b'$' {
|
||||
let mut beg = off;
|
||||
let mut end = off + 1;
|
||||
let mut acc = 0i32;
|
||||
let mut acc_bad = true;
|
||||
|
||||
if end < replacement.len() {
|
||||
let ch = replacement[end];
|
||||
|
||||
if ch == b'$' {
|
||||
// Translate "$$" to "$".
|
||||
beg += 1;
|
||||
end += 1;
|
||||
} else if ch.is_ascii_digit() {
|
||||
// Parse "$1234" into 1234i32.
|
||||
// If the number is larger than the group count,
|
||||
// we flag `acc_bad` which causes us to treat it as text.
|
||||
acc_bad = false;
|
||||
while {
|
||||
acc =
|
||||
acc.wrapping_mul(10).wrapping_add((replacement[end] - b'0') as i32);
|
||||
acc_bad |= acc > group_count;
|
||||
end += 1;
|
||||
end < replacement.len() && replacement[end].is_ascii_digit()
|
||||
} {}
|
||||
}
|
||||
}
|
||||
|
||||
if !acc_bad {
|
||||
group = acc;
|
||||
} else {
|
||||
text.extend_from_slice(&replacement[beg..end]);
|
||||
}
|
||||
|
||||
off = end;
|
||||
}
|
||||
|
||||
if !text.is_empty() {
|
||||
res.push(RegexReplacement::Text(text));
|
||||
text = Vec::new_in(arena);
|
||||
}
|
||||
if group >= 0 {
|
||||
res.push(RegexReplacement::Group(group));
|
||||
}
|
||||
|
||||
text_beg = off;
|
||||
if text_beg >= replacement.len() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
|
||||
fn find_fill_replacement<'a>(
|
||||
&self,
|
||||
search: &mut ActiveSearch,
|
||||
replacement: &'a [u8],
|
||||
parsed_replacements: &[RegexReplacement],
|
||||
) -> Cow<'a, [u8]> {
|
||||
if !search.options.use_regex {
|
||||
Cow::Borrowed(replacement)
|
||||
} else {
|
||||
let mut res = Vec::new();
|
||||
|
||||
for replacement in parsed_replacements {
|
||||
match replacement {
|
||||
RegexReplacement::Text(text) => res.extend_from_slice(text),
|
||||
RegexReplacement::Group(group) => {
|
||||
if let Some(range) = search.regex.group(*group) {
|
||||
self.buffer.extract_raw(range, &mut res, usize::MAX);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Cow::Owned(res)
|
||||
}
|
||||
}
|
||||
|
||||
fn measurement_config(&self) -> MeasurementConfig<'_> {
|
||||
MeasurementConfig::new(&self.buffer)
|
||||
.with_word_wrap_column(self.word_wrap_column)
|
||||
|
|
41
src/icu.rs
41
src/icu.rs
|
@ -677,6 +677,31 @@ impl Regex {
|
|||
let mut status = icu_ffi::U_ZERO_ERROR;
|
||||
unsafe { (f.uregex_reset64)(self.0, offset as i64, &mut status) };
|
||||
}
|
||||
|
||||
/// Gets captured group count.
|
||||
pub fn group_count(&mut self) -> i32 {
|
||||
let f = assume_loaded();
|
||||
|
||||
let mut status = icu_ffi::U_ZERO_ERROR;
|
||||
let count = unsafe { (f.uregex_groupCount)(self.0, &mut status) };
|
||||
if status.is_failure() { 0 } else { count }
|
||||
}
|
||||
|
||||
/// Gets the text range of a captured group by index.
|
||||
pub fn group(&mut self, group: i32) -> Option<Range<usize>> {
|
||||
let f = assume_loaded();
|
||||
|
||||
let mut status = icu_ffi::U_ZERO_ERROR;
|
||||
let start = unsafe { (f.uregex_start64)(self.0, group, &mut status) };
|
||||
let end = unsafe { (f.uregex_end64)(self.0, group, &mut status) };
|
||||
if status.is_failure() {
|
||||
None
|
||||
} else {
|
||||
let start = start.max(0);
|
||||
let end = end.max(start);
|
||||
Some(start as usize..end as usize)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Regex {
|
||||
|
@ -691,15 +716,7 @@ impl Iterator for Regex {
|
|||
return None;
|
||||
}
|
||||
|
||||
let start = unsafe { (f.uregex_start64)(self.0, 0, &mut status) };
|
||||
let end = unsafe { (f.uregex_end64)(self.0, 0, &mut status) };
|
||||
if status.is_failure() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let start = start.max(0);
|
||||
let end = end.max(start);
|
||||
Some(start as usize..end as usize)
|
||||
self.group(0)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -900,6 +917,7 @@ struct LibraryFunctions {
|
|||
uregex_setUText: icu_ffi::uregex_setUText,
|
||||
uregex_reset64: icu_ffi::uregex_reset64,
|
||||
uregex_findNext: icu_ffi::uregex_findNext,
|
||||
uregex_groupCount: icu_ffi::uregex_groupCount,
|
||||
uregex_start64: icu_ffi::uregex_start64,
|
||||
uregex_end64: icu_ffi::uregex_end64,
|
||||
}
|
||||
|
@ -919,7 +937,7 @@ const LIBICUUC_PROC_NAMES: [&CStr; 10] = [
|
|||
];
|
||||
|
||||
// Found in libicui18n.so on UNIX, icuin.dll/icu.dll on Windows.
|
||||
const LIBICUI18N_PROC_NAMES: [&CStr; 10] = [
|
||||
const LIBICUI18N_PROC_NAMES: [&CStr; 11] = [
|
||||
c"ucol_open",
|
||||
c"ucol_strcollUTF8",
|
||||
c"uregex_open",
|
||||
|
@ -928,6 +946,7 @@ const LIBICUI18N_PROC_NAMES: [&CStr; 10] = [
|
|||
c"uregex_setUText",
|
||||
c"uregex_reset64",
|
||||
c"uregex_findNext",
|
||||
c"uregex_groupCount",
|
||||
c"uregex_start64",
|
||||
c"uregex_end64",
|
||||
];
|
||||
|
@ -1277,6 +1296,8 @@ mod icu_ffi {
|
|||
unsafe extern "C" fn(regexp: *mut URegularExpression, index: i64, status: &mut UErrorCode);
|
||||
pub type uregex_findNext =
|
||||
unsafe extern "C" fn(regexp: *mut URegularExpression, status: &mut UErrorCode) -> bool;
|
||||
pub type uregex_groupCount =
|
||||
unsafe extern "C" fn(regexp: *mut URegularExpression, status: &mut UErrorCode) -> i32;
|
||||
pub type uregex_start64 = unsafe extern "C" fn(
|
||||
regexp: *mut URegularExpression,
|
||||
group_num: i32,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue