//! Tools to work with expressions present in format string literals for the `format_args!` family of macros. //! Primarily meant for assists and completions. /// Enum for representing extracted format string args. /// Can either be extracted expressions (which includes identifiers), /// or placeholders `{}`. #[derive(Debug, PartialEq, Eq)] pub enum Arg { Placeholder, Ident(String), Expr(String), } /// Add placeholders like `$1` and `$2` in place of [`Arg::Placeholder`], /// and unwraps the [`Arg::Ident`] and [`Arg::Expr`] enums. /// ```rust /// # use ide_db::syntax_helpers::format_string_exprs::*; /// assert_eq!(with_placeholders(vec![Arg::Ident("ident".to_owned()), Arg::Placeholder, Arg::Expr("expr + 2".to_owned())]), vec!["ident".to_owned(), "$1".to_owned(), "expr + 2".to_owned()]) /// ``` pub fn with_placeholders(args: Vec) -> Vec { let mut placeholder_id = 1; args.into_iter() .map(move |a| match a { Arg::Expr(s) | Arg::Ident(s) => s, Arg::Placeholder => { let s = format!("${placeholder_id}"); placeholder_id += 1; s } }) .collect() } // FIXME Remove this, we have this information in the HIR now /// Parser for a format-like string. It is more allowing in terms of string contents, /// as we expect variable placeholders to be filled with expressions. /// /// Splits a format string that may contain expressions /// like /// ```rust /// # use ide_db::syntax_helpers::format_string_exprs::*; /// assert_eq!(parse_format_exprs("{ident} {} {expr + 42} ").unwrap(), ("{ident} {} {} ".to_owned(), vec![Arg::Placeholder, Arg::Expr("expr + 42".to_owned())])); /// ``` pub fn parse_format_exprs(input: &str) -> Result<(String, Vec), ()> { #[derive(Debug, Clone, Copy, PartialEq)] enum State { NotArg, MaybeArg, Expr, Ident, MaybeIncorrect, FormatOpts, } let mut state = State::NotArg; let mut current_expr = String::new(); let mut extracted_expressions = Vec::new(); let mut output = String::new(); // Count of open braces inside of an expression. // We assume that user knows what they're doing, thus we treat it like a correct pattern, e.g. // "{MyStruct { val_a: 0, val_b: 1 }}". let mut inexpr_open_count = 0; let mut chars = input.chars().peekable(); while let Some(chr) = chars.next() { match (state, chr) { (State::NotArg, '{') => { output.push(chr); state = State::MaybeArg; } (State::NotArg, '}') => { output.push(chr); state = State::MaybeIncorrect; } (State::NotArg, _) => { output.push(chr); } (State::MaybeIncorrect, '}') => { // It's okay, we met "}}". output.push(chr); state = State::NotArg; } (State::MaybeIncorrect, _) => { // Error in the string. return Err(()); } // Escaped braces `{{` (State::MaybeArg, '{') => { output.push(chr); state = State::NotArg; } (State::MaybeArg, '}') => { // This is an empty sequence '{}'. output.push(chr); extracted_expressions.push(Arg::Placeholder); state = State::NotArg; } (State::MaybeArg, ':') => { output.push(chr); extracted_expressions.push(Arg::Placeholder); state = State::FormatOpts; } (State::MaybeArg, _) => { current_expr.push(chr); // While Rust uses the unicode sets of XID_start and XID_continue for Identifiers // this is probably the best we can do to avoid a false positive if chr.is_alphabetic() || chr == '_' { state = State::Ident; } else { state = State::Expr; } } (State::Ident | State::Expr, ':') if matches!(chars.peek(), Some(':')) => { // path separator state = State::Expr; current_expr.push_str("::"); chars.next(); } (State::Ident | State::Expr, ':' | '}') => { if inexpr_open_count == 0 { let trimmed = current_expr.trim(); // if the expression consists of a single number, like "0" or "12", it can refer to // format args in the order they are specified. // see: https://doc.rust-lang.org/std/fmt/#positional-parameters if trimmed.chars().fold(true, |only_num, c| c.is_ascii_digit() && only_num) { output.push_str(trimmed); } else if matches!(state, State::Expr) { extracted_expressions.push(Arg::Expr(trimmed.into())); } else if matches!(state, State::Ident) { output.push_str(trimmed); } output.push(chr); current_expr.clear(); state = if chr == ':' { State::FormatOpts } else if chr == '}' { State::NotArg } else { unreachable!() }; } else if chr == '}' { // We're closing one brace met before inside of the expression. current_expr.push(chr); inexpr_open_count -= 1; } else if chr == ':' { // We're inside of braced expression, assume that it's a struct field name/value delimiter. current_expr.push(chr); } } (State::Ident | State::Expr, '{') => { state = State::Expr; current_expr.push(chr); inexpr_open_count += 1; } (State::Ident | State::Expr, _) => { if !(chr.is_alphanumeric() || chr == '_' || chr == '#') { state = State::Expr; } current_expr.push(chr); } (State::FormatOpts, '}') => { output.push(chr); state = State::NotArg; } (State::FormatOpts, _) => { output.push(chr); } } } if state != State::NotArg { return Err(()); } Ok((output, extracted_expressions)) } #[cfg(test)] mod tests { use super::*; use expect_test::{Expect, expect}; fn check(input: &str, expect: &Expect) { let (output, exprs) = parse_format_exprs(input).unwrap_or(("-".to_owned(), vec![])); let outcome_repr = if !exprs.is_empty() { format!("{output}; {}", with_placeholders(exprs).join(", ")) } else { output }; expect.assert_eq(&outcome_repr); } #[test] fn format_str_parser() { let test_vector = &[ ("no expressions", expect![["no expressions"]]), (r"no expressions with \$0$1", expect![r"no expressions with \$0$1"]), ("{expr} is {2 + 2}", expect![["{expr} is {}; 2 + 2"]]), ("{expr:?}", expect![["{expr:?}"]]), ("{expr:1$}", expect![[r"{expr:1$}"]]), ("{:1$}", expect![[r"{:1$}; $1"]]), ("{:>padding$}", expect![[r"{:>padding$}; $1"]]), ("{}, {}, {0}", expect![[r"{}, {}, {0}; $1, $2"]]), ("{}, {}, {0:b}", expect![[r"{}, {}, {0:b}; $1, $2"]]), ("{$0}", expect![[r"{}; $0"]]), ("{malformed", expect![["-"]]), ("malformed}", expect![["-"]]), ("{{correct", expect![["{{correct"]]), ("correct}}", expect![["correct}}"]]), ("{correct}}}", expect![["{correct}}}"]]), ("{correct}}}}}", expect![["{correct}}}}}"]]), ("{incorrect}}", expect![["-"]]), ("placeholders {} {}", expect![["placeholders {} {}; $1, $2"]]), ("mixed {} {2 + 2} {}", expect![["mixed {} {} {}; $1, 2 + 2, $2"]]), ( "{SomeStruct { val_a: 0, val_b: 1 }}", expect![["{}; SomeStruct { val_a: 0, val_b: 1 }"]], ), ("{expr:?} is {2.32f64:.5}", expect![["{expr:?} is {:.5}; 2.32f64"]]), ( "{SomeStruct { val_a: 0, val_b: 1 }:?}", expect![["{:?}; SomeStruct { val_a: 0, val_b: 1 }"]], ), ("{ 2 + 2 }", expect![["{}; 2 + 2"]]), ("{strsim::jaro_winkle(a)}", expect![["{}; strsim::jaro_winkle(a)"]]), ("{foo::bar::baz()}", expect![["{}; foo::bar::baz()"]]), ("{foo::bar():?}", expect![["{:?}; foo::bar()"]]), ]; for (input, output) in test_vector { check(input, output) } } #[test] fn arg_type() { assert_eq!( parse_format_exprs("{_ident} {r#raw_ident} {expr.obj} {name {thing: 42} } {}") .unwrap() .1, vec![ Arg::Expr("expr.obj".to_owned()), Arg::Expr("name {thing: 42}".to_owned()), Arg::Placeholder ] ); } }