mirror of
https://github.com/rust-lang/rust-analyzer.git
synced 2025-08-22 03:14:30 +00:00
Rename ra_syntax -> syntax
This commit is contained in:
parent
3d6889cba7
commit
a1c187eef3
958 changed files with 353 additions and 363 deletions
244
crates/syntax/src/parsing/lexer.rs
Normal file
244
crates/syntax/src/parsing/lexer.rs
Normal file
|
@ -0,0 +1,244 @@
|
|||
//! Lexer analyzes raw input string and produces lexemes (tokens).
|
||||
//! It is just a bridge to `rustc_lexer`.
|
||||
|
||||
use rustc_lexer::{LiteralKind as LK, RawStrError};
|
||||
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::{
|
||||
SyntaxError,
|
||||
SyntaxKind::{self, *},
|
||||
TextRange, TextSize, T,
|
||||
};
|
||||
|
||||
/// A token of Rust source.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Token {
|
||||
/// The kind of token.
|
||||
pub kind: SyntaxKind,
|
||||
/// The length of the token.
|
||||
pub len: TextSize,
|
||||
}
|
||||
|
||||
/// Break a string up into its component tokens.
|
||||
/// Beware that it checks for shebang first and its length contributes to resulting
|
||||
/// tokens offsets.
|
||||
pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
|
||||
// non-empty string is a precondtion of `rustc_lexer::strip_shebang()`.
|
||||
if text.is_empty() {
|
||||
return Default::default();
|
||||
}
|
||||
|
||||
let mut tokens = Vec::new();
|
||||
let mut errors = Vec::new();
|
||||
|
||||
let mut offset = match rustc_lexer::strip_shebang(text) {
|
||||
Some(shebang_len) => {
|
||||
tokens.push(Token { kind: SHEBANG, len: shebang_len.try_into().unwrap() });
|
||||
shebang_len
|
||||
}
|
||||
None => 0,
|
||||
};
|
||||
|
||||
let text_without_shebang = &text[offset..];
|
||||
|
||||
for rustc_token in rustc_lexer::tokenize(text_without_shebang) {
|
||||
let token_len: TextSize = rustc_token.len.try_into().unwrap();
|
||||
let token_range = TextRange::at(offset.try_into().unwrap(), token_len);
|
||||
|
||||
let (syntax_kind, err_message) =
|
||||
rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
|
||||
|
||||
tokens.push(Token { kind: syntax_kind, len: token_len });
|
||||
|
||||
if let Some(err_message) = err_message {
|
||||
errors.push(SyntaxError::new(err_message, token_range));
|
||||
}
|
||||
|
||||
offset += rustc_token.len;
|
||||
}
|
||||
|
||||
(tokens, errors)
|
||||
}
|
||||
|
||||
/// Returns `SyntaxKind` and `Option<SyntaxError>` of the first token
|
||||
/// encountered at the beginning of the string.
|
||||
///
|
||||
/// Returns `None` if the string contains zero *or two or more* tokens.
|
||||
/// The token is malformed if the returned error is not `None`.
|
||||
///
|
||||
/// Beware that unescape errors are not checked at tokenization time.
|
||||
pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)> {
|
||||
lex_first_token(text)
|
||||
.filter(|(token, _)| token.len == TextSize::of(text))
|
||||
.map(|(token, error)| (token.kind, error))
|
||||
}
|
||||
|
||||
/// The same as `lex_single_syntax_kind()` but returns only `SyntaxKind` and
|
||||
/// returns `None` if any tokenization error occured.
|
||||
///
|
||||
/// Beware that unescape errors are not checked at tokenization time.
|
||||
pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind> {
|
||||
lex_first_token(text)
|
||||
.filter(|(token, error)| !error.is_some() && token.len == TextSize::of(text))
|
||||
.map(|(token, _error)| token.kind)
|
||||
}
|
||||
|
||||
/// Returns `SyntaxKind` and `Option<SyntaxError>` of the first token
|
||||
/// encountered at the beginning of the string.
|
||||
///
|
||||
/// Returns `None` if the string contains zero tokens or if the token was parsed
|
||||
/// with an error.
|
||||
/// The token is malformed if the returned error is not `None`.
|
||||
///
|
||||
/// Beware that unescape errors are not checked at tokenization time.
|
||||
fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
|
||||
// non-empty string is a precondtion of `rustc_lexer::first_token()`.
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let rustc_token = rustc_lexer::first_token(text);
|
||||
let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
|
||||
|
||||
let token = Token { kind: syntax_kind, len: rustc_token.len.try_into().unwrap() };
|
||||
let optional_error = err_message
|
||||
.map(|err_message| SyntaxError::new(err_message, TextRange::up_to(TextSize::of(text))));
|
||||
|
||||
Some((token, optional_error))
|
||||
}
|
||||
|
||||
/// Returns `SyntaxKind` and an optional tokenize error message.
|
||||
fn rustc_token_kind_to_syntax_kind(
|
||||
rustc_token_kind: &rustc_lexer::TokenKind,
|
||||
token_text: &str,
|
||||
) -> (SyntaxKind, Option<&'static str>) {
|
||||
// A note on an intended tradeoff:
|
||||
// We drop some useful infromation here (see patterns with double dots `..`)
|
||||
// Storing that info in `SyntaxKind` is not possible due to its layout requirements of
|
||||
// being `u16` that come from `rowan::SyntaxKind`.
|
||||
|
||||
let syntax_kind = {
|
||||
match rustc_token_kind {
|
||||
rustc_lexer::TokenKind::LineComment => COMMENT,
|
||||
|
||||
rustc_lexer::TokenKind::BlockComment { terminated: true } => COMMENT,
|
||||
rustc_lexer::TokenKind::BlockComment { terminated: false } => {
|
||||
return (
|
||||
COMMENT,
|
||||
Some("Missing trailing `*/` symbols to terminate the block comment"),
|
||||
);
|
||||
}
|
||||
|
||||
rustc_lexer::TokenKind::Whitespace => WHITESPACE,
|
||||
|
||||
rustc_lexer::TokenKind::Ident => {
|
||||
if token_text == "_" {
|
||||
UNDERSCORE
|
||||
} else {
|
||||
SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
|
||||
}
|
||||
}
|
||||
|
||||
rustc_lexer::TokenKind::RawIdent => IDENT,
|
||||
rustc_lexer::TokenKind::Literal { kind, .. } => return match_literal_kind(&kind),
|
||||
|
||||
rustc_lexer::TokenKind::Lifetime { starts_with_number: false } => LIFETIME,
|
||||
rustc_lexer::TokenKind::Lifetime { starts_with_number: true } => {
|
||||
return (LIFETIME, Some("Lifetime name cannot start with a number"))
|
||||
}
|
||||
|
||||
rustc_lexer::TokenKind::Semi => T![;],
|
||||
rustc_lexer::TokenKind::Comma => T![,],
|
||||
rustc_lexer::TokenKind::Dot => T![.],
|
||||
rustc_lexer::TokenKind::OpenParen => T!['('],
|
||||
rustc_lexer::TokenKind::CloseParen => T![')'],
|
||||
rustc_lexer::TokenKind::OpenBrace => T!['{'],
|
||||
rustc_lexer::TokenKind::CloseBrace => T!['}'],
|
||||
rustc_lexer::TokenKind::OpenBracket => T!['['],
|
||||
rustc_lexer::TokenKind::CloseBracket => T![']'],
|
||||
rustc_lexer::TokenKind::At => T![@],
|
||||
rustc_lexer::TokenKind::Pound => T![#],
|
||||
rustc_lexer::TokenKind::Tilde => T![~],
|
||||
rustc_lexer::TokenKind::Question => T![?],
|
||||
rustc_lexer::TokenKind::Colon => T![:],
|
||||
rustc_lexer::TokenKind::Dollar => T![$],
|
||||
rustc_lexer::TokenKind::Eq => T![=],
|
||||
rustc_lexer::TokenKind::Not => T![!],
|
||||
rustc_lexer::TokenKind::Lt => T![<],
|
||||
rustc_lexer::TokenKind::Gt => T![>],
|
||||
rustc_lexer::TokenKind::Minus => T![-],
|
||||
rustc_lexer::TokenKind::And => T![&],
|
||||
rustc_lexer::TokenKind::Or => T![|],
|
||||
rustc_lexer::TokenKind::Plus => T![+],
|
||||
rustc_lexer::TokenKind::Star => T![*],
|
||||
rustc_lexer::TokenKind::Slash => T![/],
|
||||
rustc_lexer::TokenKind::Caret => T![^],
|
||||
rustc_lexer::TokenKind::Percent => T![%],
|
||||
rustc_lexer::TokenKind::Unknown => ERROR,
|
||||
}
|
||||
};
|
||||
|
||||
return (syntax_kind, None);
|
||||
|
||||
fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
|
||||
#[rustfmt::skip]
|
||||
let syntax_kind = match *kind {
|
||||
LK::Int { empty_int: false, .. } => INT_NUMBER,
|
||||
LK::Int { empty_int: true, .. } => {
|
||||
return (INT_NUMBER, Some("Missing digits after the integer base prefix"))
|
||||
}
|
||||
|
||||
LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
|
||||
LK::Float { empty_exponent: true, .. } => {
|
||||
return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol"))
|
||||
}
|
||||
|
||||
LK::Char { terminated: true } => CHAR,
|
||||
LK::Char { terminated: false } => {
|
||||
return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal"))
|
||||
}
|
||||
|
||||
LK::Byte { terminated: true } => BYTE,
|
||||
LK::Byte { terminated: false } => {
|
||||
return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal"))
|
||||
}
|
||||
|
||||
LK::Str { terminated: true } => STRING,
|
||||
LK::Str { terminated: false } => {
|
||||
return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal"))
|
||||
}
|
||||
|
||||
|
||||
LK::ByteStr { terminated: true } => BYTE_STRING,
|
||||
LK::ByteStr { terminated: false } => {
|
||||
return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal"))
|
||||
}
|
||||
|
||||
LK::RawStr { err, .. } => match err {
|
||||
None => RAW_STRING,
|
||||
Some(RawStrError::InvalidStarter { .. }) => return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal")),
|
||||
Some(RawStrError::NoTerminator { expected, found, .. }) => if expected == found {
|
||||
return (RAW_STRING, Some("Missing trailing `\"` to terminate the raw string literal"))
|
||||
} else {
|
||||
return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal"))
|
||||
|
||||
},
|
||||
Some(RawStrError::TooManyDelimiters { .. }) => return (RAW_STRING, Some("Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols")),
|
||||
},
|
||||
LK::RawByteStr { err, .. } => match err {
|
||||
None => RAW_BYTE_STRING,
|
||||
Some(RawStrError::InvalidStarter { .. }) => return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal")),
|
||||
Some(RawStrError::NoTerminator { expected, found, .. }) => if expected == found {
|
||||
return (RAW_BYTE_STRING, Some("Missing trailing `\"` to terminate the raw byte string literal"))
|
||||
} else {
|
||||
return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"))
|
||||
|
||||
},
|
||||
Some(RawStrError::TooManyDelimiters { .. }) => return (RAW_BYTE_STRING, Some("Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols")),
|
||||
},
|
||||
};
|
||||
|
||||
(syntax_kind, None)
|
||||
}
|
||||
}
|
455
crates/syntax/src/parsing/reparsing.rs
Normal file
455
crates/syntax/src/parsing/reparsing.rs
Normal file
|
@ -0,0 +1,455 @@
|
|||
//! Implementation of incremental re-parsing.
|
||||
//!
|
||||
//! We use two simple strategies for this:
|
||||
//! - if the edit modifies only a single token (like changing an identifier's
|
||||
//! letter), we replace only this token.
|
||||
//! - otherwise, we search for the nearest `{}` block which contains the edit
|
||||
//! and try to parse only this block.
|
||||
|
||||
use parser::Reparser;
|
||||
use text_edit::Indel;
|
||||
|
||||
use crate::{
|
||||
algo,
|
||||
parsing::{
|
||||
lexer::{lex_single_syntax_kind, tokenize, Token},
|
||||
text_token_source::TextTokenSource,
|
||||
text_tree_sink::TextTreeSink,
|
||||
},
|
||||
syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode},
|
||||
SyntaxError,
|
||||
SyntaxKind::*,
|
||||
TextRange, TextSize, T,
|
||||
};
|
||||
|
||||
pub(crate) fn incremental_reparse(
|
||||
node: &SyntaxNode,
|
||||
edit: &Indel,
|
||||
errors: Vec<SyntaxError>,
|
||||
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
|
||||
if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
|
||||
return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
|
||||
}
|
||||
|
||||
if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
|
||||
return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn reparse_token<'node>(
|
||||
root: &'node SyntaxNode,
|
||||
edit: &Indel,
|
||||
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
|
||||
let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
|
||||
let prev_token_kind = prev_token.kind();
|
||||
match prev_token_kind {
|
||||
WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
|
||||
if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
|
||||
// removing a new line may extends previous token
|
||||
let deleted_range = edit.delete - prev_token.text_range().start();
|
||||
if prev_token.text()[deleted_range].contains('\n') {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
|
||||
let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;
|
||||
|
||||
if new_token_kind != prev_token_kind
|
||||
|| (new_token_kind == IDENT && is_contextual_kw(&new_text))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
// Check that edited token is not a part of the bigger token.
|
||||
// E.g. if for source code `bruh"str"` the user removed `ruh`, then
|
||||
// `b` no longer remains an identifier, but becomes a part of byte string literal
|
||||
if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
|
||||
new_text.push(next_char);
|
||||
let token_with_next_char = lex_single_syntax_kind(&new_text);
|
||||
if let Some((_kind, _error)) = token_with_next_char {
|
||||
return None;
|
||||
}
|
||||
new_text.pop();
|
||||
}
|
||||
|
||||
let new_token =
|
||||
GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
|
||||
Some((
|
||||
prev_token.replace_with(new_token),
|
||||
new_err.into_iter().collect(),
|
||||
prev_token.text_range(),
|
||||
))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn reparse_block<'node>(
|
||||
root: &'node SyntaxNode,
|
||||
edit: &Indel,
|
||||
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
|
||||
let (node, reparser) = find_reparsable_node(root, edit.delete)?;
|
||||
let text = get_text_after_edit(node.clone().into(), edit);
|
||||
|
||||
let (tokens, new_lexer_errors) = tokenize(&text);
|
||||
if !is_balanced(&tokens) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut token_source = TextTokenSource::new(&text, &tokens);
|
||||
let mut tree_sink = TextTreeSink::new(&text, &tokens);
|
||||
reparser.parse(&mut token_source, &mut tree_sink);
|
||||
|
||||
let (green, mut new_parser_errors) = tree_sink.finish();
|
||||
new_parser_errors.extend(new_lexer_errors);
|
||||
|
||||
Some((node.replace_with(green), new_parser_errors, node.text_range()))
|
||||
}
|
||||
|
||||
fn get_text_after_edit(element: SyntaxElement, edit: &Indel) -> String {
|
||||
let edit = Indel::replace(edit.delete - element.text_range().start(), edit.insert.clone());
|
||||
|
||||
let mut text = match element {
|
||||
NodeOrToken::Token(token) => token.text().to_string(),
|
||||
NodeOrToken::Node(node) => node.text().to_string(),
|
||||
};
|
||||
edit.apply(&mut text);
|
||||
text
|
||||
}
|
||||
|
||||
fn is_contextual_kw(text: &str) -> bool {
|
||||
matches!(text, "auto" | "default" | "union")
|
||||
}
|
||||
|
||||
fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
|
||||
let node = algo::find_covering_element(node, range);
|
||||
|
||||
let mut ancestors = match node {
|
||||
NodeOrToken::Token(it) => it.parent().ancestors(),
|
||||
NodeOrToken::Node(it) => it.ancestors(),
|
||||
};
|
||||
ancestors.find_map(|node| {
|
||||
let first_child = node.first_child_or_token().map(|it| it.kind());
|
||||
let parent = node.parent().map(|it| it.kind());
|
||||
Reparser::for_node(node.kind(), first_child, parent).map(|r| (node, r))
|
||||
})
|
||||
}
|
||||
|
||||
fn is_balanced(tokens: &[Token]) -> bool {
|
||||
if tokens.is_empty()
|
||||
|| tokens.first().unwrap().kind != T!['{']
|
||||
|| tokens.last().unwrap().kind != T!['}']
|
||||
{
|
||||
return false;
|
||||
}
|
||||
let mut balance = 0usize;
|
||||
for t in &tokens[1..tokens.len() - 1] {
|
||||
match t.kind {
|
||||
T!['{'] => balance += 1,
|
||||
T!['}'] => {
|
||||
balance = match balance.checked_sub(1) {
|
||||
Some(b) => b,
|
||||
None => return false,
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
balance == 0
|
||||
}
|
||||
|
||||
fn merge_errors(
|
||||
old_errors: Vec<SyntaxError>,
|
||||
new_errors: Vec<SyntaxError>,
|
||||
range_before_reparse: TextRange,
|
||||
edit: &Indel,
|
||||
) -> Vec<SyntaxError> {
|
||||
let mut res = Vec::new();
|
||||
|
||||
for old_err in old_errors {
|
||||
let old_err_range = old_err.range();
|
||||
if old_err_range.end() <= range_before_reparse.start() {
|
||||
res.push(old_err);
|
||||
} else if old_err_range.start() >= range_before_reparse.end() {
|
||||
let inserted_len = TextSize::of(&edit.insert);
|
||||
res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
|
||||
// Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
|
||||
}
|
||||
}
|
||||
res.extend(new_errors.into_iter().map(|new_err| {
|
||||
// fighting borrow checker with a variable ;)
|
||||
let offseted_range = new_err.range() + range_before_reparse.start();
|
||||
new_err.with_range(offseted_range)
|
||||
}));
|
||||
res
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use test_utils::{assert_eq_text, extract_range};
|
||||
|
||||
use super::*;
|
||||
use crate::{AstNode, Parse, SourceFile};
|
||||
|
||||
fn do_check(before: &str, replace_with: &str, reparsed_len: u32) {
|
||||
let (range, before) = extract_range(before);
|
||||
let edit = Indel::replace(range, replace_with.to_owned());
|
||||
let after = {
|
||||
let mut after = before.clone();
|
||||
edit.apply(&mut after);
|
||||
after
|
||||
};
|
||||
|
||||
let fully_reparsed = SourceFile::parse(&after);
|
||||
let incrementally_reparsed: Parse<SourceFile> = {
|
||||
let before = SourceFile::parse(&before);
|
||||
let (green, new_errors, range) =
|
||||
incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
|
||||
assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
|
||||
Parse::new(green, new_errors)
|
||||
};
|
||||
|
||||
assert_eq_text!(
|
||||
&format!("{:#?}", fully_reparsed.tree().syntax()),
|
||||
&format!("{:#?}", incrementally_reparsed.tree().syntax()),
|
||||
);
|
||||
assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
|
||||
}
|
||||
|
||||
#[test] // FIXME: some test here actually test token reparsing
|
||||
fn reparse_block_tests() {
|
||||
do_check(
|
||||
r"
|
||||
fn foo() {
|
||||
let x = foo + <|>bar<|>
|
||||
}
|
||||
",
|
||||
"baz",
|
||||
3,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
fn foo() {
|
||||
let x = foo<|> + bar<|>
|
||||
}
|
||||
",
|
||||
"baz",
|
||||
25,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
struct Foo {
|
||||
f: foo<|><|>
|
||||
}
|
||||
",
|
||||
",\n g: (),",
|
||||
14,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
fn foo {
|
||||
let;
|
||||
1 + 1;
|
||||
<|>92<|>;
|
||||
}
|
||||
",
|
||||
"62",
|
||||
31, // FIXME: reparse only int literal here
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
mod foo {
|
||||
fn <|><|>
|
||||
}
|
||||
",
|
||||
"bar",
|
||||
11,
|
||||
);
|
||||
|
||||
do_check(
|
||||
r"
|
||||
trait Foo {
|
||||
type <|>Foo<|>;
|
||||
}
|
||||
",
|
||||
"Output",
|
||||
3,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
impl IntoIterator<Item=i32> for Foo {
|
||||
f<|><|>
|
||||
}
|
||||
",
|
||||
"n next(",
|
||||
9,
|
||||
);
|
||||
do_check(r"use a::b::{foo,<|>,bar<|>};", "baz", 10);
|
||||
do_check(
|
||||
r"
|
||||
pub enum A {
|
||||
Foo<|><|>
|
||||
}
|
||||
",
|
||||
"\nBar;\n",
|
||||
11,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
foo!{a, b<|><|> d}
|
||||
",
|
||||
", c[3]",
|
||||
8,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
fn foo() {
|
||||
vec![<|><|>]
|
||||
}
|
||||
",
|
||||
"123",
|
||||
14,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
extern {
|
||||
fn<|>;<|>
|
||||
}
|
||||
",
|
||||
" exit(code: c_int)",
|
||||
11,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reparse_token_tests() {
|
||||
do_check(
|
||||
r"<|><|>
|
||||
fn foo() -> i32 { 1 }
|
||||
",
|
||||
"\n\n\n \n",
|
||||
1,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
fn foo() -> <|><|> {}
|
||||
",
|
||||
" \n",
|
||||
2,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
fn <|>foo<|>() -> i32 { 1 }
|
||||
",
|
||||
"bar",
|
||||
3,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
fn foo<|><|>foo() { }
|
||||
",
|
||||
"bar",
|
||||
6,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
fn foo /* <|><|> */ () {}
|
||||
",
|
||||
"some comment",
|
||||
6,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
fn baz <|><|> () {}
|
||||
",
|
||||
" \t\t\n\n",
|
||||
2,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
fn baz <|><|> () {}
|
||||
",
|
||||
" \t\t\n\n",
|
||||
2,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
/// foo <|><|>omment
|
||||
mod { }
|
||||
",
|
||||
"c",
|
||||
14,
|
||||
);
|
||||
do_check(
|
||||
r#"
|
||||
fn -> &str { "Hello<|><|>" }
|
||||
"#,
|
||||
", world",
|
||||
7,
|
||||
);
|
||||
do_check(
|
||||
r#"
|
||||
fn -> &str { // "Hello<|><|>"
|
||||
"#,
|
||||
", world",
|
||||
10,
|
||||
);
|
||||
do_check(
|
||||
r##"
|
||||
fn -> &str { r#"Hello<|><|>"#
|
||||
"##,
|
||||
", world",
|
||||
10,
|
||||
);
|
||||
do_check(
|
||||
r"
|
||||
#[derive(<|>Copy<|>)]
|
||||
enum Foo {
|
||||
|
||||
}
|
||||
",
|
||||
"Clone",
|
||||
4,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reparse_str_token_with_error_unchanged() {
|
||||
do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reparse_str_token_with_error_fixed() {
|
||||
do_check(r#""unterinated<|><|>"#, "\"", 12);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reparse_block_with_error_in_middle_unchanged() {
|
||||
do_check(
|
||||
r#"fn main() {
|
||||
if {}
|
||||
32 + 4<|><|>
|
||||
return
|
||||
if {}
|
||||
}"#,
|
||||
"23",
|
||||
105,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reparse_block_with_error_in_middle_fixed() {
|
||||
do_check(
|
||||
r#"fn main() {
|
||||
if {}
|
||||
32 + 4<|><|>
|
||||
return
|
||||
if {}
|
||||
}"#,
|
||||
";",
|
||||
105,
|
||||
)
|
||||
}
|
||||
}
|
84
crates/syntax/src/parsing/text_token_source.rs
Normal file
84
crates/syntax/src/parsing/text_token_source.rs
Normal file
|
@ -0,0 +1,84 @@
|
|||
//! See `TextTokenSource` docs.
|
||||
|
||||
use parser::TokenSource;
|
||||
|
||||
use crate::{parsing::lexer::Token, SyntaxKind::EOF, TextRange, TextSize};
|
||||
|
||||
/// Implementation of `parser::TokenSource` that takes tokens from source code text.
|
||||
pub(crate) struct TextTokenSource<'t> {
|
||||
text: &'t str,
|
||||
/// token and its start position (non-whitespace/comment tokens)
|
||||
/// ```non-rust
|
||||
/// struct Foo;
|
||||
/// ^------^--^-
|
||||
/// | | \________
|
||||
/// | \____ \
|
||||
/// | \ |
|
||||
/// (struct, 0) (Foo, 7) (;, 10)
|
||||
/// ```
|
||||
/// `[(struct, 0), (Foo, 7), (;, 10)]`
|
||||
token_offset_pairs: Vec<(Token, TextSize)>,
|
||||
|
||||
/// Current token and position
|
||||
curr: (parser::Token, usize),
|
||||
}
|
||||
|
||||
impl<'t> TokenSource for TextTokenSource<'t> {
|
||||
fn current(&self) -> parser::Token {
|
||||
self.curr.0
|
||||
}
|
||||
|
||||
fn lookahead_nth(&self, n: usize) -> parser::Token {
|
||||
mk_token(self.curr.1 + n, &self.token_offset_pairs)
|
||||
}
|
||||
|
||||
fn bump(&mut self) {
|
||||
if self.curr.0.kind == EOF {
|
||||
return;
|
||||
}
|
||||
|
||||
let pos = self.curr.1 + 1;
|
||||
self.curr = (mk_token(pos, &self.token_offset_pairs), pos);
|
||||
}
|
||||
|
||||
fn is_keyword(&self, kw: &str) -> bool {
|
||||
self.token_offset_pairs
|
||||
.get(self.curr.1)
|
||||
.map(|(token, offset)| &self.text[TextRange::at(*offset, token.len)] == kw)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
fn mk_token(pos: usize, token_offset_pairs: &[(Token, TextSize)]) -> parser::Token {
|
||||
let (kind, is_jointed_to_next) = match token_offset_pairs.get(pos) {
|
||||
Some((token, offset)) => (
|
||||
token.kind,
|
||||
token_offset_pairs
|
||||
.get(pos + 1)
|
||||
.map(|(_, next_offset)| offset + token.len == *next_offset)
|
||||
.unwrap_or(false),
|
||||
),
|
||||
None => (EOF, false),
|
||||
};
|
||||
parser::Token { kind, is_jointed_to_next }
|
||||
}
|
||||
|
||||
impl<'t> TextTokenSource<'t> {
|
||||
/// Generate input from tokens(expect comment and whitespace).
|
||||
pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> TextTokenSource<'t> {
|
||||
let token_offset_pairs: Vec<_> = raw_tokens
|
||||
.iter()
|
||||
.filter_map({
|
||||
let mut len = 0.into();
|
||||
move |token| {
|
||||
let pair = if token.kind.is_trivia() { None } else { Some((*token, len)) };
|
||||
len += token.len;
|
||||
pair
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let first = mk_token(0, &token_offset_pairs);
|
||||
TextTokenSource { text, token_offset_pairs, curr: (first, 0) }
|
||||
}
|
||||
}
|
183
crates/syntax/src/parsing/text_tree_sink.rs
Normal file
183
crates/syntax/src/parsing/text_tree_sink.rs
Normal file
|
@ -0,0 +1,183 @@
|
|||
//! FIXME: write short doc here
|
||||
|
||||
use std::mem;
|
||||
|
||||
use parser::{ParseError, TreeSink};
|
||||
|
||||
use crate::{
|
||||
parsing::Token,
|
||||
syntax_node::GreenNode,
|
||||
SmolStr, SyntaxError,
|
||||
SyntaxKind::{self, *},
|
||||
SyntaxTreeBuilder, TextRange, TextSize,
|
||||
};
|
||||
|
||||
/// Bridges the parser with our specific syntax tree representation.
|
||||
///
|
||||
/// `TextTreeSink` also handles attachment of trivia (whitespace) to nodes.
|
||||
pub(crate) struct TextTreeSink<'a> {
|
||||
text: &'a str,
|
||||
tokens: &'a [Token],
|
||||
text_pos: TextSize,
|
||||
token_pos: usize,
|
||||
state: State,
|
||||
inner: SyntaxTreeBuilder,
|
||||
}
|
||||
|
||||
enum State {
|
||||
PendingStart,
|
||||
Normal,
|
||||
PendingFinish,
|
||||
}
|
||||
|
||||
impl<'a> TreeSink for TextTreeSink<'a> {
|
||||
fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
|
||||
match mem::replace(&mut self.state, State::Normal) {
|
||||
State::PendingStart => unreachable!(),
|
||||
State::PendingFinish => self.inner.finish_node(),
|
||||
State::Normal => (),
|
||||
}
|
||||
self.eat_trivias();
|
||||
let n_tokens = n_tokens as usize;
|
||||
let len = self.tokens[self.token_pos..self.token_pos + n_tokens]
|
||||
.iter()
|
||||
.map(|it| it.len)
|
||||
.sum::<TextSize>();
|
||||
self.do_token(kind, len, n_tokens);
|
||||
}
|
||||
|
||||
fn start_node(&mut self, kind: SyntaxKind) {
|
||||
match mem::replace(&mut self.state, State::Normal) {
|
||||
State::PendingStart => {
|
||||
self.inner.start_node(kind);
|
||||
// No need to attach trivias to previous node: there is no
|
||||
// previous node.
|
||||
return;
|
||||
}
|
||||
State::PendingFinish => self.inner.finish_node(),
|
||||
State::Normal => (),
|
||||
}
|
||||
|
||||
let n_trivias =
|
||||
self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
|
||||
let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
|
||||
let mut trivia_end =
|
||||
self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextSize>();
|
||||
|
||||
let n_attached_trivias = {
|
||||
let leading_trivias = leading_trivias.iter().rev().map(|it| {
|
||||
let next_end = trivia_end - it.len;
|
||||
let range = TextRange::new(next_end, trivia_end);
|
||||
trivia_end = next_end;
|
||||
(it.kind, &self.text[range])
|
||||
});
|
||||
n_attached_trivias(kind, leading_trivias)
|
||||
};
|
||||
self.eat_n_trivias(n_trivias - n_attached_trivias);
|
||||
self.inner.start_node(kind);
|
||||
self.eat_n_trivias(n_attached_trivias);
|
||||
}
|
||||
|
||||
fn finish_node(&mut self) {
|
||||
match mem::replace(&mut self.state, State::PendingFinish) {
|
||||
State::PendingStart => unreachable!(),
|
||||
State::PendingFinish => self.inner.finish_node(),
|
||||
State::Normal => (),
|
||||
}
|
||||
}
|
||||
|
||||
fn error(&mut self, error: ParseError) {
|
||||
self.inner.error(error, self.text_pos)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TextTreeSink<'a> {
|
||||
pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> Self {
|
||||
Self {
|
||||
text,
|
||||
tokens,
|
||||
text_pos: 0.into(),
|
||||
token_pos: 0,
|
||||
state: State::PendingStart,
|
||||
inner: SyntaxTreeBuilder::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn finish(mut self) -> (GreenNode, Vec<SyntaxError>) {
|
||||
match mem::replace(&mut self.state, State::Normal) {
|
||||
State::PendingFinish => {
|
||||
self.eat_trivias();
|
||||
self.inner.finish_node()
|
||||
}
|
||||
State::PendingStart | State::Normal => unreachable!(),
|
||||
}
|
||||
|
||||
self.inner.finish_raw()
|
||||
}
|
||||
|
||||
fn eat_trivias(&mut self) {
|
||||
while let Some(&token) = self.tokens.get(self.token_pos) {
|
||||
if !token.kind.is_trivia() {
|
||||
break;
|
||||
}
|
||||
self.do_token(token.kind, token.len, 1);
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_n_trivias(&mut self, n: usize) {
|
||||
for _ in 0..n {
|
||||
let token = self.tokens[self.token_pos];
|
||||
assert!(token.kind.is_trivia());
|
||||
self.do_token(token.kind, token.len, 1);
|
||||
}
|
||||
}
|
||||
|
||||
fn do_token(&mut self, kind: SyntaxKind, len: TextSize, n_tokens: usize) {
|
||||
let range = TextRange::at(self.text_pos, len);
|
||||
let text: SmolStr = self.text[range].into();
|
||||
self.text_pos += len;
|
||||
self.token_pos += n_tokens;
|
||||
self.inner.token(kind, text);
|
||||
}
|
||||
}
|
||||
|
||||
fn n_attached_trivias<'a>(
|
||||
kind: SyntaxKind,
|
||||
trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
|
||||
) -> usize {
|
||||
match kind {
|
||||
MACRO_CALL | CONST | TYPE_ALIAS | STRUCT | ENUM | VARIANT | FN | TRAIT | MODULE
|
||||
| RECORD_FIELD | STATIC => {
|
||||
let mut res = 0;
|
||||
let mut trivias = trivias.enumerate().peekable();
|
||||
|
||||
while let Some((i, (kind, text))) = trivias.next() {
|
||||
match kind {
|
||||
WHITESPACE => {
|
||||
if text.contains("\n\n") {
|
||||
// we check whether the next token is a doc-comment
|
||||
// and skip the whitespace in this case
|
||||
if let Some((peek_kind, peek_text)) =
|
||||
trivias.peek().map(|(_, pair)| pair)
|
||||
{
|
||||
if *peek_kind == COMMENT
|
||||
&& peek_text.starts_with("///")
|
||||
&& !peek_text.starts_with("////")
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
COMMENT => {
|
||||
res = i + 1;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
res
|
||||
}
|
||||
_ => 0,
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue