parser/
shortcuts.rs

1//! Shortcuts that span lexer/parser abstraction.
2//!
//! The way Rust works, the parser doesn't necessarily parse text, and you might
4//! tokenize text without parsing it further. So, it makes sense to keep
5//! abstract token parsing, and string tokenization as completely separate
6//! layers.
7//!
8//! However, often you do parse text into syntax trees and the glue code for
//! that needs to live somewhere. Rather than putting it into the lexer or parser, we
10//! use a separate shortcuts module for that.
11
12use std::mem;
13
14use crate::{
15    Edition, LexedStr, Step,
16    SyntaxKind::{self, *},
17};
18
/// One step of the event stream that `LexedStr::intersperse_trivia` feeds to
/// its sink. String data is borrowed for `'a`.
#[derive(Debug)]
pub enum StrStep<'a> {
    /// A token of the given `kind` together with its source `text`.
    Token { kind: SyntaxKind, text: &'a str },
    /// Opens a node of the given `kind`.
    Enter { kind: SyntaxKind },
    /// Closes a node previously opened by `Enter`.
    Exit,
    /// An error with message `msg` reported at `pos`. For errors forwarded by
    /// `intersperse_trivia`, `pos` is the `text_start` of the current token.
    Error { msg: &'a str, pos: usize },
}
26
27impl LexedStr<'_> {
28    pub fn to_input(&self, edition: Edition) -> crate::Input {
29        let _p = tracing::info_span!("LexedStr::to_input").entered();
30        let mut res = crate::Input::with_capacity(self.len());
31        let mut was_joint = false;
32        for i in 0..self.len() {
33            let kind = self.kind(i);
34            if kind.is_trivia() {
35                was_joint = false
36            } else if kind == SyntaxKind::IDENT {
37                let token_text = self.text(i);
38                res.push_ident(
39                    SyntaxKind::from_contextual_keyword(token_text, edition)
40                        .unwrap_or(SyntaxKind::IDENT),
41                )
42            } else {
43                if was_joint {
44                    res.was_joint();
45                }
46                res.push(kind);
47                // Tag the token as joint if it is float with a fractional part
48                // we use this jointness to inform the parser about what token split
49                // event to emit when we encounter a float literal in a field access
50                if kind == SyntaxKind::FLOAT_NUMBER {
51                    if !self.text(i).ends_with('.') {
52                        res.was_joint();
53                    } else {
54                        was_joint = false;
55                    }
56                } else {
57                    was_joint = true;
58                }
59            }
60        }
61        res
62    }
63
64    /// NB: only valid to call with Output from Reparser/TopLevelEntry.
65    pub fn intersperse_trivia(
66        &self,
67        output: &crate::Output,
68        sink: &mut dyn FnMut(StrStep<'_>),
69    ) -> bool {
70        let mut builder = Builder { lexed: self, pos: 0, state: State::PendingEnter, sink };
71
72        for event in output.iter() {
73            match event {
74                Step::Token { kind, n_input_tokens: n_raw_tokens } => {
75                    builder.token(kind, n_raw_tokens)
76                }
77                Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
78                    builder.float_split(has_pseudo_dot)
79                }
80                Step::Enter { kind } => builder.enter(kind),
81                Step::Exit => builder.exit(),
82                Step::Error { msg } => {
83                    let text_pos = builder.lexed.text_start(builder.pos);
84                    (builder.sink)(StrStep::Error { msg, pos: text_pos });
85                }
86            }
87        }
88
89        match mem::replace(&mut builder.state, State::Normal) {
90            State::PendingExit => {
91                builder.eat_trivias();
92                (builder.sink)(StrStep::Exit);
93            }
94            State::PendingEnter | State::Normal => unreachable!(),
95        }
96
97        // is_eof?
98        builder.pos == builder.lexed.len()
99    }
100}
101
/// Working state used by `LexedStr::intersperse_trivia` while replaying
/// parser steps against the lexed tokens.
struct Builder<'a, 'b> {
    /// The lexed tokens (trivia included) that steps are matched against.
    lexed: &'a LexedStr<'a>,
    /// Index of the next lexed token that has not been emitted yet.
    pos: usize,
    /// Whether the first `Enter` is still awaited or an `Exit` is deferred.
    state: State,
    /// Receiver of the produced `StrStep`s.
    sink: &'b mut dyn FnMut(StrStep<'_>),
}
108
/// Progress marker of a `Builder`.
enum State {
    /// Initial state: no `Enter` has been handled yet.
    PendingEnter,
    /// No `Exit` is currently deferred.
    Normal,
    /// An `Exit` was requested but has not been sent to the sink yet; it is
    /// emitted when the next step arrives.
    PendingExit,
}
114
115impl Builder<'_, '_> {
116    fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
117        match mem::replace(&mut self.state, State::Normal) {
118            State::PendingEnter => unreachable!(),
119            State::PendingExit => (self.sink)(StrStep::Exit),
120            State::Normal => (),
121        }
122        self.eat_trivias();
123        self.do_token(kind, n_tokens as usize);
124    }
125
126    fn float_split(&mut self, has_pseudo_dot: bool) {
127        match mem::replace(&mut self.state, State::Normal) {
128            State::PendingEnter => unreachable!(),
129            State::PendingExit => (self.sink)(StrStep::Exit),
130            State::Normal => (),
131        }
132        self.eat_trivias();
133        self.do_float_split(has_pseudo_dot);
134    }
135
136    fn enter(&mut self, kind: SyntaxKind) {
137        match mem::replace(&mut self.state, State::Normal) {
138            State::PendingEnter => {
139                (self.sink)(StrStep::Enter { kind });
140                // No need to attach trivias to previous node: there is no
141                // previous node.
142                return;
143            }
144            State::PendingExit => (self.sink)(StrStep::Exit),
145            State::Normal => (),
146        }
147
148        let n_trivias =
149            (self.pos..self.lexed.len()).take_while(|&it| self.lexed.kind(it).is_trivia()).count();
150        let leading_trivias = self.pos..self.pos + n_trivias;
151        let n_attached_trivias = n_attached_trivias(
152            kind,
153            leading_trivias.rev().map(|it| (self.lexed.kind(it), self.lexed.text(it))),
154        );
155        self.eat_n_trivias(n_trivias - n_attached_trivias);
156        (self.sink)(StrStep::Enter { kind });
157        self.eat_n_trivias(n_attached_trivias);
158    }
159
160    fn exit(&mut self) {
161        match mem::replace(&mut self.state, State::PendingExit) {
162            State::PendingEnter => unreachable!(),
163            State::PendingExit => (self.sink)(StrStep::Exit),
164            State::Normal => (),
165        }
166    }
167
168    fn eat_trivias(&mut self) {
169        while self.pos < self.lexed.len() {
170            let kind = self.lexed.kind(self.pos);
171            if !kind.is_trivia() {
172                break;
173            }
174            self.do_token(kind, 1);
175        }
176    }
177
178    fn eat_n_trivias(&mut self, n: usize) {
179        for _ in 0..n {
180            let kind = self.lexed.kind(self.pos);
181            assert!(kind.is_trivia());
182            self.do_token(kind, 1);
183        }
184    }
185
186    fn do_token(&mut self, kind: SyntaxKind, n_tokens: usize) {
187        let text = &self.lexed.range_text(self.pos..self.pos + n_tokens);
188        self.pos += n_tokens;
189        (self.sink)(StrStep::Token { kind, text });
190    }
191
192    fn do_float_split(&mut self, has_pseudo_dot: bool) {
193        let text = &self.lexed.range_text(self.pos..self.pos + 1);
194
195        match text.split_once('.') {
196            Some((left, right)) => {
197                assert!(!left.is_empty());
198                (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF });
199                (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: left });
200                (self.sink)(StrStep::Exit);
201
202                // here we move the exit up, the original exit has been deleted in process
203                (self.sink)(StrStep::Exit);
204
205                (self.sink)(StrStep::Token { kind: SyntaxKind::DOT, text: "." });
206
207                if has_pseudo_dot {
208                    assert!(right.is_empty(), "{left}.{right}");
209                    self.state = State::Normal;
210                } else {
211                    assert!(!right.is_empty(), "{left}.{right}");
212                    (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF });
213                    (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: right });
214                    (self.sink)(StrStep::Exit);
215
216                    // the parser creates an unbalanced start node, we are required to close it here
217                    self.state = State::PendingExit;
218                }
219            }
220            None => {
221                // illegal float literal which doesn't have dot in form (like 1e0)
222                // we should emit an error node here
223                (self.sink)(StrStep::Error { msg: "illegal float literal", pos: self.pos });
224                (self.sink)(StrStep::Enter { kind: SyntaxKind::ERROR });
225                (self.sink)(StrStep::Token { kind: SyntaxKind::FLOAT_NUMBER, text });
226                (self.sink)(StrStep::Exit);
227
228                // move up
229                (self.sink)(StrStep::Exit);
230
231                self.state = if has_pseudo_dot { State::Normal } else { State::PendingExit };
232            }
233        }
234
235        self.pos += 1;
236    }
237}
238
239fn n_attached_trivias<'a>(
240    kind: SyntaxKind,
241    trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
242) -> usize {
243    match kind {
244        CONST | ENUM | FN | IMPL | MACRO_CALL | MACRO_DEF | MACRO_RULES | MODULE | RECORD_FIELD
245        | STATIC | STRUCT | TRAIT | TUPLE_FIELD | TYPE_ALIAS | UNION | USE | VARIANT
246        | EXTERN_CRATE => {
247            let mut res = 0;
248            let mut trivias = trivias.enumerate().peekable();
249
250            while let Some((i, (kind, text))) = trivias.next() {
251                match kind {
252                    WHITESPACE if text.contains("\n\n") => {
253                        // we check whether the next token is a doc-comment
254                        // and skip the whitespace in this case
255                        if let Some((COMMENT, peek_text)) = trivias.peek().map(|(_, pair)| pair) {
256                            if is_outer(peek_text) {
257                                continue;
258                            }
259                        }
260                        break;
261                    }
262                    COMMENT => {
263                        if is_inner(text) {
264                            break;
265                        }
266                        res = i + 1;
267                    }
268                    _ => (),
269                }
270            }
271            res
272        }
273        _ => 0,
274    }
275}
276
/// Returns `true` if `text` is an outer doc comment, i.e. it starts with
/// `///` or `/**`.
///
/// Comments starting with `////` or `/***` are ordinary comments, and so is
/// the empty block comment `/**/`, which merely happens to start with `/**`.
fn is_outer(text: &str) -> bool {
    if text.starts_with("////") || text.starts_with("/***") || text == "/**/" {
        return false;
    }
    text.starts_with("///") || text.starts_with("/**")
}
283
/// Returns `true` if `text` is an inner doc comment, i.e. it starts with
/// `//!` or `/*!`.
fn is_inner(text: &str) -> bool {
    const INNER_DOC_PREFIXES: [&str; 2] = ["//!", "/*!"];
    INNER_DOC_PREFIXES.iter().any(|&prefix| text.starts_with(prefix))
}
286}