Add a Converter type for token conversion

Jonas Schievink 2022-05-02 17:47:12 +02:00
parent f83dccf5b9
commit 1f50e19eb2


@@ -29,29 +29,19 @@ struct LexError {
 impl<'a> LexedStr<'a> {
     pub fn new(text: &'a str) -> LexedStr<'a> {
-        let mut res = LexedStr { text, kind: Vec::new(), start: Vec::new(), error: Vec::new() };
-        let mut offset = 0;
+        let mut conv = Converter::new(text);
         if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
-            res.push(SHEBANG, offset);
-            offset = shebang_len
+            conv.res.push(SHEBANG, conv.offset);
+            conv.offset = shebang_len;
         };
-        for token in rustc_lexer::tokenize(&text[offset..]) {
-            let token_text = &text[offset..][..token.len];
-            let (kind, err) = from_rustc(&token.kind, token_text);
-            res.push(kind, offset);
-            offset += token.len;
-            if let Some(err) = err {
-                let token = res.len() as u32;
-                let msg = err.to_string();
-                res.error.push(LexError { msg, token });
-            }
-        }
-        res.push(EOF, offset);
-        res
+        for token in rustc_lexer::tokenize(&text[conv.offset..]) {
+            let token_text = &text[conv.offset..][..token.len];
+            conv.extend_token(&token.kind, token_text);
+        }
+        conv.finalize_with_eof()
     }

     pub fn single_token(text: &'a str) -> Option<(SyntaxKind, Option<String>)> {
@@ -64,8 +54,12 @@ impl<'a> LexedStr<'a> {
             return None;
         }
-        let (kind, err) = from_rustc(&token.kind, text);
-        Some((kind, err.map(|it| it.to_owned())))
+        let mut conv = Converter::new(text);
+        conv.extend_token(&token.kind, text);
+        match &*conv.res.kind {
+            [kind] => Some((*kind, conv.res.error.pop().map(|it| it.msg.clone()))),
+            _ => None,
+        }
     }

     pub fn as_str(&self) -> &str {
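
Note on the hunk above: the rewritten `single_token` drives the same Converter and then insists that exactly one token kind was produced, using a slice pattern on the accumulated `kind` vector. As a standalone illustration of that "exactly one element" check (a hypothetical helper written for this note, not part of the commit):

    // Hypothetical helper mirroring the slice-pattern check used in single_token:
    // succeed only when the slice holds exactly one element.
    fn only<T: Copy>(items: &[T]) -> Option<T> {
        match items {
            [single] => Some(*single), // exactly one token kind produced
            _ => None,                 // none, or more than one
        }
    }

    fn main() {
        assert_eq!(only(&[42]), Some(42));
        assert_eq!(only::<u32>(&[]), None);
        assert_eq!(only(&[1, 2]), None);
    }
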
@@ -128,11 +122,36 @@ impl<'a> LexedStr<'a> {
     }
 }
-/// Returns `SyntaxKind` and an optional tokenize error message.
-fn from_rustc(
-    kind: &rustc_lexer::TokenKind,
-    token_text: &str,
-) -> (SyntaxKind, Option<&'static str>) {
+struct Converter<'a> {
+    res: LexedStr<'a>,
+    offset: usize,
+}
+
+impl<'a> Converter<'a> {
+    fn new(text: &'a str) -> Self {
+        Self {
+            res: LexedStr { text, kind: Vec::new(), start: Vec::new(), error: Vec::new() },
+            offset: 0,
+        }
+    }
+
+    fn finalize_with_eof(mut self) -> LexedStr<'a> {
+        self.res.push(EOF, self.offset);
+        self.res
+    }
+
+    fn push(&mut self, kind: SyntaxKind, len: usize, err: Option<&str>) {
+        self.res.push(kind, self.offset);
+        self.offset += len;
+        if let Some(err) = err {
+            let token = self.res.len() as u32;
+            let msg = err.to_string();
+            self.res.error.push(LexError { msg, token });
+        }
+    }
+
+    fn extend_token(&mut self, kind: &rustc_lexer::TokenKind, token_text: &str) {
         // A note on an intended tradeoff:
         // We drop some useful information here (see patterns with double dots `..`)
         // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
@@ -152,10 +171,15 @@ fn from_rustc(
             rustc_lexer::TokenKind::Whitespace => WHITESPACE,
             rustc_lexer::TokenKind::Ident if token_text == "_" => UNDERSCORE,
-            rustc_lexer::TokenKind::Ident => SyntaxKind::from_keyword(token_text).unwrap_or(IDENT),
+            rustc_lexer::TokenKind::Ident => {
+                SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
+            }
             rustc_lexer::TokenKind::RawIdent => IDENT,
-            rustc_lexer::TokenKind::Literal { kind, .. } => return from_rustc_literal(kind),
+            rustc_lexer::TokenKind::Literal { kind, .. } => {
+                self.extend_literal(token_text.len(), kind);
+                return;
+            }
             rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
                 if *starts_with_number {
@@ -196,10 +220,10 @@ fn from_rustc(
         };
         let err = if err.is_empty() { None } else { Some(err) };
-        (syntax_kind, err)
+        self.push(syntax_kind, token_text.len(), err);
     }
-fn from_rustc_literal(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
+    fn extend_literal(&mut self, len: usize, kind: &rustc_lexer::LiteralKind) {
         let mut err = "";
         let syntax_kind = match *kind {
@@ -271,5 +295,6 @@ fn from_rustc_literal(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'
         };
         let err = if err.is_empty() { None } else { Some(err) };
-    (syntax_kind, err)
+        self.push(syntax_kind, len, err);
+    }
 }
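
Taken together, the commit folds the free functions `from_rustc` and `from_rustc_literal` into methods on a `Converter` that owns both the partially built `LexedStr` and the running byte offset, so tokens and errors are always recorded through one `push`. The following is a condensed, self-contained sketch of that accumulator shape; `Tok` and `Lexed` are simplified stand-ins invented for the example, not rust-analyzer's real `SyntaxKind` and `LexedStr`:

    // Hypothetical stand-ins for SyntaxKind / LexedStr, only for this sketch.
    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Tok {
        Word,
        Space,
        Eof,
    }

    #[derive(Default, Debug)]
    struct Lexed {
        kind: Vec<Tok>,
        start: Vec<usize>,            // byte offset where each token begins
        errors: Vec<(usize, String)>, // (token index, message)
    }

    // The accumulator: owns the result being built plus the running offset,
    // mirroring the Converter { res, offset } introduced by the commit.
    struct Converter {
        res: Lexed,
        offset: usize,
    }

    impl Converter {
        fn new() -> Self {
            Converter { res: Lexed::default(), offset: 0 }
        }

        // Record one token of `len` bytes, advance the offset, and attach an
        // optional error message to the token that was just pushed.
        fn push(&mut self, kind: Tok, len: usize, err: Option<&str>) {
            self.res.kind.push(kind);
            self.res.start.push(self.offset);
            self.offset += len;
            if let Some(msg) = err {
                let token = self.res.kind.len() - 1;
                self.res.errors.push((token, msg.to_string()));
            }
        }

        // Close the stream with an EOF marker and hand back the result.
        fn finalize_with_eof(mut self) -> Lexed {
            self.res.kind.push(Tok::Eof);
            self.res.start.push(self.offset);
            self.res
        }
    }

    fn main() {
        let mut conv = Converter::new();
        for piece in ["hello", " ", "world"] {
            let kind = if piece.trim().is_empty() { Tok::Space } else { Tok::Word };
            conv.push(kind, piece.len(), None);
        }
        let lexed = conv.finalize_with_eof();
        assert_eq!(lexed.kind, [Tok::Word, Tok::Space, Tok::Word, Tok::Eof]);
        assert_eq!(lexed.start, [0, 5, 6, 11]);
        println!("{lexed:?}");
    }

The design point mirrored here is that `push` is the only place where the offset advances and errors are attached, which is what lets both `new` and `single_token` share the same conversion path in the real code.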