mirror of
https://github.com/erg-lang/erg.git
synced 2025-09-29 12:24:45 +00:00
Fix #255
This commit is contained in:
parent
b9a77bb695
commit
708d154f60
3 changed files with 20 additions and 26 deletions
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -33,6 +33,7 @@ name = "erg_parser"
|
||||||
version = "0.5.13"
|
version = "0.5.13"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"erg_common",
|
"erg_common",
|
||||||
|
"unicode-xid",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -50,6 +51,12 @@ version = "0.2.132"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
|
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-xid"
|
||||||
|
version = "0.2.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi"
|
name = "winapi"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
|
|
|
@ -19,6 +19,7 @@ pretty = ["erg_common/pretty"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
erg_common = { version = "0.5.13", path = "../erg_common" }
|
erg_common = { version = "0.5.13", path = "../erg_common" }
|
||||||
|
unicode-xid = "0.2.4"
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
path = "lib.rs"
|
path = "lib.rs"
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
//! defines and implements `Lexer` (Tokenizer).
|
//! defines and implements `Lexer` (Tokenizer).
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
|
|
||||||
|
use unicode_xid::UnicodeXID;
|
||||||
|
|
||||||
use erg_common::cache::CacheSet;
|
use erg_common::cache::CacheSet;
|
||||||
use erg_common::config::ErgConfig;
|
use erg_common::config::ErgConfig;
|
||||||
use erg_common::config::Input;
|
use erg_common::config::Input;
|
||||||
|
@ -156,28 +158,12 @@ impl Lexer /*<'a>*/ {
|
||||||
Some(Err(LexError::feature_error(0, token.loc(), feat_name)))
|
Some(Err(LexError::feature_error(0, token.loc(), feat_name)))
|
||||||
}
|
}
|
||||||
|
|
||||||
const fn is_valid_symbol_ch(c: char) -> bool {
|
fn is_valid_start_symbol_ch(c: char) -> bool {
|
||||||
match c {
|
c.is_xid_start() || c == '_'
|
||||||
'0'..='9' => true,
|
|
||||||
// control characters
|
|
||||||
'\0' | '\u{0009}'..='\u{001F}' => false,
|
|
||||||
// white spaces
|
|
||||||
' ' | '\u{00A0}' => false,
|
|
||||||
'\u{007F}' | '\u{0085}' | '\u{05C1}' | '\u{05C2}' => false,
|
|
||||||
'\u{0701}'..='\u{070d}' => false,
|
|
||||||
'\u{07B2}'..='\u{07BF}' => false,
|
|
||||||
'\u{1680}' | '\u{180E}' => false,
|
|
||||||
'\u{2000}'..='\u{200F}' => false,
|
|
||||||
'\u{2028}'..='\u{202F}' => false,
|
|
||||||
'\u{205F}'..='\u{206F}' => false,
|
|
||||||
'\u{3000}' | '\u{3164}' | '\u{FEFF}' => false,
|
|
||||||
// operator characters + special markers
|
|
||||||
'<' | '>' | '$' | '%' | '.' | ',' | ':' | ';' | '+' | '-' | '*' | '/' | '=' | '#'
|
|
||||||
| '&' | '|' | '^' | '~' | '@' | '!' | '?' | '\\' => false,
|
|
||||||
// enclosures
|
|
||||||
'[' | ']' | '(' | ')' | '{' | '}' | '\"' | '\'' | '`' => false,
|
|
||||||
_ => true,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_valid_continue_symbol_ch(c: char) -> bool {
|
||||||
|
c.is_xid_continue() && !('0'..='9').contains(&c) || c == '_'
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Detect `c` is a bidirectional overriding character.
|
/// Detect `c` is a bidirectional overriding character.
|
||||||
|
@ -504,7 +490,7 @@ impl Lexer /*<'a>*/ {
|
||||||
n if n.is_ascii_digit() || n == '_' => {
|
n if n.is_ascii_digit() || n == '_' => {
|
||||||
num.push(self.consume().unwrap());
|
num.push(self.consume().unwrap());
|
||||||
}
|
}
|
||||||
c if Self::is_valid_symbol_ch(c) => {
|
c if Self::is_valid_continue_symbol_ch(c) => {
|
||||||
// exponent (e.g. 10e+3)
|
// exponent (e.g. 10e+3)
|
||||||
if c == 'e'
|
if c == 'e'
|
||||||
&& (self.peek_next_ch() == Some('+') || self.peek_next_ch() == Some('-'))
|
&& (self.peek_next_ch() == Some('+') || self.peek_next_ch() == Some('-'))
|
||||||
|
@ -544,7 +530,7 @@ impl Lexer /*<'a>*/ {
|
||||||
}
|
}
|
||||||
// method call of IntLit
|
// method call of IntLit
|
||||||
// or range operator (e.g. 1..)
|
// or range operator (e.g. 1..)
|
||||||
Some(c) if Self::is_valid_symbol_ch(c) || c == '.' => {
|
Some(c) if Self::is_valid_continue_symbol_ch(c) || c == '.' => {
|
||||||
let kind = if num.starts_with('-') && !Self::is_zero(&num) {
|
let kind = if num.starts_with('-') && !Self::is_zero(&num) {
|
||||||
IntLit
|
IntLit
|
||||||
} else {
|
} else {
|
||||||
|
@ -583,7 +569,7 @@ impl Lexer /*<'a>*/ {
|
||||||
fn lex_symbol(&mut self, first_ch: char) -> LexResult<Token> {
|
fn lex_symbol(&mut self, first_ch: char) -> LexResult<Token> {
|
||||||
let mut cont = first_ch.to_string();
|
let mut cont = first_ch.to_string();
|
||||||
while let Some(c) = self.peek_cur_ch() {
|
while let Some(c) = self.peek_cur_ch() {
|
||||||
if Self::is_valid_symbol_ch(c) {
|
if Self::is_valid_continue_symbol_ch(c) {
|
||||||
cont.push(self.consume().unwrap());
|
cont.push(self.consume().unwrap());
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
|
@ -1245,7 +1231,7 @@ impl Iterator for Lexer /*<'a>*/ {
|
||||||
// IntLit or RatioLit
|
// IntLit or RatioLit
|
||||||
Some(n) if n.is_ascii_digit() => Some(self.lex_num(n)),
|
Some(n) if n.is_ascii_digit() => Some(self.lex_num(n)),
|
||||||
// Symbol (includes '_')
|
// Symbol (includes '_')
|
||||||
Some(c) if Self::is_valid_symbol_ch(c) => Some(self.lex_symbol(c)),
|
Some(c) if Self::is_valid_start_symbol_ch(c) => Some(self.lex_symbol(c)),
|
||||||
// Invalid character (e.g. space-like character)
|
// Invalid character (e.g. space-like character)
|
||||||
Some(invalid) => {
|
Some(invalid) => {
|
||||||
let token = self.emit_token(Illegal, &invalid.to_string());
|
let token = self.emit_token(Illegal, &invalid.to_string());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue