mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-28 06:14:46 +00:00
improved private tag parsing
This commit is contained in:
parent
edd54ab4ab
commit
ea32a37315
4 changed files with 242 additions and 70 deletions
|
@ -299,25 +299,33 @@ pub enum BadIdent {
|
|||
WeirdDotQualified(Row, Col),
|
||||
DoubleDot(Row, Col),
|
||||
StrayDot(Row, Col),
|
||||
StrayAt(Row, Col),
|
||||
BadPrivateTag(Row, Col),
|
||||
}
|
||||
|
||||
/// Parse an identifier into a string.
|
||||
///
|
||||
/// This is separate from the `ident` Parser because string interpolation
|
||||
/// wants to use it this way.
|
||||
fn chomp_lowercase_part(buffer: &[u8]) -> Result<&str, Progress> {
|
||||
chomp_part(|c: char| c.is_lowercase(), buffer)
|
||||
}
|
||||
|
||||
/// a `.foo` accessor function
|
||||
fn chomp_accessor(buffer: &[u8], row: Row, col: Col) -> Result<&str, BadIdent> {
|
||||
fn chomp_uppercase_part(buffer: &[u8]) -> Result<&str, Progress> {
|
||||
chomp_part(|c: char| c.is_uppercase(), buffer)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn chomp_part<F>(leading_is_good: F, buffer: &[u8]) -> Result<&str, Progress>
|
||||
where
|
||||
F: Fn(char) -> bool,
|
||||
{
|
||||
// assumes the leading `.` has been chomped already
|
||||
use encode_unicode::CharExt;
|
||||
|
||||
let mut chomped = 0;
|
||||
|
||||
if let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
if ch.is_lowercase() {
|
||||
if leading_is_good(ch) {
|
||||
chomped += width;
|
||||
} else {
|
||||
return Err(BadIdent::StrayDot(row, col + 1));
|
||||
return Err(NoProgress);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -331,17 +339,56 @@ fn chomp_accessor(buffer: &[u8], row: Row, col: Col) -> Result<&str, BadIdent> {
|
|||
}
|
||||
|
||||
if chomped == 0 {
|
||||
Err(BadIdent::StrayDot(row, col + 1))
|
||||
} else if let Ok(('.', _)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
Err(BadIdent::WeirdAccessor(row, col))
|
||||
Err(NoProgress)
|
||||
} else {
|
||||
let name = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) };
|
||||
|
||||
dbg!(name);
|
||||
Ok(name)
|
||||
}
|
||||
}
|
||||
|
||||
/// a `.foo` accessor function
|
||||
fn chomp_accessor(buffer: &[u8], row: Row, col: Col) -> Result<&str, BadIdent> {
|
||||
// assumes the leading `.` has been chomped already
|
||||
use encode_unicode::CharExt;
|
||||
|
||||
match chomp_lowercase_part(buffer) {
|
||||
Ok(name) => {
|
||||
let chomped = name.len();
|
||||
|
||||
if let Ok(('.', _)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
Err(BadIdent::WeirdAccessor(row, col))
|
||||
} else {
|
||||
Ok(name)
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
// we've already made progress with the initial `.`
|
||||
Err(BadIdent::StrayDot(row, col + 1))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// a `@Token` private tag
|
||||
fn chomp_private_tag(buffer: &[u8], row: Row, col: Col) -> Result<&str, BadIdent> {
|
||||
// assumes the leading `@` has NOT been chomped already
|
||||
debug_assert_eq!(buffer.get(0), Some(&b'@'));
|
||||
use encode_unicode::CharExt;
|
||||
|
||||
match chomp_uppercase_part(&buffer[1..]) {
|
||||
Ok(name) => {
|
||||
let chomped = 1 + name.len();
|
||||
|
||||
if let Ok(('.', _)) = char::from_utf8_slice_start(&buffer[chomped..]) {
|
||||
Err(BadIdent::BadPrivateTag(row, col + chomped as u16))
|
||||
} else {
|
||||
Ok(name)
|
||||
}
|
||||
}
|
||||
Err(_) => Err(BadIdent::BadPrivateTag(row, col + 1)),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_ident_help_help<'a>(
|
||||
arena: &'a Bump,
|
||||
mut state: State<'a>,
|
||||
|
@ -350,7 +397,6 @@ fn parse_ident_help_help<'a>(
|
|||
let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
||||
let mut is_capitalized;
|
||||
let is_accessor_fn;
|
||||
let mut is_private_tag = false;
|
||||
|
||||
let bytes = state.bytes;
|
||||
let mut chomped_capitalized = 0;
|
||||
|
@ -387,35 +433,17 @@ fn parse_ident_help_help<'a>(
|
|||
Err(fail) => return Err((MadeProgress, fail, state)),
|
||||
}
|
||||
} else if first_ch == '@' {
|
||||
state = advance_state!(state, bytes_parsed)?;
|
||||
match chomp_private_tag(state.bytes, state.line, state.column) {
|
||||
Ok(tagname) => {
|
||||
let bytes_parsed = 1 + tagname.len();
|
||||
|
||||
// '@' must always be followed by a capital letter!
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((next_ch, next_bytes_parsed)) => {
|
||||
if next_ch.is_uppercase() {
|
||||
state = advance_state!(state, next_bytes_parsed)?;
|
||||
state = advance_state!(state, bytes_parsed)?;
|
||||
|
||||
part_buf.push('@');
|
||||
part_buf.push(next_ch);
|
||||
chomped_part_buf += 1 + next_bytes_parsed;
|
||||
|
||||
is_private_tag = true;
|
||||
is_capitalized = true;
|
||||
is_accessor_fn = false;
|
||||
} else {
|
||||
return Err((
|
||||
MadeProgress,
|
||||
BadIdent::PrivateTagNotUppercase(state.line, state.column),
|
||||
state,
|
||||
));
|
||||
}
|
||||
return Ok((MadeProgress, (Ident::PrivateTag(tagname), None), state));
|
||||
}
|
||||
Err(_reason) => {
|
||||
return Err((
|
||||
MadeProgress,
|
||||
BadIdent::PrivateTagNotUppercase(state.line, state.column),
|
||||
state,
|
||||
));
|
||||
Err(fail) => {
|
||||
state = advance_state!(state, 1)?;
|
||||
return Err((MadeProgress, fail, state));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -555,7 +583,7 @@ fn parse_ident_help_help<'a>(
|
|||
let answer = if is_accessor_fn {
|
||||
// Handle accessor functions first because they have the strictest requirements.
|
||||
// Accessor functions may have exactly 1 noncapitalized part, and no capitalized parts.
|
||||
if cparts == 0 && noncapitalized_parts.len() == 1 && !is_private_tag {
|
||||
if cparts == 0 && noncapitalized_parts.len() == 1 {
|
||||
// an accessor starts with a `.`, but we drop that from the name
|
||||
let value = unsafe {
|
||||
std::str::from_utf8_unchecked(&bytes[1 + chomped..1 + chomped + chomped_part_buf])
|
||||
|
@ -585,11 +613,7 @@ fn parse_ident_help_help<'a>(
|
|||
1 => {
|
||||
let chomped = chomped_capitalized;
|
||||
let value = unsafe { std::str::from_utf8_unchecked(&bytes[..chomped]) };
|
||||
if is_private_tag {
|
||||
Ident::PrivateTag(value)
|
||||
} else {
|
||||
Ident::GlobalTag(value)
|
||||
}
|
||||
Ident::GlobalTag(value)
|
||||
}
|
||||
_ => {
|
||||
// This is a qualified tag, which is not allowed!
|
||||
|
@ -600,13 +624,6 @@ fn parse_ident_help_help<'a>(
|
|||
));
|
||||
}
|
||||
}
|
||||
} else if is_private_tag {
|
||||
// This is qualified field access with an '@' in front, which does not make sense!
|
||||
return Err((
|
||||
MadeProgress,
|
||||
BadIdent::PrivateTagFieldAccess(state.line, state.column),
|
||||
state,
|
||||
));
|
||||
} else {
|
||||
// We have multiple noncapitalized parts, so this must be field access.
|
||||
let module_name = if cparts == 0 {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use roc_collections::all::MutSet;
|
||||
use roc_parse::parser::{Col, Row};
|
||||
use roc_problem::can::PrecedenceProblem::BothNonAssociative;
|
||||
use roc_problem::can::{FloatErrorKind, IntErrorKind, Problem, RuntimeError};
|
||||
use roc_region::all::Region;
|
||||
|
@ -450,14 +451,106 @@ fn to_bad_ident_expr_report<'b>(
|
|||
])
|
||||
}
|
||||
|
||||
PrivateTagFieldAccess(_row, _col) => alloc.stack(vec![
|
||||
alloc.reflow("I am very confused by this field access:"),
|
||||
alloc.region(surroundings),
|
||||
alloc.concat(vec![
|
||||
alloc.reflow(r"It looks like a record field access on a private tag.")
|
||||
]),
|
||||
]),
|
||||
_ => todo!(),
|
||||
PrivateTagFieldAccess(row, col) => {
|
||||
let region =
|
||||
Region::from_rows_cols(surroundings.start_line, surroundings.start_col, row, col);
|
||||
alloc.stack(vec![
|
||||
alloc.reflow("I am very confused by this field access:"),
|
||||
alloc.region_with_subregion(surroundings, region),
|
||||
alloc.concat(vec![
|
||||
alloc.reflow(r"It looks like a record field access on a private tag.")
|
||||
]),
|
||||
])
|
||||
}
|
||||
|
||||
Underscore(row, col) => {
|
||||
let region =
|
||||
Region::from_rows_cols(surroundings.start_line, surroundings.start_col, row, col);
|
||||
alloc.stack(vec![
|
||||
alloc.reflow("Underscores are not allowed in identifier names:"),
|
||||
alloc.region_with_subregion(surroundings, region),
|
||||
alloc.concat(vec![alloc.reflow(
|
||||
r"I recommend using camelCase, it is the standard in the Roc ecosystem.",
|
||||
)]),
|
||||
])
|
||||
}
|
||||
|
||||
DoubleDot(row, col) => {
|
||||
let region =
|
||||
Region::from_rows_cols(surroundings.start_line, surroundings.start_col, row, col);
|
||||
alloc.stack(vec![
|
||||
alloc.reflow("I am very confused by these two dots in a row:"),
|
||||
alloc.region_with_subregion(surroundings, region),
|
||||
alloc.concat(vec![
|
||||
alloc.reflow(r"There always needs to be a name after a dot.")
|
||||
]),
|
||||
])
|
||||
}
|
||||
|
||||
StrayAt(row, col) => {
|
||||
let region =
|
||||
Region::from_rows_cols(surroundings.start_line, surroundings.start_col, row, col);
|
||||
alloc.stack(vec![
|
||||
alloc.reflow("I am very confused by this @ symbol"),
|
||||
alloc.region_with_subregion(surroundings, region),
|
||||
alloc.concat(vec![alloc.reflow(r"I expected a private tag.")]),
|
||||
])
|
||||
}
|
||||
|
||||
BadPrivateTag(row, col) => {
|
||||
use BadIdentNext::*;
|
||||
match what_is_next(alloc.src_lines, row, col) {
|
||||
LowercaseAccess(width) => {
|
||||
let region = Region::from_rows_cols(row, col, row, col + width);
|
||||
alloc.stack(vec![
|
||||
alloc.reflow("I am very confused by this field access:"),
|
||||
alloc.region_with_subregion(surroundings, region),
|
||||
alloc.concat(vec![
|
||||
alloc.reflow(r"It looks like a record field access on a private tag.")
|
||||
]),
|
||||
])
|
||||
}
|
||||
UppercaseAccess(width) => {
|
||||
let region = Region::from_rows_cols(row, col, row, col + width);
|
||||
alloc.stack(vec![
|
||||
alloc.reflow("I am very confused by this expression:"),
|
||||
alloc.region_with_subregion(surroundings, region),
|
||||
alloc.concat(vec![
|
||||
alloc.reflow(
|
||||
r"Looks like a private tag is treated like a module name. ",
|
||||
),
|
||||
alloc.reflow(r"Maybe you wanted a qualified name, like "),
|
||||
alloc.parser_suggestion("Json.Decode.string"),
|
||||
alloc.text("?"),
|
||||
]),
|
||||
])
|
||||
}
|
||||
Other(Some(c)) if c.is_lowercase() => {
|
||||
let region = Region::from_rows_cols(
|
||||
surroundings.start_line,
|
||||
surroundings.start_col + 1,
|
||||
row,
|
||||
col + 1,
|
||||
);
|
||||
alloc.stack(vec![
|
||||
alloc.reflow("I am trying to parse a private tag here:"),
|
||||
alloc.region_with_subregion(surroundings, region),
|
||||
alloc.concat(vec![
|
||||
alloc.reflow(r"But after the "),
|
||||
alloc.keyword("@"),
|
||||
alloc.reflow(r" symbol I found a lowercase letter. "),
|
||||
alloc.reflow(r"All tag names (global and private)"),
|
||||
alloc.reflow(r" must start with an uppercase letter, like "),
|
||||
alloc.parser_suggestion("@UUID"),
|
||||
alloc.reflow(" or "),
|
||||
alloc.parser_suggestion("@Secrets"),
|
||||
alloc.reflow("."),
|
||||
]),
|
||||
])
|
||||
}
|
||||
other => todo!("{:?}", other),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -591,6 +684,69 @@ fn to_bad_ident_pattern_report<'b>(
|
|||
}
|
||||
}
|
||||
|
||||
/// Classification of the source text found at a reported bad-identifier
/// position, as computed by `what_is_next`.
#[derive(Debug)]
enum BadIdentNext<'a> {
    /// A `.` followed by a lowercase character. The payload is a width:
    /// 2 plus the number of further characters before whitespace or `#`.
    LowercaseAccess(u16),
    /// A `.` followed by an uppercase character; same width payload.
    UppercaseAccess(u16),
    /// A `.` followed by an ASCII digit; same width payload.
    NumberAccess(u16),
    /// The rest of the line starts with this keyword.
    Keyword(&'a str),
    /// A `.` followed by anything else, or by the end of the line.
    DanglingDot,
    /// Any other character, or `None` when there is no character to look at
    /// (row out of range or end of line).
    Other(Option<char>),
}
|
||||
|
||||
fn what_is_next<'a>(source_lines: &'a [&'a str], row: Row, col: Col) -> BadIdentNext<'a> {
|
||||
let row_index = row as usize;
|
||||
let col_index = col as usize;
|
||||
match source_lines.get(row_index) {
|
||||
None => BadIdentNext::Other(None),
|
||||
Some(line) => {
|
||||
let chars = &line[col_index..];
|
||||
let mut it = chars.chars();
|
||||
|
||||
match roc_parse::keyword::KEYWORDS
|
||||
.iter()
|
||||
.find(|keyword| crate::error::parse::starts_with_keyword(chars, keyword))
|
||||
{
|
||||
Some(keyword) => BadIdentNext::Keyword(keyword),
|
||||
None => match it.next() {
|
||||
None => BadIdentNext::Other(None),
|
||||
Some('.') => match it.next() {
|
||||
Some(c) if c.is_lowercase() => {
|
||||
BadIdentNext::LowercaseAccess(2 + till_whitespace(it) as u16)
|
||||
}
|
||||
Some(c) if c.is_uppercase() => {
|
||||
BadIdentNext::UppercaseAccess(2 + till_whitespace(it) as u16)
|
||||
}
|
||||
Some(c) if c.is_ascii_digit() => {
|
||||
BadIdentNext::NumberAccess(2 + till_whitespace(it) as u16)
|
||||
}
|
||||
_ => BadIdentNext::DanglingDot,
|
||||
},
|
||||
Some(c) => BadIdentNext::Other(Some(c)),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the characters yielded by `it` before the first ASCII whitespace
/// character or `#`.
///
/// The terminating character itself is not counted. If the iterator ends
/// first, every character it yielded is counted. The count is in characters,
/// not bytes.
fn till_whitespace<I>(it: I) -> usize
where
    I: Iterator<Item = char>,
{
    it.take_while(|c| !(c.is_ascii_whitespace() || *c == '#'))
        .count()
}
|
||||
|
||||
fn pretty_runtime_error<'b>(
|
||||
alloc: &'b RocDocAllocator<'b>,
|
||||
runtime_error: RuntimeError,
|
||||
|
|
|
@ -3012,7 +3012,7 @@ fn what_is_next<'a>(source_lines: &'a [&'a str], row: Row, col: Col) -> Next<'a>
|
|||
}
|
||||
}
|
||||
|
||||
fn starts_with_keyword(rest_of_line: &str, keyword: &str) -> bool {
|
||||
pub fn starts_with_keyword(rest_of_line: &str, keyword: &str) -> bool {
|
||||
if let Some(stripped) = rest_of_line.strip_prefix(keyword) {
|
||||
match stripped.chars().next() {
|
||||
None => true,
|
||||
|
|
|
@ -4148,14 +4148,13 @@ mod test_reporting {
|
|||
r#"
|
||||
── SYNTAX PROBLEM ──────────────────────────────────────────────────────────────
|
||||
|
||||
I am trying to parse a qualified name here:
|
||||
I am very confused by this expression:
|
||||
|
||||
1│ @Foo.Bar
|
||||
^
|
||||
^^^^
|
||||
|
||||
This looks like a qualified tag name to me, but tags cannot be
|
||||
qualified! Maybe you wanted a qualified name, something like
|
||||
Json.Decode.string?
|
||||
Looks like a private tag is treated like a module name. Maybe you
|
||||
wanted a qualified name, like Json.Decode.string?
|
||||
"#
|
||||
),
|
||||
)
|
||||
|
@ -5523,7 +5522,7 @@ mod test_reporting {
|
|||
I am very confused by this field access:
|
||||
|
||||
1│ @UUID.bar
|
||||
^^^^^^^^^
|
||||
^^^^
|
||||
|
||||
It looks like a record field access on a private tag.
|
||||
"#
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue