Switch to always encoding package names / paths as strings

This will simplify parsing and make it possible to have a uniform lexer for the language. Previously unquoted package names were allowed to include '-'s, which aren't valid identifiers.

In the future, we'll distinguish local paths from packages in the package-manager by looking for a ".roc" suffix, which should only be present in local paths.
This commit is contained in:
Joshua Warner 2021-12-20 18:23:41 -08:00
parent f680b83b9e
commit 8b58d5cbc7
31 changed files with 120 additions and 293 deletions

View file

@ -1,19 +1,13 @@
use crate::ast::{Collection, CommentOrNewline, Spaced, StrLiteral, TypeAnnotation};
use crate::blankspace::space0_e;
use crate::ident::lowercase_ident;
use crate::parser::Progress::{self, *};
use crate::parser::{specialize, word1, EPackageEntry, EPackageName, EPackageOrPath, Parser};
use crate::parser::Progress::*;
use crate::parser::{specialize, word1, EPackageEntry, EPackageName, Parser};
use crate::state::State;
use crate::string_literal;
use bumpalo::collections::Vec;
use roc_region::all::Loc;
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub struct PackageName<'a> {
pub account: &'a str,
pub pkg: &'a str,
}
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub enum Version<'a> {
Exact(&'a str),
@ -32,10 +26,7 @@ pub enum VersionComparison {
}
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum PackageOrPath<'a> {
Package(PackageName<'a>, Version<'a>),
Path(StrLiteral<'a>),
}
pub struct PackageName<'a>(pub &'a str);
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub struct ModuleName<'a>(&'a str);
@ -93,7 +84,7 @@ pub struct InterfaceHeader<'a> {
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum To<'a> {
ExistingPackage(&'a str),
NewPackage(PackageOrPath<'a>),
NewPackage(PackageName<'a>),
}
#[derive(Clone, Debug, PartialEq)]
@ -121,7 +112,7 @@ pub struct AppHeader<'a> {
pub struct PackageHeader<'a> {
pub name: Loc<PackageName<'a>>,
pub exposes: Vec<'a, Loc<Spaced<'a, ExposedName<'a>>>>,
pub packages: Vec<'a, (Loc<&'a str>, Loc<PackageOrPath<'a>>)>,
pub packages: Vec<'a, (Loc<&'a str>, Loc<PackageName<'a>>)>,
pub imports: Vec<'a, Loc<ImportsEntry<'a>>>,
// Potential comments and newlines - these will typically all be empty.
@ -213,7 +204,7 @@ pub struct TypedIdent<'a> {
pub struct PackageEntry<'a> {
pub shorthand: &'a str,
pub spaces_after_shorthand: &'a [CommentOrNewline<'a>],
pub package_or_path: Loc<PackageOrPath<'a>>,
pub package_name: Loc<PackageName<'a>>,
}
pub fn package_entry<'a>() -> impl Parser<'a, Spaced<'a, PackageEntry<'a>>, EPackageEntry<'a>> {
@ -232,27 +223,24 @@ pub fn package_entry<'a>() -> impl Parser<'a, Spaced<'a, PackageEntry<'a>>, EPac
space0_e(
min_indent,
EPackageEntry::Space,
EPackageEntry::IndentPackageOrPath
EPackageEntry::IndentPackage
)
))
.parse(arena, state)?;
let (_, package_or_path, state) = loc!(specialize(
EPackageEntry::BadPackageOrPath,
package_or_path()
))
.parse(arena, state)?;
let (_, package_or_path, state) =
loc!(specialize(EPackageEntry::BadPackage, package_name())).parse(arena, state)?;
let entry = match opt_shorthand {
Some((shorthand, spaces_after_shorthand)) => PackageEntry {
shorthand,
spaces_after_shorthand,
package_or_path,
package_name: package_or_path,
},
None => PackageEntry {
shorthand: "",
spaces_after_shorthand: &[],
package_or_path,
package_name: package_or_path,
},
};
@ -260,113 +248,15 @@ pub fn package_entry<'a>() -> impl Parser<'a, Spaced<'a, PackageEntry<'a>>, EPac
}
}
pub fn package_or_path<'a>() -> impl Parser<'a, PackageOrPath<'a>, EPackageOrPath<'a>> {
one_of![
map!(
specialize(EPackageOrPath::BadPath, string_literal::parse()),
PackageOrPath::Path
),
map!(
and!(
specialize(EPackageOrPath::BadPackage, package_name()),
skip_first!(skip_spaces(), package_version())
),
|(name, version)| { PackageOrPath::Package(name, version) }
)
]
}
fn skip_spaces<'a, T>() -> impl Parser<'a, (), T>
where
T: 'a,
{
|_, mut state: State<'a>| {
let mut chomped = 0usize;
let mut it = state.bytes().iter();
while let Some(b' ') = it.next() {
chomped += 1;
}
if chomped == 0 {
Ok((NoProgress, (), state))
} else {
state.pos.column += chomped as u16;
state = state.advance(chomped);
Ok((MadeProgress, (), state))
}
}
}
fn package_version<'a, T>() -> impl Parser<'a, Version<'a>, T>
where
T: 'a,
{
move |_, _| todo!("TODO parse package version")
}
#[inline(always)]
pub fn package_name<'a>() -> impl Parser<'a, PackageName<'a>, EPackageName> {
use encode_unicode::CharExt;
// e.g. rtfeldman/blah
//
// Package names and accounts can be capitalized and can contain dashes.
// They cannot contain underscores or other special characters.
// They must be ASCII.
|_, mut state: State<'a>| match chomp_package_part(state.bytes()) {
Err(progress) => Err((progress, EPackageName::Account(state.pos), state)),
Ok(account) => {
let mut chomped = account.len();
if let Ok(('/', width)) = char::from_utf8_slice_start(&state.bytes()[chomped..]) {
chomped += width;
match chomp_package_part(&state.bytes()[chomped..]) {
Err(progress) => Err((
progress,
EPackageName::Pkg(state.pos.bump_column(chomped as u16)),
state,
)),
Ok(pkg) => {
chomped += pkg.len();
state.pos.column += chomped as u16;
state = state.advance(chomped);
let value = PackageName { account, pkg };
Ok((MadeProgress, value, state))
}
}
} else {
Err((
MadeProgress,
EPackageName::MissingSlash(state.pos.bump_column(chomped as u16)),
state,
))
}
}
}
}
fn chomp_package_part(buffer: &[u8]) -> Result<&str, Progress> {
use encode_unicode::CharExt;
let mut chomped = 0;
while let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) {
if ch == '-' || ch.is_ascii_alphanumeric() {
chomped += width;
} else {
// we're done
break;
}
}
if chomped == 0 {
Err(Progress::NoProgress)
} else {
let name = unsafe { std::str::from_utf8_unchecked(&buffer[..chomped]) };
Ok(name)
pub fn package_name<'a>() -> impl Parser<'a, PackageName<'a>, EPackageName<'a>> {
move |arena, state: State<'a>| {
let pos = state.pos;
specialize(EPackageName::BadPath, string_literal::parse())
.parse(arena, state)
.and_then(|(progress, text, next_state)| match text {
StrLiteral::PlainLine(text) => Ok((progress, PackageName(text), next_state)),
StrLiteral::Line(_) => Err((progress, EPackageName::Escapes(pos), next_state)),
StrLiteral::Block(_) => Err((progress, EPackageName::Multiline(pos), next_state)),
})
}
}

View file

@ -1,9 +1,8 @@
use crate::ast::{Collection, CommentOrNewline, Def, Module, Spaced};
use crate::blankspace::{space0_around_ee, space0_before_e, space0_e};
use crate::header::{
package_entry, package_name, package_or_path, AppHeader, Effects, ExposedName, ImportsEntry,
InterfaceHeader, ModuleName, PackageEntry, PlatformHeader, PlatformRequires, PlatformRigid, To,
TypedIdent,
package_entry, package_name, AppHeader, Effects, ExposedName, ImportsEntry, InterfaceHeader,
ModuleName, PackageEntry, PlatformHeader, PlatformRequires, PlatformRigid, To, TypedIdent,
};
use crate::ident::{lowercase_ident, unqualified_ident, uppercase_ident};
use crate::parser::Progress::{self, *};
@ -312,7 +311,7 @@ fn provides_to_package<'a>() -> impl Parser<'a, To<'a>, EProvides<'a>> {
|_, pos| EProvides::Identifier(pos),
map!(lowercase_ident(), To::ExistingPackage)
),
specialize(EProvides::Package, map!(package_or_path(), To::NewPackage))
specialize(EProvides::Package, map!(package_name(), To::NewPackage))
]
}

View file

@ -79,7 +79,7 @@ pub enum EHeader<'a> {
Start(Position),
ModuleName(Position),
AppName(EString<'a>, Position),
PlatformName(EPackageName, Position),
PlatformName(EPackageName<'a>, Position),
IndentStart(Position),
}
@ -96,7 +96,7 @@ pub enum EProvides<'a> {
ListStart(Position),
ListEnd(Position),
Identifier(Position),
Package(EPackageOrPath<'a>, Position),
Package(EPackageName<'a>, Position),
Space(BadInputError, Position),
}
@ -151,25 +151,19 @@ pub enum EPackages<'a> {
PackageEntry(EPackageEntry<'a>, Position),
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EPackageName {
MissingSlash(Position),
Account(Position),
Pkg(Position),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EPackageOrPath<'a> {
pub enum EPackageName<'a> {
BadPath(EString<'a>, Position),
BadPackage(EPackageName, Position),
Escapes(Position),
Multiline(Position),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EPackageEntry<'a> {
BadPackageOrPath(EPackageOrPath<'a>, Position),
BadPackage(EPackageName<'a>, Position),
Shorthand(Position),
Colon(Position),
IndentPackageOrPath(Position),
IndentPackage(Position),
Space(BadInputError, Position),
}