Get suffix from IdentId or Symbol

IdentId will now reserve the MSB for flagging whether the ident
is suffixed with a `!`.
We will use this later to constrain identifiers to be effectful or pure.
This commit is contained in:
Agus Zubiaga 2024-10-16 10:08:11 -03:00
parent 69e026f8bb
commit 2c8571537e
No known key found for this signature in database
2 changed files with 131 additions and 42 deletions

View file

@ -362,3 +362,60 @@ impl fmt::Display for Uppercase {
fmt::Display::fmt(&self.0, f)
}
}
/// Suffix marker carried by an identifier's name.
///
/// A name that ends with `!` is [`IdentSuffix::Bang`]; every other name is
/// [`IdentSuffix::None`].
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum IdentSuffix {
    /// The name does not end with `!`.
    None,
    /// The name ends with `!`.
    Bang,
}

impl IdentSuffix {
    /// Determines the suffix of `name` by looking at its final byte.
    ///
    /// Only the last byte is inspected, so a `!` anywhere else in the name
    /// (e.g. `"foo!bar"`) yields [`IdentSuffix::None`].
    ///
    /// # Panics
    ///
    /// Panics if `name` is empty.
    #[inline(always)]
    pub const fn from_name(name: &str) -> Self {
        // Checking bytes directly so it can be const.
        // This is fine since the suffix is ASCII: in UTF-8 the byte `!`
        // never occurs inside a multi-byte sequence.
        let bytes = name.as_bytes();
        let len = bytes.len();

        // Enforced in every build profile. With only a debug assertion, a
        // release build would still panic on an empty name, but through the
        // `len - 1` index below and with an unrelated message.
        assert!(len > 0, "Ident name must not be empty");

        if bytes[len - 1] == b'!' {
            IdentSuffix::Bang
        } else {
            IdentSuffix::None
        }
    }

    /// Returns `true` for [`IdentSuffix::Bang`] and `false` for
    /// [`IdentSuffix::None`].
    pub const fn is_suffixed(&self) -> bool {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match *self {
            IdentSuffix::None => false,
            IdentSuffix::Bang => true,
        }
    }
}
#[cfg(test)]
mod suffix_test {
    use crate::ident::IdentSuffix;

    /// A name whose last character is `!` is reported as `Bang`.
    #[test]
    fn ends_with_bang() {
        let suffix = IdentSuffix::from_name("foo!");
        assert_eq!(suffix, IdentSuffix::Bang);
    }

    /// A name with no `!` at all is reported as `None`.
    #[test]
    fn ends_without_bang() {
        let suffix = IdentSuffix::from_name("foo");
        assert_eq!(suffix, IdentSuffix::None);
    }

    /// A `!` that is not the final character does not count as a suffix.
    #[test]
    fn invalid() {
        let suffix = IdentSuffix::from_name("foo!bar");
        assert_eq!(suffix, IdentSuffix::None);
    }

    /// An empty name is rejected with a panic.
    #[test]
    #[should_panic]
    fn empty_panics() {
        let _ = IdentSuffix::from_name("");
    }
}

View file

@ -1,4 +1,4 @@
use crate::ident::{Ident, Lowercase, ModuleName};
use crate::ident::{Ident, IdentSuffix, Lowercase, ModuleName};
use crate::module_err::{ModuleError, ModuleResult};
use roc_collections::{SmallStringInterner, VecMap};
use roc_error_macros::internal_error;
@ -79,7 +79,7 @@ impl Symbol {
Self {
module_id: module_id.0,
ident_id: ident_id.0,
ident_id: ident_id.raw(),
}
}
@ -88,13 +88,17 @@ impl Symbol {
}
pub const fn ident_id(self) -> IdentId {
IdentId(self.ident_id)
IdentId::from_raw(self.ident_id)
}
/// Delegates to `is_builtin` on the module id of this symbol.
pub const fn is_builtin(self) -> bool {
    let module_id = self.module_id();
    module_id.is_builtin()
}
/// Returns the suffix reported by this symbol's `IdentId`.
pub const fn suffix(self) -> IdentSuffix {
    let ident_id = self.ident_id();
    ident_id.suffix()
}
/// Returns `true` when `derivable_ability` yields a value for this symbol.
pub fn is_derivable_ability(self) -> bool {
    let derivable = self.derivable_ability();
    derivable.is_some()
}
@ -146,7 +150,7 @@ impl Symbol {
.unwrap_or_else(|| {
internal_error!(
"ident_string's IdentIds did not contain an entry for {} in module {:?}",
self.ident_id().0,
self.ident_id().index(),
self.module_id()
)
})
@ -246,11 +250,9 @@ impl fmt::Debug for Symbol {
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let module_id = self.module_id();
let ident_id = self.ident_id();
let ident_id = self.ident_id().index();
match ident_id {
IdentId(value) => write!(f, "{module_id:?}.{value:?}"),
}
write!(f, "{module_id:?}.{ident_id:?}")
}
}
@ -319,10 +321,6 @@ impl Interns {
}
}
}
pub fn from_index(module_id: ModuleId, ident_id: u32) -> Symbol {
Symbol::new(module_id, IdentId(ident_id))
}
}
pub fn get_module_ident_ids<'a>(
@ -637,28 +635,59 @@ impl ModuleIds {
}
}
/// An ID that is assigned to interned string identifiers within a module.
/// By turning these strings into numbers, post-canonicalization processes
/// like unification and optimization can run a lot faster.
///
/// This ID is unique within a given module, not globally - so to turn this back into
/// a string, you would need a ModuleId, an IdentId, and a Map<ModuleId, Map<IdentId, String>>.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct IdentId(u32);
mod ident_id {
use crate::ident::IdentSuffix;
impl IdentId {
pub const fn index(self) -> usize {
self.0 as usize
}
/// # Safety
/// An ID that is assigned to interned string identifiers within a module.
/// By turning these strings into numbers, post-canonicalization processes
/// like unification and optimization can run a lot faster.
///
/// The index is not guaranteed to know to exist.
pub unsafe fn from_index(index: u32) -> Self {
Self(index)
/// This ID is unique within a given module, not globally - so to turn this back into
/// a string, you would need a ModuleId, an IdentId, and a Map<ModuleId, Map<IdentId, String>>.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct IdentId(u32);
const BANG_FLAG: u32 = 1u32 << 31;
const UNSUFFIXED: u32 = !BANG_FLAG;
impl IdentId {
    /// Returns the index part of this id, with the suffix flag bit masked off.
    pub const fn index(self) -> usize {
        (self.0 & UNSUFFIXED) as usize
    }

    /// Decodes the suffix from the flag bit: `Bang` when `BANG_FLAG` is set,
    /// `None` otherwise.
    pub const fn suffix(self) -> IdentSuffix {
        if self.0 & BANG_FLAG != 0 {
            IdentSuffix::Bang
        } else {
            IdentSuffix::None
        }
    }

    /// Returns the stored `u32` unchanged, i.e. index bits plus the flag bit.
    pub(super) const fn raw(self) -> u32 {
        self.0
    }

    /// Wraps a `u32` as-is; no masking or validation is performed, so the
    /// value is expected to already carry the correct flag bit.
    pub(super) const fn from_raw(raw: u32) -> Self {
        Self(raw)
    }

    /// Builds an id from an index and an explicit suffix.
    ///
    /// # Panics
    ///
    /// Panics if `index` does not fit in the bits left over once the flag
    /// bit is reserved (i.e. if it is greater than `UNSUFFIXED`).
    pub(super) const fn from_index(index: usize, suffix: IdentSuffix) -> Self {
        // Compare in `usize` *before* narrowing. Checking `index as u32`
        // would test the already-truncated value, letting an index of
        // 2^32 or more slip through on 64-bit targets and alias another id.
        assert!(index <= UNSUFFIXED as usize, "IdentId index too large");

        // Lossless: the assertion above bounds `index` by `UNSUFFIXED`.
        let index = index as u32;

        match suffix {
            IdentSuffix::None => Self(index),
            IdentSuffix::Bang => Self(index | BANG_FLAG),
        }
    }

    /// Builds an id from an index, deriving the suffix from `name` via
    /// `IdentSuffix::from_name`.
    pub(super) const fn from_index_named(index: usize, name: &str) -> Self {
        Self::from_index(index, IdentSuffix::from_name(name))
    }
}
}
pub use ident_id::IdentId;
/// Stores a mapping between Ident and IdentId.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct IdentIds {
@ -670,15 +699,15 @@ impl IdentIds {
self.interner
.iter()
.enumerate()
.map(|(index, ident)| (IdentId(index as u32), ident))
.map(|(index, ident)| (IdentId::from_index_named(index, ident), ident))
}
pub fn add_str(&mut self, ident_name: &str) -> IdentId {
IdentId(self.interner.insert(ident_name) as u32)
IdentId::from_index_named(self.interner.insert(ident_name), ident_name)
}
pub fn duplicate_ident(&mut self, ident_id: IdentId) -> IdentId {
IdentId(self.interner.duplicate(ident_id.0 as usize) as u32)
IdentId::from_index(self.interner.duplicate(ident_id.index()), ident_id.suffix())
}
pub fn get_or_insert(&mut self, name: &str) -> IdentId {
@ -692,7 +721,7 @@ impl IdentIds {
// TODO fix when same ident_name is present multiple times, see issue #2548
pub fn update_key(&mut self, old_name: &str, new_name: &str) -> Result<IdentId, String> {
match self.interner.find_and_update(old_name, new_name) {
Some(index) => Ok(IdentId(index as u32)),
Some(index) => Ok(IdentId::from_index_named(index, new_name)),
None => Err(format!("The identifier {old_name:?} is not in IdentIds")),
}
}
@ -705,12 +734,12 @@ impl IdentIds {
/// This is used, for example, during canonicalization of an Expr::Closure
/// to generate a unique symbol to refer to that closure.
pub fn gen_unique(&mut self) -> IdentId {
IdentId(self.interner.insert_index_str() as u32)
IdentId::from_index(self.interner.insert_index_str(), IdentSuffix::None)
}
pub fn is_generated_id(&self, id: IdentId) -> bool {
self.interner
.try_get(id.0 as usize)
.try_get(id.index())
.map_or(false, |str| str.starts_with(|c: char| c.is_ascii_digit()))
}
@ -718,18 +747,18 @@ impl IdentIds {
pub fn get_id(&self, ident_name: &str) -> Option<IdentId> {
self.interner
.find_index(ident_name)
.map(|i| IdentId(i as u32))
.map(|i| IdentId::from_index_named(i, ident_name))
}
#[inline(always)]
pub fn get_id_many<'a>(&'a self, ident_name: &'a str) -> impl Iterator<Item = IdentId> + 'a {
self.interner
.find_indices(ident_name)
.map(|i| IdentId(i as u32))
.map(|i| IdentId::from_index_named(i, ident_name))
}
pub fn get_name(&self, id: IdentId) -> Option<&str> {
self.interner.try_get(id.0 as usize)
self.interner.try_get(id.index())
}
pub fn get_name_str_res(&self, ident_id: IdentId) -> ModuleResult<&str> {
@ -1013,10 +1042,10 @@ macro_rules! define_builtins {
$(
$(
$(#[$ident_meta])*
pub const $ident_const: Symbol = Symbol::new(ModuleId::$module_const, IdentId($ident_id));
pub const $ident_const: Symbol = Symbol::new(ModuleId::$module_const, IdentId::from_index_named($ident_id, $ident_name));
)*
$(
pub const $u_ident_const: Symbol = Symbol::new(ModuleId::$module_const, IdentId($u_ident_id));
pub const $u_ident_const: Symbol = Symbol::new(ModuleId::$module_const, IdentId::from_index_named($u_ident_id, $u_ident_name));
)*
)+
@ -1038,9 +1067,11 @@ macro_rules! define_builtins {
// release builds, this condition is either `if true`
// or `if false` and will get optimized out.
debug_assert_eq!($exposed_apply_type, $ident_name.chars().next().unwrap().is_uppercase());
// Types should not be suffixed
debug_assert!(!IdentSuffix::from_name($ident_name).is_suffixed());
if $exposed_apply_type {
scope.insert($ident_name.into(), (Symbol::new(ModuleId::$module_const, IdentId($ident_id)), Region::zero()));
scope.insert($ident_name.into(), (Symbol::new(ModuleId::$module_const, IdentId::from_index($ident_id, IdentSuffix::None)), Region::zero()));
}
)?
)*
@ -1059,7 +1090,7 @@ macro_rules! define_builtins {
$(
$(
if $exposed_type {
($ident_name, (Symbol::new(ModuleId::$module_const, IdentId($ident_id)), Region::zero()))
($ident_name, (Symbol::new(ModuleId::$module_const, IdentId::from_raw($ident_id)), Region::zero()))
} else {
unreachable!()
},
@ -1474,6 +1505,7 @@ define_builtins! {
87 LIST_CLONE: "clone"
88 LIST_LEN_USIZE: "lenUsize"
89 LIST_CONCAT_UTF8: "concatUtf8"
90 LIST_WALK_FX: "walk!"
}
7 RESULT: "Result" => {
0 RESULT_RESULT: "Result" exposed_type=true // the Result.Result type alias