skip generated symbols when looking for an ident

This commit is contained in:
Folkert 2022-04-30 17:11:46 +02:00
parent 45197779ae
commit ec99d61953
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C
4 changed files with 131 additions and 54 deletions

View file

@ -175,8 +175,8 @@ impl Scope {
fn scope_contains_ident(&self, ident: &Ident) -> ContainsIdent { fn scope_contains_ident(&self, ident: &Ident) -> ContainsIdent {
let result = self.locals.contains_ident(ident); let result = self.locals.contains_ident(ident);
match result { match result {
ContainsIdent::InScope(symbol, region) => result, ContainsIdent::InScope(_, _) => result,
ContainsIdent::NotInScope(ident_id) => match self.has_imported(ident) { ContainsIdent::NotInScope(_) => match self.has_imported(ident) {
Some((symbol, region)) => ContainsIdent::InScope(symbol, region), Some((symbol, region)) => ContainsIdent::InScope(symbol, region),
None => result, None => result,
}, },
@ -217,18 +217,27 @@ impl Scope {
ident: &Ident, ident: &Ident,
region: Region, region: Region,
) -> Result<Symbol, (Region, Loc<Ident>)> { ) -> Result<Symbol, (Region, Loc<Ident>)> {
let x = self.scope_contains_ident(ident); match self.introduce_help(ident, region) {
if !self.home.is_builtin() { Err((_, original_region)) => {
dbg!(ident, &x);
}
match x {
ContainsIdent::InScope(_, original_region) => {
let shadow = Loc { let shadow = Loc {
value: ident.clone(), value: ident.clone(),
region, region,
}; };
Err((original_region, shadow)) Err((original_region, shadow))
} }
Ok(symbol) => Ok(symbol),
}
}
fn introduce_help(
&mut self,
ident: &Ident,
region: Region,
) -> Result<Symbol, (Symbol, Region)> {
match self.scope_contains_ident(ident) {
ContainsIdent::InScope(original_symbol, original_region) => {
Err((original_symbol, original_region))
}
ContainsIdent::NotPresent => { ContainsIdent::NotPresent => {
let ident_id = self.locals.introduce_into_scope(ident, region); let ident_id = self.locals.introduce_into_scope(ident, region);
Ok(Symbol::new(self.home, ident_id)) Ok(Symbol::new(self.home, ident_id))
@ -266,8 +275,8 @@ impl Scope {
) -> Result<(Symbol, Option<Symbol>), (Region, Loc<Ident>, Symbol)> { ) -> Result<(Symbol, Option<Symbol>), (Region, Loc<Ident>, Symbol)> {
let ident = &ident; let ident = &ident;
match self.scope_contains_ident(ident) { match self.introduce_help(ident, region) {
ContainsIdent::InScope(original_symbol, original_region) => { Err((original_symbol, original_region)) => {
let shadow_symbol = self.scopeless_symbol(ident, region); let shadow_symbol = self.scopeless_symbol(ident, region);
if self.abilities_store.is_ability_member_name(original_symbol) { if self.abilities_store.is_ability_member_name(original_symbol) {
@ -285,26 +294,7 @@ impl Scope {
Err((original_region, shadow, shadow_symbol)) Err((original_region, shadow, shadow_symbol))
} }
} }
ContainsIdent::NotPresent => { Ok(symbol) => Ok((symbol, None)),
let ident_id = self.locals.introduce_into_scope(ident, region);
Ok((Symbol::new(self.home, ident_id), None))
}
ContainsIdent::NotInScope(existing) => {
if existing.index() < self.exposed_ident_count {
// if the identifier is exposed, use the IdentId we already have for it
// other modules depend on the symbol having that IdentId
let symbol = Symbol::new(self.home, existing);
self.locals.in_scope.set(existing.index(), true);
self.locals.regions[existing.index()] = region;
Ok((symbol, None))
} else {
let ident_id = self.locals.introduce_into_scope_duplicate(existing, region);
Ok((Symbol::new(self.home, ident_id), None))
}
}
} }
} }

View file

@ -1,3 +1,5 @@
use std::{fmt::Debug, mem::ManuallyDrop};
/// Collection of small (length < u16::MAX) strings, stored compactly. /// Collection of small (length < u16::MAX) strings, stored compactly.
#[derive(Clone, Default, PartialEq, Eq)] #[derive(Clone, Default, PartialEq, Eq)]
pub struct SmallStringInterner { pub struct SmallStringInterner {
@ -5,10 +7,54 @@ pub struct SmallStringInterner {
// lengths could be Vec<u8>, but the mono refcount generation // lengths could be Vec<u8>, but the mono refcount generation
// stringifies Layout's and that creates > 256 character strings // stringifies Layout's and that creates > 256 character strings
lengths: Vec<u16>, lengths: Vec<Length>,
offsets: Vec<u32>, offsets: Vec<u32>,
} }
/// Signed length tag for a single interned string.
///
/// The sign of the wrapped `i16` encodes what kind of entry the string is,
/// and the magnitude is the string's length in bytes (see [`Length::kind`]):
///
/// * `0`      – the empty string
/// * positive – a regular interned string of that many bytes
/// * negative – a generated name of `|value|` bytes
///
/// `#[repr(transparent)]` guarantees the same layout as a bare `i16`.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
struct Length(i16);

impl std::fmt::Debug for Length {
    /// Formats the decoded [`Kind`] rather than the raw signed integer.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.kind().fmt(f)
    }
}

impl Length {
    /// Decodes the sign/magnitude representation into a [`Kind`].
    #[inline(always)]
    const fn kind(self) -> Kind {
        if self.0 == 0 {
            Kind::Emtpy
        } else if self.0 > 0 {
            Kind::Interned(self.0 as usize)
        } else {
            // `unsigned_abs` is total: unlike `abs`, it cannot overflow for
            // `i16::MIN` (which would panic with overflow checks enabled, or
            // wrap and sign-extend to a huge `usize` without them).
            Kind::Generated(self.0.unsigned_abs() as usize)
        }
    }

    /// Builds the tag for a regular (non-generated) string of `len` bytes.
    ///
    /// This is a plain `as` cast, so a `len` above `i16::MAX` wraps around;
    /// callers are responsible for checking the bound before calling.
    #[inline(always)]
    const fn from_usize(len: usize) -> Self {
        Self(len as i16)
    }
}

/// Decoded view of a [`Length`]; the payload is the string length in bytes.
enum Kind {
    /// A generated name (stored as a negative length).
    Generated(usize),
    /// The empty string (stored as zero). The variant name is misspelled;
    /// it is kept as-is because other code matches on it by this name.
    Emtpy,
    /// A regular interned string (stored as a positive length).
    Interned(usize),
}

impl Debug for Kind {
    /// Writes the variant name, followed by the byte length in parentheses
    /// for the variants that carry one.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Generated(arg0) => write!(f, "Generated({})", arg0),
            Self::Emtpy => write!(f, "Emtpy"),
            Self::Interned(arg0) => write!(f, "Interned({})", arg0),
        }
    }
}
impl std::fmt::Debug for SmallStringInterner { impl std::fmt::Debug for SmallStringInterner {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let strings: Vec<_> = self.iter().collect(); let strings: Vec<_> = self.iter().collect();
@ -33,7 +79,19 @@ impl SmallStringInterner {
} }
} }
pub const fn from_parts(buffer: Vec<u8>, lengths: Vec<u16>, offsets: Vec<u32>) -> Self { /// # Safety
///
/// lengths must be non-negative integers less than 2^15
pub unsafe fn from_parts(buffer: Vec<u8>, lengths: Vec<u16>, offsets: Vec<u32>) -> Self {
// the recommended way of transmuting a vector
let mut lengths = ManuallyDrop::new(lengths);
let lengths = Vec::from_raw_parts(
lengths.as_mut_ptr().cast(),
lengths.len(),
lengths.capacity(),
);
Self { Self {
buffer, buffer,
lengths, lengths,
@ -49,10 +107,10 @@ impl SmallStringInterner {
pub fn insert(&mut self, string: &str) -> usize { pub fn insert(&mut self, string: &str) -> usize {
let bytes = string.as_bytes(); let bytes = string.as_bytes();
assert!(bytes.len() < u16::MAX as usize); assert!(bytes.len() < (1 << 15));
let offset = self.buffer.len() as u32; let offset = self.buffer.len() as u32;
let length = bytes.len() as u16; let length = Length::from_usize(bytes.len());
let index = self.lengths.len(); let index = self.lengths.len();
@ -90,9 +148,11 @@ impl SmallStringInterner {
let offset = self.buffer.len(); let offset = self.buffer.len();
write!(self.buffer, "{}", index).unwrap(); write!(self.buffer, "{}", index).unwrap();
let length = self.buffer.len() - offset;
self.lengths.push(length as u16); // this is a generated name, so store it as a negative length
let length = Length(-((self.buffer.len() - offset) as i16));
self.lengths.push(length);
self.offsets.push(offset as u32); self.offsets.push(offset as u32);
index index
@ -105,34 +165,48 @@ impl SmallStringInterner {
#[inline(always)] #[inline(always)]
pub fn find_indices<'a>(&'a self, string: &'a str) -> impl Iterator<Item = usize> + 'a { pub fn find_indices<'a>(&'a self, string: &'a str) -> impl Iterator<Item = usize> + 'a {
let target_length = string.len() as u16; let target_length = string.len();
// there can be gaps in the parts of the string that we use (because of updates) // there can be gaps in the parts of the string that we use (because of updates)
// hence we can't just sum the lengths we've seen so far to get the next offset // hence we can't just sum the lengths we've seen so far to get the next offset
self.lengths self.lengths
.iter() .iter()
.enumerate() .enumerate()
.filter_map(move |(index, length)| { .filter_map(move |(index, length)| match length.kind() {
if *length == target_length { Kind::Generated(_) => None,
let offset = self.offsets[index]; Kind::Emtpy => {
let slice = &self.buffer[offset as usize..][..*length as usize]; if target_length == 0 {
Some(index)
if string.as_bytes() == slice { } else {
return Some(index); None
} }
} }
Kind::Interned(length) => {
if target_length == length {
let offset = self.offsets[index];
let slice = &self.buffer[offset as usize..][..length];
None if string.as_bytes() == slice {
return Some(index);
}
}
None
}
}) })
} }
fn get(&self, index: usize) -> &str { fn get(&self, index: usize) -> &str {
let length = self.lengths[index] as usize; match self.lengths[index].kind() {
let offset = self.offsets[index] as usize; Kind::Emtpy => "",
Kind::Generated(length) | Kind::Interned(length) => {
let offset = self.offsets[index] as usize;
let bytes = &self.buffer[offset..][..length]; let bytes = &self.buffer[offset..][..length];
unsafe { std::str::from_utf8_unchecked(bytes) } unsafe { std::str::from_utf8_unchecked(bytes) }
}
}
} }
pub fn try_get(&self, index: usize) -> Option<&str> { pub fn try_get(&self, index: usize) -> Option<&str> {
@ -151,7 +225,7 @@ impl SmallStringInterner {
// `buffer`, we can update them in-place // `buffer`, we can update them in-place
self.buffer.extend(new_string.bytes()); self.buffer.extend(new_string.bytes());
self.lengths[index] = length as u16; self.lengths[index] = Length::from_usize(length);
self.offsets[index] = offset as u32; self.offsets[index] = offset as u32;
} }

View file

@ -531,7 +531,7 @@ impl IdentId {
/// Stores a mapping between Ident and IdentId. /// Stores a mapping between Ident and IdentId.
#[derive(Clone, Debug, Default, PartialEq, Eq)] #[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct IdentIds { pub struct IdentIds {
interner: SmallStringInterner, pub interner: SmallStringInterner,
} }
impl IdentIds { impl IdentIds {
@ -759,11 +759,13 @@ macro_rules! define_builtins {
} }
}; };
let interner = SmallStringInterner::from_parts ( // Safety: all lengths are non-negative and smaller than 2^15
let interner = unsafe {
SmallStringInterner::from_parts (
BUFFER.as_bytes().to_vec(), BUFFER.as_bytes().to_vec(),
LENGTHS.to_vec(), LENGTHS.to_vec(),
OFFSETS.to_vec(), OFFSETS.to_vec(),
); )};
IdentIds{ interner } IdentIds{ interner }
}; };

View file

@ -8,4 +8,15 @@ app "helloWorld"
imports [] imports []
provides [ main ] to pf provides [ main ] to pf
main = "Hello, World!\n" a =
foobar = "Hello"
foobar
b =
foobar = "World"
foobar
# main = "Hello, World!\n"
main = Str.concat a b