skip generated symbols when looking for an ident

This commit is contained in:
Folkert 2022-04-30 17:11:46 +02:00
parent 45197779ae
commit ec99d61953
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C
4 changed files with 131 additions and 54 deletions

View file

@ -175,8 +175,8 @@ impl Scope {
fn scope_contains_ident(&self, ident: &Ident) -> ContainsIdent {
let result = self.locals.contains_ident(ident);
match result {
ContainsIdent::InScope(symbol, region) => result,
ContainsIdent::NotInScope(ident_id) => match self.has_imported(ident) {
ContainsIdent::InScope(_, _) => result,
ContainsIdent::NotInScope(_) => match self.has_imported(ident) {
Some((symbol, region)) => ContainsIdent::InScope(symbol, region),
None => result,
},
@ -217,18 +217,27 @@ impl Scope {
ident: &Ident,
region: Region,
) -> Result<Symbol, (Region, Loc<Ident>)> {
let x = self.scope_contains_ident(ident);
if !self.home.is_builtin() {
dbg!(ident, &x);
}
match x {
ContainsIdent::InScope(_, original_region) => {
match self.introduce_help(ident, region) {
Err((_, original_region)) => {
let shadow = Loc {
value: ident.clone(),
region,
};
Err((original_region, shadow))
}
Ok(symbol) => Ok(symbol),
}
}
fn introduce_help(
&mut self,
ident: &Ident,
region: Region,
) -> Result<Symbol, (Symbol, Region)> {
match self.scope_contains_ident(ident) {
ContainsIdent::InScope(original_symbol, original_region) => {
Err((original_symbol, original_region))
}
ContainsIdent::NotPresent => {
let ident_id = self.locals.introduce_into_scope(ident, region);
Ok(Symbol::new(self.home, ident_id))
@ -266,8 +275,8 @@ impl Scope {
) -> Result<(Symbol, Option<Symbol>), (Region, Loc<Ident>, Symbol)> {
let ident = &ident;
match self.scope_contains_ident(ident) {
ContainsIdent::InScope(original_symbol, original_region) => {
match self.introduce_help(ident, region) {
Err((original_symbol, original_region)) => {
let shadow_symbol = self.scopeless_symbol(ident, region);
if self.abilities_store.is_ability_member_name(original_symbol) {
@ -285,26 +294,7 @@ impl Scope {
Err((original_region, shadow, shadow_symbol))
}
}
ContainsIdent::NotPresent => {
let ident_id = self.locals.introduce_into_scope(ident, region);
Ok((Symbol::new(self.home, ident_id), None))
}
ContainsIdent::NotInScope(existing) => {
if existing.index() < self.exposed_ident_count {
// if the identifier is exposed, use the IdentId we already have for it
// other modules depend on the symbol having that IdentId
let symbol = Symbol::new(self.home, existing);
self.locals.in_scope.set(existing.index(), true);
self.locals.regions[existing.index()] = region;
Ok((symbol, None))
} else {
let ident_id = self.locals.introduce_into_scope_duplicate(existing, region);
Ok((Symbol::new(self.home, ident_id), None))
}
}
Ok(symbol) => Ok((symbol, None)),
}
}

View file

@ -1,3 +1,5 @@
use std::{fmt::Debug, mem::ManuallyDrop};
/// Collection of small (length < u16::MAX) strings, stored compactly.
#[derive(Clone, Default, PartialEq, Eq)]
pub struct SmallStringInterner {
@ -5,10 +7,54 @@ pub struct SmallStringInterner {
// lengths could be Vec<u8>, but the mono refcount generation
// stringifies Layout's and that creates > 256 character strings
lengths: Vec<u16>,
lengths: Vec<Length>,
offsets: Vec<u32>,
}
/// Sign-tagged length of one string in the interner, packed into an `i16`.
///
/// * `0` is the empty string,
/// * a positive value is the byte length of a regular interned string,
/// * a negative value is the negated byte length of a generated name.
///
/// The wrapper is `repr(transparent)` so it has the same layout as the
/// underlying integer.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
struct Length(i16);

impl std::fmt::Debug for Length {
    /// Formats the decoded [`Kind`] instead of the raw signed integer.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.kind().fmt(f)
    }
}

impl Length {
    /// Decodes the sign-tagged integer into its [`Kind`].
    #[inline(always)]
    const fn kind(self) -> Kind {
        if self.0 == 0 {
            Kind::Emtpy
        } else if self.0 > 0 {
            Kind::Interned(self.0 as usize)
        } else {
            // `unsigned_abs` is defined for every negative value, including
            // `i16::MIN`, where the signed `abs` would overflow.
            Kind::Generated(self.0.unsigned_abs() as usize)
        }
    }

    /// Builds the length of a regular (non-generated) string of `len` bytes.
    ///
    /// # Panics
    ///
    /// Panics when `len` exceeds `i16::MAX`. A larger value would wrap to a
    /// negative number and be mistaken for a generated name.
    #[inline(always)]
    const fn from_usize(len: usize) -> Self {
        assert!(
            len <= i16::MAX as usize,
            "string length does not fit in a Length"
        );

        Self(len as i16)
    }
}

/// Decoded form of a [`Length`]; each non-empty variant carries the byte length.
enum Kind {
    /// A generated name (stored as a negative length).
    Generated(usize),
    /// The empty string. The misspelling is kept because the variant is
    /// referenced by this name outside this block.
    Emtpy,
    /// A regular interned string (stored as a positive length).
    Interned(usize),
}

impl Debug for Kind {
    /// Writes the variant name followed by its length, e.g. `Interned(3)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Generated(arg0) => write!(f, "Generated({})", arg0),
            Self::Emtpy => write!(f, "Emtpy"),
            Self::Interned(arg0) => write!(f, "Interned({})", arg0),
        }
    }
}
impl std::fmt::Debug for SmallStringInterner {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let strings: Vec<_> = self.iter().collect();
@ -33,7 +79,19 @@ impl SmallStringInterner {
}
}
pub const fn from_parts(buffer: Vec<u8>, lengths: Vec<u16>, offsets: Vec<u32>) -> Self {
/// # Safety
///
/// lengths must be non-negative integers less than 2^15
pub unsafe fn from_parts(buffer: Vec<u8>, lengths: Vec<u16>, offsets: Vec<u32>) -> Self {
// the recommended way of transmuting a vector
let mut lengths = ManuallyDrop::new(lengths);
let lengths = Vec::from_raw_parts(
lengths.as_mut_ptr().cast(),
lengths.len(),
lengths.capacity(),
);
Self {
buffer,
lengths,
@ -49,10 +107,10 @@ impl SmallStringInterner {
pub fn insert(&mut self, string: &str) -> usize {
let bytes = string.as_bytes();
assert!(bytes.len() < u16::MAX as usize);
assert!(bytes.len() < (1 << 15));
let offset = self.buffer.len() as u32;
let length = bytes.len() as u16;
let length = Length::from_usize(bytes.len());
let index = self.lengths.len();
@ -90,9 +148,11 @@ impl SmallStringInterner {
let offset = self.buffer.len();
write!(self.buffer, "{}", index).unwrap();
let length = self.buffer.len() - offset;
self.lengths.push(length as u16);
// this is a generated name, so store it as a negative length
let length = Length(-((self.buffer.len() - offset) as i16));
self.lengths.push(length);
self.offsets.push(offset as u32);
index
@ -105,17 +165,26 @@ impl SmallStringInterner {
#[inline(always)]
pub fn find_indices<'a>(&'a self, string: &'a str) -> impl Iterator<Item = usize> + 'a {
let target_length = string.len() as u16;
let target_length = string.len();
// there can be gaps in the parts of the string that we use (because of updates)
// hence we can't just sum the lengths we've seen so far to get the next offset
self.lengths
.iter()
.enumerate()
.filter_map(move |(index, length)| {
if *length == target_length {
.filter_map(move |(index, length)| match length.kind() {
Kind::Generated(_) => None,
Kind::Emtpy => {
if target_length == 0 {
Some(index)
} else {
None
}
}
Kind::Interned(length) => {
if target_length == length {
let offset = self.offsets[index];
let slice = &self.buffer[offset as usize..][..*length as usize];
let slice = &self.buffer[offset as usize..][..length];
if string.as_bytes() == slice {
return Some(index);
@ -123,17 +192,22 @@ impl SmallStringInterner {
}
None
}
})
}
fn get(&self, index: usize) -> &str {
let length = self.lengths[index] as usize;
match self.lengths[index].kind() {
Kind::Emtpy => "",
Kind::Generated(length) | Kind::Interned(length) => {
let offset = self.offsets[index] as usize;
let bytes = &self.buffer[offset..][..length];
unsafe { std::str::from_utf8_unchecked(bytes) }
}
}
}
pub fn try_get(&self, index: usize) -> Option<&str> {
if index < self.lengths.len() {
@ -151,7 +225,7 @@ impl SmallStringInterner {
// `buffer`, we can update them in-place
self.buffer.extend(new_string.bytes());
self.lengths[index] = length as u16;
self.lengths[index] = Length::from_usize(length);
self.offsets[index] = offset as u32;
}

View file

@ -531,7 +531,7 @@ impl IdentId {
/// Stores a mapping between Ident and IdentId.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct IdentIds {
interner: SmallStringInterner,
pub interner: SmallStringInterner,
}
impl IdentIds {
@ -759,11 +759,13 @@ macro_rules! define_builtins {
}
};
let interner = SmallStringInterner::from_parts (
// Safety: all lengths are non-negative and smaller than 2^15
let interner = unsafe {
SmallStringInterner::from_parts (
BUFFER.as_bytes().to_vec(),
LENGTHS.to_vec(),
OFFSETS.to_vec(),
);
)};
IdentIds{ interner }
};

View file

@ -8,4 +8,15 @@ app "helloWorld"
imports []
provides [ main ] to pf
main = "Hello, World!\n"
a =
foobar = "Hello"
foobar
b =
foobar = "World"
foobar
# main = "Hello, World!\n"
main = Str.concat a b