feat: move world implementation (#1183)

* feat: move world implementation

* dev: remove vector ir

* fix: errors

* fix: clippy

* fix: don't build world in web

* fix: unused patches

* fix: fmt

* fix: docs example

* fix: doc examples
This commit is contained in:
Myriad-Dreamin 2025-01-19 08:25:35 +08:00 committed by GitHub
parent a9437b2772
commit 6180e343e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
122 changed files with 7829 additions and 439 deletions

View file

@ -0,0 +1,60 @@
[package]
name = "tinymist-std"
description = "Additional functions wrapping Rust's std library."
authors.workspace = true
version.workspace = true
license.workspace = true
edition.workspace = true
homepage.workspace = true
repository.workspace = true
[dependencies]
comemo.workspace = true
ecow.workspace = true
parking_lot.workspace = true
web-time.workspace = true
wasm-bindgen = { workspace = true, optional = true }
js-sys = { workspace = true, optional = true }
bitvec = { version = "1" }
dashmap = { version = "5" }
# tiny-skia-path.workspace = true
path-clean.workspace = true
base64.workspace = true
fxhash.workspace = true
rustc-hash.workspace = true
siphasher.workspace = true
serde = { workspace = true, features = ["derive"] }
serde_repr = "0.1"
serde_json.workspace = true
serde_with.workspace = true
rkyv = { workspace = true, optional = true }
typst = { workspace = true, optional = true }
typst-shim = { workspace = true, optional = true }
[dev-dependencies]
hex.workspace = true
[features]
default = ["full"]
full = ["web", "rkyv", "typst"]
typst = ["dep:typst", "dep:typst-shim"]
rkyv = ["dep:rkyv", "rkyv/alloc", "rkyv/archive_le"]
rkyv-validation = ["dep:rkyv", "rkyv/validation"]
# flat-vector = ["rkyv", "rkyv-validation"]
__web = ["dep:wasm-bindgen", "dep:js-sys"]
web = ["__web"]
system = []
bi-hash = []
item-dashmap = []
[lints]
workspace = true

View file

@ -0,0 +1,5 @@
# reflexo
A portable format for showing (Typst) documents in a web browser.
See [Typst.ts](https://github.com/Myriad-Dreamin/typst.ts)

View file

@ -0,0 +1,111 @@
use std::{collections::HashMap, num::NonZeroU32};
use crate::hash::Fingerprint;
/// Hard cap on the number of shards.
/// With too many shards the memory overhead becomes unacceptable.
const MAX_SHARD_SIZE: u32 = 512;

/// Returns the default shard size, which is computed once and then reused.
fn default_shard_size() -> NonZeroU32 {
    static ITEM_SHARD_SIZE: std::sync::OnceLock<NonZeroU32> = std::sync::OnceLock::new();

    *ITEM_SHARD_SIZE.get_or_init(|| {
        // This detection is from rayon; assume a single thread when the
        // parallelism cannot be queried.
        let threads = match std::thread::available_parallelism() {
            Ok(count) => count.get(),
            Err(_) => 1,
        };
        // By testing, twice the number of threads was found to be optimal.
        // A valid shard size is additionally a power of two.
        let wanted = (threads.next_power_of_two() * 2) as u32;
        let capped = wanted.min(MAX_SHARD_SIZE);
        // Check for zero here, once, instead of at every use site.
        NonZeroU32::new(capped).unwrap()
    })
}
/// A single shard: a fingerprint-keyed hash map behind a read-write lock.
type FMapBase<V> = parking_lot::RwLock<HashMap<Fingerprint, V>>;
/// A map that shards items by their fingerprint.
///
/// It is fast since a fingerprint could split items into different shards
/// efficiently.
///
/// Note: If a fingerprint is calculated from a hash function, it is not
/// guaranteed that the fingerprint is evenly distributed. Thus, in that case,
/// the performance of this map is not guaranteed.
pub struct FingerprintMap<V> {
/// Mask applied to the lower 32 bits of a fingerprint to select a shard.
mask: u32,
/// The shards; each one carries its own lock.
shards: Vec<parking_lot::RwLock<HashMap<Fingerprint, V>>>,
}
impl<V> Default for FingerprintMap<V> {
/// Creates a map using the shard size returned by `default_shard_size`.
fn default() -> Self {
Self::new(default_shard_size())
}
}
impl<V> FingerprintMap<V> {
    /// Creates a new `FingerprintMap` with (roughly) the given shard size.
    ///
    /// The requested size is capped at `MAX_SHARD_SIZE` and rounded up to a
    /// power of two, so that a shard can be selected with a bit mask.
    pub fn new(shard_size: NonZeroU32) -> Self {
        // Cap *before* rounding: `next_power_of_two` overflows for requests
        // above 2^31, whereas the capped value is at most `MAX_SHARD_SIZE`,
        // which is itself a power of two. For every request that did not
        // overflow before, the result is unchanged.
        let shard_size = shard_size.get().min(MAX_SHARD_SIZE).next_power_of_two();
        // `is_power_of_two` is false for zero, so this also rules out an empty
        // shard list. `shard` relies on this invariant for its unchecked access.
        assert!(
            shard_size.is_power_of_two(),
            "shard size must be a power of two"
        );

        Self {
            mask: shard_size - 1,
            shards: (0..shard_size)
                .map(|_| parking_lot::RwLock::new(HashMap::new()))
                .collect(),
        }
    }

    /// Consumes the map and iterates over all of its items, shard by shard.
    pub fn into_items(self) -> impl Iterator<Item = (Fingerprint, V)> {
        self.shards
            .into_iter()
            .flat_map(|shard| shard.into_inner().into_iter())
    }

    /// Returns the shard responsible for the fingerprint `fg`.
    ///
    /// The shard is selected by masking the lower 32 bits of the fingerprint.
    pub fn shard(&self, fg: Fingerprint) -> &FMapBase<V> {
        let shards = &self.shards;
        let route_idx = (fg.lower32() & self.mask) as usize;

        // check that the route index is within the bounds of the shards
        debug_assert!(route_idx < shards.len());
        // SAFETY: `new` creates exactly `mask + 1` shards and nothing in this
        // impl resizes the vector afterwards (`as_mut_slice` only hands out a
        // slice), so `route_idx <= mask < shards.len()`.
        unsafe { shards.get_unchecked(route_idx) }
    }

    /// Returns all shards as a mutable slice.
    ///
    /// Useful for parallel iteration.
    pub fn as_mut_slice(&mut self) -> &mut [FMapBase<V>] {
        &mut self.shards
    }

    /// Returns whether the map contains an item for the fingerprint `fg`.
    ///
    /// Takes a read lock on the responsible shard for the duration of the call.
    pub fn contains_key(&self, fg: &Fingerprint) -> bool {
        self.shard(*fg).read().contains_key(fg)
    }
}
#[cfg(test)]
mod tests {
    use super::default_shard_size;

    /// The default shard size must be a non-zero power of two.
    #[test]
    fn test_default_shard_size() {
        let size = default_shard_size().get();
        eprintln!("size = {size}");
        assert!(size > 0);
        // A power of two shares no bits with its predecessor.
        let shared_bits = size & (size - 1);
        assert_eq!(shared_bits, 0);
    }
}

View file

@ -0,0 +1,5 @@
pub mod fmap;
pub use fmap::FingerprintMap;
// todo: remove it if we could find a better alternative
pub use dashmap::DashMap as CHashMap;

View file

@ -0,0 +1,30 @@
/// A value that is either owned or mutably borrowed.
///
/// Both variants dereference (mutably) to the underlying `T`.
#[derive(Debug)]
pub enum CowMut<'a, T> {
/// The value is stored inline.
Owned(T),
/// The value lives elsewhere and is borrowed mutably for `'a`.
Borrowed(&'a mut T),
}
impl<T> std::ops::Deref for CowMut<'_, T> {
type Target = T;
fn deref(&self) -> &T {
match self {
CowMut::Owned(it) => it,
CowMut::Borrowed(it) => it,
}
}
}
impl<T> std::ops::DerefMut for CowMut<'_, T> {
fn deref_mut(&mut self) -> &mut T {
match self {
CowMut::Owned(it) => it,
CowMut::Borrowed(it) => it,
}
}
}
impl<T: Default> Default for CowMut<'_, T> {
fn default() -> Self {
CowMut::Owned(T::default())
}
}

View file

@ -0,0 +1,61 @@
//todo: move to core/src/hash.rs
use std::{
hash::{Hash, Hasher},
ops::Deref,
};
use crate::hash::item_hash128;
/// A value that can report a 128-bit hash of itself.
pub trait StaticHash128 {
/// Returns the 128-bit hash of this value.
fn get_hash(&self) -> u128;
}
impl Hash for dyn StaticHash128 {
    /// Feeds the value's own 128-bit hash into `state`.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        let digest = self.get_hash();
        state.write_u128(digest);
    }
}
/// A boxed value stored together with a 128-bit hash.
///
/// `T` may be unsized, e.g. a trait object.
pub struct HashedTrait<T: ?Sized> {
/// The hash associated with `t`.
hash: u128,
/// The boxed value.
t: Box<T>,
}
impl<T: ?Sized> HashedTrait<T> {
/// Wraps `t` together with the given `hash`.
///
/// The hash is stored as-is; it is not checked against `t`.
pub fn new(hash: u128, t: Box<T>) -> Self {
Self { hash, t }
}
}
impl<T: ?Sized> Deref for HashedTrait<T> {
    type Target = T;

    /// Gives access to the wrapped value.
    fn deref(&self) -> &Self::Target {
        let inner: &T = &*self.t;
        inner
    }
}
// `T: ?Sized` matches the struct definition, so wrapped trait objects and
// other unsized payloads can be hashed as well.
impl<T: ?Sized> Hash for HashedTrait<T> {
    /// Feeds the stored 128-bit hash into `state`; the wrapped value itself is
    /// not hashed.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write_u128(self.hash);
    }
}
impl<T: Hash + Default + 'static> Default for HashedTrait<T> {
    /// Wraps `T::default()` together with its `item_hash128` hash.
    fn default() -> Self {
        let value = T::default();
        let hash = item_hash128(&value);
        Self {
            hash,
            t: Box::new(value),
        }
    }
}
impl<T: ?Sized> StaticHash128 for HashedTrait<T> {
/// Returns the hash stored alongside the wrapped value.
fn get_hash(&self) -> u128 {
self.hash
}
}

View file

@ -0,0 +1,32 @@
use std::borrow::{Borrow, Cow};
use serde::{Deserializer, Serializer};
use serde_with::{
base64::{Base64, Standard},
formats::Padded,
};
use serde_with::{DeserializeAs, SerializeAs};
/// A `serde_with` adapter that (de)serializes a `Cow<[u8]>` as a base64 string.
pub struct AsCowBytes;
/// Base64 with the standard alphabet and padding.
type StdBase64 = Base64<Standard, Padded>;
impl<'b> SerializeAs<Cow<'b, [u8]>> for AsCowBytes {
fn serialize_as<S>(source: &Cow<'b, [u8]>, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let t: &[u8] = source.borrow();
StdBase64::serialize_as(&t, serializer)
}
}
impl<'b, 'de> DeserializeAs<'de, Cow<'b, [u8]>> for AsCowBytes {
fn deserialize_as<D>(deserializer: D) -> Result<Cow<'b, [u8]>, D::Error>
where
D: Deserializer<'de>,
{
let buf: Vec<u8> = StdBase64::deserialize_as(deserializer)?;
Ok(Cow::Owned(buf))
}
}

View file

@ -0,0 +1,22 @@
mod takable;
use std::{path::Path, sync::Arc};
pub use takable::*;
mod hash;
pub use hash::*;
pub mod cow_mut;
mod query;
pub use query::*;
mod read;
pub use read::*;
mod marker;
pub use marker::*;
/// A reference-counted, immutable string slice.
pub type ImmutStr = Arc<str>;
/// A reference-counted, immutable byte slice.
pub type ImmutBytes = Arc<[u8]>;
/// A reference-counted, immutable path.
pub type ImmutPath = Arc<Path>;

View file

@ -0,0 +1,83 @@
use core::fmt;
use std::sync::OnceLock;
use parking_lot::Mutex;
/// Represent the result of an immutable query reference.
/// The compute function should be pure enough.
///
/// The result is computed at most once, by [`compute`] or
/// [`compute_with_context`], and is cached afterwards.
///
/// [`compute`]: Self::compute
/// [`compute_with_context`]: Self::compute_with_context
pub struct QueryRef<Res, Err, QueryContext = ()> {
/// The context handed to the compute function; it is taken (leaving `None`)
/// when the computation starts.
ctx: Mutex<Option<QueryContext>>,
/// An empty cell means no value has been computed yet.
cell: OnceLock<Result<Res, Err>>,
}
impl<T, E, QC> QueryRef<T, E, QC> {
    /// Creates a query whose result is already known to be `Ok(value)`.
    ///
    /// No context is stored, as nothing is left to compute.
    pub fn with_value(value: T) -> Self {
        Self {
            ctx: Mutex::new(None),
            cell: OnceLock::from(Ok(value)),
        }
    }

    /// Creates a query that has not been computed yet and stores `ctx` for
    /// the later computation.
    pub fn with_context(ctx: QC) -> Self {
        let cell = OnceLock::new();
        let ctx = Mutex::new(Some(ctx));
        Self { ctx, cell }
    }
}
impl<T, E: Clone, QC> QueryRef<T, E, QC> {
    /// Computes the result with `f` on first use and returns a reference to it.
    ///
    /// Later calls return the cached result without running `f`. A cached
    /// error is cloned on every call.
    ///
    /// # Panics
    ///
    /// See [`Self::compute_with_context`].
    #[inline]
    pub fn compute<F: FnOnce() -> Result<T, E>>(&self, f: F) -> Result<&T, E> {
        self.compute_with_context(|_| f())
    }

    /// Computes the result with `f`, passing it the stored context, on first
    /// use and returns a reference to the result.
    ///
    /// Later calls return the cached result without running `f`. A cached
    /// error is cloned on every call.
    ///
    /// # Panics
    ///
    /// Panics if the result has not been computed yet but the context is no
    /// longer available (e.g. a previous compute function panicked after the
    /// context had been taken).
    #[inline]
    pub fn compute_with_context<F: FnOnce(QC) -> Result<T, E>>(&self, f: F) -> Result<&T, E> {
        let result = self.cell.get_or_init(|| {
            // Take the context in its own statement so the mutex guard is
            // released *before* `f` runs. As a temporary in the call
            // expression it would stay locked for the whole computation,
            // blocking `Debug` formatting and deadlocking if `f` touches the
            // context of this very `QueryRef`.
            let ctx = self.ctx.lock().take().unwrap();
            f(ctx)
        });
        result.as_ref().map_err(Clone::clone)
    }

    /// Gets the reference to the (maybe uninitialized) result.
    ///
    /// Returns `None` if the cell is empty, or being initialized. This
    /// method never blocks.
    ///
    /// It is probably not on a hot path, hence it is not marked `#[inline]`.
    pub fn get_uninitialized(&self) -> Option<&Result<T, E>> {
        self.cell.get()
    }
}
impl<T, E> Default for QueryRef<T, E> {
    /// Creates a not-yet-computed query with the unit context.
    fn default() -> Self {
        let unit_ctx = Some(());
        Self {
            cell: OnceLock::new(),
            ctx: Mutex::new(unit_ctx),
        }
    }
}
impl<T, E, QC> fmt::Debug for QueryRef<T, E, QC>
where
    T: fmt::Debug,
    E: fmt::Debug,
    QC: fmt::Debug,
{
    /// Prints the current context and the result, if any.
    ///
    /// Locks the context while formatting.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let ctx = self.ctx.lock();
        let res = self.cell.get();

        let mut out = f.debug_struct("QueryRef");
        out.field("context", &ctx);
        out.field("result", &res);
        out.finish()
    }
}

View file

@ -0,0 +1,3 @@
/// A source whose whole content is read in one go, consuming the source.
pub trait ReadAllOnce {
/// Reads all content into `buf`, consuming `self`.
///
/// NOTE(review): the returned `usize` is presumably the number of bytes
/// read, as with `std::io::Read::read_to_end` — confirm with implementors.
fn read_all(self, buf: &mut Vec<u8>) -> std::io::Result<usize>;
}

View file

@ -0,0 +1,17 @@
use std::sync::Arc;
/// Trait for handles whose inner value can be taken out.
pub trait TakeAs<T> {
    /// Takes the inner value if there is exactly one strong reference and
    /// clones it otherwise.
    fn take(self) -> T;
}

impl<T: Clone> TakeAs<T> for Arc<T> {
    /// Unwraps the `Arc` when it is the only strong reference; otherwise
    /// clones the shared value.
    fn take(self) -> T {
        Arc::try_unwrap(self).unwrap_or_else(|shared| T::clone(&shared))
    }
}

View file

@ -0,0 +1,216 @@
use core::fmt;
use serde::{Deserialize, Serialize};
/// A serializable physical position in a document.
///
/// Note that it uses [`f32`] rather than [`f64`] for the coordinates to
/// improve the performance of both serialization and calculation. This
/// sacrifices some floating-point precision, but it is enough in our use cases.
///
/// Also see `TypstPosition`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct DocumentPosition {
/// The page, starting at 1.
pub page_no: usize,
/// The exact x-coordinate on the page (from the left, as usual).
pub x: f32,
/// The exact y-coordinate on the page (from the top, as usual).
pub y: f32,
}
// impl From<TypstPosition> for DocumentPosition {
// fn from(position: TypstPosition) -> Self {
// Self {
// page_no: position.page.into(),
// x: position.point.x.to_pt() as f32,
// y: position.point.y.to_pt() as f32,
// }
// }
// }
/// Raw representation of a source span.
pub type RawSourceSpan = u64;
/// A file, identified by its path.
///
/// [`SourceLocation::from_flat`] looks these up by index to resolve a
/// [`FlatSourceLocation`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileLocation {
/// The path of the file.
pub filepath: String,
}
/// A character position, given as a line and a column.
///
/// The column is counted in UTF-8 or UTF-16 code units; which encoding
/// applies is determined by usage.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq, Hash)]
pub struct CharPosition {
/// The line number, starting at 0.
pub line: usize,
/// The column number, starting at 0.
pub column: usize,
}
impl fmt::Display for CharPosition {
    /// Formats the position as `line:column`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { line, column } = self;
        write!(f, "{}:{}", line, column)
    }
}
impl From<Option<(usize, usize)>> for CharPosition {
    /// Builds a position from an optional `(line, column)` pair.
    ///
    /// `None` maps to line 0, column 0.
    fn from(loc: Option<(usize, usize)>) -> Self {
        match loc {
            Some((line, column)) => CharPosition { line, column },
            None => CharPosition { line: 0, column: 0 },
        }
    }
}
/// A resolved source (text) location.
///
/// See [`CharPosition`] for the definition of the position inside a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceLocation {
/// The path of the file containing the location.
pub filepath: String,
/// The position inside the file.
pub pos: CharPosition,
}
impl SourceLocation {
pub fn from_flat(
flat: FlatSourceLocation,
i: &impl std::ops::Index<usize, Output = FileLocation>,
) -> Self {
Self {
filepath: i[flat.filepath as usize].filepath.clone(),
pos: flat.pos,
}
}
}
/// A flat resolved source (text) location.
///
/// Instead of a path it stores an index into a table of [`FileLocation`]s;
/// see [`SourceLocation::from_flat`].
///
/// See [`CharPosition`] for the definition of the position inside a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlatSourceLocation {
/// The index of the file in the file table.
pub filepath: u32,
/// The position inside the file.
pub pos: CharPosition,
}
/// A resolved file range.
///
/// See [`CharPosition`] for the definition of the position inside a file.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CharRange {
/// The position where the range starts.
pub start: CharPosition,
/// The position where the range ends.
pub end: CharPosition,
}
impl fmt::Display for CharRange {
    /// Formats the range as `start-end`, or as a single position when both
    /// ends coincide.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { start, end } = self;
        if start != end {
            return write!(f, "{}-{}", start, end);
        }
        write!(f, "{}", start)
    }
}
/// A resolved source (text) range.
///
/// See [`CharPosition`] for the definition of the position inside a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceRange {
/// The path of the file containing the range.
pub path: String,
/// The range inside the file.
pub range: CharRange,
}
/// Span-related types that are only available with the `typst` feature.
#[cfg(feature = "typst")]
mod typst_ext {
/// The position type of typst's layout module.
pub use typst::layout::Position as TypstPosition;
/// Unevaluated source span.
/// The raw source span is unsafe to serialize and deserialize.
/// Because the real source location is only known during liveness of
/// the compiled document.
pub type SourceSpan = typst::syntax::Span;
/// Unevaluated source span with offset.
///
/// It adds an additional offset relative to the start of the span.
///
/// The offset is usually generated when the location is inside of some
/// text or string content.
#[derive(Debug, Clone, Copy)]
pub struct SourceSpanOffset {
/// The span the offset is relative to.
pub span: SourceSpan,
/// The offset relative to the start of `span`.
pub offset: usize,
}
/// Lifts a [`SourceSpan`] to [`SourceSpanOffset`].
impl From<SourceSpan> for SourceSpanOffset {
/// Uses an offset of zero, i.e. the start of the span.
fn from(span: SourceSpan) -> Self {
Self { span, offset: 0 }
}
}
/// Converts a [`SourceSpan`] and an in-text offset to [`SourceSpanOffset`].
impl From<(SourceSpan, u16)> for SourceSpanOffset {
/// Widens the 16-bit offset to `usize`.
fn from((span, offset): (SourceSpan, u16)) -> Self {
Self {
span,
offset: offset as usize,
}
}
}
}
#[cfg(feature = "typst")]
pub use typst_ext::*;
/// A point on the element tree.
///
/// Can also be built from a `(kind, index, fingerprint)` tuple.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ElementPoint {
/// The element kind.
pub kind: u32,
/// The index of the element.
pub index: u32,
/// The fingerprint of the element.
pub fingerprint: String,
}
impl From<(u32, u32, String)> for ElementPoint {
fn from((kind, index, fingerprint): (u32, u32, String)) -> Self {
Self {
kind,
index,
fingerprint,
}
}
}
/// A file system data source.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct FsDataSource {
/// The path of the data source.
pub path: String,
}
/// An in-memory data source.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct MemoryDataSource {
/// The name of the data source.
pub name: String,
}
/// Data source for a document.
///
/// Serialized with a `kind` tag that is either `"fs"` or `"memory"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(tag = "kind")]
pub enum DataSource {
/// File system data source.
#[serde(rename = "fs")]
Fs(FsDataSource),
/// Memory data source.
#[serde(rename = "memory")]
Memory(MemoryDataSource),
}

View file

@ -0,0 +1,327 @@
use core::fmt;
use ecow::EcoString;
use serde::{Deserialize, Serialize};
use crate::debug_loc::CharRange;
/// The severity of a diagnostic message.
///
/// It is (de)serialized as its `u8` discriminant.
// NOTE(review): the numeric values look like LSP's `DiagnosticSeverity` — confirm.
#[derive(serde_repr::Serialize_repr, serde_repr::Deserialize_repr, Debug, Clone)]
#[repr(u8)]
pub enum DiagSeverity {
/// Displayed as `error`.
Error = 1,
/// Displayed as `warning`.
Warning = 2,
/// Displayed as `information`.
Information = 3,
/// Displayed as `hint`.
Hint = 4,
}
impl fmt::Display for DiagSeverity {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
DiagSeverity::Error => write!(f, "error"),
DiagSeverity::Warning => write!(f, "warning"),
DiagSeverity::Information => write!(f, "information"),
DiagSeverity::Hint => write!(f, "hint"),
}
}
}
/// A diagnostic message.
///
/// See <https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#diagnostic>
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiagMessage {
/// The package the diagnostic belongs to.
pub package: String,
/// The path of the file the diagnostic belongs to.
pub path: String,
/// The diagnostic text.
pub message: String,
/// The severity of the diagnostic.
pub severity: DiagSeverity,
/// The range the diagnostic applies to, if any.
pub range: Option<CharRange>,
// These field could be added to ErrorImpl::arguments
// owner: Option<ImmutStr>,
// source: ImmutStr,
}
// Intentionally empty for now.
impl DiagMessage {}
/// The payload of an [`Error`].
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum ErrKind {
/// No further information besides the location and the arguments.
None,
/// A plain text message.
Msg(String),
/// A diagnostic message.
Diag(DiagMessage),
/// Another [`Error`] wrapped by this one.
Inner(Error),
}
/// Conversion of an error-like value into an [`ErrKind`].
pub trait ErrKindExt {
    /// Consumes the value and returns the matching [`ErrKind`].
    fn to_error_kind(self) -> ErrKind;
}

/// An `ErrKind` converts to itself.
impl ErrKindExt for ErrKind {
    fn to_error_kind(self) -> Self {
        self
    }
}

/// I/O errors are reduced to their display text.
impl ErrKindExt for std::io::Error {
    fn to_error_kind(self) -> ErrKind {
        let message = self.to_string();
        ErrKind::Msg(message)
    }
}

/// An owned string becomes the message without copying.
impl ErrKindExt for String {
    fn to_error_kind(self) -> ErrKind {
        ErrKind::Msg(self)
    }
}

/// A string slice is copied into the message.
impl ErrKindExt for &str {
    fn to_error_kind(self) -> ErrKind {
        ErrKind::Msg(self.to_owned())
    }
}

/// A borrowed string is cloned into the message.
impl ErrKindExt for &String {
    fn to_error_kind(self) -> ErrKind {
        ErrKind::Msg(self.clone())
    }
}

/// An `EcoString` is converted through its display text.
impl ErrKindExt for EcoString {
    fn to_error_kind(self) -> ErrKind {
        let message = self.to_string();
        ErrKind::Msg(message)
    }
}

/// Any displayable value is reduced to its display text.
impl ErrKindExt for &dyn std::fmt::Display {
    fn to_error_kind(self) -> ErrKind {
        let message = self.to_string();
        ErrKind::Msg(message)
    }
}

/// JSON errors are reduced to their display text.
impl ErrKindExt for serde_json::Error {
    fn to_error_kind(self) -> ErrKind {
        let message = self.to_string();
        ErrKind::Msg(message)
    }
}
/// The heap-allocated content of an [`Error`].
#[derive(Debug, Clone)]
pub struct ErrorImpl {
/// The location label the error was created with.
loc: &'static str,
/// The payload of the error.
kind: ErrKind,
/// Key-value pairs attached to the error.
arguments: Box<[(&'static str, String)]>,
}
/// This type represents all possible errors that can occur in typst.ts
///
/// It carries a location label, an [`ErrKind`] and a list of key-value
/// arguments.
#[derive(Debug, Clone)]
pub struct Error {
/// This `Box` allows us to keep the size of `Error` as small as possible. A
/// larger `Error` type was substantially slower due to all the functions
/// that pass around `Result<T, Error>`.
err: Box<ErrorImpl>,
}
impl Error {
    /// Creates an error from a location label, a kind and its arguments.
    pub fn new(loc: &'static str, kind: ErrKind, arguments: Box<[(&'static str, String)]>) -> Self {
        let inner = ErrorImpl {
            loc,
            kind,
            arguments,
        };
        Self {
            err: Box::new(inner),
        }
    }

    /// Returns the location label of the error.
    pub fn loc(&self) -> &'static str {
        let ErrorImpl { loc, .. } = &*self.err;
        loc
    }

    /// Returns the kind of the error.
    pub fn kind(&self) -> &ErrKind {
        let ErrorImpl { kind, .. } = &*self.err;
        kind
    }

    /// Returns the key-value arguments attached to the error.
    pub fn arguments(&self) -> &[(&'static str, String)] {
        let ErrorImpl { arguments, .. } = &*self.err;
        arguments
    }
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let err = &self.err;
match &err.kind {
ErrKind::Msg(msg) => write!(f, "{}: {} with {:?}", err.loc, msg, err.arguments),
ErrKind::Diag(diag) => {
write!(f, "{}: {} with {:?}", err.loc, diag.message, err.arguments)
}
ErrKind::Inner(e) => write!(f, "{}: {} with {:?}", err.loc, e, err.arguments),
ErrKind::None => write!(f, "{}: with {:?}", err.loc, err.arguments),
}
}
}
// Uses the default trait methods only; a wrapped `ErrKind::Inner` error is
// rendered by `Display` rather than exposed through `source`.
impl std::error::Error for Error {}
/// A JavaScript value is reduced to its debug representation.
#[cfg(feature = "web")]
impl ErrKindExt for wasm_bindgen::JsValue {
    fn to_error_kind(self) -> ErrKind {
        let message = format!("{self:?}");
        ErrKind::Msg(message)
    }
}

/// Converts the error into a JavaScript `Error` carrying its display text.
#[cfg(feature = "web")]
impl From<Error> for wasm_bindgen::JsValue {
    fn from(e: Error) -> Self {
        let message = e.to_string();
        js_sys::Error::new(&message).into()
    }
}

/// Converts the borrowed error into a JavaScript `Error` carrying its
/// display text.
#[cfg(feature = "web")]
impl From<&Error> for wasm_bindgen::JsValue {
    fn from(e: &Error) -> Self {
        let message = e.to_string();
        js_sys::Error::new(&message).into()
    }
}
/// Commonly used error-handling helpers: the [`ZResult`] alias, the
/// [`WithContext`] extension trait, error-mapping closures and the
/// `error_once*` macros.
pub mod prelude {
    use super::ErrKindExt;
    use crate::Error;

    /// A result whose error type is the crate's [`Error`].
    pub type ZResult<T> = Result<T, Error>;

    /// Attaches a location (and optionally arguments) to the error of a result.
    pub trait WithContext<T>: Sized {
        /// Converts the error into an [`Error`] labelled with `loc`.
        fn context(self, loc: &'static str) -> ZResult<T>;

        /// Converts the error into an [`Error`] labelled with `loc`, attaching
        /// the arguments produced by `f`. `f` is only called on failure.
        fn with_context<F>(self, loc: &'static str, f: F) -> ZResult<T>
        where
            F: FnOnce() -> Box<[(&'static str, String)]>;
    }

    impl<T, E: ErrKindExt> WithContext<T> for Result<T, E> {
        fn context(self, loc: &'static str) -> ZResult<T> {
            self.map_err(|e| Error::new(loc, e.to_error_kind(), Box::new([])))
        }

        fn with_context<F>(self, loc: &'static str, f: F) -> ZResult<T>
        where
            F: FnOnce() -> Box<[(&'static str, String)]>,
        {
            self.map_err(|e| Error::new(loc, e.to_error_kind(), f()))
        }
    }

    /// Returns a closure that turns any `ToString` value into an [`Error`]
    /// labelled with `loc`.
    pub fn map_string_err<T: ToString>(loc: &'static str) -> impl Fn(T) -> Error {
        move |e| Error::new(loc, e.to_string().to_error_kind(), Box::new([]))
    }

    /// Returns a closure that converts a value into `S` first and then into an
    /// [`Error`] labelled with `loc`.
    pub fn map_into_err<S: ErrKindExt, T: Into<S>>(loc: &'static str) -> impl Fn(T) -> Error {
        move |e| Error::new(loc, e.into().to_error_kind(), Box::new([]))
    }

    /// Returns a closure that turns an [`ErrKindExt`] value into an [`Error`]
    /// labelled with `loc`.
    pub fn map_err<T: ErrKindExt>(loc: &'static str) -> impl Fn(T) -> Error {
        move |e| Error::new(loc, e.to_error_kind(), Box::new([]))
    }

    /// Returns a closure that wraps an [`Error`] into a new one labelled with
    /// `loc`.
    pub fn wrap_err(loc: &'static str) -> impl Fn(Error) -> Error {
        move |e| Error::new(loc, crate::ErrKind::Inner(e), Box::new([]))
    }

    /// Like [`map_string_err`], but also attaches `arguments` to the error.
    pub fn map_string_err_with_args<
        T: ToString,
        Args: IntoIterator<Item = (&'static str, String)>,
    >(
        loc: &'static str,
        arguments: Args,
    ) -> impl FnOnce(T) -> Error {
        move |e| {
            Error::new(
                loc,
                e.to_string().to_error_kind(),
                arguments.into_iter().collect::<Vec<_>>().into_boxed_slice(),
            )
        }
    }

    /// Like [`map_into_err`], but also attaches `arguments` to the error.
    pub fn map_into_err_with_args<
        S: ErrKindExt,
        T: Into<S>,
        Args: IntoIterator<Item = (&'static str, String)>,
    >(
        loc: &'static str,
        arguments: Args,
    ) -> impl FnOnce(T) -> Error {
        move |e| {
            Error::new(
                loc,
                e.into().to_error_kind(),
                arguments.into_iter().collect::<Vec<_>>().into_boxed_slice(),
            )
        }
    }

    /// Like [`map_err`], but also attaches `arguments` to the error.
    pub fn map_err_with_args<T: ErrKindExt, Args: IntoIterator<Item = (&'static str, String)>>(
        loc: &'static str,
        arguments: Args,
    ) -> impl FnOnce(T) -> Error {
        move |e| {
            Error::new(
                loc,
                e.to_error_kind(),
                arguments.into_iter().collect::<Vec<_>>().into_boxed_slice(),
            )
        }
    }

    /// Like [`wrap_err`], but also attaches `arguments` to the error.
    pub fn wrap_err_with_args<Args: IntoIterator<Item = (&'static str, String)>>(
        loc: &'static str,
        arguments: Args,
    ) -> impl FnOnce(Error) -> Error {
        move |e| {
            Error::new(
                loc,
                crate::ErrKind::Inner(e),
                arguments.into_iter().collect::<Vec<_>>().into_boxed_slice(),
            )
        }
    }

    /// Creates an [`Error`] without a kind. Implementation detail of
    /// [`error_once!`].
    pub fn _error_once(loc: &'static str, args: Box<[(&'static str, String)]>) -> Error {
        Error::new(loc, crate::ErrKind::None, args)
    }

    // The macros below refer to their helpers through `$crate`, so they also
    // expand correctly at call sites that did not glob-import this prelude.

    /// Creates an [`Error`] at `$loc` with optional `key: value` arguments.
    #[macro_export]
    macro_rules! error_once {
        ($loc:expr, $($arg_key:ident: $arg:expr),+ $(,)?) => {
            $crate::error::prelude::_error_once($loc, Box::new([$((stringify!($arg_key), $arg.to_string())),+]))
        };
        ($loc:expr $(,)?) => {
            $crate::error::prelude::_error_once($loc, Box::new([]))
        };
    }

    /// Creates a closure mapping an `ErrKindExt` value to an [`Error`] at
    /// `$loc` with optional `key: value` arguments.
    #[macro_export]
    macro_rules! error_once_map {
        ($loc:expr, $($arg_key:ident: $arg:expr),+ $(,)?) => {
            $crate::error::prelude::map_err_with_args($loc, [$((stringify!($arg_key), $arg.to_string())),+])
        };
        ($loc:expr $(,)?) => {
            $crate::error::prelude::map_err($loc)
        };
    }

    /// Creates a closure mapping a `ToString` value to an [`Error`] at `$loc`
    /// with optional `key: value` arguments.
    #[macro_export]
    macro_rules! error_once_map_string {
        ($loc:expr, $($arg_key:ident: $arg:expr),+ $(,)?) => {
            $crate::error::prelude::map_string_err_with_args($loc, [$((stringify!($arg_key), $arg.to_string())),+])
        };
        ($loc:expr $(,)?) => {
            $crate::error::prelude::map_string_err($loc)
        };
    }

    pub use error_once;
    pub use error_once_map;
    pub use error_once_map_string;
}
/// `Error` must be sendable across threads; this fails to compile otherwise.
#[test]
fn test_send() {
    fn assert_send<S: Send>() {}

    assert_send::<Error>();
}

View file

@ -0,0 +1,313 @@
use core::fmt;
use std::{
any::Any,
hash::{Hash, Hasher},
};
use base64::Engine;
use fxhash::FxHasher32;
use siphasher::sip128::{Hasher128, SipHasher13};
#[cfg(feature = "rkyv")]
use rkyv::{Archive, Deserialize as rDeser, Serialize as rSer};
use crate::error::prelude::ZResult;
/// A hasher builder producing default-constructed [`FxHasher`]s.
pub(crate) type FxBuildHasher = std::hash::BuildHasherDefault<FxHasher>;
pub use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
// pub type FxIndexSet<K> = indexmap::IndexSet<K, FxHasher>;
// pub type FxIndexMap<K, V> = indexmap::IndexMap<K, V, FxHasher>;
/// A [`dashmap::DashMap`] that hashes its keys with [`FxHasher`].
pub type FxDashMap<K, V> = dashmap::DashMap<K, V, FxBuildHasher>;
/// A 128-bit fingerprint, stored as two 64-bit halves.
///
/// See <https://github.com/rust-lang/rust/blob/master/compiler/rustc_hir/src/stable_hash_impls.rs#L22>
/// The fingerprint conflicts should be very rare and should be handled by the
/// compiler.
///
/// > That being said, given a high quality hash function, the collision
/// > probabilities in question are very small. For example, for a big crate
/// > like `rustc_middle` (with ~50000 `LocalDefId`s as of the time of writing)
/// > there is a probability of roughly 1 in 14,750,000,000 of a crate-internal
/// > collision occurring. For a big crate graph with 1000 crates in it, there
/// > is a probability of 1 in 36,890,000,000,000 of a `StableCrateId`
/// > collision.
#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct Fingerprint {
/// The lower 64 bits.
lo: u64,
/// The upper 64 bits.
hi: u64,
}
impl fmt::Debug for Fingerprint {
    /// Prints the fingerprint as its svg id with the `fg` prefix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let id = self.as_svg_id("fg");
        f.write_str(id.as_str())
    }
}
impl serde::Serialize for Fingerprint {
    /// Serializes the fingerprint as its svg id without a prefix.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let id = self.as_svg_id("");
        serializer.serialize_str(id.as_str())
    }
}
impl<'de> serde::Deserialize<'de> for Fingerprint {
fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let s = <std::string::String as serde::Deserialize>::deserialize(deserializer)?;
Fingerprint::try_from_str(&s).map_err(serde::de::Error::custom)
}
}
impl Fingerprint {
/// Create a new fingerprint from the given pair of 64-bit integers.
pub fn from_pair(lo: u64, hi: u64) -> Self {
Self { lo, hi }
}
/// Create a new fingerprint from the given 128-bit integer.
pub const fn from_u128(hash: u128) -> Self {
// Self(hash as u64, (hash >> 64) as u64)
Self {
lo: hash as u64,
hi: (hash >> 64) as u64,
}
}
/// Get the fingerprint as a 128-bit integer.
pub fn to_u128(self) -> u128 {
((self.hi as u128) << 64) | self.lo as u128
}
/// Cut the fingerprint into a 32-bit integer.
/// It could be used as a hash value if the fingerprint is calculated from a
/// stable hash function.
pub fn lower32(self) -> u32 {
self.lo as u32
}
/// Creates a new `Fingerprint` from a svg id that **doesn't have prefix**.
pub fn try_from_str(s: &str) -> ZResult<Self> {
let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
.decode(&s.as_bytes()[..11])
.expect("invalid base64 string");
let lo = u64::from_le_bytes(bytes.try_into().unwrap());
let mut bytes = base64::engine::general_purpose::STANDARD_NO_PAD
.decode(&s.as_bytes()[11..])
.expect("invalid base64 string");
bytes.resize(8, 0);
let hi = u64::from_le_bytes(bytes.try_into().unwrap());
Ok(Self::from_pair(lo, hi))
}
/// Create a xml id from the given prefix and the fingerprint of this
/// reference. Note that the entire html document shares namespace for
/// ids.
#[comemo::memoize]
pub fn as_svg_id(self, prefix: &'static str) -> String {
let fingerprint_lo =
base64::engine::general_purpose::STANDARD_NO_PAD.encode(self.lo.to_le_bytes());
if self.hi == 0 {
return [prefix, &fingerprint_lo].join("");
}
// possible the id in the lower 64 bits.
let fingerprint_hi = {
let id = self.hi.to_le_bytes();
// truncate zero
let rev_zero = id.iter().rev().skip_while(|&&b| b == 0).count();
let id = &id[..rev_zero];
base64::engine::general_purpose::STANDARD_NO_PAD.encode(id)
};
[prefix, &fingerprint_lo, &fingerprint_hi].join("")
}
}
/// A fingerprint hasher that extends the [`std::hash::Hasher`] trait.
pub trait FingerprintHasher: std::hash::Hasher {
/// Finish the fingerprint and return the fingerprint and the data.
/// The data is used to resolve the conflict.
///
/// Consumes the hasher.
fn finish_fingerprint(self) -> (Fingerprint, Vec<u8>);
}
/// A fingerprint hasher that uses the [`SipHasher13`] algorithm.
///
/// It buffers everything written to it and hashes the buffer on demand.
#[derive(Default)]
pub struct FingerprintSipHasher {
/// The underlying data passed to the hasher.
data: Vec<u8>,
}
/// The hash algorithm behind [`FingerprintSipHasher`].
pub type FingerprintSipHasherBase = SipHasher13;
impl FingerprintSipHasher {
    /// Returns a 32-bit `FxHasher32` hash of the buffered data together with
    /// a reference to that data.
    pub fn fast_hash(&self) -> (u32, &Vec<u8>) {
        let mut hasher = FxHasher32::default();
        self.data.hash(&mut hasher);
        let digest = hasher.finish() as u32;
        (digest, &self.data)
    }
}
impl std::hash::Hasher for FingerprintSipHasher {
    /// Appends `bytes` to the buffer; nothing is hashed yet.
    fn write(&mut self, bytes: &[u8]) {
        self.data.extend(bytes.iter().copied());
    }

    /// Hashes the buffered data into a 64-bit value.
    fn finish(&self) -> u64 {
        let mut hasher = FingerprintSipHasherBase::default();
        self.data.hash(&mut hasher);
        hasher.finish()
    }
}
impl FingerprintHasher for FingerprintSipHasher {
    /// Hashes the buffered data into a 128-bit fingerprint and returns it
    /// together with the buffer.
    fn finish_fingerprint(self) -> (Fingerprint, Vec<u8>) {
        // `self` is consumed, so the buffer is moved out instead of cloned.
        let buffer = self.data;
        let mut inner = FingerprintSipHasherBase::default();
        buffer.hash(&mut inner);
        let hash = inner.finish128();
        (
            Fingerprint {
                lo: hash.h1,
                hi: hash.h2,
            },
            buffer,
        )
    }
}
/// A fingerprint builder that produces unique fingerprint for each item.
/// It resolves the conflict by checking the underlying data.
/// See [`Fingerprint`] for more information.
#[derive(Default)]
pub struct FingerprintBuilder {
/// The fast conflict checker mapping fingerprints to their underlying data.
/// Only present with the `bi-hash` feature.
#[cfg(feature = "bi-hash")]
fast_conflict_checker: crate::adt::CHashMap<u32, Vec<u8>>,
/// The conflict checker mapping fingerprints to their underlying data.
conflict_checker: crate::adt::CHashMap<Fingerprint, Vec<u8>>,
}
#[cfg(not(feature = "bi-hash"))]
impl FingerprintBuilder {
pub fn resolve_unchecked<T: Hash>(&self, item: &T) -> Fingerprint {
let mut s = FingerprintSipHasher { data: Vec::new() };
item.hash(&mut s);
let (fingerprint, _featured_data) = s.finish_fingerprint();
fingerprint
}
pub fn resolve<T: Hash + 'static>(&self, item: &T) -> Fingerprint {
let mut s = FingerprintSipHasher { data: Vec::new() };
item.type_id().hash(&mut s);
item.hash(&mut s);
let (fingerprint, featured_data) = s.finish_fingerprint();
let Some(prev_featured_data) = self.conflict_checker.get(&fingerprint) else {
self.conflict_checker.insert(fingerprint, featured_data);
return fingerprint;
};
if *prev_featured_data == *featured_data {
return fingerprint;
}
// todo: soft error
panic!("Fingerprint conflict detected!");
}
}
// Two-level variant: a cheap 32-bit hash is tried first and the 128-bit
// fingerprint is only computed when the 32-bit hash is already taken by
// different data.
#[cfg(feature = "bi-hash")]
impl FingerprintBuilder {
/// Hashes `item` into a fingerprint, preferring the 32-bit fast hash.
///
/// The 128-bit fallback is returned without being recorded for conflict
/// detection.
pub fn resolve_unchecked<T: Hash>(&self, item: &T) -> Fingerprint {
let mut s = FingerprintSipHasher { data: Vec::new() };
item.hash(&mut s);
let (fingerprint, featured_data) = s.fast_hash();
// Unknown fast hash: claim it for this data.
let Some(prev_featured_data) = self.fast_conflict_checker.get(&fingerprint) else {
self.fast_conflict_checker.insert(fingerprint, s.data);
return Fingerprint::from_pair(fingerprint as u64, 0);
};
// Known fast hash with the same data: reuse it.
if *prev_featured_data == *featured_data {
return Fingerprint::from_pair(fingerprint as u64, 0);
}
// The fast hash is taken by different data: fall back to 128 bits.
let (fingerprint, _featured_data) = s.finish_fingerprint();
fingerprint
}
/// Hashes `item` (together with its type id) into a fingerprint,
/// preferring the 32-bit fast hash, and records the hashed data for
/// conflict detection.
///
/// # Panics
///
/// Panics if the 128-bit fingerprint was recorded before for different
/// data.
pub fn resolve<T: Hash + 'static>(&self, item: &T) -> Fingerprint {
let mut s = FingerprintSipHasher { data: Vec::new() };
item.type_id().hash(&mut s);
item.hash(&mut s);
let (fingerprint, featured_data) = s.fast_hash();
// Unknown fast hash: claim it for this data.
let Some(prev_featured_data) = self.fast_conflict_checker.get(&fingerprint) else {
self.fast_conflict_checker.insert(fingerprint, s.data);
return Fingerprint::from_pair(fingerprint as u64, 0);
};
// Known fast hash with the same data: reuse it.
if *prev_featured_data == *featured_data {
return Fingerprint::from_pair(fingerprint as u64, 0);
}
// The fast hash is taken by different data: fall back to 128 bits.
let (fingerprint, featured_data) = s.finish_fingerprint();
let Some(prev_featured_data) = self.conflict_checker.get(&fingerprint) else {
self.conflict_checker.insert(fingerprint, featured_data);
return fingerprint;
};
if *prev_featured_data == *featured_data {
return fingerprint;
}
// todo: soft error
panic!("Fingerprint conflict detected!");
}
}
/// Hashes an item together with its type id into a 128-bit value.
///
/// Note: This function is not stable across different versions of typst-ts,
/// so it is preferred to be always used in memory. Currently, this function
/// use [`SipHasher13`] as the underlying hash algorithm.
pub fn item_hash128<T: Hash + 'static>(item: &T) -> u128 {
    let mut hasher = SipHasher13::new();
    // Also hash the TypeId because the type might be converted
    // through an unsized coercion.
    item.type_id().hash(&mut hasher);
    item.hash(&mut hasher);

    let digest = hasher.finish128();
    digest.as_u128()
}
/// Calculates a 128-bit siphash of a value.
///
/// Unlike [`item_hash128`], the type id is not part of the hash. Currently,
/// this function use [`SipHasher13`] as the underlying hash algorithm.
#[inline]
pub fn hash128<T: std::hash::Hash>(value: &T) -> u128 {
    let mut hasher = SipHasher13::new();
    value.hash(&mut hasher);

    let digest = hasher.finish128();
    digest.as_u128()
}
/// A convenience function for when you need a quick 64-bit hash.
///
/// Uses [`FxHasher`] as the underlying hash algorithm.
#[inline]
pub fn hash64<T: Hash + ?Sized>(v: &T) -> u64 {
    let mut hasher = FxHasher::default();
    v.hash(&mut hasher);
    let digest: u64 = hasher.finish();
    digest
}
// todo: rustc-hash doesn't provide a 32-bit hash, so fxhash's one is re-exported
pub use fxhash::hash32;
#[test]
fn test_fingerprint() {
    // Every combination of zero and non-zero halves must survive a round trip
    // through `as_svg_id` and `try_from_str`.
    for (first, second) in [(0, 1), (1, 1), (1, 0), (0, 0)] {
        let fingerprint = Fingerprint::from_pair(first, second);
        let svg_id = fingerprint.as_svg_id("");
        assert_eq!(Fingerprint::try_from_str(&svg_id).unwrap(), fingerprint);
    }
}

View file

@ -0,0 +1,27 @@
#![allow(missing_docs)]
pub mod adt;
pub mod debug_loc;
pub mod error;
pub mod hash;
pub mod path;
pub mod time;
pub(crate) mod concepts;
pub use concepts::*;
pub use error::{ErrKind, Error};
#[cfg(feature = "typst")]
pub use typst_shim;
#[cfg(feature = "rkyv")]
use rkyv::{Archive, Deserialize as rDeser, Serialize as rSer};
/// The local id of an SVG item.
///
/// This id is only unique within the SVG document.
///
/// With the `rkyv` feature enabled, the rkyv archive, serialize and
/// deserialize traits are derived for this type; the `rkyv-validation`
/// feature additionally requests `check_bytes` on the archived form.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct DefId(pub u64);

View file

@ -0,0 +1,229 @@
use std::path::{Component, Path};
pub use path_clean::PathClean;
/// Renders the path as a unix-style string, joining its components with `/`.
///
/// Parent directory (`..`) components are kept as they are, current directory
/// (`.`) components are dropped, and a path that produces no output at all is
/// rendered as `"."`.
pub fn unix_slash(root: &Path) -> String {
    let mut out = String::with_capacity(root.as_os_str().len());
    // Whether the last visited component was a `..` or a normal name, in which
    // case the next such segment must be preceded by a separator.
    let mut after_segment = false;
    for component in root.components() {
        // Prefixes, the root and `.` never take a leading separator; only
        // `..` and normal names are joined with `/`.
        let segment = match component {
            Component::Prefix(prefix) => {
                out.push_str(&prefix.as_os_str().to_string_lossy());
                None
            }
            Component::RootDir => {
                out.push('/');
                None
            }
            Component::CurDir => None,
            Component::ParentDir | Component::Normal(_) => Some(component.as_os_str()),
        };
        if let Some(segment) = segment {
            if after_segment {
                out.push('/');
            }
            out.push_str(&segment.to_string_lossy());
        }
        after_segment = segment.is_some();
    }
    if out.is_empty() {
        out.push('.');
    }
    out
}
/// Get the path cleaned as a platform-style string.
pub use path_clean::clean;
#[cfg(test)]
mod test {
    use std::path::{Path, PathBuf};

    use super::{clean as inner_path_clean, unix_slash, PathClean};

    /// Cleans the path and renders the result as a unix-style string.
    pub fn clean<P: AsRef<Path>>(path: P) -> String {
        let cleaned = inner_path_clean(path);
        unix_slash(&cleaned)
    }

    /// Checks every `(input, expected)` pair against [`unix_slash`].
    fn assert_unix_slash(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(unix_slash(Path::new(input)), expected);
        }
    }

    /// Checks every `(input, expected)` pair against [`clean`].
    fn assert_cleaned(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(clean(input), expected);
        }
    }

    #[test]
    fn test_unix_slash() {
        if cfg!(target_os = "windows") {
            // windows group
            assert_unix_slash(&[
                ("C:\\Users\\a\\b\\c", "C:/Users/a/b/c"),
                ("C:\\Users\\a\\b\\c\\", "C:/Users/a/b/c"),
                ("a\\b\\c", "a/b/c"),
                ("C:\\", "C:/"),
                ("C:\\\\", "C:/"),
                ("C:", "C:"),
                ("C:\\a", "C:/a"),
                ("C:\\a\\", "C:/a"),
                ("C:\\a\\b", "C:/a/b"),
                ("C:\\Users\\a\\..\\b\\c", "C:/Users/a/../b/c"),
                ("C:\\Users\\a\\..\\b\\c\\", "C:/Users/a/../b/c"),
                ("C:\\Users\\a\\..\\..", "C:/Users/a/../.."),
                ("C:\\Users\\a\\..\\..\\", "C:/Users/a/../.."),
            ]);
        }
        // unix group
        assert_unix_slash(&[
            ("/a/b/c", "/a/b/c"),
            ("/a/b/c/", "/a/b/c"),
            ("/", "/"),
            ("//", "/"),
            ("a", "a"),
            ("a/", "a"),
            ("a/b", "a/b"),
            ("a/b/", "a/b"),
            ("a/..", "a/.."),
            ("a/../", "a/.."),
            ("a/../..", "a/../.."),
            ("a/../../", "a/../.."),
            ("a/./b", "a/b"),
            ("a/./b/", "a/b"),
            (".", "."),
            ("./", "."),
            ("./a", "a"),
            ("./a/", "a"),
            ("./a/b", "a/b"),
            ("./a/b/", "a/b"),
            ("./a/./b/", "a/b"),
        ]);
    }

    #[test]
    fn test_path_clean_empty_path_is_current_dir() {
        assert_eq!(clean(""), ".");
    }

    #[test]
    fn test_path_clean_clean_paths_dont_change() {
        assert_cleaned(&[(".", "."), ("..", ".."), ("/", "/")]);
    }

    #[test]
    fn test_path_clean_replace_multiple_slashes() {
        assert_cleaned(&[
            ("/", "/"),
            ("//", "/"),
            ("///", "/"),
            (".//", "."),
            ("//..", "/"),
            ("..//", ".."),
            ("/..//", "/"),
            ("/.//./", "/"),
            ("././/./", "."),
            ("path//to///thing", "path/to/thing"),
            ("/path//to///thing", "/path/to/thing"),
        ]);
    }

    #[test]
    fn test_path_clean_eliminate_current_dir() {
        assert_cleaned(&[
            ("./", "."),
            ("/./", "/"),
            ("./test", "test"),
            ("./test/./path", "test/path"),
            ("/test/./path/", "/test/path"),
            ("test/path/.", "test/path"),
        ]);
    }

    #[test]
    fn test_path_clean_eliminate_parent_dir() {
        assert_cleaned(&[
            ("/..", "/"),
            ("/../test", "/test"),
            ("test/..", "."),
            ("test/path/..", "test"),
            ("test/../path", "path"),
            ("/test/../path", "/path"),
            ("test/path/../../", "."),
            ("test/path/../../..", ".."),
            ("/test/path/../../..", "/"),
            ("/test/path/../../../..", "/"),
            ("test/path/../../../..", "../.."),
            ("test/path/../../another/path", "another/path"),
            ("test/path/../../another/path/..", "another"),
            ("../test", "../test"),
            ("../test/", "../test"),
            ("../test/path", "../test/path"),
            ("../test/..", ".."),
        ]);
    }

    #[test]
    fn test_path_clean_pathbuf_trait() {
        let cleaned = PathBuf::from("/test/../path/").clean();
        assert_eq!(unix_slash(&cleaned), "/path");
    }

    #[test]
    fn test_path_clean_path_trait() {
        let cleaned = Path::new("/test/../path/").clean();
        assert_eq!(unix_slash(&cleaned), "/path");
    }

    #[test]
    #[cfg(target_os = "windows")]
    fn test_path_clean_windows_paths() {
        assert_cleaned(&[
            ("\\..", "/"),
            ("\\..\\test", "/test"),
            ("test\\..", "."),
            ("test\\path\\..\\..\\..", ".."),
            ("test\\path/..\\../another\\path", "another/path"), // Mixed
            ("test\\path\\my/path", "test/path/my/path"),        // Mixed 2
            ("/dir\\../otherDir/test.json", "/otherDir/test.json"), // User example
            ("c:\\test\\..", "c:/"),                             // issue #12
            ("c:/test/..", "c:/"),                               // issue #12
        ]);
    }
}

View file

@ -0,0 +1,23 @@
pub use std::time::SystemTime as Time;
pub use web_time::Duration;
pub use web_time::Instant;
/// Returns the current system time (UTC+0).
#[cfg(any(feature = "system", feature = "web"))]
pub fn now() -> Time {
    // Everywhere except wasm targets with an unknown OS, the standard library
    // clock is used directly.
    #[cfg(not(all(target_family = "wasm", target_os = "unknown")))]
    fn current() -> Time {
        Time::now()
    }

    // On wasm targets with an unknown OS, the time is taken from `web_time`
    // and converted back into the std type.
    #[cfg(all(target_family = "wasm", target_os = "unknown"))]
    fn current() -> Time {
        use web_time::web::SystemTimeExt;

        web_time::SystemTime::now().to_std()
    }

    current()
}
/// Returns a dummy time (the Unix epoch) on environments that do not support
/// time.
#[cfg(not(any(feature = "system", feature = "web")))]
pub fn now() -> Time {
    std::time::UNIX_EPOCH
}