1use std::collections::VecDeque;
39
40use intern::Symbol;
41use rustc_hash::FxHashMap;
42use serde_derive::{Deserialize, Serialize};
43use span::{EditionedFileId, ErasedFileAstId, Span, SpanAnchor, SyntaxContext, TextRange};
44
45use crate::{
46 legacy_protocol::SpanId,
47 version::{ENCODE_CLOSE_SPAN_VERSION, EXTENDED_LEAF_DATA},
48};
49
50pub type SpanDataIndexMap =
51 indexmap::IndexSet<Span, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>;
52
53pub fn serialize_span_data_index_map(map: &SpanDataIndexMap) -> Vec<u32> {
54 map.iter()
55 .flat_map(|span| {
56 [
57 span.anchor.file_id.as_u32(),
58 span.anchor.ast_id.into_raw(),
59 span.range.start().into(),
60 span.range.end().into(),
61 span.ctx.into_u32(),
62 ]
63 })
64 .collect()
65}
66
67pub fn deserialize_span_data_index_map(map: &[u32]) -> SpanDataIndexMap {
68 debug_assert!(map.len().is_multiple_of(5));
69 map.chunks_exact(5)
70 .map(|span| {
71 let &[file_id, ast_id, start, end, e] = span else { unreachable!() };
72 Span {
73 anchor: SpanAnchor {
74 file_id: EditionedFileId::from_raw(file_id),
75 ast_id: ErasedFileAstId::from_raw(ast_id),
76 },
77 range: TextRange::new(start.into(), end.into()),
78 ctx: unsafe { SyntaxContext::from_u32(e) },
81 }
82 })
83 .collect()
84}
85
86#[derive(Serialize, Deserialize, Debug)]
87pub struct FlatTree {
88 subtree: Vec<u32>,
89 literal: Vec<u32>,
90 punct: Vec<u32>,
91 ident: Vec<u32>,
92 token_tree: Vec<u32>,
93 text: Vec<String>,
94}
95
96struct SubtreeRepr {
97 open: SpanId,
98 close: SpanId,
99 kind: tt::DelimiterKind,
100 tt: [u32; 2],
101}
102
103struct LiteralRepr {
104 id: SpanId,
105 text: u32,
106 suffix: u32,
107 kind: u16,
108}
109
110struct PunctRepr {
111 id: SpanId,
112 char: char,
113 spacing: tt::Spacing,
114}
115
116struct IdentRepr {
117 id: SpanId,
118 text: u32,
119 is_raw: bool,
120}
121
122impl FlatTree {
123 pub fn new(
124 subtree: tt::SubtreeView<'_, Span>,
125 version: u32,
126 span_data_table: &mut SpanDataIndexMap,
127 ) -> FlatTree {
128 let mut w = Writer::<Span> {
129 string_table: FxHashMap::default(),
130 work: VecDeque::new(),
131 span_data_table,
132
133 subtree: Vec::new(),
134 literal: Vec::new(),
135 punct: Vec::new(),
136 ident: Vec::new(),
137 token_tree: Vec::new(),
138 text: Vec::new(),
139 version,
140 };
141 w.write(subtree);
142
143 FlatTree {
144 subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
145 write_vec(w.subtree, SubtreeRepr::write_with_close_span)
146 } else {
147 write_vec(w.subtree, SubtreeRepr::write)
148 },
149 literal: if version >= EXTENDED_LEAF_DATA {
150 write_vec(w.literal, LiteralRepr::write_with_kind)
151 } else {
152 write_vec(w.literal, LiteralRepr::write)
153 },
154 punct: write_vec(w.punct, PunctRepr::write),
155 ident: if version >= EXTENDED_LEAF_DATA {
156 write_vec(w.ident, IdentRepr::write_with_rawness)
157 } else {
158 write_vec(w.ident, IdentRepr::write)
159 },
160 token_tree: w.token_tree,
161 text: w.text,
162 }
163 }
164
165 pub fn new_raw<T: SpanTransformer<Table = ()>>(
166 subtree: tt::SubtreeView<'_, T::Span>,
167 version: u32,
168 ) -> FlatTree {
169 let mut w = Writer::<T> {
170 string_table: FxHashMap::default(),
171 work: VecDeque::new(),
172 span_data_table: &mut (),
173
174 subtree: Vec::new(),
175 literal: Vec::new(),
176 punct: Vec::new(),
177 ident: Vec::new(),
178 token_tree: Vec::new(),
179 text: Vec::new(),
180 version,
181 };
182 w.write(subtree);
183
184 FlatTree {
185 subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
186 write_vec(w.subtree, SubtreeRepr::write_with_close_span)
187 } else {
188 write_vec(w.subtree, SubtreeRepr::write)
189 },
190 literal: if version >= EXTENDED_LEAF_DATA {
191 write_vec(w.literal, LiteralRepr::write_with_kind)
192 } else {
193 write_vec(w.literal, LiteralRepr::write)
194 },
195 punct: write_vec(w.punct, PunctRepr::write),
196 ident: if version >= EXTENDED_LEAF_DATA {
197 write_vec(w.ident, IdentRepr::write_with_rawness)
198 } else {
199 write_vec(w.ident, IdentRepr::write)
200 },
201 token_tree: w.token_tree,
202 text: w.text,
203 }
204 }
205
206 pub fn to_subtree_resolved(
207 self,
208 version: u32,
209 span_data_table: &SpanDataIndexMap,
210 ) -> tt::TopSubtree<Span> {
211 Reader::<Span> {
212 subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
213 read_vec(self.subtree, SubtreeRepr::read_with_close_span)
214 } else {
215 read_vec(self.subtree, SubtreeRepr::read)
216 },
217 literal: if version >= EXTENDED_LEAF_DATA {
218 read_vec(self.literal, LiteralRepr::read_with_kind)
219 } else {
220 read_vec(self.literal, LiteralRepr::read)
221 },
222 punct: read_vec(self.punct, PunctRepr::read),
223 ident: if version >= EXTENDED_LEAF_DATA {
224 read_vec(self.ident, IdentRepr::read_with_rawness)
225 } else {
226 read_vec(self.ident, IdentRepr::read)
227 },
228 token_tree: self.token_tree,
229 text: self.text,
230 span_data_table,
231 version,
232 }
233 .read()
234 }
235
236 pub fn to_subtree_unresolved<T: SpanTransformer<Table = ()>>(
237 self,
238 version: u32,
239 ) -> tt::TopSubtree<T::Span> {
240 Reader::<T> {
241 subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
242 read_vec(self.subtree, SubtreeRepr::read_with_close_span)
243 } else {
244 read_vec(self.subtree, SubtreeRepr::read)
245 },
246 literal: if version >= EXTENDED_LEAF_DATA {
247 read_vec(self.literal, LiteralRepr::read_with_kind)
248 } else {
249 read_vec(self.literal, LiteralRepr::read)
250 },
251 punct: read_vec(self.punct, PunctRepr::read),
252 ident: if version >= EXTENDED_LEAF_DATA {
253 read_vec(self.ident, IdentRepr::read_with_rawness)
254 } else {
255 read_vec(self.ident, IdentRepr::read)
256 },
257 token_tree: self.token_tree,
258 text: self.text,
259 span_data_table: &(),
260 version,
261 }
262 .read()
263 }
264}
265
266fn read_vec<T, F: Fn([u32; N]) -> T, const N: usize>(xs: Vec<u32>, f: F) -> Vec<T> {
267 let mut chunks = xs.chunks_exact(N);
268 let res = chunks.by_ref().map(|chunk| f(chunk.try_into().unwrap())).collect();
269 assert!(chunks.remainder().is_empty());
270 res
271}
272
273fn write_vec<T, F: Fn(T) -> [u32; N], const N: usize>(xs: Vec<T>, f: F) -> Vec<u32> {
274 xs.into_iter().flat_map(f).collect()
275}
276
277impl SubtreeRepr {
278 fn write(self) -> [u32; 4] {
279 let kind = match self.kind {
280 tt::DelimiterKind::Invisible => 0,
281 tt::DelimiterKind::Parenthesis => 1,
282 tt::DelimiterKind::Brace => 2,
283 tt::DelimiterKind::Bracket => 3,
284 };
285 [self.open.0, kind, self.tt[0], self.tt[1]]
286 }
287 fn read([open, kind, lo, len]: [u32; 4]) -> SubtreeRepr {
288 let kind = match kind {
289 0 => tt::DelimiterKind::Invisible,
290 1 => tt::DelimiterKind::Parenthesis,
291 2 => tt::DelimiterKind::Brace,
292 3 => tt::DelimiterKind::Bracket,
293 other => panic!("bad kind {other}"),
294 };
295 SubtreeRepr { open: SpanId(open), close: SpanId(!0), kind, tt: [lo, len] }
296 }
297 fn write_with_close_span(self) -> [u32; 5] {
298 let kind = match self.kind {
299 tt::DelimiterKind::Invisible => 0,
300 tt::DelimiterKind::Parenthesis => 1,
301 tt::DelimiterKind::Brace => 2,
302 tt::DelimiterKind::Bracket => 3,
303 };
304 [self.open.0, self.close.0, kind, self.tt[0], self.tt[1]]
305 }
306 fn read_with_close_span([open, close, kind, lo, len]: [u32; 5]) -> SubtreeRepr {
307 let kind = match kind {
308 0 => tt::DelimiterKind::Invisible,
309 1 => tt::DelimiterKind::Parenthesis,
310 2 => tt::DelimiterKind::Brace,
311 3 => tt::DelimiterKind::Bracket,
312 other => panic!("bad kind {other}"),
313 };
314 SubtreeRepr { open: SpanId(open), close: SpanId(close), kind, tt: [lo, len] }
315 }
316}
317
318impl LiteralRepr {
319 fn write(self) -> [u32; 2] {
320 [self.id.0, self.text]
321 }
322 fn read([id, text]: [u32; 2]) -> LiteralRepr {
323 LiteralRepr { id: SpanId(id), text, kind: 0, suffix: !0 }
324 }
325 fn write_with_kind(self) -> [u32; 4] {
326 [self.id.0, self.text, self.kind as u32, self.suffix]
327 }
328 fn read_with_kind([id, text, kind, suffix]: [u32; 4]) -> LiteralRepr {
329 LiteralRepr { id: SpanId(id), text, kind: kind as u16, suffix }
330 }
331}
332
333impl PunctRepr {
334 fn write(self) -> [u32; 3] {
335 let spacing = match self.spacing {
336 tt::Spacing::Alone | tt::Spacing::JointHidden => 0,
337 tt::Spacing::Joint => 1,
338 };
339 [self.id.0, self.char as u32, spacing]
340 }
341 fn read([id, char, spacing]: [u32; 3]) -> PunctRepr {
342 let spacing = match spacing {
343 0 => tt::Spacing::Alone,
344 1 => tt::Spacing::Joint,
345 other => panic!("bad spacing {other}"),
346 };
347 PunctRepr { id: SpanId(id), char: char.try_into().unwrap(), spacing }
348 }
349}
350
351impl IdentRepr {
352 fn write(self) -> [u32; 2] {
353 [self.id.0, self.text]
354 }
355 fn read(data: [u32; 2]) -> IdentRepr {
356 IdentRepr { id: SpanId(data[0]), text: data[1], is_raw: false }
357 }
358 fn write_with_rawness(self) -> [u32; 3] {
359 [self.id.0, self.text, self.is_raw as u32]
360 }
361 fn read_with_rawness([id, text, is_raw]: [u32; 3]) -> IdentRepr {
362 IdentRepr { id: SpanId(id), text, is_raw: is_raw == 1 }
363 }
364}
365
366pub trait SpanTransformer {
367 type Table;
368 type Span: Copy;
369 fn token_id_of(table: &mut Self::Table, s: Self::Span) -> SpanId;
370 fn span_for_token_id(table: &Self::Table, id: SpanId) -> Self::Span;
371}
372impl SpanTransformer for SpanId {
373 type Table = ();
374 type Span = Self;
375 fn token_id_of((): &mut Self::Table, token_id: Self::Span) -> SpanId {
376 token_id
377 }
378
379 fn span_for_token_id((): &Self::Table, id: SpanId) -> Self::Span {
380 id
381 }
382}
383impl SpanTransformer for Span {
384 type Table = SpanDataIndexMap;
385 type Span = Self;
386 fn token_id_of(table: &mut Self::Table, span: Self::Span) -> SpanId {
387 SpanId(table.insert_full(span).0 as u32)
388 }
389 fn span_for_token_id(table: &Self::Table, id: SpanId) -> Self::Span {
390 *table.get_index(id.0 as usize).unwrap_or_else(|| &table[0])
391 }
392}
393
394struct Writer<'a, 'span, S: SpanTransformer> {
395 work: VecDeque<(usize, tt::iter::TtIter<'a, S::Span>)>,
396 string_table: FxHashMap<std::borrow::Cow<'a, str>, u32>,
397 span_data_table: &'span mut S::Table,
398 version: u32,
399
400 subtree: Vec<SubtreeRepr>,
401 literal: Vec<LiteralRepr>,
402 punct: Vec<PunctRepr>,
403 ident: Vec<IdentRepr>,
404 token_tree: Vec<u32>,
405 text: Vec<String>,
406}
407
408impl<'a, T: SpanTransformer> Writer<'a, '_, T> {
409 fn write(&mut self, root: tt::SubtreeView<'a, T::Span>) {
410 let subtree = root.top_subtree();
411 self.enqueue(subtree, root.iter());
412 while let Some((idx, subtree)) = self.work.pop_front() {
413 self.subtree(idx, subtree);
414 }
415 }
416
417 fn token_id_of(&mut self, span: T::Span) -> SpanId {
418 T::token_id_of(self.span_data_table, span)
419 }
420
421 fn subtree(&mut self, idx: usize, subtree: tt::iter::TtIter<'a, T::Span>) {
422 let mut first_tt = self.token_tree.len();
423 let n_tt = subtree.clone().count(); self.token_tree.resize(first_tt + n_tt, !0);
425
426 self.subtree[idx].tt = [first_tt as u32, (first_tt + n_tt) as u32];
427
428 for child in subtree {
429 let idx_tag = match child {
430 tt::iter::TtElement::Subtree(subtree, subtree_iter) => {
431 let idx = self.enqueue(subtree, subtree_iter);
432 idx << 2
433 }
434 tt::iter::TtElement::Leaf(leaf) => match leaf {
435 tt::Leaf::Literal(lit) => {
436 let idx = self.literal.len() as u32;
437 let id = self.token_id_of(lit.span);
438 let (text, suffix) = if self.version >= EXTENDED_LEAF_DATA {
439 (
440 self.intern(lit.symbol.as_str()),
441 lit.suffix.as_ref().map(|s| self.intern(s.as_str())).unwrap_or(!0),
442 )
443 } else {
444 (self.intern_owned(format!("{lit}")), !0)
445 };
446 self.literal.push(LiteralRepr {
447 id,
448 text,
449 kind: u16::from_le_bytes(match lit.kind {
450 tt::LitKind::Err(_) => [0, 0],
451 tt::LitKind::Byte => [1, 0],
452 tt::LitKind::Char => [2, 0],
453 tt::LitKind::Integer => [3, 0],
454 tt::LitKind::Float => [4, 0],
455 tt::LitKind::Str => [5, 0],
456 tt::LitKind::StrRaw(r) => [6, r],
457 tt::LitKind::ByteStr => [7, 0],
458 tt::LitKind::ByteStrRaw(r) => [8, r],
459 tt::LitKind::CStr => [9, 0],
460 tt::LitKind::CStrRaw(r) => [10, r],
461 }),
462 suffix,
463 });
464 (idx << 2) | 0b01
465 }
466 tt::Leaf::Punct(punct) => {
467 let idx = self.punct.len() as u32;
468 let id = self.token_id_of(punct.span);
469 self.punct.push(PunctRepr { char: punct.char, spacing: punct.spacing, id });
470 (idx << 2) | 0b10
471 }
472 tt::Leaf::Ident(ident) => {
473 let idx = self.ident.len() as u32;
474 let id = self.token_id_of(ident.span);
475 let text = if self.version >= EXTENDED_LEAF_DATA {
476 self.intern(ident.sym.as_str())
477 } else if ident.is_raw.yes() {
478 self.intern_owned(format!("r#{}", ident.sym.as_str(),))
479 } else {
480 self.intern(ident.sym.as_str())
481 };
482 self.ident.push(IdentRepr { id, text, is_raw: ident.is_raw.yes() });
483 (idx << 2) | 0b11
484 }
485 },
486 };
487 self.token_tree[first_tt] = idx_tag;
488 first_tt += 1;
489 }
490 }
491
492 fn enqueue(
493 &mut self,
494 subtree: &'a tt::Subtree<T::Span>,
495 contents: tt::iter::TtIter<'a, T::Span>,
496 ) -> u32 {
497 let idx = self.subtree.len();
498 let open = self.token_id_of(subtree.delimiter.open);
499 let close = self.token_id_of(subtree.delimiter.close);
500 let delimiter_kind = subtree.delimiter.kind;
501 self.subtree.push(SubtreeRepr { open, close, kind: delimiter_kind, tt: [!0, !0] });
502 self.work.push_back((idx, contents));
503 idx as u32
504 }
505
506 pub(crate) fn intern(&mut self, text: &'a str) -> u32 {
507 let table = &mut self.text;
508 *self.string_table.entry(text.into()).or_insert_with(|| {
509 let idx = table.len();
510 table.push(text.to_owned());
511 idx as u32
512 })
513 }
514
515 pub(crate) fn intern_owned(&mut self, text: String) -> u32 {
516 let table = &mut self.text;
517 *self.string_table.entry(text.clone().into()).or_insert_with(|| {
518 let idx = table.len();
519 table.push(text);
520 idx as u32
521 })
522 }
523}
524
525struct Reader<'span, S: SpanTransformer> {
526 version: u32,
527 subtree: Vec<SubtreeRepr>,
528 literal: Vec<LiteralRepr>,
529 punct: Vec<PunctRepr>,
530 ident: Vec<IdentRepr>,
531 token_tree: Vec<u32>,
532 text: Vec<String>,
533 span_data_table: &'span S::Table,
534}
535
536impl<T: SpanTransformer> Reader<'_, T> {
537 pub(crate) fn read(self) -> tt::TopSubtree<T::Span> {
538 let mut res: Vec<Option<(tt::Delimiter<T::Span>, Vec<tt::TokenTree<T::Span>>)>> =
539 vec![None; self.subtree.len()];
540 let read_span = |id| T::span_for_token_id(self.span_data_table, id);
541 for i in (0..self.subtree.len()).rev() {
542 let repr = &self.subtree[i];
543 let token_trees = &self.token_tree[repr.tt[0] as usize..repr.tt[1] as usize];
544 let delimiter = tt::Delimiter {
545 open: read_span(repr.open),
546 close: read_span(repr.close),
547 kind: repr.kind,
548 };
549 let mut s = Vec::new();
550 for &idx_tag in token_trees {
551 let tag = idx_tag & 0b11;
552 let idx = (idx_tag >> 2) as usize;
553 match tag {
554 0b00 => {
557 let (delimiter, subtree) = res[idx].take().unwrap();
558 s.push(tt::TokenTree::Subtree(tt::Subtree {
559 delimiter,
560 len: subtree.len() as u32,
561 }));
562 s.extend(subtree)
563 }
564 0b01 => {
565 use tt::LitKind::*;
566 let repr = &self.literal[idx];
567 let text = self.text[repr.text as usize].as_str();
568 let span = read_span(repr.id);
569 s.push(
570 tt::Leaf::Literal(if self.version >= EXTENDED_LEAF_DATA {
571 tt::Literal {
572 symbol: Symbol::intern(text),
573 span,
574 kind: match u16::to_le_bytes(repr.kind) {
575 [0, _] => Err(()),
576 [1, _] => Byte,
577 [2, _] => Char,
578 [3, _] => Integer,
579 [4, _] => Float,
580 [5, _] => Str,
581 [6, r] => StrRaw(r),
582 [7, _] => ByteStr,
583 [8, r] => ByteStrRaw(r),
584 [9, _] => CStr,
585 [10, r] => CStrRaw(r),
586 _ => unreachable!(),
587 },
588 suffix: if repr.suffix != !0 {
589 Some(Symbol::intern(
590 self.text[repr.suffix as usize].as_str(),
591 ))
592 } else {
593 None
594 },
595 }
596 } else {
597 tt::token_to_literal(text, span)
598 })
599 .into(),
600 )
601 }
602 0b10 => {
603 let repr = &self.punct[idx];
604 s.push(
605 tt::Leaf::Punct(tt::Punct {
606 char: repr.char,
607 spacing: repr.spacing,
608 span: read_span(repr.id),
609 })
610 .into(),
611 )
612 }
613 0b11 => {
614 let repr = &self.ident[idx];
615 let text = self.text[repr.text as usize].as_str();
616 let (is_raw, text) = if self.version >= EXTENDED_LEAF_DATA {
617 (
618 if repr.is_raw { tt::IdentIsRaw::Yes } else { tt::IdentIsRaw::No },
619 text,
620 )
621 } else {
622 tt::IdentIsRaw::split_from_symbol(text)
623 };
624 s.push(
625 tt::Leaf::Ident(tt::Ident {
626 sym: Symbol::intern(text),
627 span: read_span(repr.id),
628 is_raw,
629 })
630 .into(),
631 )
632 }
633 other => panic!("bad tag: {other}"),
634 }
635 }
636 res[i] = Some((delimiter, s));
637 }
638
639 let (delimiter, mut res) = res[0].take().unwrap();
640 res.insert(0, tt::TokenTree::Subtree(tt::Subtree { delimiter, len: res.len() as u32 }));
641 tt::TopSubtree(res.into_boxed_slice())
642 }
643}