mirror of
https://github.com/joshuadavidthomas/django-language-server.git
synced 2025-12-23 08:47:53 +00:00
Add BlockTree prototype with TagIndex grammar (#241)
Some checks are pending
lint / rustfmt (push) Waiting to run
test / generate-matrix (push) Waiting to run
test / tests (push) Blocked by required conditions
zizmor 🌈 / zizmor latest via PyPI (push) Waiting to run
lint / cargo-check (push) Waiting to run
lint / clippy (push) Waiting to run
lint / pre-commit (push) Waiting to run
release / release (push) Blocked by required conditions
release / build (push) Waiting to run
release / test (push) Waiting to run
test / Python, Django () (push) Blocked by required conditions
This commit is contained in:
parent 3d140e5e1f
commit a080e18279
33 changed files with 2062 additions and 307 deletions
2 Cargo.lock generated
@@ -486,6 +486,7 @@ dependencies = [
 "djls-source",
 "djls-templates",
 "djls-workspace",
 "insta",
 "rustc-hash",
 "salsa",
 "serde",

@@ -527,6 +528,7 @@ dependencies = [
 "camino",
 "salsa",
 "serde",
 "thiserror 2.0.16",
 ]

 [[package]]
@@ -390,7 +390,7 @@ fn generate_tag_name_completions(
     let specs = tag_specs.unwrap();

     // Add all end tags that match the partial
-    for (opener_name, spec) in specs.iter() {
+    for (opener_name, spec) in specs {
         if let Some(end_tag) = &spec.end_tag {
             if end_tag.name.starts_with(partial) {
                 // Create a completion for the end tag
@@ -1,6 +1,7 @@
 use djls_semantic::ValidationError;
 use djls_source::File;
 use djls_source::LineIndex;
+use djls_source::Offset;
 use djls_source::Span;
 use djls_templates::TemplateError;
 use djls_templates::TemplateErrorAccumulator;
@@ -13,6 +14,30 @@ trait DiagnosticError: std::fmt::Display {
     fn message(&self) -> String {
         self.to_string()
     }
+
+    fn as_diagnostic(&self, line_index: &LineIndex) -> lsp_types::Diagnostic {
+        let range = self
+            .span()
+            .map(|(start, length)| {
+                let span = Span::new(start, length);
+                LspRange::from((&span, line_index)).into()
+            })
+            .unwrap_or_default();
+
+        lsp_types::Diagnostic {
+            range,
+            severity: Some(lsp_types::DiagnosticSeverity::ERROR),
+            code: Some(lsp_types::NumberOrString::String(
+                self.diagnostic_code().to_string(),
+            )),
+            code_description: None,
+            source: Some("Django Language Server".to_string()),
+            message: self.message(),
+            related_information: None,
+            tags: None,
+            data: None,
+        }
+    }
 }

 impl DiagnosticError for TemplateError {
@@ -32,14 +57,12 @@ impl DiagnosticError for TemplateError {
 impl DiagnosticError for ValidationError {
     fn span(&self) -> Option<(u32, u32)> {
         match self {
-            ValidationError::UnbalancedStructure { opening_span, .. } => {
-                Some(opening_span.as_tuple())
-            }
+            ValidationError::UnbalancedStructure { opening_span, .. } => Some(opening_span.into()),
             ValidationError::UnclosedTag { span, .. }
             | ValidationError::OrphanedTag { span, .. }
             | ValidationError::UnmatchedBlockName { span, .. }
             | ValidationError::MissingRequiredArguments { span, .. }
-            | ValidationError::TooManyArguments { span, .. } => Some(span.as_tuple()),
+            | ValidationError::TooManyArguments { span, .. } => Some(span.into()),
         }
     }
@@ -55,47 +78,43 @@ impl DiagnosticError for ValidationError {
     }
 }

-/// Convert a Span to an LSP Range using line offsets.
-fn span_to_lsp_range(span: Span, line_index: &LineIndex) -> lsp_types::Range {
-    let (start_pos, end_pos) = span.to_line_col(line_index);
+#[derive(Debug, Clone, Copy, PartialEq)]
+#[repr(transparent)]
+pub struct LspRange(pub lsp_types::Range);

-    lsp_types::Range {
-        start: lsp_types::Position {
-            line: start_pos.line(),
-            character: start_pos.column(),
-        },
-        end: lsp_types::Position {
-            line: end_pos.line(),
-            character: end_pos.column(),
-        },
+impl From<(&Span, &LineIndex)> for LspRange {
+    #[inline]
+    fn from((s, line_index): (&Span, &LineIndex)) -> Self {
+        let start = LspPosition::from((s.start_offset(), line_index)).into();
+        let end = LspPosition::from((s.end_offset(), line_index)).into();
+
+        LspRange(lsp_types::Range { start, end })
     }
 }

-/// Convert any error implementing `DiagnosticError` to an LSP diagnostic.
-fn error_to_diagnostic(
-    error: &impl DiagnosticError,
-    line_index: &LineIndex,
-) -> lsp_types::Diagnostic {
-    let range = error
-        .span()
-        .map(|(start, length)| {
-            let span = Span::new(start, length);
-            span_to_lsp_range(span, line_index)
-        })
-        .unwrap_or_default();
+impl From<LspRange> for lsp_types::Range {
+    #[inline]
+    fn from(value: LspRange) -> Self {
+        value.0
+    }
+}

-    lsp_types::Diagnostic {
-        range,
-        severity: Some(lsp_types::DiagnosticSeverity::ERROR),
-        code: Some(lsp_types::NumberOrString::String(
-            error.diagnostic_code().to_string(),
-        )),
-        code_description: None,
-        source: Some("Django Language Server".to_string()),
-        message: error.message(),
-        related_information: None,
-        tags: None,
-        data: None,
+#[derive(Debug, Clone, Copy, PartialEq)]
+#[repr(transparent)]
+pub struct LspPosition(pub lsp_types::Position);
+
+impl From<(Offset, &LineIndex)> for LspPosition {
+    #[inline]
+    fn from((offset, line_index): (Offset, &LineIndex)) -> Self {
+        let (line, character) = line_index.to_line_col(offset).into();
+        Self(lsp_types::Position { line, character })
+    }
+}
+
+impl From<LspPosition> for lsp_types::Position {
+    #[inline]
+    fn from(value: LspPosition) -> Self {
+        value.0
+    }
+}
@@ -133,7 +152,7 @@ pub fn collect_diagnostics(
     let line_index = file.line_index(db);

     for error_acc in template_errors {
-        diagnostics.push(error_to_diagnostic(&error_acc.0, line_index));
+        diagnostics.push(error_acc.0.as_diagnostic(line_index));
     }

     if let Some(nodelist) = nodelist {

@@ -142,7 +161,7 @@ pub fn collect_diagnostics(
     >(db, nodelist);

     for error_acc in validation_errors {
-        diagnostics.push(error_to_diagnostic(&error_acc.0, line_index));
+        diagnostics.push(error_acc.0.as_diagnostic(line_index));
     }
 }
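The hunks above replace the free functions span_to_lsp_range and error_to_diagnostic with From conversions on newtype wrappers, so a span-to-range conversion is now a chain of From impls. A minimal sketch of the new call shape (the text and span values here are made up for illustration):

let line_index = LineIndex::from("{% if user %}\nhi\n{% endif %}");
let span = Span::new(14, 2); // hypothetical (start, length) pair
let range: lsp_types::Range = LspRange::from((&span, &line_index)).into();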
@@ -16,6 +16,7 @@ serde = { workspace = true }
 thiserror = { workspace = true }

 [dev-dependencies]
 insta = { workspace = true }
 tempfile = { workspace = true }

 [lints]
5 crates/djls-semantic/src/blocks.rs Normal file
@@ -0,0 +1,5 @@
mod builder;
mod grammar;
mod nodes;
mod snapshot;
mod tree;
418 crates/djls-semantic/src/blocks/builder.rs Normal file
@@ -0,0 +1,418 @@
use djls_source::Span;
use djls_templates::nodelist::TagBit;
use djls_templates::nodelist::TagName;
use djls_templates::tokens::TagDelimiter;
use djls_templates::Node;

use super::grammar::CloseValidation;
use super::grammar::TagClass;
use super::grammar::TagIndex;
use super::nodes::BlockId;
use super::nodes::BlockNode;
use super::nodes::BranchKind;
use super::tree::BlockTree;
use crate::traits::SemanticModel;
use crate::Db;

#[derive(Debug, Clone)]
enum BlockSemantics {
    AddRoot {
        id: BlockId,
    },
    AddBranchNode {
        target: BlockId,
        tag: String,
        marker_span: Span,
        body: BlockId,
        kind: BranchKind,
    },
    AddErrorNode {
        target: BlockId,
        message: String,
        span: Span,
    },
    AddLeafNode {
        target: BlockId,
        label: String,
        span: Span,
    },
    ExtendBlockSpan {
        id: BlockId,
        span: Span,
    },
    FinalizeSpanTo {
        id: BlockId,
        end: u32,
    },
}

pub struct BlockTreeBuilder<'db> {
    db: &'db dyn Db,
    index: &'db TagIndex,
    stack: Vec<TreeFrame<'db>>,
    block_allocs: Vec<(Span, Option<BlockId>)>,
    semantic_ops: Vec<BlockSemantics>,
}

impl<'db> BlockTreeBuilder<'db> {
    #[allow(dead_code)] // use is gated behind cfg(test) for now
    pub fn new(db: &'db dyn Db, index: &'db TagIndex) -> Self {
        Self {
            db,
            index,
            stack: Vec::new(),
            block_allocs: Vec::new(),
            semantic_ops: Vec::new(),
        }
    }

    /// Allocate a new `BlockId` and track its metadata for later creation
    fn alloc_block_id(&mut self, span: Span, parent: Option<BlockId>) -> BlockId {
        let id = BlockId::new(u32::try_from(self.block_allocs.len()).unwrap_or_default());
        self.block_allocs.push((span, parent));
        id
    }

    /// Apply all semantic operations to build a `BlockTree`
    fn apply_operations(self) -> BlockTree {
        let mut tree = BlockTree::new();

        // Allocate all blocks using metadata
        for (span, parent) in self.block_allocs {
            if let Some(p) = parent {
                tree.blocks_mut().alloc(span, Some(p));
            } else {
                tree.blocks_mut().alloc(span, None);
            }
        }

        for op in self.semantic_ops {
            match op {
                BlockSemantics::AddRoot { id } => {
                    tree.roots_mut().push(id);
                }
                BlockSemantics::AddBranchNode {
                    target,
                    tag,
                    marker_span,
                    body,
                    kind,
                } => {
                    tree.blocks_mut().push_node(
                        target,
                        BlockNode::Branch {
                            tag,
                            marker_span,
                            body,
                            kind,
                        },
                    );
                }
                BlockSemantics::AddLeafNode {
                    target,
                    label,
                    span,
                } => {
                    tree.blocks_mut()
                        .push_node(target, BlockNode::Leaf { label, span });
                }
                BlockSemantics::AddErrorNode {
                    target,
                    message,
                    span,
                } => {
                    tree.blocks_mut()
                        .push_node(target, BlockNode::Error { message, span });
                }
                BlockSemantics::ExtendBlockSpan { id, span } => {
                    tree.blocks_mut().extend_block(id, span);
                }
                BlockSemantics::FinalizeSpanTo { id, end } => {
                    tree.blocks_mut().finalize_block_span(id, end);
                }
            }
        }

        tree
    }

    fn handle_tag(&mut self, name: TagName<'db>, bits: Vec<TagBit<'db>>, span: Span) {
        let tag_name = name.text(self.db);
        match self.index.classify(&tag_name) {
            TagClass::Opener => {
                let parent = get_active_segment(&self.stack);

                let container = self.alloc_block_id(span, parent);
                let segment = self.alloc_block_id(
                    Span::new(span.end().saturating_add(TagDelimiter::LENGTH_U32), 0),
                    Some(container),
                );

                if let Some(parent_id) = parent {
                    // Nested block
                    self.semantic_ops.push(BlockSemantics::AddBranchNode {
                        target: parent_id,
                        tag: tag_name.clone(),
                        marker_span: span,
                        body: container,
                        kind: BranchKind::Opener,
                    });
                    self.semantic_ops.push(BlockSemantics::AddBranchNode {
                        target: container,
                        tag: tag_name.clone(),
                        marker_span: span,
                        body: segment,
                        kind: BranchKind::Segment,
                    });
                } else {
                    // Root block
                    self.semantic_ops
                        .push(BlockSemantics::AddRoot { id: container });
                    self.semantic_ops.push(BlockSemantics::AddBranchNode {
                        target: container,
                        tag: tag_name.clone(),
                        marker_span: span,
                        body: segment,
                        kind: BranchKind::Segment,
                    });
                }

                self.stack.push(TreeFrame {
                    opener_name: tag_name,
                    opener_bits: bits,
                    opener_span: span,
                    container_body: container,
                    segment_body: segment,
                    parent_body: parent,
                });
            }
            TagClass::Closer { opener_name } => {
                self.close_block(&opener_name, &bits, span);
            }
            TagClass::Intermediate { possible_openers } => {
                self.add_intermediate(&tag_name, &possible_openers, span);
            }
            TagClass::Unknown => {
                if let Some(segment) = get_active_segment(&self.stack) {
                    self.semantic_ops.push(BlockSemantics::AddLeafNode {
                        target: segment,
                        label: tag_name,
                        span,
                    });
                }
            }
        }
    }

    fn close_block(&mut self, opener_name: &str, closer_bits: &[TagBit<'db>], span: Span) {
        if let Some(frame_idx) = find_frame_from_opener(&self.stack, opener_name) {
            // Pop any unclosed blocks above this one
            while self.stack.len() > frame_idx + 1 {
                if let Some(unclosed) = self.stack.pop() {
                    if let Some(parent) = unclosed.parent_body {
                        self.semantic_ops.push(BlockSemantics::AddErrorNode {
                            target: parent,
                            message: format!("Unclosed block '{}'", unclosed.opener_name),
                            span: unclosed.opener_span,
                        });
                    }
                    // If no parent, this was a root block that wasn't closed - we could track this separately
                }
            }

            // validate and close
            let frame = self.stack.pop().unwrap();
            match self
                .index
                .validate_close(opener_name, &frame.opener_bits, closer_bits, self.db)
            {
                CloseValidation::Valid => {
                    // Finalize the last segment body to end just before the closer marker
                    let content_end = span.start().saturating_sub(TagDelimiter::LENGTH_U32);
                    self.semantic_ops.push(BlockSemantics::FinalizeSpanTo {
                        id: frame.segment_body,
                        end: content_end,
                    });
                    // Extend to include closer
                    self.semantic_ops.push(BlockSemantics::ExtendBlockSpan {
                        id: frame.container_body,
                        span,
                    });
                }
                CloseValidation::ArgumentMismatch { arg, expected, got } => {
                    self.semantic_ops.push(BlockSemantics::AddErrorNode {
                        target: frame.segment_body,
                        message: format!(
                            "Argument '{arg}' mismatch: expected '{expected}', got '{got}'"
                        ),
                        span,
                    });
                    self.stack.push(frame); // Restore frame
                }
                CloseValidation::MissingRequiredArg { arg, expected } => {
                    self.semantic_ops.push(BlockSemantics::AddErrorNode {
                        target: frame.segment_body,
                        message: format!(
                            "Missing required argument '{arg}': expected '{expected}'"
                        ),
                        span,
                    });
                    self.stack.push(frame);
                }
                CloseValidation::UnexpectedArg { arg, got } => {
                    self.semantic_ops.push(BlockSemantics::AddErrorNode {
                        target: frame.segment_body,
                        message: format!("Unexpected argument '{arg}' with value '{got}'"),
                        span,
                    });
                    self.stack.push(frame);
                }
                CloseValidation::NotABlock => {
                    // Should not happen as we already classified it
                    if let Some(segment) = get_active_segment(&self.stack) {
                        self.semantic_ops.push(BlockSemantics::AddErrorNode {
                            target: segment,
                            message: format!("Internal error: {opener_name} is not a block"),
                            span,
                        });
                    }
                }
            }
        } else if let Some(segment) = get_active_segment(&self.stack) {
            self.semantic_ops.push(BlockSemantics::AddErrorNode {
                target: segment,
                message: format!("Unexpected closing tag '{opener_name}'"),
                span,
            });
        }
    }

    fn add_intermediate(&mut self, tag_name: &str, possible_openers: &[String], span: Span) {
        if let Some(frame) = self.stack.last() {
            if possible_openers.contains(&frame.opener_name) {
                // Finalize previous segment body to just before this marker (full start)
                let content_end = span.start().saturating_sub(TagDelimiter::LENGTH_U32);
                let segment_to_finalize = frame.segment_body;
                let container = frame.container_body;

                self.semantic_ops.push(BlockSemantics::FinalizeSpanTo {
                    id: segment_to_finalize,
                    end: content_end,
                });

                let body_start = span.end().saturating_add(TagDelimiter::LENGTH_U32);
                let new_segment_id = self.alloc_block_id(Span::new(body_start, 0), Some(container));

                // Add the branch node for the new segment
                self.semantic_ops.push(BlockSemantics::AddBranchNode {
                    target: container,
                    tag: tag_name.to_string(),
                    marker_span: span,
                    body: new_segment_id,
                    kind: BranchKind::Segment,
                });

                self.stack.last_mut().unwrap().segment_body = new_segment_id;
            } else {
                let segment = frame.segment_body;
                let opener_name = frame.opener_name.clone();

                self.semantic_ops.push(BlockSemantics::AddErrorNode {
                    target: segment,
                    message: format!("'{tag_name}' is not valid in '{opener_name}'"),
                    span,
                });
            }
        } else {
            // Intermediate tag at top level - this is an error
            // Could track this in a separate error list
        }
    }

    fn finish(&mut self) {
        while let Some(frame) = self.stack.pop() {
            if self.index.is_end_optional(&frame.opener_name) {
                // No explicit closer: finalize last segment to end of input (best-effort)
                // We do not know the real end; leave as-is and extend container by opener span only.
                self.semantic_ops.push(BlockSemantics::ExtendBlockSpan {
                    id: frame.container_body,
                    span: frame.opener_span,
                });
            } else if let Some(parent) = frame.parent_body {
                self.semantic_ops.push(BlockSemantics::AddErrorNode {
                    target: parent,
                    message: format!("Unclosed block '{}'", frame.opener_name),
                    span: frame.opener_span,
                });
            }
        }
    }
}

type TreeStack<'db> = Vec<TreeFrame<'db>>;

/// Get the currently active segment (the innermost block we're in)
fn get_active_segment(stack: &TreeStack) -> Option<BlockId> {
    stack.last().map(|frame| frame.segment_body)
}

/// Find a frame in the stack by name
fn find_frame_from_opener(stack: &TreeStack, opener_name: &str) -> Option<usize> {
    stack.iter().rposition(|f| f.opener_name == opener_name)
}

struct TreeFrame<'db> {
    opener_name: String,
    opener_bits: Vec<TagBit<'db>>,
    opener_span: Span,
    container_body: BlockId,
    segment_body: BlockId,
    parent_body: Option<BlockId>, // Can be None for root blocks
}

impl<'db> SemanticModel<'db> for BlockTreeBuilder<'db> {
    type Model = BlockTree;

    fn observe(&mut self, node: Node<'db>) {
        match node {
            Node::Tag { name, bits, span } => {
                self.handle_tag(name, bits, span);
            }
            Node::Comment { span, .. } => {
                if let Some(parent) = get_active_segment(&self.stack) {
                    self.semantic_ops.push(BlockSemantics::AddLeafNode {
                        target: parent,
                        label: "<comment>".into(),
                        span,
                    });
                }
            }
            Node::Variable { span, .. } => {
                if let Some(parent) = get_active_segment(&self.stack) {
                    self.semantic_ops.push(BlockSemantics::AddLeafNode {
                        target: parent,
                        label: "<var>".into(),
                        span,
                    });
                }
            }
            Node::Error {
                full_span, error, ..
            } => {
                if let Some(parent) = get_active_segment(&self.stack) {
                    self.semantic_ops.push(BlockSemantics::AddLeafNode {
                        target: parent,
                        label: error.to_string(),
                        span: full_span,
                    });
                }
            }
            Node::Text { .. } => {} // Skip text nodes - we only care about Django constructs
        }
    }

    fn construct(mut self) -> Self::Model {
        self.finish();
        self.apply_operations()
    }
}
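The builder is deliberately two-phase: observe only records BlockSemantics operations and hands out pre-allocated BlockIds, and apply_operations then materializes the arena in one pass. A hedged sketch of driving it end to end (assuming a db and a parsed nodelist are already in hand, as in the tree.rs test later in this diff):

let index = TagIndex::from(&db.tag_specs());
let tree: BlockTree = BlockTreeBuilder::new(&db, &index).model(&db, nodelist);
// every root id must point at an allocated region
assert!(tree.roots().iter().all(|r| r.index() < tree.blocks().into_iter().count()));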
198 crates/djls-semantic/src/blocks/grammar.rs Normal file
@@ -0,0 +1,198 @@
use djls_templates::nodelist::TagBit;
use rustc_hash::FxHashMap;

use crate::templatetags::TagSpecs;

/// Index for tag grammar lookups
#[derive(Clone, Debug)]
pub struct TagIndex {
    /// Opener tags and their end tag metadata
    openers: FxHashMap<String, EndMeta>,
    /// Map from closer tag name to opener tag name
    closers: FxHashMap<String, String>,
    /// Map from intermediate tag name to list of possible opener tags
    intermediate_to_openers: FxHashMap<String, Vec<String>>,
}

#[derive(Clone, Debug)]
struct EndMeta {
    optional: bool,
    match_args: Vec<MatchArgSpec>,
}

/// Specification for matching arguments between opener and closer
#[derive(Clone, Debug)]
struct MatchArgSpec {
    name: String,
    required: bool,
    position: usize,
}

impl TagIndex {
    pub fn classify(&self, tag_name: &str) -> TagClass {
        if self.openers.contains_key(tag_name) {
            return TagClass::Opener;
        }
        if let Some(opener) = self.closers.get(tag_name) {
            return TagClass::Closer {
                opener_name: opener.clone(),
            };
        }
        if let Some(openers) = self.intermediate_to_openers.get(tag_name) {
            return TagClass::Intermediate {
                possible_openers: openers.clone(),
            };
        }
        TagClass::Unknown
    }

    pub fn is_end_optional(&self, opener_name: &str) -> bool {
        self.openers
            .get(opener_name)
            .is_some_and(|meta| meta.optional)
    }

    pub fn validate_close<'db>(
        &self,
        opener_name: &str,
        opener_bits: &[TagBit<'db>],
        closer_bits: &[TagBit<'db>],
        db: &'db dyn crate::db::Db,
    ) -> CloseValidation {
        let Some(meta) = self.openers.get(opener_name) else {
            return CloseValidation::NotABlock;
        };

        // No args to match? Simple close
        if meta.match_args.is_empty() {
            return CloseValidation::Valid;
        }

        for match_arg in &meta.match_args {
            let opener_val = extract_arg_value(opener_bits, match_arg.position, db);
            let closer_val = extract_arg_value(closer_bits, match_arg.position, db);

            match (opener_val, closer_val, match_arg.required) {
                (Some(o), Some(c), _) if o != c => {
                    return CloseValidation::ArgumentMismatch {
                        arg: match_arg.name.clone(),
                        expected: o,
                        got: c,
                    };
                }
                (Some(o), None, true) => {
                    return CloseValidation::MissingRequiredArg {
                        arg: match_arg.name.clone(),
                        expected: o,
                    };
                }
                (None, Some(c), _) if match_arg.required => {
                    return CloseValidation::UnexpectedArg {
                        arg: match_arg.name.clone(),
                        got: c,
                    };
                }
                _ => {}
            }
        }

        CloseValidation::Valid
    }

    #[allow(dead_code)] // TODO: is this still needed?
    pub fn is_valid_intermediate(&self, inter_name: &str, opener_name: &str) -> bool {
        self.intermediate_to_openers
            .get(inter_name)
            .is_some_and(|openers| openers.iter().any(|o| o == opener_name))
    }
}

impl From<&TagSpecs> for TagIndex {
    fn from(specs: &TagSpecs) -> Self {
        let mut openers = FxHashMap::default();
        let mut closers = FxHashMap::default();
        let mut intermediate_to_openers: FxHashMap<String, Vec<String>> = FxHashMap::default();

        for (name, spec) in specs {
            if let Some(end_tag) = &spec.end_tag {
                let match_args = end_tag
                    .args
                    .iter()
                    .enumerate()
                    .map(|(i, arg)| MatchArgSpec {
                        name: arg.name().as_ref().to_owned(),
                        required: arg.is_required(),
                        position: i,
                    })
                    .collect();

                let meta = EndMeta {
                    optional: end_tag.optional,
                    match_args,
                };

                // opener -> meta
                openers.insert(name.clone(), meta);
                // closer -> opener
                closers.insert(end_tag.name.as_ref().to_owned(), name.clone());
                // intermediates -> opener
                for inter in spec.intermediate_tags.iter() {
                    intermediate_to_openers
                        .entry(inter.name.as_ref().to_owned())
                        .or_default()
                        .push(name.clone());
                }
            }
        }

        TagIndex {
            openers,
            closers,
            intermediate_to_openers,
        }
    }
}

/// Classification of a tag based on its role
#[derive(Clone, Debug)]
pub enum TagClass {
    /// This tag opens a block
    Opener,
    /// This tag closes a block
    Closer { opener_name: String },
    /// This tag is an intermediate (elif, else, etc.)
    Intermediate { possible_openers: Vec<String> },
    /// Unknown tag - treat as leaf
    Unknown,
}

#[derive(Clone, Debug)]
pub enum CloseValidation {
    Valid,
    NotABlock,
    ArgumentMismatch {
        arg: String,
        expected: String,
        got: String,
    },
    MissingRequiredArg {
        arg: String,
        expected: String,
    },
    UnexpectedArg {
        arg: String,
        got: String,
    },
}

fn extract_arg_value<'db>(
    bits: &[TagBit<'db>],
    position: usize,
    db: &'db dyn crate::db::Db,
) -> Option<String> {
    if position < bits.len() {
        Some(bits[position].text(db).to_string())
    } else {
        None
    }
}
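Classification is three hash lookups in order: openers, then closers, then intermediates, falling through to Unknown. An illustrative sketch of the expected mapping for Django's if family (the `now` case assumes a builtin with no end tag, which TagIndex never indexes):

let index = TagIndex::from(&django_builtin_specs());
assert!(matches!(index.classify("if"), TagClass::Opener));
assert!(matches!(index.classify("endif"), TagClass::Closer { .. }));
assert!(matches!(index.classify("elif"), TagClass::Intermediate { .. }));
assert!(matches!(index.classify("now"), TagClass::Unknown)); // assumption: no end tag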
159 crates/djls-semantic/src/blocks/nodes.rs Normal file
@@ -0,0 +1,159 @@
use djls_source::Span;
use serde::Serialize;

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize)]
pub struct BlockId(u32);

impl BlockId {
    pub fn new(id: u32) -> Self {
        Self(id)
    }

    pub fn id(self) -> u32 {
        self.0
    }

    pub fn index(self) -> usize {
        self.0 as usize
    }
}

#[derive(Clone, Debug, Default, Serialize)]
pub struct Blocks(Vec<Region>);

impl Blocks {
    pub fn get(&self, id: usize) -> &Region {
        &self.0[id]
    }
}

impl IntoIterator for Blocks {
    type Item = Region;
    type IntoIter = std::vec::IntoIter<Region>;

    fn into_iter(self) -> Self::IntoIter {
        self.0.into_iter()
    }
}

impl<'a> IntoIterator for &'a Blocks {
    type Item = &'a Region;
    type IntoIter = std::slice::Iter<'a, Region>;

    fn into_iter(self) -> Self::IntoIter {
        self.0.iter()
    }
}

impl<'a> IntoIterator for &'a mut Blocks {
    type Item = &'a mut Region;
    type IntoIter = std::slice::IterMut<'a, Region>;

    fn into_iter(self) -> Self::IntoIter {
        self.0.iter_mut()
    }
}

impl Blocks {
    pub fn alloc(&mut self, span: Span, parent: Option<BlockId>) -> BlockId {
        let id = BlockId(u32::try_from(self.0.len()).unwrap_or_default());
        self.0.push(Region::new(span, parent));
        id
    }

    pub fn extend_block(&mut self, id: BlockId, span: Span) {
        self.block_mut(id).extend_span(span);
    }

    pub fn set_block_span(&mut self, id: BlockId, span: Span) {
        self.block_mut(id).set_span(span);
    }

    pub fn finalize_block_span(&mut self, id: BlockId, end: u32) {
        let block = self.block_mut(id);
        let start = block.span().start();
        block.set_span(Span::saturating_from_bounds_usize(
            start as usize,
            end as usize,
        ));
    }

    pub fn push_node(&mut self, target: BlockId, node: BlockNode) {
        let span = node.span();
        self.extend_block(target, span);
        self.block_mut(target).nodes.push(node);
    }

    fn block_mut(&mut self, id: BlockId) -> &mut Region {
        let idx = id.index();
        &mut self.0[idx]
    }
}

#[derive(Clone, Debug, Serialize)]
pub struct Region {
    span: Span,
    nodes: Vec<BlockNode>,
    parent: Option<BlockId>,
}

impl Region {
    fn new(span: Span, parent: Option<BlockId>) -> Self {
        Self {
            span,
            nodes: Vec::new(),
            parent,
        }
    }

    pub fn span(&self) -> &Span {
        &self.span
    }

    pub fn set_span(&mut self, span: Span) {
        self.span = span;
    }

    pub fn nodes(&self) -> &Vec<BlockNode> {
        &self.nodes
    }

    fn extend_span(&mut self, span: Span) {
        let opening = self.span.start().saturating_sub(span.start());
        let closing = span.end().saturating_sub(self.span.end());
        self.span = self.span.expand(opening, closing);
    }
}

#[derive(Clone, Debug, Serialize)]
pub enum BranchKind {
    Opener,
    Segment,
}

#[derive(Clone, Debug, Serialize)]
pub enum BlockNode {
    Leaf {
        label: String,
        span: Span,
    },
    Branch {
        tag: String,
        marker_span: Span,
        body: BlockId,
        kind: BranchKind,
    },
    Error {
        message: String,
        span: Span,
    },
}

impl BlockNode {
    fn span(&self) -> Span {
        match self {
            BlockNode::Leaf { span, .. } | BlockNode::Error { span, .. } => *span,
            BlockNode::Branch { marker_span, .. } => *marker_span,
        }
    }
}
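Blocks is a flat arena: BlockId is just an index, and push_node widens the target Region to cover each node it receives. A small sketch of that growth behavior (the spans are made up):

let mut blocks = Blocks::default();
let root = blocks.alloc(Span::new(0, 10), None);
let _child = blocks.alloc(Span::new(4, 2), Some(root));

// pushing a node whose span ends past the region widens the region
blocks.push_node(
    root,
    BlockNode::Leaf { label: "<var>".into(), span: Span::new(0, 20) },
);
assert_eq!(blocks.get(root.index()).span().end(), 20);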
174 crates/djls-semantic/src/blocks/snapshot.rs Normal file
@@ -0,0 +1,174 @@
use std::collections::HashSet;

use djls_source::Span;
use serde::Serialize;

use super::nodes::BlockId;
use super::nodes::BlockNode;
use super::nodes::BranchKind;
use super::tree::BlockTree;

// TODO: centralize salsa struct snapshots so this mess can be shared

#[derive(Serialize)]
pub struct BlockTreeSnapshot {
    roots: Vec<u32>,
    root_ids: Vec<u32>,
    blocks: Vec<BlockSnapshot>,
}

impl From<&BlockTree> for BlockTreeSnapshot {
    #[allow(clippy::too_many_lines)]
    fn from(tree: &BlockTree) -> Self {
        let mut container_ids: HashSet<u32> = HashSet::new();
        let mut body_ids: HashSet<u32> = HashSet::new();

        for r in tree.roots() {
            container_ids.insert(r.id());
        }
        for (i, b) in tree.blocks().into_iter().enumerate() {
            let i_u = u32::try_from(i).unwrap_or(u32::MAX);
            for n in b.nodes() {
                match n {
                    BlockNode::Leaf { .. } | BlockNode::Error { .. } => {}
                    BlockNode::Branch {
                        body,
                        kind: BranchKind::Opener,
                        ..
                    } => {
                        container_ids.insert(body.id());
                    }
                    BlockNode::Branch {
                        body,
                        kind: BranchKind::Segment,
                        ..
                    } => {
                        body_ids.insert(body.id());
                    }
                }
            }
            if container_ids.contains(&i_u) {
                body_ids.remove(&i_u);
            }
        }

        let blocks = tree
            .blocks()
            .into_iter()
            .enumerate()
            .map(|(i, b)| {
                let id_u = u32::try_from(i).unwrap_or(u32::MAX);
                let nodes: Vec<BlockNodeSnapshot> = b
                    .nodes()
                    .iter()
                    .map(|n| match n {
                        BlockNode::Leaf { label, span } => BlockNodeSnapshot::Leaf {
                            label: label.clone(),
                            span: *span,
                        },
                        BlockNode::Error { message, span } => BlockNodeSnapshot::Error {
                            message: message.clone(),
                            span: *span,
                        },
                        BlockNode::Branch {
                            tag,
                            marker_span,
                            body,
                            ..
                        } => BlockNodeSnapshot::Branch {
                            block_id: body.id(),
                            tag: tag.clone(),
                            marker_span: *marker_span,
                            content_span: *tree.blocks().get(body.index()).span(),
                        },
                    })
                    .collect();

                if container_ids.contains(&id_u) {
                    BlockSnapshot::Container {
                        container_span: *b.span(),
                        nodes,
                    }
                } else {
                    BlockSnapshot::Body {
                        content_span: *b.span(),
                        nodes,
                    }
                }
            })
            .collect();

        // Also compute root_id for every block/region
        let root_ids: Vec<u32> = tree
            .blocks()
            .into_iter()
            .enumerate()
            .map(|(i, _)| {
                let mut cur = BlockId::new(u32::try_from(i).unwrap_or(u32::MAX));
                // climb via snapshot-internal parent pointers
                loop {
                    // safety: we have no direct parent access in snapshot; infer by scanning containers
                    // If any Branch points to `cur` as body, that region's parent is its container id
                    let mut parent: Option<BlockId> = None;
                    for (j, b) in tree.blocks().into_iter().enumerate() {
                        for n in b.nodes() {
                            if let BlockNode::Branch { body, .. } = n {
                                if body.index() == cur.index() {
                                    parent =
                                        Some(BlockId::new(u32::try_from(j).unwrap_or(u32::MAX)));
                                    break;
                                }
                            }
                        }
                        if parent.is_some() {
                            break;
                        }
                    }
                    if let Some(p) = parent {
                        cur = p;
                    } else {
                        break cur.id();
                    }
                }
            })
            .collect();

        Self {
            roots: tree.roots().iter().map(|r| r.id()).collect(),
            blocks,
            root_ids,
        }
    }
}

#[derive(Serialize)]
#[serde(tag = "kind")]
pub enum BlockSnapshot {
    Container {
        container_span: Span,
        nodes: Vec<BlockNodeSnapshot>,
    },
    Body {
        content_span: Span,
        nodes: Vec<BlockNodeSnapshot>,
    },
}

#[derive(Serialize)]
#[serde(tag = "node")]
pub enum BlockNodeSnapshot {
    Branch {
        block_id: u32,
        tag: String,
        marker_span: Span,
        content_span: Span,
    },
    Leaf {
        label: String,
        span: Span,
    },
    Error {
        message: String,
        span: Span,
    },
}
@@ -0,0 +1,158 @@
---
source: crates/djls-semantic/src/blocks/tree.rs
expression: block_tree.to_snapshot()
---
roots:
  - 0
  - 2
  - 9
root_ids:
  - 0
  - 0
  - 2
  - 2
  - 2
  - 2
  - 2
  - 2
  - 2
  - 9
  - 9
blocks:
  - kind: Container
    container_span:
      start: 3
      length: 55
    nodes:
      - node: Branch
        block_id: 1
        tag: block
        marker_span:
          start: 3
          length: 14
        content_span:
          start: 19
          length: 20
  - kind: Body
    content_span:
      start: 19
      length: 20
    nodes: []
  - kind: Container
    container_span:
      start: 64
      length: 290
    nodes:
      - node: Branch
        block_id: 3
        tag: if
        marker_span:
          start: 64
          length: 26
        content_span:
          start: 92
          length: 217
      - node: Branch
        block_id: 8
        tag: else
        marker_span:
          start: 311
          length: 6
        content_span:
          start: 319
          length: 26
  - kind: Body
    content_span:
      start: 92
      length: 217
    nodes:
      - node: Leaf
        label: "<var>"
        span:
          start: 110
          length: 11
      - node: Branch
        block_id: 4
        tag: if
        marker_span:
          start: 134
          length: 22
        content_span:
          start: 134
          length: 172
  - kind: Container
    container_span:
      start: 134
      length: 172
    nodes:
      - node: Branch
        block_id: 5
        tag: if
        marker_span:
          start: 134
          length: 22
        content_span:
          start: 158
          length: 32
      - node: Branch
        block_id: 6
        tag: elif
        marker_span:
          start: 192
          length: 20
        content_span:
          start: 214
          length: 34
      - node: Branch
        block_id: 7
        tag: else
        marker_span:
          start: 250
          length: 6
        content_span:
          start: 258
          length: 39
  - kind: Body
    content_span:
      start: 158
      length: 32
    nodes: []
  - kind: Body
    content_span:
      start: 214
      length: 34
    nodes: []
  - kind: Body
    content_span:
      start: 258
      length: 39
    nodes: []
  - kind: Body
    content_span:
      start: 319
      length: 26
    nodes: []
  - kind: Container
    container_span:
      start: 360
      length: 56
    nodes:
      - node: Branch
        block_id: 10
        tag: for
        marker_span:
          start: 360
          length: 19
        content_span:
          start: 381
          length: 25
  - kind: Body
    content_span:
      start: 381
      length: 25
    nodes:
      - node: Leaf
        label: "<var>"
        span:
          start: 392
          length: 6
@@ -0,0 +1,147 @@
---
source: crates/djls-semantic/src/blocks/tree.rs
expression: nodelist_view
---
nodes:
  - kind: Text
    span:
      start: 0
      length: 1
  - kind: Tag
    name: block
    bits:
      - header
    span:
      start: 3
      length: 14
  - kind: Text
    span:
      start: 19
      length: 20
  - kind: Tag
    name: endblock
    bits:
      - header
    span:
      start: 41
      length: 17
  - kind: Text
    span:
      start: 60
      length: 2
  - kind: Tag
    name: if
    bits:
      - user.is_authenticated
    span:
      start: 64
      length: 26
  - kind: Text
    span:
      start: 92
      length: 16
  - kind: Variable
    var: user.name
    filters: []
    span:
      start: 110
      length: 11
  - kind: Text
    span:
      start: 123
      length: 9
  - kind: Tag
    name: if
    bits:
      - user.is_superuser
    span:
      start: 134
      length: 22
  - kind: Text
    span:
      start: 158
      length: 32
  - kind: Tag
    name: elif
    bits:
      - user.is_staff
    span:
      start: 192
      length: 20
  - kind: Text
    span:
      start: 214
      length: 34
  - kind: Tag
    name: else
    bits: []
    span:
      start: 250
      length: 6
  - kind: Text
    span:
      start: 258
      length: 39
  - kind: Tag
    name: endif
    bits: []
    span:
      start: 299
      length: 7
  - kind: Text
    span:
      start: 308
      length: 1
  - kind: Tag
    name: else
    bits: []
    span:
      start: 311
      length: 6
  - kind: Text
    span:
      start: 319
      length: 26
  - kind: Tag
    name: endif
    bits: []
    span:
      start: 347
      length: 7
  - kind: Text
    span:
      start: 356
      length: 2
  - kind: Tag
    name: for
    bits:
      - item
      - in
      - items
    span:
      start: 360
      length: 19
  - kind: Text
    span:
      start: 381
      length: 9
  - kind: Variable
    var: item
    filters: []
    span:
      start: 392
      length: 6
  - kind: Text
    span:
      start: 400
      length: 6
  - kind: Tag
    name: endfor
    bits: []
    span:
      start: 408
      length: 8
  - kind: Text
    span:
      start: 418
      length: 1
226 crates/djls-semantic/src/blocks/tree.rs Normal file
@@ -0,0 +1,226 @@
use serde::Serialize;

use super::nodes::BlockId;
use super::nodes::Blocks;

#[derive(Clone, Debug, Serialize)]
pub struct BlockTree {
    roots: Vec<BlockId>,
    blocks: Blocks,
}

impl BlockTree {
    pub fn new() -> Self {
        Self {
            roots: Vec::new(),
            blocks: Blocks::default(),
        }
    }

    pub fn roots(&self) -> &Vec<BlockId> {
        &self.roots
    }

    pub fn roots_mut(&mut self) -> &mut Vec<BlockId> {
        &mut self.roots
    }

    pub fn blocks(&self) -> &Blocks {
        &self.blocks
    }

    pub fn blocks_mut(&mut self) -> &mut Blocks {
        &mut self.blocks
    }

    #[cfg(test)]
    pub fn build(
        db: &dyn crate::Db,
        nodelist: djls_templates::NodeList,
        index: &super::grammar::TagIndex,
    ) -> Self {
        use super::builder::BlockTreeBuilder;
        use crate::traits::SemanticModel;

        BlockTreeBuilder::new(db, index).model(db, nodelist)
    }
}

impl Default for BlockTree {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;
    use std::sync::Mutex;

    use camino::Utf8Path;
    use djls_source::File;
    use djls_source::Span;
    use djls_templates::parse_template;
    use djls_templates::Node;
    use djls_workspace::FileSystem;
    use djls_workspace::InMemoryFileSystem;

    use super::*;
    use crate::blocks::grammar::TagIndex;
    use crate::blocks::snapshot::BlockTreeSnapshot;
    use crate::templatetags::django_builtin_specs;
    use crate::TagSpecs;

    impl BlockTree {
        pub fn to_snapshot(&self) -> BlockTreeSnapshot {
            BlockTreeSnapshot::from(self)
        }
    }

    #[salsa::db]
    #[derive(Clone)]
    struct TestDatabase {
        storage: salsa::Storage<Self>,
        fs: Arc<Mutex<InMemoryFileSystem>>,
    }

    impl TestDatabase {
        fn new() -> Self {
            Self {
                storage: salsa::Storage::default(),
                fs: Arc::new(Mutex::new(InMemoryFileSystem::new())),
            }
        }

        fn add_file(&self, path: &str, content: &str) {
            self.fs
                .lock()
                .unwrap()
                .add_file(path.into(), content.to_string());
        }
    }

    #[salsa::db]
    impl salsa::Database for TestDatabase {}

    #[salsa::db]
    impl djls_source::Db for TestDatabase {
        fn read_file_source(&self, path: &Utf8Path) -> std::io::Result<String> {
            self.fs.lock().unwrap().read_to_string(path)
        }
    }

    #[salsa::db]
    impl djls_templates::Db for TestDatabase {}

    #[salsa::db]
    impl crate::Db for TestDatabase {
        fn tag_specs(&self) -> TagSpecs {
            django_builtin_specs()
        }
    }

    #[test]
    fn test_block_tree_building() {
        use crate::Db as SemanticDb;

        let db = TestDatabase::new();

        let source = r"
{% block header %}
    <h1>Title</h1>
{% endblock header %}

{% if user.is_authenticated %}
    <p>Welcome {{ user.name }}</p>
    {% if user.is_superuser %}
        <span>Admin</span>
    {% elif user.is_staff %}
        <span>Manager</span>
    {% else %}
        <span>Regular user</span>
    {% endif %}
{% else %}
    <p>Please log in</p>
{% endif %}

{% for item in items %}
    <li>{{ item }}</li>
{% endfor %}
";

        db.add_file("test.html", source);
        let file = File::new(&db, "test.html".into(), 0);
        let nodelist = parse_template(&db, file).expect("should parse");

        let nodelist_view = {
            #[derive(serde::Serialize)]
            struct NodeListView {
                nodes: Vec<NodeView>,
            }
            #[derive(serde::Serialize)]
            #[serde(tag = "kind")]
            enum NodeView {
                Tag {
                    name: String,
                    bits: Vec<String>,
                    span: Span,
                },
                Variable {
                    var: String,
                    filters: Vec<String>,
                    span: Span,
                },
                Comment {
                    content: String,
                    span: Span,
                },
                Text {
                    span: Span,
                },
                Error {
                    span: Span,
                    full_span: Span,
                    error: String,
                },
            }

            let nodes = nodelist
                .nodelist(&db)
                .iter()
                .map(|n| match n {
                    Node::Tag { name, bits, span } => NodeView::Tag {
                        name: name.text(&db).to_string(),
                        bits: bits.iter().map(|b| b.text(&db).to_string()).collect(),
                        span: *span,
                    },
                    Node::Variable { var, filters, span } => NodeView::Variable {
                        var: var.text(&db).to_string(),
                        filters: filters.iter().map(|f| f.text(&db).to_string()).collect(),
                        span: *span,
                    },
                    Node::Comment { content, span } => NodeView::Comment {
                        content: content.clone(),
                        span: *span,
                    },
                    Node::Text { span } => NodeView::Text { span: *span },
                    Node::Error {
                        span,
                        full_span,
                        error,
                    } => NodeView::Error {
                        span: *span,
                        full_span: *full_span,
                        error: error.to_string(),
                    },
                })
                .collect();

            NodeListView { nodes }
        };
        insta::assert_yaml_snapshot!("nodelist", nodelist_view);
        let tag_index = TagIndex::from(&db.tag_specs());
        let block_tree = BlockTree::build(&db, nodelist, &tag_index);
        insta::assert_yaml_snapshot!("blocktree", block_tree.to_snapshot());
    }
}
@@ -1,5 +1,3 @@
-use std::sync::Arc;
-
 use djls_templates::Db as TemplateDb;

 use crate::errors::ValidationError;

@@ -8,9 +6,8 @@ use crate::templatetags::TagSpecs;
 #[salsa::db]
 pub trait Db: TemplateDb {
     /// Get the Django tag specifications for semantic analysis
-    fn tag_specs(&self) -> Arc<TagSpecs>;
+    fn tag_specs(&self) -> TagSpecs;
 }

 /// Accumulator for validation errors
 #[salsa::accumulator]
 pub struct ValidationErrorAccumulator(pub ValidationError);
@@ -1,6 +1,8 @@
+mod blocks;
 mod db;
 mod errors;
 mod templatetags;
+mod traits;
 mod validation;

 pub use db::Db;
@@ -726,16 +726,13 @@ mod tests {
         let specs = django_builtin_specs();

         // Verify we have specs loaded
-        assert!(
-            specs.iter().count() > 0,
-            "Should have loaded at least one spec"
-        );
+        assert!(!specs.is_empty(), "Should have loaded at least one spec");

         // Check a key tag is present as a smoke test
         assert!(specs.get("if").is_some(), "'if' tag should be present");

         // Verify all tag names are non-empty
-        for (name, _) in specs.iter() {
+        for (name, _) in specs {
             assert!(!name.is_empty(), "Tag name should not be empty");
         }
     }
@@ -1,4 +1,8 @@
 use std::borrow::Cow;
+use std::collections::hash_map::IntoIter;
+use std::collections::hash_map::Iter;
+use std::ops::Deref;
+use std::ops::DerefMut;

 use rustc_hash::FxHashMap;

@@ -36,16 +40,6 @@ impl TagSpecs {
         TagSpecs(specs)
     }

-    #[must_use]
-    pub fn get(&self, key: &str) -> Option<&TagSpec> {
-        self.0.get(key)
-    }
-
-    /// Iterate over all tag specs
-    pub fn iter(&self) -> impl Iterator<Item = (&String, &TagSpec)> {
-        self.0.iter()
-    }
-
     /// Find the opener tag for a given closer tag
     #[must_use]
     pub fn find_opener_for_closer(&self, closer: &str) -> Option<String> {

@@ -121,6 +115,38 @@ impl TagSpecs {
     }
 }

+impl Deref for TagSpecs {
+    type Target = FxHashMap<String, TagSpec>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl DerefMut for TagSpecs {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl<'a> IntoIterator for &'a TagSpecs {
+    type Item = (&'a String, &'a TagSpec);
+    type IntoIter = Iter<'a, String, TagSpec>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.0.iter()
+    }
+}
+
+impl IntoIterator for TagSpecs {
+    type Item = (String, TagSpec);
+    type IntoIter = IntoIter<String, TagSpec>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.0.into_iter()
+    }
+}
+
 impl From<&djls_conf::Settings> for TagSpecs {
     fn from(settings: &djls_conf::Settings) -> Self {
         // Start with built-in specs

@@ -482,10 +508,10 @@ mod tests {
     fn test_iter() {
         let specs = create_test_specs();

-        let count = specs.iter().count();
+        let count = specs.len();
         assert_eq!(count, 4);

-        let mut found_keys: Vec<String> = specs.iter().map(|(k, _)| k.clone()).collect();
+        let mut found_keys: Vec<String> = specs.keys().cloned().collect();
         found_keys.sort();

         let mut expected_keys = ["block", "csrf_token", "for", "if"];

@@ -682,19 +708,19 @@ mod tests {
     assert!(specs1.get("block").is_some());

     // Total count should be 5 (original 4 + 1 new)
-    assert_eq!(specs1.iter().count(), 5);
+    assert_eq!(specs1.len(), 5);
 }

 #[test]
 fn test_merge_empty() {
     let mut specs = create_test_specs();
-    let original_count = specs.iter().count();
+    let original_count = specs.len();

     // Merge with empty TagSpecs
     specs.merge(TagSpecs::new(FxHashMap::default()));

     // Should remain unchanged
-    assert_eq!(specs.iter().count(), original_count);
+    assert_eq!(specs.len(), original_count);
 }

 #[test]
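With Deref and the two IntoIterator impls in place, TagSpecs behaves like the map it wraps: get, len, and keys come from Deref, and both TagSpecs and &TagSpecs can drive a for loop directly, which is what lets the callers elsewhere in this diff drop .iter(). A brief sketch:

let specs = django_builtin_specs();
assert!(specs.get("for").is_some()); // via Deref to the inner FxHashMap
for (name, spec) in &specs {
    // borrows each (name, spec) pair; iterating `specs` by value consumes it instead
    let _ = (name, spec.end_tag.is_some());
}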
33 crates/djls-semantic/src/traits.rs Normal file
@@ -0,0 +1,33 @@
use djls_templates::Node;
use djls_templates::NodeList;

use crate::Db;

/// Semantic model builder that operates on Django template nodelists.
///
/// This trait defines the interface for building semantic models from Django templates.
/// A semantic model is any representation that captures some aspect of the template's
/// meaning - structure, dependencies, types, security properties, etc.
pub trait SemanticModel<'db> {
    type Model;

    /// Build the semantic model from a nodelist
    #[allow(dead_code)] // use is gated behind cfg(test) for now
    fn model(mut self, db: &'db dyn Db, nodelist: NodeList<'db>) -> Self::Model
    where
        Self: Sized,
    {
        for node in nodelist.nodelist(db).iter().cloned() {
            self.observe(node);
        }
        self.construct()
    }

    /// Observe a single node during traversal and extract semantic information
    #[allow(dead_code)] // use is gated behind cfg(test) for now
    fn observe(&mut self, node: Node<'db>);

    /// Construct the final semantic model from observed semantics
    #[allow(dead_code)] // use is gated behind cfg(test) for now
    fn construct(self) -> Self::Model;
}
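Per the trait's doc comment, any nodelist-derived representation qualifies as a model; the provided model method drives observe over every node and then calls construct. A hypothetical minimal implementor (not part of this diff) that just counts tags:

struct TagCounter(usize);

impl<'db> SemanticModel<'db> for TagCounter {
    type Model = usize;

    fn observe(&mut self, node: Node<'db>) {
        // count only {% ... %} tag nodes; ignore text, variables, comments
        if matches!(node, Node::Tag { .. }) {
            self.0 += 1;
        }
    }

    fn construct(self) -> Self::Model {
        self.0
    }
}

// let tag_count: usize = TagCounter(0).model(db, nodelist);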
@@ -121,15 +121,13 @@ impl TemplateDb for DjangoDatabase {}

 #[salsa::db]
 impl SemanticDb for DjangoDatabase {
-    fn tag_specs(&self) -> Arc<TagSpecs> {
+    fn tag_specs(&self) -> TagSpecs {
         let project_root = self.project_root_or_cwd();

-        let tag_specs = match djls_conf::Settings::new(&project_root) {
+        match djls_conf::Settings::new(&project_root) {
             Ok(settings) => TagSpecs::from(&settings),
             Err(_) => djls_semantic::django_builtin_specs(),
-        };
-
-        Arc::new(tag_specs)
+        }
     }
 }
@@ -88,7 +88,7 @@ impl DjangoLanguageServer {
         return;
     };

-    if FileKind::from_path(&path) != FileKind::Template {
+    if FileKind::from(&path) != FileKind::Template {
         return;
     }

@@ -270,7 +270,7 @@ impl LanguageServer for DjangoLanguageServer {
     // Clear diagnostics when closing a template file
     if let Some(url) = url {
         if let Some(path) = paths::url_to_path(&url) {
-            if FileKind::from_path(&path) == FileKind::Template {
+            if FileKind::from(&path) == FileKind::Template {
                 let Some(lsp_uri) = paths::url_to_lsp_uri(&url) else {
                     tracing::debug!("Could not convert URL to LSP Uri: {}", url);
                     return;

@@ -307,7 +307,7 @@ impl LanguageServer for DjangoLanguageServer {
     let document = session.get_document(&url)?;
     let position = params.text_document_position.position;
     let encoding = session.position_encoding();
-    let file_kind = FileKind::from_path(&path);
+    let file_kind = FileKind::from(&path);
     let template_tags = session.with_db(|db| {
         if let Some(project) = db.project() {
             djls_project::get_templatetags(db, project)

@@ -368,7 +368,7 @@ impl LanguageServer for DjangoLanguageServer {
     };

     // Only provide diagnostics for template files
-    let file_kind = FileKind::from_path(url.path().into());
+    let file_kind = FileKind::from(url.path());
     if file_kind != FileKind::Template {
         return Ok(lsp_types::DocumentDiagnosticReportResult::Report(
             lsp_types::DocumentDiagnosticReport::Full(
@@ -189,7 +189,7 @@ impl Session {
     }

     fn handle_file_event(&self, event: &WorkspaceFileEvent) {
-        if FileKind::from_path(event.path()) == FileKind::Template {
+        if FileKind::from(event.path()) == FileKind::Template {
             let nodelist = djls_templates::parse_template(&self.db, event.file());
             if let Some(nodelist) = nodelist {
                 djls_semantic::validate_nodelist(&self.db, nodelist);
@@ -7,6 +7,7 @@ edition = "2021"
 camino = { workspace = true }
 salsa = { workspace = true }
+serde = { workspace = true }
 thiserror = { workspace = true }

 [dev-dependencies]
@ -5,7 +5,7 @@ use camino::Utf8Path;
|
|||
use camino::Utf8PathBuf;
|
||||
|
||||
use crate::db::Db;
|
||||
use crate::position::LineIndex;
|
||||
use crate::line::LineIndex;
|
||||
|
||||
#[salsa::input]
|
||||
pub struct File {
|
||||
|
|
@ -28,7 +28,7 @@ impl File {
|
|||
#[salsa::tracked(returns(ref))]
|
||||
pub fn line_index(self, db: &dyn Db) -> LineIndex {
|
||||
let text = self.source(db);
|
||||
LineIndex::from_text(text.0.source.as_str())
|
||||
LineIndex::from(text.0.source.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -38,12 +38,8 @@ pub struct SourceText(Arc<SourceTextInner>);
|
|||
impl SourceText {
|
||||
#[must_use]
|
||||
pub fn new(path: &Utf8Path, source: String) -> Self {
|
||||
let encoding = if source.is_ascii() {
|
||||
FileEncoding::Ascii
|
||||
} else {
|
||||
FileEncoding::Utf8
|
||||
};
|
||||
let kind = FileKind::from_path(path);
|
||||
let encoding = FileEncoding::from(source.as_str());
|
||||
let kind = FileKind::from(path);
|
||||
Self(Arc::new(SourceTextInner {
|
||||
encoding,
|
||||
kind,
|
||||
|
|
@@ -99,6 +95,16 @@ pub enum FileEncoding {
    Utf8,
}

+impl From<&str> for FileEncoding {
+    fn from(value: &str) -> Self {
+        if value.is_ascii() {
+            Self::Ascii
+        } else {
+            Self::Utf8
+        }
+    }
+}
+
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub enum FileKind {
    Other,
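The new `From<&str>` impl replaces the inline ASCII check that `SourceText::new` used to carry. A minimal sketch of the behavior, written as an in-module test (hypothetical module and test names) since this diff does not re-export `FileEncoding` from the crate root; `matches!` is used so the sketch does not depend on derives not visible in the hunk:

```rust
#[cfg(test)]
mod encoding_tests {
    use super::FileEncoding;

    #[test]
    fn detects_ascii_vs_utf8() {
        // Pure-ASCII sources take the cheap encoding...
        assert!(matches!(FileEncoding::from("plain ascii"), FileEncoding::Ascii));
        // ...anything else falls back to UTF-8.
        assert!(matches!(FileEncoding::from("emoji 🌍"), FileEncoding::Utf8));
    }
}
```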
@@ -106,13 +112,29 @@ pub enum FileKind {
    Template,
}

-impl FileKind {
-    /// Determine [`FileKind`] from a file path extension.
-    #[must_use]
-    pub fn from_path(path: &Utf8Path) -> Self {
-        match path.extension() {
-            Some("py") => FileKind::Python,
-            Some("html" | "htm") => FileKind::Template,
+impl From<&str> for FileKind {
+    fn from(value: &str) -> Self {
+        match value {
+            "py" => FileKind::Python,
+            "html" | "htm" => FileKind::Template,
            _ => FileKind::Other,
        }
    }
}
+
+impl From<&Utf8Path> for FileKind {
+    fn from(path: &Utf8Path) -> Self {
+        match path.extension() {
+            Some(ext) => Self::from(ext),
+            _ => FileKind::Other,
+        }
+    }
+}
+
+impl From<&Utf8PathBuf> for FileKind {
+    fn from(path: &Utf8PathBuf) -> Self {
+        match path.extension() {
+            Some(ext) => Self::from(ext),
+            _ => FileKind::Other,
+        }
+    }
+}
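Call sites that previously used `FileKind::from_path` now go through these standard `From` conversions: extension strings convert directly, and paths dispatch on their extension. A minimal usage sketch, assuming the `djls_source` re-exports shown in the `lib.rs` hunk below and `camino` as a dependency:

```rust
use camino::Utf8Path;
use djls_source::FileKind;

fn main() {
    // Extension strings convert directly...
    assert_eq!(FileKind::from("py"), FileKind::Python);
    assert_eq!(FileKind::from("htm"), FileKind::Template);
    assert_eq!(FileKind::from("txt"), FileKind::Other);

    // ...and paths go through their extension.
    assert_eq!(
        FileKind::from(Utf8Path::new("templates/home.html")),
        FileKind::Template
    );
    // A path with no extension falls back to Other.
    assert_eq!(FileKind::from(Utf8Path::new("Makefile")), FileKind::Other);
}
```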
@@ -1,13 +1,14 @@
mod db;
mod file;
+mod line;
mod position;
mod protocol;

pub use db::Db;
pub use file::File;
pub use file::FileKind;
-pub use position::ByteOffset;
+pub use line::LineIndex;
pub use position::LineCol;
-pub use position::LineIndex;
+pub use position::Offset;
pub use position::Span;
pub use protocol::PositionEncoding;
|
|||
164
crates/djls-source/src/line.rs
Normal file
164
crates/djls-source/src/line.rs
Normal file
|
|
@@ -0,0 +1,164 @@
+use crate::LineCol;
+use crate::Offset;
+
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
+pub enum LineEnding {
+    #[default]
+    Lf,
+    Crlf,
+    Cr,
+}
+
+impl LineEnding {
+    #[inline]
+    #[allow(dead_code)]
+    pub const fn as_str(self) -> &'static str {
+        match self {
+            Self::Lf => "\n",
+            Self::Crlf => "\r\n",
+            Self::Cr => "\r",
+        }
+    }
+
+    #[inline]
+    pub const fn len(self) -> usize {
+        match self {
+            Self::Cr | Self::Lf => 1,
+            Self::Crlf => 2,
+        }
+    }
+
+    #[allow(dead_code)]
+    pub const fn is_line_feed(self) -> bool {
+        matches!(self, Self::Lf)
+    }
+
+    #[allow(dead_code)]
+    pub const fn is_carriage_return_line_feed(self) -> bool {
+        matches!(self, Self::Crlf)
+    }
+
+    #[allow(dead_code)]
+    pub const fn is_carriage_return(self) -> bool {
+        matches!(self, Self::Cr)
+    }
+
+    #[inline]
+    pub fn match_at(bytes: &[u8], i: usize) -> Option<Self> {
+        match bytes.get(i) {
+            Some(b'\n') => Some(Self::Lf),
+            Some(b'\r') if bytes.get(i + 1) == Some(&b'\n') => Some(Self::Crlf),
+            Some(b'\r') => Some(Self::Cr),
+            _ => None,
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct LineIndex(Vec<u32>);
+
+impl LineIndex {
+    #[must_use]
+    pub fn lines(&self) -> &[u32] {
+        &self.0
+    }
+
+    #[must_use]
+    pub fn line_start(&self, line: u32) -> Option<u32> {
+        self.0.get(line as usize).copied()
+    }
+
+    #[must_use]
+    pub fn to_line_col(&self, offset: Offset) -> LineCol {
+        if self.lines().is_empty() {
+            return LineCol::new(0, 0);
+        }
+
+        let offset_u32 = offset.as_ref();
+
+        let line = match self.lines().binary_search(offset_u32) {
+            Ok(exact) => exact,
+            Err(0) => 0,
+            Err(next) => next - 1,
+        };
+        let column = offset_u32.saturating_sub(self.0[line]);
+
+        LineCol::new(u32::try_from(line).unwrap_or_default(), column)
+    }
+}
+
+impl From<&[u8]> for LineIndex {
+    fn from(bytes: &[u8]) -> Self {
+        let mut starts = Vec::with_capacity(256);
+        starts.push(0);
+
+        let mut i = 0;
+        while i < bytes.len() {
+            if let Some(ending) = LineEnding::match_at(bytes, i) {
+                let len = ending.len();
+                starts.push(u32::try_from(i + len).unwrap_or(u32::MAX));
+                i += len;
+            } else {
+                i += 1;
+            }
+        }
+
+        Self(starts)
+    }
+}
+
+impl From<&str> for LineIndex {
+    fn from(text: &str) -> Self {
+        let bytes = text.as_bytes();
+        Self::from(bytes)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_line_index_unix_endings() {
+        let text = "line1\nline2\nline3";
+        let index = LineIndex::from(text);
+        assert_eq!(index.lines(), &[0, 6, 12]);
+    }
+
+    #[test]
+    fn test_line_index_windows_endings() {
+        let text = "line1\r\nline2\r\nline3";
+        let index = LineIndex::from(text);
+        // After "line1\r\n" (7 bytes), next line starts at byte 7
+        // After "line2\r\n" (7 bytes), next line starts at byte 14
+        assert_eq!(index.lines(), &[0, 7, 14]);
+    }
+
+    #[test]
+    fn test_line_index_mixed_endings() {
+        let text = "line1\nline2\r\nline3\rline4";
+        let index = LineIndex::from(text);
+        // "line1\n" -> next at 6
+        // "line2\r\n" -> next at 13
+        // "line3\r" -> next at 19
+        assert_eq!(index.lines(), &[0, 6, 13, 19]);
+    }
+
+    #[test]
+    fn test_line_index_empty() {
+        let text = "";
+        let index = LineIndex::from(text);
+        assert_eq!(index.lines(), &[0]);
+    }
+
+    #[test]
+    fn test_to_line_col_with_crlf() {
+        let text = "hello\r\nworld";
+        let index = LineIndex::from(text);
+
+        // "hello" is 5 bytes, then \r\n, so "world" starts at byte 7
+        assert_eq!(index.to_line_col(Offset::new(0)), LineCol::new(0, 0));
+        assert_eq!(index.to_line_col(Offset::new(7)), LineCol::new(1, 0));
+        assert_eq!(index.to_line_col(Offset::new(8)), LineCol::new(1, 1));
+    }
+}
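The `to_line_col` lookup above relies on `binary_search` over the line-start offsets: an exact hit is a line start, and a miss `Err(next)` means the offset falls inside the previous line, hence `next - 1`. A small worked sketch of that behavior, assuming the crate re-exports shown in the `lib.rs` hunk above:

```rust
use djls_source::{LineCol, LineIndex, Offset};

fn main() {
    let index = LineIndex::from("ab\ncde\nf");
    // Line starts are byte offsets: line 0 at 0, line 1 at 3, line 2 at 7.
    assert_eq!(index.lines(), &[0, 3, 7]);

    // Offset 5 is not a line start; binary_search returns Err(2),
    // so the lookup falls back to line 1 (start 3), column 5 - 3 = 2.
    assert_eq!(index.to_line_col(Offset::new(5)), LineCol::new(1, 2));
}
```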
@@ -1,26 +1,66 @@
use serde::Serialize;
+use thiserror::Error;

/// A byte offset within a text document.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
-pub struct ByteOffset(u32);
+pub struct Offset(u32);

-impl ByteOffset {
+impl Offset {
    #[must_use]
    pub fn new(offset: u32) -> Self {
        Self(offset)
    }

    #[must_use]
-    pub fn from_usize(offset: usize) -> Self {
-        Self(u32::try_from(offset).unwrap_or(u32::MAX))
-    }
-
-    #[must_use]
-    pub fn offset(&self) -> u32 {
+    pub fn get(&self) -> u32 {
        self.0
    }
}

+impl From<u32> for Offset {
+    #[inline]
+    fn from(offset: u32) -> Self {
+        Offset(offset)
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Error)]
+pub enum OffsetConversionError {
+    #[error("value does not fit into u32")]
+    Overflow,
+}
+
+impl TryFrom<usize> for Offset {
+    type Error = OffsetConversionError;
+
+    #[inline]
+    fn try_from(offset: usize) -> Result<Self, Self::Error> {
+        Ok(Self(
+            u32::try_from(offset).map_err(|_| OffsetConversionError::Overflow)?,
+        ))
+    }
+}
+
+impl AsRef<u32> for Offset {
+    #[inline]
+    fn as_ref(&self) -> &u32 {
+        &self.0
+    }
+}
+
+impl std::borrow::Borrow<u32> for Offset {
+    #[inline]
+    fn borrow(&self) -> &u32 {
+        &self.0
+    }
+}
+
+impl core::fmt::Display for Offset {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
/// A line and column position within a text document.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineCol {
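With `from_usize` gone, callers choose explicitly between the infallible `From<u32>` and the fallible `TryFrom<usize>` rather than silently clamping. A minimal sketch, assuming the `djls_source::Offset` re-export:

```rust
use djls_source::Offset;

fn main() {
    // Infallible from u32, fallible from usize (a usize may exceed u32::MAX).
    let a = Offset::from(7u32);
    let b = Offset::try_from(7usize).expect("fits in u32");
    assert_eq!(a, b);
    assert_eq!(a.get(), 7);

    // On 64-bit targets an oversized usize is rejected instead of wrapping.
    #[cfg(target_pointer_width = "64")]
    assert!(Offset::try_from(usize::MAX).is_err());
}
```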
@@ -45,6 +85,27 @@ impl LineCol {
    }
}

+impl From<(u32, u32)> for LineCol {
+    #[inline]
+    fn from((line, column): (u32, u32)) -> Self {
+        Self { line, column }
+    }
+}
+
+impl From<LineCol> for (u32, u32) {
+    #[inline]
+    fn from(value: LineCol) -> Self {
+        (value.line, value.column)
+    }
+}
+
+impl From<&LineCol> for (u32, u32) {
+    #[inline]
+    fn from(value: &LineCol) -> Self {
+        (value.line, value.column)
+    }
+}
+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
pub struct Span {
    start: u32,
@@ -57,36 +118,6 @@ impl Span {
        Self { start, length }
    }

-    #[must_use]
-    pub fn from_parts(start: usize, length: usize) -> Self {
-        let start_u32 = u32::try_from(start).unwrap_or(u32::MAX);
-        let length_u32 = u32::try_from(length).unwrap_or(u32::MAX.saturating_sub(start_u32));
-        Span::new(start_u32, length_u32)
-    }
-
-    #[must_use]
-    pub fn with_length_usize(self, length: usize) -> Self {
-        Self::from_parts(self.start_usize(), length)
-    }
-
-    /// Construct a span from integer bounds expressed as byte offsets.
-    #[must_use]
-    pub fn from_bounds(start: usize, end: usize) -> Self {
-        Self::from_parts(start, end.saturating_sub(start))
-    }
-
-    #[must_use]
-    pub fn expand(self, opening: u32, closing: u32) -> Self {
-        let start_expand = self.start.saturating_sub(opening);
-        let length_expand = opening + self.length + closing;
-        Self::new(start_expand, length_expand)
-    }
-
-    #[must_use]
-    pub fn as_tuple(self) -> (u32, u32) {
-        (self.start, self.length)
-    }
-
    #[must_use]
    pub fn start(self) -> u32 {
        self.start

@@ -97,11 +128,6 @@ impl Span {
        self.start as usize
    }

-    #[must_use]
-    pub fn end(self) -> u32 {
-        self.start + self.length
-    }
-
    #[must_use]
    pub fn length(self) -> u32 {
        self.length
@@ -113,135 +139,112 @@ impl Span {
    }

    #[must_use]
-    pub fn start_offset(&self) -> ByteOffset {
-        ByteOffset(self.start)
+    pub fn end(self) -> u32 {
+        self.start.saturating_add(self.length)
    }

    #[must_use]
-    pub fn end_offset(&self) -> ByteOffset {
-        ByteOffset(self.start.saturating_add(self.length))
+    pub fn start_offset(&self) -> Offset {
+        Offset(self.start)
    }

-    /// Convert this span to start and end line/column positions using the given line index.
    #[must_use]
-    pub fn to_line_col(&self, line_index: &LineIndex) -> (LineCol, LineCol) {
-        let start = line_index.to_line_col(self.start_offset());
-        let end = line_index.to_line_col(self.end_offset());
-        (start, end)
+    pub fn end_offset(&self) -> Offset {
+        Offset(self.end())
    }
-}

-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct LineIndex(Vec<u32>);
-
-impl LineIndex {
    #[must_use]
-    pub fn from_text(text: &str) -> Self {
-        let mut starts = Vec::with_capacity(256);
-        starts.push(0);
-
-        let bytes = text.as_bytes();
-        let mut i = 0;
-        while i < bytes.len() {
-            match bytes[i] {
-                b'\n' => {
-                    // LF - Unix style line ending
-                    starts.push(u32::try_from(i + 1).unwrap_or_default());
-                    i += 1;
-                }
-                b'\r' => {
-                    // CR - check if followed by LF for Windows style
-                    if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
-                        // CRLF - Windows style line ending
-                        starts.push(u32::try_from(i + 2).unwrap_or_default());
-                        i += 2;
-                    } else {
-                        // Just CR - old Mac style line ending
-                        starts.push(u32::try_from(i + 1).unwrap_or_default());
-                        i += 1;
-                    }
-                }
-                _ => i += 1,
-            }
+    pub fn with_length_usize_saturating(self, length: usize) -> Self {
+        let max_length = u32::MAX.saturating_sub(self.start);
+        let length_u32 = u32::try_from(length.min(max_length as usize)).unwrap_or(u32::MAX);
+        Self {
+            start: self.start,
+            length: length_u32,
        }
-
-        LineIndex(starts)
    }

    #[must_use]
-    pub fn to_line_col(&self, offset: ByteOffset) -> LineCol {
-        if self.0.is_empty() {
-            return LineCol::new(0, 0);
+    pub fn saturating_from_parts_usize(start: usize, length: usize) -> Self {
+        let start_u32 = u32::try_from(start.min(u32::MAX as usize)).unwrap_or(u32::MAX);
+        let max_length = u32::MAX.saturating_sub(start_u32);
+        let length_u32 = u32::try_from(length.min(max_length as usize)).unwrap_or(u32::MAX);
+        Self {
+            start: start_u32,
+            length: length_u32,
        }
-
-        let line = match self.0.binary_search(&offset.0) {
-            Ok(exact) => exact,
-            Err(0) => 0,
-            Err(next) => next - 1,
-        };
-
-        let line_start = self.0[line];
-        let column = offset.0.saturating_sub(line_start);
-
-        LineCol::new(u32::try_from(line).unwrap_or_default(), column)
    }

    #[must_use]
-    pub fn line_start(&self, line: u32) -> Option<u32> {
-        self.0.get(line as usize).copied()
+    pub fn saturating_from_bounds_usize(start: usize, end: usize) -> Self {
+        let s32 = u32::try_from(start.min(u32::MAX as usize)).unwrap_or(u32::MAX);
+        let e32 = u32::try_from(end.min(u32::MAX as usize)).unwrap_or(u32::MAX);
+        let (start_u32, end_u32) = if e32 >= s32 { (s32, e32) } else { (s32, s32) };
+        Self {
+            start: start_u32,
+            length: end_u32 - start_u32,
+        }
+    }
+
+    pub fn try_from_bounds_usize(start: usize, end: usize) -> Result<Self, SpanConversionError> {
+        if end < start {
+            return Err(SpanConversionError::EndBeforeStart);
+        }
+        let start_u32 = u32::try_from(start).map_err(|_| SpanConversionError::Overflow)?;
+        let end_u32 = u32::try_from(end).map_err(|_| SpanConversionError::Overflow)?;
+        Ok(Self {
+            start: start_u32,
+            length: end_u32 - start_u32,
+        })
    }

    #[must_use]
-    pub fn lines(&self) -> &[u32] {
-        &self.0
+    pub fn expand(self, opening: u32, closing: u32) -> Self {
+        let start_expand = self.start.saturating_sub(opening);
+        let length_expand = opening + self.length + closing;
+        Self {
+            start: start_expand,
+            length: length_expand,
+        }
    }
}

-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_line_index_unix_endings() {
-        let text = "line1\nline2\nline3";
-        let index = LineIndex::from_text(text);
-        assert_eq!(index.lines(), &[0, 6, 12]);
-    }
-
-    #[test]
-    fn test_line_index_windows_endings() {
-        let text = "line1\r\nline2\r\nline3";
-        let index = LineIndex::from_text(text);
-        // After "line1\r\n" (7 bytes), next line starts at byte 7
-        // After "line2\r\n" (7 bytes), next line starts at byte 14
-        assert_eq!(index.lines(), &[0, 7, 14]);
-    }
-
-    #[test]
-    fn test_line_index_mixed_endings() {
-        let text = "line1\nline2\r\nline3\rline4";
-        let index = LineIndex::from_text(text);
-        // "line1\n" -> next at 6
-        // "line2\r\n" -> next at 13
-        // "line3\r" -> next at 19
-        assert_eq!(index.lines(), &[0, 6, 13, 19]);
-    }
-
-    #[test]
-    fn test_line_index_empty() {
-        let text = "";
-        let index = LineIndex::from_text(text);
-        assert_eq!(index.lines(), &[0]);
-    }
-
-    #[test]
-    fn test_to_line_col_with_crlf() {
-        let text = "hello\r\nworld";
-        let index = LineIndex::from_text(text);
-
-        // "hello" is 5 bytes, then \r\n, so "world" starts at byte 7
-        assert_eq!(index.to_line_col(ByteOffset(0)), LineCol::new(0, 0));
-        assert_eq!(index.to_line_col(ByteOffset(7)), LineCol::new(1, 0));
-        assert_eq!(index.to_line_col(ByteOffset(8)), LineCol::new(1, 1));
+impl From<(u32, u32)> for Span {
+    #[inline]
+    fn from((start, length): (u32, u32)) -> Self {
+        Self { start, length }
+    }
+}
+
+impl From<Span> for (u32, u32) {
+    #[inline]
+    fn from(val: Span) -> Self {
+        (val.start, val.length)
+    }
+}
+
+impl From<&Span> for (u32, u32) {
+    #[inline]
+    fn from(val: &Span) -> Self {
+        (val.start, val.length)
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Error)]
+pub enum SpanConversionError {
+    #[error("value does not fit into u32")]
+    Overflow,
+    #[error("end is before start")]
+    EndBeforeStart,
+}
+
+impl TryFrom<(usize, usize)> for Span {
+    type Error = SpanConversionError;
+
+    #[inline]
+    fn try_from((start, length): (usize, usize)) -> Result<Self, Self::Error> {
+        Ok(Self {
+            start: u32::try_from(start).map_err(|_| SpanConversionError::Overflow)?,
+            length: u32::try_from(length).map_err(|_| SpanConversionError::Overflow)?,
+        })
    }
}
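The renamed constructors make the clamping behavior explicit at every call site, and the tuple `From`/`TryFrom` impls replace the old `as_tuple`. A minimal sketch of the differences, assuming the `djls_source::Span` re-export:

```rust
use djls_source::Span;

fn main() {
    // The saturating constructors clamp instead of panicking.
    let span = Span::saturating_from_bounds_usize(4, 9);
    assert_eq!(span.start(), 4);
    assert_eq!(span.length(), 5);
    assert_eq!(span.end(), 9);

    // Inverted bounds collapse to an empty span at `start`.
    let empty = Span::saturating_from_bounds_usize(9, 4);
    assert_eq!((empty.start(), empty.length()), (9, 0));

    // The fallible path reports inverted bounds instead of clamping.
    assert!(Span::try_from_bounds_usize(9, 4).is_err());

    // Tuple conversions replace the old `as_tuple`.
    let tuple: (u32, u32) = span.into();
    assert_eq!(tuple, (4, 5));
    assert_eq!(Span::from((4u32, 5u32)), span);
}
```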
@@ -1,8 +1,8 @@
use std::fmt;

-use crate::position::ByteOffset;
+use crate::line::LineIndex;
use crate::position::LineCol;
-use crate::position::LineIndex;
+use crate::position::Offset;

/// Specifies how column positions are counted in text.
///
@@ -53,9 +53,9 @@ impl PositionEncoding {
    /// # Examples
    ///
    /// ```
-    /// # use djls_source::{LineIndex, LineCol, ByteOffset, PositionEncoding};
+    /// # use djls_source::{LineIndex, LineCol, Offset, PositionEncoding};
    /// let text = "Hello 🌍 world";
-    /// let index = LineIndex::from_text(text);
+    /// let index = LineIndex::from(text);
    ///
    /// // UTF-16: "Hello " (6) + "🌍" (2 UTF-16 units) = position 8
    /// let offset = PositionEncoding::Utf16.line_col_to_offset(
@@ -63,7 +63,7 @@ impl PositionEncoding {
    ///     LineCol::new(0, 8),
    ///     text
    /// );
-    /// assert_eq!(offset, Some(ByteOffset::new(10))); // "Hello 🌍" is 10 bytes
+    /// assert_eq!(offset, Some(Offset::new(10))); // "Hello 🌍" is 10 bytes
    /// ```
    #[must_use]
    pub fn line_col_to_offset(
@@ -71,18 +71,18 @@ impl PositionEncoding {
        index: &LineIndex,
        line_col: LineCol,
        text: &str,
-    ) -> Option<ByteOffset> {
+    ) -> Option<Offset> {
        let line = line_col.line();
        let character = line_col.column();

        // Handle line bounds - if line > line_count, return document length
        let line_start_utf8 = match index.lines().get(line as usize) {
            Some(start) => *start,
-            None => return Some(ByteOffset::from_usize(text.len())),
+            None => return Offset::try_from(text.len()).ok(),
        };

        if character == 0 {
-            return Some(ByteOffset::new(line_start_utf8));
+            return Some(Offset::new(line_start_utf8));
        }

        let next_line_start = index
@@ -96,14 +96,14 @@ impl PositionEncoding {
        // Fast path optimization for ASCII text, all encodings are equivalent to byte offsets
        if line_text.is_ascii() {
            let char_offset = character.min(u32::try_from(line_text.len()).unwrap_or(u32::MAX));
-            return Some(ByteOffset::new(line_start_utf8 + char_offset));
+            return Some(Offset::new(line_start_utf8 + char_offset));
        }

        match self {
            PositionEncoding::Utf8 => {
                // UTF-8: character positions are already byte offsets
                let char_offset = character.min(u32::try_from(line_text.len()).unwrap_or(u32::MAX));
-                Some(ByteOffset::new(line_start_utf8 + char_offset))
+                Some(Offset::new(line_start_utf8 + char_offset))
            }
            PositionEncoding::Utf16 => {
                // UTF-16: count UTF-16 code units
@@ -119,7 +119,7 @@ impl PositionEncoding {
                }

                // If character position exceeds line length, clamp to line end
-                Some(ByteOffset::new(line_start_utf8 + utf8_pos))
+                Some(Offset::new(line_start_utf8 + utf8_pos))
            }
            PositionEncoding::Utf32 => {
                // UTF-32: count Unicode code points (characters)
@@ -133,7 +133,7 @@ impl PositionEncoding {
                }

                // If character position exceeds line length, clamp to line end
-                Some(ByteOffset::new(line_start_utf8 + utf8_pos))
+                Some(Offset::new(line_start_utf8 + utf8_pos))
            }
        }
    }
@@ -153,26 +153,26 @@ mod tests {
    #[test]
    fn test_line_col_to_offset_utf16() {
        let text = "Hello 🌍 world";
-        let index = LineIndex::from_text(text);
+        let index = LineIndex::from(text);

        // "Hello " = 6 UTF-16 units, "🌍" = 2 UTF-16 units
        // So position (0, 8) in UTF-16 should be after the emoji
        let offset = PositionEncoding::Utf16
            .line_col_to_offset(&index, LineCol::new(0, 8), text)
            .expect("Should get offset");
-        assert_eq!(offset, ByteOffset::new(10)); // "Hello 🌍" is 10 bytes
+        assert_eq!(offset, Offset::new(10)); // "Hello 🌍" is 10 bytes

        // In UTF-8, character 10 would be at the 'r' in 'world'
        let offset_utf8 = PositionEncoding::Utf8
            .line_col_to_offset(&index, LineCol::new(0, 10), text)
            .expect("Should get offset");
-        assert_eq!(offset_utf8, ByteOffset::new(10));
+        assert_eq!(offset_utf8, Offset::new(10));
    }

    #[test]
    fn test_line_col_to_offset_ascii_fast_path() {
        let text = "Hello world";
-        let index = LineIndex::from_text(text);
+        let index = LineIndex::from(text);

        // For ASCII text, all encodings should give the same result
        let offset_utf8 = PositionEncoding::Utf8

@@ -185,8 +185,8 @@ mod tests {
            .line_col_to_offset(&index, LineCol::new(0, 5), text)
            .expect("Should get offset");

-        assert_eq!(offset_utf8, ByteOffset::new(5));
-        assert_eq!(offset_utf16, ByteOffset::new(5));
-        assert_eq!(offset_utf32, ByteOffset::new(5));
+        assert_eq!(offset_utf8, Offset::new(5));
+        assert_eq!(offset_utf16, Offset::new(5));
+        assert_eq!(offset_utf32, Offset::new(5));
    }
}
@@ -75,7 +75,7 @@ impl<'db> Lexer<'db> {
            Ok(text) => {
                let len = text.len();
                let content = TokenContent::new(self.db, text);
-                let span = Span::from_parts(content_start, len);
+                let span = Span::saturating_from_parts_usize(content_start, len);
                self.consume_n(delimiter.closer().len());
                token_fn(content, span)
            }
@@ -83,9 +83,9 @@ impl<'db> Lexer<'db> {
                let len = err_text.len();
                let content = TokenContent::new(self.db, err_text);
                let span = if len == 0 {
-                    Span::from_bounds(content_start, self.current)
+                    Span::saturating_from_bounds_usize(content_start, self.current)
                } else {
-                    Span::from_parts(content_start, len)
+                    Span::saturating_from_parts_usize(content_start, len)
                };
                Token::Error { content, span }
            }
@@ -98,7 +98,7 @@ impl<'db> Lexer<'db> {
            if c == '\r' && self.peek() == '\n' {
                self.consume(); // \n of \r\n
            }
-            let span = Span::from_bounds(self.start, self.current);
+            let span = Span::saturating_from_bounds_usize(self.start, self.current);
            Token::Newline { span }
        } else {
            self.consume(); // Consume the first whitespace
@@ -108,7 +108,7 @@ impl<'db> Lexer<'db> {
                }
                self.consume();
            }
-            let span = Span::from_bounds(self.start, self.current);
+            let span = Span::saturating_from_bounds_usize(self.start, self.current);
            Token::Whitespace { span }
        }
    }
@@ -130,7 +130,7 @@ impl<'db> Lexer<'db> {

        let text = self.consumed_source_from(text_start);
        let content = TokenContent::new(self.db, text.to_string());
-        let span = Span::from_bounds(self.start, self.current);
+        let span = Span::saturating_from_bounds_usize(self.start, self.current);
        Token::Text { content, span }
    }
@@ -50,15 +50,16 @@ mod error;
mod lexer;
pub mod nodelist;
mod parser;
-mod tokens;
+pub mod tokens;

pub use db::Db;
pub use db::TemplateErrorAccumulator;
use djls_source::File;
use djls_source::FileKind;
+use djls_source::Span;
pub use error::TemplateError;
pub use lexer::Lexer;
-use nodelist::Node;
+pub use nodelist::Node;
pub use nodelist::NodeList;
pub use parser::ParseError;
pub use parser::Parser;
@@ -106,7 +107,7 @@ pub fn parse_template(db: &dyn Db, file: File) -> Option<NodeList<'_>> {
            TemplateErrorAccumulator(template_error).accumulate(db);

            let text = source.as_ref();
-            let span = djls_source::Span::from_bounds(0, text.len());
+            let span = Span::saturating_from_bounds_usize(0, text.len());
            let error_node = Node::Error {
                span,
                full_span: span,
@@ -64,11 +64,11 @@ impl<'db> Node<'db> {
        match self {
            Node::Tag { name, span, .. } => {
                // Just the tag name (e.g., "if" in "{% if user.is_authenticated %}")
-                Some(span.with_length_usize(name.text(db).len()))
+                Some(span.with_length_usize_saturating(name.text(db).len()))
            }
            Node::Variable { var, span, .. } => {
                // Just the variable name (e.g., "user" in "{{ user.name|title }}")
-                Some(span.with_length_usize(var.text(db).len()))
+                Some(span.with_length_usize_saturating(var.text(db).len()))
            }
            Node::Comment { .. } | Node::Text { .. } | Node::Error { .. } => None,
        }
@@ -400,31 +400,31 @@ mod tests {
            Node::Tag { name, bits, span } => TestNode::Tag {
                name: name.text(db).to_string(),
                bits: bits.iter().map(|b| b.text(db).to_string()).collect(),
-                span: span.as_tuple(),
-                full_span: node.full_span().as_tuple(),
+                span: span.into(),
+                full_span: node.full_span().into(),
            },
            Node::Comment { content, span } => TestNode::Comment {
                content: content.clone(),
-                span: span.as_tuple(),
-                full_span: node.full_span().as_tuple(),
+                span: span.into(),
+                full_span: node.full_span().into(),
            },
            Node::Text { span } => TestNode::Text {
-                span: span.as_tuple(),
-                full_span: node.full_span().as_tuple(),
+                span: span.into(),
+                full_span: node.full_span().into(),
            },
            Node::Variable { var, filters, span } => TestNode::Variable {
                var: var.text(db).to_string(),
                filters: filters.iter().map(|f| f.text(db).to_string()).collect(),
-                span: span.as_tuple(),
-                full_span: node.full_span().as_tuple(),
+                span: span.into(),
+                full_span: node.full_span().into(),
            },
            Node::Error {
                span,
                full_span,
                error,
            } => TestNode::Error {
-                span: span.as_tuple(),
-                full_span: full_span.as_tuple(),
+                span: span.into(),
+                full_span: full_span.into(),
                error: error.clone(),
            },
        }
@@ -132,6 +132,7 @@ impl<'db> Token<'db> {
        }
    }

+    #[must_use]
    pub fn offset(&self) -> Option<u32> {
        match self {
            Token::Block { span, .. }
@@ -158,9 +159,10 @@ impl<'db> Token<'db> {
            Token::Whitespace { span, .. } | Token::Newline { span, .. } => span.length_usize(),
            Token::Eof => 0,
        };
-        u32::try_from(len).expect("Token length should fit in u32")
+        u32::try_from(len).unwrap_or(u32::MAX)
    }

+    #[must_use]
    pub fn full_span(&self) -> Option<Span> {
        match self {
            Token::Block { span, .. }
@@ -176,6 +178,7 @@ impl<'db> Token<'db> {
        }
    }

+    #[must_use]
    pub fn content_span(&self) -> Option<Span> {
        match self {
            Token::Block { span, .. }
@@ -245,40 +248,40 @@ pub enum TokenSnapshot {

#[cfg(test)]
impl<'db> Token<'db> {
    /// ## Panics
    ///
    /// This may panic on the `full_span` calls, but it's only used in testing,
    /// so it's all good.
    pub fn to_snapshot(&self, db: &'db dyn TemplateDb) -> TokenSnapshot {
        match self {
            Token::Block { span, .. } => TokenSnapshot::Block {
                content: self.content(db),
-                span: span.as_tuple(),
-                full_span: self.full_span().unwrap().as_tuple(),
+                span: span.into(),
+                full_span: self.full_span().unwrap().into(),
            },
            Token::Comment { span, .. } => TokenSnapshot::Comment {
                content: self.content(db),
-                span: span.as_tuple(),
-                full_span: self.full_span().unwrap().as_tuple(),
+                span: span.into(),
+                full_span: self.full_span().unwrap().into(),
            },
            Token::Eof => TokenSnapshot::Eof,
            Token::Error { span, .. } => TokenSnapshot::Error {
                content: self.content(db),
-                span: span.as_tuple(),
-                full_span: self.full_span().unwrap().as_tuple(),
-            },
-            Token::Newline { span } => TokenSnapshot::Newline {
-                span: span.as_tuple(),
+                span: span.into(),
+                full_span: self.full_span().unwrap().into(),
            },
+            Token::Newline { span } => TokenSnapshot::Newline { span: span.into() },
            Token::Text { span, .. } => TokenSnapshot::Text {
                content: self.content(db),
-                span: span.as_tuple(),
-                full_span: span.as_tuple(),
+                span: span.into(),
+                full_span: span.into(),
            },
            Token::Variable { span, .. } => TokenSnapshot::Variable {
                content: self.content(db),
-                span: span.as_tuple(),
-                full_span: self.full_span().unwrap().as_tuple(),
-            },
-            Token::Whitespace { span } => TokenSnapshot::Whitespace {
-                span: span.as_tuple(),
-            },
+                span: span.into(),
+                full_span: self.full_span().unwrap().into(),
+            },
+            Token::Whitespace { span } => TokenSnapshot::Whitespace { span: span.into() },
        }
    }
}
@@ -32,7 +32,7 @@ pub struct TextDocument {
impl TextDocument {
    #[must_use]
    pub fn new(content: String, version: i32, language_id: LanguageId) -> Self {
-        let line_index = LineIndex::from_text(&content);
+        let line_index = LineIndex::from(content.as_str());
        Self {
            content,
            version,
@@ -99,7 +99,7 @@ impl TextDocument {
        // Fast path: single change without range = full document replacement
        if changes.len() == 1 && changes[0].range.is_none() {
            self.content.clone_from(&changes[0].text);
-            self.line_index = LineIndex::from_text(&self.content);
+            self.line_index = LineIndex::from(self.content.as_str());
            self.version = version;
            return;
        }
@@ -128,7 +128,7 @@ impl TextDocument {
        }

        // Rebuild line index to match the new content state
-        new_line_index = LineIndex::from_text(&new_content);
+        new_line_index = LineIndex::from(new_content.as_str());
    }

    // Update all document state at once
@@ -147,7 +147,7 @@ impl TextDocument {
        let line_col = djls_source::LineCol::new(position.line, position.character);
        encoding
            .line_col_to_offset(line_index, line_col, text)
-            .map(|offset| offset.offset())
+            .map(|offset| offset.get())
    }
}
@@ -341,11 +341,11 @@ mod tests {

        let line_index = file.line_index(&db);
        assert_eq!(
-            line_index.to_line_col(djls_source::ByteOffset::new(0)),
+            line_index.to_line_col(djls_source::Offset::new(0)),
            djls_source::LineCol::new(0, 0)
        );
        assert_eq!(
-            line_index.to_line_col(djls_source::ByteOffset::new(6)),
+            line_index.to_line_col(djls_source::Offset::new(6)),
            djls_source::LineCol::new(1, 0)
        );
    }