slint/internal/core/textlayout.rs

// Copyright © SixtyFPS GmbH <info@slint-ui.com>
// SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-Slint-commercial

//! module for basic text layout
//!
//! The basic algorithm for breaking text into multiple lines:
//! 1. First we determine the boundaries for text shaping. As shaping happens based on a single font and we know that different fonts cater to different
//!    writing systems, we split up the text into chunks that maximize our chances of finding a font that covers all glyphs in the chunk. This way, for
//!    example, Arabic text can be covered by a font that has excellent Arabic coverage while Latin text is rendered using a different font.
//!    Shaping boundaries are always also grapheme boundaries.
//! 2. Then we shape the text at shaping boundaries, to determine the metrics of glyphs and glyph clusters (grapheme boundaries within the shapable)
//! 3. Allocate graphemes into new text lines until all graphemes are consumed:
//! 4. Loop over all graphemes:
//!     Compute the width of the grapheme
//!     Determine if the grapheme is produced by a white space character
//!     If grapheme is not at break opportunity:
//!         Add grapheme to fragment
//!         If width of current line <= available width AND width of current line + fragment > available width:
//!             Emit current line
//!             Current line starts with fragment
//!             Clear fragment
//!         Else:
//!              Continue
//!     Else if break opportunity at grapheme boundary is optional OR if current is space and next is optional:
//!         If width of current line + fragment <= available width:
//!              Add fragment to current line
//!              Clear fragment
//!         Else:
//!              Emit current line
//!              Current line starts with fragment
//!              Clear fragment
//!         Add grapheme to fragment
//!
//!     Else if break opportunity at grapheme boundary is mandatory:
//!         Add fragment to current line
//!         Emit current line
//!         Clear fragment
//!         Add grapheme to fragment
//!

use core::ops::Range;

use alloc::boxed::Box;
use alloc::vec::Vec;

use euclid::num::Zero;

/// A single glyph as reported by a [`TextShaper`], together with its metrics.
///
/// All metrics use the shaper's `Length` type. The layout code in this module only
/// reads `advance_x` and `glyph_cluster_index`; the remaining fields are carried along
/// for whoever consumes the shaped glyphs.
#[derive(Clone, Debug, Default)]
pub struct ShapedGlyph<Length> {
    /// Horizontal offset reported by the shaper for this glyph.
    pub offset_x: Length,
    /// Vertical offset reported by the shaper for this glyph.
    pub offset_y: Length,
    /// Horizontal bearing of the glyph (presumably the left edge of its bounding box — confirm with the shaper).
    pub bearing_x: Length,
    /// Vertical bearing of the glyph (presumably the bottom edge of its bounding box — confirm with the shaper).
    pub bearing_y: Length,
    /// Width of the glyph's bounding box.
    pub width: Length,
    /// Height of the glyph's bounding box.
    pub height: Length,
    /// Horizontal advance; the advances of all glyphs of a cluster are summed up to
    /// obtain the width of a grapheme.
    pub advance_x: Length,
    /// Identifier of the glyph in the font, if there is one.
    pub glyph_id: Option<core::num::NonZeroU16>,
    /// Cluster the glyph belongs to. The layout code interprets this as a byte offset
    /// relative to the start of the string that was passed to `shape_text`.
    pub glyph_cluster_index: u32,
}

/// Abstraction over a font that can turn text into positioned glyphs.
pub trait TextShaper {
    /// Numeric type used for all glyph metrics and line widths. It needs to be cheap to
    /// copy, addable and comparable, because the line breaker accumulates glyph advances
    /// and compares the sums against the available width.
    type Length: euclid::num::Zero
        + core::ops::AddAssign
        + core::ops::Add<Output = Self::Length>
        + Default
        + Clone
        + Copy
        + core::cmp::PartialOrd;
    /// Shapes `text` and appends the resulting glyphs to `glyphs`.
    ///
    /// The layout code expects `glyph_cluster_index` of every produced glyph to be a
    /// byte offset into `text`.
    fn shape_text<GlyphStorage: core::iter::Extend<ShapedGlyph<Self::Length>>>(
        &self,
        text: &str,
        glyphs: &mut GlyphStorage,
    );
}

/// Iterator that splits a text into byte ranges ("shapables") that are shaped in one go.
///
/// Currently a new range starts whenever the Unicode script of the characters changes.
pub struct ShapeBoundaries<'a> {
    /// The complete text that is being split.
    text: &'a str,
    // TODO: We should do a better analysis to find boundaries for text shaping; including
    // boundaries when the bidi level changes, the script changes or an explicit separator like
    // paragraph/lineseparator/space is encountered.
    /// Characters of `text` that have not been inspected yet.
    chars: core::str::CharIndices<'a>,
    /// Byte offset at which the next range starts; `None` once the text is exhausted.
    next_boundary_start: Option<usize>,
    /// Script of the range that is currently being scanned; `None` until the first
    /// character of a fresh scan has been seen.
    last_script: Option<unicode_script::Script>,
}

impl<'a> ShapeBoundaries<'a> {
    /// Creates an iterator over the shaping ranges of `text`.
    ///
    /// An empty `text` produces an iterator that yields nothing.
    pub fn new(text: &'a str) -> Self {
        Self {
            text,
            chars: text.char_indices(),
            // Only a non-empty text has a first range, and it starts at byte 0.
            next_boundary_start: if text.is_empty() { None } else { Some(0) },
            last_script: None,
        }
    }
}

impl<'a> Iterator for ShapeBoundaries<'a> {
    type Item = Range<usize>;

    /// Returns the byte range of the next run of text that shares one script.
    ///
    /// Characters whose script is `Unknown`, `Common` or `Inherited` never terminate a
    /// run; they are attached to the run that is currently being scanned. Note that a
    /// run that *starts* with such a character takes that script as its own, so the
    /// first character with a specific script then begins a new run.
    fn next(&mut self) -> Option<Self::Item> {
        use unicode_script::{Script, UnicodeScript};

        let run_start = self.next_boundary_start?;

        // Byte offset and script of the character that opens the following run, if any.
        let mut following_run: Option<(usize, Script)> = None;

        for (byte_offset, ch) in self.chars.by_ref() {
            let script = ch.script();
            // The first character scanned after a reset defines the script of the run.
            let run_script = *self.last_script.get_or_insert(script);

            let is_neutral =
                matches!(script, Script::Unknown | Script::Common | Script::Inherited);

            if script != run_script && !is_neutral {
                following_run = Some((byte_offset, script));
                break;
            }
        }

        let run_end = following_run.map_or(self.text.len(), |(byte_offset, _)| byte_offset);

        // Remember where (and with which script) the next call continues; both become
        // `None` once the end of the text has been reached.
        self.last_script = following_run.map(|(_, script)| script);
        self.next_boundary_start = following_run.map(|(byte_offset, _)| byte_offset);

        Some(run_start..run_end)
    }
}

/// Run of whitespace at the end of a [`TextLine`], tracked separately from the text so
/// that it can be excluded from the line's byte range and text width.
#[derive(Clone, Default, Debug)]
struct Whitespace<Length: Default + Clone> {
    // size in bytes in the text
    len: usize,
    // width in pixels
    width: Length,
}

/// One line of text as produced by the line breaker. The default value is an empty line.
#[derive(Clone, Default, Debug)]
pub struct TextLine<Length: Default + Clone> {
    // The range excludes trailing whitespace
    byte_range: Range<usize>,
    // Whitespace that directly follows `byte_range`, if any
    trailing_whitespace: Option<Whitespace<Length>>,
    text_width: Length, // width as occupied by the glyphs, excluding trailing whitespace
}

impl<Length: Default + Copy + Clone + Zero + core::ops::Add<Output = Length>> TextLine<Length> {
    /// Returns the slice of `paragraph` that is covered by this line, without the
    /// trailing whitespace.
    ///
    /// `paragraph` must be the text this line was created from; the line only stores
    /// byte offsets into it.
    pub fn line_text<'a>(&self, paragraph: &'a str) -> &'a str {
        &paragraph[self.byte_range.start..self.byte_range.end]
    }

    /// Returns the width of the text plus the width of the trailing whitespace, if any.
    pub fn width_including_trailing_whitespace(&self) -> Length {
        let whitespace_width = match &self.trailing_whitespace {
            Some(whitespace) => whitespace.width,
            None => Length::zero(),
        };
        self.text_width + whitespace_width
    }
}

/// A grapheme (glyph cluster) as produced by the `GraphemeCursor`.
#[derive(Clone)]
struct Grapheme<Length> {
    /// Absolute byte range of the grapheme in the entire text.
    byte_range: Range<usize>,
    /// Sum of the advances of all glyphs that make up the grapheme.
    width: Length,
    /// Whether the first character of the grapheme is a whitespace character.
    is_whitespace: bool,
}

impl<Length: Clone + Copy + Default + core::ops::AddAssign> TextLine<Length> {
    /// Appends `grapheme` to the end of this line.
    ///
    /// * Whitespace offered to a line that has no text yet is ignored, so that lines
    ///   never start with whitespace.
    /// * Whitespace that follows text is collected in `trailing_whitespace` and does not
    ///   contribute to `byte_range` or `text_width`.
    /// * When text follows previously collected whitespace, that whitespace is no longer
    ///   trailing and is folded into `byte_range` and `text_width` first.
    ///
    /// Graphemes are expected to be added in text order without gaps; this is checked
    /// in debug builds.
    fn add_grapheme(&mut self, grapheme: &Grapheme<Length>) {
        if self.byte_range.is_empty() {
            if grapheme.is_whitespace {
                return;
            }
            // First text of the line: anchor the (still empty) range at the grapheme.
            self.byte_range.start = grapheme.byte_range.start;
            self.byte_range.end = self.byte_range.start;
        }

        let grapheme_len = grapheme.byte_range.len();

        if grapheme.is_whitespace {
            match self.trailing_whitespace.as_mut() {
                Some(whitespace) => {
                    whitespace.len += grapheme_len;
                    whitespace.width += grapheme.width;
                }
                None => {
                    self.trailing_whitespace =
                        Some(Whitespace { len: grapheme_len, width: grapheme.width });
                }
            }
            return;
        }

        // The whitespace collected so far is now enclosed by text.
        if let Some(Whitespace { len, width }) = self.trailing_whitespace.take() {
            self.byte_range.end += len;
            self.text_width += width;
        }

        if !self.byte_range.is_empty() {
            // There should not be any gaps between the line and the added grapheme
            debug_assert_eq!(self.byte_range.end, grapheme.byte_range.start);
        }
        self.byte_range.end += grapheme_len;
        self.text_width += grapheme.width;
    }

    /// Appends `candidate` to the end of this line and resets `candidate` to an empty line.
    ///
    /// If `candidate` contains text, whitespace that was trailing this line so far ends
    /// up between two pieces of text: its width is therefore added to `text_width`, and
    /// the trailing whitespace of the combined line is the one of `candidate`.
    /// If `candidate` consists only of whitespace, that whitespace extends the trailing
    /// whitespace of this line. An entirely empty `candidate` is left untouched.
    fn add_line(&mut self, candidate: &mut Self) {
        let candidate_has_text = !candidate.byte_range.is_empty();

        if !candidate_has_text && candidate.trailing_whitespace.is_none() {
            return;
        }

        if self.byte_range.is_empty() && self.trailing_whitespace.is_none() {
            self.byte_range.start = candidate.byte_range.start;
            self.byte_range.end = self.byte_range.start;
        }

        if candidate_has_text {
            // Whitespace between this line's text and the candidate's text counts as text.
            if let Some(Whitespace { width, .. }) = self.trailing_whitespace.take() {
                self.text_width += width;
            }
            self.byte_range.end = candidate.byte_range.end;
            self.text_width += candidate.text_width;
            self.trailing_whitespace = candidate.trailing_whitespace.take();
        } else if let Some(new_whitespace) = candidate.trailing_whitespace.take() {
            match self.trailing_whitespace.as_mut() {
                Some(whitespace) => {
                    whitespace.len += new_whitespace.len;
                    whitespace.width += new_whitespace.width;
                }
                None => self.trailing_whitespace = Some(new_whitespace),
            }
        }

        *candidate = Default::default();
    }
}

/// Iterator over the graphemes (glyph clusters) of a text. The text is shaped lazily,
/// one shaping range at a time.
struct GraphemeCursor<'a, Font: TextShaper> {
    /// Shaper used to turn each shaping range into glyphs.
    font: &'a Font,
    /// Source of the shaping ranges; also holds the text itself.
    shape_boundaries: ShapeBoundaries<'a>,
    /// Byte range of the shaping range that is currently being iterated.
    current_shapable: Range<usize>,
    /// Glyphs produced by shaping `current_shapable`.
    glyphs: Vec<ShapedGlyph<Font::Length>>,
    // absolute byte offset in the entire text
    byte_offset: usize,
    /// Index into `glyphs` of the first glyph that has not been consumed yet.
    glyph_index: usize,
}

impl<'a, Font: TextShaper> GraphemeCursor<'a, Font> {
    /// Creates a cursor positioned at the start of `text` and eagerly shapes the first
    /// shaping range with `font`.
    fn new(text: &'a str, font: &'a Font) -> Self {
        let mut shape_boundaries = ShapeBoundaries::new(text);

        // An empty text has no shaping range at all; use an empty range in that case.
        let first_shapable = shape_boundaries.next().unwrap_or(0..0);

        let mut shaped_glyphs = Vec::new();
        font.shape_text(&text[first_shapable.start..first_shapable.end], &mut shaped_glyphs);

        Self {
            font,
            shape_boundaries,
            current_shapable: first_shapable,
            glyphs: shaped_glyphs,
            byte_offset: 0,
            glyph_index: 0,
        }
    }
}

impl<'a, Font: TextShaper> Iterator for GraphemeCursor<'a, Font> {
    type Item = Grapheme<Font::Length>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.byte_offset >= self.current_shapable.end {
            self.current_shapable = match self.shape_boundaries.next() {
                Some(shapable) => shapable,
                None => return None,
            };
            self.byte_offset = self.current_shapable.start;
            self.glyph_index = 0;
            self.glyphs.clear();
            self.font.shape_text(
                &self.shape_boundaries.text[self.current_shapable.clone()],
                &mut self.glyphs,
            );
        }

        let mut grapheme_width: Font::Length = Font::Length::zero();

        let mut cluster_byte_offset;
        loop {
            let glyph = &self.glyphs[self.glyph_index];
            // Rustybuzz uses a relative byte offset as cluster index
            cluster_byte_offset = self.current_shapable.start + glyph.glyph_cluster_index as usize;
            if cluster_byte_offset != self.byte_offset {
                break;
            }
            grapheme_width += glyph.advance_x;

            self.glyph_index += 1;

            if self.glyph_index >= self.glyphs.len() {
                cluster_byte_offset = self.current_shapable.end;
                break;
            }
        }
        let grapheme_byte_offset = self.byte_offset;
        let grapheme_byte_len = cluster_byte_offset - self.byte_offset;
        let first_char = self.shape_boundaries.text[self.byte_offset..].chars().next();
        let is_whitespace = first_char.map(|ch| ch.is_whitespace()).unwrap_or_default();
        self.byte_offset = cluster_byte_offset;

        Some(Grapheme {
            byte_range: Range {
                start: grapheme_byte_offset,
                end: grapheme_byte_offset + grapheme_byte_len,
            },
            width: grapheme_width,
            is_whitespace,
        })
    }
}

/// Iterator that breaks a text into [`TextLine`]s, optionally constrained by an
/// available width.
pub struct TextLineBreaker<'a, Font: TextShaper> {
    /// Remaining break opportunities of the text, as (byte offset, kind) pairs.
    line_breaks: Box<dyn Iterator<Item = (usize, unicode_linebreak::BreakOpportunity)> + 'a>, // Would be nice to get rid of that Box...
    /// The break opportunity that has not been passed yet, if any.
    next_break_opportunity: Option<(usize, unicode_linebreak::BreakOpportunity)>,
    /// Source of the graphemes that are distributed onto the lines.
    grapheme_cursor: GraphemeCursor<'a, Font>,
    /// Width that lines should not exceed; `None` disables width-based breaking.
    available_width: Option<Font::Length>,
    /// The line that is currently being filled.
    current_line: TextLine<Font::Length>,
    /// Graphemes collected since the last break opportunity that are not yet part of
    /// `current_line`.
    fragment: TextLine<Font::Length>,
    /// Number of lines returned so far; used to emit exactly one line for empty text.
    num_emitted_lines: usize,
}

impl<'a, Font: TextShaper> TextLineBreaker<'a, Font> {
    /// Appends the pending fragment to the current line, which leaves the fragment empty.
    fn commit_fragment(&mut self) {
        let Self { current_line, fragment, .. } = self;
        current_line.add_line(fragment);
    }

    /// Returns true if the current line, including its trailing whitespace, does not
    /// exceed the available width. Without a width constraint everything fits.
    fn current_line_fits(&self) -> bool {
        match self.available_width {
            Some(available_width) => {
                self.current_line.width_including_trailing_whitespace() <= available_width
            }
            None => true,
        }
    }

    /// Returns true if the current line followed by the pending fragment, both including
    /// their trailing whitespace, does not exceed the available width. Without a width
    /// constraint everything fits.
    fn fragment_fits(&self) -> bool {
        match self.available_width {
            Some(available_width) => {
                let combined_width = self.current_line.width_including_trailing_whitespace()
                    + self.fragment.width_including_trailing_whitespace();
                combined_width <= available_width
            }
            None => true,
        }
    }

    /// Creates a line breaker for `text` that measures glyphs with `font`.
    ///
    /// If `available_width` is `None`, lines are only broken at mandatory break
    /// opportunities.
    pub fn new(text: &'a str, font: &'a Font, available_width: Option<Font::Length>) -> Self {
        let mut break_opportunities = unicode_linebreak::linebreaks(text);
        let first_break_opportunity = break_opportunities.next();

        Self {
            line_breaks: Box::new(break_opportunities),
            next_break_opportunity: first_break_opportunity,
            grapheme_cursor: GraphemeCursor::new(text, font),
            available_width,
            current_line: Default::default(),
            fragment: Default::default(),
            num_emitted_lines: 0,
        }
    }
}

impl<'a, Font: TextShaper> Iterator for TextLineBreaker<'a, Font> {
    type Item = TextLine<Font::Length>;

    /// Returns the next line of the text, or `None` when all lines have been returned.
    ///
    /// Graphemes are pulled from the cursor and collected in `fragment`. A break
    /// opportunity applies to a grapheme if it is located right before the grapheme, or
    /// right after it when the grapheme is whitespace. For an empty text exactly one
    /// empty line is returned.
    ///
    /// NOTE(review): a whitespace grapheme at a break opportunity is added to the
    /// fragment right after the fragment was committed (and thereby cleared), where
    /// `add_grapheme` ignores it as leading whitespace. Its width therefore does not
    /// appear to reach `text_width`, even if more text follows on the same line —
    /// confirm whether that is intended.
    fn next(&mut self) -> Option<Self::Item> {
        while let Some(grapheme) = self.grapheme_cursor.next() {
            let mut line_to_emit = None;

            match self.next_break_opportunity.as_ref() {
                // Mandatory break: the current line ends here regardless of its width.
                Some((offset, unicode_linebreak::BreakOpportunity::Mandatory))
                    if *offset == grapheme.byte_range.start
                        || (*offset == grapheme.byte_range.end && grapheme.is_whitespace) =>
                {
                    self.next_break_opportunity = self.line_breaks.next();

                    self.commit_fragment();
                    line_to_emit = Some(core::mem::take(&mut self.current_line));
                    self.fragment.add_grapheme(&grapheme);
                }
                // Optional break: keep the fragment on the current line if it fits,
                // otherwise emit the current line and start the next one with the fragment.
                Some((offset, unicode_linebreak::BreakOpportunity::Allowed))
                    if (*offset == grapheme.byte_range.start)
                        || (*offset == grapheme.byte_range.end && grapheme.is_whitespace) =>
                {
                    self.next_break_opportunity = self.line_breaks.next();

                    if self.fragment_fits() {
                        self.commit_fragment();
                    } else {
                        line_to_emit = Some(core::mem::take(&mut self.current_line));
                        self.commit_fragment();
                    }

                    self.fragment.add_grapheme(&grapheme);
                }
                // No break opportunity at this grapheme: grow the fragment.
                _ => {
                    self.fragment.add_grapheme(&grapheme);

                    // The grown fragment no longer fits next to a current line that fits
                    // on its own: emit the current line (unless it has no text) and
                    // continue the fragment on a new line.
                    if self.current_line_fits() && !self.fragment_fits() {
                        if !self.current_line.byte_range.is_empty() {
                            line_to_emit = Some(core::mem::take(&mut self.current_line));
                        }
                        self.commit_fragment();
                    }
                }
            };

            if line_to_emit.is_some() {
                self.num_emitted_lines += 1;
                return line_to_emit;
            }
        }

        // All graphemes are consumed: flush what is left as the last line. If nothing
        // has been emitted at all, emit one (possibly empty) line.
        self.commit_fragment();
        if !self.current_line.byte_range.is_empty() || self.num_emitted_lines == 0 {
            self.num_emitted_lines += 1;
            return Some(core::mem::take(&mut self.current_line));
        }

        None
    }
}

/// Measures the size of the given text when rendered with the specified font and
/// optionally constrained by the provided `max_width`.
///
/// Returns a tuple of the width of the longest line as well as the number of lines.
/// The width of a line does not include its trailing whitespace.
pub fn text_size<Font: TextShaper>(
    font: &Font,
    text: &str,
    max_width: Option<Font::Length>,
) -> (Font::Length, usize) {
    TextLineBreaker::new(text, font, max_width).fold(
        (Font::Length::zero(), 0),
        |(widest_line_width, line_count), line| {
            (euclid::approxord::max(widest_line_width, line.text_width), line_count + 1)
        },
    )
}

/// Text in a single script is returned as one shaping range.
#[test]
fn test_shape_boundaries_simple() {
    let simple_text = "Hello World";
    let runs: Vec<&str> =
        ShapeBoundaries::new(simple_text).map(|range| &simple_text[range]).collect();
    assert_eq!(runs, ["Hello World"]);
}

/// An empty text has no shaping ranges.
#[test]
fn test_shape_boundaries_empty() {
    assert_eq!(ShapeBoundaries::new("").count(), 0);
}

/// A change of script starts a new shaping range, while characters without a specific
/// script (emoji, punctuation) stay in the surrounding range.
#[test]
fn test_shape_boundaries_script_change() {
    let text = "abc🍌🐒defதோசை.";
    let runs: Vec<&str> = ShapeBoundaries::new(text).map(|range| &text[range]).collect();
    assert_eq!(runs, ["abc🍌🐒def", "தோசை."]);
}

#[cfg(test)]
mod shape_tests {
    use super::*;

    /// `TextShaper` implementation on top of rustybuzz, used to test against a real font.
    impl<'a> TextShaper for rustybuzz::Face<'a> {
        type Length = f32;
        fn shape_text<GlyphStorage: std::iter::Extend<ShapedGlyph<Self::Length>>>(
            &self,
            text: &str,
            glyphs: &mut GlyphStorage,
        ) {
            let mut buffer = rustybuzz::UnicodeBuffer::new();
            buffer.push_str(text);
            let glyph_buffer = rustybuzz::shape(self, &[], buffer);

            // Combine the per-glyph info and position records into `ShapedGlyph`s.
            let output_glyph_generator =
                glyph_buffer.glyph_infos().iter().zip(glyph_buffer.glyph_positions().iter()).map(
                    |(info, position)| {
                        let mut out_glyph = ShapedGlyph::default();

                        out_glyph.glyph_id = core::num::NonZeroU16::new(info.glyph_id as u16);
                        out_glyph.glyph_cluster_index = info.cluster;

                        out_glyph.offset_x = position.x_offset as _;
                        out_glyph.offset_y = position.y_offset as _;
                        out_glyph.advance_x = position.x_advance as _;

                        // Glyphs without an id or without a bounding box keep the
                        // default (zero) size and bearing.
                        if let Some(bounding_box) = out_glyph
                            .glyph_id
                            .and_then(|id| self.glyph_bounding_box(ttf_parser::GlyphId(id.get())))
                        {
                            out_glyph.width = bounding_box.width() as _;
                            out_glyph.height = bounding_box.height() as _;
                            out_glyph.bearing_x = bounding_box.x_min as _;
                            out_glyph.bearing_y = bounding_box.y_min as _;
                        }

                        out_glyph
                    },
                );

            // Cannot return impl Iterator, so extend argument instead
            glyphs.extend(output_glyph_generator);
        }
    }

    /// Shapes short strings with DejaVu Sans and checks glyph ids and cluster indices.
    #[test]
    fn test_shaping() {
        use std::num::NonZeroU16;
        use TextShaper;

        let mut fontdb = fontdb::Database::new();
        let dejavu_path: std::path::PathBuf =
            [env!("CARGO_MANIFEST_DIR"), "..", "backends", "gl", "fonts", "DejaVuSans.ttf"]
                .iter()
                .collect();
        fontdb.load_font_file(dejavu_path).expect("unable to load test dejavu font");
        let font_id = fontdb.faces()[0].id;
        fontdb.with_face_data(font_id, |data, font_index| {
            let face =
                rustybuzz::Face::from_slice(data, font_index).expect("unable to parse dejavu font");

            {
                let mut shaped_glyphs = Vec::new();
                // two glyph clusters: `a` followed by two combining marks (three glyphs
                // expected in total, two of them in the first cluster), then `b` at byte offset 5
                face.shape_text("a\u{0304}\u{0301}b", &mut shaped_glyphs);

                assert_eq!(shaped_glyphs.len(), 3);
                assert_eq!(shaped_glyphs[0].glyph_id, NonZeroU16::new(195));
                assert_eq!(shaped_glyphs[0].glyph_cluster_index, 0);

                assert_eq!(shaped_glyphs[1].glyph_id, NonZeroU16::new(690));
                assert_eq!(shaped_glyphs[1].glyph_cluster_index, 0);

                assert_eq!(shaped_glyphs[2].glyph_id, NonZeroU16::new(69));
                assert_eq!(shaped_glyphs[2].glyph_cluster_index, 5);
            }

            {
                let mut shaped_glyphs = Vec::new();
                // three glyph clusters, one per character: `a`, space, `b`
                face.shape_text("a b", &mut shaped_glyphs);

                assert_eq!(shaped_glyphs.len(), 3);
                assert_eq!(shaped_glyphs[0].glyph_id, NonZeroU16::new(68));
                assert_eq!(shaped_glyphs[0].glyph_cluster_index, 0);

                assert_eq!(shaped_glyphs[1].glyph_cluster_index, 1);

                assert_eq!(shaped_glyphs[2].glyph_id, NonZeroU16::new(69));
                assert_eq!(shaped_glyphs[2].glyph_cluster_index, 2);
            }
        });
    }
}

#[cfg(test)]
mod linebreak_tests {
    use super::*;

    /// Test shaper that produces one glyph per character; every glyph is 10 pixels wide.
    struct FixedTestFont;

    impl TextShaper for FixedTestFont {
        type Length = f32;
        fn shape_text<GlyphStorage: std::iter::Extend<ShapedGlyph<Self::Length>>>(
            &self,
            text: &str,
            glyphs: &mut GlyphStorage,
        ) {
            glyphs.extend(text.char_indices().map(|(byte_offset, _)| ShapedGlyph {
                width: 10.,
                height: 10.,
                advance_x: 10.,
                glyph_cluster_index: byte_offset as u32,
                ..Default::default()
            }));
        }
    }

    /// Breaks `text` with the [`FixedTestFont`] and returns the text of every line.
    fn broken_lines(text: &str, available_width: Option<f32>) -> Vec<&str> {
        TextLineBreaker::new(text, &FixedTestFont, available_width)
            .map(|line| line.line_text(text))
            .collect()
    }

    #[test]
    fn test_empty_line_break() {
        assert_eq!(broken_lines("", Some(50.)), [""]);
    }

    #[test]
    fn test_basic_line_break() {
        assert_eq!(broken_lines("Hello World", Some(50.)), ["Hello", "World"]);
    }

    #[test]
    fn test_linebreak_trailing_space() {
        assert_eq!(broken_lines("Hello              ", Some(50.)), ["Hello"]);
    }

    #[test]
    fn test_forced_break() {
        assert_eq!(broken_lines("Hello\nWorld", None), ["Hello", "World"]);
    }

    #[test]
    fn test_forced_break_multi() {
        assert_eq!(broken_lines("Hello\n\n\nWorld", None), ["Hello", "", "", "World"]);
    }

    #[test]
    fn test_nbsp_break() {
        assert_eq!(broken_lines("Hello\u{00a0}World", Some(50.)), ["Hello\u{00a0}World"]);
    }

    #[test]
    fn test_single_line_multi_break_opportunity() {
        assert_eq!(broken_lines("a b c", None), ["a b c"]);
    }
}