salsa/print.html

<!DOCTYPE HTML>
<html lang="en" class="sidebar-visible no-js light">
    <head>
        <!-- Book generated using mdBook -->
        <meta charset="UTF-8">
        <title>Salsa</title>
                <meta name="robots" content="noindex" />


        <!-- Custom HTML head -->


        <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
        <meta name="description" content="">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta name="theme-color" content="#ffffff" />

                <link rel="icon" href="favicon.svg">
                        <link rel="shortcut icon" href="favicon.png">
                <link rel="stylesheet" href="css/variables.css">
        <link rel="stylesheet" href="css/general.css">
        <link rel="stylesheet" href="css/chrome.css">
                <link rel="stylesheet" href="css/print.css" media="print">

        <!-- Fonts -->
        <link rel="stylesheet" href="FontAwesome/css/font-awesome.css">
                <link rel="stylesheet" href="fonts/fonts.css">

        <!-- Highlight.js Stylesheets -->
        <link rel="stylesheet" href="highlight.css">
        <link rel="stylesheet" href="tomorrow-night.css">
        <link rel="stylesheet" href="ayu-highlight.css">

        <!-- Custom theme stylesheets -->
                <link rel="stylesheet" href="mermaid.css">

            </head>
    <body>
        <!-- Provide site root to javascript -->
        <script type="text/javascript">
            // Relative path from this page to the book root (empty: this page sits at the root).
            var path_to_root = "";
            // Theme used when nothing is stored: "navy" if the browser reports a
            // dark colour-scheme preference, "light" otherwise.
            var default_theme;
            if (window.matchMedia("(prefers-color-scheme: dark)").matches) {
                default_theme = "navy";
            } else {
                default_theme = "light";
            }
        </script>

        <!-- Work around some values being stored in localStorage wrapped in quotes -->
        <script type="text/javascript">
            // Some values may have been stored in localStorage wrapped in double
            // quotes; rewrite them without the quotes so later reads see the bare value.
            //
            // localStorage.getItem() returns null for a key that was never set, so each
            // value is null-checked on its own. Without the guard, a missing
            // 'mdbook-theme' raises a TypeError that aborts the block before the
            // 'mdbook-sidebar' value is examined.
            //
            // The try/catch is kept as a deliberate best-effort: storage access itself
            // can throw (e.g. when storage is disabled), and the page must still load.
            try {
                var theme = localStorage.getItem('mdbook-theme');
                var sidebar = localStorage.getItem('mdbook-sidebar');

                if (theme !== null && theme.startsWith('"') && theme.endsWith('"')) {
                    localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
                }

                if (sidebar !== null && sidebar.startsWith('"') && sidebar.endsWith('"')) {
                    localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
                }
            } catch (e) { }
        </script>

        <!-- Set the theme before any content is loaded, prevents flash -->
        <script type="text/javascript">
            // Pick the theme: the stored preference if one can be read, otherwise
            // the default computed earlier on the page.
            var theme;
            try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
            // `== null` is true for both null (key absent) and undefined (read failed).
            if (theme == null) { theme = default_theme; }

            // Swap the static placeholder classes on <html> for the live ones.
            var html = document.documentElement;
            html.classList.remove('no-js', 'light');
            html.classList.add(theme, 'js');
        </script>

        <!-- Hide / unhide sidebar before it is displayed -->
        <script type="text/javascript">
            var html = document.documentElement;
            // Start from 'hidden'; the stored state is only consulted when the
            // body is at least 1080 wide.
            var sidebar = 'hidden';
            if (document.body.clientWidth >= 1080) {
                try {
                    sidebar = localStorage.getItem('mdbook-sidebar');
                } catch (e) { }
                // Nothing stored (null or empty string): show the sidebar.
                if (!sidebar) {
                    sidebar = 'visible';
                }
            }
            // Replace the static 'sidebar-visible' class with the resolved state.
            html.classList.remove('sidebar-visible');
            html.classList.add('sidebar-' + sidebar);
        </script>

        <nav id="sidebar" class="sidebar" aria-label="Table of contents">
            <div class="sidebar-scrollbox">
                <ol class="chapter"><li class="chapter-item expanded "><a href="about_salsa.html"><strong aria-hidden="true">1.</strong> About salsa</a></li><li class="chapter-item expanded affix "><li class="part-title">How to use Salsa</li><li class="chapter-item expanded "><a href="overview.html"><strong aria-hidden="true">2.</strong> Overview</a></li><li class="chapter-item expanded "><a href="tutorial.html"><strong aria-hidden="true">3.</strong> Tutorial: calc language</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="tutorial/structure.html"><strong aria-hidden="true">3.1.</strong> Basic structure</a></li><li class="chapter-item expanded "><a href="tutorial/jar.html"><strong aria-hidden="true">3.2.</strong> Jars and databases</a></li><li class="chapter-item expanded "><a href="tutorial/db.html"><strong aria-hidden="true">3.3.</strong> Defining the database struct</a></li><li class="chapter-item expanded "><a href="tutorial/ir.html"><strong aria-hidden="true">3.4.</strong> Defining the IR: the various &quot;salsa structs&quot;</a></li><li class="chapter-item expanded "><a href="tutorial/parser.html"><strong aria-hidden="true">3.5.</strong> Defining the parser: memoized functions and inputs</a></li><li class="chapter-item expanded "><a href="tutorial/accumulators.html"><strong aria-hidden="true">3.6.</strong> Defining the parser: reporting errors</a></li><li class="chapter-item expanded "><a href="tutorial/debug.html"><strong aria-hidden="true">3.7.</strong> Defining the parser: debug impls and testing</a></li><li class="chapter-item expanded "><a href="tutorial/checker.html"><strong aria-hidden="true">3.8.</strong> Defining the checker</a></li><li class="chapter-item expanded "><a href="tutorial/interpreter.html"><strong aria-hidden="true">3.9.</strong> Defining the interpreter</a></li></ol></li><li class="chapter-item expanded "><a href="reference.html"><strong aria-hidden="true">4.</strong> Reference</a></li><li><ol 
class="section"><li class="chapter-item expanded "><a href="reference/algorithm.html"><strong aria-hidden="true">4.1.</strong> Algorithm</a></li></ol></li><li class="chapter-item expanded "><a href="common_patterns.html"><strong aria-hidden="true">5.</strong> Common patterns</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="common_patterns/selection.html"><strong aria-hidden="true">5.1.</strong> Selection</a></li><li class="chapter-item expanded "><a href="common_patterns/on_demand_inputs.html"><strong aria-hidden="true">5.2.</strong> On-demand (Lazy) inputs</a></li></ol></li><li class="chapter-item expanded "><a href="tuning.html"><strong aria-hidden="true">6.</strong> Tuning</a></li><li class="chapter-item expanded "><a href="cycles.html"><strong aria-hidden="true">7.</strong> Cycle handling</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="cycles/fallback.html"><strong aria-hidden="true">7.1.</strong> Recovering via fallback</a></li></ol></li><li class="chapter-item expanded "><li class="part-title">How Salsa works internally</li><li class="chapter-item expanded "><a href="how_salsa_works.html"><strong aria-hidden="true">8.</strong> How Salsa works</a></li><li class="chapter-item expanded "><a href="videos.html"><strong aria-hidden="true">9.</strong> Videos</a></li><li class="chapter-item expanded "><a href="plumbing.html"><strong aria-hidden="true">10.</strong> Plumbing</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="plumbing/jars_and_ingredients.html"><strong aria-hidden="true">10.1.</strong> Jars and ingredients</a></li><li class="chapter-item expanded "><a href="plumbing/database_and_runtime.html"><strong aria-hidden="true">10.2.</strong> Databases and runtime</a></li><li class="chapter-item expanded "><a href="plumbing/query_ops.html"><strong aria-hidden="true">10.3.</strong> Query operations</a></li><li><ol class="section"><li class="chapter-item expanded "><a 
href="plumbing/maybe_changed_after.html"><strong aria-hidden="true">10.3.1.</strong> maybe changed after</a></li><li class="chapter-item expanded "><a href="plumbing/fetch.html"><strong aria-hidden="true">10.3.2.</strong> Fetch</a></li><li class="chapter-item expanded "><a href="plumbing/derived_flowchart.html"><strong aria-hidden="true">10.3.3.</strong> Derived queries flowchart</a></li><li class="chapter-item expanded "><a href="plumbing/cycles.html"><strong aria-hidden="true">10.3.4.</strong> Cycle handling</a></li></ol></li><li class="chapter-item expanded "><a href="plumbing/terminology.html"><strong aria-hidden="true">10.4.</strong> Terminology</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="plumbing/terminology/backdate.html"><strong aria-hidden="true">10.4.1.</strong> Backdate</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/changed_at.html"><strong aria-hidden="true">10.4.2.</strong> Changed at</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/dependency.html"><strong aria-hidden="true">10.4.3.</strong> Dependency</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/derived_query.html"><strong aria-hidden="true">10.4.4.</strong> Derived query</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/durability.html"><strong aria-hidden="true">10.4.5.</strong> Durability</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/input_query.html"><strong aria-hidden="true">10.4.6.</strong> Input query</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/ingredient.html"><strong aria-hidden="true">10.4.7.</strong> Ingredient</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/LRU.html"><strong aria-hidden="true">10.4.8.</strong> LRU</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/memo.html"><strong aria-hidden="true">10.4.9.</strong> Memo</a></li><li class="chapter-item 
expanded "><a href="plumbing/terminology/query.html"><strong aria-hidden="true">10.4.10.</strong> Query</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/query_function.html"><strong aria-hidden="true">10.4.11.</strong> Query function</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/revision.html"><strong aria-hidden="true">10.4.12.</strong> Revision</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/salsa_item.html"><strong aria-hidden="true">10.4.13.</strong> Salsa item</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/salsa_struct.html"><strong aria-hidden="true">10.4.14.</strong> Salsa struct</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/untracked.html"><strong aria-hidden="true">10.4.15.</strong> Untracked dependency</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/verified.html"><strong aria-hidden="true">10.4.16.</strong> Verified</a></li></ol></li></ol></li><li class="chapter-item expanded "><li class="part-title">Salsa RFCs</li><li class="chapter-item expanded "><a href="rfcs.html"><strong aria-hidden="true">11.</strong> RFCs</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="rfcs/template.html"><strong aria-hidden="true">11.1.</strong> Template</a></li><li class="chapter-item expanded "><a href="rfcs/RFC0001-Query-Group-Traits.html"><strong aria-hidden="true">11.2.</strong> RFC 0001: Query group traits</a></li><li class="chapter-item expanded "><a href="rfcs/RFC0002-Intern-Queries.html"><strong aria-hidden="true">11.3.</strong> RFC 0002: Intern queries</a></li><li class="chapter-item expanded "><a href="rfcs/RFC0003-Query-Dependencies.html"><strong aria-hidden="true">11.4.</strong> RFC 0003: Query dependencies</a></li><li class="chapter-item expanded "><a href="rfcs/RFC0004-LRU.html"><strong aria-hidden="true">11.5.</strong> RFC 0004: LRU</a></li><li class="chapter-item expanded "><a 
href="rfcs/RFC0005-Durability.html"><strong aria-hidden="true">11.6.</strong> RFC 0005: Durability</a></li><li class="chapter-item expanded "><a href="rfcs/RFC0006-Dynamic-Databases.html"><strong aria-hidden="true">11.7.</strong> RFC 0006: Dynamic database</a></li><li class="chapter-item expanded "><a href="rfcs/RFC0007-Opinionated-Cancelation.html"><strong aria-hidden="true">11.8.</strong> RFC 0007: Opinionated cancelation</a></li><li class="chapter-item expanded "><a href="rfcs/RFC0008-Remove-Garbage-Collection.html"><strong aria-hidden="true">11.9.</strong> RFC 0008: Remove garbage collection</a></li><li class="chapter-item expanded "><a href="rfcs/RFC0009-Cycle-recovery.html"><strong aria-hidden="true">11.10.</strong> RFC 0009: Cycle recovery</a></li><li class="chapter-item expanded "><a href="rfcs/RFC0010-Slot-no-more.html"><strong aria-hidden="true">11.11.</strong> RFC 0010: Slot no more</a></li></ol></li><li class="chapter-item expanded "><li class="part-title">Appendices</li><li class="chapter-item expanded "><a href="meta.html"><strong aria-hidden="true">12.</strong> Meta: about the book itself</a></li></ol>            </div>
            <div id="sidebar-resize-handle" class="sidebar-resize-handle"></div>
        </nav>

        <div id="page-wrapper" class="page-wrapper">

            <div class="page">

                <div id="menu-bar-hover-placeholder"></div>
                <div id="menu-bar" class="menu-bar sticky bordered">
                    <div class="left-buttons">
                        <button id="sidebar-toggle" class="icon-button" type="button" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
                            <i class="fa fa-bars"></i>
                        </button>
                        <button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
                            <i class="fa fa-paint-brush"></i>
                        </button>
                        <ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
                            <li role="none"><button role="menuitem" class="theme" id="light">Light (default)</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
                            <li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
                        </ul>
                                                <button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
                            <i class="fa fa-search"></i>
                        </button>
                                            </div>

                    <h1 class="menu-title">Salsa</h1>

                    <div class="right-buttons">
                                                <a href="print.html" title="Print this book" aria-label="Print this book">
                            <i id="print-button" class="fa fa-print"></i>
                        </a>

                    </div>
                </div>

                                <div id="search-wrapper" class="hidden">
                    <form id="searchbar-outer" class="searchbar-outer">
                        <input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
                    </form>
                    <div id="searchresults-outer" class="searchresults-outer hidden">
                        <div id="searchresults-header" class="searchresults-header"></div>
                        <ul id="searchresults">
                        </ul>
                    </div>
                </div>

                <!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
                <script type="text/javascript">
                    // Mirror the sidebar state (the global `sidebar` set earlier on the
                    // page) into ARIA attributes and into the tab order of the sidebar
                    // links. Wrapped in a function so the helper locals stay out of the
                    // global scope.
                    (function () {
                        var isShown = sidebar === 'visible';
                        var linkTabIndex = isShown ? 0 : -1;

                        document.getElementById('sidebar-toggle').setAttribute('aria-expanded', isShown);
                        document.getElementById('sidebar').setAttribute('aria-hidden', !isShown);

                        // Links inside a hidden sidebar are removed from the tab order.
                        var sidebarLinks = document.querySelectorAll('#sidebar a');
                        Array.prototype.forEach.call(sidebarLinks, function (link) {
                            link.setAttribute('tabIndex', linkTabIndex);
                        });
                    })();
                </script>

                <div id="content" class="content">
                    <main>
                        <h1 id="about-salsa"><a class="header" href="#about-salsa">About salsa</a></h1>
<p>Salsa is a Rust framework for writing incremental, on-demand programs
-- these are programs that want to adapt to changes in their inputs,
continuously producing a new output that is up-to-date. Salsa is based
on the incremental recompilation techniques that we built for
rustc, and many (but not all) of its users are building compilers or
other similar tooling.</p>
<p>If you'd like to learn more about Salsa, check out:</p>
<ul>
<li>The <a href="./overview.html">overview</a>, for a brief summary.</li>
<li>The <a href="./tutorial.html">tutorial</a>, for a detailed look.</li>
<li>You can also watch some of our <a href="./videos.html">videos</a>, though the content there is rather out of date.</li>
</ul>
<p>If you'd like to chat about Salsa, or you think you might like to
contribute, please jump on to our Zulip instance at
<a href="https://salsa.zulipchat.com/">salsa.zulipchat.com</a>.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="salsa-overview"><a class="header" href="#salsa-overview">Salsa overview</a></h1>
<blockquote>
<p>⚠️ <strong>IN-PROGRESS VERSION OF SALSA.</strong> ⚠️</p>
<p>This page describes the unreleased &quot;Salsa 2022&quot; version, which is a major departure from older versions of salsa. The code here works but is only available on github and from the <code>salsa-2022</code> crate.</p>
</blockquote>
<p>This page contains a brief overview of the pieces of a salsa program.
For a more detailed look, check out the <a href="./tutorial.html">tutorial</a>, which walks through the creation of an entire project end-to-end.</p>
<h2 id="goal-of-salsa"><a class="header" href="#goal-of-salsa">Goal of Salsa</a></h2>
<p>The goal of salsa is to support efficient <strong>incremental recomputation</strong>.
salsa is used in rust-analyzer, for example, to help it recompile your program quickly as you type.</p>
<p>The basic idea of a salsa program is like this:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let mut input = ...;
loop {
    let output = your_program(&amp;input);
    modify(&amp;mut input);
}
<span class="boring">}
</span></code></pre></pre>
<p>You start out with an input that has some value.
You invoke your program to get back a result.
Some time later, you modify the input and invoke your program again.
<strong>Our goal is to make this second call faster by re-using some of the results from the first call.</strong></p>
<p>In reality, of course, you can have many inputs and &quot;your program&quot; may be many different methods and functions defined on those inputs.
But this picture still conveys a few important concepts:</p>
<ul>
<li>Salsa separates out the &quot;incremental computation&quot; (the function <code>your_program</code>) from some outer loop that is defining the inputs.</li>
<li>Salsa gives you the tools to define <code>your_program</code>.</li>
<li>Salsa assumes that <code>your_program</code> is a purely deterministic function of its inputs, or else this whole setup makes no sense.</li>
<li>The mutation of inputs always happens outside of <code>your_program</code>, as part of this master loop.</li>
</ul>
<h2 id="database"><a class="header" href="#database">Database</a></h2>
<p>Each time you run your program, salsa remembers the values of each computation in a <strong>database</strong>.
When the inputs change, it consults this database to look for values that can be reused.
The database is also used to implement interning (making a canonical version of a value that can be copied around and cheaply compared for equality) and other convenient salsa features.</p>
<h2 id="inputs"><a class="header" href="#inputs">Inputs</a></h2>
<p>Every Salsa program begins with an <strong>input</strong>.
Inputs are special structs that define the starting point of your program.
Everything else in your program is ultimately a deterministic function of these inputs.</p>
<p>For example, in a compiler, there might be an input defining the contents of a file on disk:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::input]
pub struct ProgramFile {
    pub path: PathBuf,
    pub contents: String,
}
<span class="boring">}
</span></code></pre></pre>
<p>You create an input by using the <code>new</code> method.
Because the values of input fields are stored in the database, you also give an <code>&amp;mut</code>-reference to the database:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let file: ProgramFile = ProgramFile::new(
    &amp;mut db,
    PathBuf::from(&quot;some_path.txt&quot;),
    String::from(&quot;fn foo() { }&quot;),
);
<span class="boring">}
</span></code></pre></pre>
<h3 id="salsa-structs-are-just-an-integer"><a class="header" href="#salsa-structs-are-just-an-integer">Salsa structs are just an integer</a></h3>
<p>The <code>ProgramFile</code> struct generated by the <code>salsa::input</code> macro doesn't actually store any data. It's just a newtyped integer id:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>// Generated by the `#[salsa::input]` macro:
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ProgramFile(salsa::Id);
<span class="boring">}
</span></code></pre></pre>
<p>This means that, when you have a <code>ProgramFile</code>, you can easily copy it around and put it wherever you like.
To actually read any of its fields, however, you will need to use the database and a getter method.</p>
<h3 id="reading-fields-and-return_ref"><a class="header" href="#reading-fields-and-return_ref">Reading fields and <code>return_ref</code></a></h3>
<p>You can access the value of an input's fields by using the getter method.
As this is only reading the field, it just needs a <code>&amp;</code>-reference to the database:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let contents: String = file.contents(&amp;db);
<span class="boring">}
</span></code></pre></pre>
<p>Invoking the accessor clones the value from the database.
Sometimes this is not what you want, so you can annotate fields with <code>#[return_ref]</code> to indicate that they should return a reference into the database instead:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::input]
pub struct ProgramFile {
    pub path: PathBuf,
    #[return_ref]
    pub contents: String,
}
<span class="boring">}
</span></code></pre></pre>
<p>Now <code>file.contents(&amp;db)</code> will return an <code>&amp;String</code>.</p>
<p>You can also use the <code>data</code> method to access the entire struct:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>file.data(&amp;db)
<span class="boring">}
</span></code></pre></pre>
<h3 id="writing-input-fields"><a class="header" href="#writing-input-fields">Writing input fields</a></h3>
<p>Finally, you can also modify the value of an input field by using the setter method.
Since this is modifying the input, the setter takes an <code>&amp;mut</code>-reference to the database:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>file.set_contents(&amp;mut db, String::from(&quot;fn foo() { /* add a comment */ }&quot;));
<span class="boring">}
</span></code></pre></pre>
<h2 id="tracked-functions"><a class="header" href="#tracked-functions">Tracked functions</a></h2>
<p>Once you've defined your inputs, the next things to define are <strong>tracked functions</strong>:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
fn parse_file(db: &amp;dyn crate::Db, file: ProgramFile) -&gt; Ast {
    let contents: &amp;str = file.contents(db);
    ...
}
<span class="boring">}
</span></code></pre></pre>
<p>When you call a tracked function, salsa will track which inputs it accesses (in this example, <code>file.contents(db)</code>).
It will also memoize the return value (the <code>Ast</code>, in this case).
If you call a tracked function twice, salsa checks if the inputs have changed; if not, it can return the memoized value.
The algorithm salsa uses to decide when a tracked function needs to be re-executed is called the <a href="./reference/algorithm.html">red-green algorithm</a>, and it's where the name salsa comes from.</p>
<p>Tracked functions have to follow a particular structure:</p>
<ul>
<li>They must take a <code>&amp;</code>-reference to the database as their first argument.
<ul>
<li>Note that because this is an <code>&amp;</code>-reference, it is not possible to create or modify inputs during a tracked function!</li>
</ul>
</li>
<li>They must take a &quot;salsa struct&quot; as the second argument -- in our example, this is an input struct, but there are other kinds of salsa structs we'll describe shortly.</li>
<li>They <em>can</em> take additional arguments, but it's faster and better if they don't.</li>
</ul>
<p>Tracked functions can return any clone-able type. A clone is required since, when the value is cached, the result will be cloned out of the database. Tracked functions can also be annotated with <code>#[return_ref]</code> if you would prefer to return a reference into the database instead (if <code>parse_file</code> were so annotated, then callers would actually get back an <code>&amp;Ast</code>, for example).</p>
<h2 id="tracked-structs"><a class="header" href="#tracked-structs">Tracked structs</a></h2>
<p><strong>Tracked structs</strong> are intermediate structs created during your computation.
Like inputs, their fields are stored inside the database, and the struct itself just wraps an id.
Unlike inputs, they can only be created inside a tracked function, and their fields can never change once they are created.
Getter methods are provided to read the fields, but there are no setter methods<sup class="footnote-reference"><a href="#specify">1</a></sup>. Example:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
struct Ast {
    #[return_ref]
    top_level_items: Vec&lt;Item&gt;,
}
<span class="boring">}
</span></code></pre></pre>
<p>Just as with an input, new values are created by invoking <code>Ast::new</code>.
Unlike with an input, the <code>new</code> for a tracked struct only requires a <code>&amp;</code>-reference to the database:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
fn parse_file(db: &amp;dyn crate::Db, file: ProgramFile) -&gt; Ast {
    let contents: &amp;str = file.contents(db);
    let parser = Parser::new(contents);
    let mut top_level_items = vec![];
    while let Some(item) = parser.parse_top_level_item() {
        top_level_items.push(item);
    }
    Ast::new(db, top_level_items) // &lt;-- create an Ast!
}
<span class="boring">}
</span></code></pre></pre>
<h3 id="id-fields"><a class="header" href="#id-fields"><code>#[id]</code> fields</a></h3>
<p>When a tracked function is re-executed because its inputs have changed, the tracked structs it creates in the new execution are matched against those from the old execution, and the values of their fields are compared.
If the field values have not changed, then other tracked functions that only read those fields will not be re-executed.</p>
<p>Normally, tracked structs are matched up by the order in which they are created.
For example, the first <code>Ast</code> that is created by <code>parse_file</code> in the old execution will be matched against the first <code>Ast</code> created by <code>parse_file</code> in the new execution.
In our example, <code>parse_file</code> only ever creates a single <code>Ast</code>, so this works great.
Sometimes, however, it doesn't work so well.
For example, imagine that we had a tracked struct for items in the file:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
struct Item {
    name: Word, // we'll define Word in a second!
    ...
}
<span class="boring">}
</span></code></pre></pre>
<p>Maybe our parser first creates an <code>Item</code> with the name <code>foo</code> and then later a second <code>Item</code> with the name <code>bar</code>.
Then the user changes the input to reorder the functions.
Although we are still creating the same number of items, we are now creating them in the reverse order, so the naive algorithm will match up the <em>old</em> <code>foo</code> struct with the new <code>bar</code> struct.
This will look to salsa as though the <code>foo</code> function was renamed to <code>bar</code> and the <code>bar</code> function was renamed to <code>foo</code>.
We'll still get the right result, but we might do more recomputation than we needed to do if we understood that they were just reordered.</p>
<p>To address this, you can tag fields in a tracked struct as <code>#[id]</code>. These fields are then used to &quot;match up&quot; struct instances across executions:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
struct Item {
    #[id]
    name: Word, // we'll define Word in a second!
    ...
}
<span class="boring">}
</span></code></pre></pre>
<h3 id="specified-the-result-of-tracked-functions-for-particular-structs"><a class="header" href="#specified-the-result-of-tracked-functions-for-particular-structs">Specifying the result of tracked functions for particular structs</a></h3>
<p>Sometimes it is useful to define a tracked function but specify its value for some particular struct specially.
For example, maybe the default way to compute the representation for a function is to read the AST, but you also have some built-in functions in your language and you want to hard-code their results.
This can also be used to simulate a field that is initialized after the tracked struct is created.</p>
<p>To support this use case, you can use the <code>specify</code> method associated with tracked functions.
To enable this method, you need to add the <code>specify</code> flag to the function to alert users that its value may sometimes be specified externally.</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked(specify)] // &lt;-- specify flag required
fn representation(db: &amp;dyn crate::Db, item: Item) -&gt; Representation {
    // read the user's input AST by default
    let ast = ast(db, item);
    // ...
}

fn create_builtin_item(db: &amp;dyn crate::Db) -&gt; Item {
    let i = Item::new(db, ...);
    let r = hardcoded_representation();
    representation::specify(db, i, r); // &lt;-- use the method!
    i
}
<span class="boring">}
</span></code></pre></pre>
<p>Specifying is only possible for tracked functions that take a single tracked struct as argument (besides the database).</p>
<h2 id="interned-structs"><a class="header" href="#interned-structs">Interned structs</a></h2>
<p>The final kind of salsa struct are <strong>interned structs</strong>.
Interned structs are useful for quick equality comparison.
They are commonly used to represent strings or other primitive values.</p>
<p>Most compilers, for example, will define a type to represent a user identifier:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::interned]
struct Word {
    #[return_ref]
    pub text: String,
}
<span class="boring">}
</span></code></pre></pre>
<p>As with input and tracked structs, the <code>Word</code> struct itself is just a newtyped integer, and the actual data is stored in the database.</p>
<p>You can create a new interned struct using <code>new</code>, just like with input and tracked structs:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let w1 = Word::new(db, &quot;foo&quot;.to_string());
let w2 = Word::new(db, &quot;bar&quot;.to_string());
let w3 = Word::new(db, &quot;foo&quot;.to_string());
<span class="boring">}
</span></code></pre></pre>
<p>When you create two interned structs with the same field values, you are guaranteed to get back the same integer id. So here, we know that <code>assert_eq!(w1, w3)</code> and <code>assert_ne!(w1, w2)</code> both hold.</p>
<p>You can access the fields of an interned struct using a getter, like <code>word.text(db)</code>. These getters respect the <code>#[return_ref]</code> annotation. Like tracked structs, the fields of interned structs are immutable.</p>
<h2 id="accumulators"><a class="header" href="#accumulators">Accumulators</a></h2>
<p>The final salsa concept are <strong>accumulators</strong>. Accumulators are a way to report errors or other &quot;side channel&quot; information that is separate from the main return value of your function.</p>
<p>To create an accumulator, you declare a type as an <em>accumulator</em>:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::accumulator]
pub struct Diagnostics(String);
<span class="boring">}
</span></code></pre></pre>
<p>It must be a newtype of something, like <code>String</code>. Now, during a tracked function's execution, you can push those values:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>Diagnostics::push(db, &quot;some_string&quot;.to_string())
<span class="boring">}
</span></code></pre></pre>
<p>Then later, from outside the execution, you can ask for the set of diagnostics that were accumulated by some particular tracked function. For example, imagine that we have a type-checker and, during type-checking, it reports some diagnostics:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
fn type_check(db: &amp;dyn Db, item: Item) {
    // ...
    Diagnostics::push(db, &quot;some error message&quot;.to_string())
    // ...
}
<span class="boring">}
</span></code></pre></pre>
<p>we can then later invoke the associated <code>accumulated</code> function to get all the <code>String</code> values that were pushed:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let v: Vec&lt;String&gt; = type_check::accumulated::&lt;Diagnostics&gt;(db);
<span class="boring">}
</span></code></pre></pre>
<div style="break-before: page; page-break-before: always;"></div><h1 id="tutorial-calc"><a class="header" href="#tutorial-calc">Tutorial: calc</a></h1>
<blockquote>
<p>⚠️ <strong>IN-PROGRESS VERSION OF SALSA.</strong> ⚠️</p>
<p>This page describes the unreleased &quot;Salsa 2022&quot; version, which is a major departure from older versions of salsa. The code here works but is only available on github and from the <code>salsa-2022</code> crate.</p>
</blockquote>
<p>This tutorial walks through an end-to-end example of using Salsa.
It does not assume you know anything about salsa,
but reading the <a href="./overview.html">overview</a> first is probably a good idea to get familiar with the basic concepts.</p>
<p>Our goal is to define a compiler/interpreter for a simple language called <code>calc</code>.
The <code>calc</code> compiler takes programs like the following and then parses and executes them:</p>
<pre><code>fn area_rectangle(w, h) = w * h
fn area_circle(r) = 3.14 * r * r
print area_rectangle(3, 4)
print area_circle(1)
print 11 * 2
</code></pre>
<p>When executed, this program prints <code>12</code>, <code>3.14</code>, and <code>22</code>.</p>
<p>If the program contains errors (e.g., a reference to an undefined function), it prints those out too.
And, of course, it will be reactive, so small changes to the input don't require recompiling (or re-executing, necessarily) the entire thing.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="basic-structure"><a class="header" href="#basic-structure">Basic structure</a></h1>
<p>Before we do anything with salsa, let's talk about the basic structure of the calc compiler.
Part of salsa's design is that you are able to write programs that feel 'pretty close' to what a natural Rust program looks like.</p>
<h2 id="example-program"><a class="header" href="#example-program">Example program</a></h2>
<p>This is our example calc program:</p>
<pre><code>x = 5
y = 10
z = x + y * 3
print z
</code></pre>
<h2 id="parser"><a class="header" href="#parser">Parser</a></h2>
<p>The calc compiler takes as input a program, represented by a string:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>struct ProgramSource {
    text: String
}
<span class="boring">}
</span></code></pre></pre>
<p>The first thing it does is to parse that string into a series of statements that look something like the following pseudo-Rust:<sup class="footnote-reference"><a href="#lexer">1</a></sup></p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>enum Statement {
    /// Defines `fn &lt;name&gt;(&lt;args&gt;) = &lt;body&gt;`
    Function(Function),
    /// Defines `print &lt;expr&gt;`
    Print(Expression),
}

/// Defines `fn &lt;name&gt;(&lt;args&gt;) = &lt;body&gt;`
struct Function {
    name: FunctionId,
    args: Vec&lt;VariableId&gt;,
    body: Expression
}
<span class="boring">}
</span></code></pre></pre>
<p>where an expression is something like this (pseudo-Rust, because the <code>Expression</code> enum is recursive):</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>enum Expression {
    Op(Expression, Op, Expression),
    Number(f64),
    Variable(VariableId),
    Call(FunctionId, Vec&lt;Expression&gt;),
}

enum Op {
    Add,
    Subtract,
    Multiply,
    Divide,
}
<span class="boring">}
</span></code></pre></pre>
<p>Finally, for function/variable names, the <code>FunctionId</code> and <code>VariableId</code> types will be interned strings:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>type FunctionId = /* interned string */;
type VariableId = /* interned string */;
<span class="boring">}
</span></code></pre></pre>
<div class="footnote-definition" id="lexer"><sup class="footnote-definition-label">1</sup>
<p>Because calc is so simple, we don't have to bother separating out the lexer from the parser.</p>
</div>
<h2 id="checker"><a class="header" href="#checker">Checker</a></h2>
<p>The &quot;checker&quot; has the job of ensuring that the user only references variables that have been defined.
We're going to write the checker in a &quot;context-less&quot; style,
which is a bit less intuitive but allows for more incremental re-use.
The idea is to compute, for a given expression, which variables it references.
Then there is a function &quot;check&quot; which ensures that those variables are a subset of those that are already defined.</p>
<h2 id="interpreter"><a class="header" href="#interpreter">Interpreter</a></h2>
<p>The interpreter will execute the program and print the result. We don't bother with much incremental re-use here,
though it's certainly possible.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="jars-and-databases"><a class="header" href="#jars-and-databases">Jars and databases</a></h1>
<p>Before we can define the interesting parts of our salsa program, we have to set up a bit of structure that defines the salsa <strong>database</strong>.
The database is a struct that ultimately stores all of salsa's intermediate state, such as the memoized return values from <a href="tutorial/../overview.html#tracked-functions">tracked functions</a>.</p>
<p>The database itself is defined in terms of intermediate structures, called <strong>jars</strong><sup class="footnote-reference"><a href="#jar">1</a></sup>, which themselves contain the data for each function.
This setup allows salsa programs to be divided amongst many crates.
Typically, you define one jar struct per crate, and then when you construct the final database, you simply list the jar structs.
This permits the crates to define private functions and other things that are members of the jar struct, but not known directly to the database.</p>
<div class="footnote-definition" id="jar"><sup class="footnote-definition-label">1</sup>
<p>Jars of salsa -- get it? Get it??<sup class="footnote-reference"><a href="#java">2</a></sup></p>
</div>
<div class="footnote-definition" id="java"><sup class="footnote-definition-label">2</sup>
<p>OK, maybe it also brings to mind Java <code>.jar</code> files, but there's no real relationship. A jar is just a Rust struct, not a packaging format.</p>
</div>
<h2 id="defining-a-jar-struct"><a class="header" href="#defining-a-jar-struct">Defining a jar struct</a></h2>
<p>To define a jar struct, you create a tuple struct with the <code>#[salsa::jar]</code> annotation:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::jar(db = Db)]
pub struct Jar(
    crate::ir::SourceProgram,
    crate::ir::VariableId,
    crate::ir::FunctionId,
    crate::ir::Expression,
    crate::ir::Statement,
    crate::ir::Function,
    crate::ir::Diagnostics,
    crate::parser::parse_statements,
);
<span class="boring">}
</span></code></pre></pre>
<p>Although it's not required, it's highly recommended to put the <code>jar</code> struct at the root of your crate, so that it can be referred to as <code>crate::Jar</code>.
All of the other salsa annotations reference a jar struct, and they all default to the path <code>crate::Jar</code>.
If you put the jar somewhere else, you will have to override that default.</p>
<h2 id="defining-the-database-trait"><a class="header" href="#defining-the-database-trait">Defining the database trait</a></h2>
<p>The <code>#[salsa::jar]</code> annotation also includes a <code>db = Db</code> field.
The value of this field (normally <code>Db</code>) is the name of a trait that represents the database.
Salsa programs never refer <em>directly</em> to the database; instead, they take a <code>&amp;dyn Db</code> argument.
This allows for separate compilation, where you have a database that contains the data for two jars, but those jars don't depend on one another.</p>
<p>The database trait for our <code>calc</code> crate is very simple:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>pub trait Db: salsa::DbWithJar&lt;Jar&gt; {}
<span class="boring">}
</span></code></pre></pre>
<p>When you define a database trait like <code>Db</code>, the one thing that is required is that it must have a supertrait <code>salsa::DbWithJar&lt;Jar&gt;</code>,
where <code>Jar</code> is the jar struct. If your jar depends on other jars, you can have multiple such supertraits (e.g., <code>salsa::DbWithJar&lt;other_crate::Jar&gt;</code>).</p>
<p>Typically the <code>Db</code> trait has no other members or supertraits, but you are also free to add whatever other things you want in the trait.
When you define your final database, it will implement the trait, and you can then define the implementation of those other things.
This allows you to create a way for your jar to request context or other info from the database that is not moderated through salsa,
should you need that.</p>
<h2 id="implementing-the-database-trait-for-the-jar"><a class="header" href="#implementing-the-database-trait-for-the-jar">Implementing the database trait for the jar</a></h2>
<p>The <code>Db</code> trait must be implemented by the database struct.
We're going to define the database struct in a <a href="tutorial/./db.html">later section</a>,
and one option would be to simply implement the jar <code>Db</code> trait there.
However, since we don't define any custom logic in the trait,
a common choice is to write a blanket impl for any type that implements <code>DbWithJar&lt;Jar&gt;</code>,
and that's what we do here:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>impl&lt;DB&gt; Db for DB where DB: ?Sized + salsa::DbWithJar&lt;Jar&gt; {}
<span class="boring">}
</span></code></pre></pre>
<h2 id="summary"><a class="header" href="#summary">Summary</a></h2>
<p>If the concept of a jar seems a bit abstract to you, don't overthink it. The TL;DR is that when you create a salsa program, you need to do:</p>
<ul>
<li>In each of your crates:
<ul>
<li>Define a <code>#[salsa::jar(db = Db)]</code> struct, typically at <code>crate::Jar</code>, and list each of your various salsa-annotated things inside of it.</li>
<li>Define a <code>Db</code> trait, typically at <code>crate::Db</code>, that you will use in memoized functions and elsewhere to refer to the database struct.</li>
</ul>
</li>
<li>Once, typically in your final crate:
<ul>
<li>Define a database <code>D</code>, as described in the <a href="tutorial/./db.html">next section</a>, that will contain a list of each of the jars for each of your crates.</li>
<li>Implement the <code>Db</code> traits for each jar for your database type <code>D</code> (often we do this through blanket impls in the jar crates).</li>
</ul>
</li>
</ul>
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-database-struct"><a class="header" href="#defining-the-database-struct">Defining the database struct</a></h1>
<p>Now that we have defined a <a href="tutorial/./jar.html">jar</a>, we need to create the <strong>database struct</strong>.
The database struct is where all the jars come together.
Typically it is only used by the &quot;driver&quot; of your application;
the one which starts up the program, supplies the inputs, and relays the outputs.</p>
<p>In <code>calc</code>, the database struct is in the <a href="https://github.com/salsa-rs/salsa/blob/master/calc-example/calc/src/db.rs"><code>db</code></a> module, and it looks like this:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::db(crate::Jar)]
pub(crate) struct Database {
    storage: salsa::Storage&lt;Self&gt;,
}
<span class="boring">}
</span></code></pre></pre>
<p>The <code>#[salsa::db(...)]</code> attribute takes a list of all the jars to include.
The struct must have a field named <code>storage</code> whose type is <code>salsa::Storage&lt;Self&gt;</code>, but it can also contain whatever other fields you want.
The <code>storage</code> struct owns all the data for the jars listed in the <code>db</code> attribute.</p>
<p>The <code>salsa::db</code> attribute autogenerates a bunch of impls for things like the <code>salsa::HasJar&lt;crate::Jar&gt;</code> trait that we saw earlier.
This means that</p>
<h2 id="implementing-the-salsadatabase-trait"><a class="header" href="#implementing-the-salsadatabase-trait">Implementing the <code>salsa::Database</code> trait</a></h2>
<p>In addition to the struct itself, we must add an impl of <code>salsa::Database</code>:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>impl salsa::Database for Database {
    fn salsa_runtime(&amp;self) -&gt; &amp;salsa::Runtime {
        self.storage.runtime()
    }
}
<span class="boring">}
</span></code></pre></pre>
<h2 id="impementing-the-salsaparalleldatabase-trait"><a class="header" href="#impementing-the-salsaparalleldatabase-trait">Implementing the <code>salsa::ParallelDatabase</code> trait</a></h2>
<p>If you want to permit accessing your database from multiple threads at once, then you also need to implement the <code>ParallelDatabase</code> trait:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>impl salsa::ParallelDatabase for Database {
    fn snapshot(&amp;self) -&gt; salsa::Snapshot&lt;Self&gt; {
        salsa::Snapshot::new(Database {
            storage: self.storage.snapshot(),
        })
    }
}
<span class="boring">}
</span></code></pre></pre>
<h2 id="implementing-the-default-trait"><a class="header" href="#implementing-the-default-trait">Implementing the <code>Default</code> trait</a></h2>
<p>It's not required, but implementing the <code>Default</code> trait is often a convenient way to let users instantiate your database:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>impl Default for Database {
    fn default() -&gt; Self {
        Self {
            storage: Default::default(),
        }
    }
}
<span class="boring">}
</span></code></pre></pre>
<h2 id="implementing-the-traits-for-each-jar"><a class="header" href="#implementing-the-traits-for-each-jar">Implementing the traits for each Jar</a></h2>
<p>The <code>Database</code> struct also needs to implement the <a href="tutorial/./jar.html#database-trait-for-the-jar">database traits for each jar</a>.
In our case, though, we already wrote that impl as a <a href="tutorial/./jar.html#implementing-the-database-trait-for-the-jar">blanket impl alongside the jar itself</a>,
so no action is needed.
This is the recommended strategy unless your trait has custom members that depend on fields of the <code>Database</code> itself
(for example, sometimes the <code>Database</code> holds some kind of custom resource that you want to give access to).</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-ir"><a class="header" href="#defining-the-ir">Defining the IR</a></h1>
<p>Before we can define the <a href="tutorial/./parser.html">parser</a>, we need to define the intermediate representation (IR) that we will use for <code>calc</code> programs.
In the <a href="tutorial/./structure.html">basic structure</a>, we defined some &quot;pseudo-Rust&quot; structures like <code>Statement</code> and <code>Expression</code>;
now we are going to define them for real.</p>
<h2 id="salsa-structs"><a class="header" href="#salsa-structs">&quot;Salsa structs&quot;</a></h2>
<p>In addition to regular Rust types, we will make use of various <strong>salsa structs</strong>.
A salsa struct is a struct that has been annotated with one of the salsa annotations:</p>
<ul>
<li><a href="tutorial/ir.html#input-structs"><code>#[salsa::input]</code></a>, which designates the &quot;base inputs&quot; to your computation;</li>
<li><a href="tutorial/ir.html#tracked-structs"><code>#[salsa::tracked]</code></a>, which designates intermediate values created during your computation;</li>
<li><a href="tutorial/ir.html#interned-structs"><code>#[salsa::interned]</code></a>, which designates small values that are easy to compare for equality.</li>
</ul>
<p>All salsa structs store the actual values of their fields in the salsa database.
This permits us to track when the values of those fields change to figure out what work will need to be re-executed.</p>
<p>When you annotate a struct with one of the above salsa attributes, salsa actually generates a bunch of code to link that struct into the database.
This code must be connected to some <a href="tutorial/./jar.html">jar</a>.
By default, this is <code>crate::Jar</code>, but you can specify a different jar with the <code>jar=</code> attribute (e.g., <code>#[salsa::input(jar = MyJar)]</code>).
You must also list the struct in the jar definition itself, or you will get errors.</p>
<h2 id="input-structs"><a class="header" href="#input-structs">Input structs</a></h2>
<p>The first thing we will define is our <strong>input</strong>.
Every salsa program has some basic inputs that drive the rest of the computation.
The rest of the program must be some deterministic function of those base inputs,
such that when those inputs change, we can try to efficiently recompute the new result of that function.</p>
<p>Inputs are defined as Rust structs with a <code>#[salsa::input]</code> annotation:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::input]
pub struct SourceProgram {
    #[return_ref]
    text: String,
}
<span class="boring">}
</span></code></pre></pre>
<p>In our compiler, we have just one simple input, the <code>SourceProgram</code>, which has a <code>text</code> field (the string).</p>
<h3 id="the-data-lives-in-the-database"><a class="header" href="#the-data-lives-in-the-database">The data lives in the database</a></h3>
<p>Although they are declared like other Rust structs, salsa structs are implemented quite differently.
The values of their fields are stored in the salsa database, and the struct itself just contains a numeric identifier.
This means that the struct instances are copy (no matter what fields they contain).
Creating instances of the struct and accessing fields is done by invoking methods like <code>new</code> as well as getters and setters.</p>
<p>More concretely, the <code>#[salsa::input]</code> annotation will generate a struct for <code>SourceProgram</code> like this:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SourceProgram(salsa::Id);
<span class="boring">}
</span></code></pre></pre>
<p>It will also generate a method <code>new</code> that lets you create a <code>SourceProgram</code> in the database.
For an input, a <code>&amp;mut db</code> reference is required, along with the values for each field:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let source = SourceProgram::new(&amp;mut db, &quot;print 11 + 11&quot;.to_string());
<span class="boring">}
</span></code></pre></pre>
<p>You can read the value of the field with <code>source.text(&amp;db)</code>,
and you can set the value of the field with <code>source.set_text(&amp;mut db, &quot;print 11 * 2&quot;.to_string())</code>.</p>
<h3 id="database-revisions"><a class="header" href="#database-revisions">Database revisions</a></h3>
<p>Whenever a function takes an <code>&amp;mut</code> reference to the database,
that means that it can only be invoked from outside the incrementalized part of your program,
as explained in <a href="tutorial/../overview.html#goal-of-salsa">the overview</a>.
When you change the value of an input field, that increments a 'revision counter' in the database,
indicating that some inputs are different now.
When we talk about a &quot;revision&quot; of the database, we are referring to the state of the database in between changes to the input values.</p>
<h2 id="tracked-structs-1"><a class="header" href="#tracked-structs-1">Tracked structs</a></h2>
<p>Next we will define a <strong>tracked struct</strong> to represent the functions in our input.
Whereas inputs represent the <em>start</em> of a computation, tracked structs represent intermediate values created during your computation.
In this case, we are going to parse the raw input program, and create a <code>Function</code> for each of the functions defined by the user.</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
pub struct Function {
    #[id]
    name: FunctionId,
    args: Vec&lt;VariableId&gt;,
    body: Expression,
}
<span class="boring">}
</span></code></pre></pre>
<p>Unlike with inputs, the fields of tracked structs are immutable once created. Otherwise, working with a tracked struct is quite similar to an input:</p>
<ul>
<li>You can create a new value by using <code>new</code>, but with a tracked struct, you only need an <code>&amp;dyn</code> database, not <code>&amp;mut</code> (e.g., <code>Function::new(&amp;db, some_name, some_args, some_body)</code>)</li>
<li>You use a getter to read the value of a field, just like with an input (e.g., <code>my_func.args(db)</code> to read the <code>args</code> field).</li>
</ul>
<h3 id="id-fields-1"><a class="header" href="#id-fields-1">id fields</a></h3>
<p>To get better reuse across revisions, particularly when things are reordered, you can mark some entity fields with <code>#[id]</code>.
Normally, you would do this on fields that represent the &quot;name&quot; of an entity.
This indicates that, across two revisions R1 and R2, if two functions are created with the same name, they refer to the same entity, so we can compare their other fields for equality to determine what needs to be re-executed.
Adding <code>#[id]</code> attributes is an optimization and never affects correctness.
For more details, see the <a href="tutorial/../reference/algorithm.html">algorithm</a> page of the reference.</p>
<h2 id="interned-structs-1"><a class="header" href="#interned-structs-1">Interned structs</a></h2>
<p>The final kind of salsa struct are <em>interned structs</em>.
As with input and tracked structs, the data for an interned struct is stored in the database, and you just pass around a single integer.
Unlike those structs, if you intern the same data twice, you get back the <strong>same integer</strong>.</p>
<p>A classic use of interning is for small strings like function names and variables.
It's annoying and inefficient to pass around those names with <code>String</code> values which must be cloned;
it's also inefficient to have to compare them for equality via string comparison.
Therefore, we define two interned structs, <code>FunctionId</code> and <code>VariableId</code>, each with a single field that stores the string:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::interned]
pub struct VariableId {
    #[return_ref]
    pub text: String,
}

#[salsa::interned]
pub struct FunctionId {
    #[return_ref]
    pub text: String,
}
<span class="boring">}
</span></code></pre></pre>
<p>When you invoke e.g. <code>FunctionId::new(&amp;db, &quot;my_string&quot;.to_string())</code>, you will get back a <code>FunctionId</code> that is just a newtype'd integer.
But if you invoke the same call to <code>new</code> again, you get back the same integer:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let f1 = FunctionId::new(&amp;db, &quot;my_string&quot;.to_string());
let f2 = FunctionId::new(&amp;db, &quot;my_string&quot;.to_string());
assert_eq!(f1, f2);
<span class="boring">}
</span></code></pre></pre>
<h3 id="expressions-and-statements"><a class="header" href="#expressions-and-statements">Expressions and statements</a></h3>
<p>We'll also intern expressions and statements. This is convenient primarily because it allows us to have recursive structures very easily. Since we don't really need the &quot;cheap equality comparison&quot; aspect of interning, this isn't the most efficient choice, and many compilers would opt to represent expressions/statements in some other way.</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::interned]
pub struct Statement {
    data: StatementData,
}

#[derive(Eq, PartialEq, Clone, Hash)]
pub enum StatementData {
    /// Defines `fn &lt;name&gt;(&lt;args&gt;) = &lt;body&gt;`
    Function(Function),
    /// Defines `print &lt;expr&gt;`
    Print(Expression),
}

#[salsa::interned]
pub struct Expression {
    #[return_ref]
    data: ExpressionData,
}

#[derive(Eq, PartialEq, Clone, Hash)]
pub enum ExpressionData {
    Op(Expression, Op, Expression),
    Number(OrderedFloat&lt;f64&gt;),
    Variable(VariableId),
    Call(FunctionId, Vec&lt;Expression&gt;),
}

#[derive(Eq, PartialEq, Copy, Clone, Hash, Debug)]
pub enum Op {
    Add,
    Subtract,
    Multiply,
    Divide,
}
<span class="boring">}
</span></code></pre></pre>
<h3 id="interned-ids-are-guaranteed-to-be-consistent-within-a-revision-but-not-across-revisions-but-you-dont-have-to-care"><a class="header" href="#interned-ids-are-guaranteed-to-be-consistent-within-a-revision-but-not-across-revisions-but-you-dont-have-to-care">Interned ids are guaranteed to be consistent within a revision, but not across revisions (but you don't have to care)</a></h3>
<p>Interned ids are guaranteed not to change within a single revision, so you can intern things from all over your program and get back consistent results.
When you change the inputs, however, salsa may opt to clear some of the interned values and choose different integers.
However, if this happens, it will also be sure to re-execute every function that interned that value, so all of them still see a consistent value,
just a different one than they saw in a previous revision.</p>
<p>In other words, within a salsa computation, you can assume that interning produces a single consistent integer, and you don't have to think about it.
If however you export interned identifiers outside the computation, and then change the inputs, they may no longer be valid or may refer to different values.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-parser-memoized-functions-and-inputs"><a class="header" href="#defining-the-parser-memoized-functions-and-inputs">Defining the parser: memoized functions and inputs</a></h1>
<p>The next step in the <code>calc</code> compiler is to define the parser.
The role of the parser will be to take the <code>SourceProgram</code> input,
read the string from the <code>text</code> field,
and create the <code>Statement</code>, <code>Function</code>, and <code>Expression</code> structures that <a href="tutorial/./ir.html">we defined in the <code>ir</code> module</a>.</p>
<p>To minimize dependencies, we are going to write a <a href="https://en.wikipedia.org/wiki/Recursive_descent_parser">recursive descent parser</a>.
Another option would be to use a <a href="https://rustrepo.com/catalog/rust-parsing_newest_1">Rust parsing framework</a>.
We won't cover the parsing itself in this tutorial -- you can read the code if you want to see how it works.
We're going to focus only on the salsa-related aspects.</p>
<h2 id="the-parse_statements-function"><a class="header" href="#the-parse_statements-function">The <code>parse_statements</code> function</a></h2>
<p>The starting point for the parser is the <code>parse_statements</code> function:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked(return_ref)]
pub fn parse_statements(db: &amp;dyn crate::Db, source: SourceProgram) -&gt; Vec&lt;Statement&gt; {
    // Get the source text from the database
    let source_text = source.text(db);

    // Create the parser
    let mut parser = Parser {
        db,
        source_text,
        position: 0,
    };

    // Read in statements until we reach the end of the input
    let mut result = vec![];
    loop {
        // Skip over any whitespace
        parser.skip_whitespace();

        // If there are no more tokens, break
        if let None = parser.peek() {
            break;
        }

        // Otherwise, there is more input, so parse a statement.
        if let Some(statement) = parser.parse_statement() {
            result.push(statement);
        } else {
            // If we failed, report an error at whatever position the parser
            // got stuck. We could recover here by skipping to the end of the line
            // or something like that. But we leave that as an exercise for the reader!
            parser.report_error();
            break;
        }
    }

    result
}
<span class="boring">}
</span></code></pre></pre>
<p>This function is annotated as <code>#[salsa::tracked]</code>.
That means that, when it is called, salsa will track what inputs it reads as well as what value it returns.
The return value is <em>memoized</em>,
which means that if you call this function again without changing the inputs,
salsa will just clone the result rather than re-execute it.</p>
<h3 id="tracked-functions-are-the-unit-of-reuse"><a class="header" href="#tracked-functions-are-the-unit-of-reuse">Tracked functions are the unit of reuse</a></h3>
<p>Tracked functions are the core part of how salsa enables incremental reuse.
The goal of the framework is to avoid re-executing tracked functions and instead to clone their result.
Salsa uses the <a href="tutorial/../reference/algorithm.html">red-green algorithm</a> to decide when to re-execute a function.
The short version is that a tracked function is re-executed if either (a) it directly reads an input, and that input has changed
or (b) it directly invokes another tracked function, and that function's return value has changed.
In the case of <code>parse_statements</code>, it directly reads <code>SourceProgram::text</code>, so if the text changes, then the parser will re-execute.</p>
<p>By choosing which functions to mark as <code>#[tracked]</code>, you control how much reuse you get.
In our case, we're opting to mark the outermost parsing function as tracked, but not the inner ones.
This means that if the input changes, we will always re-parse the entire input and re-create the resulting statements and so forth.
We'll see later that this <em>doesn't</em> mean we will always re-run the type checker and other parts of the compiler.</p>
<p>This trade-off makes sense because (a) parsing is very cheap, so the overhead of tracking and enabling finer-grained reuse doesn't pay off
and because (b) since strings are just a big blob-o-bytes without any structure, it's rather hard to identify which parts of the IR need to be reparsed.
Some systems do choose to do more granular reparsing, often by doing a &quot;first pass&quot; over the string to give it a bit of structure,
e.g. to identify the functions,
but deferring the parsing of the body of each function until later.
Setting up a scheme like this is relatively easy in salsa, and uses the same principles that we will use later to avoid re-executing the type checker.</p>
<h3 id="parameters-to-a-tracked-function"><a class="header" href="#parameters-to-a-tracked-function">Parameters to a tracked function</a></h3>
<p>The <strong>first</strong> parameter to a tracked function is <strong>always</strong> the database, <code>db: &amp;dyn crate::Db</code>.
It must be a <code>dyn</code> value of whatever database is associated with the jar.</p>
<p>The <strong>second</strong> parameter to a tracked function is <strong>always</strong> some kind of salsa struct.
(Recall that the first parameter is always the database,
which should be a <code>dyn Trait</code> value for the database trait associated with the jar;
the default jar is <code>crate::Jar</code>.)</p>
<p>Tracked functions may take other arguments as well, though our examples here do not.
Functions that take additional arguments are less efficient and flexible.
It's generally better to structure tracked functions as functions of a single salsa struct if possible.</p>
<h3 id="the-return_ref-annotation"><a class="header" href="#the-return_ref-annotation">The <code>return_ref</code> annotation</a></h3>
<p>You may have noticed that <code>parse_statements</code> is tagged with <code>#[salsa::tracked(return_ref)]</code>.
Ordinarily, when you call a tracked function, the result you get back is cloned out of the database.
The <code>return_ref</code> attribute means that a reference into the database is returned instead.
So, when called, <code>parse_statements</code> will return an <code>&amp;Vec&lt;Statement&gt;</code> rather than cloning the <code>Vec</code>.
This is useful as a performance optimization.
(You may recall the <code>return_ref</code> annotation from the <a href="tutorial/./ir.html">ir</a> section of the tutorial,
where it was placed on struct fields, with roughly the same meaning.)</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-parser-reporting-errors"><a class="header" href="#defining-the-parser-reporting-errors">Defining the parser: reporting errors</a></h1>
<p>The last interesting case in the parser is how to handle a parse error.
Because salsa functions are memoized and may not execute, they should not have side-effects,
so we don't just want to call <code>eprintln!</code>.
If we did so, the error would only be reported the first time the function was called.</p>
<p>Salsa defines a mechanism for managing this called an <strong>accumulator</strong>.
In our case, we define an accumulator struct called <code>Diagnostics</code> in the <code>ir</code> module:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::accumulator]
pub struct Diagnostics(Diagnostic);

#[derive(Clone, Debug)]
pub struct Diagnostic {
    pub position: usize,
    pub message: String,
}
<span class="boring">}
</span></code></pre></pre>
<p>Accumulator structs are always newtype structs with a single field, in this case of type <code>Diagnostic</code>.
Memoized functions can <em>push</em> <code>Diagnostic</code> values onto the accumulator.
Later, you can invoke a method to find all the values that were pushed by a memoized function
or any function that it called
(e.g., we could get the set of <code>Diagnostic</code> values produced by the <code>parse_statements</code> function).</p>
<p>The <code>Parser::report_error</code> method contains an example of pushing a diagnostic:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>    /// Report an error diagnostic at the current position.
    fn report_error(&amp;self) {
        Diagnostics::push(
            self.db,
            Diagnostic {
                position: self.position,
                message: &quot;unexpected character&quot;.to_string(),
            },
        );
    }
<span class="boring">}
</span></code></pre></pre>
<p>To get the set of diagnostics produced by <code>parse_statements</code>, or any other memoized function,
we invoke the associated <code>accumulated</code> function:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let accumulated: Vec&lt;Diagnostic&gt; =
    parse_statements::accumulated::&lt;Diagnostics&gt;(db);
                      //            -----------
                      //     Use turbofish to specify
                      //     the diagnostics type.
<span class="boring">}
</span></code></pre></pre>
<p><code>accumulated</code> takes the database <code>db</code> as argument and returns a <code>Vec</code>.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-parser-debug-impls-and-testing"><a class="header" href="#defining-the-parser-debug-impls-and-testing">Defining the parser: debug impls and testing</a></h1>
<p>As the final part of the parser, we need to write some tests.
To do so, we will create a database, set the input source text, run the parser, and check the result.
Before we can do that, though, we have to address one question: how do we inspect the value of an interned type like <code>Expression</code>?</p>
<h2 id="the-debugwithdb-trait"><a class="header" href="#the-debugwithdb-trait">The <code>DebugWithDb</code> trait</a></h2>
<p>Because an interned type like <code>Expression</code> just stores an integer, the traditional <code>Debug</code> trait is not very useful.
To properly print an <code>Expression</code>, you need to access the salsa database to find out what its value is.
To solve this, <code>salsa</code> provides a <code>DebugWithDb</code> trait that acts like the regular <code>Debug</code>, but takes a database as argument.
For types that implement this trait, you can invoke the <code>debug</code> method.
This returns a temporary that implements the ordinary <code>Debug</code> trait, allowing you to write something like</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>eprintln!(&quot;Expression = {:?}&quot;, expr.debug(db));
<span class="boring">}
</span></code></pre></pre>
<p>and get back the output you expect.</p>
<h2 id="implementing-the-debugwithdb-trait"><a class="header" href="#implementing-the-debugwithdb-trait">Implementing the <code>DebugWithDb</code> trait</a></h2>
<p>For now, unfortunately, you have to implement the <code>DebugWithDb</code> trait manually, as we do not provide a derive.
This is tedious but not difficult. Here is an example of implementing the trait for <code>Expression</code>:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>impl DebugWithDb&lt;dyn crate::Db + '_&gt; for Expression {
    fn fmt(&amp;self, f: &amp;mut std::fmt::Formatter&lt;'_&gt;, db: &amp;dyn crate::Db) -&gt; std::fmt::Result {
        match self.data(db) {
            ExpressionData::Op(a, b, c) =&gt; f
                .debug_tuple(&quot;ExpressionData::Op&quot;)
                .field(&amp;a.debug(db)) // use `a.debug(db)` for interned things
                .field(&amp;b.debug(db))
                .field(&amp;c.debug(db))
                .finish(),
            ExpressionData::Number(a) =&gt; {
                f.debug_tuple(&quot;Number&quot;)
                    .field(a) // use just `a` otherwise
                    .finish()
            }
            ExpressionData::Variable(a) =&gt; f.debug_tuple(&quot;Variable&quot;).field(&amp;a.debug(db)).finish(),
            ExpressionData::Call(a, b) =&gt; f
                .debug_tuple(&quot;Call&quot;)
                .field(&amp;a.debug(db))
                .field(&amp;b.debug(db))
                .finish(),
        }
    }
}
<span class="boring">}
</span></code></pre></pre>
<p>Some things to note:</p>
<ul>
<li>The <code>data</code> method gives access to the full enum from the database.</li>
<li>The <a href="https://doc.rust-lang.org/std/fmt/struct.Formatter.html#"><code>Formatter</code></a> methods (e.g., <a href="https://doc.rust-lang.org/std/fmt/struct.Formatter.html#method.debug_tuple"><code>debug_tuple</code></a>) can be used to provide consistent output.</li>
<li>When printing the value of a field, use <code>.field(&amp;a.debug(db))</code> for fields that are themselves interned or entities, and use <code>.field(&amp;a)</code> for fields that just implement the ordinary <code>Debug</code> trait.</li>
</ul>
<h2 id="forwarding-to-the-ordinary-debug-trait"><a class="header" href="#forwarding-to-the-ordinary-debug-trait">Forwarding to the ordinary <code>Debug</code> trait</a></h2>
<p>For consistency, it is sometimes useful to have a <code>DebugWithDb</code> implementation even for types, like <code>Op</code>, that are just ordinary enums. You can do that like so:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>impl DebugWithDb&lt;dyn crate::Db + '_&gt; for Op {
    fn fmt(&amp;self, f: &amp;mut std::fmt::Formatter&lt;'_&gt;, _db: &amp;dyn crate::Db) -&gt; std::fmt::Result {
        write!(f, &quot;{:?}&quot;, self)
    }
}

impl DebugWithDb&lt;dyn crate::Db + '_&gt; for Diagnostic {
    fn fmt(&amp;self, f: &amp;mut std::fmt::Formatter&lt;'_&gt;, _db: &amp;dyn crate::Db) -&gt; std::fmt::Result {
        write!(f, &quot;{:?}&quot;, self)
    }
}

#[salsa::tracked]
pub struct Function {
    #[id]
    name: FunctionId,
    args: Vec&lt;VariableId&gt;,
    body: Expression,
}

#[salsa::accumulator]
pub struct Diagnostics(Diagnostic);

#[derive(Clone, Debug)]
pub struct Diagnostic {
    pub position: usize,
    pub message: String,
}
<span class="boring">}
</span></code></pre></pre>
<h2 id="writing-the-unit-test"><a class="header" href="#writing-the-unit-test">Writing the unit test</a></h2>
<p>Now that we have our <code>DebugWithDb</code> impls in place, we can write a simple unit test harness.
The <code>parse_string</code> function below creates a database, sets the source text, and then invokes the parser:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>/// Create a new database with the given source text and parse the result.
/// Returns the statements and the diagnostics generated.
#[cfg(test)]
fn parse_string(source_text: &amp;str) -&gt; String {
    use salsa::debug::DebugWithDb;

    // Create the database
    let mut db = crate::db::Database::default();

    // Create the source program
    let source_program = SourceProgram::new(&amp;mut db, source_text.to_string());

    // Invoke the parser
    let statements = parse_statements(&amp;db, source_program);

    // Read out any diagnostics
    let accumulated = parse_statements::accumulated::&lt;Diagnostics&gt;(&amp;db, source_program);

    // Format the result as a string and return it
    format!(&quot;{:#?}&quot;, (statements, accumulated).debug(&amp;db))
}
<span class="boring">}
</span></code></pre></pre>
<p>Combined with the <a href="https://crates.io/crates/expect-test"><code>expect-test</code></a> crate, we can then write unit tests like this one:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[test]
fn parse_print() {
    let actual = parse_string(&quot;print 1 + 2&quot;);
    let expected = expect_test::expect![[r#&quot;
        (
            [
                ExpressionData::Op(
                    Number(
                        OrderedFloat(
                            1.0,
                        ),
                    ),
                    Add,
                    Number(
                        OrderedFloat(
                            2.0,
                        ),
                    ),
                ),
            ],
            [],
        )&quot;#]];
    expected.assert_eq(&amp;actual);
}
<span class="boring">}
</span></code></pre></pre>
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-checker"><a class="header" href="#defining-the-checker">Defining the checker</a></h1>
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-interpreter"><a class="header" href="#defining-the-interpreter">Defining the interpreter</a></h1>
<div style="break-before: page; page-break-before: always;"></div><h1 id="reference"><a class="header" href="#reference">Reference</a></h1>
<div style="break-before: page; page-break-before: always;"></div><h1 id="the-red-green-algorithm"><a class="header" href="#the-red-green-algorithm">The &quot;red-green&quot; algorithm</a></h1>
<p>This page explains the basic salsa incremental algorithm.
The algorithm is called the &quot;red-green&quot; algorithm, which is where the name salsa comes from.</p>
<h3 id="database-revisions-1"><a class="header" href="#database-revisions-1">Database revisions</a></h3>
<p>The salsa database always tracks a single <strong>revision</strong>. Each time you set an input, the revision is incremented. So we start in revision <code>R1</code>, but when a <code>set</code> method is called, we will go to <code>R2</code>, then <code>R3</code>, and so on. For each input, we also track the revision in which it was last changed.</p>
<h3 id="basic-rule-when-inputs-change-re-execute"><a class="header" href="#basic-rule-when-inputs-change-re-execute">Basic rule: when inputs change, re-execute!</a></h3>
<p>When you invoke a tracked function, in addition to storing the value that was returned, we also track what <em>other</em> tracked functions it depends on, and the revisions when their value last changed. When you invoke the function again, if the database is in a new revision, then we check whether any of the inputs to this function have changed in that new revision. If not, we can just return our cached value. But if the inputs <em>have</em> changed (or may have changed), we will re-execute the function to find the most up-to-date answer.</p>
<p>Here is a simple example, where the <code>parse_module</code> function invokes the <code>module_text</code> function:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
fn parse_module(db: &amp;dyn Db, module: Module) -&gt; Ast {
    let module_text: &amp;String = module_text(db, module);
    Ast::parse_text(module_text)
}

#[salsa::tracked(ref)]
fn module_text(db: &amp;dyn Db, module: Module) -&gt; String {
    panic!(&quot;text for module `{module:?}` not set&quot;)
}
<span class="boring">}
</span></code></pre></pre>
<p>If we invoke <code>parse_module</code> twice, but change the module text in between, then we will have to re-execute <code>parse_module</code>:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>module_text::set(
    db,
    module,
    &quot;fn foo() { }&quot;.to_string(),
);
parse_module(db, module); // executes

// ...some time later...

module_text::set(
    db,
    module,
    &quot;fn foo() { /* add a comment */ }&quot;.to_string(),
);
parse_module(db, module); // executes again!
<span class="boring">}
</span></code></pre></pre>
<h3 id="backdating-sometimes-we-can-be-smarter"><a class="header" href="#backdating-sometimes-we-can-be-smarter">Backdating: sometimes we can be smarter</a></h3>
<p>Often, though, tracked functions don't depend directly on the inputs. Instead, they'll depend on some other tracked function. For example, perhaps we have a <code>type_check</code> function that reads the AST:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
fn type_check(db: &amp;dyn Db, module: Module) {
    let ast = parse_module(db, module);
    ...
}
<span class="boring">}
</span></code></pre></pre>
<p>If the module text is changed, we saw that we have to re-execute <code>parse_module</code>, but there are many changes to the source text that still produce the same AST. For example, suppose we simply add a comment? In that case, if <code>type_check</code> is called again, we will:</p>
<ul>
<li>First re-execute <code>parse_module</code>, since its input changed.</li>
<li>We will then compare the resulting AST. If it's the same as last time, we can <em>backdate</em> the result, meaning that we say that, even though the inputs changed, the output didn't.</li>
</ul>
<h2 id="durability-an-optimization"><a class="header" href="#durability-an-optimization">Durability: an optimization</a></h2>
<p>As an optimization, salsa includes the concept of <strong>durability</strong>. When you set the value of a tracked function, you can also set it with a given <em>durability</em>:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>module_text::set_with_durability(
    db,
    module,
    &quot;fn foo() { }&quot;.to_string(),
    salsa::Durability::HIGH
);
<span class="boring">}
</span></code></pre></pre>
<p>For each durability, we track the revision in which <em>some input</em> with that durability changed. If a tracked function depends (transitively) only on high durability inputs, and you change a low durability input, then we can very easily determine that the tracked function result is still valid, avoiding the need to traverse the input edges one by one.</p>
<p>An example: if compiling a Rust program, you might mark the inputs from crates.io as <em>high durability</em> inputs, since they are unlikely to change. The current workspace could be marked as <em>low durability</em>.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="common-patterns"><a class="header" href="#common-patterns">Common patterns</a></h1>
<p>This section documents patterns for using Salsa.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="selection"><a class="header" href="#selection">Selection</a></h1>
<p>The &quot;selection&quot; (or &quot;firewall&quot;) pattern is when you have a query Qsel that reads from some
other Qbase and extracts some small bit of information from Qbase that it returns.
In particular, Qsel does not combine values from other queries. In some sense,
then, Qsel is redundant -- you could have just extracted
the information from Qbase yourself, and done without the salsa machinery. But
Qsel serves a role in that it limits the amount of re-execution that is required
when Qbase changes.</p>
<h2 id="example-the-base-query"><a class="header" href="#example-the-base-query">Example: the base query</a></h2>
<p>For example, imagine that you have a query <code>parse</code> that parses the input text of a request
and returns a <code>ParsedResult</code>, which contains a header and a body:</p>
<pre><code class="language-rust ignore">#[derive(Clone, Debug, PartialEq, Eq)]
struct ParsedResult {
    header: Vec&lt;ParsedHeader&gt;,
    body: String,
}

#[derive(Clone, Debug, PartialEq, Eq)]
struct ParsedHeader {
    key: String,
    value: String,
}

#[salsa::query_group(Request)]
trait RequestParser {
    /// The base text of the request.
    #[salsa::input]
    fn request_text(&amp;self) -&gt; String;

    /// The parsed form of the request.
    fn parse(&amp;self) -&gt; ParsedResult;
}
</code></pre>
<h2 id="example-a-selecting-query"><a class="header" href="#example-a-selecting-query">Example: a selecting query</a></h2>
<p>And now you have a number of derived queries that only look at the header.
For example, one might extract the &quot;content-type&quot; header:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(Request)]
trait RequestUtil: RequestParser {
    fn content_type(&amp;self) -&gt; Option&lt;String&gt;;
}

fn content_type(db: &amp;dyn RequestUtil) -&gt; Option&lt;String&gt; {
    db.parse()
        .header
        .iter()
        .find(|header| header.key == &quot;content-type&quot;)
        .map(|header| header.value.clone())
}
</code></pre>
<h2 id="why-prefer-a-selecting-query"><a class="header" href="#why-prefer-a-selecting-query">Why prefer a selecting query?</a></h2>
<p>This <code>content_type</code> query is an instance of the <em>selection</em> pattern. It only
&quot;selects&quot; a small bit of information from the <code>ParsedResult</code>. You might not have
made it a query at all, but instead made it a method on <code>ParsedResult</code>.</p>
<p>But using a query for <code>content_type</code> has an advantage: now if there are downstream
queries that only depend on the <code>content_type</code> (or perhaps on other headers extracted
via a similar pattern), those queries will not have to be re-executed when the request
changes <em>unless</em> the content-type header changes. Consider the dependency graph:</p>
<pre><code class="language-text">request_text  --&gt;  parse  --&gt;  content_type  --&gt;  (other queries)
</code></pre>
<p>When the <code>request_text</code> changes, we are always going to have to re-execute <code>parse</code>.
If that produces a new parsed result, we are <em>also</em> going to re-execute <code>content_type</code>.
But if the result of <code>content_type</code> has not changed, then we will <em>not</em> re-execute
the other queries.</p>
<h2 id="more-levels-of-selection"><a class="header" href="#more-levels-of-selection">More levels of selection</a></h2>
<p>In fact, in our example we might consider introducing another level of selection.
Instead of having <code>content_type</code> directly access the results of <code>parse</code>, it might be better
to insert a selecting query that just extracts the header:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(Request)]
trait RequestUtil: RequestParser {
    fn header(&amp;self) -&gt; Vec&lt;ParsedHeader&gt;;
    fn content_type(&amp;self) -&gt; Option&lt;String&gt;;
}

fn header(db: &amp;dyn RequestUtil) -&gt; Vec&lt;ParsedHeader&gt; {
    db.parse().header
}

fn content_type(db: &amp;dyn RequestUtil) -&gt; Option&lt;String&gt; {
    db.header()
        .iter()
        .find(|header| header.key == &quot;content-type&quot;)
        .map(|header| header.value.clone())
}
</code></pre>
<p>This will result in a dependency graph like so:</p>
<pre><code class="language-text">request_text  --&gt;  parse  --&gt;  header --&gt;  content_type  --&gt;  (other queries)
</code></pre>
<p>The advantage of this is that changes that only affect the &quot;body&quot; or
only consume small parts of the request will
not require us to re-execute <code>content_type</code> at all. This would be particularly
valuable if there are a lot of dependent headers.</p>
<h2 id="a-note-on-cloning-and-efficiency"><a class="header" href="#a-note-on-cloning-and-efficiency">A note on cloning and efficiency</a></h2>
<p>In this example, we used common Rust types like <code>Vec</code> and <code>String</code>,
and we cloned them quite frequently. This will work just fine in Salsa,
but it may not be the most efficient choice. This is because each clone
is going to produce a deep copy of the result. As a simple fix, you
might convert your data structures to use <code>Arc</code> (e.g., <code>Arc&lt;Vec&lt;ParsedHeader&gt;&gt;</code>),
which makes cloning cheap.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="on-demand-lazy-inputs"><a class="header" href="#on-demand-lazy-inputs">On-Demand (Lazy) Inputs</a></h1>
<p>Salsa input queries work best if you can easily provide all of the inputs upfront.
However sometimes the set of inputs is not known beforehand.</p>
<p>A typical example is reading files from disk.
While it is possible to eagerly scan a particular directory and create an in-memory file tree in a salsa input query, a more straightforward approach is to read the files lazily.
That is, when someone requests the text of a file for the first time:</p>
<ol>
<li>Read the file from disk and cache it.</li>
<li>Set up a file-system watcher for this path.</li>
<li>Invalidate the cached file once the watcher sends a change notification.</li>
</ol>
<p>This is possible to achieve in salsa, using a derived query and <code>report_synthetic_read</code> and <code>invalidate</code> queries.
The setup looks roughly like this:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(VfsDatabaseStorage)]
trait VfsDatabase: salsa::Database + FileWatcher {
    fn read(&amp;self, path: PathBuf) -&gt; String;
}

trait FileWatcher {
    fn watch(&amp;self, path: &amp;Path);
    fn did_change_file(&amp;mut self, path: &amp;Path);
}

fn read(db: &amp;dyn VfsDatabase, path: PathBuf) -&gt; String {
    db.salsa_runtime()
        .report_synthetic_read(salsa::Durability::LOW);
    db.watch(&amp;path);
    std::fs::read_to_string(&amp;path).unwrap_or_default()
}

#[salsa::database(VfsDatabaseStorage)]
struct MyDatabase { ... }

impl FileWatcher for MyDatabase {
    fn watch(&amp;self, path: &amp;Path) { ... }
    fn did_change_file(&amp;mut self, path: &amp;Path) {
        ReadQuery.in_db_mut(self).invalidate(path);
    }
}
</code></pre>
<ul>
<li>We declare the query as a derived query (which is the default).</li>
<li>In the query implementation, we don't call any other query and just directly read the file from disk.</li>
<li>Because the query doesn't read any inputs, it will be assigned a <code>HIGH</code> durability by default, which we override with <code>report_synthetic_read</code>.</li>
<li>The result of the query is cached, and we must call <code>invalidate</code> to clear this cache.</li>
</ul>
<p>A complete, runnable file-watching example can be found in <a href="https://github.com/ChristopherBiscardi/salsa-file-watch-example/blob/f968dc8ea13a90373f91d962f173de3fe6ae24cd/main.rs">this git repo</a> along with <a href="https://www.christopherbiscardi.com/on-demand-lazy-inputs-for-incremental-computation-in-salsa-with-file-watching-powered-by-notify-in-rust">a write-up</a> that explains more about the code and what it is doing.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="tuning-salsa"><a class="header" href="#tuning-salsa">Tuning Salsa</a></h1>
<h2 id="lru-cache"><a class="header" href="#lru-cache">LRU Cache</a></h2>
<p>You can specify an LRU cache size for any non-input query:</p>
<pre><code class="language-rs">let lru_capacity: usize = 128;
base_db::ParseQuery.in_db_mut(self).set_lru_capacity(lru_capacity);
</code></pre>
<p>The default is <code>0</code>, which disables LRU-caching entirely.</p>
<p>See <a href="./rfcs/RFC0004-LRU.html">The LRU RFC for more details</a>.</p>
<p>Note that there is no garbage collection for keys and
results of old queries, so LRU caches are currently the
only knob available for avoiding unbounded memory usage
for long-running apps built on Salsa.</p>
<h2 id="intern-queries"><a class="header" href="#intern-queries">Intern Queries</a></h2>
<p>Intern queries can make key lookup cheaper, save memory, and
avoid the need for <a href="https://doc.rust-lang.org/std/sync/struct.Arc.html"><code>Arc</code></a>.</p>
<p>Interning is especially useful for queries that involve nested,
tree-like data structures.</p>
<p>See:</p>
<ul>
<li>The <a href="./rfcs/RFC0002-Intern-Queries.html">Intern Queries RFC</a></li>
<li>The <a href="https://github.com/salsa-rs/salsa/blob/master/examples/compiler/main.rs"><code>compiler</code> example</a>,
which uses interning.</li>
</ul>
<h2 id="granularity-of-incrementality"><a class="header" href="#granularity-of-incrementality">Granularity of Incrementality</a></h2>
<p>See:</p>
<ul>
<li><a href="./common_patterns/selection.html">common patterns: selection</a> and</li>
<li>The <a href="https://github.com/salsa-rs/salsa/blob/master/examples/selection/main.rs"><code>selection</code> example</a></li>
</ul>
<h2 id="cancellation"><a class="header" href="#cancellation">Cancellation</a></h2>
<p>Queries that are no longer needed due to concurrent writes or changes in dependencies are cancelled
by Salsa. Each access of an intermediate query is a potential cancellation point. Cancellation is
implemented via panicking, and Salsa internals are intended to be panic-safe.</p>
<p>If you have a query that contains a long loop which does not execute any intermediate queries,
salsa won't be able to cancel it automatically. You may wish to check for cancellation yourself
by invoking <code>db.unwind_if_cancelled()</code>.</p>
<p>For more details on cancellation, see:</p>
<ul>
<li><a href="./rfcs/RFC0007-Opinionated-Cancelation.html">the Opinionated cancellation RFC</a>.</li>
<li>The tests for cancellation behavior in the Salsa repo.</li>
</ul>
<div style="break-before: page; page-break-before: always;"></div><h1 id="cycle-handling"><a class="header" href="#cycle-handling">Cycle handling</a></h1>
<p>By default, when Salsa detects a cycle in the computation graph, Salsa will panic with a <a href="https://github.com/salsa-rs/salsa/blob/0f9971ad94d5d137f1192fde2b02ccf1d2aca28c/src/lib.rs#L654-L672"><code>salsa::Cycle</code></a> as the panic value. The <a href="https://github.com/salsa-rs/salsa/blob/0f9971ad94d5d137f1192fde2b02ccf1d2aca28c/src/lib.rs#L654-L672"><code>salsa::Cycle</code></a> structure describes the cycle, which can be useful for diagnosing what went wrong.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="recovering-via-fallback"><a class="header" href="#recovering-via-fallback">Recovering via fallback</a></h1>
<p>Panicking when a cycle occurs is ok for situations where you believe a cycle is impossible. But sometimes cycles can result from illegal user input and cannot be statically prevented. In these cases, you might prefer to gracefully recover from a cycle rather than panicking the entire query. Salsa supports that with the idea of <em>cycle recovery</em>.</p>
<p>To use cycle recovery, you annotate potential participants in the cycle with a <code>#[salsa::recover(my_recover_fn)]</code> attribute. When a cycle occurs, if any participant P has recovery information, then no panic occurs. Instead, the execution of P is aborted and P will execute the recovery function to generate its result. Participants in the cycle that do not have recovery information continue executing as normal, using this recovery result.</p>
<p>The recovery function has a similar signature to a query function. It is given a reference to your database along with a <code>salsa::Cycle</code> describing the cycle that occurred; it returns the result of the query. Example:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>fn my_recover_fn(
    db: &amp;dyn MyDatabase,
    cycle: &amp;salsa::Cycle,
) -&gt; MyResultValue
<span class="boring">}
</span></code></pre></pre>
<p>The <code>db</code> and <code>cycle</code> arguments can be used to prepare a useful error message for your users.</p>
<p><strong>Important:</strong> Although the recovery function is given a <code>db</code> handle, you should be careful to avoid creating a cycle from within recovery or invoking queries that may be participating in the current cycle. Attempting to do so can result in inconsistent results.</p>
<h2 id="figuring-out-why-recovery-did-not-work"><a class="header" href="#figuring-out-why-recovery-did-not-work">Figuring out why recovery did not work</a></h2>
<p>If a cycle occurs and <em>some</em> of the participant queries have <code>#[salsa::recover]</code> annotations and others do not, then the query will be treated as irrecoverable and will simply panic. You can use the <code>Cycle::unexpected_participants</code> method to figure out why recovery did not succeed and add the appropriate <code>#[salsa::recover]</code> annotations.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="how-salsa-works"><a class="header" href="#how-salsa-works">How Salsa works</a></h1>
<h2 id="video-available"><a class="header" href="#video-available">Video available</a></h2>
<p>To get the most complete introduction to Salsa's inner workings, check
out <a href="https://youtu.be/_muY4HjSqVw">the &quot;How Salsa Works&quot; video</a>.  If
you'd like a deeper dive, <a href="https://www.youtube.com/watch?v=i_IhACacPRY">the &quot;Salsa in more depth&quot;
video</a> digs into the
details of the incremental algorithm.</p>
<blockquote>
<p>If you're in China, watch videos on <a href="https://www.bilibili.com/video/BV1Df4y1A7t3/">&quot;How Salsa Works&quot;</a>, <a href="https://www.bilibili.com/video/BV1AM4y1G7E4/">&quot;Salsa In More Depth&quot;</a>.</p>
</blockquote>
<h2 id="key-idea"><a class="header" href="#key-idea">Key idea</a></h2>
<p>The key idea of <code>salsa</code> is that you define your program as a set of
<strong>queries</strong>. Every query is used like a function <code>K -&gt; V</code> that maps from
some key of type <code>K</code> to a value of type <code>V</code>. Queries come in two basic
varieties:</p>
<ul>
<li><strong>Inputs</strong>: the base inputs to your system. You can change these
whenever you like.</li>
<li><strong>Functions</strong>: pure functions (no side effects) that transform your
inputs into other values. The results of queries are memoized to
avoid recomputing them a lot. When you make changes to the inputs,
we'll figure out (fairly intelligently) when we can re-use these
memoized values and when we have to recompute them.</li>
</ul>
<h2 id="how-to-use-salsa-in-three-easy-steps"><a class="header" href="#how-to-use-salsa-in-three-easy-steps">How to use Salsa in three easy steps</a></h2>
<p>Using salsa is as easy as 1, 2, 3...</p>
<ol>
<li>Define one or more <strong>query groups</strong> that contain the inputs
and queries you will need. We'll start with one such group, but
later on you can use more than one to break up your system into
components (or spread your code across crates).</li>
<li>Define the <strong>query functions</strong> where appropriate.</li>
<li>Define the <strong>database</strong>, which contains the storage for all
the inputs/queries you will be using. The query struct will contain
the storage for all of the inputs/queries and may also contain
anything else that your code needs (e.g., configuration data).</li>
</ol>
<p>To see an example of this in action, check out <a href="https://github.com/salsa-rs/salsa/blob/master/examples/hello_world/main.rs">the <code>hello_world</code>
example</a>, which has a number of comments explaining how
things work.</p>
<h2 id="digging-into-the-plumbing"><a class="header" href="#digging-into-the-plumbing">Digging into the plumbing</a></h2>
<p>Check out the <a href="plumbing.html">plumbing</a> chapter to see a deeper explanation of the
code that salsa generates and how it connects to the salsa library.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="videos"><a class="header" href="#videos">Videos</a></h1>
<p>There are currently two videos about Salsa available, but they describe an older version of Salsa and so they are rather outdated:</p>
<ul>
<li><a href="https://youtu.be/_muY4HjSqVw">How Salsa Works</a>, which gives a
high-level introduction to the key concepts involved and shows how
to use salsa;</li>
<li><a href="https://www.youtube.com/watch?v=i_IhACacPRY">Salsa In More Depth</a>,
which digs into the incremental algorithm and explains -- at a
high-level -- how Salsa is implemented.</li>
</ul>
<blockquote>
<p>If you're in China, watch videos on <a href="https://www.bilibili.com/video/BV1Df4y1A7t3/">How Salsa Works</a>, <a href="https://www.bilibili.com/video/BV1AM4y1G7E4/">Salsa In More Depth</a>.</p>
</blockquote>
<div style="break-before: page; page-break-before: always;"></div><h1 id="plumbing"><a class="header" href="#plumbing">Plumbing</a></h1>
<blockquote>
<p>⚠️ <strong>IN-PROGRESS VERSION OF SALSA.</strong> ⚠️</p>
<p>This page describes the unreleased &quot;Salsa 2022&quot; version, which is a major departure from older versions of salsa. The code here works but is only available on github and from the <code>salsa-2022</code> crate.</p>
</blockquote>
<p>This chapter documents the code that salsa generates and its &quot;inner workings&quot;.
We refer to this as the &quot;plumbing&quot;.</p>
<h2 id="overview"><a class="header" href="#overview">Overview</a></h2>
<p>The plumbing section is broken up into chapters:</p>
<ul>
<li>The <a href="./plumbing/jars_and_ingredients.html">jars and ingredients</a> chapter covers how each salsa item (like a tracked function) specifies what data it needs at runtime, and how links between items work.</li>
<li>The <a href="./plumbing/database_and_runtime.html">database and runtime</a> chapter covers the data structures that are used at runtime to coordinate workers, trigger cancellation, track which functions are active and what dependencies they have accrued, and so forth.</li>
<li>The <a href="./plumbing/query_ops.html">query operations</a> chapter describes how the major operations on function ingredients work. This text was written for an older version of salsa but the logic is the same:
<ul>
<li>The <a href="./plumbing/maybe_changed_after.html">maybe changed after</a> operation determines when a memoized value for a tracked function is out of date.</li>
<li>The <a href="./plumbing/fetch.html">fetch</a> operation computes the most recent value.</li>
<li>The <a href="./plumbing/derived_flowchart.html">derived queries flowchart</a> depicts the logic in flowchart form.</li>
<li>The <a href="./plumbing/cycles.html">cycle handling</a> chapter describes what happens when cycles occur.</li>
</ul>
</li>
<li>The <a href="./plumbing/terminology.html">terminology</a> section describes various words that appear throughout.</li>
</ul>
<div style="break-before: page; page-break-before: always;"></div><h1 id="jars-and-ingredients"><a class="header" href="#jars-and-ingredients">Jars and ingredients</a></h1>
<blockquote>
<p>⚠️ <strong>IN-PROGRESS VERSION OF SALSA.</strong> ⚠️</p>
<p>This page describes the unreleased &quot;Salsa 2022&quot; version, which is a major departure from older versions of salsa. The code here works but is only available on github and from the <code>salsa-2022</code> crate.</p>
</blockquote>
<p>This page covers how data is organized in salsa and how links between salsa items (e.g., dependency tracking) work.</p>
<h2 id="salsa-items-and-ingredients"><a class="header" href="#salsa-items-and-ingredients">Salsa items and ingredients</a></h2>
<p>A <strong>salsa item</strong> is some item annotated with a salsa annotation that can be included in a jar.
For example, a tracked function is a salsa item:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
fn foo(db: &amp;dyn Db, input: MyInput) { }
<span class="boring">}
</span></code></pre></pre>
<p>...and so is a salsa input...</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::input]
struct MyInput { }
<span class="boring">}
</span></code></pre></pre>
<p>...or a tracked struct:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::tracked]
struct MyStruct { }
<span class="boring">}
</span></code></pre></pre>
<p>Each salsa item needs certain bits of data at runtime to operate.
These bits of data are called <strong>ingredients</strong>.
Most salsa items generate a single ingredient, but sometimes they make more than one.
For example, a tracked function generates a <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/function.rs#L42"><code>FunctionIngredient</code></a>.
A tracked struct however generates several ingredients, one for the struct itself (a <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/tracked_struct.rs#L18"><code>TrackedStructIngredient</code></a>),
and one <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/function.rs#L42"><code>FunctionIngredient</code></a> for each value field.</p>
<h3 id="ingredients-define-the-core-logic-of-salsa"><a class="header" href="#ingredients-define-the-core-logic-of-salsa">Ingredients define the core logic of salsa</a></h3>
<p>Most of the interesting salsa code lives in these ingredients.
For example, when you create a new tracked struct, the method <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/tracked_struct.rs#L76"><code>TrackedStruct::new_struct</code></a> is invoked;
it is responsible for determining the tracked struct's id.
Similarly, when you call a tracked function, that is translated into a call to <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/function/fetch.rs#L15"><code>TrackedFunction::fetch</code></a>,
which decides whether there is a valid memoized value to return,
or whether the function must be executed.</p>
<h3 id="ingredient-interfaces-are-not-stable-or-subject-to-semver"><a class="header" href="#ingredient-interfaces-are-not-stable-or-subject-to-semver">Ingredient interfaces are not stable or subject to semver</a></h3>
<p>Interfaces are not meant to be directly used by salsa users.
The salsa macros generate code that invokes the ingredients.
The APIs may change in arbitrary ways across salsa versions,
as the macros are kept in sync.</p>
<h3 id="the-ingredient-trait"><a class="header" href="#the-ingredient-trait">The <code>Ingredient</code> trait</a></h3>
<p>Each ingredient implements the <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/ingredient.rs#L15"><code>Ingredient&lt;DB&gt;</code></a> trait, which defines generic operations supported by any kind of ingredient.
For example, the method <code>maybe_changed_after</code> can be used to check whether some particular piece of data stored in the ingredient may have changed since a given revision.</p>
<p>We'll see below that each database <code>DB</code> is able to take an <code>IngredientIndex</code> and use that to get a <code>&amp;dyn Ingredient&lt;DB&gt;</code> for the corresponding ingredient.
This allows the database to perform generic operations on a numbered ingredient without knowing exactly what the type of that ingredient is.</p>
<h3 id="jars-are-a-collection-of-ingredients"><a class="header" href="#jars-are-a-collection-of-ingredients">Jars are a collection of ingredients</a></h3>
<p>When you declare a salsa jar, you list out each of the salsa items that are included in that jar:</p>
<pre><code class="language-rust ignore">#[salsa::jar]
struct Jar(
    foo,
    MyInput,
    MyStruct
);
</code></pre>
<p>This expands to a struct like so:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>struct Jar(
    &lt;foo as IngredientsFor&gt;::Ingredient,
    &lt;MyInput as IngredientsFor&gt;::Ingredient,
    &lt;MyStruct as IngredientsFor&gt;::Ingredient,
)
<span class="boring">}
</span></code></pre></pre>
<p>The <code>IngredientsFor</code> trait is used to define the ingredients needed by some salsa item, such as the tracked function <code>foo</code>
or the salsa input <code>MyInput</code>.
Each salsa item defines a type <code>I</code>, so that <code>&lt;I as IngredientsFor&gt;::Ingredient</code> gives the ingredients needed by <code>I</code>.</p>
<h3 id="database-is-a-tuple-of-jars"><a class="header" href="#database-is-a-tuple-of-jars">Database is a tuple of jars</a></h3>
<p>Salsa's database storage ultimately boils down to a tuple of jar structs,
where each jar struct (as we just saw) itself contains the ingredients
for the salsa items within that jar.
The database can thus be thought of as a list of ingredients,
although that list is organized into a 2-level hierarchy.</p>
<p>The reason for this 2-level hierarchy is that it permits separate compilation and privacy.
The crate that lists the jars doesn't have to know the contents of the jar to embed the jar struct in the database.
And some of the types that appear in the jar may be private to another struct.</p>
<h3 id="the-hasjars-trait-and-the-jars-type"><a class="header" href="#the-hasjars-trait-and-the-jars-type">The HasJars trait and the Jars type</a></h3>
<p>Each salsa database implements the <code>HasJars</code> trait,
generated by the <code>salsa::db</code> procedural macro.
The <code>HasJars</code> trait, among other things, defines a <code>Jars</code> associated type that maps to a tuple of the jars in the trait.</p>
<p>For example, given a database like this...</p>
<pre><code class="language-rust ignore">#[salsa::db(Jar1, ..., JarN)]
struct MyDatabase {
    storage: salsa::Storage&lt;Self&gt;
}
</code></pre>
<p>...the <code>salsa::db</code> macro would generate a <code>HasJars</code> impl that (among other things) contains <code>type Jars = (Jar1, ..., JarN)</code>:</p>
<pre><code class="language-rust ignore">        impl salsa::storage::HasJars for #db {
            type Jars = (#(#jar_paths,)*);
</code></pre>
<p>In turn, the <code>salsa::Storage&lt;DB&gt;</code> type ultimately contains a struct <code>Shared</code> that embeds <code>DB::Jars</code>, thus embedding all the data for each jar.</p>
<h3 id="ingredient-indices"><a class="header" href="#ingredient-indices">Ingredient indices</a></h3>
<p>During initialization, each ingredient in the database is assigned a unique index called the <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/routes.rs#L5-L9"><code>IngredientIndex</code></a>.
This is a 32-bit number that identifies a particular ingredient from a particular jar.</p>
<h3 id="routes"><a class="header" href="#routes">Routes</a></h3>
<p>In addition to an index, each ingredient in the database also has a corresponding <em>route</em>.
A route is a closure that, given a reference to the <code>DB::Jars</code> tuple,
returns a <code>&amp;dyn Ingredient&lt;DB&gt;</code> reference.
The route table allows us to go from the <code>IngredientIndex</code> for a particular ingredient
to its <code>&amp;dyn Ingredient&lt;DB&gt;</code> trait object.
The route table is created while the database is being initialized,
as described shortly.</p>
<h3 id="database-keys-and-dependency-keys"><a class="header" href="#database-keys-and-dependency-keys">Database keys and dependency keys</a></h3>
<p>A <code>DatabaseKeyIndex</code> identifies a specific value stored in some specific ingredient.
It combines an <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/routes.rs#L5-L9"><code>IngredientIndex</code></a> with a <code>key_index</code>, which is a <code>salsa::Id</code>:</p>
<pre><code class="language-rust ignore">/// An &quot;active&quot; database key index represents a database key index
/// that is actively executing. In that case, the `key_index` cannot be
/// None.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct DatabaseKeyIndex {
    pub(crate) ingredient_index: IngredientIndex,
    pub(crate) key_index: Id,
}
</code></pre>
<p>A <code>DependencyIndex</code> is similar, but the <code>key_index</code> is optional.
This is used when we sometimes wish to refer to the ingredient as a whole, and not any specific value within the ingredient.</p>
<p>These kinds of indices are used to store connections between ingredients.
For example, each memoized value has to track its inputs.
Those inputs are stored as dependency indices.
We can then do things like ask, &quot;did this input change since revision R?&quot; by</p>
<ul>
<li>using the ingredient index to find the route and get a <code>&amp;dyn Ingredient&lt;DB&gt;</code></li>
<li>and then invoking the <code>maybe_changed_after</code> method on that trait object.</li>
</ul>
<h3 id="hasjarsdyn"><a class="header" href="#hasjarsdyn">HasJarsDyn</a></h3>
<p>There is one catch in the above setup.
We need the database to be dyn-safe, and we also need to be able to define the database trait and so forth without knowing the final database type to enable separate compilation.
Traits like <code>Ingredient&lt;DB&gt;</code> require knowing the full <code>DB</code> type.
If we had one function ingredient directly invoke a method on <code>Ingredient&lt;DB&gt;</code>, that would imply that it has to be fully generic and only instantiated at the final crate, when the full database type is available.</p>
<p>We solve this via the <code>HasJarsDyn</code> trait. The <code>HasJarsDyn</code> trait exports methods that combine the &quot;find ingredient, invoke method&quot; steps into one method:</p>
<pre><code class="language-rust ignore">/// Dyn friendly subset of HasJars
pub trait HasJarsDyn {
    fn runtime(&amp;self) -&gt; &amp;Runtime;

    fn maybe_changed_after(&amp;self, input: DependencyIndex, revision: Revision) -&gt; bool;

    fn cycle_recovery_strategy(&amp;self, input: IngredientIndex) -&gt; CycleRecoveryStrategy;

    fn origin(&amp;self, input: DatabaseKeyIndex) -&gt; Option&lt;QueryOrigin&gt;;

    fn mark_validated_output(&amp;self, executor: DatabaseKeyIndex, output: DependencyIndex);

    /// Invoked when `executor` used to output `stale_output` but no longer does.
    /// This method routes that into a call to the [`remove_stale_output`](`crate::ingredient::Ingredient::remove_stale_output`)
    /// method on the ingredient for `stale_output`.
    fn remove_stale_output(&amp;self, executor: DatabaseKeyIndex, stale_output: DependencyIndex);

    /// Informs `ingredient` that the salsa struct with id `id` has been deleted.
    /// This means that `id` will not be used in this revision and hence
    /// any memoized values keyed by that struct can be discarded.
    ///
    /// In order to receive this callback, `ingredient` must have registered itself
    /// as a dependent function using
    /// [`SalsaStructInDb::register_dependent_fn`](`crate::salsa_struct::SalsaStructInDb::register_dependent_fn`).
    fn salsa_struct_deleted(&amp;self, ingredient: IngredientIndex, id: Id);
}
</code></pre>
<p>So, technically, to check if an input has changed, an ingredient:</p>
<ul>
<li>Invokes <code>HasJarsDyn::maybe_changed_after</code> on the <code>dyn Database</code></li>
<li>The impl for this method (generated by <code>#[salsa::db]</code>):
<ul>
<li>gets the route for the ingredient from the ingredient index</li>
<li>uses the route to get a <code>&amp;dyn Ingredient</code></li>
<li>invokes <code>maybe_changed_after</code> on that ingredient</li>
</ul>
</li>
</ul>
<h3 id="initializing-the-database"><a class="header" href="#initializing-the-database">Initializing the database</a></h3>
<p>The last thing to discuss is how the database is initialized.
The <code>Default</code> implementation for <code>Storage&lt;DB&gt;</code> does the work:</p>
<pre><code class="language-rust ignore">impl&lt;DB&gt; Default for Storage&lt;DB&gt;
where
    DB: HasJars,
{
    fn default() -&gt; Self {
        let mut routes = Routes::new();
        let jars = DB::create_jars(&amp;mut routes);
        Self {
            shared: Arc::new(Shared {
                jars,
                cvar: Default::default(),
            }),
            routes: Arc::new(routes),
            runtime: Runtime::default(),
        }
    }
}
</code></pre>
<p>First, it creates an empty <code>Routes</code> instance.
Then it invokes the <code>DB::create_jars</code> method.
The implementation of this method is defined by the <code>#[salsa::db]</code> macro; it simply invokes the <code>Jar::create_jar</code> method on each of the jars:</p>
<pre><code class="language-rust ignore">            fn create_jars(routes: &amp;mut salsa::routes::Routes&lt;Self&gt;) -&gt; Self::Jars {
                (
                    #(
                        &lt;#jar_paths as salsa::jar::Jar&gt;::create_jar(routes),
                    )*
                )
            }
</code></pre>
<p>This implementation for <code>create_jar</code> is generated by the <code>#[salsa::jar]</code> macro, and simply walks over the representative type for each salsa item and asks <em>it</em> to create its ingredients.</p>
<pre><code class="language-rust ignore">    quote! {
        impl&lt;'salsa_db&gt; salsa::jar::Jar&lt;'salsa_db&gt; for #jar_struct {
            type DynDb = dyn #jar_trait + 'salsa_db;

            fn create_jar&lt;DB&gt;(routes: &amp;mut salsa::routes::Routes&lt;DB&gt;) -&gt; Self
            where
                DB: salsa::storage::JarFromJars&lt;Self&gt; + salsa::storage::DbWithJar&lt;Self&gt;,
            {
                #(
                    let #field_var_names = &lt;#field_tys as salsa::storage::IngredientsFor&gt;::create_ingredients(routes);
                )*
                Self(#(#field_var_names),*)
            }
        }
    }
</code></pre>
<p>The code to create the ingredients for any particular item is generated by their associated macros (e.g., <code>#[salsa::tracked]</code>, <code>#[salsa::input]</code>), but it always follows a particular structure.
To create an ingredient, we first invoke <code>Routes::push</code> which creates the routes to that ingredient and assigns it an <code>IngredientIndex</code>.
We can then invoke (e.g.) <code>FunctionIngredient::new</code> to create the structure.
The <em>routes</em> to an ingredient are defined as closures that, given the <code>DB::Jars</code>, can find the data for a particular ingredient.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="database-and-runtime"><a class="header" href="#database-and-runtime">Database and runtime</a></h1>
<p>A salsa database struct is declared by the user with the <code>#[salsa::db]</code> annotation.
It contains all the data that the program needs to execute:</p>
<pre><code class="language-rust ignore">#[salsa::db(jar0...jarn)]
struct MyDatabase {
    storage: Storage&lt;Self&gt;,
    maybe_other_fields: u32,
}
</code></pre>
<p>This data is divided into two categories:</p>
<ul>
<li>Salsa-governed storage, contained in the <code>Storage&lt;Self&gt;</code> field. This data is mandatory.</li>
<li>Other fields (like <code>maybe_other_fields</code>) defined by the user. This can be anything. This allows for you to give access to special resources or whatever.</li>
</ul>
<h2 id="parallel-handles"><a class="header" href="#parallel-handles">Parallel handles</a></h2>
<p>When used across parallel threads, the database type defined by the user must support a &quot;snapshot&quot; operation.
This snapshot should create a clone of the database that can be used by the parallel threads.
The <code>Storage</code> struct itself supports <code>snapshot</code>.
The <code>snapshot</code> method returns a <code>Snapshot&lt;DB&gt;</code> type, which prevents these clones from being accessed via an <code>&amp;mut</code> reference.</p>
<h2 id="the-storage-struct"><a class="header" href="#the-storage-struct">The Storage struct</a></h2>
<p>The salsa <code>Storage</code> struct contains all the data that salsa itself will use and work with.
There are three key bits of data:</p>
<ul>
<li>The <code>Shared</code> struct, which contains the data stored across all snapshots. This is primarily the ingredients described in the <a href="plumbing/./jars_and_ingredients.html">jars and ingredients chapter</a>, but it also contains some synchronization information (a cond var). This is used for cancellation, as described below.
<ul>
<li>The data in the <code>Shared</code> struct is only shared across threads when other threads are active. Some operations, like mutating an input, require an <code>&amp;mut</code> handle to the <code>Shared</code> struct. This is obtained by using the <code>Arc::get_mut</code> methods; obviously this is only possible when all snapshots and threads have ceased executing, since there must be a single handle to the <code>Arc</code>.</li>
</ul>
</li>
<li>The <code>Routes</code> struct, which contains the information to find any particular ingredient -- this is also shared across all handles, and its construction is also described in the <a href="plumbing/./jars_and_ingredients.html">jars and ingredients chapter</a>. The routes are separated out from the <code>Shared</code> struct because they are truly immutable at all times, and we want to be able to hold a handle to them while getting <code>&amp;mut</code> access to the <code>Shared</code> struct.</li>
<li>The <code>Runtime</code> struct, which is specific to a particular database instance. It contains the data for a single active thread, along with some links to shared data of its own.</li>
</ul>
<h2 id="incrementing-the-revision-counter-and-getting-mutable-access-to-the-jars"><a class="header" href="#incrementing-the-revision-counter-and-getting-mutable-access-to-the-jars">Incrementing the revision counter and getting mutable access to the jars</a></h2>
<p>Salsa's general model is that there is a single &quot;master&quot; copy of the database and, potentially, multiple snapshots.
The snapshots are not directly owned, they are instead enclosed in a <code>Snapshot&lt;DB&gt;</code> type that permits only <code>&amp;</code>-deref,
and so the only database that can be accessed with an <code>&amp;mut</code>-ref is the master database.
Each of the snapshots, however, owns another handle on the <code>Arc</code> in <code>Storage</code> that stores the ingredients.</p>
<p>Whenever the user attempts to do an <code>&amp;mut</code>-operation, such as modifying an input field, that operation needs to
first cancel any parallel snapshots and wait for those parallel threads to finish.
Once the snapshots have completed, we can use <code>Arc::get_mut</code> to get an <code>&amp;mut</code> reference to the ingredient data.
This allows us to get <code>&amp;mut</code> access without any unsafe code and
guarantees that we have successfully managed to cancel the other worker threads
(or gotten ourselves into a deadlock).</p>
<p>The code to acquire <code>&amp;mut</code> access to the database is the <code>jars_mut</code> method:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>    /// Gets mutable access to the jars. This will trigger a new revision
    /// and it will also cancel any ongoing work in the current revision.
    /// Any actual writes that occur to data in a jar should use
    /// [`Runtime::report_tracked_write`].
    pub fn jars_mut(&amp;mut self) -&gt; (&amp;mut DB::Jars, &amp;mut Runtime) {
        // Wait for all snapshots to be dropped.
        self.cancel_other_workers();

        // Increment revision counter.
        self.runtime.new_revision();

        // Acquire `&amp;mut` access to `self.shared` -- this is only possible because
        // the snapshots have all been dropped, so we hold the only handle to the `Arc`.
        let shared = Arc::get_mut(&amp;mut self.shared).unwrap();

        // Inform other ingredients that a new revision has begun.
        // This gives them a chance to free resources that were being held until the next revision.
        let routes = self.routes.clone();
        for route in routes.reset_routes() {
            route(&amp;mut shared.jars).reset_for_new_revision();
        }

        // Return mut ref to jars + runtime.
        (&amp;mut shared.jars, &amp;mut self.runtime)
    }
<span class="boring">}
</span></code></pre></pre>
<p>The key initial point is that it invokes <code>cancel_other_workers</code> before proceeding:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>    /// Sets cancellation flag and blocks until all other workers with access
    /// to this storage have completed.
    ///
    /// This could deadlock if there is a single worker with two handles to the
    /// same database!
    fn cancel_other_workers(&amp;mut self) {
        loop {
            self.runtime.set_cancellation_flag();

            // If we have unique access to the jars, we are done.
            if Arc::get_mut(&amp;mut self.shared).is_some() {
                return;
            }

            // Otherwise, wait until some other storage entites have dropped.
            // We create a mutex here because the cvar api requires it, but we
            // don't really need one as the data being protected is actually
            // the jars above.
            //
            // The cvar `self.shared.cvar` is notified by the `Drop` impl.
            let mutex = parking_lot::Mutex::new(());
            let mut guard = mutex.lock();
            self.shared.cvar.wait(&amp;mut guard);
        }
    }
<span class="boring">}
</span></code></pre></pre>
<h2 id="the-salsa-runtime"><a class="header" href="#the-salsa-runtime">The Salsa runtime</a></h2>
<p>The salsa runtime offers helper methods that are accessed by the ingredients.
It tracks, for example, the active query stack, and contains methods for adding dependencies between queries (e.g., <code>report_tracked_read</code>) or <a href="plumbing/./cycles.html">resolving cycles</a>.
It also tracks the current revision and information about when values with low or high durability last changed.</p>
<p>Basically, the ingredient structures store the &quot;data at rest&quot; -- like memoized values -- and things that are &quot;per ingredient&quot;.</p>
<p>The runtime stores the &quot;active, in-progress&quot; data, such as which queries are on the stack, and/or the dependencies accessed by the currently active query.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="query-operations"><a class="header" href="#query-operations">Query operations</a></h1>
<p>Each of the query storage structs implements the <code>QueryStorageOps</code> trait found in the <a href="https://github.com/salsa-rs/salsa/blob/master/src/plumbing.rs"><code>plumbing</code></a> module:</p>
<pre><code class="language-rust no_run noplayground">pub trait QueryStorageOps&lt;Q&gt;
where
    Self: QueryStorageMassOps,
    Q: Query,
{
</code></pre>
<p>which defines the basic operations that all queries support. The most important are these two:</p>
<ul>
<li><a href="plumbing/./maybe_changed_after.html">maybe changed after</a>: Returns true if the value of the query (for the given key) may have changed since the given revision.</li>
<li><a href="plumbing/./fetch.html">Fetch</a>: Returns the up-to-date value for the given K (or an error in the case of an &quot;unrecovered&quot; cycle).</li>
</ul>
<div style="break-before: page; page-break-before: always;"></div><h1 id="maybe-changed-after"><a class="header" href="#maybe-changed-after">Maybe changed after</a></h1>
<pre><code class="language-rust no_run noplayground">    /// True if the value of `input`, which must be from this query, may have
    /// changed after the given revision ended.
    ///
    /// This function should only be invoked with a revision less than the current
    /// revision.
    fn maybe_changed_after(
        &amp;self,
        db: &amp;&lt;Q as QueryDb&lt;'_&gt;&gt;::DynDb,
        input: DatabaseKeyIndex,
        revision: Revision,
    ) -&gt; bool;
</code></pre>
<p>The <code>maybe_changed_after</code> operation computes whether a query's value <em>may have changed</em> <strong>after</strong> the given revision. In other words, <code>Q.maybe_changed_after(R)</code> is true if the value of the query <code>Q</code> may have changed in the revisions <code>(R+1)..R_now</code>, where <code>R_now</code> is the current revision. Note that it doesn't make sense to ask <code>maybe_changed_after(R_now)</code>.</p>
<h2 id="input-queries"><a class="header" href="#input-queries">Input queries</a></h2>
<p>Input queries are set explicitly by the user. <code>maybe_changed_after</code> can therefore just check when the value was last set and compare.</p>
<h2 id="interned-queries"><a class="header" href="#interned-queries">Interned queries</a></h2>
<h2 id="derived-queries"><a class="header" href="#derived-queries">Derived queries</a></h2>
<p>The logic for derived queries is more complex. We summarize the high-level ideas here, but you may find the <a href="plumbing/./derived_flowchart.html">flowchart</a> useful to dig deeper. The <a href="plumbing/./terminology.html">terminology</a> section may also be useful; in some cases, we link to that section on the first usage of a word.</p>
<ul>
<li>If an existing <a href="plumbing/./terminology/memo.html">memo</a> is found, then we check if the memo was <a href="plumbing/./terminology/verified.html">verified</a> in the current <a href="plumbing/./terminology/revision.html">revision</a>. If so, we can compare its <a href="plumbing/./terminology/changed_at.html">changed at</a> revision and return true or false appropriately.</li>
<li>Otherwise, we must check whether <a href="plumbing/./terminology/dependency.html">dependencies</a> have been modified:
<ul>
<li>Let R be the revision in which the memo was last verified; we wish to know if any of the dependencies have changed since revision R.</li>
<li>First, we check the <a href="plumbing/./terminology/durability.html">durability</a>. For each memo, we track the minimum durability of the memo's dependencies. If the memo has durability D, and there have been no changes to an input with durability D since the last time the memo was verified, then we can consider the memo verified without any further work.</li>
<li>If the durability check is not sufficient, then we must check the dependencies individually. For this, we iterate over each dependency D and invoke the <a href="plumbing/./maybe_changed_after.html">maybe changed after</a> operation to check whether D has changed since the revision R.</li>
<li>If no dependency was modified:
<ul>
<li>We can mark the memo as verified and use its <a href="plumbing/./terminology/changed_at.html">changed at</a> revision to return true or false.</li>
</ul>
</li>
</ul>
</li>
<li>Assuming dependencies have been modified:
<ul>
<li>Then we execute the user's query function (same as in <a href="plumbing/./fetch.html">fetch</a>), which potentially <a href="plumbing/./terminology/backdate.html">backdates</a> the resulting value.</li>
<li>Compare the <a href="plumbing/./terminology/changed_at.html">changed at</a> revision in the resulting memo and return true or false.</li>
</ul>
</li>
</ul>
<div style="break-before: page; page-break-before: always;"></div><h1 id="fetch"><a class="header" href="#fetch">Fetch</a></h1>
<pre><code class="language-rust no_run noplayground">    /// Execute the query, returning the result (often, the result
    /// will be memoized).  This is the &quot;main method&quot; for
    /// queries.
    ///
    /// Returns `Err` in the event of a cycle, meaning that computing
    /// the value for this `key` is recursively attempting to fetch
    /// itself.
    fn fetch(&amp;self, db: &amp;&lt;Q as QueryDb&lt;'_&gt;&gt;::DynDb, key: &amp;Q::Key) -&gt; Q::Value;
</code></pre>
<p>The <code>fetch</code> operation computes the value of a query. It prefers to reuse memoized values when it can.</p>
<h2 id="input-queries-1"><a class="header" href="#input-queries-1">Input queries</a></h2>
<p>Input queries simply load the result from the table.</p>
<h2 id="interned-queries-1"><a class="header" href="#interned-queries-1">Interned queries</a></h2>
<p>Interned queries map the input into a hashmap to find an existing integer. If none is present, a new value is created.</p>
<h2 id="derived-queries-1"><a class="header" href="#derived-queries-1">Derived queries</a></h2>
<p>The logic for derived queries is more complex. We summarize the high-level ideas here, but you may find the <a href="plumbing/./derived_flowchart.html">flowchart</a> useful to dig deeper. The <a href="plumbing/./terminology.html">terminology</a> section may also be useful; in some cases, we link to that section on the first usage of a word.</p>
<ul>
<li>If an existing <a href="plumbing/./terminology/memo.html">memo</a> is found, then we check if the memo was <a href="plumbing/./terminology/verified.html">verified</a> in the current <a href="plumbing/./terminology/revision.html">revision</a>. If so, we can directly return the memoized value.</li>
<li>Otherwise, if the memo contains a memoized value, we must check whether <a href="plumbing/./terminology/dependency.html">dependencies</a> have been modified:
<ul>
<li>Let R be the revision in which the memo was last verified; we wish to know if any of the dependencies have changed since revision R.</li>
<li>First, we check the <a href="plumbing/./terminology/durability.html">durability</a>. For each memo, we track the minimum durability of the memo's dependencies. If the memo has durability D, and there have been no changes to an input with durability D since the last time the memo was verified, then we can consider the memo verified without any further work.</li>
<li>If the durability check is not sufficient, then we must check the dependencies individually. For this, we iterate over each dependency D and invoke the <a href="plumbing/./maybe_changed_after.html">maybe changed after</a> operation to check whether D has changed since the revision R.</li>
<li>If no dependency was modified:
<ul>
<li>We can mark the memo as verified and return its memoized value.</li>
</ul>
</li>
</ul>
</li>
<li>Assuming dependencies have been modified or the memo does not contain a memoized value:
<ul>
<li>Then we execute the user's query function.</li>
<li>Next, we compute the revision in which the memoized value last changed:
<ul>
<li><em>Backdate:</em> If there was a previous memoized value, and the new value is equal to that old value, then we can <em>backdate</em> the memo, which means to use the 'changed at' revision from before.
<ul>
<li>Thanks to backdating, it is possible for a dependency of the query to have changed in some revision R1 but for the <em>output</em> of the query to have changed in some revision R2 where R2 predates R1.</li>
</ul>
</li>
<li>Otherwise, we use the current revision.</li>
</ul>
</li>
<li>Construct a memo for the new value and return it.</li>
</ul>
</li>
</ul>
<div style="break-before: page; page-break-before: always;"></div><h1 id="derived-queries-flowchart"><a class="header" href="#derived-queries-flowchart">Derived queries flowchart</a></h1>
<p>Derived queries are by far the most complex. This flowchart documents the flow of the <a href="plumbing/./maybe_changed_after.html">maybe changed after</a> and <a href="plumbing/./fetch.html">fetch</a> operations. This flowchart can be edited on <a href="https://draw.io">draw.io</a>:</p>
<!-- The explicit div is there because, otherwise, the flowchart is unreadable when using "dark mode" -->
<div style="background-color:white;">
<p><img src="plumbing/../derived-query-read.drawio.svg" alt="Flowchart" /></p>
</div>
<div style="break-before: page; page-break-before: always;"></div><h1 id="cycles"><a class="header" href="#cycles">Cycles</a></h1>
<h2 id="cross-thread-blocking"><a class="header" href="#cross-thread-blocking">Cross-thread blocking</a></h2>
<p>The interface for blocking across threads now works as follows:</p>
<ul>
<li>When one thread <code>T1</code> wishes to block on a query <code>Q</code> being executed by another thread <code>T2</code>, it invokes <code>Runtime::try_block_on</code>. This will check for cycles. Assuming no cycle is detected, it will block <code>T1</code> until <code>T2</code> has completed with <code>Q</code>. At that point, <code>T1</code> reawakens. However, we don't know the result of executing <code>Q</code>, so <code>T1</code> now has to &quot;retry&quot;. Typically, this will result in successfully reading the cached value.</li>
<li>While <code>T1</code> is blocking, the runtime moves its query stack (a <code>Vec</code>) into the shared dependency graph data structure. When <code>T1</code> reawakens, it recovers ownership of its query stack before returning from <code>try_block_on</code>.</li>
</ul>
<h2 id="cycle-detection"><a class="header" href="#cycle-detection">Cycle detection</a></h2>
<p>When a thread <code>T1</code> attempts to execute a query <code>Q</code>, it will try to load the value for <code>Q</code> from the memoization tables. If it finds an <code>InProgress</code> marker, that indicates that <code>Q</code> is currently being computed. This indicates a potential cycle. <code>T1</code> will then try to block on the query <code>Q</code>:</p>
<ul>
<li>If <code>Q</code> is also being computed by <code>T1</code>, then there is a cycle.</li>
<li>Otherwise, if <code>Q</code> is being computed by some other thread <code>T2</code>, we have to check whether <code>T2</code> is (transitively) blocked on <code>T1</code>. If so, there is a cycle.</li>
</ul>
<p>These two cases are handled internally by the <code>Runtime::try_block_on</code> function. Detecting the intra-thread cycle case is easy; to detect cross-thread cycles, the runtime maintains a dependency DAG between threads (identified by <code>RuntimeId</code>). Before adding an edge <code>T1 -&gt; T2</code> (i.e., <code>T1</code> is blocked waiting for <code>T2</code>) into the DAG, it checks whether a path exists from <code>T2</code> to <code>T1</code>. If so, we have a cycle and the edge cannot be added (then the DAG would no longer be acyclic).</p>
<p>When a cycle is detected, the current thread <code>T1</code> has full access to the query stacks that are participating in the cycle. Consider: naturally, <code>T1</code> has access to its own stack. There is also a path <code>T2 -&gt; ... -&gt; Tn -&gt; T1</code> of blocked threads. Each of the blocked threads <code>T2 ..= Tn</code> will have moved their query stacks into the dependency graph, so those query stacks are available for inspection.</p>
<p>Using the available stacks, we can create a list of cycle participants <code>Q0 ... Qn</code> and store that into a <code>Cycle</code> struct. If none of the participants <code>Q0 ... Qn</code> have cycle recovery enabled, we panic with the <code>Cycle</code> struct, which will trigger all the queries on this thread to panic.</p>
<h2 id="cycle-recovery-via-fallback"><a class="header" href="#cycle-recovery-via-fallback">Cycle recovery via fallback</a></h2>
<p>If any of the cycle participants <code>Q0 ... Qn</code> has cycle recovery set, we recover from the cycle. To help explain how this works, we will use this example cycle which contains three threads. Beginning with the current query, the cycle participants are <code>QA3</code>, <code>QB2</code>, <code>QB3</code>, <code>QC2</code>, <code>QC3</code>, and <code>QA2</code>.</p>
<pre><code>        The cyclic
        edge we have
        failed to add.
          :
   A      :    B         C
          :
   QA1    v    QB1       QC1
┌► QA2    ┌──► QB2   ┌─► QC2
│  QA3 ───┘    QB3 ──┘   QC3 ───┐
│                               │
└───────────────────────────────┘
</code></pre>
<p>Recovery works in phases:</p>
<ul>
<li><strong>Analyze:</strong> As we enumerate the query participants, we collect their collective inputs (all queries invoked so far by any cycle participant) and the max changed-at and min durability. We then remove the cycle participants themselves from this list of inputs, leaving only the queries external to the cycle.</li>
<li><strong>Mark</strong>: For each query Q that is annotated with <code>#[salsa::recover]</code>, we mark it and all of its successors on the same thread by setting its <code>cycle</code> flag to the <code>c: Cycle</code> we constructed earlier; we also reset its inputs to the collective inputs gathered during analysis. If those queries resume execution later, those marks will trigger them to immediately unwind and use cycle recovery, and the inputs will be used as the inputs to the recovery value.
<ul>
<li>Note that we mark <em>all</em> the successors of Q on the same thread, whether or not they have recovery set. We'll discuss later how this is important in the case where the active thread (A, here) doesn't have any recovery set.</li>
</ul>
</li>
<li><strong>Unblock</strong>: Each blocked thread T that has a recovering query is forcibly reawoken; the outgoing edge from that thread to its successor in the cycle is removed. Its condvar is signalled with a <code>WaitResult::Cycle(c)</code>. When the thread reawakens, it will see that and start unwinding with the cycle <code>c</code>.</li>
<li><strong>Handle the current thread:</strong> Finally, we have to choose how to have the current thread proceed. If the current thread includes any cycle with recovery information, then we can begin unwinding. Otherwise, the current thread simply continues as if there had been no cycle, and so the cyclic edge is added to the graph and the current thread blocks. This is possible because some other thread had recovery information and therefore has been awoken.</li>
</ul>
<p>Let's walk through the process with a few examples.</p>
<h3 id="example-1-recovery-on-the-detecting-thread"><a class="header" href="#example-1-recovery-on-the-detecting-thread">Example 1: Recovery on the detecting thread</a></h3>
<p>Consider the case where only the query QA2 has recovery set. It and QA3 will be marked with their <code>cycle</code> flag set to <code>c: Cycle</code>. Threads B and C will not be unblocked, as they do not have any cycle recovery nodes. The current thread (Thread A) will initiate unwinding with the cycle <code>c</code> as the value. Unwinding will pass through QA3 and be caught by QA2. QA2 will substitute the recovery value and return normally. QA1 and QC3 will then complete normally and so forth, on up until all queries have completed.</p>
<h3 id="example-2-recovery-in-two-queries-on-the-detecting-thread"><a class="header" href="#example-2-recovery-in-two-queries-on-the-detecting-thread">Example 2: Recovery in two queries on the detecting thread</a></h3>
<p>Consider the case where both query QA2 and QA3 have recovery set. It proceeds the same as Example 1 until the current thread initiates unwinding. When QA3 receives the cycle, it stores its recovery value and completes normally. QA2 then adds QA3 as an input dependency: at that point, QA2 observes that it too has the cycle mark set, and so it initiates unwinding. The rest of QA2 therefore never executes. This unwinding is caught by QA2's entry point and it stores the recovery value and returns normally. QA1 and QC3 then continue normally, as they have not had their <code>cycle</code> flag set.</p>
<h3 id="example-3-recovery-on-another-thread"><a class="header" href="#example-3-recovery-on-another-thread">Example 3: Recovery on another thread</a></h3>
<p>Now consider the case where only the query QB2 has recovery set. It and QB3 will be marked with the cycle <code>c: Cycle</code> and thread B will be unblocked; the edge <code>QB3 -&gt; QC2</code> will be removed from the dependency graph. Thread A will then add an edge <code>QA3 -&gt; QB2</code> and block on thread B. At that point, thread A releases the lock on the dependency graph, and so thread B is re-awoken. It observes the <code>WaitResult::Cycle</code> and initiates unwinding. Unwinding proceeds through QB3 and into QB2, which recovers. QB1 is then able to execute normally, as is QA3, and execution proceeds from there.</p>
<h3 id="example-4-recovery-on-all-queries"><a class="header" href="#example-4-recovery-on-all-queries">Example 4: Recovery on all queries</a></h3>
<p>Now consider the case where all the queries have recovery set. In that case, they are all marked with the cycle, and all the cross-thread edges are removed from the graph. Each thread will independently awaken and initiate unwinding. Each query will recover.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="terminology"><a class="header" href="#terminology">Terminology</a></h1>
<div style="break-before: page; page-break-before: always;"></div><h1 id="backdate"><a class="header" href="#backdate">Backdate</a></h1>
<p><em>Backdating</em> is when we mark a value that was computed in revision R as having last changed in some earlier revision. This is done when we have an older <a href="plumbing/terminology/./memo.html">memo</a> M and we can compare the two values to see that, while the <a href="plumbing/terminology/./dependency.html">dependencies</a> to M may have changed, the result of the <a href="plumbing/terminology/./query_function.html">query function</a> did not.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="changed-at"><a class="header" href="#changed-at">Changed at</a></h1>
<p>The <em>changed at</em> revision for a <a href="plumbing/terminology/./memo.html">memo</a> is the <a href="plumbing/terminology/./revision.html">revision</a> in which that memo's value last changed. Typically, this is the same as the revision in which the <a href="plumbing/terminology/./query_function.html">query function</a> was last executed, but it may be an earlier revision if the memo was <a href="plumbing/terminology/./backdate.html">backdated</a>.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="dependency"><a class="header" href="#dependency">Dependency</a></h1>
<p>A <em>dependency</em> of a <a href="plumbing/terminology/./query.html">query</a> Q is some other query Q1 that was invoked as part of computing the value for Q (typically, invoked by Q's <a href="plumbing/terminology/./query_function.html">query function</a>).</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="derived-query"><a class="header" href="#derived-query">Derived query</a></h1>
<p>A <em>derived query</em> is a <a href="plumbing/terminology/./query.html">query</a> whose value is defined by the result of a user-provided <a href="plumbing/terminology/./query_function.html">query function</a>. That function is executed to get the result of the query. Unlike <a href="plumbing/terminology/./input_query.html">input queries</a>, the result of a derived query can always be recomputed whenever needed simply by re-executing the function.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="durability"><a class="header" href="#durability">Durability</a></h1>
<p><em>Durability</em> is an optimization that we use to avoid checking the <a href="plumbing/terminology/./dependency.html">dependencies</a> of a <a href="plumbing/terminology/./query.html">query</a> individually. It was introduced in <a href="plumbing/terminology/../../rfcs/RFC0005-Durability.html">RFC #5</a>.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="input-query"><a class="header" href="#input-query">Input query</a></h1>
<p>An <em>input query</em> is a <a href="plumbing/terminology/./query.html">query</a> whose value is explicitly set by the user. When that value is set, a <a href="plumbing/terminology/./durability.html">durability</a> can also be provided.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="ingredient"><a class="header" href="#ingredient">Ingredient</a></h1>
<p>An <em>ingredient</em> is an individual piece of storage used to create a <a href="plumbing/terminology/./salsa_item.html">salsa item</a>.
See the <a href="plumbing/terminology/../jars_and_ingredients.html">jars and ingredients</a> chapter for more details.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="lru"><a class="header" href="#lru">LRU</a></h1>
<p>The <a href="https://docs.rs/salsa/0.16.1/salsa/struct.QueryTableMut.html#method.set_lru_capacity"><code>set_lru_capacity</code></a> method can be used to fix the maximum capacity for a query at a specific number of values. If more values are added after that point, then salsa will drop the values from older <a href="plumbing/terminology/./memo.html">memos</a> to conserve memory (we always retain the <a href="plumbing/terminology/./dependency.html">dependency</a> information for those memos, however, so that we can still compute whether values may have changed, even if we don't know what that value is). The LRU mechanism was introduced in <a href="plumbing/terminology/../../rfcs/RFC0004-LRU.html">RFC #4</a>.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="memo"><a class="header" href="#memo">Memo</a></h1>
<p>A <em>memo</em> stores information about the last time that a <a href="plumbing/terminology/./query_function.html">query function</a> for some <a href="plumbing/terminology/./query.html">query</a> Q was executed:</p>
<ul>
<li>Typically, it contains the value that was returned from that function, so that we don't have to execute it again.
<ul>
<li>However, this is not always true: some queries don't cache their result values, and values can also be dropped as a result of <a href="plumbing/terminology/./LRU.html">LRU</a> collection. In those cases, the memo just stores <a href="plumbing/terminology/./dependency.html">dependency</a> information, which can still be useful to determine if other queries that have Q as a <a href="plumbing/terminology/./dependency.html">dependency</a> may have changed.</li>
</ul>
</li>
<li>The revision in which the memo was last <a href="plumbing/terminology/./verified.html">verified</a>.</li>
<li>The <a href="plumbing/terminology/./changed_at.html">changed at</a> revision in which the memo's value last changed. (Note that it may be <a href="plumbing/terminology/./backdate.html">backdated</a>.)</li>
<li>The minimum durability of the memo's <a href="plumbing/terminology/./dependency.html">dependencies</a>.</li>
<li>The complete set of <a href="plumbing/terminology/./dependency.html">dependencies</a>, if available, or a marker that the memo has an <a href="plumbing/terminology/./untracked.html">untracked dependency</a>.</li>
</ul>
<div style="break-before: page; page-break-before: always;"></div><h1 id="query"><a class="header" href="#query">Query</a></h1>
<div style="break-before: page; page-break-before: always;"></div><h1 id="query-function"><a class="header" href="#query-function">Query function</a></h1>
<p>The <em>query function</em> is the user-provided function that we execute to compute the value of a <a href="plumbing/terminology/./derived_query.html">derived query</a>. Salsa assumes that all query functions are a 'pure' function of their <a href="plumbing/terminology/./dependency.html">dependencies</a> unless the user reports an <a href="plumbing/terminology/./untracked.html">untracked read</a>. Salsa always assumes that functions have no important side-effects (i.e., that they don't send messages over the network whose results you wish to observe) and thus that it doesn't have to re-execute functions unless it needs their return value.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="revision"><a class="header" href="#revision">Revision</a></h1>
<p>A <em>revision</em> is a monotonically increasing integer that we use to track the &quot;version&quot; of the database. Each time the value of an <a href="plumbing/terminology/./input_query.html">input query</a> is modified, we create a new revision.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="salsa-item"><a class="header" href="#salsa-item">Salsa item</a></h1>
<p>A salsa item is something that is decorated with a <code>#[salsa::foo]</code> macro, like a tracked function or struct.
See the <a href="plumbing/terminology/../jars_and_ingredients.html">jars and ingredients</a> chapter for more details.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="salsa-struct"><a class="header" href="#salsa-struct">Salsa struct</a></h1>
<p>A salsa struct is a struct decorated with one of the salsa macros:</p>
<ul>
<li><code>#[salsa::tracked]</code></li>
<li><code>#[salsa::input]</code></li>
<li><code>#[salsa::interned]</code></li>
</ul>
<p>See the <a href="plumbing/terminology/../../overview.html">salsa overview</a> for more details.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="untracked-dependency"><a class="header" href="#untracked-dependency">Untracked dependency</a></h1>
<p>An <em>untracked dependency</em> is an indication that the result of a <a href="plumbing/terminology/./derived_query.html">derived query</a> depends on something not visible to the salsa database. Untracked dependencies are created by invoking <a href="https://docs.rs/salsa/0.16.1/salsa/struct.Runtime.html#method.report_untracked_read"><code>report_untracked_read</code></a> or <a href="https://docs.rs/salsa/0.16.1/salsa/struct.Runtime.html#method.report_synthetic_read"><code>report_synthetic_read</code></a>. When an untracked dependency is present, <a href="plumbing/terminology/./derived_query.html">derived queries</a> are always re-executed if the durability check fails (see the description of the <a href="plumbing/terminology/../fetch.html#derived-queries">fetch operation</a> for more details).</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="verified"><a class="header" href="#verified">Verified</a></h1>
<p>A <a href="plumbing/terminology/./memo.html">memo</a> is <em>verified</em> in a revision R if we have checked that its value is still up-to-date (i.e., if we were to reexecute the <a href="plumbing/terminology/./query_function.html">query function</a>, we are guaranteed to get the same result). Each memo tracks the revision in which it was last verified to avoid repeatedly checking whether dependencies have changed during the <a href="plumbing/terminology/../fetch.html">fetch</a> and <a href="plumbing/terminology/../maybe_changed_after.html">maybe changed after</a> operations.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="rfcs"><a class="header" href="#rfcs">RFCs</a></h1>
<p>The Salsa RFC process is used to describe the motivations for major changes made
to Salsa. RFCs are recorded here in the Salsa book as a historical record of the
considerations that were raised at the time. Note that the contents of RFCs,
once merged, are typically not updated to match further changes. Instead, the
rest of the book is updated to include the RFC text and then kept up to
date as more PRs land and so forth.</p>
<h2 id="creating-an-rfc"><a class="header" href="#creating-an-rfc">Creating an RFC</a></h2>
<p>If you'd like to propose a major new Salsa feature, simply clone the repository
and create a new chapter under the list of RFCs based on the <a href="./rfcs/template.html">RFC template</a>.
Then open a PR with a subject line that starts with &quot;RFC:&quot;.</p>
<h2 id="rfc-vs-implementation"><a class="header" href="#rfc-vs-implementation">RFC vs Implementation</a></h2>
<p>The RFC can be in its own PR, or it can also include work on the implementation
together, whatever works best for you.</p>
<h2 id="does-my-change-need-an-rfc"><a class="header" href="#does-my-change-need-an-rfc">Does my change need an RFC?</a></h2>
<p>Not all PRs require RFCs. RFCs are only needed for larger features or major
changes to how Salsa works. And they don't have to be super complicated, but
they should capture the most important reasons you would like to make the
change. When in doubt, it's ok to just open a PR, and we can always request an
RFC if we want one.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="descriptiontitle"><a class="header" href="#descriptiontitle">Description/title</a></h1>
<h2 id="metadata"><a class="header" href="#metadata">Metadata</a></h2>
<ul>
<li>Author: (Github username(s) or real names, as you prefer)</li>
<li>Date: (today's date)</li>
<li>Introduced in: https://github.com/salsa-rs/salsa/pull/1 (please update once you open your PR)</li>
</ul>
<h2 id="summary-1"><a class="header" href="#summary-1">Summary</a></h2>
<p>Summarize the effects of the RFC in bullet point form.</p>
<h2 id="motivation"><a class="header" href="#motivation">Motivation</a></h2>
<p>Say something about your goals here.</p>
<h2 id="users-guide"><a class="header" href="#users-guide">User's guide</a></h2>
<p>Describe effects on end users here.</p>
<h2 id="reference-guide"><a class="header" href="#reference-guide">Reference guide</a></h2>
<p>Describe implementation details or other things here.</p>
<h2 id="frequently-asked-questions"><a class="header" href="#frequently-asked-questions">Frequently asked questions</a></h2>
<p>Use this section to add in design notes, downsides, rejected approaches, or other considerations.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="query-group-traits"><a class="header" href="#query-group-traits">Query group traits</a></h1>
<h2 id="metadata-1"><a class="header" href="#metadata-1">Metadata</a></h2>
<ul>
<li>Author: nikomatsakis</li>
<li>Date: 2019-01-15</li>
<li>Introduced in: https://github.com/salsa-rs/salsa-rfcs/pull/1</li>
</ul>
<h2 id="motivation-1"><a class="header" href="#motivation-1">Motivation</a></h2>
<ul>
<li>Support <code>dyn QueryGroup</code> for each query group trait as well as <code>impl QueryGroup</code>
<ul>
<li><code>dyn QueryGroup</code> will be much more convenient, at the cost of runtime efficiency</li>
</ul>
</li>
<li>Don't require you to redeclare each query in the final database, just the query groups</li>
</ul>
<h2 id="users-guide-1"><a class="header" href="#users-guide-1">User's guide</a></h2>
<h3 id="declaring-a-query-group"><a class="header" href="#declaring-a-query-group">Declaring a query group</a></h3>
<p>Users will declare query groups by decorating a trait with <code>salsa::query_group</code>:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(MyGroupStorage)]
trait MyGroup {
  // Inputs are annotated with `#[salsa::input]`. For inputs, the final trait will include
  // a `set_my_input(&amp;mut self, key: K1, value: V1)` method automatically added,
  // as well as possibly other mutation methods.
  #[salsa::input]
  fn my_input(&amp;self, key: K1) -&gt; V1;

  // &quot;Derived&quot; queries are just a getter.
  fn my_query(&amp;self, key: K2) -&gt; V2;
}
</code></pre>
<p>The <code>query_group</code> attribute is a procedural macro. It takes as
argument the name of the <strong>storage struct</strong> for the query group --
this is a struct, generated by the macro, which represents the query
group as a whole. It is attached to a trait definition which defines the
individual queries in the query group.</p>
<p>The macro generates three things that users interact with:</p>
<ul>
<li>the trait, here named <code>MyGroup</code>. This will be used when writing the definitions
for the queries and other code that invokes them.</li>
<li>the storage struct, here named <code>MyGroupStorage</code>. This will be used later when
constructing the final database.</li>
<li>query structs, named after each query but converted to camel-case
and with the word <code>Query</code> appended (e.g., <code>MyInputQuery</code> for <code>my_input</code>). These
types are rarely needed, but are presently useful for things like
invoking the GC. These types violate our rule that &quot;things the user
needs to name should be given names by the user&quot;, but we choose not
to fully resolve this question in this RFC.</li>
</ul>
<p>In addition, the macro generates a number of structs that users should
not have to be aware of. These are described in the &quot;reference guide&quot;
section.</p>
<h4 id="controlling-query-modes"><a class="header" href="#controlling-query-modes">Controlling query modes</a></h4>
<p>Input queries, as described in the trait, are specified via the
<code>#[salsa::input]</code> attribute.</p>
<p>Derived queries can be customized by the following attributes,
attached to the getter method (e.g., <code>fn my_query(..)</code>):</p>
<ul>
<li><code>#[salsa::invoke(foo::bar)]</code> specifies the path to the function to invoke
when the query is called (default is <code>my_query</code>).</li>
<li><code>#[salsa::volatile]</code> specifies a &quot;volatile&quot; query, which is assumed to
read untracked input and hence must be re-executed on every revision.</li>
<li><code>#[salsa::dependencies]</code> specifies a &quot;dependencies-only&quot; query, which
records its dependencies but does not cache its result value, so the value is recomputed whenever it is needed.</li>
</ul>
<h3 id="creating-the-database"><a class="header" href="#creating-the-database">Creating the database</a></h3>
<p>Creating a salsa database works by using a <code>#[salsa::database(..)]</code>
attribute. The <code>..</code> content should be a list of paths leading to the
storage structs for each query group that the database will
implement. It is no longer necessary to list the individual
queries. In addition to the <code>salsa::database</code> attribute, the struct must
have access to a <code>salsa::Runtime</code> and implement the <code>salsa::Database</code>
trait. Hence the complete declaration looks roughly like so:</p>
<pre><code class="language-rust ignore">#[salsa::database(MyGroupStorage)]
struct MyDatabase {
  runtime: salsa::Runtime&lt;MyDatabase&gt;,
}

impl salsa::Database for MyDatabase {
  fn salsa_runtime(&amp;self) -&gt; &amp;salsa::Runtime&lt;MyDatabase&gt; {
    &amp;self.runtime
  }
}
</code></pre>
<p>This (procedural) macro generates various impls and types that cause
<code>MyDatabase</code> to implement all the traits for the query groups it
supports, and which customize the storage in the runtime to have all
the data needed. Users should not have to interact with these details,
and they are written out in the reference guide section.</p>
<h2 id="reference-guide-1"><a class="header" href="#reference-guide-1">Reference guide</a></h2>
<p>The goal here is not to give the <em>full</em> details of how to do the
lowering, but to describe the key concepts. Throughout the text, we
will refer to names (e.g., <code>MyGroup</code> or <code>MyGroupStorage</code>) that appear
in the example from the User's Guide -- this indicates that we use
whatever name the user provided.</p>
<h3 id="the-plumbingquerygroup-trait"><a class="header" href="#the-plumbingquerygroup-trait">The <code>plumbing::QueryGroup</code> trait</a></h3>
<p>The <code>QueryGroup</code> trait is a new trait added to the plumbing module. It
is implemented by the query group storage struct <code>MyGroupStorage</code>. Its
role is to link from that struct to the various bits of data that the
salsa runtime needs:</p>
<pre><code class="language-rust ignore">pub trait QueryGroup&lt;DB: Database&gt; {
    type GroupStorage;
    type GroupKey;
}
</code></pre>
<p>This trait is implemented by the <strong>storage struct</strong> (<code>MyGroupStorage</code>)
in our example. You can see there is a bit of confusing naming going
on here -- what we call (for users) the &quot;storage struct&quot; actually
does not wind up containing the true <em>storage</em> (that is, the hashmaps
and things salsa uses). Instead, it merely implements the <code>QueryGroup</code>
trait, which has associated types that lead us to structs we need:</p>
<ul>
<li>the <strong>group storage</strong> contains the hashmaps and things for all the queries in the group</li>
<li>the <strong>group key</strong> is an enum with variants for each of the
queries. It basically stores all the data needed to identify some
particular <em>query value</em> from within the group -- that is, the name
of the query, plus the keys used to invoke it.</li>
</ul>
<p>As described further on, the <code>#[salsa::query_group]</code> macro
will generate an impl of this trait for the
<code>MyGroupStorage</code> struct, along with the group storage and group key
type definitions.</p>
<h3 id="the-plumbinghasquerygroupg-trait"><a class="header" href="#the-plumbinghasquerygroupg-trait">The <code>plumbing::HasQueryGroup&lt;G&gt;</code> trait</a></h3>
<p>The <code>HasQueryGroup&lt;G&gt;</code> trait is a new trait added to the plumbing
module. It is implemented by the database struct <code>MyDatabase</code> for
every query group that <code>MyDatabase</code> supports. Its role is to offer
methods that move back and forth between the context of the <em>full
database</em> and the context of an <em>individual query group</em>:</p>
<pre><code class="language-rust ignore">pub trait HasQueryGroup&lt;G&gt;: Database
where
    G: QueryGroup&lt;Self&gt;,
{
    /// Access the group storage struct from the database.
    fn group_storage(db: &amp;Self) -&gt; &amp;G::GroupStorage;

    /// &quot;Upcast&quot; a group key into a database key.
    fn database_key(group_key: G::GroupKey) -&gt; Self::DatabaseKey;
}
</code></pre>
<p>Here the &quot;database key&quot; is an enum that contains variants for each
group. Its role is to take a group key and put it into the context of
the entire database.</p>
<h3 id="the-query-trait"><a class="header" href="#the-query-trait">The <code>Query</code> trait</a></h3>
<p>The query trait (pre-existing) is extended to include links to its
group, and methods to convert from the group storage to the query
storage, plus methods to convert from a query key up to the group key:</p>
<pre><code class="language-rust ignore">pub trait Query&lt;DB: Database&gt;: Debug + Default + Sized + 'static {
    /// Type that you give as a parameter -- for queries with zero
    /// or more than one input, this will be a tuple.
    type Key: Clone + Debug + Hash + Eq;

    /// What value does the query return?
    type Value: Clone + Debug;

    /// Internal struct storing the values for the query.
    type Storage: plumbing::QueryStorageOps&lt;DB, Self&gt; + Send + Sync;

    /// Associated query group struct.
    type Group: plumbing::QueryGroup&lt;
        DB,
        GroupStorage = Self::GroupStorage,
        GroupKey = Self::GroupKey,
    &gt;;

    /// Generated struct that contains storage for all queries in a group.
    type GroupStorage;

    /// Type that identifies a particular query within the group + its key.
    type GroupKey;

    /// Extract storage for this query from the storage for its group.
    fn query_storage(group_storage: &amp;Self::GroupStorage) -&gt; &amp;Self::Storage;

    /// Create group key for this query.
    fn group_key(key: Self::Key) -&gt; Self::GroupKey;
}
</code></pre>
<h3 id="converting-tofrom-the-context-of-the-full-database-generically"><a class="header" href="#converting-tofrom-the-context-of-the-full-database-generically">Converting to/from the context of the full database generically</a></h3>
<p>Putting all the previous plumbing traits together, this means
that given:</p>
<ul>
<li>a database <code>DB</code> that implements <code>HasQueryGroup&lt;G&gt;</code>;</li>
<li>a group struct <code>G</code> that implements <code>QueryGroup&lt;DB&gt;</code>; and,</li>
<li>a query struct <code>Q</code> that implements <code>Query&lt;DB, Group = G&gt;</code></li>
</ul>
<p>we can (generically) get the storage for the individual query
<code>Q</code> out from the database <code>db</code> via a two-step process:</p>
<pre><code class="language-rust ignore">let group_storage = HasQueryGroup::group_storage(db);
let query_storage = Query::query_storage(group_storage);
</code></pre>
<p>Similarly, we can convert from the key to an individual query
up to the &quot;database key&quot; in a two-step process:</p>
<pre><code class="language-rust ignore">let group_key = Query::group_key(key);
let db_key = HasQueryGroup::database_key(group_key);
</code></pre>
<h3 id="lowering-query-groups"><a class="header" href="#lowering-query-groups">Lowering query groups</a></h3>
<p>The role of the <code>#[salsa::query_group(MyGroupStorage)] trait MyGroup { .. }</code> macro is primarily to generate the group storage struct and the
impl of <code>QueryGroup</code>.  That involves generating the following things:</p>
<ul>
<li>the query trait <code>MyGroup</code> itself, but with:
<ul>
<li><code>salsa::foo</code> attributes stripped</li>
<li><code>#[salsa::input]</code> methods expanded to include setters:
<ul>
<li><code>fn set_my_input(&amp;mut self, key: K1, value__: V1);</code></li>
<li><code>fn set_constant_my_input(&amp;mut self, key: K1, value__: V1);</code></li>
</ul>
</li>
</ul>
</li>
<li>the query group storage struct <code>MyGroupStorage</code>
<ul>
<li>We also generate an impl of <code>QueryGroup&lt;DB&gt;</code> for <code>MyGroupStorage</code>,
linking to the internal storage struct and group key enum</li>
</ul>
</li>
<li>the individual query types
<ul>
<li>Ideally, we would use Rust hygiene to hide these structs, but as
that is not currently possible they are given names based on the
queries, but converted to camel-case (e.g., <code>MyInputQuery</code> and <code>MyQueryQuery</code>).</li>
<li>They implement the <code>salsa::Query</code> trait.</li>
</ul>
</li>
<li>the internal group storage struct
<ul>
<li>Ideally, we would use Rust hygiene to hide this struct, but as
that is not currently possible it is entitled
<code>MyGroupGroupStorage&lt;DB&gt;</code>. Note that it is generic with respect to
the database <code>DB</code>. This is because the actual query storage
requires sometimes storing database keys and hence we need to
know the final database type.</li>
<li>It contains one field per query with a link to the storage information
for that query:
<ul>
<li><code>my_query: &lt;MyQueryQuery as salsa::plumbing::Query&lt;DB&gt;&gt;::Storage</code></li>
<li>(the <code>MyQueryQuery</code> type is also generated, see the &quot;individual query types&quot; above)</li>
</ul>
</li>
<li>The internal group storage struct offers a public, inherent method
<code>for_each_query</code>:
<ul>
<li><code>fn for_each_query(db: &amp;DB, op: &amp;mut dyn FnMut(...))</code></li>
<li>this is invoked by the code generated by <code>#[salsa::database]</code> when implementing the
<code>for_each_query</code> method of the <code>plumbing::DatabaseOps</code> trait</li>
</ul>
</li>
</ul>
</li>
<li>the group key
<ul>
<li>Again, ideally we would use hygiene to hide the name of this struct,
but since we cannot, it is entitled <code>MyGroupGroupKey</code></li>
<li>It is an enum which contains one variant per query with the value being the key:
<ul>
<li><code>my_query(&lt;MyQueryQuery as salsa::plumbing::Query&lt;DB&gt;&gt;::Key)</code></li>
</ul>
</li>
<li>The group key enum offers a public, inherent method <code>maybe_changed_after</code>:
<ul>
<li><code>fn maybe_changed_after&lt;DB&gt;(db: &amp;DB, db_descriptor: &amp;DB::DatabaseKey, revision: Revision)</code></li>
<li>it is invoked when implementing <code>maybe_changed_after</code> for the database key</li>
</ul>
</li>
</ul>
</li>
</ul>
<h3 id="lowering-database-storage"><a class="header" href="#lowering-database-storage">Lowering database storage</a></h3>
<p>The <code>#[salsa::database(MyGroupStorage)]</code> attribute macro creates the links to the query groups.
It generates the following things:</p>
<ul>
<li>impl of <code>HasQueryGroup&lt;MyGroupStorage&gt;</code> for <code>MyDatabase</code>
<ul>
<li>Naturally, there is one such impl for each query group.</li>
</ul>
</li>
<li>the database key enum
<ul>
<li>Ideally, we would use Rust hygiene to hide this enum, but currently
it is called <code>__SalsaDatabaseKey</code>.</li>
<li>The database key is an enum with one variant per query group:
<ul>
<li><code>MyGroupStorage(&lt;MyGroupStorage as QueryGroup&lt;MyDatabase&gt;&gt;::GroupKey)</code></li>
</ul>
</li>
</ul>
</li>
<li>the database storage struct
<ul>
<li>Ideally, we would use Rust hygiene to hide this struct, but currently
it is called <code>__SalsaDatabaseStorage</code>.</li>
<li>The database storage struct contains one field per query group, storing
its internal storage:
<ul>
<li><code>my_group_storage: &lt;MyGroupStorage as QueryGroup&lt;MyDatabase&gt;&gt;::GroupStorage</code></li>
</ul>
</li>
</ul>
</li>
<li>impl of <code>plumbing::DatabaseStorageTypes</code> for <code>MyDatabase</code>
<ul>
<li>This is a plumbing trait that links to the database storage / database key types.</li>
<li>The <code>salsa::Runtime</code> uses it to determine what data to include. The query types
use it to determine a database-key.</li>
</ul>
</li>
<li>impl of <code>plumbing::DatabaseOps</code> for <code>MyDatabase</code>
<ul>
<li>This contains a <code>for_each_query</code> method, which is implemented by invoking, in turn,
the inherent methods defined on each query group storage struct.</li>
</ul>
</li>
<li>impl of <code>plumbing::DatabaseKey</code> for the database key enum
<ul>
<li>This contains a method <code>maybe_changed_after</code>. We implement this by
matching to get a particular group key, and then invoking the
inherent method on the group key struct.</li>
</ul>
</li>
</ul>
<h2 id="alternatives"><a class="header" href="#alternatives">Alternatives</a></h2>
<p>This proposal results from a fair amount of iteration. Compared to the
status quo, there is one primary downside. We also explain a few things here that
may not be obvious.</p>
<h3 id="why-include-a-group-storage-struct"><a class="header" href="#why-include-a-group-storage-struct">Why include a group storage struct?</a></h3>
<p>You might wonder why we need the <code>MyGroupStorage</code> struct at all. It is a touch of boilerplate,
but there are several advantages to it:</p>
<ul>
<li>You can't attach associated types to the trait itself. This is because the &quot;type version&quot;
of the trait (<code>dyn MyGroup</code>) may not be available, since not all traits are dyn-capable.</li>
<li>We try to keep to the principle that &quot;any type that might be named
externally from the macro is given its name by the user&quot;. In this
case, the <code>#[salsa::database]</code> attribute needs to name group storage
structs.
<ul>
<li>In earlier versions, we tried to auto-generate these names, but
this failed because sometimes users would want to <code>pub use</code> the
query traits and hide their original paths.</li>
<li>(One exception to this principle today are the per-query structs.)</li>
</ul>
</li>
<li>We expect that we can use the <code>MyGroupStorage</code> to achieve more
encapsulation in the future. While the struct must be public and
named from the database, the <em>trait</em> (and query key/value types)
actually does not have to be.</li>
</ul>
<h3 id="downside-size-of-a-database-key"><a class="header" href="#downside-size-of-a-database-key">Downside: Size of a database key</a></h3>
<p>Database keys now wind up with two discriminants: one to identify the
group, and one to identify the query. That's a bit sad. This could be
overcome by using unsafe code: the idea would be that a group/database
key would be stored as the pair of an integer and a <code>union</code>. Each
group within a given database would be assigned a range of integer
values, and the unions would store the actual key values. We leave
such a change for future work.</p>
<h2 id="future-possibilities"><a class="header" href="#future-possibilities">Future possibilities</a></h2>
<p>Here are some ideas we might want to do later.</p>
<h3 id="no-generics"><a class="header" href="#no-generics">No generics</a></h3>
<p>We leave generic parameters on the query group trait etc for future work.</p>
<h3 id="public--private"><a class="header" href="#public--private">Public / private</a></h3>
<p>We'd like the ability to make more details from the query groups
private. This will require some tinkering.</p>
<h3 id="inline-query-definitions"><a class="header" href="#inline-query-definitions">Inline query definitions</a></h3>
<p>Instead of defining queries in separate functions, it might be nice to
have the option of defining query methods in the trait itself:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(MyGroupStorage)]
trait MyGroup {
  #[salsa::input]
  fn my_input(&amp;self, key: K1) -&gt; V1;

  fn my_query(&amp;self, key: K2) -&gt; V2 {
      // define my-query right here!
  }
}
</code></pre>
<p>It's a bit tricky to figure out how to handle this, so that is left
for future work. Also, it would mean that the method body itself is
inside of a macro (the procedural macro) which can make IDE
integration harder.</p>
<h3 id="non-query-functions"><a class="header" href="#non-query-functions">Non-query functions</a></h3>
<p>It might be nice to be able to include functions in the trait that are
<em>not</em> queries, but rather helpers that compose queries. This should be
pretty easy, just need a suitable <code>#[salsa]</code> attribute.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="summary-2"><a class="header" href="#summary-2">Summary</a></h1>
<ul>
<li>We introduce <code>#[salsa::interned]</code> queries which convert a <code>Key</code> type
into a numeric index of type <code>Value</code>, where <code>Value</code> is either the
type <code>InternId</code> (defined by salsa) or some newtype thereof.</li>
<li>Each interned query <code>foo</code> also produces an inverse <code>lookup_foo</code>
method that converts back from the <code>Value</code> to the <code>Key</code> that was
interned.</li>
<li>The <code>InternId</code> type (defined by salsa) is basically a newtype'd integer,
but it internally uses <code>NonZeroU32</code> to enable space-saving optimizations
in memory layout.</li>
<li>The <code>Value</code> types can be any type that implements the
<code>salsa::InternIndex</code> trait, also introduced by this RFC. This trait
has two methods, <code>from_intern_id</code> and <code>as_intern_id</code>.</li>
<li>The interning is integrated into the GC and tracked like any other
query, which means that interned values can be garbage-collected,
and any computation that was dependent on them will be collected.</li>
</ul>
<h1 id="motivation-2"><a class="header" href="#motivation-2">Motivation</a></h1>
<h2 id="the-need-for-interning"><a class="header" href="#the-need-for-interning">The need for interning</a></h2>
<p>Many salsa applications wind up needing the ability to construct
&quot;interned keys&quot;. Frequently this pattern emerges because we wish to
construct identifiers for things in the input. These identifiers
generally have a &quot;tree-like shape&quot;. For example, in a compiler, there
may be some set of input files -- these are enumerated in the inputs
and serve as the &quot;base&quot; for a path that leads to items in the user's
input. But within an input file, there are additional structures, such
as <code>struct</code> or <code>impl</code> declarations, and these structures may contain
further structures within them (such as fields or methods). This gives
rise to a path like so that can be used to identify a given item:</p>
<pre><code class="language-notrust">PathData = &lt;file-name&gt;
         | PathData / &lt;identifier&gt;
</code></pre>
<p>These paths <em>could</em> be represented in the compiler with an <code>Arc</code>, but
because they are omnipresent, it is convenient to intern them instead
and use an integer. Integers are <code>Copy</code> types, which is convenient,
and they are also small (32 bits typically suffices in practice).</p>
<h2 id="why-interning-is-difficult-today-garbage-collection"><a class="header" href="#why-interning-is-difficult-today-garbage-collection">Why interning is difficult today: garbage collection</a></h2>
<p>Unfortunately, integrating interning into salsa at present presents
some hard choices, particularly with a long-lived application. You can
easily add an interning table into the database, but unless you do
something clever, <strong>it will simply grow and grow forever</strong>. But as the
user edits their programs, some paths that used to exist will no
longer be relevant -- for example, a given file or impl may be
removed, invalidating all those paths that were based on it. </p>
<p>Due to the nature of salsa's recomputation model, it is not easy to
detect when paths that used to exist in a prior revision are no longer
relevant in the next revision. <strong>This is because salsa never
explicitly computes &quot;diffs&quot; of this kind between revisions -- it just
finds subcomputations that might have gone differently and re-executes
them.</strong> Therefore, if the code that created the paths (e.g., that
processed the result of the parser) is part of a salsa query, it will
simply not re-create the invalidated paths -- there is no explicit
&quot;deletion&quot; point.</p>
<p>In fact, the same is true of all of salsa's memoized query values. We
may find that in a new revision, some memoized query values are no
longer relevant. For example, in revision R1, perhaps we computed
<code>foo(22)</code> and <code>foo(44)</code>, but in the new input, we now only need to
compute <code>foo(22)</code>. The <code>foo(44)</code> value is still memoized, we just
never asked for its value. <strong>This is why salsa includes a garbage
collector, which can be used to cleanup these memoized values that are
no longer relevant.</strong></p>
<p>But using a garbage collection strategy with a hand-rolled interning
scheme is not easy. You <em>could</em> trace through all the values in
salsa's memoization tables to implement a kind of mark-and-sweep
scheme, but that would require for salsa to add such a mechanism. It
might also be quite a lot of tracing! The current salsa GC mechanism has no
need to walk through the values themselves in a memoization table, it only
examines the keys and the metadata (unless we are freeing a value, of course).</p>
<h2 id="how-this-rfc-changes-the-situation"><a class="header" href="#how-this-rfc-changes-the-situation">How this RFC changes the situation</a></h2>
<p>This RFC presents an alternative. The idea is to move the interning
into salsa itself by creating special &quot;interning
queries&quot;. Dependencies on these queries are tracked like any other
query and hence they integrate naturally with salsa's garbage
collection mechanisms.</p>
<h1 id="users-guide-2"><a class="header" href="#users-guide-2">User's guide</a></h1>
<p>This section covers how interned queries are expected to be used.</p>
<h2 id="declaring-an-interned-query"><a class="header" href="#declaring-an-interned-query">Declaring an interned query</a></h2>
<p>You can declare an interned query like so:</p>
<pre><code class="language-rust ignore">#[salsa::query_group]
trait Foo {
  #[salsa::interned]
  fn intern_path_data(&amp;self, data: PathData) -&gt; salsa::InternId;
}
</code></pre>
<p><strong>Query keys.</strong> Like any query, these queries can take any number of keys. If multiple
keys are provided, then the interned key is a tuple of each key
value. In order to be interned, the keys must implement <code>Clone</code>,
<code>Hash</code> and <code>Eq</code>. </p>
<p><strong>Return type.</strong> The return type of an interned key may be of any type
that implements <code>salsa::InternKey</code>: salsa provides an impl for the
type <code>salsa::InternId</code>, but you can implement it for your own.</p>
<p><strong>Inverse query.</strong> For each interning query, we automatically generate
a reverse query that will invert the interning step. It is named
<code>lookup_XXX</code>, where <code>XXX</code> is the name of the query. Hence here it
would be <code>fn lookup_intern_path_data(&amp;self, key: salsa::InternId) -&gt; PathData</code>.</p>
<h2 id="the-expected-us"><a class="header" href="#the-expected-us">The expected use</a></h2>
<p>Using an interned query is quite straightforward. You simply invoke it
with a key, and you will get back an integer, and you can use the
generated <code>lookup</code> method to convert back to the original value:</p>
<pre><code class="language-rust ignore">let key = db.intern_path_data(path_data1);
let path_data2 = db.lookup_intern_path_data(key);
</code></pre>
<p>Note that the interned value will be cloned -- so, like all Salsa
values, it is best if that is a cheap operation. Interestingly,
interning can help to keep recursive, tree-shaped values cheap,
because the &quot;pointers&quot; within can be replaced with interned keys.</p>
<h2 id="custom-return-types"><a class="header" href="#custom-return-types">Custom return types</a></h2>
<p>The return type for an intern query does not have to be an <code>InternId</code>. It can
be any type that implements the <code>salsa::InternKey</code> trait:</p>
<pre><code class="language-rust ignore">pub trait InternKey {
    /// Create an instance of the intern-key from a `InternId` value.
    fn from_intern_id(v: InternId) -&gt; Self;

    /// Extract the `InternId` with which the intern-key was created.
    fn as_intern_id(&amp;self) -&gt; InternId;
}
</code></pre>
<h2 id="recommended-practice"><a class="header" href="#recommended-practice">Recommended practice</a></h2>
<p>This section shows the recommended practice for using interned keys,
building on the <code>Path</code> and <code>PathData</code> example that we've been working
with. </p>
<h3 id="naming-convention"><a class="header" href="#naming-convention">Naming Convention</a></h3>
<p>First, note the recommended naming convention: the <em>intern key</em> is
<code>Foo</code> and the key's associated data <code>FooData</code> (in our case, <code>Path</code> and
<code>PathData</code>). The intern key is given the shorter name because it is
used far more often. Moreover, other types should never store the full
data, but rather should store the interned key.</p>
<h3 id="defining-the-intern-key"><a class="header" href="#defining-the-intern-key">Defining the intern key</a></h3>
<p>The intern key should always be a newtype struct that implements
the <code>InternKey</code> trait. So, something like this:</p>
<pre><code class="language-rust ignore">pub struct Path(InternId);

impl salsa::InternKey for Path {
    fn from_intern_id(v: InternId) -&gt; Self {
        Path(v)
    }

    fn as_intern_id(&amp;self) -&gt; InternId {
        self.0
    }
}
</code></pre>
<h3 id="convenient-lookup-method"><a class="header" href="#convenient-lookup-method">Convenient lookup method</a></h3>
<p>It is often convenient to add a <code>lookup</code> method to the newtype key:</p>
<pre><code class="language-rust ignore">impl Path {
    // Adding this method is often convenient, since you can then
    // write `path.lookup(db)` to access the data, which reads a bit better.
    pub fn lookup(&amp;self, db: &amp;impl MyDatabase) -&gt; PathData {
        db.lookup_intern_path_data(*self)
    }
}
</code></pre>
<h3 id="defining-the-data-type"><a class="header" href="#defining-the-data-type">Defining the data type</a></h3>
<p>Recall that our paths were defined by a recursive grammar like so:</p>
<pre><code class="language-notrust">PathData = &lt;file-name&gt;
         | PathData / &lt;identifier&gt;
</code></pre>
<p>This recursion is quite typical of salsa applications. The recommended
way to encode it in the <code>PathData</code> structure itself is to build on other
intern keys, like so:</p>
<pre><code class="language-rust ignore">#[derive(Clone, Hash, Eq, ..)]
enum PathData {
  Root(String),
  Child(Path, String),
  //    ^^^^ Note that the recursive reference here
  //         is encoded as a Path.
}
</code></pre>
<p>Note though that the <code>PathData</code> type will be cloned whenever the value
for an interned key is looked up, and it may also be cloned to store
dependency information between queries. So, as an optimization, you
might prefer to avoid <code>String</code> in favor of <code>Arc&lt;String&gt;</code> -- or even
intern the strings as well.</p>
<h2 id="interaction-with-the-garbage-collector"><a class="header" href="#interaction-with-the-garbage-collector">Interaction with the garbage collector</a></h2>
<p>Interned keys can be garbage collected as normal, with one
caveat. Even if requested, Salsa will never collect the results
generated in the current revision. This is because it would permit the
same key to be interned twice in the same revision, possibly mapping
to distinct intern keys each time.</p>
<p>Note that if an interned key <em>is</em> collected, its index will be
re-used.  Salsa's dependency tracking system should ensure that
anything incorporating the older value is considered dirty, but you
may see the same index showing up more than once in the logs.</p>
<h1 id="reference-guide-2"><a class="header" href="#reference-guide-2">Reference guide</a></h1>
<p>Interned keys are implemented using a hash-map that maps from the
interned data to its index, as well as a vector containing (for each
index) various bits of data. In addition to the interned data, we must
track the revision in which the value was interned and the revision in
which it was last accessed, to help manage the interaction with the
GC. Finally, we have to track some sort of free list that tracks the
keys that are being re-used. The current implementation never actually
shrinks the vectors and maps from their maximum size, but this might
be a useful thing to be able to do (this is effectively a memory
allocator, so standard allocation strategies could be used here).</p>
<h2 id="internid"><a class="header" href="#internid">InternId</a></h2>
<p>Presently the <code>InternId</code> type is implemented to wrap a <code>NonZeroU32</code>:</p>
<pre><code class="language-rust ignore">pub struct InternId {
    value: NonZeroU32,
}
</code></pre>
<p>This means that <code>Option&lt;InternId&gt;</code> (or <code>Option&lt;Path&gt;</code>, continuing our
example from before) will only be a single word. To accommodate this,
the <code>InternId</code> constructors require that the value is less than
<code>InternId::MAX</code>; the value is deliberately set low (currently to
<code>0xFFFF_FF00</code>) to allow for more sentinel values in the future (Rust
doesn't presently expose the capability of having sentinel values
other than zero on stable, but it is possible on nightly).</p>
<h1 id="alternatives-and-future-work"><a class="header" href="#alternatives-and-future-work">Alternatives and future work</a></h1>
<p>None at present.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="summary-3"><a class="header" href="#summary-3">Summary</a></h1>
<p>Allow to specify a dependency on a query group without making it a super trait.</p>
<h1 id="motivation-3"><a class="header" href="#motivation-3">Motivation</a></h1>
<p>Currently, there's only one way to express that queries from group <code>A</code> can use
another group <code>B</code>: namely, <code>B</code> can be a super-trait of <code>A</code>:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(AStorage)]
trait A: B {

}
</code></pre>
<p>This approach works and allows one to express complex dependencies. However,
this approach falls down when one wants to make a dependency a private
implementation detail: Clients with <code>db: &amp;impl A</code> can freely call <code>B</code> methods on
the <code>db</code>.</p>
<p>This is a bad situation from a software engineering point of view: if everything
is accessible, it's hard to make a distinction between public API and private
implementation details. In the context of salsa the situation is even worse,
because it breaks the &quot;firewall&quot; pattern. It's customary to wrap low-level
frequently-changing or volatile queries into higher-level queries which produce
stable results and contain invalidation. In the current salsa, however, it's
very easy to accidentally call a low-level volatile query instead of a wrapper,
introducing an undesired dependency.</p>
<h1 id="users-guide-3"><a class="header" href="#users-guide-3">User's guide</a></h1>
<p>To specify query dependencies, a <code>requires</code> attribute should be used:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(SymbolsDatabaseStorage)]
#[salsa::requires(SyntaxDatabase)]
#[salsa::requires(EnvDatabase)]
pub trait SymbolsDatabase {
    fn get_symbol_by_name(&amp;self, name: String) -&gt; Symbol;
}
</code></pre>
<p>The argument of <code>requires</code> is a path to a trait. The traits from all <code>requires</code>
attributes are available when implementing the query:</p>
<pre><code class="language-rust ignore">fn get_symbol_by_name(
    db: &amp;(impl SymbolsDatabase + SyntaxDatabase + EnvDatabase),
    name: String,
) -&gt; Symbol {
    // ...
}
</code></pre>
<p>However, these traits are <strong>not</strong> available without explicit bounds:</p>
<pre><code class="language-rust ignore">fn fuzzy_find_symbol(db: &amp;impl SymbolsDatabase, name: String) {
    // Can't accidentally call methods of the `SyntaxDatabase`
}
</code></pre>
<p>Note that, while the RFC does not propose to add per-query dependencies, query
implementation can voluntarily specify only a subset of traits from <code>requires</code>
attribute:</p>
<pre><code class="language-rust ignore">fn get_symbol_by_name(
    // Purposefully don't depend on EnvDatabase
    db: &amp;(impl SymbolsDatabase + SyntaxDatabase),
    name: String,
) -&gt; Symbol {
    // ...
}
</code></pre>
<h1 id="reference-guide-3"><a class="header" href="#reference-guide-3">Reference guide</a></h1>
<p>The implementation is straightforward and consists of adding traits from
<code>requires</code> attributes to various <code>where</code> bounds. For example, we would generate
the following blanket impl for the above example:</p>
<pre><code class="language-rust ignore">impl&lt;T&gt; SymbolsDatabase for T
where
    T: SyntaxDatabase + EnvDatabase,
    T: salsa::plumbing::HasQueryGroup&lt;SymbolsDatabaseStorage&gt;
{
    ...
}
</code></pre>
<h1 id="alternatives-and-future-work-1"><a class="header" href="#alternatives-and-future-work-1">Alternatives and future work</a></h1>
<p>The semantics of <code>requires</code> closely resembles <code>where</code>, so we could imagine a
syntax based on magical where clauses:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(SymbolsDatabaseStorage)]
pub trait SymbolsDatabase
    where ???: SyntaxDatabase + EnvDatabase
{
    fn get_symbol_by_name(&amp;self, name: String) -&gt; Symbol;
}
</code></pre>
<p>However, it's not obvious what should stand for <code>???</code>. <code>Self</code> won't be ideal,
because supertraits are a sugar for bounds on <code>Self</code>, and we deliberately want
different semantics. Perhaps picking a magical identifier like <code>DB</code> would work
though?</p>
<p>One potential future development here is per-query-function bounds, but they can
already be simulated by voluntarily requiring less bounds in the implementation
function.</p>
<p>Another direction for future work is privacy: because traits from <code>requires</code>
clause are not a part of public interface, in theory it should be possible to
restrict their visibility. In practice, this still hits public-in-private lint,
at least with a trivial implementation.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="summary-4"><a class="header" href="#summary-4">Summary</a></h1>
<p>Add Least Recently Used values eviction as a supplement to garbage collection.</p>
<h1 id="motivation-4"><a class="header" href="#motivation-4">Motivation</a></h1>
<p>Currently, the single mechanism for controlling memory usage in salsa is garbage
collection. Experience with rust-analyzer has shown that it is insufficient for two
reasons:</p>
<ul>
<li>
<p>It's hard to determine which values should be collected. Current
implementation in rust-analyzer just periodically clears all values of
specific queries.</p>
</li>
<li>
<p>GC is generally run in-between revisions. However, especially after just
opening the project, the number of values <em>within a single revision</em> can be
high. In other words, GC doesn't really help with keeping peak memory usage
under control. While it is possible to run GC concurrently with calculations
(and this is in fact what rust-analyzer is doing right now to try to keep high
water mark of memory lower), this is highly unreliable and inefficient.</p>
</li>
</ul>
<p>The mechanism of LRU targets both of these weaknesses:</p>
<ul>
<li>
<p>LRU tracks which values are accessed, and uses this information to determine
which values are actually unused.</p>
</li>
<li>
<p>LRU has a fixed cap on the maximal number of entries, thus bounding the memory
usage.</p>
</li>
</ul>
<h1 id="users-guide-4"><a class="header" href="#users-guide-4">User's guide</a></h1>
<p>It is possible to call <code>set_lru_capacity(n)</code> method on any non-input query. The
effect of this is that the table for the query stores at most <code>n</code> <em>values</em> in
the database. If a new value is computed, and there are already <code>n</code> existing
ones in the database, the least recently used one is evicted. Note that
information about query dependencies is <strong>not</strong> evicted. It is possible to
change lru capacity at runtime at any time. <code>n == 0</code> is a special case, which
completely disables LRU logic. LRU is not enabled by default.</p>
<h1 id="reference-guide-4"><a class="header" href="#reference-guide-4">Reference guide</a></h1>
<p>Implementation wise, we store a linked hash map of keys, in the recently-used
order. Because reads of the queries are considered uses, we now need to
write-lock the query map even if the query is fresh. However we don't do this
bookkeeping if LRU is disabled, so you don't have to pay for it unless you use
it.</p>
<p>A slight complication arises with volatile queries (and, in general, with any
query with an untracked input). Similarly to GC, evicting such a query could
lead to an inconsistent database. For this reason, volatile queries are never
evicted.</p>
<h1 id="alternatives-and-future-work-2"><a class="header" href="#alternatives-and-future-work-2">Alternatives and future work</a></h1>
<p>LRU is a compromise, as it is prone to both accidentally evicting useful queries
and needlessly holding onto useless ones. In particular, in the steady state and
without additional GC, memory usage will be proportional to the lru capacity: it
is not only an upper bound, but a lower bound as well!</p>
<p>In theory, some deterministic way of evicting values when you for sure don't
need them anymore may be more efficient. However, it is unclear how exactly that
would work! Experiments in rust-analyzer show that it's not easy to tame a
dynamic crate graph, and that simplistic phase-based strategies fall down.</p>
<p>It's also worth noting that, unlike GC, LRU can in theory be <em>more</em> memory
efficient than deterministic memory management. Unlike a traditional GC, we can
safely evict &quot;live&quot; objects and recalculate them later. That makes it possible to
use LRU for problems whose working set of &quot;live&quot; queries is larger than the
available memory, at the cost of guaranteed recomputations.</p>
<p>Currently, eviction is strictly LRU based. It should be possible to be smarter
and to take size of values and time that is required to recompute them into
account when making decisions about eviction.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="summary-5"><a class="header" href="#summary-5">Summary</a></h1>
<ul>
<li>Introduce a user-visible concept of <code>Durability</code></li>
<li>Adjusting the &quot;durability&quot; of an input can allow salsa to skip a lot of validation work</li>
<li>Garbage collection -- particularly of interned values -- however becomes more complex</li>
<li>Possible future expansion: automatic detection of more &quot;durable&quot; input values</li>
</ul>
<h1 id="motivation-5"><a class="header" href="#motivation-5">Motivation</a></h1>
<h2 id="making-validation-faster-by-optimizing-for-durability"><a class="header" href="#making-validation-faster-by-optimizing-for-durability">Making validation faster by optimizing for &quot;durability&quot;</a></h2>
<p>Presently, salsa's validation logic requires traversing all
dependencies to check that they have not changed. This can sometimes
be quite costly in practice: rust-analyzer for example sometimes
spends as much as 90ms revalidating the results from a no-op
change. One option to improve this is simply optimization --
<a href="https://github.com/salsa-rs/salsa/pull/176">salsa#176</a> for example reduces validation times significantly, and
there remains opportunity to do better still. However, even if we are
able to traverse the dependency graph more efficiently, it will still
be an O(n) process. It would be nice if we could do better.</p>
<p>One observation is that, in practice, there are often input values
that are known to change quite infrequently. For example, in
rust-analyzer, the standard library and crates downloaded from
crates.io are unlikely to change (though changes are possible; see
below). Similarly, the <code>Cargo.toml</code> file for a project changes
relatively infrequently compared to the sources. We say then that
these inputs are more <strong>durable</strong> -- that is, they change less frequently.</p>
<p>This RFC proposes a mechanism to take advantage of durability for
optimization purposes. Imagine that we have some query Q that depends
solely on the standard library. The idea is that we can track the last
revision R when the standard library was changed. Then, when
traversing dependencies, we can skip traversing the dependencies of Q
if it was last validated after the revision R. Put another way, we
only need to traverse the dependencies of Q when the standard library
changes -- which is unusual. If the standard library <em>does</em> change,
for example by user's tinkering with the internal sources, then yes we
walk the dependencies of Q to see if it is affected.</p>
<h1 id="users-guide-5"><a class="header" href="#users-guide-5">User's guide</a></h1>
<h2 id="the-durability-type"><a class="header" href="#the-durability-type">The durability type</a></h2>
<p>We add a new type <code>salsa::Durability</code> which has three associated constants:</p>
<pre><code class="language-rust ignore">#[derive(Copy, Clone, Debug, Ord)]
pub struct Durability(..);

impl Durability {
  // Values that change regularly, like the source to the current crate.
  pub const LOW: Durability;

  // Values that change infrequently, like Cargo.toml.
  pub const MEDIUM: Durability;

  // Values that are not expected to change, like sources from crates.io or the stdlib.
  pub const HIGH: Durability;
}
</code></pre>
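<h2 id="specifying-the-durability-of-an-input"><a class="header" href="#specifying-the-durability-of-an-input">Specifying the durability of an input</a></h2>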
<p>When setting an input <code>foo</code>, one can now invoke a method
<code>set_foo_with_durability</code>, which takes a <code>Durability</code> as the final
argument:</p>
<pre><code class="language-rust ignore">// db.set_foo(key, value) is equivalent to:
db.set_foo_with_durability(key, value, Durability::LOW);

// This would indicate that `foo` is not expected to change:
db.set_foo_with_durability(key, value, Durability::HIGH);
</code></pre>
<h2 id="durability-of-interned-values"><a class="header" href="#durability-of-interned-values">Durability of interned values</a></h2>
<p>Interned values are always considered <code>Durability::HIGH</code>. This makes
sense as many queries that only use high durability inputs will also
make use of interning internally. A consequence of this is that they
will not be garbage collected unless you use the specific patterns
recommended below.</p>
<h2 id="synthetic-writes"><a class="header" href="#synthetic-writes">Synthetic writes</a></h2>
<p>Finally, we add one new method, <code>synthetic_write(durability)</code>,
available on the salsa runtime:</p>
<pre><code class="language-rust ignore">db.salsa_runtime().synthetic_write(Durability::HIGH)
</code></pre>
<p>As the name suggests, <code>synthetic_write</code> causes salsa to act <em>as
though</em> a write to an input of the given durability had taken
place. This can be used for benchmarking, but it's also important for
controlling what values get garbage collected, as described below.</p>
<h2 id="tracing-and-garbage-collection"><a class="header" href="#tracing-and-garbage-collection">Tracing and garbage collection</a></h2>
<p>Durability affects garbage collection. The <code>SweepStrategy</code> struct is
modified as follows:</p>
<pre><code class="language-rust ignore">/// Sweeps values which may be outdated, but which have not
/// been verified since the start of the current collection.
/// These are typically memoized values from previous computations
/// that are no longer relevant.
pub fn sweep_outdated(self) -&gt; SweepStrategy;

/// Sweeps values which have not been verified since the start
/// of the current collection, even if they are known to be
/// up to date. This can be used to collect &quot;high durability&quot; values
/// that are not *directly* used by the main query.
///
/// So, for example, imagine a main query `result` which relies
/// on another query `threshold` and (indirectly) on a `threshold_inner`:
///
/// ```
/// result(10) [durability: Low]
///    |
///    v
/// threshold(10) [durability: High]
///    |
///    v
/// threshold_inner(10)  [durability: High]
/// ```
///
/// If you modify a low durability input and then access `result`,
/// then `result(10)` and its *immediate* dependencies will
/// be considered &quot;verified&quot;. However, because `threshold(10)`
/// has high durability and no high durability input was modified,
/// we will not verify *its* dependencies, so `threshold_inner` is not
/// verified (but it is also not outdated).
///
/// Collecting unverified things would therefore collect `threshold_inner(10)`.
/// Collecting only *outdated* things (i.e., with `sweep_outdated`)
/// would collect nothing -- but this does mean that some high durability
/// queries that are no longer relevant to your main query may stick around.
///
/// To get the most precise garbage collection, do a synthetic write with
/// high durability -- this will force us to verify *all* values. You can then
/// sweep unverified values.
pub fn sweep_unverified(self) -&gt; SweepStrategy;
</code></pre>
<h1 id="reference-guide-5"><a class="header" href="#reference-guide-5">Reference guide</a></h1>
<h2 id="review-the-need-for-gc-to-collect-outdated-values"><a class="header" href="#review-the-need-for-gc-to-collect-outdated-values">Review: The need for GC to collect outdated values</a></h2>
<p>In general, salsa's lazy validation scheme can lead to the accumulation
of garbage that is no longer needed. Consider a query like this one:</p>
<pre><code class="language-rust ignore">fn derived1(db: &amp;impl Database, start: usize) {
  let middle = db.input(start);
  db.derived2(middle)
}
</code></pre>
<p>Now imagine that, on some particular run, we compute <code>derived1(22)</code>:</p>
<ul>
<li><code>derived1(22)</code>
<ul>
<li>executes <code>input(22)</code>, which returns <code>44</code></li>
<li>then executes <code>derived2(44)</code></li>
</ul>
</li>
</ul>
<p>The end result of this execution will be a dependency graph
like:</p>
<pre><code class="language-notrust">derived1(22) -&gt; derived2(44)
  |
  v
input(22)
</code></pre>
<p>Now, imagine that the user modifies <code>input(22)</code> to have the value <code>45</code>.
The next time <code>derived1(22)</code> executes, it will load <code>input(22)</code> as before,
but then execute <code>derived2(45)</code>. This leaves us with a dependency
graph as follows:</p>
<pre><code class="language-notrust">derived1(22) -&gt; derived2(45)
  |
  v
input(22)       derived2(44)
</code></pre>
<p>Notice that we still see <code>derived2(44)</code> in the graph. This is because
we memoized the result in last round and then simply had no use for it
in this round. The role of GC is to collect &quot;outdated&quot; values like
this one.</p>
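<h3 id="review-tracing-and-gc-before-durability"><a class="header" href="#review-tracing-and-gc-before-durability">Review: Tracing and GC before durability</a></h3>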
<p>In the absence of durability, when you execute a query Q in some new
revision where Q has not previously executed, salsa must trace back
through all the queries that Q depends on to ensure that they are
still up to date. As each of Q's dependencies is validated, we mark it
to indicate that it has been checked in the current revision (and
thus, within a particular revision, we would never validate or trace a
particular query twice).</p>
<p>So, to continue our example, when we first executed <code>derived1(22)</code>
in revision R1, we might have had a graph like:</p>
<pre><code class="language-notrust">derived1(22)   -&gt; derived2(44)
[verified: R1]    [verified: R1]
  |
  v
input(22)
</code></pre>
<p>Now, after we modify <code>input(22)</code> and execute <code>derived1(22)</code> again, we
would have a graph like:</p>
<pre><code class="language-notrust">derived1(22)   -&gt; derived2(45)
[verified: R2]    [verified: R2]
  |
  v
input(22)         derived2(44)
                  [verified: R1]
</code></pre>
<p>Note that <code>derived2(44)</code>, the outdated value, never had its &quot;verified&quot;
revision updated, because we never accessed it.</p>
<p>Salsa leverages this validation stamp to serve as the &quot;marking&quot; phase
of a simple mark-sweep garbage collector. The idea is that the sweep
method can collect any values that are &quot;outdated&quot; (whose &quot;verified&quot;
revision is less than the current revision).</p>
<p>The intended model is that one can do a &quot;mark-sweep&quot; style garbage
collection like so:</p>
<pre><code class="language-rust ignore">// Modify some input, triggering a new revision.
db.set_input(22, 45);

// The **mark** phase: execute the &quot;main query&quot;, with the intention
// that we wish to retain all the memoized values needed to compute
// this main query, but discard anything else. For example, in an IDE
// context, this might be a &quot;compute all errors&quot; query.
db.derived1(22);

// The **sweep** phase: discard anything that was not traced during
// the mark phase.
db.sweep_all(...);
</code></pre>
<p>In the case of our example, when we execute <code>sweep_all</code>, it would
collect <code>derived2(44)</code>.</p>
<h2 id="challenge-durability-lets-us-avoid-tracing"><a class="header" href="#challenge-durability-lets-us-avoid-tracing">Challenge: Durability lets us avoid tracing</a></h2>
<p>This tracing model is affected by the move to durability. Now, if some
derived value has a high durability, we may skip tracing its
descendants altogether. This means that they would never be &quot;verified&quot;
-- that is, their &quot;verified date&quot; would never be updated.</p>
<p>This is why we modify the definition of &quot;outdated&quot; as follows:</p>
<ul>
<li>For a query value <code>Q</code> with durability <code>D</code>, let <code>R_lc</code> be the revision when
values of durability <code>D</code> last changed. Let <code>R_v</code> be the revision when
<code>Q</code> was last verified.</li>
<li><code>Q</code> is outdated if <code>R_v &lt; R_lc</code>.
<ul>
<li>In other words, if <code>Q</code> may have changed since it was last verified.</li>
</ul>
</li>
</ul>
<h2 id="collecting-interned-and-untracked-values"><a class="header" href="#collecting-interned-and-untracked-values">Collecting interned and untracked values</a></h2>
<p>Most values can be collected whenever we like without influencing
correctness.  However, interned values and those with untracked
dependencies are an exception -- <strong>they can only be collected when
outdated</strong>.  This is because their values may not be reproducible --
in other words, re-executing an interning query (or one with untracked
dependencies, which can read arbitrary program state) twice in a row
may produce a different value. In the case of an interning query, for
example, we may wind up using a different integer than we did before.
If the query is outdated, this is not a problem: anything that
depended on its result must also be outdated, and hence would be
re-executed and would observe the new value. But if the query is <em>not</em>
outdated, then we could get inconsistent results.</p>
<h1 id="alternatives-and-future-work-3"><a class="header" href="#alternatives-and-future-work-3">Alternatives and future work</a></h1>
<h2 id="rejected-arbitrary-durabilities"><a class="header" href="#rejected-arbitrary-durabilities">Rejected: Arbitrary durabilities</a></h2>
<p>We considered permitting arbitrary &quot;levels&quot; of durability -- for
example, allowing the user to specify a number -- rather than offering
just three. Ultimately it seemed like that level of control wasn't
really necessary and that having just three levels would be sufficient
and simpler.</p>
<h2 id="rejected-durability-lattices"><a class="header" href="#rejected-durability-lattices">Rejected: Durability lattices</a></h2>
<p>We also considered permitting a &quot;lattice&quot; of durabilities -- e.g., to
mirror the crate DAG in rust-analyzer -- but this is tricky because
the lattice itself would be dependent on other inputs.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="dynamic-databases"><a class="header" href="#dynamic-databases">Dynamic databases</a></h1>
<h2 id="metadata-2"><a class="header" href="#metadata-2">Metadata</a></h2>
<ul>
<li>Author: nikomatsakis</li>
<li>Date: 2020-06-29</li>
<li>Introduced in: <a href="https://github.com/salsa-rs/salsa/pull/1">salsa-rs/salsa#1</a> (please update once you open your PR)</li>
</ul>
<h2 id="summary-6"><a class="header" href="#summary-6">Summary</a></h2>
<ul>
<li>Retool Salsa's setup so that the generated code for a query group is not
dependent on the final database type, and interacts with the database only
through <code>dyn</code> trait values.</li>
<li>This imposes a certain amount of indirection but has the benefit that when a
query group definition changes, less code must be recompiled as a result.</li>
<li>Key changes include:
<ul>
<li>Database keys are &quot;interned&quot; in the database to produce a
<code>DatabaseKeyIndex</code>.</li>
<li>The values for cached query are stored directly in the hashtable instead of
in an <code>Arc</code>. There is still an Arc per cached query, but it stores the
dependency information.</li>
<li>The various traits are changed to make <code>salsa::Database</code> dyn-safe. Invoking
methods on the runtime must now go through a <code>salsa::Runtime</code> trait.</li>
<li>The <code>salsa::requires</code> functionality is removed.</li>
</ul>
</li>
<li>Upsides of the proposal:
<ul>
<li>Potentially improved recompilation time. Minimal code is regenerated.</li>
<li>Removing the <code>DatabaseData</code> unsafe code hack that was required by slots.</li>
</ul>
</li>
<li>Downsides of the proposal:
<ul>
<li>The effect on runtime performance must be measured.</li>
<li><code>DatabaseKeyIndex</code> values will leak, as we propose no means to reclaim them.
However, the same is true of <code>Slot</code> values today.</li>
<li>Storing values for the tables directly in the hashtable makes it less
obvious how we would return references to them in a safe fashion (before, I
had planned to have a separate module that held onto the Arc for the slot,
so we were sure the value would not be deallocated; one can still imagine
supporting this feature, but it would require some fancier unsafe code
reasoning, although it would be more efficient.)</li>
<li>The <code>salsa::requires</code> functionality is removed.</li>
</ul>
</li>
</ul>
<h2 id="motivation-6"><a class="header" href="#motivation-6">Motivation</a></h2>
<p>Under the current salsa setup, all of the &quot;glue code&quot; that manages cache
invalidation and other logic is ultimately parameterized by a type <code>DB</code> that
refers to the full database. The problem is that, if you consider a typical
salsa crate graph, the actual value for that type is not available until the
final database crate is compiled:</p>
<pre class="mermaid">graph TD;
  Database[&quot;Crate that defines the database&quot;];
  QueryGroupA[&quot;Crate with query group A&quot;];
  QueryGroupB[&quot;Crate with query group B&quot;];
  SalsaCrate[&quot;the `salsa` crate&quot;];
  Database -- depends on --&gt; QueryGroupA;
  Database -- depends on --&gt; QueryGroupB;
  QueryGroupA -- depends on --&gt; SalsaCrate;
  QueryGroupB -- depends on --&gt; SalsaCrate;
</pre>
<p>The result is that we do not actually compile a good part of the code from
<code>QueryGroupA</code> or <code>QueryGroupB</code> until we build the final database crate.</p>
<h3 id="what-you-can-do-today-dyn-traits"><a class="header" href="#what-you-can-do-today-dyn-traits">What you can do today: dyn traits</a></h3>
<p>What you can do today is to define a &quot;dyn-compatible&quot; query group
trait and then write your derived functions using a <code>dyn</code> type as the
argument:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(QueryGroupAStorage)]
trait QueryGroupA {
    fn derived(&amp;self, key: usize) -&gt; usize;
}

fn derived(db: &amp;dyn QueryGroupA, key: usize) -&gt; usize {
    key * 2
}
</code></pre>
<p>This has the benefit that the <code>derived</code> function is not generic. However, it's
still true that the glue code salsa makes will be generic over a <code>DB</code> type --
this includes the impl of <code>QueryGroupA</code> but also the <code>Slot</code> and other machinery.
This means that even if the only change is to query group B, in a different
crate, the glue code for query group A ultimately has to be recompiled whenever
the <code>Database</code> crate is rebuilt (though incremental compilation may help here).
Moreover, as reported in <a href="https://github.com/salsa-rs/salsa/issues/220">salsa-rs/salsa#220</a>, measurements of rust-analyzer
suggest that this code may be duplicated and accounting for more of the binary
than we would expect.</p>
<p>FIXME: I'd like to have better measurements on the above!</p>
<h3 id="our-goal"><a class="header" href="#our-goal">Our goal</a></h3>
<p>The primary goal of this RFC is to make it so that the glue code we generate for
query groups is not dependent on the database type, thus enabling better
incremental rebuilds.</p>
<h2 id="users-guide-6"><a class="header" href="#users-guide-6">User's guide</a></h2>
<p>Most of the changes in this RFC are &quot;under the hood&quot;. But there are various user
visible changes proposed here.</p>
<h3 id="all-query-groups-must-be-dyn-safe"><a class="header" href="#all-query-groups-must-be-dyn-safe">All query groups must be dyn safe</a></h3>
<p>The largest one is that <strong>all Salsa query groups must now be dyn-safe</strong>. The
existing salsa query methods are all dyn-safe, so what this really implies is
that one cannot have super-traits that use generic methods or other things that
are not dyn safe. For example, this query group would be illegal:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(QueryGroupAStorage)]
trait QueryGroupA: Foo {
}

trait Foo {
    fn method&lt;T&gt;(t: T) { }
}
</code></pre>
<p>We could support query groups that are not dyn safe, but it would require us to
have two &quot;similar but different&quot; ways of generating plumbing, and I'm not
convinced that it's worth it. Moreover, it would require some form of opt-in so
that would be a measure of user complexity as well.</p>
<h3 id="all-query-functions-must-take-a-dyn-database"><a class="header" href="#all-query-functions-must-take-a-dyn-database">All query functions must take a dyn database</a></h3>
<p>You used to be able to implement queries by using <code>impl MyDatabase</code>, like so:</p>
<pre><code class="language-rust ignore">fn my_query(db: &amp;impl MyDatabase, ...) { .. }
</code></pre>
<p>but you must now use <code>dyn MyDatabase</code>:</p>
<pre><code class="language-rust ignore">fn my_query(db: &amp;dyn MyDatabase, ...) { .. }
</code></pre>
<h3 id="databases-embed-a-storagedb-with-a-fixed-field-name"><a class="header" href="#databases-embed-a-storagedb-with-a-fixed-field-name">Databases embed a <code>Storage&lt;DB&gt;</code> with a fixed field name</a></h3>
<p>The &quot;Hello World&quot; database becomes the following:</p>
<pre><code class="language-rust ignore">#[salsa::database(QueryGroup1, ..., QueryGroupN)]
struct MyDatabase {
    storage: salsa::Storage&lt;Self&gt;
}

impl salsa::Database for MyDatabase {}
</code></pre>
<p>In particular:</p>
<ul>
<li>You now embed a <code>salsa::Storage&lt;Self&gt;</code> instead of a <code>salsa::Runtime&lt;Self&gt;</code></li>
<li>The field <strong>must</strong> be named <code>storage</code> by default; we can include a <code>#[salsa::storage_field(xxx)]</code> annotation to change that default if desired.
<ul>
<li>Or we could scrape the struct declaration and infer it, I suppose.</li>
</ul>
</li>
<li>You no longer have to define the <code>salsa_runtime</code> and <code>salsa_runtime_mut</code> methods, they move to the <code>DatabaseOps</code> trait and are manually implemented by doing <code>self.storage.runtime()</code> and so forth.</li>
</ul>
<p>Why these changes, and what is this <code>Storage</code> struct? This is because the actual
storage for queries is moving outside of the runtime. The Storage struct just
combines the <code>Runtime</code> (whose type no longer references <code>DB</code> directly) with an
<code>Arc&lt;DB::Storage&gt;</code>. The full type of <code>Storage</code>, since it includes the database
type, cannot appear in any public interface, it is just used by the various
implementations that are created by <code>salsa::database</code>.</p>
<h3 id="instead-of-dbqueryq-you-write-qin_dbdb"><a class="header" href="#instead-of-dbqueryq-you-write-qin_dbdb">Instead of <code>db.query(Q)</code>, you write <code>Q.in_db(&amp;db)</code></a></h3>
<p>As a consequence of the previous point, the existing <code>query</code> and <code>query_mut</code>
methods on the <code>salsa::Database</code> trait are changed to methods on the query types
themselves. So instead of <code>db.query(SomeQuery)</code>, one would write
<code>SomeQuery.in_db(&amp;db)</code> (or <code>in_db_mut</code>). This both helps by making the
<code>salsa::Database</code> trait dyn-safe and also works better with the new use of <code>dyn</code>
types, since it permits a coercion from <code>&amp;db</code> to the appropriate <code>dyn</code> database
type at the point of call.</p>
<h3 id="the-salsa-event-mechanism-will-move-to-dynamic-dispatch"><a class="header" href="#the-salsa-event-mechanism-will-move-to-dynamic-dispatch">The salsa-event mechanism will move to dynamic dispatch</a></h3>
<p>A further consequence is that the existing <code>salsa_event</code> method will be
simplified and made suitable for dynamic dispatch. It used to take a closure
that would produce the event if necessary; it now simply takes the event itself.
This is partly because events themselves no longer contain complex information:
they used to have database-keys, which could require expensive cloning, but they
now have simple indices.</p>
<pre><code class="language-rust ignore">fn salsa_event(&amp;self, event: Event) {
    #![allow(unused_variables)]
}
</code></pre>
<p>This may imply some runtime cost, since various parts of the machinery invoke
<code>salsa_event</code>, and those calls will now be virtual calls. They would previously
have been static calls that would likely have been optimized away entirely.</p>
<p>It is however possible that ThinLTO or other such optimization could remove
those calls; this has not been tested, and in any case the runtime effects are
not expected to be high, since all the calls will always go to the same
function.</p>
<h3 id="the-salsarequires-function-is-removed"><a class="header" href="#the-salsarequires-function-is-removed">The <code>salsa::requires</code> function is removed</a></h3>
<p>We currently offer a feature for &quot;private&quot; dependencies between query groups
called <code>#[salsa::requires(ExtraDatabase)]</code>. This then requires query
functions to be written like:</p>
<pre><code class="language-rust ignore">fn query_fn(db: &amp;impl Database + ExtraDatabase, ...) { }
</code></pre>
<p>This format is not compatible with <code>dyn</code>, so this feature is removed.</p>
<h2 id="reference-guide-6"><a class="header" href="#reference-guide-6">Reference guide</a></h2>
<h3 id="example"><a class="header" href="#example">Example</a></h3>
<p>To explain the proposal, we'll use the Hello World example, lightly adapted:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(HelloWorldStorage)]
trait HelloWorld: salsa::Database {
    #[salsa::input]
    fn input_string(&amp;self, key: ()) -&gt; Arc&lt;String&gt;;

    fn length(&amp;self, key: ()) -&gt; usize;
}

fn length(db: &amp;dyn HelloWorld, (): ()) -&gt; usize {
    // Read the input string:
    let input_string = db.input_string(());

    // Return its length:
    input_string.len()
}

#[salsa::database(HelloWorldStorage)]
struct DatabaseStruct {
    runtime: salsa::Runtime&lt;DatabaseStruct&gt;,
}

impl salsa::Database for DatabaseStruct {
    fn salsa_runtime(&amp;self) -&gt; &amp;salsa::Runtime&lt;Self&gt; {
        &amp;self.runtime
    }

    fn salsa_runtime_mut(&amp;mut self) -&gt; &amp;mut salsa::Runtime&lt;Self&gt; {
        &amp;mut self.runtime
    }
}
</code></pre>
<h3 id="identifying-queries-using-the-databasekeyindex"><a class="header" href="#identifying-queries-using-the-databasekeyindex">Identifying queries using the <code>DatabaseKeyIndex</code></a></h3>
<p>We introduce the following struct that represents a database key using a series
of indices:</p>
<pre><code class="language-rust ignore">struct DatabaseKeyIndex {
    /// Identifies the query group.
    group_index: u16,

    /// Identifies the query within the group.
    query_index: u16,

    /// Identifies the key within the query.
    key_index: u32,
}
</code></pre>
<p>This struct allows the various query group structs to refer to database keys
without having to use a type like <code>DB::DatabaseKey</code> that is dependent on the
<code>DB</code>.</p>
<p>The group/query indices will be assigned by the <code>salsa::database</code> and
<code>salsa::query_group</code> macros respectively. When query group storage is created,
it will be passed in its group index by the database. Each query will be able to
access its query-index through the <code>Query</code> trait, as they are statically known
at the time that the query is compiled (the group index, in contrast, depends on
the full set of groups for the database).</p>
<p>The key index can be assigned by the query as it executes without any central
coordination. Each query will use an <code>IndexMap</code> (from the <code>indexmap</code> crate)
mapping <code>Q::Key -&gt; QueryState</code>. Inserting new keys into this map also creates
new indices, and it is possible to index into the map in O(1) time later to
obtain the state (or key) from a given query. This map replaces the existing
<code>Q::Key -&gt; Arc&lt;Slot&lt;..&gt;&gt;</code> map that is used today.</p>
<p>One notable implication: we cannot remove entries from the query index map
(e.g., for GC) because that would invalidate the existing indices. We can
however replace the query-state with a &quot;not computed&quot; value. This is not new:
slots already take this approach today. In principle, we could extend the
tracing GC to permit compressing and perhaps even rewriting indices, but it's
not clear that this is a problem in practice.</p>
<p>The <code>DatabaseKeyIndex</code> also supports a <code>debug</code> method that returns a value with
a human readable <code>debug!</code> output, so that you can do <code>debug!(&quot;{:?}&quot;, index.debug(db))</code>. This works by generating a <code>fmt_debug</code> method that is
supported by the various query groups.</p>
<h3 id="the-various-query-traits-are-not-generic-over-a-database"><a class="header" href="#the-various-query-traits-are-not-generic-over-a-database">The various query traits are not generic over a database</a></h3>
<p>Today, the <code>Query</code>, <code>QueryFunction</code>, and <code>QueryGroup</code> traits are generic over
the database <code>DB</code>, which allows them to name the final database type and
associated types derived from it. In the new scheme, we never want to do that,
and so instead they will now have an associated type, <code>DynDb</code>, that maps to the
<code>dyn</code> version of the query group trait that the query is associated with.</p>
<p>Therefore <code>QueryFunction</code> for example can become:</p>
<pre><code class="language-rust ignore">pub trait QueryFunction: Query {
    fn execute(db: &amp;&lt;Self as QueryDb&lt;'_&gt;&gt;::DynDb, key: Self::Key) -&gt; Self::Value;
    fn recover(db: &amp;&lt;Self as QueryDb&lt;'_&gt;&gt;::DynDb, cycle: &amp;[DatabaseKeyIndex], key: &amp;Self::Key) -&gt; Option&lt;Self::Value&gt; {
        let _ = (db, cycle, key);
        None
    }
}
</code></pre>
<h3 id="storing-query-results-and-tracking-dependencies"><a class="header" href="#storing-query-results-and-tracking-dependencies">Storing query results and tracking dependencies</a></h3>
<p>In today's setup, we have all the data for a particular query stored in a
<code>Slot&lt;Q, DB, MP&gt;</code>, and these slots hold references to one another to track
dependencies. Because the type of each slot is specific to the particular query
<code>Q</code>, the references between slots are done using a <code>Arc&lt;dyn DatabaseSlot&lt;DB&gt;&gt;</code>
handle. This requires some unsafe hacks, including the <code>DatabaseData</code> associated
type.</p>
<p>This RFC proposes to alter this setup. Dependencies will store a <code>DatabaseKeyIndex</code>
instead. This means that validating dependencies is less efficient, as we no
longer have a direct pointer to the dependency information but instead must
execute three index lookups (one to find the query group, one to locate the
query, and then one to locate the key). Similarly the LRU list can be reverted
to a <code>LinkedHashMap</code> of indices.</p>
<p>We may tinker with other approaches too: the key change in the RFC is that we
do not need to store a <code>DB::DatabaseKey</code> or <code>Slot&lt;..DB..&gt;</code>, but instead can use
some type for dependencies that is independent of the database type <code>DB</code>.</p>
<h3 id="dispatching-methods-from-a-databasekeyindex"><a class="header" href="#dispatching-methods-from-a-databasekeyindex">Dispatching methods from a <code>DatabaseKeyIndex</code></a></h3>
<p>There are a number of methods that can be dispatched through the database
interface on a <code>DatabaseKeyIndex</code>. For example, we already mentioned
<code>fmt_debug</code>, which emits a debug representation of the key, but there is also
<code>maybe_changed_after</code>, which checks whether the value for a given key may have
changed since the given revision. Each of these methods is a member of the
<code>DatabaseOps</code> trait and they are dispatched as follows.</p>
<p>First, the <code>#[salsa::database]</code> procedural macro is the one which
generates the <code>DatabaseOps</code> impl for the database. This base method
simply matches on the group index to determine which query group
contains the key, and then dispatches to an inherent
method defined on the appropriate query group struct:</p>
<pre><code class="language-rust ignore">impl salsa::plumbing::DatabaseOps for DatabaseStruct {
    // We'll use the `fmt_debug` method as an example
    fn fmt_debug(&amp;self, index: DatabaseKeyIndex, fmt: &amp;mut std::fmt::Formatter&lt;'_&gt;) -&gt; std::fmt::Result {
        match index.group_index() {
            0 =&gt; {
                let storage = &lt;Self as HasQueryGroup&lt;HelloWorld&gt;&gt;::group_storage(self);
                storage.fmt_debug(index, fmt)
            }

            _ =&gt; panic!(&quot;Invalid index&quot;)
        }
    }
}
</code></pre>
<p>The query group struct has a very similar inherent method that dispatches based
on the query index and invokes a method on the query storage:</p>
<pre><code class="language-rust ignore">impl HelloWorldGroupStorage__ {
    // We'll use the `fmt_debug` method as an example
    fn fmt_debug(&amp;self, index: DatabaseKeyIndex, fmt: &amp;mut std::fmt::Formatter&lt;'_&gt;) -&gt; std::fmt::Result {
        match index.query_index() {
            0 =&gt; self.appropriate_query_field.fmt_debug(index, fmt),
            1 =&gt; ...
            _ =&gt; panic!(&quot;Invalid index&quot;)
        }
    }
}
</code></pre>
<p>Finally, the query storage can use the key index to look up the appropriate
data from the <code>FxIndexSet</code>.</p>
<h3 id="wrap-runtime-in-a-storagedb-type"><a class="header" href="#wrap-runtime-in-a-storagedb-type">Wrap runtime in a <code>Storage&lt;DB&gt;</code> type</a></h3>
<p>The Salsa runtime is currently <code>Runtime&lt;DB&gt;</code> but it will change to just
<code>Runtime</code> and thus not be generic over the database. This means it can be
referenced directly by query storage implementations. This is very useful
because it allows that type to have a number of <code>pub(crate)</code> details that query
storage implementations make use of but which are not exposed as part of our
public API.</p>
<p>However, the <code>Runtime</code> type used to contain a <code>DB::Storage</code>, and without the
<code>DB</code> in its type, it no longer can. Therefore, we will introduce a new
<code>Storage&lt;DB&gt;</code> type which is defined like so:</p>
<pre><code class="language-rust ignore">pub struct Storage&lt;DB: DatabaseImpl&gt; {
    query_store: Arc&lt;DB::DatabaseStorage&gt;,
    runtime: Runtime,
}

impl&lt;DB&gt; Storage&lt;DB&gt; {
    pub fn query_store(&amp;self) -&gt; &amp;DB::DatabaseStorage {
        &amp;self.query_store
    }

    pub fn salsa_runtime(&amp;self) -&gt; &amp;Runtime {
        &amp;self.runtime
    }

    pub fn salsa_runtime_mut(&amp;mut self) -&gt; &amp;mut Runtime {
        &amp;mut self.runtime
    }

    /// Used for parallel queries
    pub fn snapshot(&amp;self) -&gt; Self {
        Storage {
            query_store: self.query_store.clone(),
            runtime: self.runtime.snapshot(),
        }
    }
}
</code></pre>
<p>The user is expected to include a field <code>storage: Storage&lt;DB&gt;</code> in their database
definition. The <code>salsa::database</code> procedural macro, when it generates impls of
traits like <code>HasQueryGroup</code>, will embed code like <code>self.storage</code> that looks for
that field.</p>
<h3 id="salsa_runtime-methods-move-to-databaseops-trait"><a class="header" href="#salsa_runtime-methods-move-to-databaseops-trait"><code>salsa_runtime</code> methods move to <code>DatabaseOps</code> trait</a></h3>
<p>The <code>salsa_runtime</code> methods used to be manually implemented by users to define
the field that contains the salsa runtime. This was always boilerplate. The
<code>salsa::database</code> macro now handles that job by defining them to invoke the
corresponding methods on <code>Storage</code>.</p>
<h3 id="salsa-database-trait-becomes-dyn-safe"><a class="header" href="#salsa-database-trait-becomes-dyn-safe">Salsa database trait becomes dyn safe</a></h3>
<p>Under this proposal, the Salsa database must be dyn safe. This implies that
we have to make a few changes:</p>
<ul>
<li>The <code>query</code> and <code>query_mut</code> methods move to an extension trait.</li>
<li>The <code>DatabaseStorageTypes</code> supertrait is removed (that trait is renamed and altered, see next section).</li>
<li>The <code>salsa_event</code> method changes, as described in the User's guide.</li>
</ul>
<h3 id="salsa-database-trait-requires-static-at-least-for-now"><a class="header" href="#salsa-database-trait-requires-static-at-least-for-now">Salsa database trait requires <code>'static</code>, at least for now</a></h3>
<p>One downside of this proposal is that the <code>salsa::Database</code> trait now has a
<code>'static</code> bound. This is a result of the lack of GATs -- in particular, the
queries expect a <code>&lt;Q as QueryDb&lt;'_&gt;&gt;::DynDb</code> as argument. In the query definition, we have
something like <code>type DynDb = dyn QueryGroupDatabase</code>, which in turn defaults to
<code>dyn QueryGroupDatabase + 'static</code>.</p>
<p>At the moment, this limitation is harmless, since salsa databases don't support
generic parameters. But it would be good to lift in the future, especially as we
would like to support arena allocation and other such patterns. The limitation
could be overcome in the future by:</p>
<ul>
<li>converting to a GAT like <code>DynDb&lt;'a&gt;</code>, if those were available;</li>
<li>or by simulating GATs by introducing a trait to carry the <code>DynDb</code> definition,
like <code>QueryDb&lt;'a&gt;</code>, where <code>Query</code> has the supertrait <code>for&lt;'a&gt; Self: QueryDb&lt;'a&gt;</code>. This would permit the <code>DynDb</code> type to be referenced by writing
<code>&lt;Q as QueryDb&lt;'a&gt;&gt;::DynDb</code>. </li>
</ul>
<h3 id="salsa-query-group-traits-are-extended-with-database-and-hasquerygroup-supertrait"><a class="header" href="#salsa-query-group-traits-are-extended-with-database-and-hasquerygroup-supertrait">Salsa query group traits are extended with <code>Database</code> and <code>HasQueryGroup</code> supertrait</a></h3>
<p>When <code>#[salsa::query_group]</code> is applied to a trait, we currently generate a copy
of the trait that is &quot;more or less&quot; unmodified (although we sometimes add
additional synthesized methods, such as the <code>set</code> method for an input). Under
this proposal, we will also introduce a <code>HasQueryGroup&lt;QueryGroupStorage&gt;</code>
supertrait. Therefore the following input:</p>
<pre><code class="language-rust ignore">#[salsa::query_group(HelloWorldStorage)]
trait HelloWorld { .. }
</code></pre>
<p>will generate a trait like:</p>
<pre><code class="language-rust ignore">trait HelloWorld:
    salsa::Database +
    salsa::plumbing::HasQueryGroup&lt;HelloWorldStorage&gt;
{
    ..
}
</code></pre>
<p>The <code>Database</code> trait is the standard <code>salsa::Database</code> trait and contains
various helper methods. The <code>HasQueryGroup</code> trait is implemented by the database
and defines various plumbing methods that are used by the storage
implementations.</p>
<p>One downside of this is that <code>salsa::Database</code> methods become available on the
trait; we might want to give internal plumbing methods more obscure names.</p>
<h4 id="bounds-were-already-present-on-the-blanket-impl-of-salsa-query-group-trait"><a class="header" href="#bounds-were-already-present-on-the-blanket-impl-of-salsa-query-group-trait">Bounds were already present on the blanket impl of salsa query group trait</a></h4>
<p>The new bounds that are appearing on the trait were always present on the
blanket impl that the <code>salsa::query_group</code> procedural macro generated, which
looks like so (and continues unchanged under this RFC):</p>
<pre><code class="language-rust ignore">impl&lt;DB&gt; HelloWorld for DB
where
    DB: salsa::Database,
    DB: salsa::plumbing::HasQueryGroup&lt;HelloWorldStorage&gt;
{
    ...
}
</code></pre>
<p>The reason we generate the impl is so that the <code>salsa::database</code> procedural
macro can simply create the <code>HasQueryGroup</code> impl and never needs to know the
name of the <code>HelloWorld</code> trait, only the <code>HelloWorldStorage</code> type.</p>
<h3 id="storage-types-no-longer-parameterized-by-the-database"><a class="header" href="#storage-types-no-longer-parameterized-by-the-database">Storage types no longer parameterized by the database</a></h3>
<p>Today's storage types, such as <code>Derived</code>, are parameterized over both a query <code>Q</code> and a <code>DB</code> (along with the memoization policy <code>MP</code>):</p>
<pre><code class="language-rust ignore">// Before this RFC:
pub struct DerivedStorage&lt;DB, Q, MP&gt;
where
    Q: QueryFunction&lt;DB&gt;,
    DB: Database + HasQueryGroup&lt;Q::Group&gt;,
    MP: MemoizationPolicy&lt;DB, Q&gt;,
</code></pre>
<p>The <code>DB</code> parameter should no longer be needed after the previously described
changes are made, so that the signature looks like:</p>
<pre><code class="language-rust ignore">// After this RFC:
pub struct DerivedStorage&lt;Q, MP&gt;
where
    Q: QueryFunction,
    MP: MemoizationPolicy&lt;DB, Q&gt;,
</code></pre>
<h2 id="alternatives-and-future-work-4"><a class="header" href="#alternatives-and-future-work-4">Alternatives and future work</a></h2>
<p>The 'linch-pin' of this design is the <code>DatabaseKeyIndex</code> type, which allows for
signatures to refer to &quot;any query in the system&quot; without reference to the <code>DB</code>
type. The biggest downside of the system is that this type is an integer which
then requires a tracing GC to recover index values. The primary alternative
would be to use an <code>Arc</code>-like scheme, but this has some severe downsides:</p>
<ul>
<li>Requires reference counting, allocation</li>
<li>Hashing and equality comparisons have more data to process versus an integer</li>
<li>Equality comparisons must still be deep since you may have older and newer keys co-existing</li>
<li>Requires an <code>Arc&lt;dyn DatabaseKey&gt;</code>-like setup, which then encounters the
problems that this type is not <code>Send</code> or <code>Sync</code>, leading to hacks like the
<code>DB::DatabaseData</code> we use today.</li>
</ul>
<div style="break-before: page; page-break-before: always;"></div><h1 id="opinionated-cancelation"><a class="header" href="#opinionated-cancelation">Opinionated cancelation</a></h1>
<h2 id="metadata-3"><a class="header" href="#metadata-3">Metadata</a></h2>
<ul>
<li>Author: nikomatsakis</li>
<li>Date: 2021-05-15</li>
<li>Introduced in: <a href="https://github.com/salsa-rs/salsa/pull/265">salsa-rs/salsa#265</a></li>
</ul>
<h2 id="summary-7"><a class="header" href="#summary-7">Summary</a></h2>
<ul>
<li>Define stack unwinding as the one true way to handle cancelation in salsa queries</li>
<li>Modify salsa queries to automatically initiate unwinding when they are canceled</li>
<li>Use a distinguished value for this panic so that people can test if the panic was a result of cancelation</li>
</ul>
<h2 id="motivation-7"><a class="header" href="#motivation-7">Motivation</a></h2>
<p>Salsa's database model is fundamentally like a read-write lock. There is always a single <em>master copy</em> of the database which supports writes, and any number of concurrent <em>snapshots</em> that support reads. Whenever a write to the database occurs, any queries executing in those snapshots are considered <em>canceled</em>, because their results are based on stale data. The write blocks until they complete before it actually takes effect. It is therefore advantageous for those reads to complete as quickly as possible.</p>
<p>Cancelation in salsa is currently quite minimal. Effectively, a flag becomes true, and queries can manually check for this flag. This is easy to forget to do. Moreover, we support two modes of cancelation: you can either use <code>Result</code> values or use unwinding. In practice, though, there isn't much point to using <code>Result</code>: you can't really &quot;recover&quot; from cancelation.</p>
<p>The largest user of salsa, rust-analyzer, uses a fairly opinionated and aggressive form of cancelation:</p>
<ul>
<li>Every query is instrumented, using salsa's various hooks, to check for cancelation before it begins.</li>
<li>If a query is canceled, then it immediately panics, using a special sentinel value.</li>
<li>Any worker threads holding a snapshot of the DB recognize this value and go back to waiting for work.</li>
</ul>
<p>We propose to make this model of cancelation the <em>only</em> model of cancelation.</p>
<h2 id="users-guide-7"><a class="header" href="#users-guide-7">User's guide</a></h2>
<p>When you do a write to the salsa database, that write will block until any queries running in background threads have completed. You really want those queries to complete quickly, though, because they are now operating on stale data and their results are therefore not meaningful. To expedite the process, salsa will <em>cancel</em> those queries. That means that the queries will panic as soon as they try to execute another salsa query. Those panics occur using a sentinel value that you can check for if you wish. If you have a query that contains a long loop which does not execute any intermediate queries, salsa won't be able to cancel it automatically. You may wish to check for cancelation yourself by invoking the <code>unwind_if_cancelled</code> method.</p>
<h2 id="reference-guide-7"><a class="header" href="#reference-guide-7">Reference guide</a></h2>
<p>The changes required to implement this RFC are as follows:</p>
<ul>
<li>Remove <code>is_current_revision_canceled</code>.</li>
<li>Introduce a sentinel cancellation token that can be used with <a href="https://doc.rust-lang.org/std/panic/fn.resume_unwind.html"><code>resume_unwind</code></a></li>
<li>Introduce a <code>unwind_if_cancelled</code> method into the <code>Database</code> which checks whether cancelation has occurred and panics if so.
<ul>
<li>This method also triggers a <code>salsa_event</code> callback.</li>
<li>This should probably be inline for the <code>if</code> with an outlined function to do the actual panic.</li>
</ul>
</li>
<li>Modify the code for the various queries to invoke <code>unwind_if_cancelled</code> when they are invoked or validated.</li>
</ul>
<h2 id="frequently-asked-questions-1"><a class="header" href="#frequently-asked-questions-1">Frequently asked questions</a></h2>
<h3 id="isnt-it-hard-to-write-panic-safe-code"><a class="header" href="#isnt-it-hard-to-write-panic-safe-code">Isn't it hard to write panic-safe code?</a></h3>
<p>It is. However, the salsa runtime is panic-safe, and all salsa queries must already avoid side-effects for other reasons, so in our case, being panic-safe happens by default.</p>
<h3 id="isnt-recovering-from-panics-a-bad-idea"><a class="header" href="#isnt-recovering-from-panics-a-bad-idea">Isn't recovering from panics a bad idea?</a></h3>
<p>No. It's a bad idea to do &quot;fine-grained&quot; recovery from panics, but catching a panic at a high-level of your application and soldiering on is actually exactly how panics were meant to be used. This is especially true in salsa, since all code is already panic-safe.</p>
<h3 id="does-this-affect-users-of-salsa-who-do-not-use-threads"><a class="header" href="#does-this-affect-users-of-salsa-who-do-not-use-threads">Does this affect users of salsa who do not use threads?</a></h3>
<p>No. Cancelation in salsa only occurs when there are parallel readers and writers.</p>
<h3 id="what-about-people-using-panic-as-abort"><a class="header" href="#what-about-people-using-panic-as-abort">What about people using panic-as-abort?</a></h3>
<p>This does mean that salsa is not compatible with panic-as-abort. Strictly speaking, you could still use salsa in single-threaded mode, so that cancelation is not possible.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="remove-garbage-collection"><a class="header" href="#remove-garbage-collection">Remove garbage collection</a></h1>
<h2 id="metadata-4"><a class="header" href="#metadata-4">Metadata</a></h2>
<ul>
<li>Author: nikomatsakis</li>
<li>Date: 2021-06-06</li>
<li>Introduced in: <a href="https://github.com/salsa-rs/salsa/pull/267">salsa-rs/salsa#267</a></li>
</ul>
<h2 id="summary-8"><a class="header" href="#summary-8">Summary</a></h2>
<ul>
<li>Remove support for tracing garbage collection</li>
<li>Make interned keys immortal, for now at least</li>
</ul>
<h2 id="motivation-8"><a class="header" href="#motivation-8">Motivation</a></h2>
<p>Salsa has traditionally supported &quot;tracing garbage collection&quot;, which allowed the user to remove values that were not used in the most recent revision. While this feature is nice in theory, it is not used in practice. Rust Analyzer, for example, prefers to use the LRU mechanism, which offers stricter limits. Considering that it is not used, supporting the garbage collector involves a decent amount of complexity and makes it harder to experiment with Salsa's structure. Therefore, this RFC proposes to remove support for tracing garbage collection. If desired, it can be added back at some future date in an altered form.</p>
<h2 id="users-guide-8"><a class="header" href="#users-guide-8">User's guide</a></h2>
<p>The primary effect for users is that the various 'sweep' methods from the database and queries are removed. The only way to control memory usage in Salsa now is through the LRU mechanisms.</p>
<h2 id="reference-guide-8"><a class="header" href="#reference-guide-8">Reference guide</a></h2>
<p>Removing the GC involves deleting a fair bit of code. The most interesting and subtle code is in the interning support. Previously, interned keys tracked the revision in which they were interned, but also the revision in which they were last accessed. When the sweeping method ran, any interned keys that had not been accessed in the current revision were collected. Since we permitted the GC to run with only read access, we had to be prepared for accesses to interned keys to occur concurrently with the GC, and thus for the possibility that various operations could fail. This complexity is removed, but it means that there is no way to remove interned keys at present.</p>
<h2 id="frequently-asked-questions-2"><a class="header" href="#frequently-asked-questions-2">Frequently asked questions</a></h2>
<h3 id="why-not-just-keep-the-gc"><a class="header" href="#why-not-just-keep-the-gc">Why not just keep the GC?</a></h3>
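<p>The complexity: supporting the GC involves a decent amount of complexity and makes it harder to experiment with Salsa's structure.</p>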
<h3 id="are-any-users-relying-on-the-sweeping-functionality"><a class="header" href="#are-any-users-relying-on-the-sweeping-functionality">Are any users relying on the sweeping functionality?</a></h3>
<p>Hard to say for sure, but none that we know of.</p>
<h3 id="dont-we-want-some-mechanism-to-control-memory-usage"><a class="header" href="#dont-we-want-some-mechanism-to-control-memory-usage">Don't we want some mechanism to control memory usage?</a></h3>
<p>Yes, but we don't quite know what it looks like. LRU seems to be adequate in practice for the present.</p>
<h3 id="what-about-for-interned-keys-in-particular"><a class="header" href="#what-about-for-interned-keys-in-particular">What about for interned keys in particular?</a></h3>
<p>We could add an LRU-like mechanism to interning.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="descriptiontitle-1"><a class="header" href="#descriptiontitle-1">Cycle recovery</a></h1>
<h2 id="metadata-5"><a class="header" href="#metadata-5">Metadata</a></h2>
<ul>
<li>Author: nikomatsakis</li>
<li>Date: 2021-10-31</li>
<li>Introduced in: https://github.com/salsa-rs/salsa/pull/285</li>
</ul>
<h2 id="summary-9"><a class="header" href="#summary-9">Summary</a></h2>
<ul>
<li>Permit cycle recovery as long as at least one participant has recovery enabled.</li>
<li>Modify cycle recovery to take a <code>&amp;Cycle</code>.</li>
<li>Introduce <code>Cycle</code> type that carries information about a cycle and lists participants in a deterministic order.</li>
</ul>
<h2 id="motivation-9"><a class="header" href="#motivation-9">Motivation</a></h2>
<p>Cycle recovery has been found to have some subtle bugs that could lead to panics. Furthermore, the existing cycle recovery APIs require all participants in a cycle to have recovery enabled and give limited and non-deterministic information. This RFC tweaks the user exposed APIs to correct these shortcomings. It also describes a major overhaul of how cycles are handled internally.</p>
<h2 id="users-guide-9"><a class="header" href="#users-guide-9">User's guide</a></h2>
<p>By default, cycles in the computation graph are considered a &quot;programmer bug&quot; and result in a panic. Sometimes, though, cycles are outside of the programmer's control. Salsa provides mechanisms to recover from cycles that can help in those cases.</p>
<h3 id="default-cycle-handling-panic"><a class="header" href="#default-cycle-handling-panic">Default cycle handling: panic</a></h3>
<p>By default, when Salsa detects a cycle in the computation graph, Salsa will panic with a <code>salsa::Cycle</code> as the panic value. Your queries should not attempt to catch this value; rather, the <code>salsa::Cycle</code> is meant to be caught by the outermost thread, which can print out information from it to diagnose what went wrong. The <code>Cycle</code> type offers a few methods for inspecting the participants in the cycle:</p>
<ul>
<li><code>participant_keys</code> -- returns an iterator over the <code>DatabaseKeyIndex</code> for each participant in the cycle.</li>
<li><code>all_participants</code> -- returns an iterator over <code>String</code> values for each participant in the cycle (debug output).</li>
<li><code>unexpected_participants</code> -- returns an iterator over <code>String</code> values for each participant in the cycle that doesn't have recovery information (see next section).</li>
</ul>
<p><code>Cycle</code> implements <code>Debug</code>, but because the standard trait doesn't provide access to the database, the output can be kind of inscrutable. To get more readable <code>Debug</code> values, use the method <code>cycle.debug(db)</code>, which returns an <code>impl Debug</code> that is more readable.</p>
<h3 id="cycle-recovery"><a class="header" href="#cycle-recovery">Cycle recovery</a></h3>
<p>Panicking when a cycle occurs is ok for situations where you believe a cycle is impossible. But sometimes cycles can result from illegal user input and cannot be statically prevented. In these cases, you might prefer to gracefully recover from a cycle rather than panicking the entire query. Salsa supports that with the idea of <em>cycle recovery</em>.</p>
<p>To use cycle recovery, you annotate potential participants in the cycle with a <code>#[salsa::recover(my_recover_fn)]</code> attribute. When a cycle occurs, if any participant P has recovery information, then no panic occurs. Instead, the execution of P is aborted and P will execute the recovery function to generate its result. Participants in the cycle that do not have recovery information continue executing as normal, using this recovery result.</p>
<p>The recovery function has a similar signature to a query function. It is given a reference to your database along with a <code>salsa::Cycle</code> describing the cycle that occurred; it returns the result of the query. Example:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>fn my_recover_fn(
    db: &amp;dyn MyDatabase,
    cycle: &amp;salsa::Cycle,
) -&gt; MyResultValue
<span class="boring">}
</span></code></pre></pre>
<p>The <code>db</code> and <code>cycle</code> arguments can be used to prepare a useful error message for your users.</p>
<p><strong>Important:</strong> Although the recovery function is given a <code>db</code> handle, you should be careful to avoid creating a cycle from within recovery or invoking queries that may be participating in the current cycle. Attempting to do so can result in inconsistent results.</p>
<h3 id="figuring-out-why-recovery-did-not-work-1"><a class="header" href="#figuring-out-why-recovery-did-not-work-1">Figuring out why recovery did not work</a></h3>
<p>If a cycle occurs and <em>none</em> of the participant queries have <code>#[salsa::recover]</code> annotations, then the cycle will be treated as irrecoverable and the query will simply panic. You can use the <code>Cycle::unexpected_participants</code> method to find the participants that lack recovery information, figure out why recovery did not succeed, and add the appropriate <code>#[salsa::recover]</code> annotations.</p>
<h2 id="reference-guide-9"><a class="header" href="#reference-guide-9">Reference guide</a></h2>
<p>This RFC accompanies a rather long and complex PR with a number of changes to the implementation. We summarize the most important points here.</p>
<h1 id="cycles-1"><a class="header" href="#cycles-1">Cycles</a></h1>
<h2 id="cross-thread-blocking-1"><a class="header" href="#cross-thread-blocking-1">Cross-thread blocking</a></h2>
<p>The interface for blocking across threads now works as follows:</p>
<ul>
<li>When one thread <code>T1</code> wishes to block on a query <code>Q</code> being executed by another thread <code>T2</code>, it invokes <code>Runtime::try_block_on</code>. This will check for cycles. Assuming no cycle is detected, it will block <code>T1</code> until <code>T2</code> has completed with <code>Q</code>. At that point, <code>T1</code> reawakens. However, we don't know the result of executing <code>Q</code>, so <code>T1</code> now has to &quot;retry&quot;. Typically, this will result in successfully reading the cached value.</li>
<li>While <code>T1</code> is blocking, the runtime moves its query stack (a <code>Vec</code>) into the shared dependency graph data structure. When <code>T1</code> reawakens, it recovers ownership of its query stack before returning from <code>try_block_on</code>.</li>
</ul>
<h2 id="cycle-detection-1"><a class="header" href="#cycle-detection-1">Cycle detection</a></h2>
<p>When a thread <code>T1</code> attempts to execute a query <code>Q</code>, it will try to load the value for <code>Q</code> from the memoization tables. If it finds an <code>InProgress</code> marker, that indicates that <code>Q</code> is currently being computed. This indicates a potential cycle. <code>T1</code> will then try to block on the query <code>Q</code>:</p>
<ul>
<li>If <code>Q</code> is also being computed by <code>T1</code>, then there is a cycle.</li>
<li>Otherwise, if <code>Q</code> is being computed by some other thread <code>T2</code>, we have to check whether <code>T2</code> is (transitively) blocked on <code>T1</code>. If so, there is a cycle.</li>
</ul>
<p>These two cases are handled internally by the <code>Runtime::try_block_on</code> function. Detecting the intra-thread cycle case is easy; to detect cross-thread cycles, the runtime maintains a dependency DAG between threads (identified by <code>RuntimeId</code>). Before adding an edge <code>T1 -&gt; T2</code> (i.e., <code>T1</code> is blocked waiting for <code>T2</code>) into the DAG, it checks whether a path exists from <code>T2</code> to <code>T1</code>. If so, we have a cycle and the edge cannot be added (otherwise the DAG would no longer be acyclic).</p>
<p>When a cycle is detected, the current thread <code>T1</code> has full access to the query stacks that are participating in the cycle. Consider: naturally, <code>T1</code> has access to its own stack. There is also a path <code>T2 -&gt; ... -&gt; Tn -&gt; T1</code> of blocked threads. Each of the blocked threads <code>T2 ..= Tn</code> will have moved their query stacks into the dependency graph, so those query stacks are available for inspection.</p>
<p>Using the available stacks, we can create a list of cycle participants <code>Q0 ... Qn</code> and store that into a <code>Cycle</code> struct. If none of the participants <code>Q0 ... Qn</code> have cycle recovery enabled, we panic with the <code>Cycle</code> struct, which will trigger all the queries on this thread to panic.</p>
<h2 id="cycle-recovery-via-fallback-1"><a class="header" href="#cycle-recovery-via-fallback-1">Cycle recovery via fallback</a></h2>
<p>If any of the cycle participants <code>Q0 ... Qn</code> has cycle recovery set, we recover from the cycle. To help explain how this works, we will use this example cycle which contains three threads. Beginning with the current query, the cycle participants are <code>QA3</code>, <code>QB2</code>, <code>QB3</code>, <code>QC2</code>, <code>QC3</code>, and <code>QA2</code>.</p>
<pre><code>        The cyclic
        edge we have
        failed to add.
          :
   A      :    B         C
          :
   QA1    v    QB1       QC1
┌► QA2    ┌──► QB2   ┌─► QC2
│  QA3 ───┘    QB3 ──┘   QC3 ───┐
│                               │
└───────────────────────────────┘
</code></pre>
<p>Recovery works in phases:</p>
<ul>
<li><strong>Analyze:</strong> As we enumerate the query participants, we collect their collective inputs (all queries invoked so far by any cycle participant) and the max changed-at and min durability. We then remove the cycle participants themselves from this list of inputs, leaving only the queries external to the cycle.</li>
<li><strong>Mark</strong>: For each query Q that is annotated with <code>#[salsa::recover]</code>, we mark it and all of its successors on the same thread by setting its <code>cycle</code> flag to the <code>c: Cycle</code> we constructed earlier; we also reset its inputs to the collective inputs gathered during analysis. If those queries resume execution later, those marks will trigger them to immediately unwind and use cycle recovery, and the inputs will be used as the inputs to the recovery value.
<ul>
<li>Note that we mark <em>all</em> the successors of Q on the same thread, whether or not they have recovery set. We'll discuss later how this is important in the case where the active thread (A, here) doesn't have any recovery set.</li>
</ul>
</li>
<li><strong>Unblock</strong>: Each blocked thread T that has a recovering query is forcibly reawoken; the outgoing edge from that thread to its successor in the cycle is removed. Its condvar is signalled with a <code>WaitResult::Cycle(c)</code>. When the thread reawakens, it will see that and start unwinding with the cycle <code>c</code>.</li>
<li><strong>Handle the current thread:</strong> Finally, we have to choose how to have the current thread proceed. If the current thread includes any cycle participant with recovery information, then we can begin unwinding. Otherwise, the current thread simply continues as if there had been no cycle, and so the cyclic edge is added to the graph and the current thread blocks. This is possible because some other thread had recovery information and therefore has been awoken.</li>
</ul>
<p>Let's walk through the process with a few examples.</p>
<h3 id="example-1-recovery-on-the-detecting-thread-1"><a class="header" href="#example-1-recovery-on-the-detecting-thread-1">Example 1: Recovery on the detecting thread</a></h3>
<p>Consider the case where only the query QA2 has recovery set. It and QA3 will be marked with their <code>cycle</code> flag set to <code>c: Cycle</code>. Threads B and C will not be unblocked, as they do not have any cycle recovery nodes. The current thread (Thread A) will initiate unwinding with the cycle <code>c</code> as the value. Unwinding will pass through QA3 and be caught by QA2. QA2 will substitute the recovery value and return normally. QA1 and QC3 will then complete normally and so forth, on up until all queries have completed.</p>
<h3 id="example-2-recovery-in-two-queries-on-the-detecting-thread-1"><a class="header" href="#example-2-recovery-in-two-queries-on-the-detecting-thread-1">Example 2: Recovery in two queries on the detecting thread</a></h3>
<p>Consider the case where both query QA2 and QA3 have recovery set. It proceeds the same as Example 1 until the current thread initiates unwinding. When QA3 receives the cycle, it stores its recovery value and completes normally. QA2 then adds QA3 as an input dependency: at that point, QA2 observes that it too has the cycle mark set, and so it initiates unwinding. The rest of QA2 therefore never executes. This unwinding is caught by QA2's entry point and it stores the recovery value and returns normally. QA1 and QC3 then continue normally, as they have not had their <code>cycle</code> flag set.</p>
<h3 id="example-3-recovery-on-another-thread-1"><a class="header" href="#example-3-recovery-on-another-thread-1">Example 3: Recovery on another thread</a></h3>
<p>Now consider the case where only the query QB2 has recovery set. It and QB3 will be marked with the cycle <code>c: Cycle</code> and thread B will be unblocked; the edge <code>QB3 -&gt; QC2</code> will be removed from the dependency graph. Thread A will then add an edge <code>QA3 -&gt; QB2</code> and block on thread B. At that point, thread A releases the lock on the dependency graph, and so thread B is re-awoken. It observes the <code>WaitResult::Cycle</code> and initiates unwinding. Unwinding proceeds through QB3 and into QB2, which recovers. QB1 is then able to execute normally, as is QA3, and execution proceeds from there.</p>
<h3 id="example-4-recovery-on-all-queries-1"><a class="header" href="#example-4-recovery-on-all-queries-1">Example 4: Recovery on all queries</a></h3>
<p>Now consider the case where all the queries have recovery set. In that case, they are all marked with the cycle, and all the cross-thread edges are removed from the graph. Each thread will independently awaken and initiate unwinding. Each query will recover.</p>
<h2 id="frequently-asked-questions-3"><a class="header" href="#frequently-asked-questions-3">Frequently asked questions</a></h2>
<h3 id="why-have-other-threads-retry-instead-of-giving-them-the-value"><a class="header" href="#why-have-other-threads-retry-instead-of-giving-them-the-value">Why have other threads retry instead of giving them the value?</a></h3>
<p>In the past, when one thread T1 blocked on some query Q being executed by another thread T2, we would create a custom channel between the threads. T2 would then send the result of Q directly to T1, and T1 had no need to retry. This mechanism was simplified in this RFC because we don't always have a value available: sometimes the cycle results when T2 is just verifying whether a memoized value is still valid. In that case, the value may not have been computed, and so when T1 retries it will in fact go on to compute the value. (Previously, this case was overlooked by the cycle handling logic and resulted in a panic.)</p>
<h3 id="why-do-we-use-unwinding-to-manage-cycle-recovery"><a class="header" href="#why-do-we-use-unwinding-to-manage-cycle-recovery">Why do we use unwinding to manage cycle recovery?</a></h3>
<p>When a query Q participates in cycle recovery, we use unwinding to get from the point where the cycle is detected back to the query's execution function. This ensures that the rest of Q never runs. This is important because Q might otherwise go on to create new cycles even while recovery is proceeding. Consider an example like:</p>
<pre><pre class="playground"><code class="language-rust">
<span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#[salsa::recover]
fn query_q1(db: &amp;dyn Database) {
    db.query_q2();
    db.query_q3() // &lt;-- this never runs, thanks to unwinding
}

#[salsa::recover]
fn query_q2(db: &amp;dyn Database) {
    db.query_q1()
}

#[salsa::recover]
fn query_q3(db: &amp;dyn Database) {
    db.query_q1()
}
<span class="boring">}
</span></code></pre></pre>
<h3 id="why-not-invoke-the-recovery-functions-all-at-once"><a class="header" href="#why-not-invoke-the-recovery-functions-all-at-once">Why not invoke the recovery functions all at once?</a></h3>
<p>The code currently unwinds frame by frame and invokes recovery as it goes. Another option might be to invoke the recovery function for all participants in the cycle up-front. This would be fine, but it's a bit difficult to do, since the result types for each query are different, and the <code>Runtime</code> code doesn't know what they are. We also don't have access to the memoization tables and so forth.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="parallel-friendly-caching"><a class="header" href="#parallel-friendly-caching">Parallel friendly caching</a></h1>
<h2 id="metadata-6"><a class="header" href="#metadata-6">Metadata</a></h2>
<ul>
<li>Author: nikomatsakis</li>
<li>Date: 2021-05-29</li>
<li>Introduced in: (please update once you open your PR)</li>
</ul>
<h2 id="summary-10"><a class="header" href="#summary-10">Summary</a></h2>
<ul>
<li>Rework query storage to be based on concurrent hashmaps instead of slots with read-write locked state.</li>
</ul>
<h2 id="motivation-10"><a class="header" href="#motivation-10">Motivation</a></h2>
<p>Two-fold:</p>
<ul>
<li>Simpler, cleaner, and hopefully faster algorithm.</li>
<li>Enables some future developments that are not part of this RFC:
<ul>
<li>Derived queries whose keys are known to be integers.</li>
<li>Fixed point cycles so that salsa and chalk can be deeply integrated.</li>
<li>Non-synchronized queries that potentially execute on many threads in parallel (required for fixed point cycles, but potentially valuable in their own right).</li>
</ul>
</li>
</ul>
<h2 id="users-guide-10"><a class="header" href="#users-guide-10">User's guide</a></h2>
<p>No user visible changes.</p>
<h2 id="reference-guide-10"><a class="header" href="#reference-guide-10">Reference guide</a></h2>
<h3 id="background-current-structure"><a class="header" href="#background-current-structure">Background: Current structure</a></h3>
<p>Before this RFC, the <strong>overall structure</strong> of derived queries is as follows:</p>
<ul>
<li>Each derived query has a <code>DerivedStorage&lt;Q&gt;</code> (stored in the database) that contains:
<ul>
<li>the <code>slot_map</code>, a monotonically growing, indexable map from keys (<code>Q::Key</code>) to the <code>Slot&lt;Q&gt;</code> for the given key</li>
<li>lru list</li>
</ul>
</li>
<li>Each <code>Slot&lt;Q&gt;</code> has
<ul>
<li>r-w locked query-state that can be:
<ul>
<li>not-computed</li>
<li>in-progress with synchronization storage:
<ul>
<li><code>id</code> of the runtime computing the value</li>
<li><code>anyone_waiting</code>: <code>AtomicBool</code> set to true if other threads are awaiting result</li>
</ul>
</li>
<li>a <code>Memo&lt;Q&gt;</code></li>
</ul>
</li>
</ul>
</li>
<li>A <code>Memo&lt;Q&gt;</code> has
<ul>
<li>an optional value <code>Option&lt;Q::Value&gt;</code></li>
<li>dependency information:
<ul>
<li>verified-at</li>
<li>changed-at</li>
<li>durability</li>
<li>input set (typically an <code>Arc&lt;[DatabaseKeyIndex]&gt;</code>)</li>
</ul>
</li>
</ul>
</li>
</ul>
<p><strong>Fetching the value for a query</strong> currently works as follows:</p>
<ul>
<li>Acquire the read lock on the (indexable) <code>slot_map</code> and hash key to find the slot.
<ul>
<li>If no slot exists, acquire write lock and insert.</li>
</ul>
</li>
<li>Acquire the slot's internal lock to perform the <a href="rfcs/../plumbing/fetch.html">fetch</a> operation.</li>
</ul>
<p><strong>Verifying a dependency</strong> uses a scheme introduced in <a href="rfcs/./RFC0006-Dynamic-Databases.html">RFC #6</a>. Each dependency is represented as a <code>DatabaseKeyIndex</code> which contains three indices (group, query, and key). The group and query indices are used to find the query storage via <code>match</code> statements and then the next operation depends on the query type:</p>
<ul>
<li>Acquire the read lock on the (indexable) <code>slot_map</code> and use key index to load the slot. Read lock is released afterwards.</li>
<li>Acquire the slot's internal lock to perform the <a href="rfcs/../plumbing/maybe_changed_after.html">maybe changed after</a> operation.</li>
</ul>
<h3 id="new-structure-introduced-by-this-rfc"><a class="header" href="#new-structure-introduced-by-this-rfc">New structure (introduced by this RFC)</a></h3>
<p>The <strong>overall structure</strong> of derived queries after this RFC is as follows:</p>
<ul>
<li>Each derived query has a <code>DerivedStorage&lt;Q&gt;</code> (stored in the database) that contains:
<ul>
<li>a set of concurrent hashmaps:
<ul>
<li><code>key_map</code>: maps from <code>Q::Key</code> to an internal key index <code>K</code></li>
<li><code>memo_map</code>: maps from <code>K</code> to cached memo <code>ArcSwap&lt;Memo&lt;Q&gt;&gt;</code></li>
<li><code>sync_map</code>: maps from <code>K</code> to a <code>Sync&lt;Q&gt;</code> synchronization value</li>
</ul>
</li>
<li>lru set</li>
</ul>
</li>
<li>A <code>Memo&lt;Q&gt;</code> has
<ul>
<li>an <em>immutable</em> optional value <code>Option&lt;Q::Value&gt;</code></li>
<li>dependency information:
<ul>
<li><em>updatable</em> verified-at (<code>AtomicCell&lt;Option&lt;Revision&gt;&gt;</code>)</li>
<li><em>immutable</em> changed-at (<code>Revision</code>)</li>
<li><em>immutable</em> durability (<code>Durability</code>)</li>
<li><em>immutable</em> input set (typically an <code>Arc&lt;[DatabaseKeyIndex]&gt;</code>)</li>
</ul>
</li>
<li>information for LRU:
<ul>
<li><code>DatabaseKeyIndex</code></li>
<li><code>lru_index</code>, an <code>AtomicUsize</code></li>
</ul>
</li>
</ul>
</li>
<li>A <code>Sync&lt;Q&gt;</code> has
<ul>
<li><code>id</code> of the runtime computing the value</li>
<li><code>anyone_waiting</code>: <code>AtomicBool</code> set to true if other threads are awaiting result</li>
</ul>
</li>
</ul>
<p><strong>Fetching the value for a <em>derived</em> query</strong> will work as follows:</p>
<ol>
<li>Find internal index <code>K</code> by hashing key, as today.
<ul>
<li>Precise operation for this will depend on the concurrent hashmap implementation.</li>
</ul>
</li>
<li>Load memo <code>M: Arc&lt;Memo&lt;Q&gt;&gt;</code> from <code>memo_map[K]</code> (if present):
<ul>
<li>If verified is <code>None</code>, then another thread has found this memo to be invalid; ignore it.</li>
<li>Else, let <code>Rv</code> be the &quot;last verified revision&quot;.</li>
<li>If <code>Rv</code> is the current revision, or last change to an input with durability <code>M.durability</code> was before <code>Rv</code>:
<ul>
<li>Update &quot;last verified revision&quot; and <strong>return</strong> memoized value.</li>
</ul>
</li>
</ul>
</li>
<li>Atomically check <code>sync_map</code> for an existing <code>Sync&lt;Q&gt;</code>:
<ul>
<li>If one exists, block on the thread within and return to step 2 after it completes:
<ul>
<li>If this results in a cycle, unwind as today.</li>
</ul>
</li>
<li>If none exists, insert a new entry with current runtime-id.</li>
</ul>
</li>
<li>Check dependencies deeply
<ul>
<li>Iterate over each dependency <code>D</code> and check <code>db.maybe_changed_after(D, Rv)</code>.
<ul>
<li>If no dependency has changed, update <code>verified_at</code> to current revision and <strong>return</strong> memoized value.</li>
</ul>
</li>
<li>Mark memo as invalid by storing <code>None</code> in the verified-at.</li>
</ul>
</li>
<li>Construct the new memo:
<ul>
<li>Push query onto the local stack and execute the query function:
<ul>
<li>If this query is found to be a cycle participant, execute recovery function.</li>
</ul>
</li>
<li>Backdate result if it is equal to the old memo's value.</li>
<li>Allocate new memo.</li>
</ul>
</li>
<li>Store results:
<ul>
<li>Store new memo into <code>memo_map[K]</code>.</li>
<li>Remove query from the <code>sync_map</code>.</li>
</ul>
</li>
<li><strong>Return</strong> newly constructed value.</li>
</ol>
<p><strong>Verifying a dependency for a <em>derived</em> query</strong> will work as follows:</p>
<ol>
<li>Find internal index <code>K</code> by hashing key, as today.
<ul>
<li>Precise operation for this will depend on the concurrent hashmap implementation.</li>
</ul>
</li>
<li>Load memo <code>M: Arc&lt;Memo&lt;Q&gt;&gt;</code> from <code>memo_map[K]</code> (if present):
<ul>
<li>If verified is <code>None</code>, then another thread has found this memo to be invalid; ignore it.</li>
<li>Else, let <code>Rv</code> be the &quot;last verified revision&quot;.</li>
<li>If <code>Rv</code> is the current revision, <strong>return</strong> true or false depending on the changed-at value from the memo.</li>
<li>If last change to an input with durability <code>M.durability</code> was before <code>Rv</code>:
<ul>
<li>Update <code>verified_at</code> to current revision and <strong>return</strong> memoized value.</li>
</ul>
</li>
<li>Iterate over each dependency <code>D</code> and check <code>db.maybe_changed_after(D, Rv)</code>.
<ul>
<li>If no dependency has changed, update <code>verified_at</code> to current revision and <strong>return</strong> memoized value.</li>
</ul>
</li>
<li>Mark memo as invalid by storing <code>None</code> in the verified-at.</li>
</ul>
</li>
<li>Atomically check <code>sync_map</code> for an existing <code>Sync&lt;Q&gt;</code>:
<ul>
<li>If one exists, block on the thread within and return to step 2 after it completes:
<ul>
<li>If this results in a cycle, unwind as today.</li>
</ul>
</li>
<li>If none exists, insert a new entry with current runtime-id.</li>
</ul>
</li>
<li>Construct the new memo:
<ul>
<li>Push query onto the local stack and execute the query function:
<ul>
<li>If this query is found to be a cycle participant, execute recovery function.</li>
</ul>
</li>
<li>Backdate result if it is equal to the old memo's value.</li>
<li>Allocate new memo.</li>
</ul>
</li>
<li>Store results:
<ul>
<li>Store new memo into <code>memo_map[K]</code>.</li>
<li>Remove query from the <code>sync_map</code>.</li>
</ul>
</li>
<li><strong>Return</strong> true or false depending on whether memo was backdated.</li>
</ol>
<h2 id="frequently-asked-questions-4"><a class="header" href="#frequently-asked-questions-4">Frequently asked questions</a></h2>
<h3 id="why-use-arcswap"><a class="header" href="#why-use-arcswap">Why use <code>ArcSwap</code>?</a></h3>
<p>It's a relatively minor implementation detail, but the code in this PR uses <code>ArcSwap</code> to store the values in the memo-map. In the case of a cache hit or other transient operations, this allows us to read from the arc while avoiding a full increment of the ref count. It adds a small bit of complexity because we have to be careful to do a full load before any recursive operations, since arc-swap only gives a fixed number of &quot;guards&quot; per thread before falling back to more expensive loads.</p>
<h3 id="do-we-really-need-maybe_changed_after-and-fetch"><a class="header" href="#do-we-really-need-maybe_changed_after-and-fetch">Do we really need <code>maybe_changed_after</code> <em>and</em> <code>fetch</code>?</a></h3>
<p>Yes, we do. &quot;maybe changed after&quot; is very similar to &quot;fetch&quot;, but it doesn't require that we have a memoized value. This is important for LRU.</p>
<h3 id="the-lru-map-in-the-code-is-just-a-big-lock"><a class="header" href="#the-lru-map-in-the-code-is-just-a-big-lock">The LRU map in the code is just a big lock!</a></h3>
<p>That's not a question. But it's true, I simplified the LRU code to just use a mutex. My assumption is that there are relatively few LRU-ified queries, and their values are relatively expensive to compute, so this is ok. If we find it's a bottleneck, though, I believe we could improve it by using a similar &quot;zone scheme&quot; to what we use now. We would add a <code>lru_index</code> to the <code>Memo</code> so that we can easily check if the memo is in the &quot;green zone&quot; when reading (if so, no updates are needed). The complexity there is that when we produce a replacement memo, we have to install it and swap the index. Thinking about that made my brain hurt a little so I decided to just take the simple option for now.</p>
<h3 id="how-do-the-synchronized--atomic-operations-compare-after-this-rfc"><a class="header" href="#how-do-the-synchronized--atomic-operations-compare-after-this-rfc">How do the synchronized / atomic operations compare after this RFC?</a></h3>
<p>After this RFC, to perform a read, in the best case:</p>
<ul>
<li>We do one &quot;dashmap get&quot; to map key to key index.</li>
<li>We do another &quot;dashmap get&quot; from key index to memo.</li>
<li>We do an &quot;arcswap load&quot; to get the memo.</li>
<li>We do an &quot;atomiccell read&quot; to load the current revision or durability information.</li>
</ul>
<p>dashmap is implemented with a striped set of read-write locks, so this is roughly the same (two read locks) as before this RFC. However:</p>
<ul>
<li>We no longer do any atomic ref count increments.</li>
<li>It is theoretically possible to replace dashmap with something that doesn't use locks.</li>
<li>The first dashmap get should be removable, if we know that the key is a 32 bit integer.
<ul>
<li>I plan to propose this in a future RFC.</li>
</ul>
</li>
</ul>
<h3 id="yeah-yeah-show-me-some-benchmarks"><a class="header" href="#yeah-yeah-show-me-some-benchmarks">Yeah yeah, show me some benchmarks!</a></h3>
<p>I didn't run any. I'll get on that.</p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="meta-about-the-book-itself"><a class="header" href="#meta-about-the-book-itself">Meta: about the book itself</a></h1>
<h2 id="linking-policy"><a class="header" href="#linking-policy">Linking policy</a></h2>
<p>We try to avoid fragile links that easily become stale or broken.</p>
<p><strong>Do:</strong></p>
<ul>
<li>Link to <code>docs.rs</code> types to document the public API, but modify the link to use <code>latest</code> as the version.</li>
<li>Link to modules in the source code.</li>
<li>Create <a href="https://rust-lang.github.io/mdBook/format/mdbook.html?highlight=ANCHOR#including-portions-of-a-file">&quot;named anchors&quot;</a> and embed source code directly.</li>
</ul>
<p><strong>Don't:</strong></p>
<ul>
<li>Link to direct lines on GitHub, even within a specific commit, unless you are trying to reference a historical piece of code (&quot;how things were at the time&quot;).</li>
</ul>

                    </main>

                    <nav class="nav-wrapper" aria-label="Page navigation">
                        <!-- Mobile navigation buttons -->


                        <div style="clear: both"></div>
                    </nav>
                </div>
            </div>

            <nav class="nav-wide-wrapper" aria-label="Page navigation">

                            </nav>

        </div>


                <script type="text/javascript">
            // Global flag set before the book's scripts are loaded.
            // NOTE(review): presumably read by book.js to decide whether
            // playground code blocks get a copy button; confirm against book.js.
            window.playground_copyable = true;
        </script>


                <script src="elasticlunr.min.js" type="text/javascript" charset="utf-8"></script>
        <script src="mark.min.js" type="text/javascript" charset="utf-8"></script>
        <script src="searcher.js" type="text/javascript" charset="utf-8"></script>

        <script src="clipboard.min.js" type="text/javascript" charset="utf-8"></script>
        <script src="highlight.js" type="text/javascript" charset="utf-8"></script>
        <script src="book.js" type="text/javascript" charset="utf-8"></script>

        <!-- Custom JS scripts -->
                <script type="text/javascript" src="mermaid.min.js"></script>
                <script type="text/javascript" src="mermaid-init.js"></script>

                        <script type="text/javascript">
        // Print page only: once the window's `load` event has fired, schedule
        // the browser's print dialog to open after a short delay.
        window.addEventListener('load', function () {
            // Delay in milliseconds before calling window.print.
            // NOTE(review): presumably gives late rendering work time to
            // settle before printing; confirm the reason for this value.
            var printDelayMs = 100;
            window.setTimeout(window.print, printDelayMs);
        });
        </script>

    </body>
</html>