diff --git a/Cargo.lock b/Cargo.lock
index 427407bdb8..fdeb7f76b8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -884,6 +884,7 @@ dependencies = [
  "roc_constrain",
  "roc_fmt",
  "roc_module",
+ "roc_mono",
  "roc_parse",
  "roc_problem",
  "roc_region",
@@ -998,6 +999,29 @@ dependencies = [
  "roc_region",
 ]
 
+[[package]]
+name = "roc_mono"
+version = "0.1.0"
+dependencies = [
+ "bumpalo",
+ "indoc",
+ "maplit",
+ "pretty_assertions",
+ "quickcheck",
+ "quickcheck_macros",
+ "roc_builtins",
+ "roc_can",
+ "roc_collections",
+ "roc_constrain",
+ "roc_module",
+ "roc_parse",
+ "roc_problem",
+ "roc_region",
+ "roc_solve",
+ "roc_types",
+ "roc_unify",
+]
+
 [[package]]
 name = "roc_parse"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 0721ff7522..924cb86531 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,6 +16,7 @@ members = [
     "compiler/solve",
     "compiler/reporting",
     "compiler/fmt",
+    "compiler/mono",
     "vendor/ena",
     "vendor/pathfinding"
 ]
diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml
index b148fbd61c..d82ff7f728 100644
--- a/compiler/Cargo.toml
+++ b/compiler/Cargo.toml
@@ -18,6 +18,7 @@ roc_uniq = { path = "./uniq" }
 roc_unify = { path = "./unify" }
 roc_solve = { path = "./solve" }
 roc_fmt = { path = "./fmt" }
+roc_mono = { path = "./mono" }
 log = "0.4.8"
 petgraph = { version = "0.4.5", optional = true }
 im = "14" # im and im-rc should always have the same version!
diff --git a/compiler/mono/Cargo.toml b/compiler/mono/Cargo.toml
new file mode 100644
index 0000000000..c4a669b954
--- /dev/null
+++ b/compiler/mono/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "roc_mono"
+version = "0.1.0"
+authors = ["Richard Feldman "]
+edition = "2018"
+
+[dependencies]
+roc_collections = { path = "../collections" }
+roc_region = { path = "../region" }
+roc_module = { path = "../module" }
+roc_types = { path = "../types" }
+roc_can = { path = "../can" }
+roc_unify = { path = "../unify" }
+bumpalo = "2.6"
+
+[dev-dependencies]
+roc_constrain = { path = "../constrain" }
+roc_builtins = { path = "../builtins" }
+roc_problem = { path = "../problem" }
+roc_parse = { path = "../parse" }
+roc_solve = { path = "../solve" }
+pretty_assertions = "0.5.1"
+maplit = "1.0.1"
+indoc = "0.3.3"
+quickcheck = "0.8"
+quickcheck_macros = "0.8"
diff --git a/compiler/src/mono/expr.rs b/compiler/mono/src/expr.rs
similarity index 99%
rename from compiler/src/mono/expr.rs
rename to compiler/mono/src/expr.rs
index 8bce087a27..770bab4f04 100644
--- a/compiler/src/mono/expr.rs
+++ b/compiler/mono/src/expr.rs
@@ -1,8 +1,8 @@
-use crate::mono::layout::{Builtin, Layout};
+use crate::layout::{Builtin, Layout};
 use bumpalo::collections::Vec;
 use bumpalo::Bump;
+use roc_can;
 use roc_can::pattern::Pattern;
-use roc_can::{self};
 use roc_collections::all::MutMap;
 use roc_module::ident::{Lowercase, TagName};
 use roc_module::symbol::{IdentIds, ModuleId, Symbol};
diff --git a/compiler/src/mono/layout.rs b/compiler/mono/src/layout.rs
similarity index 100%
rename from compiler/src/mono/layout.rs
rename to compiler/mono/src/layout.rs
diff --git a/compiler/mono/src/lib.rs b/compiler/mono/src/lib.rs
new file mode 100644
index 0000000000..5a9f7675c5
--- /dev/null
+++ b/compiler/mono/src/lib.rs
@@ -0,0 +1,14 @@
+#![warn(clippy::all, clippy::dbg_macro)]
+// I'm skeptical that clippy::large_enum_variant is a good lint to have globally enabled.
+//
+// It warns about a performance problem where the only quick remediation is
+// to allocate more on the heap, which has lots of tradeoffs - including making it
+// long-term unclear which allocations *need* to happen for compilation's sake
+// (e.g. recursive structures) versus those which were only added to appease clippy.
+//
+// Effectively optimizing data structure memory layout isn't a quick fix,
+// and encouraging shortcuts here creates bad incentives. I would rather temporarily
+// re-enable this when working on performance optimizations than have it block PRs.
+#![allow(clippy::large_enum_variant)]
+pub mod expr;
+pub mod layout;
diff --git a/compiler/src/crane/build.rs b/compiler/src/crane/build.rs
index 73142030c4..8a64b4e527 100644
--- a/compiler/src/crane/build.rs
+++ b/compiler/src/crane/build.rs
@@ -14,10 +14,10 @@ use cranelift_codegen::Context;
 use cranelift_module::{Backend, FuncId, Linkage, Module};
 
 use crate::crane::convert::{sig_from_layout, type_from_layout};
-use crate::mono::expr::{Expr, Proc, Procs};
-use crate::mono::layout::{Builtin, Layout};
 use roc_collections::all::ImMap;
 use roc_module::symbol::{Interns, Symbol};
+use roc_mono::expr::{Expr, Proc, Procs};
+use roc_mono::layout::{Builtin, Layout};
 use roc_types::subs::{Subs, Variable};
 
 type Scope = ImMap;
@@ -46,7 +46,7 @@ pub fn build_expr<'a, B: Backend>(
     expr: &Expr<'a>,
     procs: &Procs<'a>,
 ) -> Value {
-    use crate::mono::expr::Expr::*;
+    use roc_mono::expr::Expr::*;
 
     match expr {
         Int(num) => builder.ins().iconst(types::I64, *num),
diff --git a/compiler/src/crane/convert.rs b/compiler/src/crane/convert.rs
index 5b6c237c21..f88c16fe9a 100644
--- a/compiler/src/crane/convert.rs
+++ b/compiler/src/crane/convert.rs
@@ -3,11 +3,11 @@ use cranelift_codegen::ir::{types, Signature, Type};
 use cranelift_codegen::isa::TargetFrontendConfig;
 use cranelift_module::{Backend, Module};
 
-use crate::mono::layout::Layout;
+use roc_mono::layout::Layout;
 
 pub fn type_from_layout(cfg: TargetFrontendConfig, layout: &Layout<'_>) -> Type {
-    use crate::mono::layout::Builtin::*;
-    use crate::mono::layout::Layout::*;
+    use roc_mono::layout::Builtin::*;
+    use roc_mono::layout::Layout::*;
 
     match layout {
         Pointer(_) | FunctionPointer(_, _) => cfg.pointer_type(),
diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs
index 726fcfee14..f6f9c5f009 100644
--- a/compiler/src/lib.rs
+++ b/compiler/src/lib.rs
@@ -14,4 +14,3 @@ pub mod crane;
 
 pub mod llvm;
 pub mod load;
-pub mod mono;
diff --git a/compiler/src/llvm/build.rs b/compiler/src/llvm/build.rs
index bc5367c19a..023e58461f 100644
--- a/compiler/src/llvm/build.rs
+++ b/compiler/src/llvm/build.rs
@@ -9,10 +9,10 @@ use inkwell::values::{FunctionValue, IntValue, PointerValue};
 use inkwell::{FloatPredicate, IntPredicate};
 
 use crate::llvm::convert::{basic_type_from_layout, get_fn_type};
-use crate::mono::expr::{Expr, Proc, Procs};
-use crate::mono::layout::Layout;
 use roc_collections::all::ImMap;
 use roc_module::symbol::{Interns, Symbol};
+use roc_mono::expr::{Expr, Proc, Procs};
+use roc_mono::layout::Layout;
 use roc_types::subs::{Subs, Variable};
 
 /// This is for Inkwell's FunctionValue::verify - we want to know the verification
@@ -42,7 +42,7 @@ pub fn build_expr<'a, 'ctx, 'env>(
     expr: &Expr<'a>,
     procs: &Procs<'a>,
 ) -> BasicValueEnum<'ctx> {
-    use crate::mono::expr::Expr::*;
+    use roc_mono::expr::Expr::*;
 
     match expr {
         Int(num) => env.context.i64_type().const_int(*num as u64, true).into(),
diff --git a/compiler/src/llvm/convert.rs b/compiler/src/llvm/convert.rs
index 6aa2b11a70..ff6c8de3e8 100644
--- a/compiler/src/llvm/convert.rs
+++ b/compiler/src/llvm/convert.rs
@@ -3,7 +3,7 @@ use inkwell::types::BasicTypeEnum::{self, *};
 use inkwell::types::{BasicType, FunctionType};
 use inkwell::AddressSpace;
 
-use crate::mono::layout::Layout;
+use roc_mono::layout::Layout;
 
 /// TODO could this be added to Inkwell itself as a method on BasicValueEnum?
 pub fn get_fn_type<'ctx>(
@@ -24,8 +24,8 @@ pub fn basic_type_from_layout<'ctx>(
     context: &'ctx Context,
     layout: &Layout<'_>,
 ) -> BasicTypeEnum<'ctx> {
-    use crate::mono::layout::Builtin::*;
-    use crate::mono::layout::Layout::*;
+    use roc_mono::layout::Builtin::*;
+    use roc_mono::layout::Layout::*;
 
     match layout {
         FunctionPointer(args, ret_layout) => {
diff --git a/compiler/src/mono/mod.rs b/compiler/src/mono/mod.rs
deleted file mode 100644
index ef0ddc4d01..0000000000
--- a/compiler/src/mono/mod.rs
+++ /dev/null
@@ -1,2 +0,0 @@
-pub mod expr;
-pub mod layout;
diff --git a/compiler/src/string.rs b/compiler/src/string.rs
new file mode 100644
index 0000000000..79aaf85ab1
--- /dev/null
+++ b/compiler/src/string.rs
@@ -0,0 +1,309 @@
+use std::alloc::{self, Layout};
+use std::fmt;
+use std::mem::{self, MaybeUninit};
+use std::ptr;
+use std::slice;
+use std::str;
+
+/// An immutable string whose maximum length is `isize::MAX`. (For convenience,
+/// it still returns its length as `usize` since it can't be negative.)
+///
+/// For larger strings, under the hood this is a struct which stores a
+/// pointer and a usize for length (so 16 bytes on a 64-bit system).
+///
+/// For smaller strings (lengths 0-15 on 64-bit systems, and 0-7 on 32-bit),
+/// this uses a "short string optimization" where it stores the entire string
+/// in this struct and does not bother allocating on the heap at all.
+pub struct RocStr(InnerStr);
+
+/// Roc strings are optimized not to do heap allocations when they are between
+/// 0-15 bytes in length on 64-bit little endian systems,
+/// and 0-7 bytes on systems that are 32-bit, big endian, or both.
+///
+/// This optimization relies on the assumption that string lengths are always
+/// less than isize::MAX as opposed to usize::MAX. It relies on this because
+/// it uses the most significant bit in the most significant byte in the length
+/// as a flag for whether it is a short string or a long string. This bit is
+/// unused if lengths are below isize::MAX.
+///
+/// Roc integers are i64, so on 64-bit systems this guarantee necessarily holds
+/// from the Roc side. On a 32-bit system it might not though. Rust historically
+/// had this guarantee, but it might get relaxed. For more on the Rust side, see
+/// https://github.com/rust-lang/unsafe-code-guidelines/issues/102
+///
+/// Since Roc will interpret them as i64, it's important that on 64-bit systems,
+/// Rust never sends Roc any length values above isize::MAX because they'll
+/// be interpreted as negative i64s!
+///
+/// Anyway, this "is this a short string?" bit is in a convenient location on
+/// 64-bit little endian systems. This is because of how Rust's &str is
+/// laid out, and memory alignment.
+///
+/// Rust's &str is laid out as a slice, namely:
+///
+/// struct RustStr { ptr: *const u8, length: usize }
+///
+/// On little endian systems, the bit for detecting short vs long length is
+/// the most significant bit of the length field, which lives in the very last byte
+/// in the struct.
+///
+/// This means if we detect that we are a short string, we can pass a pointer
+/// to the entire struct (which is necessarily aligned already), and its first
+/// contiguous N bytes represent the bytes in the string, where N is 15 on
+/// 64-bit systems and 7 on 32-bit ones. The final byte is the msbyte where
+/// we stored the flag, but it doesn't matter what's in that memory because the
+/// str's length will be too low to encounter that anyway.
+union InnerStr {
+    raw: [u8; 16],
+    long: LongStr,
+}
+
+#[derive(Copy)]
+#[repr(C)]
+struct LongStr {
+    /// It is *crucial* that we have exactly this memory layout!
+    /// This is the same layout that Rust uses for string slices in memory,
+    /// which lets us mem::transmute long strings directly into them.
+    ///
+    /// https://pramode.in/2016/09/13/using-unsafe-tricks-in-rust/
+    bytes: MaybeUninit<*const u8>,
+    length: usize,
+}
+
+// The bit pattern for an empty string. (1 and then all 0s.)
+// Any other bit pattern means this is not an empty string!
+#[cfg(target_pointer_width = "64")]
+const EMPTY_STRING: usize =
+    0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000;
+
+#[cfg(target_pointer_width = "32")]
+const EMPTY_STRING: usize = 0b1000_0000_0000_0000_0000_0000_0000_0000;
+
+impl RocStr {
+    #[inline(always)]
+    pub fn is_empty(&self) -> bool {
+        unsafe { self.0.long.length == EMPTY_STRING }
+    }
+
+    #[inline(always)]
+    pub fn empty() -> RocStr {
+        RocStr(InnerStr {
+            long: LongStr {
+                length: EMPTY_STRING,
+                // empty strings only ever have length set.
+                bytes: MaybeUninit::uninit(),
+            },
+        })
+    }
+
+    pub fn len(&self) -> usize {
+        let len_msbyte = self.len_msbyte();
+
+        if flagged_as_short_string(len_msbyte) {
+            // Drop the "is this a short string?" flag
+            let length: u8 = len_msbyte & 0b0111_1111;
+
+            length as usize
+        } else {
+            unsafe { self.0.long.length }
+        }
+    }
+
+    /// The most significant byte in the length. We use the most significant bit of
+    /// this byte to determine if we are a short string or a long string.
+    /// If this is a short string, we intentionally set that bit to 1.
+    #[inline(always)]
+    #[cfg(all(target_pointer_width = "64", target_endian = "little"))]
+    fn len_msbyte(&self) -> u8 {
+        (unsafe { mem::transmute::<usize, [u8; 8]>(self.0.long.length) })[7]
+    }
+
+    #[inline(always)]
+    #[cfg(all(target_pointer_width = "32", target_endian = "little"))]
+    fn len_msbyte(&self) -> u8 {
+        (unsafe { mem::transmute::<usize, [u8; 4]>(self.0.long.length) })[3]
+    }
+
+    #[inline(always)]
+    #[cfg(all(target_pointer_width = "64", target_endian = "big"))]
+    fn len_msbyte(&self) -> u8 {
+        (unsafe { mem::transmute::<usize, [u8; 8]>(self.0.long.length) })[0]
+    }
+
+    #[inline(always)]
+    #[cfg(all(target_pointer_width = "32", target_endian = "big"))]
+    fn len_msbyte(&self) -> u8 {
+        (unsafe { mem::transmute::<usize, [u8; 4]>(self.0.long.length) })[0]
+    }
+}
+
+#[inline(always)]
+fn flagged_as_short_string(len_msbyte: u8) -> bool {
+    // It's a short string iff the first bit of len_msbyte is 1.
+    len_msbyte & 0b1000_0000 == 0b1000_0000
+}
+
+#[inline(always)]
+fn with_short_string_flag_enabled(len_msbyte: u8) -> u8 {
+    // It's a short string iff the first bit of len_msbyte is 1.
+    len_msbyte | 0b1000_0000
+}
+
+impl fmt::Debug for RocStr {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // TODO do this without getting a cloned String involved
+        let string: String = self.clone().into();
+
+        string.fmt(f)
+    }
+}
+
+impl fmt::Display for RocStr {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // TODO do this without getting a cloned String involved
+        let string: String = self.clone().into();
+
+        string.fmt(f)
+    }
+}
+
+impl Clone for LongStr {
+    fn clone(&self) -> Self {
+        let length = self.length;
+        let layout = unsafe { Layout::from_size_align_unchecked(length, 8) };
+        let old_bytes_ptr = unsafe { self.bytes.assume_init() };
+
+        // Allocate memory for the new bytes. (We'll manually drop them later.)
+        let new_bytes_ptr = unsafe { alloc::alloc(layout) };
+
+        unsafe {
+            ptr::copy_nonoverlapping(old_bytes_ptr, new_bytes_ptr, length);
+        }
+
+        LongStr {
+            bytes: MaybeUninit::new(new_bytes_ptr),
+            length,
+        }
+    }
+}
+
+impl Into<String> for RocStr {
+    #[cfg(all(target_pointer_width = "64", target_endian = "little"))]
+    fn into(self) -> String {
+        let len_msbyte = self.len_msbyte();
+
+        // TODO I'm not sure this works the way we want it to. Need to review.
+
+        if flagged_as_short_string(len_msbyte) {
+            // Drop the "is this a short string?" flag
+            let length: u8 = len_msbyte & 0b0111_1111;
+            let bytes_ptr = unsafe { &self.0.raw } as *const u8;
+
+            // These bytes are already aligned, so we can use them directly.
+            let bytes_slice: &[u8] = unsafe { slice::from_raw_parts(bytes_ptr, length as usize) };
+
+            (unsafe { str::from_utf8_unchecked(bytes_slice) }).to_string()
+        } else {
+            // If it's a long string, we already have the exact
+            // same memory layout as a Rust &str slice.
+            let str_slice = unsafe { mem::transmute::<[u8; 16], &str>(self.0.raw) };
+            let string = str_slice.to_string();
+
+            // Drop will deallocate the bytes, which we don't want in this case.
+            // String is using those bytes now!
+            mem::forget(self);
+
+            string
+        }
+    }
+}
+
+impl From<String> for RocStr {
+    #[cfg(all(target_pointer_width = "64", target_endian = "little"))]
+    fn from(string: String) -> RocStr {
+        if string.is_empty() {
+            RocStr::empty()
+        } else {
+            let str_len = string.len();
+
+            if str_len <= 15 {
+                let mut buffer: [u8; 16] = [0; 16];
+
+                // Copy the raw bytes from the string into the buffer.
+                unsafe {
+                    // Write into the buffer's bytes
+                    ptr::copy_nonoverlapping(string.as_ptr(), buffer.as_ptr() as *mut u8, str_len);
+                }
+
+                // Set the last byte in the buffer to be the length (with flag).
+                buffer[15] = with_short_string_flag_enabled(string.len() as u8);
+
+                RocStr(InnerStr { raw: buffer })
+            } else {
+                panic!("TODO: use mem::forget on the string and steal its bytes!");
+                // let bytes_ptr = string.as_bytes().clone().as_ptr();
+                // let long = LongStr {
+                //     bytes: MaybeUninit::new(bytes_ptr),
+                //     length: str_len,
+                // };
+
+                // RocStr(InnerStr { long })
+            }
+        }
+    }
+}
+
+impl Clone for RocStr {
+    fn clone(&self) -> Self {
+        let inner = if flagged_as_short_string(self.len_msbyte()) {
+            InnerStr {
+                raw: (unsafe { self.0.raw }),
+            }
+        } else {
+            InnerStr {
+                long: (unsafe { self.0.long }),
+            }
+        };
+
+        RocStr(inner)
+    }
+}
+
+impl Drop for RocStr {
+    fn drop(&mut self) {
+        // If this is a LongStr, we need to deallocate its bytes.
+        // Otherwise we would have a memory leak!
+        if !flagged_as_short_string(self.len_msbyte()) {
+            let bytes_ptr = unsafe { self.0.long.bytes.assume_init() };
+
+            // If this was already dropped previously (most likely because the
+            // bytes were moved into a String), we shouldn't deallocate them.
+            if !bytes_ptr.is_null() {
+                let length = unsafe { self.0.long.length };
+                let layout = unsafe { Layout::from_size_align_unchecked(length, 8) };
+
+                // We don't need to call drop_in_place. We know bytes_ptr points to
+                // a plain u8 array, so there will for sure be no destructor to run.
+                unsafe {
+                    alloc::dealloc(bytes_ptr as *mut u8, layout);
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test_roc_str {
+    use super::RocStr;
+
+    #[test]
+    fn empty_str() {
+        assert!(RocStr::empty().is_empty());
+        assert_eq!(RocStr::empty().len(), 0);
+    }
+
+    #[test]
+    fn fmt() {
+        assert_eq!("".to_string(), format!("{}", RocStr::empty()));
+    }
+}
diff --git a/compiler/tests/test_gen.rs b/compiler/tests/test_gen.rs
index 5feafee63d..444bf75d5f 100644
--- a/compiler/tests/test_gen.rs
+++ b/compiler/tests/test_gen.rs
@@ -29,9 +29,9 @@ mod test_gen {
     use roc::crane::imports::define_malloc;
     use roc::llvm::build::{build_proc, build_proc_header};
     use roc::llvm::convert::basic_type_from_layout;
-    use roc::mono::expr::Expr;
-    use roc::mono::layout::Layout;
     use roc_collections::all::{ImMap, MutMap};
+    use roc_mono::expr::Expr;
+    use roc_mono::layout::Layout;
     use roc_types::subs::Subs;
     use std::ffi::{CStr, CString};
     use std::mem;
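
The doc comments in compiler/src/string.rs above explain how the short-string flag lives in the most significant bit of the length's most significant byte. As a rough illustration of that encoding (not part of the diff itself; it assumes a 64-bit little endian target and that the test sits inside the test_roc_str module with `use super::*;`, so the private helpers with_short_string_flag_enabled and flagged_as_short_string are in scope), a 5-byte short string ends up with this length byte:

#[test]
fn short_flag_byte_sketch() {
    // 5 with the short-string flag set in the most significant bit: 0b1000_0101.
    let encoded = with_short_string_flag_enabled(5);
    assert_eq!(encoded, 0b1000_0101);

    // The flag check looks only at that top bit...
    assert!(flagged_as_short_string(encoded));

    // ...and masking it off recovers the length, exactly as RocStr::len does.
    assert_eq!(encoded & 0b0111_1111, 5);
}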
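
Along the same lines, a minimal sketch of how the From<String> / Into<String> conversions above are meant to be used on the short-string path (again not part of the diff; it assumes the same test module and the 64-bit little endian cfg, since only that target has these impls, and it sticks to strings of 15 bytes or fewer because the long-string From path still panics with a TODO):

#[test]
fn short_str_round_trip_sketch() {
    let original = "hello".to_string();

    // Five bytes fit in the 16-byte inline buffer, so no heap allocation happens.
    let roc_str = RocStr::from(original.clone());
    assert_eq!(roc_str.len(), 5);
    assert!(!roc_str.is_empty());

    // Converting back copies the inline bytes into a freshly allocated String.
    let round_tripped: String = roc_str.into();
    assert_eq!(round_tripped, original);
}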