diff --git a/compiler/builtins/docs/Bytes.roc b/compiler/builtins/docs/Bytes.roc deleted file mode 100644 index e93afe948b..0000000000 --- a/compiler/builtins/docs/Bytes.roc +++ /dev/null @@ -1,81 +0,0 @@ -interface Bytes - exposes - [ - Bytes, - parseUtf8Usv, - parseUtf16Usv, - parseUtf8Grapheme, - parseUtf16Grapheme, - parsePastUtf8, - parsePastUtf16, - parseU16, - parseI16, - parseU32, - parseI32, - parseU64, - parseI64, - parseU128, - parseI128, - ] - imports [] - -# Conversion - -fromList : List U8 -> Bytes - -toList : Bytes -> List U8 - -len : Bytes -> Nat - -isEmpty : Bytes -> Bool - -## The [endianness](https://en.wikipedia.org/wiki/Endianness) of the currently running system. -hostEndi : Endi - -## [Endianness](https://en.wikipedia.org/wiki/Endianness) -## -## Be - Big Endian -## Le - Little Endian -Endi : [ Be, Le ] - -# Access - -splitFirst : Bytes -> Result { first : U8, rest : Bytes } [ NoBytes ]* - -take : Bytes, Nat -> Bytes - -# Building - -appendLe : Bytes, Num * -> Bytes -appendBe : Bytes, Num * -> Bytes -concat : Bytes, Bytes -> Bytes - -# Parsing - -## Parse a [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value) -## (USV) encoded as UTF-8. -## -## To parse an entire UTF-8 string, you can use #Bytes.toUtf8 or #Bytes.parsePastUtf8. -parseUsvUtf8 : Bytes -> Result { val : Usv, rest : Bytes } [ Expected [ Utf8Usv ]* Bytes ]* -parseUsvUtf16 : Bytes, Endi -> Result { val : Usv, rest : Bytes } [ Expected [ Utf16Usv Endi ]* Bytes ]* -parseGraphemeUtf8 : Bytes -> Result { val : Str, rest : Bytes } [ Expected [ Utf8Grapheme ]* Bytes ]* -parseGraphemeUtf16 : Bytes, Endi -> Result { val : Str, rest : Bytes } [ Expected [ Utf16Grapheme Endi ]* Bytes ]* - -## If the bytes begin with the given UTF-8 string, return whatever bytes come -## after it. 
-chompUtf8 : Bytes, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]* -chompUtf16 : Bytes, Endi, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]* -chompUsvUtf8 : Usv -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]* -chompUsvUtf16 : Usv, Endi -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]* -## If the bytes begin with the given bytes, return whatever bytes come -## after them. -chompBytes : Bytes, Bytes -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]* - -parseU16 : Bytes, Endi -> Result { val : U16, rest : Bytes } [ Expected [ U16 Endi ]* Bytes ]* -parseI16 : Bytes, Endi -> Result { val : I16, rest : Bytes } [ Expected [ I16 Endi ]* Bytes ]* -parseU32 : Bytes, Endi -> Result { val : U32, rest : Bytes } [ Expected [ U32 Endi ]* Bytes ]* -parseI32 : Bytes, Endi -> Result { val : I32, rest : Bytes } [ Expected [ I32 Endi ]* Bytes ]* -parseU64 : Bytes, Endi -> Result { val : U64, rest : Bytes } [ Expected [ U64 Endi ]* Bytes ]* -parseI64 : Bytes, Endi -> Result { val : I64, rest : Bytes } [ Expected [ I64 Endi ]* Bytes ]* -parseU128 : Bytes, Endi -> Result { val : U128, rest : Bytes } [ Expected [ U128 Endi ]* Bytes ]* -parseI128 : Bytes, Endi -> Result { val : I128, rest : Bytes } [ Expected [ I128 Endi ]* Bytes ]* diff --git a/compiler/builtins/docs/Num.roc b/compiler/builtins/docs/Num.roc index 430bd10cc1..82d2fc3d3e 100644 --- a/compiler/builtins/docs/Num.roc +++ b/compiler/builtins/docs/Num.roc @@ -829,21 +829,12 @@ tryRecip : Float a -> Result (Float a) [ DivByZero ]* ## >>> Float.sqrt -4.0 sqrt : Float a -> [Ok (Float a), InvalidSqrt]* -## Try to convert a [Num] to a [Usv]. -## -## Although [Usv]s are [U32]s under the hood, -## not all [U32]s are valid [Usv]s. -## -## If you specifically have a [U8], the [Num.ascii] -## function will convert it directly to a [Usv] -## with no possibility of failure. -toUsv : Num * -> Result Usv [ InvalidUsv ]* -## Convert a raw [U8] to a [Usv]. 
-## -## Since all [U8] values are valid [Usv]s, this -## operation cannot fail. -ascii : U8 -> Usv +## [Endianness](https://en.wikipedia.org/wiki/Endianness) +Endi : [ Big, Little ] -## Convert a [Usv] into a [U32]. -fromUsv : Usv -> U32 +## The [endianness](https://en.wikipedia.org/wiki/Endianness) of [Num] values on +## the currently running system. +endi : Endi + +toBytes : Num *, Endi -> List U8 diff --git a/compiler/builtins/docs/Str.roc b/compiler/builtins/docs/Str.roc index f596eab552..875f6d5c59 100644 --- a/compiler/builtins/docs/Str.roc +++ b/compiler/builtins/docs/Str.roc @@ -101,15 +101,6 @@ interface Str ## A [Unicode](https://unicode.org) text value. Str : [ @Str ] -## A [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value). -## -## This is a [U32] that has been validated to be in the acceptable range for -## a USV. -## -## You can make one of these using single quote literals - e.g. `'x'` - or -## convert to and from a raw [Num] using [Num.toUsv] and [Num.fromUsv]. -Usv : [ @Usv U32 ] - ## Convert ## Convert a #Float to a decimal string, rounding off to the given number of decimal places. @@ -235,6 +226,9 @@ isCaseInsensitiveEq : Str, Str -> Bool isCaseInsensitiveNeq : Str, Str -> Bool walkGraphemes : Str, { start: state, step: (state, Str -> state) } -> state +walkGraphemesUntil : Str, { start: state, step: (state, Str -> [ Continue state, Done state ]) } -> state +walkGraphemesBackwards : Str, { start: state, step: (state, Str -> state) } -> state +walkGraphemesBackwardsUntil : Str, { start: state, step: (state, Str -> [ Continue state, Done state ]) } -> state ## Returns #True if the string begins with an uppercase letter. ## @@ -307,19 +301,17 @@ isAllLowercase : Str -> Bool ## as well as the end. trim : Str -> Str -fromUtf8 : Bytes -> Result Str [ BadUtf8 ]* +## If the given [U32] is a valid [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value), +## return a [Str] containing only that scalar. 
+fromScalar : U32 -> Result Str [ BadScalar ]* +fromCodePoints : List U32 -> Result Str [ BadCodePoint U32 ]* +fromUtf8 : List U8 -> Result Str [ BadUtf8 ]* +fromUtf16 : List U8, Endi -> Result Str [ BadUtf16 Endi ]* ## Convert from UTF-8, substituting the replacement character ("�") for any ## invalid sequences encountered. -fromUtf8Sub : Bytes -> Str - -fromUtf16Le : Bytes -> Result Str [ BadUtf16Le ]* - -fromUtf16LeSub : Bytes -> Str - -fromUtf16Be : Bytes -> Result Str [ BadUtf16Be ]* - -fromUtf16BeSub : Bytes -> Str +fromUtf8Sub : List U8 -> Str +fromUtf16Sub : List U8, Endi -> Str ## Return a #List of the string's #U8 UTF-8 [code units](https://unicode.org/glossary/#code_unit). ## (To split the string into a #List of smaller #Str values instead of #U8 values, @@ -335,70 +327,21 @@ fromUtf16BeSub : Bytes -> Str ## ## For a more flexible function that walks through each of these #U8 code units ## without creating a #List, see #Str.walkUtf8 and #Str.walkRevUtf8. -toUtf8 : Str -> Bytes - -toUtf16Le : Str -> Bytes - -toUtf16Be : Str -> Bytes - -## Unicode Scalar Values - -## Besides graphemes and bytes, another way to break down strings is into -## Unicode Scalar Values. -## -## USVs are no substitute for graphemes! -## These functions exist to support advanced use cases like those found in -## [roc/unicode](roc/unicode), and using USVs when graphemes would -## be more appropriate can very easily lead to bugs. -## -## For example, `Str.countGraphemes "👩‍👩‍👦‍👦"` returns `1`, -## whereas `Str.toUtf8 "👩‍👩‍👦‍👦"` returns a list with a length of 25, -## `Str.toUtf16 "👩‍👩‍👦‍👦"` returns a list with a length of 11. -## and `Str.toUtf32 "👩‍👩‍👦‍👦"` returns a list with a length of 7. - - -## Walk through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value) -## (USVs) to build up a state. -## (If you want a `step` function which receives a #Str instead of an #Usv, see #Str.walkGraphemes.) 
-## -## Here are the #Usv values that will be passed to `step` when this function is -## called on various strings: -## -## * `"👩‍👩‍👦‍👦"` passes 128105, 8205, 128105, 8205, 128102, 8205, 128102 -## * `"Roc"` passes 82, 111, 99 -## * `"鹏"` passes 40527 -## * `"🐦"` passes 128038 -walkUsv : Str, { start: state, step: (state, Usv -> state) } -> state - -## Walk backwards through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value) -## (USVs) to build up a state. -## (If you want a `step` function which receives a #Str instead of an #Usv, see #Str.walkGraphemes.) -## -## Here are the #Usv values that will be passed to `step` when this function is -## called on various strings: -## -## * `"👩‍👩‍👦‍👦"` passes 128102, 8205, 128102, 8205, 128105, 8205, 128105 -## * `"Roc"` passes 99, 111, 82 -## * `"鹏"` passes 40527 -## * `"🐦"` passes 128038 -## -## To convert a #Str into a plain `List Usv` of UTF-32 code units, see #Str.toUtf32. -walkBackwardsUsv : Str, { start: state, step: (state, Usv -> state) } -> state +toUtf8 : Str -> List U8 +toUtf16 : Str, Endi -> List U8 # Parsing -## Return the first [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value) -## in the string, along with the rest of the string after that USV. -parseUsv : Str -> Result { val : Usv, rest : Str } [ Expected [ ValidUsv ]* Str ]* - ## Return the first [extended grapheme cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster) ## in the string, along with the rest of the string after that grapheme. +## +## If the string does not contain a full grapheme, for example because it was +## empty, returns `Err`. parseGrapheme : Str -> Result { val : Str, rest : Str } [ Expected [ Grapheme ]* Str ]* ## If the first string begins with the second, return whatever comes ## after the second. 
-chompStr : Str, Str -> Result Str [ Expected [ ExactStr Str ]* Bytes ]*
-chompUsv : Usv -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
+chomp : Str, Str -> Result Str [ Expected [ ExactStr Str ]* Str ]*
 
 ## If the string begins with digits which can represent a valid #U8, return
 ## that number along with the rest of the string after the digits.
diff --git a/packages/bytes/Package-Config.roc b/packages/bytes/Package-Config.roc
new file mode 100644
index 0000000000..cfd9df8152
--- /dev/null
+++ b/packages/bytes/Package-Config.roc
@@ -0,0 +1,5 @@
+package roc/bytes 0.1.0
+    roc 0.0.0
+    exposes [ Bytes ]
+    packages {}
+    license UPL-1.0
diff --git a/packages/bytes/src/Bytes.roc b/packages/bytes/src/Bytes.roc
new file mode 100644
index 0000000000..3a291e1784
--- /dev/null
+++ b/packages/bytes/src/Bytes.roc
@@ -0,0 +1,65 @@
+interface Bytes
+    exposes
+        [
+            Bytes,
+            splitFirst,
+            take,
+            append,
+            concat,
+            parseGraphemesUtf8,
+            parseGraphemesUtf16,
+            chompUtf8,
+            chompUtf16,
+            chomp,
+            parseU16,
+            parseI16,
+            parseU32,
+            parseI32,
+            parseU64,
+            parseI64,
+            parseU128,
+            parseI128,
+            parseF64,
+            parseF32,
+        ]
+    imports []
+
+Bytes : List U8
+
+# Access
+
+splitFirst : Bytes -> Result { val : U8, rest : Bytes } [ NoBytes ]*
+
+take : Bytes, Nat -> Bytes
+
+# Building
+
+append : Bytes, Endi, Num * -> Bytes
+concat : Bytes, Bytes -> Bytes
+
+# Parsing
+
+## Parse an exact number of UTF-8 [extended grapheme clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster)
+## into a [Str], and return the rest of the bytes after those graphemes.
+parseGraphemesUtf8 : Bytes, Nat -> Result { val : Str, rest : Bytes } [ Expected [ Utf8Grapheme ]* Bytes ]*
+parseGraphemesUtf16 : Bytes, Endi, Nat -> Result { val : Str, rest : Bytes } [ Expected [ Utf16Grapheme Endi ]* Bytes ]*
+
+## If the bytes begin with the given string encoded as UTF-8, return whatever
+## bytes come after. 
+chompUtf8 : Bytes, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
+chompUtf16 : Bytes, Endi, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
+
+## If the bytes begin with the given bytes, return whatever comes after.
+chomp : Bytes, Bytes -> Result Bytes [ Expected [ ExactBytes Bytes ]* Bytes ]*
+
+parseU16 : Bytes, Endi -> Result { val : U16, rest : Bytes } [ Expected [ NumU16 Endi ]* Bytes ]*
+parseI16 : Bytes, Endi -> Result { val : I16, rest : Bytes } [ Expected [ NumI16 Endi ]* Bytes ]*
+parseU32 : Bytes, Endi -> Result { val : U32, rest : Bytes } [ Expected [ NumU32 Endi ]* Bytes ]*
+parseI32 : Bytes, Endi -> Result { val : I32, rest : Bytes } [ Expected [ NumI32 Endi ]* Bytes ]*
+parseU64 : Bytes, Endi -> Result { val : U64, rest : Bytes } [ Expected [ NumU64 Endi ]* Bytes ]*
+parseI64 : Bytes, Endi -> Result { val : I64, rest : Bytes } [ Expected [ NumI64 Endi ]* Bytes ]*
+parseU128 : Bytes, Endi -> Result { val : U128, rest : Bytes } [ Expected [ NumU128 Endi ]* Bytes ]*
+parseI128 : Bytes, Endi -> Result { val : I128, rest : Bytes } [ Expected [ NumI128 Endi ]* Bytes ]*
+
+parseF64 : Bytes, Endi -> Result { val : F64, rest : Bytes } [ Expected [ NumF64 Endi ]* Bytes ]*
+parseF32 : Bytes, Endi -> Result { val : F32, rest : Bytes } [ Expected [ NumF32 Endi ]* Bytes ]*
diff --git a/packages/parser/src/Bytes/Parser.roc b/packages/parser/src/Bytes/Parser.roc
index 958d08bb51..3d7d8c44d2 100644
--- a/packages/parser/src/Bytes/Parser.roc
+++ b/packages/parser/src/Bytes/Parser.roc
@@ -40,8 +40,8 @@ Problem :
     NumF32 Endi,
     Utf8 Str,
     Utf16 Str Endi,
-    UsvUtf8,
-    UsvUtf16 Endi,
+    CodePointUtf8,
+    CodePointUtf16 Endi,
     GraphemeUtf8,
     GraphemeUtf16 Endi,
     End,
@@ -52,12 +52,10 @@ Problem :
 keep : Parser a, (a -> Parser b) -> Parser b
 skip : Parser *, ({} -> Parser b) -> Parser b
 
-graphemeUtf8 : Parser Str
-graphemeUtf16 : Endi -> Parser Str
 utf8 : Str -> Parser Str
 utf16 : Str, Endi -> Parser Str
-usvUtf8 : Parser U32 # UTF-8 defines endianness 
-usvUtf16 : Endi -> Parser U32
+graphemeUtf8 : Parser Str
+graphemeUtf16 : Endi -> Parser Str
 
 u8 : Parser U8
 i8 : Parser I8
diff --git a/packages/unicode/Package-Config.roc b/packages/unicode/Package-Config.roc
new file mode 100644
index 0000000000..5e73e18ca9
--- /dev/null
+++ b/packages/unicode/Package-Config.roc
@@ -0,0 +1,9 @@
+package roc/unicode 0.1.0
+    roc 0.0.0
+    exposes [ Unicode, Unicode.Scalar, Unicode.CodePoint ]
+    packages {}
+    license UPL-1.0
+
+# TODO should we handle Latin1 encoding? Other encodings? Should there be
+# an Ascii module, or a separate roc/ascii package? Consider that ASCII is
+# 7-bit, so not all U8s are valid ASCII! There's also Extended ASCII to consider.
diff --git a/packages/unicode/src/Unicode/CodePoint.roc b/packages/unicode/src/Unicode/CodePoint.roc
new file mode 100644
index 0000000000..a700b0d802
--- /dev/null
+++ b/packages/unicode/src/Unicode/CodePoint.roc
@@ -0,0 +1,41 @@
+interface Unicode.CodePoint
+    exposes
+        [
+            CodePoint,
+            toU32,
+            fromU32,
+            parseUtf8,
+            parseUtf16,
+            chompUtf8,
+            chompUtf16
+        ]
+    imports
+        [
+            Unicode.CodePoint.Internal as Internal
+        ]
+
+## A [Unicode Code Point](http://www.unicode.org/glossary/#code_point)
+CodePoint : Internal.CodePoint
+
+toU32 : CodePoint -> U32
+toU32 = \codePoint -> Internal.toU32 codePoint
+
+## To convert exactly one [CodePoint] to a [Str], that code point must be
+## a valid [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value).
+## You can get one of those by calling [Unicode.Scalar.fromCodePoint], and then
+## you can call [Unicode.Scalar.toStr] to get a [Str] from it. 
+toStr : List CodePoint -> Result Str [ BadCodePoint U32 ]* +toStr = \points -> + u32s = List.map points toU32 + + Str.fromCodePoints u32s + +fromU32 : U32 -> Result CodePoint [ BadCodePoint ]* + +parseUtf8 : Bytes -> Result { val : CodePoint, rest : Bytes } [ Expected [ Utf8CodePoint ]* Bytes ]* +parseUtf16 : Bytes, Endi -> Result { val : CodePoint, rest : Bytes } [ Expected [ Utf16CodePoint Endi ]* Bytes ]* + +chompUtf8 : Bytes, CodePoint -> Result Str [ Expected [ ExactCodePoint CodePoint ]* Bytes ]* +chompUtf16 : Bytes, CodePoint, Endi -> Result Str [ Expected [ ExactCodePoint CodePoint ]* Bytes ]* + +isAsciiDigit : CodePoint -> Bool diff --git a/packages/unicode/src/Unicode/CodePoint/Internal.roc b/packages/unicode/src/Unicode/CodePoint/Internal.roc new file mode 100644 index 0000000000..5bac8917c7 --- /dev/null +++ b/packages/unicode/src/Unicode/CodePoint/Internal.roc @@ -0,0 +1,21 @@ +interface Unicode.CodePoint.Internal + exposes + [ + CodePoint, + toU32, + fromU32, + fromU32Unchecked, + ] + imports + [] + +## This is just here so that both Unicode.Scalar and Unicode.CodePoint can access it. 
+CodePoint : [ @CodePoint U32 ]
+
+fromU32Unchecked : U32 -> CodePoint
+fromU32Unchecked = \u32 -> @CodePoint u32
+
+toU32 : CodePoint -> U32
+toU32 = \@CodePoint u32 -> u32
+
+fromU32 : U32 -> Result CodePoint [ BadCodePoint ]*
diff --git a/packages/unicode/src/Unicode/Scalar.roc b/packages/unicode/src/Unicode/Scalar.roc
new file mode 100644
index 0000000000..18bba5854a
--- /dev/null
+++ b/packages/unicode/src/Unicode/Scalar.roc
@@ -0,0 +1,45 @@
+interface Unicode.Scalar
+    exposes
+        [
+            Scalar,
+            toStr,
+            toCodePoint,
+            fromCodePoint,
+            parseUtf8,
+            parseUtf16,
+            chompUtf8,
+            chompUtf16
+        ]
+    imports
+        [
+            Unicode.CodePoint.Internal as Internal,
+            Unicode.CodePoint.{ CodePoint },
+            Bytes.{ Bytes }
+        ]
+
+## A [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)
Scalar : [ @Scalar U32 ]
+
+toStr : Scalar -> Str
+toStr = \@Scalar u32 ->
+    when Str.fromScalar u32 is
+        Ok str -> str
+        Err _ ->
+            # This will quickly crash if it ever runs, but we're confident
+            # this Err branch will never run. That's because it only runs
+            # if Str.fromScalar receives an invalid scalar value, and we've
+            # already validated this!
+            toStr (@Scalar (u32 * 256))
+
+toCodePoint : Scalar -> CodePoint
+toCodePoint = \@Scalar u32 -> Internal.fromU32Unchecked u32
+
+fromCodePoint : CodePoint -> Result Scalar [ PointWasSurrogate ]*
+
+parseUtf8 : Bytes -> Result { val : Scalar, rest : Bytes } [ Expected [ Utf8CodePoint ]* Bytes ]*
+parseUtf16 : Bytes, Endi -> Result { val : Scalar, rest : Bytes } [ Expected [ Utf16CodePoint Endi ]* Bytes ]*
+
+chompUtf8 : Bytes, CodePoint -> Result Str [ Expected [ ExactCodePoint CodePoint ]* Bytes ]*
+chompUtf16 : Bytes, CodePoint, Endi -> Result Str [ Expected [ ExactCodePoint CodePoint ]* Bytes ]*
+
+isAsciiDigit : CodePoint -> Bool