First-class Usv type

2025-10-01 07:41:12 +00:00 · 2021-04-29 21:41:04 -04:00 · 2021-04-29 21:41:04 -04:00 · 81014c3790
commit 81014c3790
parent 391a4f13db
5 changed files with 119 additions and 96 deletions
--- a/compiler/builtins/docs/Bytes.roc
+++ b/compiler/builtins/docs/Bytes.roc
@ -8,22 +8,14 @@ interface Bytes
            parseUtf16Grapheme,
            parsePastUtf8,
            parsePastUtf16,
-            parseLeU16,
+            parseU16,
-            parseLeI16,
+            parseI16,
-            parseLeU32,
+            parseU32,
-            parseLeI32,
+            parseI32,
-            parseLeU64,
+            parseU64,
-            parseLeI64,
+            parseI64,
-            parseLeU128,
+            parseU128,
-            parseLeI128,
+            parseI128,
            parseBeU16,
            parseBeI16,
            parseBeU32,
            parseBeI32,
            parseBeU64,
            parseBeI64,
            parseBeU128,
            parseBeI128
        ]
    imports []
@ -37,8 +29,14 @@ len : Bytes -> Nat
 isEmpty : Bytes -> Bool
-## The endianness of the currently running system.
+## The [endianness](https://en.wikipedia.org/wiki/Endianness) of the currently running system.
-hostEndianness : [ Big, Little ]
+hostEndi : Endi
 ## [Endianness](https://en.wikipedia.org/wiki/Endianness)
 ##
 ## Be - Big Endian
 ## Le - Little Endian
 Endi : [ Be, Le ]
 # Access
@ -54,38 +52,30 @@ concat : Bytes, Bytes -> Bytes
 # Parsing
 ## Parse a [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)
 ## (USV) encoded as UTF-8.
 ##
 ## To parse an entire UTF-8 string, you can use #Bytes.toUtf8 or #Bytes.parsePastUtf8.
-parseUsvUtf8 : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf8Usv ]* Bytes ]*
+parseUsvUtf8 : Bytes -> Result { val : Usv, rest : Bytes } [ Expected [ Utf8Usv ]* Bytes ]*
-parseUsvUtf16Le : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf16BeUsv ]* Bytes ]*
+parseUsvUtf16 : Bytes, Endi -> Result { val : Usv, rest : Bytes } [ Expected [ Utf16Usv Endi ]* Bytes ]*
-parseUsvUtf16Be : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf16BeUsv ]* Bytes ]*
+parseGraphemeUtf8 : Bytes -> Result { val : Str, rest : Bytes } [ Expected [ Utf8Grapheme ]* Bytes ]*
-parseGraphemeUtf8 : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf8Grapheme ]* Bytes ]*
+parseGraphemeUtf16 : Bytes, Endi -> Result { val : Str, rest : Bytes } [ Expected [ Utf16Grapheme Endi ]* Bytes ]*
 parseGraphemeUtf16Le : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf16LeGrapheme ]* Bytes ]*
 parseGraphemeUtf16Be : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf16BeGrapheme ]* Bytes ]*
-## If the bytes begin with the given string, return whatever bytes come
+## If the bytes begin with the given UTF-8 string, return whatever bytes come
 ## after it.
-parsePastStr : Bytes, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
+chompUtf8 : Bytes, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
 chompUtf16 : Bytes, Endi, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
 chompUsvUtf8 : Usv -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
 chompUsvUtf16 : Usv, Endi -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
 ## If the bytes begin with the given bytes, return whatever bytes come
 ## after them.
 chompBytes : Bytes, Bytes -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
-# Little-Endian
+parseU16 : Bytes, Endi -> Result { val : U16, rest : Bytes } [ Expected [ U16 Endi ]* Bytes ]*
-parseU16Le : Bytes -> Result { answer : U16, rest : Bytes } [ Expected [ U16 ]* Bytes ]*
+parseI16 : Bytes, Endi -> Result { val : I16, rest : Bytes } [ Expected [ I16 Endi ]* Bytes ]*
-parseI16Le : Bytes -> Result { answer : I16, rest : Bytes } [ Expected [ I16 ]* Bytes ]*
+parseU32 : Bytes, Endi -> Result { val : U32, rest : Bytes } [ Expected [ U32 Endi ]* Bytes ]*
-parseU32Le : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ U32 ]* Bytes ]*
+parseI32 : Bytes, Endi -> Result { val : I32, rest : Bytes } [ Expected [ I32 Endi ]* Bytes ]*
-parseI32Le : Bytes -> Result { answer : I32, rest : Bytes } [ Expected [ I32 ]* Bytes ]*
+parseU64 : Bytes, Endi -> Result { val : U64, rest : Bytes } [ Expected [ U64 Endi ]* Bytes ]*
-parseU64Le : Bytes -> Result { answer : U64, rest : Bytes } [ Expected [ U64 ]* Bytes ]*
+parseI64 : Bytes, Endi -> Result { val : I64, rest : Bytes } [ Expected [ I64 Endi ]* Bytes ]*
-parseI64Le : Bytes -> Result { answer : I64, rest : Bytes } [ Expected [ I64 ]* Bytes ]*
+parseU128 : Bytes, Endi -> Result { val : U128, rest : Bytes } [ Expected [ U128 Endi ]* Bytes ]*
-parseU128Le : Bytes -> Result { answer : U128, rest : Bytes } [ Expected [ U128 ]* Bytes ]*
+parseI128 : Bytes, Endi -> Result { val : I128, rest : Bytes } [ Expected [ I128 Endi ]* Bytes ]*
 parseI128Le : Bytes -> Result { answer : I128, rest : Bytes } [ Expected [ I128 ]* Bytes ]*
 # Big-Endian
 parseU16Be : Bytes -> Result { answer : U16, rest : Bytes } [ Expected [ U16 ]* Bytes ]*
 parseI16Be : Bytes -> Result { answer : I16, rest : Bytes } [ Expected [ I16 ]* Bytes ]*
 parseU32Be : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ U32 ]* Bytes ]*
 parseI32Be : Bytes -> Result { answer : I32, rest : Bytes } [ Expected [ I32 ]* Bytes ]*
 parseU64Be : Bytes -> Result { answer : U64, rest : Bytes } [ Expected [ U64 ]* Bytes ]*
 parseI64Be : Bytes -> Result { answer : I64, rest : Bytes } [ Expected [ I64 ]* Bytes ]*
 parseU128Be : Bytes -> Result { answer : U128, rest : Bytes } [ Expected [ U128 ]* Bytes ]*
 parseI128Be : Bytes -> Result { answer : I128, rest : Bytes } [ Expected [ I128 ]* Bytes ]*
--- a/compiler/builtins/docs/Num.roc
+++ b/compiler/builtins/docs/Num.roc
@ -336,7 +336,6 @@ Int size : Num [ @Int size ]
 ## eliminate the performance difference between loud and quiet errors in
 ## the situation where no error occurs.
 ## Convert
 ## Return a negative number when given a positive one, and vice versa.
@ -829,3 +828,22 @@ tryRecip : Float a -> Result (Float a) [ DivByZero ]*
 ##
 ## >>> Float.sqrt -4.0
 sqrt : Float a -> [Ok (Float a), InvalidSqrt]*
 ## Try to convert a [Num] to a [Usv].
 ##
 ## Although [Usv]s are [U32]s under the hood,
 ## not all [U32]s are valid [Usv]s.
 ##
 ## If you specifically have a [U8], the [Num.ascii]
 ## function will convert it directly to a [Usv]
 ## with no possibility of failure.
 toUsv : Num * -> Result Usv [ InvalidUsv ]*
 ## Convert a raw [U8] to a [Usv].
 ##
 ## Since all [U8] values are valid [Usv]s, this
 ## operation cannot fail.
 ascii : U8 -> Usv
 ## Convert a [Usv] into a [U32].
 fromUsv : Usv -> U32
--- a/compiler/builtins/docs/Str.roc
+++ b/compiler/builtins/docs/Str.roc
@ -101,6 +101,15 @@ interface Str
 ## A [Unicode](https://unicode.org) text value.
 Str : [ @Str ]
 ## A [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value).
 ##
 ## This is a [U32] that has been validated to be in the acceptable range for
 ## a USV.
 ##
 ## You can make one of these using single quote literals - e.g. `'x'` - or
 ## convert to and from a raw [Num] using [Num.toUsv] and [Num.fromUsv].
 Usv : [ @Usv U32 ]
 ## Convert
 ## Convert a #Float to a decimal string, rounding off to the given number of decimal places.
@ -334,7 +343,7 @@ toUtf16Be : Str -> Bytes
 ## Unicode Scalar Values
-## Besides graphemes, another way to break down strings is into
+## Besides graphemes and bytes, another way to break down strings is into
 ## Unicode Scalar Values.
 ##
 ## USVs are no substitute for graphemes!
@ -350,22 +359,22 @@ toUtf16Be : Str -> Bytes
 ## Walk through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value)
 ## (USVs) to build up a state.
-## (If you want a `step` function which receives a #Str instead of an #U32, see #Str.walkGraphemes.)
+## (If you want a `step` function which receives a #Str instead of a #Usv, see #Str.walkGraphemes.)
 ##
-## Here are the #U32 values that will be passed to `step` when this function is
+## Here are the #Usv values that will be passed to `step` when this function is
 ## called on various strings:
 ##
 ## * `"👩‍👩‍👦‍👦"` passes 128105, 8205, 128105, 8205, 128102, 8205, 128102
 ## * `"Roc"` passes 82, 111, 99
 ## * `"鹏"` passes 40527
 ## * `"🐦"` passes 128038
-walkUsv : Str, { start: state, step: (state, U32 -> state) } -> state
+walkUsv : Str, { start: state, step: (state, Usv -> state) } -> state
 ## Walk backwards through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value)
 ## (USVs) to build up a state.
-## (If you want a `step` function which receives a #Str instead of an #U32, see #Str.walkGraphemes.)
+## (If you want a `step` function which receives a #Str instead of a #Usv, see #Str.walkGraphemes.)
 ##
-## Here are the #U32 values that will be passed to `step` when this function is
+## Here are the #Usv values that will be passed to `step` when this function is
 ## called on various strings:
 ##
 ## * `"👩‍👩‍👦‍👦"` passes 128102, 8205, 128102, 8205, 128105, 8205, 128105
@ -373,36 +382,36 @@ walkUsv : Str, { start: state, step: (state, U32 -> state) } -> state
 ## * `"鹏"` passes 40527
 ## * `"🐦"` passes 128038
 ##
-## To convert a #Str into a plain `List U32` of UTF-32 code units, see #Str.toUtf32.
+## To convert a #Str into a plain `List Usv` of UTF-32 code units, see #Str.toUtf32.
-walkBackwardsUsv : Str, { start: state, step: (state, U32 -> state) } -> state
+walkBackwardsUsv : Str, { start: state, step: (state, Usv -> state) } -> state
 # Parsing
 ## Return the first [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)
 ## in the string, along with the rest of the string after that USV.
-parseUsv : Str -> Result { answer : U32, rest : Str } [ StrWasEmpty ]*
+parseUsv : Str -> Result { val : Usv, rest : Str } [ Expected [ ValidUsv ]* Str ]*
 ## Return the first [extended grapheme cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster)
 ## in the string, along with the rest of the string after that grapheme.
-parseGrapheme : Str -> Result { answer : Str, rest : Str } [ Expected [ Grapheme ]* Str ]*
+parseGrapheme : Str -> Result { val : Str, rest : Str } [ Expected [ Grapheme ]* Str ]*
 ## If the first string begins with the second, return whatever comes
 ## after the second.
 chompStr : Str, Str -> Result Str [ Expected [ ExactStr Str ]* Bytes ]*
-chompUsv, U32 -> Result Str [ Expected [ Usv U32 ]* Bytes ]*
+chompUsv : Usv -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
 ## If the string begins with digits which can represent a valid #U8, return
 ## that number along with the rest of the string after the digits.
-parseU8 : Str -> Result { answer : U8, rest : Str } [ Expected [ NumU8 ]* Str ]*
+parseU8 : Str -> Result { val : U8, rest : Str } [ Expected [ NumU8 ]* Str ]*
-parseI8 : Str -> Result { answer : I8, rest : Str } [ Expected [ NumI8 ]* Str ]*
+parseI8 : Str -> Result { val : I8, rest : Str } [ Expected [ NumI8 ]* Str ]*
-parseU16 : Str -> Result { answer : U16, rest : Str } [ Expected [ NumU16 ]* Str ]*
+parseU16 : Str -> Result { val : U16, rest : Str } [ Expected [ NumU16 ]* Str ]*
-parseI16 : Str -> Result { answer : I16, rest : Str } [ Expected [ NumI16 ]* Str ]*
+parseI16 : Str -> Result { val : I16, rest : Str } [ Expected [ NumI16 ]* Str ]*
-parseU32 : Str -> Result { answer : U32, rest : Str } [ Expected [ NumU32 ]* Str ]*
+parseU32 : Str -> Result { val : U32, rest : Str } [ Expected [ NumU32 ]* Str ]*
-parseI32 : Str -> Result { answer : I32, rest : Str } [ Expected [ NumI32 ]* Str ]*
+parseI32 : Str -> Result { val : I32, rest : Str } [ Expected [ NumI32 ]* Str ]*
-parseU64 : Str -> Result { answer : U64, rest : Str } [ Expected [ NumU64 ]* Str ]*
+parseU64 : Str -> Result { val : U64, rest : Str } [ Expected [ NumU64 ]* Str ]*
-parseI64 : Str -> Result { answer : I64, rest : Str } [ Expected [ NumI64 ]* Str ]*
+parseI64 : Str -> Result { val : I64, rest : Str } [ Expected [ NumI64 ]* Str ]*
-parseU128 : Str -> Result { answer : U128, rest : Str } [ Expected [ NumU128 ]* Str ]*
+parseU128 : Str -> Result { val : U128, rest : Str } [ Expected [ NumU128 ]* Str ]*
-parseI128 : Str -> Result { answer : I128, rest : Str } [ Expected [ NumI128 ]* Str ]*
+parseI128 : Str -> Result { val : I128, rest : Str } [ Expected [ NumI128 ]* Str ]*
-parseF64 : Str -> Result { answer : U128, rest : Str } [ Expected [ NumF64 ]* Str ]*
+parseF64 : Str -> Result { val : U128, rest : Str } [ Expected [ NumF64 ]* Str ]*
-parseF32 : Str -> Result { answer : I128, rest : Str } [ Expected [ NumF32 ]* Str ]*
+parseF32 : Str -> Result { val : I128, rest : Str } [ Expected [ NumF32 ]* Str ]*
--- a/packages/parser/src/Bytes/Parser.roc
+++ b/packages/parser/src/Bytes/Parser.roc
@ -28,40 +28,44 @@ Problem :
            [
                NumU8,
                NumI8,
-                NumU16,
+                NumU16 Endi,
-                NumI16,
+                NumI16 Endi,
-                NumU32,
+                NumU32 Endi,
-                NumI32,
+                NumI32 Endi,
-                NumU64,
+                NumU64 Endi,
-                NumI64,
+                NumI64 Endi,
-                NumU128,
+                NumU128 Endi,
-                NumI128,
+                NumI128 Endi,
-                NumF64,
+                NumF64 Endi,
-                NumF32,
+                NumF32 Endi,
                Usv U32,
                Utf8 Str,
-                Utf16Le Str,
+                Utf16 Str Endi,
-                Utf16Be Str,
+                UsvUtf8,
                UsvUtf16 Endi,
                GraphemeUtf8,
-                GraphemeUtf16Le,
+                GraphemeUtf16 Endi,
                GraphemeUtf16Be,
                End,
            ]
            Str
    ]
 keep : Parser a, (a -> Parser b) -> Parser b
 skip : Parser *, ({} -> Parser b) -> Parser b
 utf8 : Parser Str
 utf16 : Parser Str
 graphemeUtf8 : Parser Str
-graphemeUtf16Le : Parser Str
+graphemeUtf16 : Endi -> Parser Str
-graphemeUtf16Be : Parser Str
+utf8 : Str -> Parser Str
-
+utf16 : Str, Endi -> Parser Str
-usv : Parser U32
+usvUtf8 : Parser U32 # No Endi argument: UTF-8 is byte-oriented, so its byte order is fixed
 usvUtf16 : Endi -> Parser U32
 u8 : Parser U8
 i8 : Parser I8
 u16 : Endi -> Parser U16
 i16 : Endi -> Parser I16
 u32 : Endi -> Parser U32
 i32 : Endi -> Parser I32
 u64 : Endi -> Parser U64
 i64 : Endi -> Parser I64
 u128 : Endi -> Parser U128
 i128 : Endi -> Parser I128
--- a/packages/parser/src/Str/Parser.roc
+++ b/packages/parser/src/Str/Parser.roc
@ -24,7 +24,7 @@ Parser a :
        @Parser (Str -> Result { answer : a, rest : Str } RawProblem),
    ]
-RawProblem :
+Problem :
    [
        Expected
            [
@ -40,12 +40,14 @@ RawProblem :
                NumI128,
                NumF64,
                NumF32,
-                End
+                ExactStr Str,
                Grapheme,
                End,
            ]
            Str
    ]
 keep : Parser a, (a -> Parser b) -> Parser b
 skip : Parser *, ({} -> Parser b) -> Parser b
 symbol : Str -> Parser {}