First-class Usv type

This commit is contained in:
Richard Feldman 2021-04-29 21:41:04 -04:00
parent 391a4f13db
commit 81014c3790
5 changed files with 119 additions and 96 deletions

View file

@ -8,22 +8,14 @@ interface Bytes
parseUtf16Grapheme, parseUtf16Grapheme,
parsePastUtf8, parsePastUtf8,
parsePastUtf16, parsePastUtf16,
parseLeU16, parseU16,
parseLeI16, parseI16,
parseLeU32, parseU32,
parseLeI32, parseI32,
parseLeU64, parseU64,
parseLeI64, parseI64,
parseLeU128, parseU128,
parseLeI128, parseI128,
parseBeU16,
parseBeI16,
parseBeU32,
parseBeI32,
parseBeU64,
parseBeI64,
parseBeU128,
parseBeI128
] ]
imports [] imports []
@ -37,8 +29,14 @@ len : Bytes -> Nat
isEmpty : Bytes -> Bool isEmpty : Bytes -> Bool
## The endianness of the currently running system. ## The [endianness](https://en.wikipedia.org/wiki/Endianness) of the currently running system.
hostEndianness : [ Big, Little ] hostEndi : Endi
## [Endianness](https://en.wikipedia.org/wiki/Endianness)
##
## Be - Big Endian
## Le - Little Endian
Endi : [ Be, Le ]
# Access # Access
@ -54,38 +52,30 @@ concat : Bytes, Bytes -> Bytes
# Parsing # Parsing
## Parse a [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value) ## Parse a [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)
## (USV) encoded as UTF-8. ## (USV) encoded as UTF-8.
## ##
## To parse an entire UTF-8 string, you can use #Bytes.toUtf8 or #Bytes.parsePastUtf8. ## To parse an entire UTF-8 string, you can use #Bytes.toUtf8 or #Bytes.parsePastUtf8.
parseUsvUtf8 : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf8Usv ]* Bytes ]* parseUsvUtf8 : Bytes -> Result { val : Usv, rest : Bytes } [ Expected [ Utf8Usv ]* Bytes ]*
parseUsvUtf16Le : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf16BeUsv ]* Bytes ]* parseUsvUtf16 : Bytes, Endi -> Result { val : Usv, rest : Bytes } [ Expected [ Utf16Usv Endi ]* Bytes ]*
parseUsvUtf16Be : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf16BeUsv ]* Bytes ]* parseGraphemeUtf8 : Bytes -> Result { val : Str, rest : Bytes } [ Expected [ Utf8Grapheme ]* Bytes ]*
parseGraphemeUtf8 : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf8Grapheme ]* Bytes ]* parseGraphemeUtf16 : Bytes, Endi -> Result { val : Str, rest : Bytes } [ Expected [ Utf16Grapheme Endi ]* Bytes ]*
parseGraphemeUtf16Le : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf16LeGrapheme ]* Bytes ]*
parseGraphemeUtf16Be : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf16BeGrapheme ]* Bytes ]*
## If the bytes begin with the given string, return whatever bytes come ## If the bytes begin with the given UTF-8 string, return whatever bytes come
## after it. ## after it.
parsePastStr : Bytes, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]* chompUtf8 : Bytes, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
chompUtf16 : Bytes, Endi, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
chompUsvUtf8 : Usv -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
chompUsvUtf16 : Usv, Endi -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
## If the bytes begin with the given bytes, return whatever bytes come
## after them.
chompBytes : Bytes, Bytes -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
# Little-Endian parseU16 : Bytes, Endi -> Result { val : U16, rest : Bytes } [ Expected [ U16 Endi ]* Bytes ]*
parseU16Le : Bytes -> Result { answer : U16, rest : Bytes } [ Expected [ U16 ]* Bytes ]* parseI16 : Bytes, Endi -> Result { val : I16, rest : Bytes } [ Expected [ I16 Endi ]* Bytes ]*
parseI16Le : Bytes -> Result { answer : I16, rest : Bytes } [ Expected [ I16 ]* Bytes ]* parseU32 : Bytes, Endi -> Result { val : U32, rest : Bytes } [ Expected [ U32 Endi ]* Bytes ]*
parseU32Le : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ U32 ]* Bytes ]* parseI32 : Bytes, Endi -> Result { val : I32, rest : Bytes } [ Expected [ I32 Endi ]* Bytes ]*
parseI32Le : Bytes -> Result { answer : I32, rest : Bytes } [ Expected [ I32 ]* Bytes ]* parseU64 : Bytes, Endi -> Result { val : U64, rest : Bytes } [ Expected [ U64 Endi ]* Bytes ]*
parseU64Le : Bytes -> Result { answer : U64, rest : Bytes } [ Expected [ U64 ]* Bytes ]* parseI64 : Bytes, Endi -> Result { val : I64, rest : Bytes } [ Expected [ I64 Endi ]* Bytes ]*
parseI64Le : Bytes -> Result { answer : I64, rest : Bytes } [ Expected [ I64 ]* Bytes ]* parseU128 : Bytes, Endi -> Result { val : U128, rest : Bytes } [ Expected [ U128 Endi ]* Bytes ]*
parseU128Le : Bytes -> Result { answer : U128, rest : Bytes } [ Expected [ U128 ]* Bytes ]* parseI128 : Bytes, Endi -> Result { val : I128, rest : Bytes } [ Expected [ I128 Endi ]* Bytes ]*
parseI128Le : Bytes -> Result { answer : I128, rest : Bytes } [ Expected [ I128 ]* Bytes ]*
# Big-Endian
parseU16Be : Bytes -> Result { answer : U16, rest : Bytes } [ Expected [ U16 ]* Bytes ]*
parseI16Be : Bytes -> Result { answer : I16, rest : Bytes } [ Expected [ I16 ]* Bytes ]*
parseU32Be : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ U32 ]* Bytes ]*
parseI32Be : Bytes -> Result { answer : I32, rest : Bytes } [ Expected [ I32 ]* Bytes ]*
parseU64Be : Bytes -> Result { answer : U64, rest : Bytes } [ Expected [ U64 ]* Bytes ]*
parseI64Be : Bytes -> Result { answer : I64, rest : Bytes } [ Expected [ I64 ]* Bytes ]*
parseU128Be : Bytes -> Result { answer : U128, rest : Bytes } [ Expected [ U128 ]* Bytes ]*
parseI128Be : Bytes -> Result { answer : I128, rest : Bytes } [ Expected [ I128 ]* Bytes ]*

View file

@ -336,7 +336,6 @@ Int size : Num [ @Int size ]
## eliminate the performance difference between loud and quiet errors in ## eliminate the performance difference between loud and quiet errors in
## the situation where no error occurs. ## the situation where no error occurs.
## Convert ## Convert
## Return a negative number when given a positive one, and vice versa. ## Return a negative number when given a positive one, and vice versa.
@ -829,3 +828,22 @@ tryRecip : Float a -> Result (Float a) [ DivByZero ]*
## ##
## >>> Float.sqrt -4.0 ## >>> Float.sqrt -4.0
sqrt : Float a -> [Ok (Float a), InvalidSqrt]* sqrt : Float a -> [Ok (Float a), InvalidSqrt]*
## Try to convert a [Num] to a [Usv].
##
## Although [Usv]s are [U32]s under the hood,
## not all [U32]s are valid [Usv]s.
##
## If you specifically have a [U8], the [Num.ascii]
## function will convert it directly to a [Usv]
## with no possibility of failure.
toUsv : Num * -> Result Usv [ InvalidUsv ]*
## Convert a raw [U8] to a [Usv].
##
## Since all [U8] values are valid [Usv]s, this
## operation cannot fail.
ascii : U8 -> Usv
## Convert a [Usv] into a [U32].
fromUsv : Usv -> U32

View file

@ -101,6 +101,15 @@ interface Str
## A [Unicode](https://unicode.org) text value. ## A [Unicode](https://unicode.org) text value.
Str : [ @Str ] Str : [ @Str ]
## A [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value).
##
## This is a [U32] that has been validated to be in the acceptable range for
## a USV.
##
## You can make one of these using single quote literals - e.g. `'x'` - or
## convert to and from a raw [Num] using [Num.toUsv] and [Num.fromUsv].
Usv : [ @Usv U32 ]
## Convert ## Convert
## Convert a #Float to a decimal string, rounding off to the given number of decimal places. ## Convert a #Float to a decimal string, rounding off to the given number of decimal places.
@ -334,7 +343,7 @@ toUtf16Be : Str -> Bytes
## Unicode Scalar Values ## Unicode Scalar Values
## Besides graphemes, another way to break down strings is into ## Besides graphemes and bytes, another way to break down strings is into
## Unicode Scalar Values. ## Unicode Scalar Values.
## ##
## USVs are no substitute for graphemes! ## USVs are no substitute for graphemes!
@ -350,22 +359,22 @@ toUtf16Be : Str -> Bytes
## Walk through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value) ## Walk through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value)
## (USVs) to build up a state. ## (USVs) to build up a state.
## (If you want a `step` function which receives a #Str instead of a #U32, see #Str.walkGraphemes.) ## (If you want a `step` function which receives a #Str instead of a #Usv, see #Str.walkGraphemes.)
## ##
## Here are the #U32 values that will be passed to `step` when this function is ## Here are the #Usv values that will be passed to `step` when this function is
## called on various strings: ## called on various strings:
## ##
## * `"👩‍👩‍👦‍👦"` passes 128105, 8205, 128105, 8205, 128102, 8205, 128102 ## * `"👩‍👩‍👦‍👦"` passes 128105, 8205, 128105, 8205, 128102, 8205, 128102
## * `"Roc"` passes 82, 111, 99 ## * `"Roc"` passes 82, 111, 99
## * `"鹏"` passes 40527 ## * `"鹏"` passes 40527
## * `"🐦"` passes 128038 ## * `"🐦"` passes 128038
walkUsv : Str, { start: state, step: (state, U32 -> state) } -> state walkUsv : Str, { start: state, step: (state, Usv -> state) } -> state
## Walk backwards through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value) ## Walk backwards through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value)
## (USVs) to build up a state. ## (USVs) to build up a state.
## (If you want a `step` function which receives a #Str instead of a #U32, see #Str.walkGraphemes.) ## (If you want a `step` function which receives a #Str instead of a #Usv, see #Str.walkGraphemes.)
## ##
## Here are the #U32 values that will be passed to `step` when this function is ## Here are the #Usv values that will be passed to `step` when this function is
## called on various strings: ## called on various strings:
## ##
## * `"👩‍👩‍👦‍👦"` passes 128102, 8205, 128102, 8205, 128105, 8205, 128105 ## * `"👩‍👩‍👦‍👦"` passes 128102, 8205, 128102, 8205, 128105, 8205, 128105
@ -373,36 +382,36 @@ walkUsv : Str, { start: state, step: (state, U32 -> state) } -> state
## * `"鹏"` passes 40527 ## * `"鹏"` passes 40527
## * `"🐦"` passes 128038 ## * `"🐦"` passes 128038
## ##
## To convert a #Str into a plain `List U32` of UTF-32 code units, see #Str.toUtf32. ## To convert a #Str into a plain `List Usv` of UTF-32 code units, see #Str.toUtf32.
walkBackwardsUsv : Str, { start: state, step: (state, U32 -> state) } -> state walkBackwardsUsv : Str, { start: state, step: (state, Usv -> state) } -> state
# Parsing # Parsing
## Return the first [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value) ## Return the first [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)
## in the string, along with the rest of the string after that USV. ## in the string, along with the rest of the string after that USV.
parseUsv : Str -> Result { answer : U32, rest : Str } [ StrWasEmpty ]* parseUsv : Str -> Result { val : Usv, rest : Str } [ Expected [ ValidUsv ]* Str ]*
## Return the first [extended grapheme cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster) ## Return the first [extended grapheme cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster)
## in the string, along with the rest of the string after that grapheme. ## in the string, along with the rest of the string after that grapheme.
parseGrapheme : Str -> Result { answer : Str, rest : Str } [ Expected [ Grapheme ]* Str ]* parseGrapheme : Str -> Result { val : Str, rest : Str } [ Expected [ Grapheme ]* Str ]*
## If the first string begins with the second, return whatever comes ## If the first string begins with the second, return whatever comes
## after the second. ## after the second.
chompStr : Str, Str -> Result Str [ Expected [ ExactStr Str ]* Bytes ]* chompStr : Str, Str -> Result Str [ Expected [ ExactStr Str ]* Bytes ]*
chompUsv, U32 -> Result Str [ Expected [ Usv U32 ]* Bytes ]* chompUsv : Usv -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
## If the string begins with digits which can represent a valid #U8, return ## If the string begins with digits which can represent a valid #U8, return
## that number along with the rest of the string after the digits. ## that number along with the rest of the string after the digits.
parseU8 : Str -> Result { answer : U8, rest : Str } [ Expected [ NumU8 ]* Str ]* parseU8 : Str -> Result { val : U8, rest : Str } [ Expected [ NumU8 ]* Str ]*
parseI8 : Str -> Result { answer : I8, rest : Str } [ Expected [ NumI8 ]* Str ]* parseI8 : Str -> Result { val : I8, rest : Str } [ Expected [ NumI8 ]* Str ]*
parseU16 : Str -> Result { answer : U16, rest : Str } [ Expected [ NumU16 ]* Str ]* parseU16 : Str -> Result { val : U16, rest : Str } [ Expected [ NumU16 ]* Str ]*
parseI16 : Str -> Result { answer : I16, rest : Str } [ Expected [ NumI16 ]* Str ]* parseI16 : Str -> Result { val : I16, rest : Str } [ Expected [ NumI16 ]* Str ]*
parseU32 : Str -> Result { answer : U32, rest : Str } [ Expected [ NumU32 ]* Str ]* parseU32 : Str -> Result { val : U32, rest : Str } [ Expected [ NumU32 ]* Str ]*
parseI32 : Str -> Result { answer : I32, rest : Str } [ Expected [ NumI32 ]* Str ]* parseI32 : Str -> Result { val : I32, rest : Str } [ Expected [ NumI32 ]* Str ]*
parseU64 : Str -> Result { answer : U64, rest : Str } [ Expected [ NumU64 ]* Str ]* parseU64 : Str -> Result { val : U64, rest : Str } [ Expected [ NumU64 ]* Str ]*
parseI64 : Str -> Result { answer : I64, rest : Str } [ Expected [ NumI64 ]* Str ]* parseI64 : Str -> Result { val : I64, rest : Str } [ Expected [ NumI64 ]* Str ]*
parseU128 : Str -> Result { answer : U128, rest : Str } [ Expected [ NumU128 ]* Str ]* parseU128 : Str -> Result { val : U128, rest : Str } [ Expected [ NumU128 ]* Str ]*
parseI128 : Str -> Result { answer : I128, rest : Str } [ Expected [ NumI128 ]* Str ]* parseI128 : Str -> Result { val : I128, rest : Str } [ Expected [ NumI128 ]* Str ]*
parseF64 : Str -> Result { answer : U128, rest : Str } [ Expected [ NumF64 ]* Str ]* parseF64 : Str -> Result { val : U128, rest : Str } [ Expected [ NumF64 ]* Str ]*
parseF32 : Str -> Result { answer : I128, rest : Str } [ Expected [ NumF32 ]* Str ]* parseF32 : Str -> Result { val : I128, rest : Str } [ Expected [ NumF32 ]* Str ]*

View file

@ -28,40 +28,44 @@ Problem :
[ [
NumU8, NumU8,
NumI8, NumI8,
NumU16, NumU16 Endi,
NumI16, NumI16 Endi,
NumU32, NumU32 Endi,
NumI32, NumI32 Endi,
NumU64, NumU64 Endi,
NumI64, NumI64 Endi,
NumU128, NumU128 Endi,
NumI128, NumI128 Endi,
NumF64, NumF64 Endi,
NumF32, NumF32 Endi,
Usv U32,
Utf8 Str, Utf8 Str,
Utf16Le Str, Utf16 Str Endi,
Utf16Be Str, UsvUtf8,
UsvUtf16 Endi,
GraphemeUtf8, GraphemeUtf8,
GraphemeUtf16Le, GraphemeUtf16 Endi,
GraphemeUtf16Be,
End, End,
] ]
Str Str
] ]
keep : Parser a, (a -> Parser b) -> Parser b keep : Parser a, (a -> Parser b) -> Parser b
skip : Parser *, ({} -> Parser b) -> Parser b skip : Parser *, ({} -> Parser b) -> Parser b
utf8 : Parser Str
utf16 : Parser Str
graphemeUtf8 : Parser Str graphemeUtf8 : Parser Str
graphemeUtf16Le : Parser Str graphemeUtf16 : Endi -> Parser Str
graphemeUtf16Be : Parser Str utf8 : Str -> Parser Str
utf16 : Str, Endi -> Parser Str
usv : Parser U32 usvUtf8 : Parser U32 # UTF-8 has a fixed byte order, so no Endi argument is needed
usvUtf16 : Endi -> Parser U32
u8 : Parser U8 u8 : Parser U8
i8 : Parser I8 i8 : Parser I8
u16 : Endi -> Parser U16
i16 : Endi -> Parser I16
u32 : Endi -> Parser U32
i32 : Endi -> Parser I32
u64 : Endi -> Parser U64
i64 : Endi -> Parser I64
u128 : Endi -> Parser U128
i128 : Endi -> Parser I128

View file

@ -24,7 +24,7 @@ Parser a :
@Parser (Str -> Result { answer : a, rest : Str } RawProblem), @Parser (Str -> Result { answer : a, rest : Str } RawProblem),
] ]
RawProblem : Problem :
[ [
Expected Expected
[ [
@ -40,12 +40,14 @@ RawProblem :
NumI128, NumI128,
NumF64, NumF64,
NumF32, NumF32,
End ExactStr Str,
Grapheme,
End,
] ]
Str
] ]
keep : Parser a, (a -> Parser b) -> Parser b keep : Parser a, (a -> Parser b) -> Parser b
skip : Parser *, ({} -> Parser b) -> Parser b skip : Parser *, ({} -> Parser b) -> Parser b
symbol : Str -> Parser {} symbol : Str -> Parser {}