First-class Usv type

This commit is contained in:
Richard Feldman 2021-04-29 21:41:04 -04:00
parent 391a4f13db
commit 81014c3790
5 changed files with 119 additions and 96 deletions

View file

@ -8,22 +8,14 @@ interface Bytes
parseUtf16Grapheme,
parsePastUtf8,
parsePastUtf16,
parseLeU16,
parseLeI16,
parseLeU32,
parseLeI32,
parseLeU64,
parseLeI64,
parseLeU128,
parseLeI128,
parseBeU16,
parseBeI16,
parseBeU32,
parseBeI32,
parseBeU64,
parseBeI64,
parseBeU128,
parseBeI128
parseU16,
parseI16,
parseU32,
parseI32,
parseU64,
parseI64,
parseU128,
parseI128,
]
imports []
@ -37,8 +29,14 @@ len : Bytes -> Nat
isEmpty : Bytes -> Bool
## The endianness of the currently running system.
hostEndianness : [ Big, Little ]
## The [endianness](https://en.wikipedia.org/wiki/Endianness) of the currently running system.
hostEndi : Endi
## [Endianness](https://en.wikipedia.org/wiki/Endianness)
##
## Be - Big Endian
## Le - Little Endian
Endi : [ Be, Le ]
# Access
@ -54,38 +52,30 @@ concat : Bytes, Bytes -> Bytes
# Parsing
## Parse a [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)
## (USV) encoded as UTF-8.
##
## To parse an entire UTF-8 string, you can use #Bytes.toUtf8 or #Bytes.parsePastUtf8.
parseUsvUtf8 : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf8Usv ]* Bytes ]*
parseUsvUtf16Le : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf16BeUsv ]* Bytes ]*
parseUsvUtf16Be : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf16BeUsv ]* Bytes ]*
parseGraphemeUtf8 : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf8Grapheme ]* Bytes ]*
parseGraphemeUtf16Le : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf16LeGrapheme ]* Bytes ]*
parseGraphemeUtf16Be : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf16BeGrapheme ]* Bytes ]*
parseUsvUtf8 : Bytes -> Result { val : Usv, rest : Bytes } [ Expected [ Utf8Usv ]* Bytes ]*
parseUsvUtf16 : Bytes, Endi -> Result { val : Usv, rest : Bytes } [ Expected [ Utf16Usv Endi ]* Bytes ]*
parseGraphemeUtf8 : Bytes -> Result { val : Str, rest : Bytes } [ Expected [ Utf8Grapheme ]* Bytes ]*
parseGraphemeUtf16 : Bytes, Endi -> Result { val : Str, rest : Bytes } [ Expected [ Utf16Grapheme Endi ]* Bytes ]*
## If the bytes begin with the given string, return whatever bytes come
## If the bytes begin with the given UTF-8 string, return whatever bytes come
## after it.
parsePastStr : Bytes, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
chompUtf8 : Bytes, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
chompUtf16 : Bytes, Endi, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
# NOTE(review): unlike chompUtf8 and chompUtf16, these two signatures take no
# Bytes argument to chomp from, and they return a Str rather than Bytes.
# Confirm whether `Bytes, Usv -> Result Bytes ...` (and `Bytes, Usv, Endi`) was intended.
chompUsvUtf8 : Usv -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
chompUsvUtf16 : Usv, Endi -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
## If the bytes begin with the given bytes, return whatever bytes come
## after them.
chompBytes : Bytes, Bytes -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
# Little-Endian
parseU16Le : Bytes -> Result { answer : U16, rest : Bytes } [ Expected [ U16 ]* Bytes ]*
parseI16Le : Bytes -> Result { answer : I16, rest : Bytes } [ Expected [ I16 ]* Bytes ]*
parseU32Le : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ U32 ]* Bytes ]*
parseI32Le : Bytes -> Result { answer : I32, rest : Bytes } [ Expected [ I32 ]* Bytes ]*
parseU64Le : Bytes -> Result { answer : U64, rest : Bytes } [ Expected [ U64 ]* Bytes ]*
parseI64Le : Bytes -> Result { answer : I64, rest : Bytes } [ Expected [ I64 ]* Bytes ]*
parseU128Le : Bytes -> Result { answer : U128, rest : Bytes } [ Expected [ U128 ]* Bytes ]*
parseI128Le : Bytes -> Result { answer : I128, rest : Bytes } [ Expected [ I128 ]* Bytes ]*
# Big-Endian
parseU16Be : Bytes -> Result { answer : U16, rest : Bytes } [ Expected [ U16 ]* Bytes ]*
parseI16Be : Bytes -> Result { answer : I16, rest : Bytes } [ Expected [ I16 ]* Bytes ]*
parseU32Be : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ U32 ]* Bytes ]*
parseI32Be : Bytes -> Result { answer : I32, rest : Bytes } [ Expected [ I32 ]* Bytes ]*
parseU64Be : Bytes -> Result { answer : U64, rest : Bytes } [ Expected [ U64 ]* Bytes ]*
parseI64Be : Bytes -> Result { answer : I64, rest : Bytes } [ Expected [ I64 ]* Bytes ]*
parseU128Be : Bytes -> Result { answer : U128, rest : Bytes } [ Expected [ U128 ]* Bytes ]*
parseI128Be : Bytes -> Result { answer : I128, rest : Bytes } [ Expected [ I128 ]* Bytes ]*
parseU16 : Bytes, Endi -> Result { val : U16, rest : Bytes } [ Expected [ U16 Endi ]* Bytes ]*
parseI16 : Bytes, Endi -> Result { val : I16, rest : Bytes } [ Expected [ I16 Endi ]* Bytes ]*
parseU32 : Bytes, Endi -> Result { val : U32, rest : Bytes } [ Expected [ U32 Endi ]* Bytes ]*
parseI32 : Bytes, Endi -> Result { val : I32, rest : Bytes } [ Expected [ I32 Endi ]* Bytes ]*
parseU64 : Bytes, Endi -> Result { val : U64, rest : Bytes } [ Expected [ U64 Endi ]* Bytes ]*
parseI64 : Bytes, Endi -> Result { val : I64, rest : Bytes } [ Expected [ I64 Endi ]* Bytes ]*
parseU128 : Bytes, Endi -> Result { val : U128, rest : Bytes } [ Expected [ U128 Endi ]* Bytes ]*
parseI128 : Bytes, Endi -> Result { val : I128, rest : Bytes } [ Expected [ I128 Endi ]* Bytes ]*

View file

@ -336,7 +336,6 @@ Int size : Num [ @Int size ]
## eliminate the performance difference between loud and quiet errors in
## the situation where no error occurs.
## Convert
## Return a negative number when given a positive one, and vice versa.
@ -829,3 +828,22 @@ tryRecip : Float a -> Result (Float a) [ DivByZero ]*
##
## >>> Float.sqrt -4.0
sqrt : Float a -> [Ok (Float a), InvalidSqrt]*
## Try to convert a [Num] to a [Usv].
##
## Although [Usv]s are [U32]s under the hood,
## not all [U32]s are valid [Usv]s.
##
## If you specifically have a [U8], the [Num.ascii]
## function will convert it directly to a [Usv]
## with no possibility of failure.
toUsv : Num * -> Result Usv [ InvalidUsv ]*
## Convert a raw [U8] to a [Usv].
##
## Since all [U8] values are valid [Usv]s, this
## operation cannot fail.
ascii : U8 -> Usv
## Convert a [Usv] into a [U32].
fromUsv : Usv -> U32

View file

@ -101,6 +101,15 @@ interface Str
## A [Unicode](https://unicode.org) text value.
Str : [ @Str ]
## A [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value).
##
## This is a [U32] that has been validated to be in the acceptable range for
## a USV.
##
## You can make one of these using single quote literals - e.g. `'x'` - or
## convert to and from a raw [Num] using [Num.toUsv] and [Num.fromUsv].
Usv : [ @Usv U32 ]
## Convert
## Convert a #Float to a decimal string, rounding off to the given number of decimal places.
@ -334,7 +343,7 @@ toUtf16Be : Str -> Bytes
## Unicode Scalar Values
## Besides graphemes, another way to break down strings is into
## Besides graphemes and bytes, another way to break down strings is into
## Unicode Scalar Values.
##
## USVs are no substitute for graphemes!
@ -350,22 +359,22 @@ toUtf16Be : Str -> Bytes
## Walk through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value)
## (USVs) to build up a state.
## (If you want a `step` function which receives a #Str instead of an #U32, see #Str.walkGraphemes.)
## (If you want a `step` function which receives a #Str instead of a #Usv, see #Str.walkGraphemes.)
##
## Here are the #U32 values that will be passed to `step` when this function is
## Here are the #Usv values that will be passed to `step` when this function is
## called on various strings:
##
## * `"👩‍👩‍👦‍👦"` passes 128105, 8205, 128105, 8205, 128102, 8205, 128102
## * `"Roc"` passes 82, 111, 99
## * `"鹏"` passes 40527
## * `"🐦"` passes 128038
walkUsv : Str, { start: state, step: (state, U32 -> state) } -> state
walkUsv : Str, { start: state, step: (state, Usv -> state) } -> state
## Walk backwards through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value)
## (USVs) to build up a state.
## (If you want a `step` function which receives a #Str instead of an #U32, see #Str.walkGraphemes.)
## (If you want a `step` function which receives a #Str instead of a #Usv, see #Str.walkGraphemes.)
##
## Here are the #U32 values that will be passed to `step` when this function is
## Here are the #Usv values that will be passed to `step` when this function is
## called on various strings:
##
## * `"👩‍👩‍👦‍👦"` passes 128102, 8205, 128102, 8205, 128105, 8205, 128105
@ -373,36 +382,36 @@ walkUsv : Str, { start: state, step: (state, U32 -> state) } -> state
## * `"鹏"` passes 40527
## * `"🐦"` passes 128038
##
## To convert a #Str into a plain `List U32` of UTF-32 code units, see #Str.toUtf32.
walkBackwardsUsv : Str, { start: state, step: (state, U32 -> state) } -> state
## To convert a #Str into a plain `List Usv` of UTF-32 code units, see #Str.toUtf32.
walkBackwardsUsv : Str, { start: state, step: (state, Usv -> state) } -> state
# Parsing
## Return the first [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)
## in the string, along with the rest of the string after that USV.
parseUsv : Str -> Result { answer : U32, rest : Str } [ StrWasEmpty ]*
parseUsv : Str -> Result { val : Usv, rest : Str } [ Expected [ ValidUsv ]* Str ]*
## Return the first [extended grapheme cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster)
## in the string, along with the rest of the string after that grapheme.
parseGrapheme : Str -> Result { answer : Str, rest : Str } [ Expected [ Grapheme ]* Str ]*
parseGrapheme : Str -> Result { val : Str, rest : Str } [ Expected [ Grapheme ]* Str ]*
## If the first string begins with the second, return whatever comes
## after the second.
chompStr : Str, Str -> Result Str [ Expected [ ExactStr Str ]* Bytes ]*
chompUsv, U32 -> Result Str [ Expected [ Usv U32 ]* Bytes ]*
# NOTE(review): chompStr takes the input Str as its first argument, but this
# signature has no input Str to chomp the Usv from. Confirm whether
# `Str, Usv -> Result Str ...` was intended.
chompUsv : Usv -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
## If the string begins with digits which can represent a valid #U8, return
## that number along with the rest of the string after the digits.
parseU8 : Str -> Result { answer : U8, rest : Str } [ Expected [ NumU8 ]* Str ]*
parseI8 : Str -> Result { answer : I8, rest : Str } [ Expected [ NumI8 ]* Str ]*
parseU16 : Str -> Result { answer : U16, rest : Str } [ Expected [ NumU16 ]* Str ]*
parseI16 : Str -> Result { answer : I16, rest : Str } [ Expected [ NumI16 ]* Str ]*
parseU32 : Str -> Result { answer : U32, rest : Str } [ Expected [ NumU32 ]* Str ]*
parseI32 : Str -> Result { answer : I32, rest : Str } [ Expected [ NumI32 ]* Str ]*
parseU64 : Str -> Result { answer : U64, rest : Str } [ Expected [ NumU64 ]* Str ]*
parseI64 : Str -> Result { answer : I64, rest : Str } [ Expected [ NumI64 ]* Str ]*
parseU128 : Str -> Result { answer : U128, rest : Str } [ Expected [ NumU128 ]* Str ]*
parseI128 : Str -> Result { answer : I128, rest : Str } [ Expected [ NumI128 ]* Str ]*
parseU8 : Str -> Result { val : U8, rest : Str } [ Expected [ NumU8 ]* Str ]*
parseI8 : Str -> Result { val : I8, rest : Str } [ Expected [ NumI8 ]* Str ]*
parseU16 : Str -> Result { val : U16, rest : Str } [ Expected [ NumU16 ]* Str ]*
parseI16 : Str -> Result { val : I16, rest : Str } [ Expected [ NumI16 ]* Str ]*
parseU32 : Str -> Result { val : U32, rest : Str } [ Expected [ NumU32 ]* Str ]*
parseI32 : Str -> Result { val : I32, rest : Str } [ Expected [ NumI32 ]* Str ]*
parseU64 : Str -> Result { val : U64, rest : Str } [ Expected [ NumU64 ]* Str ]*
parseI64 : Str -> Result { val : I64, rest : Str } [ Expected [ NumI64 ]* Str ]*
parseU128 : Str -> Result { val : U128, rest : Str } [ Expected [ NumU128 ]* Str ]*
parseI128 : Str -> Result { val : I128, rest : Str } [ Expected [ NumI128 ]* Str ]*
parseF64 : Str -> Result { answer : U128, rest : Str } [ Expected [ NumF64 ]* Str ]*
parseF32 : Str -> Result { answer : I128, rest : Str } [ Expected [ NumF32 ]* Str ]*
## Parse a #F64 from the beginning of the string, returning it as `val` along
## with the rest of the string after it.
##
## Fails with `Expected NumF64` and the string that could not be parsed.
parseF64 : Str -> Result { val : F64, rest : Str } [ Expected [ NumF64 ]* Str ]*
## Parse a #F32 from the beginning of the string, returning it as `val` along
## with the rest of the string after it.
##
## Fails with `Expected NumF32` and the string that could not be parsed.
parseF32 : Str -> Result { val : F32, rest : Str } [ Expected [ NumF32 ]* Str ]*

View file

@ -28,40 +28,44 @@ Problem :
[
NumU8,
NumI8,
NumU16,
NumI16,
NumU32,
NumI32,
NumU64,
NumI64,
NumU128,
NumI128,
NumF64,
NumF32,
Usv U32,
NumU16 Endi,
NumI16 Endi,
NumU32 Endi,
NumI32 Endi,
NumU64 Endi,
NumI64 Endi,
NumU128 Endi,
NumI128 Endi,
NumF64 Endi,
NumF32 Endi,
Utf8 Str,
Utf16Le Str,
Utf16Be Str,
Utf16 Str Endi,
UsvUtf8,
UsvUtf16 Endi,
GraphemeUtf8,
GraphemeUtf16Le,
GraphemeUtf16Be,
GraphemeUtf16 Endi,
End,
]
Str
]
keep : Parser a, (a -> Parser b) -> Parser b
skip : Parser *, ({} -> Parser b) -> Parser b
utf8 : Parser Str
utf16 : Parser Str
graphemeUtf8 : Parser Str
graphemeUtf16Le : Parser Str
graphemeUtf16Be : Parser Str
usv : Parser U32
graphemeUtf16 : Endi -> Parser Str
utf8 : Str -> Parser Str
utf16 : Str, Endi -> Parser Str
usvUtf8 : Parser U32 # UTF-8 has a fixed byte order, so unlike usvUtf16 no Endi argument is needed
usvUtf16 : Endi -> Parser U32
u8 : Parser U8
i8 : Parser I8
u16 : Endi -> Parser U16
i16 : Endi -> Parser I16
u32 : Endi -> Parser U32
i32 : Endi -> Parser I32
u64 : Endi -> Parser U64
i64 : Endi -> Parser I64
u128 : Endi -> Parser U128
i128 : Endi -> Parser I128

View file

@ -24,7 +24,7 @@ Parser a :
@Parser (Str -> Result { answer : a, rest : Str } RawProblem),
]
RawProblem :
Problem :
[
Expected
[
@ -40,12 +40,14 @@ RawProblem :
NumI128,
NumF64,
NumF32,
End
ExactStr Str,
Grapheme,
End,
]
Str
]
keep : Parser a, (a -> Parser b) -> Parser b
skip : Parser *, ({} -> Parser b) -> Parser b
symbol : Str -> Parser {}