mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-30 23:31:12 +00:00
First-class Usv type
This commit is contained in:
parent
391a4f13db
commit
81014c3790
5 changed files with 119 additions and 96 deletions
|
@ -8,22 +8,14 @@ interface Bytes
|
|||
parseUtf16Grapheme,
|
||||
parsePastUtf8,
|
||||
parsePastUtf16,
|
||||
parseLeU16,
|
||||
parseLeI16,
|
||||
parseLeU32,
|
||||
parseLeI32,
|
||||
parseLeU64,
|
||||
parseLeI64,
|
||||
parseLeU128,
|
||||
parseLeI128,
|
||||
parseBeU16,
|
||||
parseBeI16,
|
||||
parseBeU32,
|
||||
parseBeI32,
|
||||
parseBeU64,
|
||||
parseBeI64,
|
||||
parseBeU128,
|
||||
parseBeI128
|
||||
parseU16,
|
||||
parseI16,
|
||||
parseU32,
|
||||
parseI32,
|
||||
parseU64,
|
||||
parseI64,
|
||||
parseU128,
|
||||
parseI128,
|
||||
]
|
||||
imports []
|
||||
|
||||
|
@ -37,8 +29,14 @@ len : Bytes -> Nat
|
|||
|
||||
isEmpty : Bytes -> Bool
|
||||
|
||||
## The endianness of the currently running system.
|
||||
hostEndianness : [ Big, Little ]
|
||||
## The [endianness](https://en.wikipedia.org/wiki/Endianness) of the currently running system.
|
||||
hostEndi : Endi
|
||||
|
||||
## [Endianness](https://en.wikipedia.org/wiki/Endianness)
|
||||
##
|
||||
## Be - Big Endian
|
||||
## Le - Little Endian
|
||||
Endi : [ Be, Le ]
|
||||
|
||||
# Access
|
||||
|
||||
|
@ -54,38 +52,30 @@ concat : Bytes, Bytes -> Bytes
|
|||
|
||||
# Parsing
|
||||
|
||||
|
||||
## Parse a [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)
|
||||
## (USV) encoded as UTF-8.
|
||||
##
|
||||
## To parse an entire UTF-8 string, you can use #Bytes.toUtf8 or #Bytes.parsePastUtf8.
|
||||
parseUsvUtf8 : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf8Usv ]* Bytes ]*
|
||||
parseUsvUtf16Le : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf16BeUsv ]* Bytes ]*
|
||||
parseUsvUtf16Be : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ Utf16BeUsv ]* Bytes ]*
|
||||
parseGraphemeUtf8 : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf8Grapheme ]* Bytes ]*
|
||||
parseGraphemeUtf16Le : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf16LeGrapheme ]* Bytes ]*
|
||||
parseGraphemeUtf16Be : Bytes -> Result { answer : Str, rest : Bytes } [ Expected [ Utf16BeGrapheme ]* Bytes ]*
|
||||
parseUsvUtf8 : Bytes -> Result { val : Usv, rest : Bytes } [ Expected [ Utf8Usv ]* Bytes ]*
|
||||
parseUsvUtf16 : Bytes, Endi -> Result { val : Usv, rest : Bytes } [ Expected [ Utf16Usv Endi ]* Bytes ]*
|
||||
parseGraphemeUtf8 : Bytes -> Result { val : Str, rest : Bytes } [ Expected [ Utf8Grapheme ]* Bytes ]*
|
||||
parseGraphemeUtf16 : Bytes, Endi -> Result { val : Str, rest : Bytes } [ Expected [ Utf16Grapheme Endi ]* Bytes ]*
|
||||
|
||||
## If the bytes begin with the given string, return whatever bytes come
|
||||
## If the bytes begin with the given UTF-8 string, return whatever bytes come
|
||||
## after it.
|
||||
parsePastStr : Bytes, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
|
||||
chompUtf8 : Bytes, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
|
||||
chompUtf16 : Bytes, Endi, Str -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
|
||||
chompUsvUtf8 : Usv -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
|
||||
chompUsvUtf16 : Usv, Endi -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
|
||||
## If the bytes begin with the given bytes, return whatever bytes come
|
||||
## after them.
|
||||
chompBytes : Bytes, Bytes -> Result Bytes [ Expected [ ExactStr Str ]* Bytes ]*
|
||||
|
||||
# Little-Endian
|
||||
parseU16Le : Bytes -> Result { answer : U16, rest : Bytes } [ Expected [ U16 ]* Bytes ]*
|
||||
parseI16Le : Bytes -> Result { answer : I16, rest : Bytes } [ Expected [ I16 ]* Bytes ]*
|
||||
parseU32Le : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ U32 ]* Bytes ]*
|
||||
parseI32Le : Bytes -> Result { answer : I32, rest : Bytes } [ Expected [ I32 ]* Bytes ]*
|
||||
parseU64Le : Bytes -> Result { answer : U64, rest : Bytes } [ Expected [ U64 ]* Bytes ]*
|
||||
parseI64Le : Bytes -> Result { answer : I64, rest : Bytes } [ Expected [ I64 ]* Bytes ]*
|
||||
parseU128Le : Bytes -> Result { answer : U128, rest : Bytes } [ Expected [ U128 ]* Bytes ]*
|
||||
parseI128Le : Bytes -> Result { answer : I128, rest : Bytes } [ Expected [ I128 ]* Bytes ]*
|
||||
|
||||
# Big-Endian
|
||||
parseU16Be : Bytes -> Result { answer : U16, rest : Bytes } [ Expected [ U16 ]* Bytes ]*
|
||||
parseI16Be : Bytes -> Result { answer : I16, rest : Bytes } [ Expected [ I16 ]* Bytes ]*
|
||||
parseU32Be : Bytes -> Result { answer : U32, rest : Bytes } [ Expected [ U32 ]* Bytes ]*
|
||||
parseI32Be : Bytes -> Result { answer : I32, rest : Bytes } [ Expected [ I32 ]* Bytes ]*
|
||||
parseU64Be : Bytes -> Result { answer : U64, rest : Bytes } [ Expected [ U64 ]* Bytes ]*
|
||||
parseI64Be : Bytes -> Result { answer : I64, rest : Bytes } [ Expected [ I64 ]* Bytes ]*
|
||||
parseU128Be : Bytes -> Result { answer : U128, rest : Bytes } [ Expected [ U128 ]* Bytes ]*
|
||||
parseI128Be : Bytes -> Result { answer : I128, rest : Bytes } [ Expected [ I128 ]* Bytes ]*
|
||||
parseU16 : Bytes, Endi -> Result { val : U16, rest : Bytes } [ Expected [ U16 Endi ]* Bytes ]*
|
||||
parseI16 : Bytes, Endi -> Result { val : I16, rest : Bytes } [ Expected [ I16 Endi ]* Bytes ]*
|
||||
parseU32 : Bytes, Endi -> Result { val : U32, rest : Bytes } [ Expected [ U32 Endi ]* Bytes ]*
|
||||
parseI32 : Bytes, Endi -> Result { val : I32, rest : Bytes } [ Expected [ I32 Endi ]* Bytes ]*
|
||||
parseU64 : Bytes, Endi -> Result { val : U64, rest : Bytes } [ Expected [ U64 Endi ]* Bytes ]*
|
||||
parseI64 : Bytes, Endi -> Result { val : I64, rest : Bytes } [ Expected [ I64 Endi ]* Bytes ]*
|
||||
parseU128 : Bytes, Endi -> Result { val : U128, rest : Bytes } [ Expected [ U128 Endi ]* Bytes ]*
|
||||
parseI128 : Bytes, Endi -> Result { val : I128, rest : Bytes } [ Expected [ I128 Endi ]* Bytes ]*
|
||||
|
|
|
@ -336,7 +336,6 @@ Int size : Num [ @Int size ]
|
|||
## eliminate the performance difference between loud and quiet errors in
|
||||
## the situation where no error occurs.
|
||||
|
||||
|
||||
## Convert
|
||||
|
||||
## Return a negative number when given a positive one, and vice versa.
|
||||
|
@ -829,3 +828,22 @@ tryRecip : Float a -> Result (Float a) [ DivByZero ]*
|
|||
##
|
||||
## >>> Float.sqrt -4.0
|
||||
sqrt : Float a -> [Ok (Float a), InvalidSqrt]*
|
||||
|
||||
## Try to convert a [Num] to a [Usv].
|
||||
##
|
||||
## Although [Usv]s are [U32]s under the hood,
|
||||
## not all [U32]s are valid [Usv]s.
|
||||
##
|
||||
## If you specifically have a [U8], the [Num.ascii]
|
||||
## function will convert it directly to a [Usv]
|
||||
## with no possibility of failure.
|
||||
toUsv : Num * -> Result Usv [ InvalidUsv ]*
|
||||
|
||||
## Convert a raw [U8] to a [Usv].
|
||||
##
|
||||
## Since all [U8] values are valid [Usv]s, this
|
||||
## operation cannot fail.
|
||||
ascii : U8 -> Usv
|
||||
|
||||
## Convert a [Usv] into a [U32].
|
||||
fromUsv : Usv -> U32
|
||||
|
|
|
@ -101,6 +101,15 @@ interface Str
|
|||
## A [Unicode](https://unicode.org) text value.
|
||||
Str : [ @Str ]
|
||||
|
||||
## A [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value).
|
||||
##
|
||||
## This is a [U32] that has been validated to be in the acceptable range for
|
||||
## a USV.
|
||||
##
|
||||
## You can make one of these using single quote literals - e.g. `'x'` - or
|
||||
## convert to and from a raw [Num] using [Num.toUsv] and [Num.fromUsv].
|
||||
Usv : [ @Usv U32 ]
|
||||
|
||||
## Convert
|
||||
|
||||
## Convert a #Float to a decimal string, rounding off to the given number of decimal places.
|
||||
|
@ -334,7 +343,7 @@ toUtf16Be : Str -> Bytes
|
|||
|
||||
## Unicode Scalar Values
|
||||
|
||||
## Besides graphemes, another way to break down strings is into
|
||||
## Besides graphemes and bytes, another way to break down strings is into
|
||||
## Unicode Scalar Values.
|
||||
##
|
||||
## USVs are no substitute for graphemes!
|
||||
|
@ -350,22 +359,22 @@ toUtf16Be : Str -> Bytes
|
|||
|
||||
## Walk through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value)
|
||||
## (USVs) to build up a state.
|
||||
## (If you want a `step` function which receives a #Str instead of an #U32, see #Str.walkGraphemes.)
|
||||
## (If you want a `step` function which receives a #Str instead of a #Usv, see #Str.walkGraphemes.)
|
||||
##
|
||||
## Here are the #U32 values that will be passed to `step` when this function is
|
||||
## Here are the #Usv values that will be passed to `step` when this function is
|
||||
## called on various strings:
|
||||
##
|
||||
## * `"👩👩👦👦"` passes 128105, 8205, 128105, 8205, 128102, 8205, 128102
|
||||
## * `"Roc"` passes 82, 111, 99
|
||||
## * `"鹏"` passes 40527
|
||||
## * `"🐦"` passes 128038
|
||||
walkUsv : Str, { start: state, step: (state, U32 -> state) } -> state
|
||||
walkUsv : Str, { start: state, step: (state, Usv -> state) } -> state
|
||||
|
||||
## Walk backwards through the string's [Unicode Scalar Values](http://www.unicode.org/glossary/#unicode_scalar_value)
|
||||
## (USVs) to build up a state.
|
||||
## (If you want a `step` function which receives a #Str instead of an #U32, see #Str.walkGraphemes.)
|
||||
## (If you want a `step` function which receives a #Str instead of a #Usv, see #Str.walkGraphemes.)
|
||||
##
|
||||
## Here are the #U32 values that will be passed to `step` when this function is
|
||||
## Here are the #Usv values that will be passed to `step` when this function is
|
||||
## called on various strings:
|
||||
##
|
||||
## * `"👩👩👦👦"` passes 128102, 8205, 128102, 8205, 128105, 8205, 128105
|
||||
|
@ -373,36 +382,36 @@ walkUsv : Str, { start: state, step: (state, U32 -> state) } -> state
|
|||
## * `"鹏"` passes 40527
|
||||
## * `"🐦"` passes 128038
|
||||
##
|
||||
## To convert a #Str into a plain `List U32` of UTF-32 code units, see #Str.toUtf32.
|
||||
walkBackwardsUsv : Str, { start: state, step: (state, U32 -> state) } -> state
|
||||
## To convert a #Str into a plain `List Usv` of UTF-32 code units, see #Str.toUtf32.
|
||||
walkBackwardsUsv : Str, { start: state, step: (state, Usv -> state) } -> state
|
||||
|
||||
# Parsing
|
||||
|
||||
## Return the first [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)
|
||||
## in the string, along with the rest of the string after that USV.
|
||||
parseUsv : Str -> Result { answer : U32, rest : Str } [ StrWasEmpty ]*
|
||||
parseUsv : Str -> Result { val : Usv, rest : Str } [ Expected [ ValidUsv ]* Str ]*
|
||||
|
||||
## Return the first [extended grapheme cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster)
|
||||
## in the string, along with the rest of the string after that grapheme.
|
||||
parseGrapheme : Str -> Result { answer : Str, rest : Str } [ Expected [ Grapheme ]* Str ]*
|
||||
parseGrapheme : Str -> Result { val : Str, rest : Str } [ Expected [ Grapheme ]* Str ]*
|
||||
|
||||
## If the first string begins with the second, return whatever comes
|
||||
## after the second.
|
||||
chompStr : Str, Str -> Result Str [ Expected [ ExactStr Str ]* Bytes ]*
|
||||
chompUsv, U32 -> Result Str [ Expected [ Usv U32 ]* Bytes ]*
|
||||
## If the string begins with the given [Usv], return whatever comes after it.
chompUsv : Str, Usv -> Result Str [ Expected [ ExactUsv Usv ]* Bytes ]*
|
||||
|
||||
## If the string begins with digits which can represent a valid #U8, return
|
||||
## that number along with the rest of the string after the digits.
|
||||
parseU8 : Str -> Result { answer : U8, rest : Str } [ Expected [ NumU8 ]* Str ]*
|
||||
parseI8 : Str -> Result { answer : I8, rest : Str } [ Expected [ NumI8 ]* Str ]*
|
||||
parseU16 : Str -> Result { answer : U16, rest : Str } [ Expected [ NumU16 ]* Str ]*
|
||||
parseI16 : Str -> Result { answer : I16, rest : Str } [ Expected [ NumI16 ]* Str ]*
|
||||
parseU32 : Str -> Result { answer : U32, rest : Str } [ Expected [ NumU32 ]* Str ]*
|
||||
parseI32 : Str -> Result { answer : I32, rest : Str } [ Expected [ NumI32 ]* Str ]*
|
||||
parseU64 : Str -> Result { answer : U64, rest : Str } [ Expected [ NumU64 ]* Str ]*
|
||||
parseI64 : Str -> Result { answer : I64, rest : Str } [ Expected [ NumI64 ]* Str ]*
|
||||
parseU128 : Str -> Result { answer : U128, rest : Str } [ Expected [ NumU128 ]* Str ]*
|
||||
parseI128 : Str -> Result { answer : I128, rest : Str } [ Expected [ NumI128 ]* Str ]*
|
||||
parseU8 : Str -> Result { val : U8, rest : Str } [ Expected [ NumU8 ]* Str ]*
|
||||
parseI8 : Str -> Result { val : I8, rest : Str } [ Expected [ NumI8 ]* Str ]*
|
||||
parseU16 : Str -> Result { val : U16, rest : Str } [ Expected [ NumU16 ]* Str ]*
|
||||
parseI16 : Str -> Result { val : I16, rest : Str } [ Expected [ NumI16 ]* Str ]*
|
||||
parseU32 : Str -> Result { val : U32, rest : Str } [ Expected [ NumU32 ]* Str ]*
|
||||
parseI32 : Str -> Result { val : I32, rest : Str } [ Expected [ NumI32 ]* Str ]*
|
||||
parseU64 : Str -> Result { val : U64, rest : Str } [ Expected [ NumU64 ]* Str ]*
|
||||
parseI64 : Str -> Result { val : I64, rest : Str } [ Expected [ NumI64 ]* Str ]*
|
||||
parseU128 : Str -> Result { val : U128, rest : Str } [ Expected [ NumU128 ]* Str ]*
|
||||
parseI128 : Str -> Result { val : I128, rest : Str } [ Expected [ NumI128 ]* Str ]*
|
||||
|
||||
parseF64 : Str -> Result { answer : U128, rest : Str } [ Expected [ NumF64 ]* Str ]*
|
||||
parseF32 : Str -> Result { answer : I128, rest : Str } [ Expected [ NumF32 ]* Str ]*
|
||||
## If the string begins with text which can represent a valid #F64, return
## that number along with the rest of the string after it.
parseF64 : Str -> Result { val : F64, rest : Str } [ Expected [ NumF64 ]* Str ]*
|
||||
## If the string begins with text which can represent a valid #F32, return
## that number along with the rest of the string after it.
parseF32 : Str -> Result { val : F32, rest : Str } [ Expected [ NumF32 ]* Str ]*
|
||||
|
|
|
@ -28,40 +28,44 @@ Problem :
|
|||
[
|
||||
NumU8,
|
||||
NumI8,
|
||||
NumU16,
|
||||
NumI16,
|
||||
NumU32,
|
||||
NumI32,
|
||||
NumU64,
|
||||
NumI64,
|
||||
NumU128,
|
||||
NumI128,
|
||||
NumF64,
|
||||
NumF32,
|
||||
Usv U32,
|
||||
NumU16 Endi,
|
||||
NumI16 Endi,
|
||||
NumU32 Endi,
|
||||
NumI32 Endi,
|
||||
NumU64 Endi,
|
||||
NumI64 Endi,
|
||||
NumU128 Endi,
|
||||
NumI128 Endi,
|
||||
NumF64 Endi,
|
||||
NumF32 Endi,
|
||||
Utf8 Str,
|
||||
Utf16Le Str,
|
||||
Utf16Be Str,
|
||||
Utf16 Str Endi,
|
||||
UsvUtf8,
|
||||
UsvUtf16 Endi,
|
||||
GraphemeUtf8,
|
||||
GraphemeUtf16Le,
|
||||
GraphemeUtf16Be,
|
||||
GraphemeUtf16 Endi,
|
||||
End,
|
||||
]
|
||||
Str
|
||||
]
|
||||
|
||||
keep : Parser a, (a -> Parser b) -> Parser b
|
||||
|
||||
skip : Parser *, ({} -> Parser b) -> Parser b
|
||||
|
||||
utf8 : Parser Str
|
||||
utf16 : Parser Str
|
||||
|
||||
graphemeUtf8 : Parser Str
|
||||
graphemeUtf16Le : Parser Str
|
||||
graphemeUtf16Be : Parser Str
|
||||
|
||||
usv : Parser U32
|
||||
graphemeUtf16 : Endi -> Parser Str
|
||||
utf8 : Str -> Parser Str
|
||||
utf16 : Str, Endi -> Parser Str
|
||||
usvUtf8 : Parser U32 # UTF-8 defines endianness
|
||||
usvUtf16 : Endi -> Parser U32
|
||||
|
||||
u8 : Parser U8
|
||||
i8 : Parser I8
|
||||
u16 : Endi -> Parser U16
|
||||
i16 : Endi -> Parser I16
|
||||
u32 : Endi -> Parser U32
|
||||
i32 : Endi -> Parser I32
|
||||
u64 : Endi -> Parser U64
|
||||
i64 : Endi -> Parser I64
|
||||
u128 : Endi -> Parser U128
|
||||
i128 : Endi -> Parser I128
|
||||
|
|
|
@ -24,7 +24,7 @@ Parser a :
|
|||
@Parser (Str -> Result { answer : a, rest : Str } RawProblem),
|
||||
]
|
||||
|
||||
RawProblem :
|
||||
Problem :
|
||||
[
|
||||
Expected
|
||||
[
|
||||
|
@ -40,12 +40,14 @@ RawProblem :
|
|||
NumI128,
|
||||
NumF64,
|
||||
NumF32,
|
||||
End
|
||||
ExactStr Str,
|
||||
Grapheme,
|
||||
End,
|
||||
]
|
||||
Str
|
||||
]
|
||||
|
||||
keep : Parser a, (a -> Parser b) -> Parser b
|
||||
|
||||
skip : Parser *, ({} -> Parser b) -> Parser b
|
||||
|
||||
symbol : Str -> Parser {}
|
Loading…
Add table
Add a link
Reference in a new issue