mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-28 14:24:45 +00:00
Move things into roc/bytes and roc/unicode
This commit is contained in:
parent
9748aa00da
commit
877cc328d2
10 changed files with 209 additions and 177 deletions
9
packages/unicode/Package-Config.roc
Normal file
9
packages/unicode/Package-Config.roc
Normal file
|
@ -0,0 +1,9 @@
|
|||
package roc/unicode 0.1.0
|
||||
roc 0.0.0
|
||||
exposes [ Unicode, Unicode.Scalar, Unicode.CodePoint ]
|
||||
packages {}
|
||||
license UPL-1.0
|
||||
|
||||
# TODO should we handle Latin1 encoding? Other encodings? Should there be
|
||||
# an Ascii module, or a separate roc/ascii package? Consider that ASCII is
|
||||
# 7-bit, so not all U8s are valid ASCII! There's also Extended ASCII to consider.
|
41
packages/unicode/src/Unicode/CodePoint.roc
Normal file
41
packages/unicode/src/Unicode/CodePoint.roc
Normal file
|
@ -0,0 +1,41 @@
|
|||
# NOTE: `Bytes` is imported because the parse*/chomp* signatures in this
# module mention it; this mirrors the import list of Unicode.Scalar.
interface Unicode.CodePoint
    exposes
        [
            CodePoint,
            toU32,
            fromU32,
            parseUtf8,
            parseUtf16,
            chompUtf8,
            chompUtf16
        ]
    imports
        [
            Unicode.CodePoint.Internal as Internal,
            Bytes.{ Bytes }
        ]
|
||||
|
||||
## A [Unicode Code Point](http://www.unicode.org/glossary/#code_point)
|
||||
CodePoint : Internal.CodePoint
|
||||
|
||||
## Returns the [U32] for a [CodePoint] by delegating to `Internal.toU32`.
toU32 : CodePoint -> U32
toU32 = \point ->
    Internal.toU32 point
|
||||
|
||||
## Converts a list of code points to a [Str]: each [CodePoint] is mapped to
## its [U32] with [toU32], and the resulting list is handed to
## `Str.fromCodePoints`.
##
## To convert exactly one [CodePoint] to a [Str], that code point must be
## a valid [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value).
## You can get one of those by calling [Unicode.Scalar.fromCodePoint], and then
## you can call [Unicode.Scalar.toStr] to get a [Str] from it.
toStr : List CodePoint -> Result Str [ BadCodePoint U32 ]*
toStr = \points ->
    points
        |> List.map toU32
        |> Str.fromCodePoints
|
||||
|
||||
fromU32 : U32 -> Result CodePoint [ BadCodePoint ]*
|
||||
|
||||
parseUtf8 : Bytes -> Result { val : CodePoint, rest : Bytes } [ Expected [ Utf8CodePoint ]* Bytes ]*
|
||||
parseUtf16 : Bytes, Endi -> Result { val : CodePoint, rest : Bytes } [ Expected [ Utf16CodePoint Endi ]* Bytes ]*
|
||||
|
||||
chompUtf8 : Bytes, CodePoint -> Result Str [ Expected [ ExactCodePoint CodePoint ]* Bytes ]*
|
||||
chompUtf16 : Bytes, CodePoint, Endi -> Result Str [ Expected [ ExactCodePoint CodePoint ]* Bytes ]*
|
||||
|
||||
isAsciiDigit : CodePoint -> Bool
|
21
packages/unicode/src/Unicode/CodePoint/Internal.roc
Normal file
21
packages/unicode/src/Unicode/CodePoint/Internal.roc
Normal file
|
@ -0,0 +1,21 @@
|
|||
interface Unicode.CodePoint.Internal
|
||||
exposes
|
||||
[
|
||||
CodePoint,
|
||||
toU32,
|
||||
fromU32,
|
||||
fromU32Unchecked,
|
||||
]
|
||||
imports
|
||||
[]
|
||||
|
||||
## This is just here so that both Unicode.Scalar and Unicode.CodePoint can access it.
|
||||
CodePoint : [ @CodePoint U32 ]
|
||||
|
||||
## Wraps a raw [U32] in the `@CodePoint` tag without performing any check.
fromU32Unchecked : U32 -> CodePoint
fromU32Unchecked = \raw ->
    @CodePoint raw
|
||||
|
||||
## Unwraps the `@CodePoint` tag and returns the [U32] it holds.
toU32 : CodePoint -> U32
toU32 = \@CodePoint raw ->
    raw
|
||||
|
||||
fromU32 : U32 -> Result CodePoint [ BadCodePoint ]*
|
45
packages/unicode/src/Unicode/Scalar.roc
Normal file
45
packages/unicode/src/Unicode/Scalar.roc
Normal file
|
@ -0,0 +1,45 @@
|
|||
interface Unicode.Scalar
    exposes
        [
            Scalar,
            toStr,
            toCodePoint,
            fromCodePoint,
            parseUtf8,
            parseUtf16,
            chompUtf8,
            chompUtf16
        ]
    imports
        [
            Unicode.CodePoint.Internal as Internal,
            Unicode.CodePoint.{ CodePoint },
            Bytes.{ Bytes }
        ]
|
||||
|
||||
## A [Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)
|
||||
Scalar : [ @Scalar U32 ]
|
||||
|
||||
## Converts a [Scalar] to a [Str] by passing its wrapped [U32] to
## `Str.fromScalar` and unwrapping the `Ok` result.
toStr : Scalar -> Str
toStr = \@Scalar u32 ->
    when Str.fromScalar u32 is
        Ok str -> str
        Err _ ->
            # This will quickly crash if it ever runs, but we're confident
            # this Err branch will never run. That's because it only runs
            # if Str.fromScalar receives an invalid scalar value, and we've
            # already validated this!
            toStr (@Scalar (u32 * 256))
|
||||
|
||||
## Turns a [Scalar] into a [CodePoint] by unwrapping its [U32] and passing
## it to `Internal.fromU32Unchecked`.
toCodePoint : Scalar -> CodePoint
toCodePoint = \@Scalar raw ->
    Internal.fromU32Unchecked raw
|
||||
|
||||
fromCodePoint : CodePoint -> Result Scalar [ PointWasSurrogate ]*
|
||||
|
||||
parseUtf8 : Bytes -> Result { val : Scalar, rest : Bytes } [ Expected [ Utf8CodePoint ]* Bytes ]*
|
||||
parseUtf16 : Bytes, Endi -> Result { val : Scalar, rest : Bytes } [ Expected [ Utf16CodePoint Endi ]* Bytes ]*
|
||||
|
||||
chompUtf8 : Bytes, CodePoint -> Result Str [ Expected [ ExactCodePoint CodePoint ]* Bytes ]*
|
||||
chompUtf16 : Bytes, CodePoint, Endi -> Result Str [ Expected [ ExactCodePoint CodePoint ]* Bytes ]*
|
||||
|
||||
isAsciiDigit : CodePoint -> Bool
|
Loading…
Add table
Add a link
Reference in a new issue