roc/examples/cli/cli-platform/InternalPath.roc

73 lines
3.1 KiB
Text

interface InternalPath
exposes [
UnwrappedPath,
InternalPath,
wrap,
unwrap,
toBytes,
fromArbitraryBytes,
fromOsBytes,
]
imports []
## Opaque type whose underlying representation is `UnwrappedPath`.
## `wrap` and `unwrap` convert between the opaque and the unwrapped form.
InternalPath := UnwrappedPath
## The representation behind `InternalPath`: a tag union with one variant per
## origin of the path data (the OS, arbitrary userspace bytes, or a `Str`).
UnwrappedPath : [
# We store these separately for two reasons:
#
# 1. If I'm calling an OS API, passing a path I got from the OS is definitely safe.
#    However, passing a Path I got from a RocStr might be unsafe; it may contain \0
#    characters, which would result in the operation happening on a totally different
#    path. As such, we need to check for \0s and fail without calling the OS API if we
#    find one in the path.
# 2. If I'm converting the Path to a Str, doing that conversion on a Path that was
#    created from a RocStr needs no further processing. However, if it came from the OS,
#    then we need to know what charset to assume it had, in order to decode it properly.
#
# FromOperatingSystem:
# These come from the OS (e.g. when reading a directory, calling `canonicalize`,
# or reading an environment variable - which, incidentally, are nul-terminated),
# so we know they are both nul-terminated and do not contain interior nuls.
# As such, they can be passed directly to OS APIs.
#
# Note that the nul terminator byte is stored right after the end of the list's length
# (in the unused capacity), so this value can be compared directly to other `List U8`s
# that aren't nul-terminated, while still being able to be passed directly to OS APIs.
FromOperatingSystem (List U8),
# ArbitraryBytes:
# These come from userspace (e.g. Path.fromBytes), so they need to be checked for interior
# nuls and then nul-terminated before the host can pass them to OS APIs.
ArbitraryBytes (List U8),
# FromStr:
# This was created as a RocStr, so it might have interior nul bytes but it's definitely UTF-8.
# That means we can `toStr` it trivially, but have to validate before sending it to OS
# APIs that expect a nul-terminated `char*`.
#
# Note that both UNIX and Windows APIs will accept UTF-8, because on Windows the host calls
# `_setmbcp(_MB_CP_UTF8);` to set the process's Code Page to UTF-8 before doing anything else.
# See https://docs.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page#-a-vs--w-apis
# and https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setmbcp?view=msvc-170
# for more details on the UTF-8 Code Page in Windows.
FromStr Str,
]
## Wraps an `UnwrappedPath` value in the opaque `InternalPath` type.
wrap : UnwrappedPath -> InternalPath
wrap = \unwrapped ->
    @InternalPath unwrapped
## Returns the `UnwrappedPath` value held inside an `InternalPath`.
unwrap : InternalPath -> UnwrappedPath
unwrap = \wrapped ->
    when wrapped is
        @InternalPath inner -> inner
## Returns the contents of a path as a `List U8`.
##
## The two byte-backed variants return their stored list as-is; the
## `FromStr` variant is converted with `Str.toUtf8`.
##
## TODO do this in the host, and iterate over the Str
## bytes when possible instead of always converting to
## a heap-allocated List.
toBytes : InternalPath -> List U8
toBytes = \internal ->
    when internal is
        @InternalPath (FromStr text) -> Str.toUtf8 text
        @InternalPath (ArbitraryBytes raw) -> raw
        @InternalPath (FromOperatingSystem raw) -> raw
## Builds an `InternalPath` from a list of bytes, tagging it as `ArbitraryBytes`.
## No validation of the bytes is performed here.
fromArbitraryBytes : List U8 -> InternalPath
fromArbitraryBytes = \rawBytes ->
    tagged = ArbitraryBytes rawBytes

    @InternalPath tagged
## Builds an `InternalPath` from a list of bytes, tagging it as `FromOperatingSystem`.
## No validation of the bytes is performed here.
fromOsBytes : List U8 -> InternalPath
fromOsBytes = \osBytes ->
    tagged = FromOperatingSystem osBytes

    @InternalPath tagged