mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-12 06:46:19 +00:00
73 lines
3.1 KiB
Text
73 lines
3.1 KiB
Text
interface InternalPath
|
|
exposes [
|
|
UnwrappedPath,
|
|
InternalPath,
|
|
wrap,
|
|
unwrap,
|
|
toBytes,
|
|
fromArbitraryBytes,
|
|
fromOsBytes,
|
|
]
|
|
imports []
|
|
|
|
## Opaque wrapper around `UnwrappedPath`. Values are built with `wrap`,
## `fromArbitraryBytes` or `fromOsBytes`, and opened up again with `unwrap`.
InternalPath := UnwrappedPath
|
|
|
|
## The representation wrapped by `InternalPath`: the path's data, tagged by
## where that data came from.
UnwrappedPath : [
    # We store these separately for two reasons:
    # 1. If I'm calling an OS API, passing a path I got from the OS is definitely safe.
    #    However, passing a Path I got from a RocStr might be unsafe; it may contain \0
    #    characters, which would result in the operation happening on a totally different
    #    path. As such, we need to check for \0s and fail without calling the OS API if we
    #    find one in the path.
    # 2. If I'm converting the Path to a Str, doing that conversion on a Path that was
    #    created from a RocStr needs no further processing. However, if it came from the OS,
    #    then we need to know what charset to assume it had, in order to decode it properly.

    # These come from the OS (e.g. when reading a directory, calling `canonicalize`,
    # or reading an environment variable - which, incidentally, are nul-terminated),
    # so we know they are both nul-terminated and do not contain interior nuls.
    # As such, they can be passed directly to OS APIs.
    #
    # Note that the nul terminator byte is right after the end of the length (into the
    # unused capacity), so this can be compared directly to other `List U8`s that
    # aren't nul-terminated, and can also be passed directly to OS APIs.
    FromOperatingSystem (List U8),

    # These come from userspace (e.g. Path.fromBytes), so they need to be checked for interior
    # nuls and then nul-terminated before the host can pass them to OS APIs.
    ArbitraryBytes (List U8),

    # This was created as a RocStr, so it might have interior nul bytes but it's definitely UTF-8.
    # That means we can `toStr` it trivially, but have to validate before sending it to OS
    # APIs that expect a nul-terminated `char*`.
    #
    # Note that both UNIX and Windows APIs will accept UTF-8, because on Windows the host calls
    # `_setmbcp(_MB_CP_UTF8);` to set the process's Code Page to UTF-8 before doing anything else.
    # See https://docs.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page#-a-vs--w-apis
    # and https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setmbcp?view=msvc-170
    # for more details on the UTF-8 Code Page in Windows.
    FromStr Str,
]
|
|
|
|
## Put an `UnwrappedPath` inside the opaque `InternalPath` type.
wrap : UnwrappedPath -> InternalPath
wrap = \raw -> @InternalPath raw
|
|
|
|
## Return the `UnwrappedPath` stored inside an `InternalPath`.
unwrap : InternalPath -> UnwrappedPath
unwrap = \internal ->
    when internal is
        @InternalPath inner -> inner
|
|
|
|
## Return the path's contents as a `List U8`.
##
## `FromOperatingSystem` and `ArbitraryBytes` paths hand back the list they
## already hold; a `FromStr` path is converted with `Str.toUtf8`.
##
## TODO do this in the host, and iterate over the Str
## bytes when possible instead of always converting to
## a heap-allocated List.
toBytes : InternalPath -> List U8
toBytes = \internal ->
    when unwrap internal is
        FromStr text -> text |> Str.toUtf8
        ArbitraryBytes raw -> raw
        FromOperatingSystem raw -> raw
|
|
|
|
## Build an `InternalPath` from bytes, tagging them as `ArbitraryBytes`.
## The bytes are stored as given; nothing is checked or modified here.
fromArbitraryBytes : List U8 -> InternalPath
fromArbitraryBytes = \rawBytes ->
    ArbitraryBytes rawBytes |> wrap
|
|
|
|
## Build an `InternalPath` from bytes, tagging them as `FromOperatingSystem`.
## The bytes are stored as given; nothing is checked or modified here, so the
## caller is responsible for only passing bytes that really came from the OS.
fromOsBytes : List U8 -> InternalPath
fromOsBytes = \osBytes ->
    FromOperatingSystem osBytes |> wrap
|