roc/examples/cli/cli-platform/Url.roc

431 lines
16 KiB
Text

interface Url
exposes [
Url,
append,
fromStr,
toStr,
appendParam,
hasQuery,
hasFragment,
query,
fragment,
reserve,
withQuery,
withFragment,
]
imports []
## A [Uniform Resource Locator](https://en.wikipedia.org/wiki/URL).
##
## It could be an absolute address, such as `https://roc-lang.org/authors` or
## a relative address, such as `/authors`. You can create one using [Url.fromStr].
Url := Str
## Reserve the given number of bytes as extra capacity. This can avoid reallocation
## when calling multiple functions that increase the length of the URL.
##
## Url.fromStr "https://example.com"
## |> Url.reserve 50 # We're about to add 50 UTF-8 bytes to it
## |> Url.append "stuff"
## |> Url.appendParam "café" "du Monde"
## |> Url.appendParam "email" "hi@example.com"
## # https://example.com/stuff?caf%C3%A9=du%20Monde&email=hi%40example.com
##
## The [Str.countUtf8Bytes] function can be helpful in finding out how many bytes to reserve.
##
## There is no `Url.withCapacity` because it's better to reserve extra capacity
## on a [Str] first, and then pass that string to [Url.fromStr]. This function will make use
## of the extra capacity.
reserve : Url, Nat -> Url
reserve = \@Url str, cap ->
@Url (Str.reserve str cap)
## Create a [Url] without validating or [percent-encoding](https://en.wikipedia.org/wiki/Percent-encoding)
## anything.
##
## Url.fromStr "https://example.com#stuff"
## # https://example.com#stuff
##
## URLs can be absolute, like `https://example.com`, or they can be relative, like `/blah`.
##
## Url.fromStr "/this/is#relative"
## # /this/is#relative
##
## Since nothing is validated, this can return invalid URLs.
##
## Url.fromStr "https://this is not a valid URL, not at all!"
## # https://this is not a valid URL, not at all!
##
## Naturally, passing invalid URLs to functions that need valid ones will tend to result in errors.
fromStr : Str -> Url
fromStr = \str -> @Url str
## Return a [Str] representation of this URL.
##
## Url.fromStr "https://example.com"
## |> Url.append "two words"
## |> Url.toStr
## # "https://example.com/two%20words"
toStr : Url -> Str
toStr = \@Url str -> str
## [Percent-encodes](https://en.wikipedia.org/wiki/Percent-encoding) a
## [path component](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Syntax)
## and appends to the end of the URL's path.
##
## Url.fromStr "https://example.com"
## |> Url.append "some stuff"
## # https://example.com/some%20stuff
##
## This will be appended before any queries and fragments.
##
## Url.fromStr "https://example.com?search=blah#fragment"
## |> Url.append "stuff"
## # https://example.com/stuff?search=blah#fragment
##
## If the given path string begins with `"/"` and the URL already ends with `"/"`, one
## will be ignored. This avoids turning a single slash into a double slash.
##
## Url.fromStr "https://example.com/things/"
## |> Url.append "/stuff/"
## |> Url.append "/more/etc/"
## # https://example.com/things/stuff/more/etc/"
##
## If either the given URL or the given string is empty, no `"/"` will be added.
##
## Url.fromStr "https://example.com/things"
## |> Url.append ""
## # https://example.com/things
append : Url, Str -> Url
append = \@Url urlStr, suffixUnencoded ->
suffix = percentEncode suffixUnencoded
when Str.splitFirst urlStr "?" is
Ok { before, after } ->
bytes =
Str.countUtf8Bytes before
+ 1 # for "/"
+ Str.countUtf8Bytes suffix
+ 1 # for "?"
+ Str.countUtf8Bytes after
before
|> Str.reserve bytes
|> appendHelp suffix
|> Str.concat "?"
|> Str.concat after
|> @Url
Err NotFound ->
# There wasn't a query, but there might still be a fragment
when Str.splitFirst urlStr "#" is
Ok { before, after } ->
bytes =
Str.countUtf8Bytes before
+ 1 # for "/"
+ Str.countUtf8Bytes suffix
+ 1 # for "#"
+ Str.countUtf8Bytes after
before
|> Str.reserve bytes
|> appendHelp suffix
|> Str.concat "#"
|> Str.concat after
|> @Url
Err NotFound ->
# No query and no fragment, so just append it
@Url (appendHelp urlStr suffix)
## Internal helper
appendHelp : Str, Str -> Str
appendHelp = \prefix, suffix ->
if Str.endsWith prefix "/" then
if Str.startsWith suffix "/" then
# Avoid a double-slash by appending only the part of the suffix after the "/"
when Str.splitFirst suffix "/" is
Ok { after } ->
# TODO `expect before == ""`
Str.concat prefix after
Err NotFound ->
# This should never happen, because we already verified
# that the suffix startsWith "/"
# TODO `expect Bool.false` here with a comment
Str.concat prefix suffix
else
# prefix ends with "/" but suffix doesn't start with one, so just append.
Str.concat prefix suffix
else if Str.startsWith suffix "/" then
# Suffix starts with "/" but prefix doesn't end with one, so just append them.
Str.concat prefix suffix
else if Str.isEmpty prefix then
# Prefix is empty; return suffix.
suffix
else if Str.isEmpty suffix then
# Suffix is empty; return prefix.
prefix
else
# Neither is empty, but neither has a "/", so add one in between.
prefix
|> Str.concat "/"
|> Str.concat suffix
## Internal helper. This is intentionally unexposed so that you don't accidentally
## double-encode things. If you really want to percent-encode an arbitrary string,
## you can always do:
##
## Url.fromStr ""
## |> Url.append myStrToEncode
## |> Url.toStr
##
## Note that it's not necessary to situationally encode spaces as `+` instead of `%20` -
## it's apparently always safe to use `%20` (but not always safe to use `+`):
## https://stackoverflow.com/questions/2678551/when-should-space-be-encoded-to-plus-or-20/47188851#47188851
percentEncode : Str -> Str
percentEncode = \input ->
# Optimistically assume we won't need any percent encoding, and can have
# the same capacity as the input string. If we're wrong, it will get doubled.
initialOutput = strWithCapacity (Str.countUtf8Bytes input)
# TODO use Str.walkUtf8 once it exists
Str.walkUtf8WithIndex input initialOutput \output, byte, _index ->
# Spec for percent-encoding: https://www.ietf.org/rfc/rfc3986.txt
if
(byte >= 97 && byte <= 122) # lowercase ASCII
|| (byte >= 65 && byte <= 90) # uppercase ASCII
|| (byte >= 48 && byte <= 57) # digit
then
# This is the most common case: an unreserved character,
# which needs no encoding in a path
Str.appendScalar output (Num.toU32 byte)
|> Result.withDefault "" # this will never fail
else
when byte is
46 # '.'
| 95 # '_'
| 126 # '~'
| 150 -> # '-'
# These special characters can all be unescaped in paths
Str.appendScalar output (Num.toU32 byte)
|> Result.withDefault "" # this will never fail
_ ->
# This needs encoding in a path
suffix =
Str.toUtf8 percentEncoded
|> List.sublist { len: 3, start: 3 * Num.toNat byte }
|> Str.fromUtf8
|> Result.withDefault "" # This will never fail
Str.concat output suffix
## Adds a [Str] query parameter to the end of the [Url]. The key
## and value both get [percent-encoded](https://en.wikipedia.org/wiki/Percent-encoding).
##
## Url.fromStr "https://example.com"
## |> Url.appendParam "email" "someone@example.com"
## # https://example.com?email=someone%40example.com
##
## This can be called multiple times on the same URL.
##
## Url.fromStr "https://example.com"
## |> Url.appendParam "café" "du Monde"
## |> Url.appendParam "email" "hi@example.com"
## # https://example.com?caf%C3%A9=du%20Monde&email=hi%40example.com
appendParam : Url, Str, Str -> Url
appendParam = \@Url urlStr, key, value ->
{ withoutFragment, afterQuery } =
when Str.splitLast urlStr "#" is
Ok { before, after } ->
# The fragment is almost certainly going to be a small string,
# so this interpolation should happen on the stack.
{ withoutFragment: before, afterQuery: "#\(after)" }
Err NotFound ->
{ withoutFragment: urlStr, afterQuery: "" }
encodedKey = percentEncode key
encodedValue = percentEncode value
bytes =
Str.countUtf8Bytes withoutFragment
+ 1 # for "?" or "&"
+ Str.countUtf8Bytes encodedKey
+ 1 # for "="
+ Str.countUtf8Bytes encodedValue
+ Str.countUtf8Bytes afterQuery
withoutFragment
|> Str.reserve bytes
|> Str.concat (if hasQuery (@Url withoutFragment) then "&" else "?")
|> Str.concat encodedKey
|> Str.concat "="
|> Str.concat encodedValue
|> Str.concat afterQuery
|> @Url
## Replaces the URL's [query](https://en.wikipedia.org/wiki/URL#Syntax)—the part after
## the `?`, if it has one, but before any `#` it might have.
##
## Url.fromStr "https://example.com?key1=val1&key2=val2#stuff"
## |> Url.withQuery "newQuery=thisRightHere"
## # https://example.com?newQuery=thisRightHere#stuff
##
## Passing `""` removes the `?` (if there was one).
##
## Url.fromStr "https://example.com?key1=val1&key2=val2#stuff"
## |> Url.withQuery ""
## # https://example.com#stuff
withQuery : Url, Str -> Url
withQuery = \@Url urlStr, queryStr ->
{ withoutFragment, afterQuery } =
when Str.splitLast urlStr "#" is
Ok { before, after } ->
# The fragment is almost certainly going to be a small string,
# so this interpolation should happen on the stack.
{ withoutFragment: before, afterQuery: "#\(after)" }
Err NotFound ->
{ withoutFragment: urlStr, afterQuery: "" }
beforeQuery =
when Str.splitLast withoutFragment "?" is
Ok { before } -> before
Err NotFound -> withoutFragment
if Str.isEmpty queryStr then
@Url (Str.concat beforeQuery afterQuery)
else
bytes =
Str.countUtf8Bytes beforeQuery
+ 1 # for "?"
+ Str.countUtf8Bytes queryStr
+ Str.countUtf8Bytes afterQuery
beforeQuery
|> Str.reserve bytes
|> Str.concat "?"
|> Str.concat queryStr
|> Str.concat afterQuery
|> @Url
## Returns the URL's [query](https://en.wikipedia.org/wiki/URL#Syntax)—the part after
## the `?`, if it has one, but before any `#` it might have.
##
## Url.fromStr "https://example.com?key1=val1&key2=val2&key3=val3#stuff"
## |> Url.query
## # "key1=val1&key2=val2&key3=val3"
##
## Returns `""` if the URL has no query.
##
## Url.fromStr "https://example.com#stuff"
## |> Url.query
## # ""
query : Url -> Str
query = \@Url urlStr ->
withoutFragment =
when Str.splitLast urlStr "#" is
Ok { before } -> before
Err NotFound -> urlStr
when Str.splitLast withoutFragment "?" is
Ok { after } -> after
Err NotFound -> ""
## Returns `Bool.true` if the URL has a `?` in it.
##
## Url.fromStr "https://example.com?key=value#stuff"
## |> Url.hasQuery
## # Bool.true
##
## Url.fromStr "https://example.com#stuff"
## |> Url.hasQuery
## # Bool.false
hasQuery : Url -> Bool
hasQuery = \@Url urlStr ->
# TODO use Str.contains once it exists. It should have a "fast path"
# with SIMD iteration if the string is small enough to fit in a SIMD register.
Str.toUtf8 urlStr
|> List.contains (Num.toU8 '?')
## Returns the URL's [fragment](https://en.wikipedia.org/wiki/URL#Syntax)—the part after
## the `#`, if it has one.
##
## Url.fromStr "https://example.com#stuff"
## |> Url.fragment
## # "stuff"
##
## Returns `""` if the URL has no fragment.
##
## Url.fromStr "https://example.com"
## |> Url.fragment
## # ""
fragment : Url -> Str
fragment = \@Url urlStr ->
when Str.splitLast urlStr "#" is
Ok { after } -> after
Err NotFound -> ""
## Replaces the URL's [fragment](https://en.wikipedia.org/wiki/URL#Syntax).
##
## Url.fromStr "https://example.com#stuff"
## |> Url.withFragment "things"
## # https://example.com#things
##
## If the URL didn't have a fragment, adds one.
##
## Url.fromStr "https://example.com"
## |> Url.withFragment "things"
## # https://example.com#things
##
## Passing `""` removes the fragment.
##
## Url.fromStr "https://example.com#stuff"
## |> Url.withFragment ""
## # https://example.com
withFragment : Url, Str -> Url
withFragment = \@Url urlStr, fragmentStr ->
when Str.splitLast urlStr "#" is
Ok { before } ->
if Str.isEmpty fragmentStr then
# If the given fragment is empty, remove the URL's fragment
@Url before
else
# Replace the URL's old fragment with this one, discarding `after`
@Url "\(before)#\(fragmentStr)"
Err NotFound ->
if Str.isEmpty fragmentStr then
# If the given fragment is empty, leave the URL as having no fragment
@Url urlStr
else
# The URL didn't have a fragment, so give it this one
@Url "\(urlStr)#\(fragmentStr)"
## Returns `Bool.true` if the URL has a `#` in it.
##
## Url.fromStr "https://example.com?key=value#stuff"
## |> Url.hasFragment
## # Bool.true
##
## Url.fromStr "https://example.com?key=value"
## |> Url.hasFragment
## # Bool.false
hasFragment : Url -> Bool
hasFragment = \@Url urlStr ->
# TODO use Str.contains once it exists. It should have a "fast path"
# with SIMD iteration if the string is small enough to fit in a SIMD register.
Str.toUtf8 urlStr
|> List.contains (Num.toU8 '#')
strWithCapacity : Nat -> Str
strWithCapacity = \cap ->
Str.reserve "" cap
# Adapted from the percent-encoding crate, © The rust-url developers, Apache2-licensed
#
# https://github.com/servo/rust-url/blob/e12d76a61add5bc09980599c738099feaacd1d0d/percent_encoding/src/lib.rs#L183
percentEncoded : Str
percentEncoded = "%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F%30%31%32%33%34%35%36%37%38%39%3A%3B%3C%3D%3E%3F%40%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F%50%51%52%53%54%55%56%57%58%59%5A%5B%5C%5D%5E%5F%60%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F%70%71%72%73%74%75%76%77%78%79%7A%7B%7C%7D%7E%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF"