optimise encodeString for when no escapes, add more tests various

This commit is contained in:
Luke Boswell 2023-04-23 13:24:41 +10:00
parent eef3adcbbb
commit 2ff55b9704
No known key found for this signature in database
GPG key ID: F6DB3C9DB47377B0

View file

@ -140,6 +140,10 @@ FieldNameMapping : [
Custom (Str -> Str), # provide a custom formatting
]
# TODO encode as JSON numbers as base 10 decimal digits
# e.g. the REPL `Num.toStr 12e42f64` gives
# "12000000000000000000000000000000000000000000" : Str
# which should be encoded as "12e42" : Str
numToBytes = \n ->
n |> Num.toStr |> Str.toUtf8
@ -202,27 +206,68 @@ encodeBool = \b ->
else
List.concat bytes (Str.toUtf8 "false")
# Test encode boolean
expect
input = [Bool.true, Bool.false]
actual = Encode.toBytes input json
expected = Str.toUtf8 "[true,false]"
actual == expected
encodeString = \str ->
Encode.custom \bytes, @Json {} ->
bytes
|> List.concat ['"']
|> List.concat (encodeJsonEscapes str)
|> List.concat ['"']
List.concat bytes (encodeStrBytes str)
encodeJsonEscapes : Str -> List U8
encodeJsonEscapes = \str ->
# TODO add support for unicode escapes (including 2,3,4 byte code points)
# these should be encoded using a 12-byte sequence encoding the UTF-16 surrogate
# pair. For example a string containing only G clef character U+1D11E is
# represented as "\\uD834\\uDD1E" (note "\\" here is a single reverse solidus)
encodeStrBytes = \str ->
bytes = Str.toUtf8 str
# Reserve capacity for escaped bytes to reduce allocations
initial =
bytes
|> List.len
|> Num.mul 120
|> Num.divCeil 100
|> List.withCapacity
initialState = { bytePos: 0, status: NoEscapesFound }
List.walk bytes initial \encodedBytes, byte ->
List.concat encodedBytes (escapedByteToJson byte)
firstPassState =
List.walkUntil bytes initialState \{ bytePos, status }, b ->
when b is
0x22 -> Break { bytePos, status: FoundEscape } # U+0022 Quotation mark
0x5c -> Break { bytePos, status: FoundEscape } # U+005c Reverse solidus
0x2f -> Break { bytePos, status: FoundEscape } # U+002f Solidus
0x08 -> Break { bytePos, status: FoundEscape } # U+0008 Backspace
0x0c -> Break { bytePos, status: FoundEscape } # U+000c Form feed
0x0a -> Break { bytePos, status: FoundEscape } # U+000a Line feed
0x0d -> Break { bytePos, status: FoundEscape } # U+000d Carriage return
0x09 -> Break { bytePos, status: FoundEscape } # U+0009 Tab
_ -> Continue { bytePos: bytePos + 1, status }
when firstPassState.status is
NoEscapesFound ->
(List.len bytes)
+ 2
|> List.withCapacity
|> List.concat ['"']
|> List.concat bytes
|> List.concat ['"']
FoundEscape ->
{ before: bytesBeforeEscape, others: bytesWithEscapes } =
List.split bytes firstPassState.bytePos
# Reserve List with 120% capacity for escaped bytes to reduce
# allocations, include starting quote, and bytes up to first escape
initial =
List.len bytes
|> Num.mul 120
|> Num.divCeil 100
|> List.withCapacity
|> List.concat ['"']
|> List.concat bytesBeforeEscape
# Walk the remaining bytes and include escape '\' as required
# add closing quote
List.walk bytesWithEscapes initial \encodedBytes, byte ->
List.concat encodedBytes (escapedByteToJson byte)
|> List.concat ['"']
# Prepend an "\" escape byte
escapedByteToJson : U8 -> List U8
@ -242,19 +287,34 @@ expect escapedByteToJson '\n' == ['\\', 'n']
expect escapedByteToJson '\\' == ['\\', '\\']
expect escapedByteToJson '"' == ['\\', '"']
# Test json string encoding with escapes
# e.g. "\r" encodes to "\\r" or "\\u000D" as Carriage Return is U+000D
# Test encode small string
expect
input = "a\r\nbc\\\"xz"
input = "G'day"
actual = Encode.toBytes input json
expected = Str.toUtf8 "\"a\\r\\nbc\\\\\\\"xz\""
expected = Str.toUtf8 "\"G'day\""
actual == expected
# Test encode large string
expect
input = "the quick brown fox jumps over the lazy dog"
actual = Encode.toBytes input json
expected = Str.toUtf8 "\"the quick brown fox jumps over the lazy dog\""
actual == expected
# Test encode with escapes e.g. "\r" encodes to "\\r"
expect
input = "the quick brown fox jumps over the lazy doga\r\nbc\\\"xz"
actual = Encode.toBytes input json
expected = Str.toUtf8 "\"the quick brown fox jumps over the lazy doga\\r\\nbc\\\\\\\"xz\""
actual == expected
encodeList = \lst, encodeElem ->
Encode.custom \bytes, @Json {} ->
Encode.custom \bytes, @Json { fieldNameMapping } ->
writeList = \{ buffer, elemsLeft }, elem ->
bufferWithElem = appendWith buffer (encodeElem elem) json
bufferWithElem = appendWith buffer (encodeElem elem) (@Json { fieldNameMapping })
bufferWithSuffix =
if elemsLeft > 1 then
List.append bufferWithElem (Num.toU8 ',')
@ -268,6 +328,15 @@ encodeList = \lst, encodeElem ->
List.append withList (Num.toU8 ']')
# Test encode list of floats
expect
input : List F64
input = [-1, 0.00001, 1e12, 2.0e-2, 0.0003, 43]
actual = Encode.toBytes input json
expected = Str.toUtf8 "[-1,0.00001,1000000000000,0.02,0.0003,43]"
actual == expected
encodeRecord = \fields ->
Encode.custom \bytes, @Json { fieldNameMapping } ->
writeRecord = \{ buffer, fieldsLeft }, { key, value } ->
@ -336,10 +405,10 @@ toYellingCase = \str ->
|> Str.joinWith ""
encodeTuple = \elems ->
Encode.custom \bytes, @Json {} ->
Encode.custom \bytes, @Json { fieldNameMapping } ->
writeTuple = \{ buffer, elemsLeft }, elemEncoder ->
bufferWithElem =
appendWith buffer elemEncoder json
appendWith buffer elemEncoder (@Json { fieldNameMapping })
bufferWithSuffix =
if elemsLeft > 1 then
@ -354,11 +423,19 @@ encodeTuple = \elems ->
List.append bytesWithRecord (Num.toU8 ']')
# Test encode of tuple
expect
input = ("The Answer is", 42)
actual = Encode.toBytes input json
expected = Str.toUtf8 "[\"The Answer is\",42]"
actual == expected
encodeTag = \name, payload ->
Encode.custom \bytes, @Json {} ->
Encode.custom \bytes, @Json { fieldNameMapping } ->
# Idea: encode `A v1 v2` as `{"A": [v1, v2]}`
writePayload = \{ buffer, itemsLeft }, encoder ->
bufferWithValue = appendWith buffer encoder json
bufferWithValue = appendWith buffer encoder (@Json { fieldNameMapping })
bufferWithSuffix =
if itemsLeft > 1 then
List.append bufferWithValue (Num.toU8 ',')
@ -380,6 +457,15 @@ encodeTag = \name, payload ->
List.append bytesWithPayload (Num.toU8 ']')
|> List.append (Num.toU8 '}')
# Test encode of tag
expect
input = TheAnswer "is" 42
encoder = jsonWithOptions { fieldNameMapping: KebabCase }
actual = Encode.toBytes input encoder
expected = Str.toUtf8 "{\"TheAnswer\":[\"is\",42]}"
actual == expected
decodeU8 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -391,6 +477,11 @@ decodeU8 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of U8
expect
actual = Str.toUtf8 "255" |> Decode.fromBytes json
actual == Ok 255u8
decodeU16 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -402,6 +493,11 @@ decodeU16 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of U16
expect
actual = Str.toUtf8 "65535" |> Decode.fromBytes json
actual == Ok 65_535u16
decodeU32 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -413,6 +509,11 @@ decodeU32 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of U32
expect
actual = Str.toUtf8 "4000000000" |> Decode.fromBytes json
actual == Ok 4_000_000_000u32
decodeU64 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -424,6 +525,11 @@ decodeU64 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of U64
expect
actual = Str.toUtf8 "18446744073709551614" |> Decode.fromBytes json
actual == Ok 18_446_744_073_709_551_614u64
decodeU128 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -435,6 +541,17 @@ decodeU128 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of U128
expect
actual = Str.toUtf8 "1234567" |> Decode.fromBytesPartial json
actual.result == Ok 1234567u128
# TODO should we support decoding bigints, note that valid json is only a
# double precision float-64
# expect
# actual = Str.toUtf8 "340282366920938463463374607431768211455" |> Decode.fromBytesPartial json
# actual.result == Ok 340_282_366_920_938_463_463_374_607_431_768_211_455u128
decodeI8 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -446,6 +563,11 @@ decodeI8 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of I8
expect
actual = Str.toUtf8 "-125" |> Decode.fromBytesPartial json
actual.result == Ok -125i8
decodeI16 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -457,6 +579,11 @@ decodeI16 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of I16
expect
actual = Str.toUtf8 "-32768" |> Decode.fromBytesPartial json
actual.result == Ok -32_768i16
decodeI32 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -468,6 +595,11 @@ decodeI32 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of I32
expect
actual = Str.toUtf8 "-2147483648" |> Decode.fromBytesPartial json
actual.result == Ok -2_147_483_648i32
decodeI64 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -479,6 +611,11 @@ decodeI64 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of I64
expect
actual = Str.toUtf8 "-9223372036854775808" |> Decode.fromBytesPartial json
actual.result == Ok -9_223_372_036_854_775_808i64
decodeI128 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -490,6 +627,11 @@ decodeI128 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of I128
# expect
# actual = Str.toUtf8 "-170141183460469231731687303715884105728" |> Decode.fromBytesPartial json
# actual.result == Ok -170_141_183_460_469_231_731_687_303_715_884_105_728i128
decodeF32 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -501,6 +643,14 @@ decodeF32 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of F32
expect
actual : DecodeResult F32
actual = Str.toUtf8 "12.34e-5" |> Decode.fromBytesPartial json
numStr = actual.result |> Result.map Num.toStr
Result.withDefault numStr "" == "0.00012339999375399202"
decodeF64 = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -512,6 +662,14 @@ decodeF64 = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of F64
expect
actual : DecodeResult F64
actual = Str.toUtf8 "12.34e-5" |> Decode.fromBytesPartial json
numStr = actual.result |> Result.map Num.toStr
Result.withDefault numStr "" == "0.0001234"
decodeDec = Decode.custom \bytes, @Json {} ->
{ taken, rest } = takeJsonNumber bytes
@ -523,6 +681,13 @@ decodeDec = Decode.custom \bytes, @Json {} ->
{ result, rest }
# Test decode of Dec
expect
actual : DecodeResult Dec
actual = Str.toUtf8 "12.0034" |> Decode.fromBytesPartial json
actual.result == Ok 12.0034dec
decodeBool = Decode.custom \bytes, @Json {} ->
when bytes is
['f', 'a', 'l', 's', 'e', ..] -> { result: Ok Bool.false, rest: List.drop bytes 5 }
@ -571,6 +736,13 @@ decodeTuple = \initialState, stepElem, finalizer -> Decode.custom \initialBytes,
Ok val -> { result: Ok val, rest: afterTupleBytes }
Err e -> { result: Err e, rest: afterTupleBytes }
# Test decode of tuple
expect
input = Str.toUtf8 "[\"The Answer is\",42]"
actual = Decode.fromBytesPartial input json
actual.result == Ok ("The Answer is", 42)
parseExactChar : List U8, U8 -> DecodeResult {}
parseExactChar = \bytes, char ->
when List.get bytes 0 is
@ -669,6 +841,8 @@ NumberState : [
Finish Nat,
]
# TODO confirm if we would like to be able to decode
# "340282366920938463463374607431768211455" which is MAX U128 and 39 bytes
maxBytes : Nat
maxBytes = 21 # Max bytes in a double precision float
@ -811,25 +985,26 @@ expect
# Note that decodeStr does not handle leading whitespace, any whitespace must be
# handled in json list or record decodin.
decodeString = Decode.custom \bytes, @Json {} ->
when bytes is
when bytes is
['n', 'u', 'l', 'l', ..] ->
{ result: Ok "null", rest: List.drop bytes 4 }
_ ->
{ taken: strBytes, rest } = takeJsonString bytes
if List.isEmpty strBytes then
{ result: Err TooShort, rest: bytes }
else
# Remove starting and ending quotation marks, replace unicode
# escpapes with Roc equivalent, and try to parse RocStr from
# bytes
# Remove starting and ending quotation marks, replace unicode
# escpapes with Roc equivalent, and try to parse RocStr from
# bytes
result =
strBytes
|> List.sublist {
start: 1,
len: Num.subSaturated (List.len strBytes) 2
}
|> \bytesWithoutQuotationMarks ->
start: 1,
len: Num.subSaturated (List.len strBytes) 2,
}
|> \bytesWithoutQuotationMarks ->
replaceEscapedChars { inBytes: bytesWithoutQuotationMarks, outBytes: [] }
|> .outBytes
|> Str.fromUtf8