From d5df5fc58deef88d49c1758ee3eff4034ae1ac33 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 27 Mar 2023 21:52:18 +1100 Subject: [PATCH] WIP updating record decoding --- crates/compiler/builtins/roc/Json.roc | 186 +++++++++++++++++++++----- 1 file changed, 153 insertions(+), 33 deletions(-) diff --git a/crates/compiler/builtins/roc/Json.roc b/crates/compiler/builtins/roc/Json.roc index 31010e1b51..8a7c524044 100644 --- a/crates/compiler/builtins/roc/Json.roc +++ b/crates/compiler/builtins/roc/Json.roc @@ -459,11 +459,11 @@ parseExactChar = \bytes, char -> Err _ -> { result: Err TooShort, rest: bytes } -openBrace : List U8 -> DecodeResult {} -openBrace = \bytes -> parseExactChar bytes '{' +# openBrace : List U8 -> DecodeResult {} +# openBrace = \bytes -> parseExactChar bytes '{' -closingBrace : List U8 -> DecodeResult {} -closingBrace = \bytes -> parseExactChar bytes '}' +# closingBrace : List U8 -> DecodeResult {} +# closingBrace = \bytes -> parseExactChar bytes '}' openBracket : List U8 -> DecodeResult {} openBracket = \bytes -> parseExactChar bytes '[' @@ -474,8 +474,8 @@ closingBracket = \bytes -> parseExactChar bytes ']' anything : List U8 -> DecodeResult {} anything = \bytes -> { result: Err TooShort, rest: bytes } -colon : List U8 -> DecodeResult {} -colon = \bytes -> parseExactChar bytes ':' +# colon : List U8 -> DecodeResult {} +# colon = \bytes -> parseExactChar bytes ':' comma : List U8 -> DecodeResult {} comma = \bytes -> parseExactChar bytes ',' @@ -588,8 +588,10 @@ expect expected = { result: Ok 2u64, rest: [']'] } actual == expected +# TODO why is this trying to decode as a Record? +# If you use Decode.fromBytesPartial it fails expect - actual = "30,\n" |> Str.toUtf8 |> Decode.fromBytesPartial fromUtf8 + actual = "30,\n" |> Str.toUtf8 |> Decode.decodeWith decodeI64 fromUtf8 expected = { result: Ok 30i64, rest: [',', '\n'] } actual == expected @@ -745,7 +747,7 @@ stringHelp = \state, byte -> (UnicodeB n, b) if isHex b -> Continue (UnicodeC (n + 1)) (UnicodeC n, b) if isHex b -> Continue (UnicodeD (n + 1)) (UnicodeD n, b) if isHex b -> Continue (Chars (n + 1)) - _ -> Break (Invalid) + _ -> Break (InvalidNumber) StringState : [ Start, @@ -756,7 +758,7 @@ StringState : [ UnicodeC Nat, UnicodeD Nat, Finish Nat, - Invalid, + InvalidNumber, ] isEscapedChar : U8 -> Bool @@ -1075,43 +1077,161 @@ expect # JSON OBJECTS ----------------------------------------------------------------- +# decodeRecord = \initialState, stepField, finalizer -> Decode.custom \bytes, @Json {} -> +# # NB: the stepper function must be passed explicitly until #2894 is resolved. +# decodeFields = \stepper, state, kvBytes -> +# { val: key, rest } <- (Decode.decodeWith kvBytes decodeString (@Json {})) |> tryDecode +# { rest: afterColonBytes } <- colon rest |> tryDecode +# { val: newState, rest: beforeCommaOrBreak } <- tryDecode +# ( +# when stepper state key is +# Skip -> +# { rest: beforeCommaOrBreak } <- afterColonBytes |> anything |> tryDecode +# { result: Ok state, rest: beforeCommaOrBreak } + +# Keep decoder -> +# Decode.decodeWith afterColonBytes decoder (@Json {}) +# ) + +# { result: commaResult, rest: nextBytes } = comma beforeCommaOrBreak + +# when commaResult is +# Ok {} -> decodeFields stepField newState nextBytes +# Err _ -> { result: Ok newState, rest: nextBytes } + +# { rest: afterBraceBytes } <- bytes |> openBrace |> tryDecode + +# { val: endStateResult, rest: beforeClosingBraceBytes } <- decodeFields stepField initialState afterBraceBytes |> tryDecode + +# { rest: afterRecordBytes } <- beforeClosingBraceBytes |> closingBrace |> tryDecode + + # when finalizer endStateResult is + # Ok val -> { result: Ok val, rest: afterRecordBytes } + # Err e -> { result: Err e, rest: afterRecordBytes } + decodeRecord = \initialState, stepField, finalizer -> Decode.custom \bytes, @Json {} -> - # NB: the stepper function must be passed explicitly until #2894 is resolved. - decodeFields = \stepper, state, kvBytes -> - { val: key, rest } <- (Decode.decodeWith kvBytes decodeString (@Json {})) |> tryDecode - { rest: afterColonBytes } <- colon rest |> tryDecode - { val: newState, rest: beforeCommaOrBreak } <- tryDecode + + # Recursively build up record from object field:value pairs + decodeFields = \recordState, bytesBeforeField -> + + # Decode the json string field name + {result: fieldNameResult, rest: bytesAfterField} = + Decode.decodeWith bytesBeforeField decodeString fromUtf8 + + # Count the bytes until the field value + countBytesBeforeValue = when List.walkUntil bytesAfterField (BeforeColon 0) objectHelp is + ObjectValueStart n -> n + _ -> 0 + + valueBytes = List.drop bytesAfterField countBytesBeforeValue + + when fieldNameResult is + Err TooShort -> + # Invalid object, unable to decode field name or find colon ':' + # after field and before the value + {result: Err TooShort, rest: bytes} + Ok fieldName -> + # Decode the json value + {val: updatedRecord, rest: bytesAfterValue } <- ( - when stepper state key is + # Retrieve value decoder for the current field + when stepField recordState fieldName is Skip -> - { rest: beforeCommaOrBreak } <- afterColonBytes |> anything |> tryDecode - { result: Ok state, rest: beforeCommaOrBreak } + # TODO This doesn't seem right, shouldn't we eat + # the remaining value bytes if we are skipping this + # field? + # + # Should rest be bytesAfterNextValue or similar? + {result: Ok recordState, rest: valueBytes} - Keep decoder -> - Decode.decodeWith afterColonBytes decoder (@Json {}) - ) + Keep valueDecoder -> + # Decode the value using the decoder from the recordState + Decode.decodeWith valueBytes valueDecoder fromUtf8 + ) + |> tryDecode - { result: commaResult, rest: nextBytes } = comma beforeCommaOrBreak + # Check if another field or '}' for end of object + when List.walkUntil bytesAfterValue (AfterObjectValue 0) objectHelp is + ObjectFieldNameStart n -> + rest = List.drop bytesAfterValue n - when commaResult is - Ok {} -> decodeFields stepField newState nextBytes - Err _ -> { result: Ok newState, rest: nextBytes } + # Decode the next field and value + decodeFields updatedRecord rest - { rest: afterBraceBytes } <- bytes |> openBrace |> tryDecode + AfterClosingBrace n -> + rest = List.drop bytesAfterValue n - { val: endStateResult, rest: beforeClosingBraceBytes } <- decodeFields stepField initialState afterBraceBytes |> tryDecode + # Build final record from decoded fields and values + when finalizer updatedRecord is + Ok val -> { result: Ok val, rest } + Err e -> { result: Err e, rest } - { rest: afterRecordBytes } <- beforeClosingBraceBytes |> closingBrace |> tryDecode + _ -> + # Invalid object + {result : Err TooShort, rest: bytesAfterValue} + + + countBytesBeforeFirstField = + when List.walkUntil bytes (BeforeOpeningBrace 0) objectHelp is + ObjectFieldNameStart n -> n + _ -> 0 - when finalizer endStateResult is - Ok val -> { result: Ok val, rest: afterRecordBytes } - Err e -> { result: Err e, rest: afterRecordBytes } + if countBytesBeforeFirstField == 0 then + # Invalid object, expected opening brace '{' followed by a field + {result: Err TooShort, rest: bytes} + else + bytesBeforeFirstField = (List.drop bytes countBytesBeforeFirstField) -# # Test decode of simple Json Object into Roc Record + # Begin decoding field:value pairs + decodeFields initialState bytesBeforeFirstField + +objectHelp : ObjectState, U8 -> [Break ObjectState, Continue ObjectState] +objectHelp = \state, byte -> + when (state, byte) is + (BeforeOpeningBrace n, b) if isWhitespace b -> Continue (BeforeOpeningBrace (n+1)) + (BeforeOpeningBrace n, b) if b == '{' -> Continue (AfterOpeningBrace (n+1)) + (AfterOpeningBrace n, b) if isWhitespace b -> Continue (AfterOpeningBrace (n+1)) + (AfterOpeningBrace n, b) if b == '"' -> Break (ObjectFieldNameStart n) # field names must be a json string + (BeforeColon n, b) if isWhitespace b -> Continue (BeforeColon (n+1)) + (BeforeColon n, b) if b == ':' -> Continue (AfterColon (n+1)) + (AfterColon n, b) if isWhitespace b -> Continue (AfterColon (n+1)) + (AfterColon n, _) -> Break (ObjectValueStart n) # object value could start with **almost** anything + (AfterObjectValue n, b) if isWhitespace b -> Continue (AfterObjectValue (n+1)) + (AfterObjectValue n, b) if b == ',' -> Continue (AfterComma (n+1)) + (AfterObjectValue n, b) if b == '}' -> Continue (AfterClosingBrace (n+1)) + (AfterComma n, b) if isWhitespace b -> Continue (AfterComma (n+1)) + (AfterComma n, b) if b == '"' -> Break (ObjectFieldNameStart n) + (AfterClosingBrace n, b) if isWhitespace b -> Continue (AfterClosingBrace (n+1)) + _ -> Break InvalidObject + +ObjectState : [ + BeforeOpeningBrace Nat, + AfterOpeningBrace Nat, + ObjectFieldNameStart Nat, + BeforeColon Nat, + AfterColon Nat, + ObjectValueStart Nat, + AfterObjectValue Nat, + AfterComma Nat, + AfterClosingBrace Nat, + InvalidObject, +] + +# Test decode of simple Json Object into Roc Record +expect + input = Str.toUtf8 "{\"fruit\":2\n} " + + actual = Decode.fromBytesPartial input fromUtf8 + expected = Ok { fruit: 2 } + + actual.result == expected + +# Test decode of simple Json Object into Roc Record +# TODO assertion failed: value == 0 || value == 1 # expect -# input = Str.toUtf8 "{\"fruit\":2}" +# input = Str.toUtf8 "{\"fruit\":\"two\"}" # actual = Decode.fromBytesPartial input fromUtf8 -# expected = Ok { fruit: 2 } +# expected = Ok { fruit: "two" } # actual.result == expected