From 35ff9642aa6489b10a7eb19c0eb61b666815c67a Mon Sep 17 00:00:00 2001 From: Marten/Qqwy Date: Sat, 16 Jul 2022 21:49:18 +0200 Subject: [PATCH] Better parse errors on CSV parsing fail. --- examples/csv/Parser/CSV.roc | 62 ++++++++++++++++++++++++++++-------- examples/csv/Parser/Core.roc | 28 +++++++++++++--- examples/csv/Parser/Str.roc | 36 +++++++++++++++++---- examples/csv/main.roc | 24 ++++++++++---- examples/csv/platform/host.c | 17 ++++++++++ 5 files changed, 137 insertions(+), 30 deletions(-) diff --git a/examples/csv/Parser/CSV.roc b/examples/csv/Parser/CSV.roc index ef6b567bbc..e95714f5ff 100644 --- a/examples/csv/Parser/CSV.roc +++ b/examples/csv/Parser/CSV.roc @@ -8,10 +8,12 @@ interface Parser.CSV parseStr, parseCSV, field, + string, + nat, ] imports [ - Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore}, - Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, string, scalar, digits, strFromRaw} + Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore, flatten}, + Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, scalar, digits, strFromRaw} ] ## This is a CSV parser which follows RFC4180 @@ -27,11 +29,12 @@ CSVField : RawStr CSVRecord : List CSVField CSV : List CSVRecord -parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError (List U8), ParsingIncomplete CSVRecord] +parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError Str, ParsingIncomplete CSVRecord] parseStr = \csvParser, input -> when parseStrToCSV input is Err (ParsingIncomplete rest) -> - Err (SyntaxError rest) + restStr = Parser.Str.strFromRaw rest + Err (SyntaxError restStr) Err (ParsingFailure str) -> Err (ParsingFailure str) Ok csvData -> @@ -47,32 +50,65 @@ parseCSV : Parser CSVRecord a, CSV -> Result (List a) [ParsingFailure Str, Parsi parseCSV = \csvParser, csvData -> List.walkUntil csvData (Ok []) \state, recordList -> when parse csvParser recordList (\leftover -> leftover == []) is - Err problem -> - Break (Err problem) + Err (ParsingFailure problem) -> + recordStr = recordList |> List.map strFromRaw |> Str.joinWith ", " + problemStr = "\(problem)\nWhile parsing record `\(recordStr)`." + Break (Err (ParsingFailure problemStr)) + Err (ParsingIncomplete problem) -> + Break (Err (ParsingIncomplete problem)) Ok val -> state |> Result.map (\vals -> List.append vals val) |> Continue +# Wrapper function to combine a set of fields into your desired `a` +# +# ## Usage example +# +# >>> record (\firstName -> \lastName -> \age -> User {firstName, lastName, age}) +# >>> |> field string +# >>> |> field string +# >>> |> field nat +# record : a -> Parser CSVRecord a record = Parser.Core.const field : Parser RawStr a -> Parser CSVRecord a field = \fieldParser -> - buildPrimitiveParser \recordVal -> - when List.get recordVal 0 is + buildPrimitiveParser \fieldsList -> + when List.get fieldsList 0 is Err OutOfBounds -> Err (ParsingFailure "expected another CSV field but there are no more fields in this record") Ok rawStr -> when Parser.Str.parseRawStr fieldParser rawStr is Ok val -> - Ok {val: val, input: (List.dropFirst recordVal)} + Ok {val: val, input: (List.dropFirst fieldsList)} Err (ParsingFailure reason) -> - Err (ParsingFailure reason) + fieldStr = rawStr |> strFromRaw + Err (ParsingFailure "Field `\(fieldStr)` from could not be parsed. \(reason)") Err (ParsingIncomplete reason) -> reasonStr = strFromRaw reason - Err (ParsingFailure "The field parser was unable to read the whole field: \(reasonStr)") + fieldsStr = fieldsList |> List.map strFromRaw |> Str.joinWith ", " + Err (ParsingFailure "The field parser was unable to read the whole field: `\(reasonStr)` while parsing the first field of leftover \(fieldsStr))") +# Parser for a field containing a UTF8-encoded string +string : Parser CSVField Str +string = Parser.Str.anyString + +nat : Parser CSVField Nat +nat = + string + |> map (\val -> + when Str.toNat val is + Ok num -> + Ok num + Err problem -> + Err "The field is not a valid Nat: \(val)" + ) + |> flatten + +# f64 : Parser CSVField F64 +# f64 = string |> map Str.toF64 |> flatten parseStrToCSV : Str -> Result CSV [ParsingFailure Str, ParsingIncomplete RawStr] parseStrToCSV = \input -> @@ -111,7 +147,7 @@ escapedContents = many (oneOf [ textdata ]) -twodquotes = string "\"\"" +twodquotes = Parser.Str.string "\"\"" nonescapedCsvField : Parser RawStr CSVField nonescapedCsvField = many textdata @@ -120,5 +156,5 @@ dquote = codepoint 34 # '"' endOfLine = alt (ignore crlf) (ignore lf) cr = codepoint 13 # '\r' lf = codepoint 10 # '\n' -crlf = string "\r\n" +crlf = Parser.Str.string "\r\n" textdata = codepointSatisfies (\x -> (x >= 32 && x <= 33) || (x >= 35 && x <= 43) || (x >= 45 && x <= 126)) # Any printable char except " (34) and , (44) diff --git a/examples/csv/Parser/Core.roc b/examples/csv/Parser/Core.roc index 51c5e0d1d7..bc2b2e4053 100644 --- a/examples/csv/Parser/Core.roc +++ b/examples/csv/Parser/Core.roc @@ -1,6 +1,7 @@ interface Parser.Core exposes [ Parser, + ParseResult, parse, parsePartial, fail, @@ -21,6 +22,7 @@ interface Parser.Core sepBy1, ignore, buildPrimitiveParser, + flatten, ] imports [] @@ -37,9 +39,11 @@ interface Parser.Core ## How a parser is _actually_ implemented internally is not important ## and this might change between versions; ## for instance to improve efficiency or error messages on parsing failures. -Parser input a := (input -> Result {val: a, input: input} [ParsingFailure Str]) +Parser input a := (input -> ParseResult input a) -buildPrimitiveParser : (input -> Result {val: a, input: input} [ParsingFailure Str]) -> Parser input a +ParseResult input a : Result {val: a, input: input} [ParsingFailure Str] + +buildPrimitiveParser : (input -> ParseResult input a) -> Parser input a buildPrimitiveParser = \fun -> @Parser fun @@ -57,7 +61,7 @@ buildPrimitiveParser = \fun -> ## ## Of course, this is mostly useful when creating your own internal parsing building blocks. ## `run` or `Parser.Str.runStr` etc. are more useful in daily usage. -parsePartial : Parser input a, input -> Result {val: a, input: input} [ParsingFailure Str] +parsePartial : Parser input a, input -> ParseResult input a parsePartial = \@Parser parser, input -> (parser input) @@ -211,6 +215,22 @@ map3 = \parserA, parserB, parserC, transform -> # ^ And this could be repeated for as high as we want, of course. +# Removes a layer of 'result' from running the parser. +# +# This allows for instance to map functions that return a result over the parser, +# where errors are turned into `ParsingFailure` s. +flatten : Parser input (Result a Str) -> Parser input a +flatten = \parser -> + buildPrimitiveParser \input -> + result = parsePartial parser input + when result is + Err problem -> + Err problem + Ok {val: (Ok val), input: inputRest} -> + Ok {val: val, input: inputRest} + Ok {val: (Err problem), input: inputRest} -> + Err (ParsingFailure problem) + ## Runs a parser lazily ## ## This is (only) useful when dealing with a recursive structure. @@ -226,7 +246,7 @@ maybe : Parser input a -> Parser input (Result a [Nothing]) maybe = \parser -> alt (parser |> map (\val -> Ok val)) (const (Err Nothing)) -manyImpl : Parser input a, List a, input -> Result { input : input, val : List a } [ParsingFailure Str] +manyImpl : Parser input a, List a, input -> ParseResult input (List a) manyImpl = \parser, vals, input -> result = parsePartial parser input when result is diff --git a/examples/csv/Parser/Str.roc b/examples/csv/Parser/Str.roc index 4165143479..834f4a336a 100644 --- a/examples/csv/Parser/Str.roc +++ b/examples/csv/Parser/Str.roc @@ -9,13 +9,16 @@ interface Parser.Str stringRaw, codepoint, codepointSatisfies, + anyString, + anyRawString, + anyCodepoint, scalar, oneOf, digit, digits, strFromRaw, ] - imports [Parser.Core.{Parser, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between}] + imports [Parser.Core.{Parser, ParseResult, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between}] # Specific string-based parsers: @@ -41,7 +44,7 @@ strFromCodepoint = \cp -> strFromRaw [cp] ## Runs a parser against the start of a list of scalars, allowing the parser to consume it only partially. -parseRawStrPartial : Parser RawStr a, RawStr -> Result {val: a, input: RawStr} [ParsingFailure Str] +parseRawStrPartial : Parser RawStr a, RawStr -> ParseResult RawStr a parseRawStrPartial = \parser, input -> parsePartial parser input @@ -49,7 +52,7 @@ parseRawStrPartial = \parser, input -> ## ## - If the parser succeeds, returns the resulting value as well as the leftover input. ## - If the parser fails, returns `Err (ParsingFailure msg)` -parseStrPartial : Parser RawStr a, Str -> Result {val: a, input: Str} [ParsingFailure Str] +parseStrPartial : Parser RawStr a, Str -> ParseResult Str a parseStrPartial = \parser, input -> parser |> parseRawStrPartial (strToRaw input) @@ -137,9 +140,30 @@ scalar = \expectedScalar -> |> string |> map (\_ -> expectedScalar) -betweenBraces : Parser RawStr a -> Parser RawStr a -betweenBraces = \parser -> - between parser (scalar '[') (scalar ']') +# Matches any codepoint +anyCodepoint : Parser RawStr U8 +anyCodepoint = codepointSatisfies (\_ -> True) + +# Matches any bytestring +# and consumes all of it. +# Does not fail. +anyRawString : Parser RawStr RawStr +anyRawString = buildPrimitiveParser \rawStringValue -> + Ok {val: rawStringValue, input: []} + +# Matches any string +# as long as it is valid UTF8. +anyString : Parser RawStr Str +anyString = buildPrimitiveParser \fieldRawString -> + when Str.fromUtf8 fieldRawString is + Ok stringVal -> + Ok {val: stringVal, input: []} + Err (BadUtf8 _ _) -> + Err (ParsingFailure "Expected a string field, but its contents cannot be parsed as UTF8.") + +# betweenBraces : Parser RawStr a -> Parser RawStr a +# betweenBraces = \parser -> +# between parser (scalar '[') (scalar ']') digit : Parser RawStr U8 diff --git a/examples/csv/main.roc b/examples/csv/main.roc index 8ce7d674a2..e328414a83 100644 --- a/examples/csv/main.roc +++ b/examples/csv/main.roc @@ -8,12 +8,20 @@ app "main" # with hard-coded input. main = - when Parser.CSV.parseStr userCSVParser "John,Doe,10" is + when Parser.CSV.parseStr userCSVParser "John,Doe,10\r\nRichard,Feldman,100\r\nMarten,Wijnja,28\r\n" is Ok result -> val = result |> Str.joinWith("\n") - "Parse success: \(val)\n" + nResults = List.len result |> Num.toStr + "Parse success!\n\n\(nResults) users were found:\n\(val)\n" Err problem -> - "Parsing Problem" + when problem is + ParsingFailure failure -> + "Parsing failure: \(failure)\n" + ParsingIncomplete leftover -> + leftoverStr = leftover |> List.map Parser.Str.strFromRaw |> Str.joinWith ", " + "Parsing incomplete. Following still left: \(leftoverStr)\n" + SyntaxError error -> + "Parsing failure. Syntax error in the CSV: \(error)" # main = fullTest csvParser "10,20\n\"An escaped field!\"\"\n,,,\",30\n" # main = partialTest fieldParser "\"An escaped field with some \"\"<- double quotes\"" # main = fullTest fieldContentsParser "My very cool,\"\"\r\n string" @@ -21,10 +29,12 @@ main = # main = partialTest manyParser "this is a very long string\"\"" userCSVParser = - Parser.CSV.record (\first -> \last -> \age -> "User: \(first) \(last) \(age)") - |> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "John")) - |> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "Doe")) - |> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "10")) + Parser.CSV.record (\first -> \last -> \age -> + ageStr = Num.toStr age + "User: \(first) \(last) \(ageStr)") + |> Parser.Core.apply (Parser.CSV.field Parser.CSV.string) + |> Parser.Core.apply (Parser.CSV.field Parser.CSV.string) + |> Parser.Core.apply (Parser.CSV.field Parser.CSV.nat) partialTest = \parser, input -> when Parser.Str.parseStrPartial parser input is diff --git a/examples/csv/platform/host.c b/examples/csv/platform/host.c index 8fda8d4f8a..c3a067ae75 100644 --- a/examples/csv/platform/host.c +++ b/examples/csv/platform/host.c @@ -6,13 +6,20 @@ #include #include +//#define ROC_PLATFORM_DEBUG + void alloc_panic(size_t size); void *roc_alloc(size_t size, unsigned int alignment) { +#ifdef ROC_PLATFORM_DEBUG printf("Allocating %llu (alignment %ud) ", (unsigned long long)size, alignment); +#endif void *result = malloc(size); + +#ifdef ROC_PLATFORM_DEBUG printf("at: %p\n", result); +#endif if (result == NULL) { if (size == @@ -28,11 +35,16 @@ void *roc_alloc(size_t size, unsigned int alignment) { void *roc_realloc(void *ptr, size_t new_size, size_t old_size, unsigned int alignment) { +#ifdef ROC_PLATFORM_DEBUG printf("Rellocating %p (%llu -> %llu) (alignment %ud) ", ptr, (unsigned long long)old_size, (unsigned long long)new_size, alignment); +#endif void *result = realloc(ptr, new_size); + +#ifdef ROC_PLATFORM_DEBUG printf("at: %p\n", result); +#endif if (result == NULL) { if (new_size == @@ -47,7 +59,10 @@ void *roc_realloc(void *ptr, size_t new_size, size_t old_size, } void roc_dealloc(void *ptr, unsigned int alignment) { + +#ifdef ROC_PLATFORM_DEBUG printf("Deallocating %p (alignment %ud)\n", ptr, alignment); +#endif free(ptr); } @@ -67,7 +82,9 @@ void alloc_panic(size_t size) { } void *roc_memcpy(void *dest, const void *src, size_t n) { +#ifdef ROC_PLATFORM_DEBUG printf("memcpy %p -> %p (size: %llu)\n", src, dest, (unsigned long long)n); +#endif return memcpy(dest, src, n); }