From 35ff9642aa6489b10a7eb19c0eb61b666815c67a Mon Sep 17 00:00:00 2001
From: Marten/Qqwy <w-m@resilia.nl>
Date: Sat, 16 Jul 2022 21:49:18 +0200
Subject: [PATCH] Better parse errors on CSV parsing fail.

---
 examples/csv/Parser/CSV.roc  | 62 ++++++++++++++++++++++++++++--------
 examples/csv/Parser/Core.roc | 28 +++++++++++++---
 examples/csv/Parser/Str.roc  | 36 +++++++++++++++++----
 examples/csv/main.roc        | 24 ++++++++++----
 examples/csv/platform/host.c | 17 ++++++++++
 5 files changed, 137 insertions(+), 30 deletions(-)

diff --git a/examples/csv/Parser/CSV.roc b/examples/csv/Parser/CSV.roc
index ef6b567bbc..e95714f5ff 100644
--- a/examples/csv/Parser/CSV.roc
+++ b/examples/csv/Parser/CSV.roc
@@ -8,10 +8,12 @@ interface Parser.CSV
   parseStr,
   parseCSV,
   field,
+  string,
+  nat,
   ]
   imports [
-  Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore},
-  Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, string, scalar, digits, strFromRaw}
+  Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore, flatten},
+  Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, scalar, digits, strFromRaw}
   ]
 
 ## This is a CSV parser which follows RFC4180
@@ -27,11 +29,12 @@ CSVField : RawStr
 CSVRecord : List CSVField
 CSV : List CSVRecord
 
-parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError (List U8), ParsingIncomplete CSVRecord]
+parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError Str, ParsingIncomplete CSVRecord]
 parseStr = \csvParser, input ->
   when parseStrToCSV input is
     Err (ParsingIncomplete rest) ->
-      Err (SyntaxError rest)
+      restStr = Parser.Str.strFromRaw rest
+      Err (SyntaxError restStr)
     Err (ParsingFailure str) ->
       Err (ParsingFailure str)
     Ok csvData ->
@@ -47,32 +50,65 @@ parseCSV : Parser CSVRecord a, CSV -> Result (List a) [ParsingFailure Str, Parsi
 parseCSV = \csvParser, csvData ->
   List.walkUntil csvData (Ok []) \state, recordList ->
     when parse csvParser recordList (\leftover -> leftover == []) is
-      Err problem ->
-        Break (Err problem)
+      Err (ParsingFailure problem) ->
+        recordStr = recordList |> List.map strFromRaw |> Str.joinWith ", "
+        problemStr = "\(problem)\nWhile parsing record `\(recordStr)`."
+        Break (Err (ParsingFailure problemStr))
+      Err (ParsingIncomplete problem) ->
+        Break (Err (ParsingIncomplete problem))
       Ok val ->
         state
         |> Result.map (\vals -> List.append vals val)
         |> Continue
 
+# Wrapper function to combine a set of fields into your desired `a`
+#
+# ## Usage example
+#
+# >>> record (\firstName -> \lastName -> \age -> User {firstName, lastName, age})
+# >>> |> field string
+# >>> |> field string
+# >>> |> field nat
+#
 record : a -> Parser CSVRecord a
 record = Parser.Core.const
 
 field : Parser RawStr a -> Parser CSVRecord a
 field = \fieldParser ->
-  buildPrimitiveParser \recordVal ->
-    when List.get recordVal 0 is
+  buildPrimitiveParser \fieldsList ->
+    when List.get fieldsList 0 is
       Err OutOfBounds ->
         Err (ParsingFailure "expected another CSV field but there are no more fields in this record")
       Ok rawStr ->
         when Parser.Str.parseRawStr fieldParser rawStr is
           Ok val ->
-            Ok {val: val, input: (List.dropFirst recordVal)}
+            Ok {val: val, input: (List.dropFirst fieldsList)}
           Err (ParsingFailure reason) ->
-            Err (ParsingFailure reason)
+            fieldStr = rawStr |> strFromRaw
+            Err (ParsingFailure "Field `\(fieldStr)` from could not be parsed. \(reason)")
           Err (ParsingIncomplete reason) ->
             reasonStr = strFromRaw reason
-            Err (ParsingFailure "The field parser was unable to read the whole field: \(reasonStr)")
+            fieldsStr = fieldsList |> List.map strFromRaw |> Str.joinWith ", "
+            Err (ParsingFailure "The field parser was unable to read the whole field: `\(reasonStr)` while parsing the first field of leftover \(fieldsStr))")
 
+# Parser for a field containing a UTF8-encoded string
+string : Parser CSVField Str
+string = Parser.Str.anyString
+
+nat : Parser CSVField Nat
+nat =
+  string
+  |> map (\val ->
+    when Str.toNat val is
+      Ok num ->
+        Ok num
+      Err problem ->
+        Err "The field is not a valid Nat: \(val)"
+        )
+  |> flatten
+
+# f64 : Parser CSVField F64
+# f64 = string |> map Str.toF64 |> flatten
 
 parseStrToCSV : Str -> Result CSV [ParsingFailure Str, ParsingIncomplete RawStr]
 parseStrToCSV = \input ->
@@ -111,7 +147,7 @@ escapedContents = many (oneOf [
   textdata
 ])
 
-twodquotes = string "\"\""
+twodquotes = Parser.Str.string "\"\""
 
 nonescapedCsvField : Parser RawStr CSVField
 nonescapedCsvField = many textdata
@@ -120,5 +156,5 @@ dquote = codepoint 34 # '"'
 endOfLine = alt (ignore crlf) (ignore lf)
 cr = codepoint 13 # '\r'
 lf = codepoint 10 # '\n'
-crlf = string "\r\n"
+crlf = Parser.Str.string "\r\n"
 textdata = codepointSatisfies (\x -> (x >= 32 && x <= 33) || (x >= 35 && x <= 43) || (x >= 45 && x <= 126)) # Any printable char except " (34) and , (44)
diff --git a/examples/csv/Parser/Core.roc b/examples/csv/Parser/Core.roc
index 51c5e0d1d7..bc2b2e4053 100644
--- a/examples/csv/Parser/Core.roc
+++ b/examples/csv/Parser/Core.roc
@@ -1,6 +1,7 @@
 interface Parser.Core
   exposes [
     Parser,
+    ParseResult,
     parse,
     parsePartial,
     fail,
@@ -21,6 +22,7 @@ interface Parser.Core
     sepBy1,
     ignore,
     buildPrimitiveParser,
+    flatten,
   ]
   imports []
 
@@ -37,9 +39,11 @@ interface Parser.Core
 ## How a parser is _actually_ implemented internally is not important
 ## and this might change between versions;
 ## for instance to improve efficiency or error messages on parsing failures.
-Parser input a := (input -> Result {val: a, input: input} [ParsingFailure Str])
+Parser input a := (input -> ParseResult input a)
 
-buildPrimitiveParser : (input -> Result {val: a, input: input} [ParsingFailure Str]) -> Parser input a
+ParseResult input a : Result {val: a, input: input} [ParsingFailure Str]
+
+buildPrimitiveParser : (input -> ParseResult input a) -> Parser input a
 buildPrimitiveParser = \fun ->
   @Parser fun
 
@@ -57,7 +61,7 @@ buildPrimitiveParser = \fun ->
 ##
 ## Of course, this is mostly useful when creating your own internal parsing building blocks.
 ## `run` or `Parser.Str.runStr` etc. are more useful in daily usage.
-parsePartial : Parser input a, input -> Result {val: a, input: input} [ParsingFailure Str]
+parsePartial : Parser input a, input -> ParseResult input a
 parsePartial = \@Parser parser, input ->
   (parser input)
 
@@ -211,6 +215,22 @@ map3 = \parserA, parserB, parserC, transform ->
 
 # ^ And this could be repeated for as high as we want, of course.
 
+# Removes a layer of 'result' from running the parser.
+#
+# This allows for instance to map functions that return a result over the parser,
+# where errors are turned into `ParsingFailure` s.
+flatten : Parser input (Result a Str) -> Parser input a
+flatten = \parser ->
+  buildPrimitiveParser \input ->
+    result = parsePartial parser input
+    when result is
+        Err problem ->
+          Err problem
+        Ok {val: (Ok val), input: inputRest} ->
+          Ok {val: val, input: inputRest}
+        Ok {val: (Err problem), input: inputRest} ->
+          Err (ParsingFailure problem)
+
 ## Runs a parser lazily
 ##
 ## This is (only) useful when dealing with a recursive structure.
@@ -226,7 +246,7 @@ maybe : Parser input a -> Parser input (Result a [Nothing])
 maybe = \parser ->
   alt (parser |> map (\val -> Ok val)) (const (Err Nothing))
 
-manyImpl : Parser input a, List a, input -> Result { input : input, val : List a } [ParsingFailure Str]
+manyImpl : Parser input a, List a, input -> ParseResult input (List a)
 manyImpl = \parser, vals, input ->
   result = parsePartial parser input
   when result is
diff --git a/examples/csv/Parser/Str.roc b/examples/csv/Parser/Str.roc
index 4165143479..834f4a336a 100644
--- a/examples/csv/Parser/Str.roc
+++ b/examples/csv/Parser/Str.roc
@@ -9,13 +9,16 @@ interface Parser.Str
     stringRaw,
     codepoint,
     codepointSatisfies,
+    anyString,
+    anyRawString,
+    anyCodepoint,
     scalar,
     oneOf,
     digit,
     digits,
     strFromRaw,
   ]
-  imports [Parser.Core.{Parser, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between}]
+  imports [Parser.Core.{Parser, ParseResult, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between}]
 
 # Specific string-based parsers:
 
@@ -41,7 +44,7 @@ strFromCodepoint = \cp ->
   strFromRaw [cp]
 
 ## Runs a parser against the start of a list of scalars, allowing the parser to consume it only partially.
-parseRawStrPartial : Parser RawStr a, RawStr -> Result {val: a, input: RawStr} [ParsingFailure Str]
+parseRawStrPartial : Parser RawStr a, RawStr -> ParseResult RawStr a
 parseRawStrPartial = \parser, input ->
   parsePartial parser input
 
@@ -49,7 +52,7 @@ parseRawStrPartial = \parser, input ->
 ##
 ## - If the parser succeeds, returns the resulting value as well as the leftover input.
 ## - If the parser fails, returns `Err (ParsingFailure msg)`
-parseStrPartial : Parser RawStr a, Str -> Result {val: a, input: Str} [ParsingFailure Str]
+parseStrPartial : Parser RawStr a, Str -> ParseResult Str a
 parseStrPartial = \parser, input ->
   parser
   |> parseRawStrPartial (strToRaw input)
@@ -137,9 +140,30 @@ scalar = \expectedScalar ->
   |> string
   |> map (\_ -> expectedScalar)
 
-betweenBraces : Parser RawStr a -> Parser RawStr a
-betweenBraces = \parser ->
-  between parser (scalar '[') (scalar ']')
+# Matches any codepoint
+anyCodepoint : Parser RawStr U8
+anyCodepoint = codepointSatisfies (\_ -> True)
+
+# Matches any bytestring
+# and consumes all of it.
+# Does not fail.
+anyRawString : Parser RawStr RawStr
+anyRawString = buildPrimitiveParser \rawStringValue ->
+  Ok {val: rawStringValue, input: []}
+
+# Matches any string
+# as long as it is valid UTF8.
+anyString : Parser RawStr Str
+anyString = buildPrimitiveParser \fieldRawString ->
+  when Str.fromUtf8 fieldRawString is
+    Ok stringVal ->
+      Ok {val: stringVal, input: []}
+    Err (BadUtf8 _ _) ->
+      Err (ParsingFailure "Expected a string field, but its contents cannot be parsed as UTF8.")
+
+# betweenBraces : Parser RawStr a -> Parser RawStr a
+# betweenBraces = \parser ->
+#   between parser (scalar '[') (scalar ']')
 
 
 digit : Parser RawStr U8
diff --git a/examples/csv/main.roc b/examples/csv/main.roc
index 8ce7d674a2..e328414a83 100644
--- a/examples/csv/main.roc
+++ b/examples/csv/main.roc
@@ -8,12 +8,20 @@ app "main"
 # with hard-coded input.
 
 main =
-  when Parser.CSV.parseStr userCSVParser "John,Doe,10" is
+  when Parser.CSV.parseStr userCSVParser "John,Doe,10\r\nRichard,Feldman,100\r\nMarten,Wijnja,28\r\n" is
     Ok result ->
         val = result |> Str.joinWith("\n")
-        "Parse success: \(val)\n"
+        nResults = List.len result |> Num.toStr
+        "Parse success!\n\n\(nResults) users were found:\n\(val)\n"
     Err problem ->
-        "Parsing Problem"
+      when problem is
+        ParsingFailure failure ->
+          "Parsing failure: \(failure)\n"
+        ParsingIncomplete leftover ->
+          leftoverStr = leftover |> List.map Parser.Str.strFromRaw |> Str.joinWith ", "
+          "Parsing incomplete. Following still left: \(leftoverStr)\n"
+        SyntaxError error ->
+          "Parsing failure. Syntax error in the CSV: \(error)"
 # main = fullTest csvParser "10,20\n\"An escaped field!\"\"\n,,,\",30\n"
 # main = partialTest fieldParser "\"An escaped field with some \"\"<- double quotes\""
 # main = fullTest fieldContentsParser "My very cool,\"\"\r\n string"
@@ -21,10 +29,12 @@ main =
 # main = partialTest manyParser "this is a very long string\"\""
 
 userCSVParser =
-  Parser.CSV.record (\first -> \last -> \age -> "User: \(first) \(last) \(age)")
-  |> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "John"))
-  |> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "Doe"))
-  |> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "10"))
+  Parser.CSV.record (\first -> \last -> \age ->
+    ageStr = Num.toStr age
+    "User: \(first) \(last) \(ageStr)")
+  |> Parser.Core.apply (Parser.CSV.field Parser.CSV.string)
+  |> Parser.Core.apply (Parser.CSV.field Parser.CSV.string)
+  |> Parser.Core.apply (Parser.CSV.field Parser.CSV.nat)
 
 partialTest = \parser, input ->
   when Parser.Str.parseStrPartial parser input is
diff --git a/examples/csv/platform/host.c b/examples/csv/platform/host.c
index 8fda8d4f8a..c3a067ae75 100644
--- a/examples/csv/platform/host.c
+++ b/examples/csv/platform/host.c
@@ -6,13 +6,20 @@
 #include <string.h>
 #include <unistd.h>
 
+//#define ROC_PLATFORM_DEBUG
+
 void alloc_panic(size_t size);
 
 void *roc_alloc(size_t size, unsigned int alignment) {
+#ifdef ROC_PLATFORM_DEBUG
   printf("Allocating %llu (alignment %ud) ", (unsigned long long)size,
          alignment);
+#endif
   void *result = malloc(size);
+
+#ifdef ROC_PLATFORM_DEBUG
   printf("at: %p\n", result);
+#endif
 
   if (result == NULL) {
     if (size ==
@@ -28,11 +35,16 @@ void *roc_alloc(size_t size, unsigned int alignment) {
 
 void *roc_realloc(void *ptr, size_t new_size, size_t old_size,
                   unsigned int alignment) {
+#ifdef ROC_PLATFORM_DEBUG
   printf("Rellocating %p (%llu -> %llu) (alignment %ud) ", ptr,
          (unsigned long long)old_size, (unsigned long long)new_size, alignment);
+#endif
 
   void *result = realloc(ptr, new_size);
+
+#ifdef ROC_PLATFORM_DEBUG
   printf("at: %p\n", result);
+#endif
 
   if (result == NULL) {
     if (new_size ==
@@ -47,7 +59,10 @@ void *roc_realloc(void *ptr, size_t new_size, size_t old_size,
 }
 
 void roc_dealloc(void *ptr, unsigned int alignment) {
+
+#ifdef ROC_PLATFORM_DEBUG
   printf("Deallocating %p (alignment %ud)\n", ptr, alignment);
+#endif
   free(ptr);
 }
 
@@ -67,7 +82,9 @@ void alloc_panic(size_t size) {
 }
 
 void *roc_memcpy(void *dest, const void *src, size_t n) {
+#ifdef ROC_PLATFORM_DEBUG
   printf("memcpy %p -> %p (size: %llu)\n", src, dest, (unsigned long long)n);
+#endif
   return memcpy(dest, src, n);
 }