Better parse errors on CSV parsing fail.

This commit is contained in:
Marten/Qqwy 2022-07-16 21:49:18 +02:00
parent b45ce4908d
commit 35ff9642aa
No known key found for this signature in database
GPG key ID: FACEF83266BDAF72
5 changed files with 137 additions and 30 deletions

View file

@ -8,10 +8,12 @@ interface Parser.CSV
parseStr, parseStr,
parseCSV, parseCSV,
field, field,
string,
nat,
] ]
imports [ imports [
Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore}, Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore, flatten},
Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, string, scalar, digits, strFromRaw} Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, scalar, digits, strFromRaw}
] ]
## This is a CSV parser which follows RFC4180 ## This is a CSV parser which follows RFC4180
@ -27,11 +29,12 @@ CSVField : RawStr
CSVRecord : List CSVField CSVRecord : List CSVField
CSV : List CSVRecord CSV : List CSVRecord
parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError (List U8), ParsingIncomplete CSVRecord] parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError Str, ParsingIncomplete CSVRecord]
parseStr = \csvParser, input -> parseStr = \csvParser, input ->
when parseStrToCSV input is when parseStrToCSV input is
Err (ParsingIncomplete rest) -> Err (ParsingIncomplete rest) ->
Err (SyntaxError rest) restStr = Parser.Str.strFromRaw rest
Err (SyntaxError restStr)
Err (ParsingFailure str) -> Err (ParsingFailure str) ->
Err (ParsingFailure str) Err (ParsingFailure str)
Ok csvData -> Ok csvData ->
@ -47,32 +50,65 @@ parseCSV : Parser CSVRecord a, CSV -> Result (List a) [ParsingFailure Str, Parsi
parseCSV = \csvParser, csvData -> parseCSV = \csvParser, csvData ->
List.walkUntil csvData (Ok []) \state, recordList -> List.walkUntil csvData (Ok []) \state, recordList ->
when parse csvParser recordList (\leftover -> leftover == []) is when parse csvParser recordList (\leftover -> leftover == []) is
Err problem -> Err (ParsingFailure problem) ->
Break (Err problem) recordStr = recordList |> List.map strFromRaw |> Str.joinWith ", "
problemStr = "\(problem)\nWhile parsing record `\(recordStr)`."
Break (Err (ParsingFailure problemStr))
Err (ParsingIncomplete problem) ->
Break (Err (ParsingIncomplete problem))
Ok val -> Ok val ->
state state
|> Result.map (\vals -> List.append vals val) |> Result.map (\vals -> List.append vals val)
|> Continue |> Continue
# Wrapper function to combine a set of fields into your desired `a`
#
# ## Usage example
#
# >>> record (\firstName -> \lastName -> \age -> User {firstName, lastName, age})
# >>> |> field string
# >>> |> field string
# >>> |> field nat
#
record : a -> Parser CSVRecord a record : a -> Parser CSVRecord a
record = Parser.Core.const record = Parser.Core.const
field : Parser RawStr a -> Parser CSVRecord a field : Parser RawStr a -> Parser CSVRecord a
field = \fieldParser -> field = \fieldParser ->
buildPrimitiveParser \recordVal -> buildPrimitiveParser \fieldsList ->
when List.get recordVal 0 is when List.get fieldsList 0 is
Err OutOfBounds -> Err OutOfBounds ->
Err (ParsingFailure "expected another CSV field but there are no more fields in this record") Err (ParsingFailure "expected another CSV field but there are no more fields in this record")
Ok rawStr -> Ok rawStr ->
when Parser.Str.parseRawStr fieldParser rawStr is when Parser.Str.parseRawStr fieldParser rawStr is
Ok val -> Ok val ->
Ok {val: val, input: (List.dropFirst recordVal)} Ok {val: val, input: (List.dropFirst fieldsList)}
Err (ParsingFailure reason) -> Err (ParsingFailure reason) ->
Err (ParsingFailure reason) fieldStr = rawStr |> strFromRaw
Err (ParsingFailure "Field `\(fieldStr)` from could not be parsed. \(reason)")
Err (ParsingIncomplete reason) -> Err (ParsingIncomplete reason) ->
reasonStr = strFromRaw reason reasonStr = strFromRaw reason
Err (ParsingFailure "The field parser was unable to read the whole field: \(reasonStr)") fieldsStr = fieldsList |> List.map strFromRaw |> Str.joinWith ", "
Err (ParsingFailure "The field parser was unable to read the whole field: `\(reasonStr)` while parsing the first field of leftover \(fieldsStr))")
# Parser for a field containing a UTF8-encoded string
string : Parser CSVField Str
string = Parser.Str.anyString
nat : Parser CSVField Nat
nat =
string
|> map (\val ->
when Str.toNat val is
Ok num ->
Ok num
Err problem ->
Err "The field is not a valid Nat: \(val)"
)
|> flatten
# f64 : Parser CSVField F64
# f64 = string |> map Str.toF64 |> flatten
parseStrToCSV : Str -> Result CSV [ParsingFailure Str, ParsingIncomplete RawStr] parseStrToCSV : Str -> Result CSV [ParsingFailure Str, ParsingIncomplete RawStr]
parseStrToCSV = \input -> parseStrToCSV = \input ->
@ -111,7 +147,7 @@ escapedContents = many (oneOf [
textdata textdata
]) ])
twodquotes = string "\"\"" twodquotes = Parser.Str.string "\"\""
nonescapedCsvField : Parser RawStr CSVField nonescapedCsvField : Parser RawStr CSVField
nonescapedCsvField = many textdata nonescapedCsvField = many textdata
@ -120,5 +156,5 @@ dquote = codepoint 34 # '"'
endOfLine = alt (ignore crlf) (ignore lf) endOfLine = alt (ignore crlf) (ignore lf)
cr = codepoint 13 # '\r' cr = codepoint 13 # '\r'
lf = codepoint 10 # '\n' lf = codepoint 10 # '\n'
crlf = string "\r\n" crlf = Parser.Str.string "\r\n"
textdata = codepointSatisfies (\x -> (x >= 32 && x <= 33) || (x >= 35 && x <= 43) || (x >= 45 && x <= 126)) # Any printable char except " (34) and , (44) textdata = codepointSatisfies (\x -> (x >= 32 && x <= 33) || (x >= 35 && x <= 43) || (x >= 45 && x <= 126)) # Any printable char except " (34) and , (44)

View file

@ -1,6 +1,7 @@
interface Parser.Core interface Parser.Core
exposes [ exposes [
Parser, Parser,
ParseResult,
parse, parse,
parsePartial, parsePartial,
fail, fail,
@ -21,6 +22,7 @@ interface Parser.Core
sepBy1, sepBy1,
ignore, ignore,
buildPrimitiveParser, buildPrimitiveParser,
flatten,
] ]
imports [] imports []
@ -37,9 +39,11 @@ interface Parser.Core
## How a parser is _actually_ implemented internally is not important ## How a parser is _actually_ implemented internally is not important
## and this might change between versions; ## and this might change between versions;
## for instance to improve efficiency or error messages on parsing failures. ## for instance to improve efficiency or error messages on parsing failures.
Parser input a := (input -> Result {val: a, input: input} [ParsingFailure Str]) Parser input a := (input -> ParseResult input a)
buildPrimitiveParser : (input -> Result {val: a, input: input} [ParsingFailure Str]) -> Parser input a ParseResult input a : Result {val: a, input: input} [ParsingFailure Str]
buildPrimitiveParser : (input -> ParseResult input a) -> Parser input a
buildPrimitiveParser = \fun -> buildPrimitiveParser = \fun ->
@Parser fun @Parser fun
@ -57,7 +61,7 @@ buildPrimitiveParser = \fun ->
## ##
## Of course, this is mostly useful when creating your own internal parsing building blocks. ## Of course, this is mostly useful when creating your own internal parsing building blocks.
## `run` or `Parser.Str.runStr` etc. are more useful in daily usage. ## `run` or `Parser.Str.runStr` etc. are more useful in daily usage.
parsePartial : Parser input a, input -> Result {val: a, input: input} [ParsingFailure Str] parsePartial : Parser input a, input -> ParseResult input a
parsePartial = \@Parser parser, input -> parsePartial = \@Parser parser, input ->
(parser input) (parser input)
@ -211,6 +215,22 @@ map3 = \parserA, parserB, parserC, transform ->
# ^ And this could be repeated for as high as we want, of course. # ^ And this could be repeated for as high as we want, of course.
# Removes a layer of 'result' from running the parser.
#
# This allows for instance to map functions that return a result over the parser,
# where errors are turned into `ParsingFailure` s.
flatten : Parser input (Result a Str) -> Parser input a
flatten = \parser ->
buildPrimitiveParser \input ->
result = parsePartial parser input
when result is
Err problem ->
Err problem
Ok {val: (Ok val), input: inputRest} ->
Ok {val: val, input: inputRest}
Ok {val: (Err problem), input: inputRest} ->
Err (ParsingFailure problem)
## Runs a parser lazily ## Runs a parser lazily
## ##
## This is (only) useful when dealing with a recursive structure. ## This is (only) useful when dealing with a recursive structure.
@ -226,7 +246,7 @@ maybe : Parser input a -> Parser input (Result a [Nothing])
maybe = \parser -> maybe = \parser ->
alt (parser |> map (\val -> Ok val)) (const (Err Nothing)) alt (parser |> map (\val -> Ok val)) (const (Err Nothing))
manyImpl : Parser input a, List a, input -> Result { input : input, val : List a } [ParsingFailure Str] manyImpl : Parser input a, List a, input -> ParseResult input (List a)
manyImpl = \parser, vals, input -> manyImpl = \parser, vals, input ->
result = parsePartial parser input result = parsePartial parser input
when result is when result is

View file

@ -9,13 +9,16 @@ interface Parser.Str
stringRaw, stringRaw,
codepoint, codepoint,
codepointSatisfies, codepointSatisfies,
anyString,
anyRawString,
anyCodepoint,
scalar, scalar,
oneOf, oneOf,
digit, digit,
digits, digits,
strFromRaw, strFromRaw,
] ]
imports [Parser.Core.{Parser, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between}] imports [Parser.Core.{Parser, ParseResult, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between}]
# Specific string-based parsers: # Specific string-based parsers:
@ -41,7 +44,7 @@ strFromCodepoint = \cp ->
strFromRaw [cp] strFromRaw [cp]
## Runs a parser against the start of a list of scalars, allowing the parser to consume it only partially. ## Runs a parser against the start of a list of scalars, allowing the parser to consume it only partially.
parseRawStrPartial : Parser RawStr a, RawStr -> Result {val: a, input: RawStr} [ParsingFailure Str] parseRawStrPartial : Parser RawStr a, RawStr -> ParseResult RawStr a
parseRawStrPartial = \parser, input -> parseRawStrPartial = \parser, input ->
parsePartial parser input parsePartial parser input
@ -49,7 +52,7 @@ parseRawStrPartial = \parser, input ->
## ##
## - If the parser succeeds, returns the resulting value as well as the leftover input. ## - If the parser succeeds, returns the resulting value as well as the leftover input.
## - If the parser fails, returns `Err (ParsingFailure msg)` ## - If the parser fails, returns `Err (ParsingFailure msg)`
parseStrPartial : Parser RawStr a, Str -> Result {val: a, input: Str} [ParsingFailure Str] parseStrPartial : Parser RawStr a, Str -> ParseResult Str a
parseStrPartial = \parser, input -> parseStrPartial = \parser, input ->
parser parser
|> parseRawStrPartial (strToRaw input) |> parseRawStrPartial (strToRaw input)
@ -137,9 +140,30 @@ scalar = \expectedScalar ->
|> string |> string
|> map (\_ -> expectedScalar) |> map (\_ -> expectedScalar)
betweenBraces : Parser RawStr a -> Parser RawStr a # Matches any codepoint
betweenBraces = \parser -> anyCodepoint : Parser RawStr U8
between parser (scalar '[') (scalar ']') anyCodepoint = codepointSatisfies (\_ -> True)
# Matches any bytestring
# and consumes all of it.
# Does not fail.
anyRawString : Parser RawStr RawStr
anyRawString = buildPrimitiveParser \rawStringValue ->
Ok {val: rawStringValue, input: []}
# Matches any string
# as long as it is valid UTF8.
anyString : Parser RawStr Str
anyString = buildPrimitiveParser \fieldRawString ->
when Str.fromUtf8 fieldRawString is
Ok stringVal ->
Ok {val: stringVal, input: []}
Err (BadUtf8 _ _) ->
Err (ParsingFailure "Expected a string field, but its contents cannot be parsed as UTF8.")
# betweenBraces : Parser RawStr a -> Parser RawStr a
# betweenBraces = \parser ->
# between parser (scalar '[') (scalar ']')
digit : Parser RawStr U8 digit : Parser RawStr U8

View file

@ -8,12 +8,20 @@ app "main"
# with hard-coded input. # with hard-coded input.
main = main =
when Parser.CSV.parseStr userCSVParser "John,Doe,10" is when Parser.CSV.parseStr userCSVParser "John,Doe,10\r\nRichard,Feldman,100\r\nMarten,Wijnja,28\r\n" is
Ok result -> Ok result ->
val = result |> Str.joinWith("\n") val = result |> Str.joinWith("\n")
"Parse success: \(val)\n" nResults = List.len result |> Num.toStr
"Parse success!\n\n\(nResults) users were found:\n\(val)\n"
Err problem -> Err problem ->
"Parsing Problem" when problem is
ParsingFailure failure ->
"Parsing failure: \(failure)\n"
ParsingIncomplete leftover ->
leftoverStr = leftover |> List.map Parser.Str.strFromRaw |> Str.joinWith ", "
"Parsing incomplete. Following still left: \(leftoverStr)\n"
SyntaxError error ->
"Parsing failure. Syntax error in the CSV: \(error)"
# main = fullTest csvParser "10,20\n\"An escaped field!\"\"\n,,,\",30\n" # main = fullTest csvParser "10,20\n\"An escaped field!\"\"\n,,,\",30\n"
# main = partialTest fieldParser "\"An escaped field with some \"\"<- double quotes\"" # main = partialTest fieldParser "\"An escaped field with some \"\"<- double quotes\""
# main = fullTest fieldContentsParser "My very cool,\"\"\r\n string" # main = fullTest fieldContentsParser "My very cool,\"\"\r\n string"
@ -21,10 +29,12 @@ main =
# main = partialTest manyParser "this is a very long string\"\"" # main = partialTest manyParser "this is a very long string\"\""
userCSVParser = userCSVParser =
Parser.CSV.record (\first -> \last -> \age -> "User: \(first) \(last) \(age)") Parser.CSV.record (\first -> \last -> \age ->
|> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "John")) ageStr = Num.toStr age
|> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "Doe")) "User: \(first) \(last) \(ageStr)")
|> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "10")) |> Parser.Core.apply (Parser.CSV.field Parser.CSV.string)
|> Parser.Core.apply (Parser.CSV.field Parser.CSV.string)
|> Parser.Core.apply (Parser.CSV.field Parser.CSV.nat)
partialTest = \parser, input -> partialTest = \parser, input ->
when Parser.Str.parseStrPartial parser input is when Parser.Str.parseStrPartial parser input is

View file

@ -6,13 +6,20 @@
#include <string.h> #include <string.h>
#include <unistd.h> #include <unistd.h>
//#define ROC_PLATFORM_DEBUG
void alloc_panic(size_t size); void alloc_panic(size_t size);
void *roc_alloc(size_t size, unsigned int alignment) { void *roc_alloc(size_t size, unsigned int alignment) {
#ifdef ROC_PLATFORM_DEBUG
printf("Allocating %llu (alignment %ud) ", (unsigned long long)size, printf("Allocating %llu (alignment %ud) ", (unsigned long long)size,
alignment); alignment);
#endif
void *result = malloc(size); void *result = malloc(size);
#ifdef ROC_PLATFORM_DEBUG
printf("at: %p\n", result); printf("at: %p\n", result);
#endif
if (result == NULL) { if (result == NULL) {
if (size == if (size ==
@ -28,11 +35,16 @@ void *roc_alloc(size_t size, unsigned int alignment) {
void *roc_realloc(void *ptr, size_t new_size, size_t old_size, void *roc_realloc(void *ptr, size_t new_size, size_t old_size,
unsigned int alignment) { unsigned int alignment) {
#ifdef ROC_PLATFORM_DEBUG
printf("Rellocating %p (%llu -> %llu) (alignment %ud) ", ptr, printf("Rellocating %p (%llu -> %llu) (alignment %ud) ", ptr,
(unsigned long long)old_size, (unsigned long long)new_size, alignment); (unsigned long long)old_size, (unsigned long long)new_size, alignment);
#endif
void *result = realloc(ptr, new_size); void *result = realloc(ptr, new_size);
#ifdef ROC_PLATFORM_DEBUG
printf("at: %p\n", result); printf("at: %p\n", result);
#endif
if (result == NULL) { if (result == NULL) {
if (new_size == if (new_size ==
@ -47,7 +59,10 @@ void *roc_realloc(void *ptr, size_t new_size, size_t old_size,
} }
void roc_dealloc(void *ptr, unsigned int alignment) { void roc_dealloc(void *ptr, unsigned int alignment) {
#ifdef ROC_PLATFORM_DEBUG
printf("Deallocating %p (alignment %ud)\n", ptr, alignment); printf("Deallocating %p (alignment %ud)\n", ptr, alignment);
#endif
free(ptr); free(ptr);
} }
@ -67,7 +82,9 @@ void alloc_panic(size_t size) {
} }
void *roc_memcpy(void *dest, const void *src, size_t n) { void *roc_memcpy(void *dest, const void *src, size_t n) {
#ifdef ROC_PLATFORM_DEBUG
printf("memcpy %p -> %p (size: %llu)\n", src, dest, (unsigned long long)n); printf("memcpy %p -> %p (size: %llu)\n", src, dest, (unsigned long long)n);
#endif
return memcpy(dest, src, n); return memcpy(dest, src, n);
} }