Better parse errors when CSV parsing fails.

This commit is contained in:
Marten/Qqwy 2022-07-16 21:49:18 +02:00
parent b45ce4908d
commit 35ff9642aa
No known key found for this signature in database
GPG key ID: FACEF83266BDAF72
5 changed files with 137 additions and 30 deletions

View file

@ -8,10 +8,12 @@ interface Parser.CSV
parseStr,
parseCSV,
field,
string,
nat,
]
imports [
Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore},
Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, string, scalar, digits, strFromRaw}
Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore, flatten},
Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, scalar, digits, strFromRaw}
]
## This is a CSV parser which follows RFC4180
@ -27,11 +29,12 @@ CSVField : RawStr
CSVRecord : List CSVField
CSV : List CSVRecord
parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError (List U8), ParsingIncomplete CSVRecord]
parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError Str, ParsingIncomplete CSVRecord]
parseStr = \csvParser, input ->
when parseStrToCSV input is
Err (ParsingIncomplete rest) ->
Err (SyntaxError rest)
restStr = Parser.Str.strFromRaw rest
Err (SyntaxError restStr)
Err (ParsingFailure str) ->
Err (ParsingFailure str)
Ok csvData ->
@ -47,32 +50,65 @@ parseCSV : Parser CSVRecord a, CSV -> Result (List a) [ParsingFailure Str, Parsi
parseCSV = \csvParser, csvData ->
List.walkUntil csvData (Ok []) \state, recordList ->
when parse csvParser recordList (\leftover -> leftover == []) is
Err problem ->
Break (Err problem)
Err (ParsingFailure problem) ->
recordStr = recordList |> List.map strFromRaw |> Str.joinWith ", "
problemStr = "\(problem)\nWhile parsing record `\(recordStr)`."
Break (Err (ParsingFailure problemStr))
Err (ParsingIncomplete problem) ->
Break (Err (ParsingIncomplete problem))
Ok val ->
state
|> Result.map (\vals -> List.append vals val)
|> Continue
# Starting point for describing how one CSV record is turned into your desired `a`.
#
# Pass in a (curried) constructor function; every subsequent `field` parser
# that is applied to it supplies the next argument.
#
# ## Usage example
#
# >>> record (\firstName -> \lastName -> \age -> User {firstName, lastName, age})
# >>> |> field string
# >>> |> field string
# >>> |> field nat
#
record : a -> Parser CSVRecord a
record = \constructor ->
    Parser.Core.const constructor
field : Parser RawStr a -> Parser CSVRecord a
field = \fieldParser ->
buildPrimitiveParser \recordVal ->
when List.get recordVal 0 is
buildPrimitiveParser \fieldsList ->
when List.get fieldsList 0 is
Err OutOfBounds ->
Err (ParsingFailure "expected another CSV field but there are no more fields in this record")
Ok rawStr ->
when Parser.Str.parseRawStr fieldParser rawStr is
Ok val ->
Ok {val: val, input: (List.dropFirst recordVal)}
Ok {val: val, input: (List.dropFirst fieldsList)}
Err (ParsingFailure reason) ->
Err (ParsingFailure reason)
fieldStr = rawStr |> strFromRaw
Err (ParsingFailure "Field `\(fieldStr)` from could not be parsed. \(reason)")
Err (ParsingIncomplete reason) ->
reasonStr = strFromRaw reason
Err (ParsingFailure "The field parser was unable to read the whole field: \(reasonStr)")
fieldsStr = fieldsList |> List.map strFromRaw |> Str.joinWith ", "
Err (ParsingFailure "The field parser was unable to read the whole field: `\(reasonStr)` while parsing the first field of leftover \(fieldsStr))")
# Parser for a field containing a UTF8-encoded string.
#
# This is simply `Parser.Str.anyString` under a CSV-specific name,
# intended to be passed to `field` (e.g. `field string`).
string : Parser CSVField Str
string = Parser.Str.anyString
# Parser for a field containing a natural number.
#
# The field is first read as a string (see `string`) and then converted with `Str.toNat`.
# When the conversion fails, the parser fails with a `ParsingFailure`
# whose message contains the offending field contents.
nat : Parser CSVField Nat
nat =
    string
    |> map (\val ->
        when Str.toNat val is
            Ok num ->
                Ok num
            # The conversion error itself carries no extra information for the
            # message, so it is deliberately ignored rather than bound to a name.
            Err _ ->
                Err "The field is not a valid Nat: \(val)"
        )
    |> flatten
# f64 : Parser CSVField F64
# f64 = string |> map Str.toF64 |> flatten
parseStrToCSV : Str -> Result CSV [ParsingFailure Str, ParsingIncomplete RawStr]
parseStrToCSV = \input ->
@ -111,7 +147,7 @@ escapedContents = many (oneOf [
textdata
])
twodquotes = string "\"\""
twodquotes = Parser.Str.string "\"\""
nonescapedCsvField : Parser RawStr CSVField
nonescapedCsvField = many textdata
@ -120,5 +156,5 @@ dquote = codepoint 34 # '"'
endOfLine = alt (ignore crlf) (ignore lf)
cr = codepoint 13 # '\r'
lf = codepoint 10 # '\n'
crlf = string "\r\n"
crlf = Parser.Str.string "\r\n"
textdata = codepointSatisfies (\x -> (x >= 32 && x <= 33) || (x >= 35 && x <= 43) || (x >= 45 && x <= 126)) # Any printable char except " (34) and , (44)

View file

@ -1,6 +1,7 @@
interface Parser.Core
exposes [
Parser,
ParseResult,
parse,
parsePartial,
fail,
@ -21,6 +22,7 @@ interface Parser.Core
sepBy1,
ignore,
buildPrimitiveParser,
flatten,
]
imports []
@ -37,9 +39,11 @@ interface Parser.Core
## How a parser is _actually_ implemented internally is not important
## and this might change between versions;
## for instance to improve efficiency or error messages on parsing failures.
Parser input a := (input -> Result {val: a, input: input} [ParsingFailure Str])
Parser input a := (input -> ParseResult input a)
buildPrimitiveParser : (input -> Result {val: a, input: input} [ParsingFailure Str]) -> Parser input a
ParseResult input a : Result {val: a, input: input} [ParsingFailure Str]
buildPrimitiveParser : (input -> ParseResult input a) -> Parser input a
buildPrimitiveParser = \fun ->
@Parser fun
@ -57,7 +61,7 @@ buildPrimitiveParser = \fun ->
##
## Of course, this is mostly useful when creating your own internal parsing building blocks.
## `run` or `Parser.Str.runStr` etc. are more useful in daily usage.
parsePartial : Parser input a, input -> Result {val: a, input: input} [ParsingFailure Str]
parsePartial : Parser input a, input -> ParseResult input a
parsePartial = \@Parser parser, input ->
(parser input)
@ -211,6 +215,22 @@ map3 = \parserA, parserB, parserC, transform ->
# ^ And this could be repeated for as high as we want, of course.
## Removes a layer of `Result` from the value produced by a parser.
##
## This allows, for instance, mapping a function that returns a `Result` over a parser:
## - when the wrapped parser produces `Ok val`, the resulting parser produces `val`;
## - when the wrapped parser produces `Err msg`, the resulting parser fails with `ParsingFailure msg`;
## - when the wrapped parser itself fails, that failure is passed through unchanged.
flatten : Parser input (Result a Str) -> Parser input a
flatten = \parser ->
    buildPrimitiveParser \input ->
        when parsePartial parser input is
            Err problem ->
                Err problem
            Ok {val: (Ok val), input: inputRest} ->
                Ok {val: val, input: inputRest}
            # The leftover input is irrelevant once the inner result is an error,
            # so it is not bound to a name.
            Ok {val: (Err problem), input: _} ->
                Err (ParsingFailure problem)
## Runs a parser lazily
##
## This is (only) useful when dealing with a recursive structure.
@ -226,7 +246,7 @@ maybe : Parser input a -> Parser input (Result a [Nothing])
maybe = \parser ->
alt (parser |> map (\val -> Ok val)) (const (Err Nothing))
manyImpl : Parser input a, List a, input -> Result { input : input, val : List a } [ParsingFailure Str]
manyImpl : Parser input a, List a, input -> ParseResult input (List a)
manyImpl = \parser, vals, input ->
result = parsePartial parser input
when result is

View file

@ -9,13 +9,16 @@ interface Parser.Str
stringRaw,
codepoint,
codepointSatisfies,
anyString,
anyRawString,
anyCodepoint,
scalar,
oneOf,
digit,
digits,
strFromRaw,
]
imports [Parser.Core.{Parser, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between}]
imports [Parser.Core.{Parser, ParseResult, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between}]
# Specific string-based parsers:
@ -41,7 +44,7 @@ strFromCodepoint = \cp ->
strFromRaw [cp]
## Runs a parser against the start of a list of scalars, allowing the parser to consume it only partially.
parseRawStrPartial : Parser RawStr a, RawStr -> Result {val: a, input: RawStr} [ParsingFailure Str]
parseRawStrPartial : Parser RawStr a, RawStr -> ParseResult RawStr a
parseRawStrPartial = \parser, input ->
parsePartial parser input
@ -49,7 +52,7 @@ parseRawStrPartial = \parser, input ->
##
## - If the parser succeeds, returns the resulting value as well as the leftover input.
## - If the parser fails, returns `Err (ParsingFailure msg)`
parseStrPartial : Parser RawStr a, Str -> Result {val: a, input: Str} [ParsingFailure Str]
parseStrPartial : Parser RawStr a, Str -> ParseResult Str a
parseStrPartial = \parser, input ->
parser
|> parseRawStrPartial (strToRaw input)
@ -137,9 +140,30 @@ scalar = \expectedScalar ->
|> string
|> map (\_ -> expectedScalar)
betweenBraces : Parser RawStr a -> Parser RawStr a
betweenBraces = \parser ->
between parser (scalar '[') (scalar ']')
# Matches any single codepoint.
#
# Implemented as `codepointSatisfies` with a predicate that accepts every value.
# NOTE(review): behaviour on empty input is decided by `codepointSatisfies`,
# which is not visible here - presumably it fails; confirm.
anyCodepoint : Parser RawStr U8
anyCodepoint = codepointSatisfies (\_ -> True)
# Accepts whatever raw bytes are left and yields them unchanged as the result,
# leaving an empty input behind.
# This parser never fails.
anyRawString : Parser RawStr RawStr
anyRawString = buildPrimitiveParser \remainingBytes ->
    consumedEverything = {val: remainingBytes, input: []}
    Ok consumedEverything
# Reads the entire remaining input as one string.
# Succeeds only when the bytes form valid UTF8, in which case nothing is left unconsumed;
# otherwise the parser fails with a `ParsingFailure`.
anyString : Parser RawStr Str
anyString = buildPrimitiveParser \rawBytes ->
    when Str.fromUtf8 rawBytes is
        Err (BadUtf8 _ _) ->
            Err (ParsingFailure "Expected a string field, but its contents cannot be parsed as UTF8.")
        Ok decoded ->
            Ok {val: decoded, input: []}
# betweenBraces : Parser RawStr a -> Parser RawStr a
# betweenBraces = \parser ->
# between parser (scalar '[') (scalar ']')
digit : Parser RawStr U8

View file

@ -8,12 +8,20 @@ app "main"
# with hard-coded input.
main =
when Parser.CSV.parseStr userCSVParser "John,Doe,10" is
when Parser.CSV.parseStr userCSVParser "John,Doe,10\r\nRichard,Feldman,100\r\nMarten,Wijnja,28\r\n" is
Ok result ->
val = result |> Str.joinWith("\n")
"Parse success: \(val)\n"
nResults = List.len result |> Num.toStr
"Parse success!\n\n\(nResults) users were found:\n\(val)\n"
Err problem ->
"Parsing Problem"
when problem is
ParsingFailure failure ->
"Parsing failure: \(failure)\n"
ParsingIncomplete leftover ->
leftoverStr = leftover |> List.map Parser.Str.strFromRaw |> Str.joinWith ", "
"Parsing incomplete. Following still left: \(leftoverStr)\n"
SyntaxError error ->
"Parsing failure. Syntax error in the CSV: \(error)"
# main = fullTest csvParser "10,20\n\"An escaped field!\"\"\n,,,\",30\n"
# main = partialTest fieldParser "\"An escaped field with some \"\"<- double quotes\""
# main = fullTest fieldContentsParser "My very cool,\"\"\r\n string"
@ -21,10 +29,12 @@ main =
# main = partialTest manyParser "this is a very long string\"\""
userCSVParser =
Parser.CSV.record (\first -> \last -> \age -> "User: \(first) \(last) \(age)")
|> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "John"))
|> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "Doe"))
|> Parser.Core.apply (Parser.CSV.field (Parser.Str.string "10"))
Parser.CSV.record (\first -> \last -> \age ->
ageStr = Num.toStr age
"User: \(first) \(last) \(ageStr)")
|> Parser.Core.apply (Parser.CSV.field Parser.CSV.string)
|> Parser.Core.apply (Parser.CSV.field Parser.CSV.string)
|> Parser.Core.apply (Parser.CSV.field Parser.CSV.nat)
partialTest = \parser, input ->
when Parser.Str.parseStrPartial parser input is

View file

@ -6,13 +6,20 @@
#include <string.h>
#include <unistd.h>
//#define ROC_PLATFORM_DEBUG
void alloc_panic(size_t size);
void *roc_alloc(size_t size, unsigned int alignment) {
#ifdef ROC_PLATFORM_DEBUG
printf("Allocating %llu (alignment %ud) ", (unsigned long long)size,
alignment);
#endif
void *result = malloc(size);
#ifdef ROC_PLATFORM_DEBUG
printf("at: %p\n", result);
#endif
if (result == NULL) {
if (size ==
@ -28,11 +35,16 @@ void *roc_alloc(size_t size, unsigned int alignment) {
void *roc_realloc(void *ptr, size_t new_size, size_t old_size,
unsigned int alignment) {
#ifdef ROC_PLATFORM_DEBUG
printf("Rellocating %p (%llu -> %llu) (alignment %ud) ", ptr,
(unsigned long long)old_size, (unsigned long long)new_size, alignment);
#endif
void *result = realloc(ptr, new_size);
#ifdef ROC_PLATFORM_DEBUG
printf("at: %p\n", result);
#endif
if (result == NULL) {
if (new_size ==
@ -47,7 +59,10 @@ void *roc_realloc(void *ptr, size_t new_size, size_t old_size,
}
// Releases memory previously handed out by the allocation hooks by passing
// `ptr` to free(). `alignment` is only used for the optional debug trace.
//
// When ROC_PLATFORM_DEBUG is defined, every deallocation is logged to stdout.
void roc_dealloc(void *ptr, unsigned int alignment) {
#ifdef ROC_PLATFORM_DEBUG
  // `%u` is the complete conversion for an unsigned int; the former `%ud`
  // printed a stray literal 'd' after the alignment value.
  printf("Deallocating %p (alignment %u)\n", ptr, alignment);
#endif
  free(ptr);
}
@ -67,7 +82,9 @@ void alloc_panic(size_t size) {
}
// Copies `n` bytes from `src` to `dest` via memcpy() and returns memcpy()'s
// result. When ROC_PLATFORM_DEBUG is defined, the copy is logged to stdout
// before it happens.
void *roc_memcpy(void *dest, const void *src, size_t n) {
#ifdef ROC_PLATFORM_DEBUG
  printf("memcpy %p -> %p (size: %llu)\n", src, dest, (unsigned long long)n);
#endif
  void *copied = memcpy(dest, src, n);
  return copied;
}