mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-30 23:31:12 +00:00
Better parse errors on CSV parsing fail.
This commit is contained in:
parent
b45ce4908d
commit
35ff9642aa
5 changed files with 137 additions and 30 deletions
|
@ -8,10 +8,12 @@ interface Parser.CSV
|
|||
parseStr,
|
||||
parseCSV,
|
||||
field,
|
||||
string,
|
||||
nat,
|
||||
]
|
||||
imports [
|
||||
Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore},
|
||||
Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, string, scalar, digits, strFromRaw}
|
||||
Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore, flatten},
|
||||
Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, scalar, digits, strFromRaw}
|
||||
]
|
||||
|
||||
## This is a CSV parser which follows RFC4180
|
||||
|
@ -27,11 +29,12 @@ CSVField : RawStr
|
|||
CSVRecord : List CSVField
|
||||
CSV : List CSVRecord
|
||||
|
||||
parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError (List U8), ParsingIncomplete CSVRecord]
|
||||
parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError Str, ParsingIncomplete CSVRecord]
|
||||
parseStr = \csvParser, input ->
|
||||
when parseStrToCSV input is
|
||||
Err (ParsingIncomplete rest) ->
|
||||
Err (SyntaxError rest)
|
||||
restStr = Parser.Str.strFromRaw rest
|
||||
Err (SyntaxError restStr)
|
||||
Err (ParsingFailure str) ->
|
||||
Err (ParsingFailure str)
|
||||
Ok csvData ->
|
||||
|
@ -47,32 +50,65 @@ parseCSV : Parser CSVRecord a, CSV -> Result (List a) [ParsingFailure Str, Parsi
|
|||
parseCSV = \csvParser, csvData ->
|
||||
List.walkUntil csvData (Ok []) \state, recordList ->
|
||||
when parse csvParser recordList (\leftover -> leftover == []) is
|
||||
Err problem ->
|
||||
Break (Err problem)
|
||||
Err (ParsingFailure problem) ->
|
||||
recordStr = recordList |> List.map strFromRaw |> Str.joinWith ", "
|
||||
problemStr = "\(problem)\nWhile parsing record `\(recordStr)`."
|
||||
Break (Err (ParsingFailure problemStr))
|
||||
Err (ParsingIncomplete problem) ->
|
||||
Break (Err (ParsingIncomplete problem))
|
||||
Ok val ->
|
||||
state
|
||||
|> Result.map (\vals -> List.append vals val)
|
||||
|> Continue
|
||||
|
||||
# Wrapper function to start building a record parser: wraps the function (or value)
# that combines a set of parsed fields into your desired `a`.
# This is simply an alias for `Parser.Core.const`.
|
||||
#
|
||||
# ## Usage example
|
||||
#
|
||||
# >>> record (\firstName -> \lastName -> \age -> User {firstName, lastName, age})
|
||||
# >>> |> field string
|
||||
# >>> |> field string
|
||||
# >>> |> field nat
|
||||
#
# NOTE(review): `field` is declared with a single parameter (`Parser RawStr a`),
# so the pipeline above presumably needs a combinator such as `apply` around
# each `field ...` step to type-check. Confirm against `Parser.Core`.
|
||||
record : a -> Parser CSVRecord a
|
||||
record = Parser.Core.const
|
||||
|
||||
# Turns a parser for a single raw field into a parser over a whole CSV record.
#
# The resulting parser looks at the first field still left in the record:
# - if the record has no fields left, it fails with a `ParsingFailure`;
# - otherwise it runs `fieldParser` on that field via `Parser.Str.parseRawStr`.
#   On success the field is dropped from the record and the parsed value returned.
#   On failure the error message is extended with the offending field so the
#   caller can see which piece of input was rejected.
field : Parser RawStr a -> Parser CSVRecord a
field = \fieldParser ->
    buildPrimitiveParser \fieldsList ->
        when List.get fieldsList 0 is
            Err OutOfBounds ->
                Err (ParsingFailure "expected another CSV field but there are no more fields in this record")

            Ok rawStr ->
                when Parser.Str.parseRawStr fieldParser rawStr is
                    Ok val ->
                        # Consume exactly one field; the rest stays available for later `field` parsers.
                        Ok {val: val, input: (List.dropFirst fieldsList)}

                    Err (ParsingFailure reason) ->
                        fieldStr = rawStr |> strFromRaw

                        Err (ParsingFailure "Field `\(fieldStr)` could not be parsed. \(reason)")

                    Err (ParsingIncomplete reason) ->
                        # `reason` is the unconsumed tail of the field; a leftover inside a
                        # single field is reported as a plain failure of the record parser.
                        reasonStr = strFromRaw reason
                        fieldsStr = fieldsList |> List.map strFromRaw |> Str.joinWith ", "

                        Err (ParsingFailure "The field parser was unable to read the whole field: `\(reasonStr)` while parsing the first field of leftover \(fieldsStr)")
|
||||
|
||||
# Parser for a field containing a UTF8-encoded string.
# This is `Parser.Str.anyString` used at the `CSVField` input type.
|
||||
string : Parser CSVField Str
|
||||
string = Parser.Str.anyString
|
||||
|
||||
# Parser for a field containing a natural number.
#
# The field is first read as a string with `string`, then converted using
# `Str.toNat`. A failed conversion becomes an `Err` message, which `flatten`
# is applied to in order to obtain a `Parser CSVField Nat`.
nat : Parser CSVField Nat
nat =
    string
    |> map (\val ->
        when Str.toNat val is
            Ok num ->
                Ok num

            # The concrete conversion error is not used; the message reports the raw field instead.
            Err _ ->
                Err "The field is not a valid Nat: \(val)"
        )
    |> flatten
|
||||
|
||||
# f64 : Parser CSVField F64
|
||||
# f64 = string |> map Str.toF64 |> flatten
|
||||
|
||||
parseStrToCSV : Str -> Result CSV [ParsingFailure Str, ParsingIncomplete RawStr]
|
||||
parseStrToCSV = \input ->
|
||||
|
@ -111,7 +147,7 @@ escapedContents = many (oneOf [
|
|||
textdata
|
||||
])
|
||||
|
||||
twodquotes = string "\"\""
|
||||
twodquotes = Parser.Str.string "\"\""
|
||||
|
||||
nonescapedCsvField : Parser RawStr CSVField
|
||||
nonescapedCsvField = many textdata
|
||||
|
@ -120,5 +156,5 @@ dquote = codepoint 34 # '"'
|
|||
endOfLine = alt (ignore crlf) (ignore lf)
|
||||
cr = codepoint 13 # '\r'
|
||||
lf = codepoint 10 # '\n'
|
||||
crlf = string "\r\n"
|
||||
crlf = Parser.Str.string "\r\n"
|
||||
textdata = codepointSatisfies (\x -> (x >= 32 && x <= 33) || (x >= 35 && x <= 43) || (x >= 45 && x <= 126)) # Any printable char except " (34) and , (44)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
interface Parser.Core
|
||||
exposes [
|
||||
Parser,
|
||||
ParseResult,
|
||||
parse,
|
||||
parsePartial,
|
||||
fail,
|
||||
|
@ -21,6 +22,7 @@ interface Parser.Core
|
|||
sepBy1,
|
||||
ignore,
|
||||
buildPrimitiveParser,
|
||||
flatten,
|
||||
]
|
||||
imports []
|
||||
|
||||
|
@ -37,9 +39,11 @@ interface Parser.Core
|
|||
## How a parser is _actually_ implemented internally is not important
|
||||
## and this might change between versions;
|
||||
## for instance to improve efficiency or error messages on parsing failures.
|
||||
Parser input a := (input -> Result {val: a, input: input} [ParsingFailure Str])
|
||||
Parser input a := (input -> ParseResult input a)
|
||||
|
||||
buildPrimitiveParser : (input -> Result {val: a, input: input} [ParsingFailure Str]) -> Parser input a
|
||||
ParseResult input a : Result {val: a, input: input} [ParsingFailure Str]
|
||||
|
||||
buildPrimitiveParser : (input -> ParseResult input a) -> Parser input a
|
||||
buildPrimitiveParser = \fun ->
|
||||
@Parser fun
|
||||
|
||||
|
@ -57,7 +61,7 @@ buildPrimitiveParser = \fun ->
|
|||
##
|
||||
## Of course, this is mostly useful when creating your own internal parsing building blocks.
|
||||
## `run` or `Parser.Str.runStr` etc. are more useful in daily usage.
|
||||
parsePartial : Parser input a, input -> Result {val: a, input: input} [ParsingFailure Str]
|
||||
parsePartial : Parser input a, input -> ParseResult input a
|
||||
parsePartial = \@Parser parser, input ->
|
||||
(parser input)
|
||||
|
||||
|
@ -211,6 +215,22 @@ map3 = \parserA, parserB, parserC, transform ->
|
|||
|
||||
# ^ And this could be repeated for as high as we want, of course.
|
||||
|
||||
# Removes a layer of `Result` from the value produced by a parser.
#
# This makes it possible to `map` a function that itself returns a `Result`
# over a parser: an `Ok val` produced by the inner parser becomes a normal
# successful parse of `val`, while an `Err msg` is turned into
# `ParsingFailure msg`. Failures of the inner parser are passed through unchanged.
flatten : Parser input (Result a Str) -> Parser input a
flatten = \parser ->
    buildPrimitiveParser \input ->
        result = parsePartial parser input

        when result is
            Err problem ->
                Err problem

            Ok {val: (Ok val), input: inputRest} ->
                Ok {val: val, input: inputRest}

            # The remaining input is irrelevant once the parsed value is an error.
            Ok {val: (Err problem), input: _} ->
                Err (ParsingFailure problem)
|
||||
|
||||
## Runs a parser lazily
|
||||
##
|
||||
## This is (only) useful when dealing with a recursive structure.
|
||||
|
@ -226,7 +246,7 @@ maybe : Parser input a -> Parser input (Result a [Nothing])
|
|||
maybe = \parser ->
|
||||
alt (parser |> map (\val -> Ok val)) (const (Err Nothing))
|
||||
|
||||
manyImpl : Parser input a, List a, input -> Result { input : input, val : List a } [ParsingFailure Str]
|
||||
manyImpl : Parser input a, List a, input -> ParseResult input (List a)
|
||||
manyImpl = \parser, vals, input ->
|
||||
result = parsePartial parser input
|
||||
when result is
|
||||
|
|
|
@ -9,13 +9,16 @@ interface Parser.Str
|
|||
stringRaw,
|
||||
codepoint,
|
||||
codepointSatisfies,
|
||||
anyString,
|
||||
anyRawString,
|
||||
anyCodepoint,
|
||||
scalar,
|
||||
oneOf,
|
||||
digit,
|
||||
digits,
|
||||
strFromRaw,
|
||||
]
|
||||
imports [Parser.Core.{Parser, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between}]
|
||||
imports [Parser.Core.{Parser, ParseResult, const, fail, map, map2, apply, many, oneOrMore, parse, parsePartial, buildPrimitiveParser, between}]
|
||||
|
||||
# Specific string-based parsers:
|
||||
|
||||
|
@ -41,7 +44,7 @@ strFromCodepoint = \cp ->
|
|||
strFromRaw [cp]
|
||||
|
||||
## Runs a parser against the start of a list of scalars, allowing the parser to consume it only partially.
|
||||
parseRawStrPartial : Parser RawStr a, RawStr -> Result {val: a, input: RawStr} [ParsingFailure Str]
|
||||
parseRawStrPartial : Parser RawStr a, RawStr -> ParseResult RawStr a
|
||||
parseRawStrPartial = \parser, input ->
|
||||
parsePartial parser input
|
||||
|
||||
|
@ -49,7 +52,7 @@ parseRawStrPartial = \parser, input ->
|
|||
##
|
||||
## - If the parser succeeds, returns the resulting value as well as the leftover input.
|
||||
## - If the parser fails, returns `Err (ParsingFailure msg)`
|
||||
parseStrPartial : Parser RawStr a, Str -> Result {val: a, input: Str} [ParsingFailure Str]
|
||||
parseStrPartial : Parser RawStr a, Str -> ParseResult Str a
|
||||
parseStrPartial = \parser, input ->
|
||||
parser
|
||||
|> parseRawStrPartial (strToRaw input)
|
||||
|
@ -137,9 +140,30 @@ scalar = \expectedScalar ->
|
|||
|> string
|
||||
|> map (\_ -> expectedScalar)
|
||||
|
||||
betweenBraces : Parser RawStr a -> Parser RawStr a
|
||||
betweenBraces = \parser ->
|
||||
between parser (scalar '[') (scalar ']')
|
||||
# Matches any codepoint and yields it as a `U8`.
# Built from `codepointSatisfies` with a predicate that accepts every value.
|
||||
anyCodepoint : Parser RawStr U8
|
||||
anyCodepoint = codepointSatisfies (\_ -> True)
|
||||
|
||||
# Accepts whatever raw bytes are left and yields them unchanged.
# The whole input is consumed (the remaining input is the empty list),
# and this parser never returns an error.
anyRawString : Parser RawStr RawStr
anyRawString =
    buildPrimitiveParser \bytes ->
        Ok {val: bytes, input: []}
|
||||
|
||||
# Decodes the entire input as a UTF8 string.
# On success the whole input is consumed and the decoded `Str` is the result;
# if the bytes are not valid UTF8 the parser fails with a `ParsingFailure`.
anyString : Parser RawStr Str
anyString =
    buildPrimitiveParser \bytes ->
        when Str.fromUtf8 bytes is
            Err (BadUtf8 _ _) ->
                Err (ParsingFailure "Expected a string field, but its contents cannot be parsed as UTF8.")

            Ok decoded ->
                Ok {val: decoded, input: []}
|
||||
|
||||
# betweenBraces : Parser RawStr a -> Parser RawStr a
|
||||
# betweenBraces = \parser ->
|
||||
# between parser (scalar '[') (scalar ']')
|
||||
|
||||
|
||||
digit : Parser RawStr U8
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue