roc/examples/csv/Parser/CSV.roc
2022-07-16 21:49:18 +02:00

160 lines
5.3 KiB
Text

interface Parser.CSV
exposes [
CSV,
CSVRecord,
CSVField, # <- Might be unneeded?
file,
record,
parseStr,
parseCSV,
field,
string,
nat,
]
imports [
Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore, flatten},
Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, scalar, digits, strFromRaw}
]
## This is a CSV parser which follows RFC4180
##
## For simplicity's sake, the following things are not yet supported:
## - CSV files with headings
## - A file not ending in a final CRLF ("\r\n").
##
## The following however *is* supported
## - A simple LF ("\n") instead of CRLF ("\r\n") to separate records (and at the end).
# The contents of a single CSV field, kept as a `RawStr` (imported from `Parser.Str`).
CSVField : RawStr
# One CSV record (a row): the list of its fields.
CSVRecord : List CSVField
# A whole CSV document: the list of its records.
CSV : List CSVRecord
# Parses the CSV text in `input` and converts every record into an `a` by
# running `csvParser` on it.
#
# Two stages are chained:
# 1. `parseStrToCSV` splits the text into records and fields.
# 2. `parseCSV` runs `csvParser` over each record.
#
# Errors:
# - `SyntaxError rest`: stage 1 stopped before consuming the whole input;
#   `rest` is the unparsed remainder converted back to a `Str`.
# - `ParsingFailure msg`: either stage reported a failure.
# - `ParsingIncomplete fields`: stage 2 reported leftover fields.
parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError Str, ParsingIncomplete CSVRecord]
parseStr = \csvParser, input ->
    when parseStrToCSV input is
        Ok csvData ->
            # Results of the second stage are passed through unchanged.
            when parseCSV csvParser csvData is
                Ok vals ->
                    Ok vals

                Err (ParsingIncomplete leftoverFields) ->
                    Err (ParsingIncomplete leftoverFields)

                Err (ParsingFailure reason) ->
                    Err (ParsingFailure reason)

        Err (ParsingFailure reason) ->
            Err (ParsingFailure reason)

        Err (ParsingIncomplete unparsed) ->
            # Leftover raw input at this stage is reported as a syntax error.
            Err (SyntaxError (strFromRaw unparsed))
# Runs `csvParser` on every record of `csvData`, collecting the results in order.
#
# Walking stops at the first record that cannot be parsed:
# - `ParsingFailure msg`: the parser failed; `msg` is the parser's message
#   followed by the offending record (fields joined with ", ").
# - `ParsingIncomplete fields`: passed on unchanged from `parse`.
parseCSV : Parser CSVRecord a, CSV -> Result (List a) [ParsingFailure Str, ParsingIncomplete CSVRecord]
parseCSV = \csvParser, csvData ->
    # A record counts as fully parsed only when no fields are left over.
    nothingLeft = \leftover -> leftover == []

    List.walkUntil csvData (Ok []) \collected, currentRecord ->
        when parse csvParser currentRecord nothingLeft is
            Ok parsed ->
                Continue (Result.map collected (\soFar -> List.append soFar parsed))

            Err (ParsingIncomplete leftoverFields) ->
                Break (Err (ParsingIncomplete leftoverFields))

            Err (ParsingFailure problem) ->
                shownRecord =
                    currentRecord
                    |> List.map strFromRaw
                    |> Str.joinWith ", "

                message = "\(problem)\nWhile parsing record `\(shownRecord)`."

                Break (Err (ParsingFailure message))
# Entry point for building a record parser: wraps the (curried) function that
# combines the parsed fields into your desired `a`.
#
# ## Usage example
#
# >>> record (\firstName -> \lastName -> \age -> User {firstName, lastName, age})
# >>> |> field string
# >>> |> field string
# >>> |> field nat
#
# NOTE(review): `field` takes a single argument, so `|> field string` as written
# would pass two; the example presumably needs a combinator such as
# `Parser.Core.apply` around each `field ...` - confirm against Parser.Core.
record : a -> Parser CSVRecord a
record = \constructor -> const constructor
# Turns a parser for a single raw field into a parser that consumes the first
# field of a `CSVRecord`.
#
# - If the record has no fields left, fails with `ParsingFailure`.
# - If `fieldParser` fails on the field, fails with `ParsingFailure`, quoting
#   the field and the underlying reason.
# - If `fieldParser` reports `ParsingIncomplete`, this is also turned into a
#   `ParsingFailure` that shows the unread remainder and the remaining fields.
# - Otherwise yields the parsed value, with the first field dropped from the
#   remaining input.
field : Parser RawStr a -> Parser CSVRecord a
field = \fieldParser ->
    buildPrimitiveParser \fieldsList ->
        when List.get fieldsList 0 is
            Err OutOfBounds ->
                Err (ParsingFailure "expected another CSV field but there are no more fields in this record")

            Ok rawStr ->
                when Parser.Str.parseRawStr fieldParser rawStr is
                    Ok val ->
                        Ok { val: val, input: List.dropFirst fieldsList }

                    Err (ParsingFailure reason) ->
                        fieldStr = rawStr |> strFromRaw

                        # Fixed wording: removed a stray "from" from the message.
                        Err (ParsingFailure "Field `\(fieldStr)` could not be parsed. \(reason)")

                    Err (ParsingIncomplete reason) ->
                        reasonStr = strFromRaw reason
                        fieldsStr = fieldsList |> List.map strFromRaw |> Str.joinWith ", "

                        # Fixed wording: removed a stray ")" that followed the interpolation.
                        Err (ParsingFailure "The field parser was unable to read the whole field: `\(reasonStr)` while parsing the first field of leftover \(fieldsStr)")
# Parser for a field containing a UTF8-encoded string.
# This is `Parser.Str.anyString` under a CSV-specific name; it is the building
# block for the typed field parsers below (e.g. `nat`).
string : Parser CSVField Str
string = Parser.Str.anyString
# Parser for a field containing a natural number.
#
# The field is first read with `string`, then converted with `Str.toNat`.
# When the conversion fails, the parser fails with a message quoting the
# offending field text (`flatten` turns the inner `Err` into a parser failure).
nat : Parser CSVField Nat
nat =
    string
    |> map (\val ->
        when Str.toNat val is
            Ok num ->
                Ok num

            # The conversion error carries nothing we report, so it is ignored.
            Err _ ->
                Err "The field is not a valid Nat: \(val)"
    )
    |> flatten
# f64 : Parser CSVField F64
# f64 = string |> map Str.toF64 |> flatten
# Parses `input` into the intermediate `CSV` structure (records of raw fields)
# using the `file` parser. The parse only counts as complete when no input
# bytes are left over.
parseStrToCSV : Str -> Result CSV [ParsingFailure Str, ParsingIncomplete RawStr]
parseStrToCSV = \input ->
    bytes = Str.toUtf8 input
    nothingLeft = \leftover -> leftover == []

    parse file bytes nothingLeft
# Parses `input` as a single CSV record (without a line ending) using the
# `csvRecord` parser. The parse only counts as complete when no input bytes
# are left over.
parseStrToCSVRecord : Str -> Result CSVRecord [ParsingFailure Str, ParsingIncomplete RawStr]
parseStrToCSVRecord = \input ->
    bytes = Str.toUtf8 input
    nothingLeft = \leftover -> leftover == []

    parse csvRecord bytes nothingLeft
# The following are parsers to turn strings into CSV structures

# A CSV document: any number of records, each one terminated by a line ending.
file : Parser RawStr CSV
file = recordNewline |> many
# The following compiles 6x slower, but follows the RFC to the letter (allowing the final CRLF to be omitted)
# file : Parser RawStr CSV
# file = map2 (many recordNewline) (alt record recordNewline) (\records, finalRecord -> List.concat records [finalRecord])
# A record followed by a line ending; only the record is kept as the result.
recordNewline : Parser RawStr CSVRecord
recordNewline = map2 csvRecord endOfLine (\parsedRecord, _ -> parsedRecord)
# A record: fields separated by commas (`sepBy1` with `comma` as the separator).
csvRecord : Parser RawStr CSVRecord
csvRecord = csvField |> sepBy1 comma
# A field: the escaped (double-quoted) form is tried first, then the plain form.
csvField : Parser RawStr CSVField
csvField = escapedCsvField |> alt nonescapedCsvField
# An escaped field: `escapedContents` enclosed between two double quotes.
escapedCsvField : Parser RawStr CSVField
escapedCsvField = escapedContents |> between dquote dquote
# The contents of an escaped field: any number of the alternatives below.
# Unlike in a plain field, commas and line-break characters are allowed here,
# and a doubled double quote stands for a single double quote (codepoint 34).
escapedContents =
    [
        map twodquotes (\_ -> 34), # An escaped double quote
        comma,
        cr,
        lf,
        textdata,
    ]
    |> oneOf
    |> many
# Two consecutive double quotes (`""`); `escapedContents` maps this to a single
# double quote. `Parser.Str.string` is written qualified, presumably because
# this module defines its own `string`.
twodquotes = Parser.Str.string "\"\""
# A plain (unquoted) field: any number of `textdata` characters.
nonescapedCsvField : Parser RawStr CSVField
nonescapedCsvField = textdata |> many
# Single-character building blocks, given by their codepoints.
comma = codepoint 44 # ','
dquote = codepoint 34 # '"'
cr = codepoint 13 # '\r'
lf = codepoint 10 # '\n'
crlf = Parser.Str.string "\r\n"

# A line ending: CRLF is tried first, then a bare LF. The matched text is ignored.
endOfLine = ignore crlf |> alt (ignore lf)

# Any printable char (32 to 126) except " (34) and , (44)
textdata = codepointSatisfies (\x -> x >= 32 && x <= 126 && x != 34 && x != 44)