roc/examples/csv/Parser/CSV.roc
2022-07-16 13:04:14 +02:00

124 lines
4.1 KiB
Text

interface Parser.CSV
exposes [
CSV,
CSVRecord,
CSVField, # <- Might be unneeded?
file,
record,
parseStr,
parseCSV,
field,
]
imports [
Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore},
Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, string, scalar, digits, strFromRaw}
]
## This is a CSV parser which follows RFC4180
##
## For simplicity's sake, the following things are not yet supported:
## - CSV files with headings
## - A file not ending in a final CRLF ("\r\n").
##
## The following however *is* supported
## - A simple LF ("\n") instead of CRLF ("\r\n") to separate records (and at the end).
CSVField : RawStr
CSVRecord : List CSVField
CSV : List CSVRecord
parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError (List U8), ParsingIncomplete CSVRecord]
parseStr = \csvParser, input ->
when parseStrToCSV input is
Err (ParsingIncomplete rest) ->
Err (SyntaxError rest)
Err (ParsingFailure str) ->
Err (ParsingFailure str)
Ok csvData ->
when parseCSV csvParser csvData is
Err (ParsingFailure str) ->
Err (ParsingFailure str)
Err (ParsingIncomplete problem) ->
Err (ParsingIncomplete problem)
Ok vals ->
Ok vals
parseCSV : Parser CSVRecord a, CSV -> Result (List a) [ParsingFailure Str, ParsingIncomplete CSVRecord]
parseCSV = \csvParser, csvData ->
List.walkUntil csvData (Ok []) \state, recordList ->
when parse csvParser recordList (\leftover -> leftover == []) is
Err problem ->
Break (Err problem)
Ok val ->
state
|> Result.map (\vals -> List.append vals val)
|> Continue
record : a -> Parser CSVRecord a
record = Parser.Core.const
field : Parser RawStr a -> Parser CSVRecord a
field = \fieldParser ->
buildPrimitiveParser \recordVal ->
when List.get recordVal 0 is
Err OutOfBounds ->
Err (ParsingFailure "expected another CSV field but there are no more fields in this record")
Ok rawStr ->
when Parser.Str.parseRawStr fieldParser rawStr is
Ok val ->
Ok {val: val, input: (List.dropFirst recordVal)}
Err (ParsingFailure reason) ->
Err (ParsingFailure reason)
Err (ParsingIncomplete reason) ->
reasonStr = strFromRaw reason
Err (ParsingFailure "The field parser was unable to read the whole field: \(reasonStr)")
parseStrToCSV : Str -> Result CSV [ParsingFailure Str, ParsingIncomplete RawStr]
parseStrToCSV = \input ->
parse file (Str.toUtf8 input) (\leftover -> leftover == [])
parseStrToCSVRecord : Str -> Result CSVRecord [ParsingFailure Str, ParsingIncomplete RawStr]
parseStrToCSVRecord = \input ->
parse csvRecord (Str.toUtf8 input) (\leftover -> leftover == [])
# The following are parsers to turn strings into CSV structures
file : Parser RawStr CSV
file = many recordNewline
# The following compiles 6x slower, but follows the RFC to the letter (allowing the final CRLF to be omitted)
# file : Parser RawStr CSV
# file = map2 (many recordNewline) (alt record recordNewline) (\records, finalRecord -> List.concat records [finalRecord])
recordNewline : Parser RawStr CSVRecord
recordNewline = map2 csvRecord endOfLine (\rec, _ -> rec)
csvRecord : Parser RawStr CSVRecord
csvRecord = sepBy1 csvField comma
csvField : Parser RawStr CSVField
csvField = alt escapedCsvField nonescapedCsvField
escapedCsvField : Parser RawStr CSVField
escapedCsvField = between escapedContents dquote dquote
escapedContents = many (oneOf [
twodquotes |> map (\_ -> 34), # An escaped double quote
comma,
cr,
lf,
textdata
])
twodquotes = string "\"\""
nonescapedCsvField : Parser RawStr CSVField
nonescapedCsvField = many textdata
comma = codepoint 44 # ','
dquote = codepoint 34 # '"'
endOfLine = alt (ignore crlf) (ignore lf)
cr = codepoint 13 # '\r'
lf = codepoint 10 # '\n'
crlf = string "\r\n"
textdata = codepointSatisfies (\x -> (x >= 32 && x <= 33) || (x >= 35 && x <= 43) || (x >= 45 && x <= 126)) # Any printable char except " (34) and , (44)