mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-29 06:44:46 +00:00
160 lines
5.3 KiB
Text
160 lines
5.3 KiB
Text
interface Parser.CSV
|
|
exposes [
|
|
CSV,
|
|
CSVRecord,
|
|
CSVField, # <- Might be unneeded?
|
|
file,
|
|
record,
|
|
parseStr,
|
|
parseCSV,
|
|
field,
|
|
string,
|
|
nat,
|
|
]
|
|
imports [
|
|
Parser.Core.{Parser, parse, buildPrimitiveParser, fail, const, alt, map, map2, apply, many, oneorMore, sepBy1, between, ignore, flatten},
|
|
Parser.Str.{RawStr, parseStrPartial, oneOf, codepoint, codepointSatisfies, scalar, digits, strFromRaw}
|
|
]
|
|
|
|
## This is a CSV parser which follows RFC4180
|
|
##
|
|
## For simplicity's sake, the following things are not yet supported:
|
|
## - CSV files with headings
|
|
## - A file not ending in a final CRLF ("\r\n").
|
|
##
|
|
## The following however *is* supported
|
|
## - A simple LF ("\n") instead of CRLF ("\r\n") to separate records (and at the end).
|
|
|
|
# A single CSV field, kept in raw form (`RawStr` is imported from `Parser.Str`).
CSVField : RawStr
|
|
# One CSV record: the list of fields that `csvRecord` reads between two line endings.
CSVRecord : List CSVField
|
|
# A whole CSV document: the list of records produced by the `file` parser.
CSV : List CSVRecord
|
|
|
|
# Parses a complete CSV document given as a `Str` and converts every record
# with `csvParser`.
#
# Two stages are run in sequence:
# 1. `parseStrToCSV` turns the text into records of raw fields. Input left
#    over at this stage is reported as `SyntaxError`, carrying the unparsed
#    remainder converted back to a `Str`.
# 2. `parseCSV` applies `csvParser` to each record.
#
# All other errors are passed on under the same tag they arrived with.
parseStr : Parser CSVRecord a, Str -> Result (List a) [ParsingFailure Str, SyntaxError Str, ParsingIncomplete CSVRecord]
parseStr = \csvParser, input ->
    when parseStrToCSV input is
        Ok records ->
            when parseCSV csvParser records is
                Ok parsed ->
                    Ok parsed

                Err (ParsingIncomplete leftoverFields) ->
                    Err (ParsingIncomplete leftoverFields)

                Err (ParsingFailure message) ->
                    Err (ParsingFailure message)

        Err (ParsingFailure message) ->
            Err (ParsingFailure message)

        Err (ParsingIncomplete unparsed) ->
            # Leftover text at the document level means the input is not valid CSV.
            Err (SyntaxError (Parser.Str.strFromRaw unparsed))
|
|
|
|
# Runs `csvParser` over every record of an already tokenised CSV document.
#
# Records are processed in order and the results are collected into a list.
# The walk stops at the first record that fails:
# - `ParsingFailure` gets the offending record (fields joined with ", ")
#   appended to its message;
# - `ParsingIncomplete` (the parser did not consume every field of the
#   record) is passed on unchanged.
parseCSV : Parser CSVRecord a, CSV -> Result (List a) [ParsingFailure Str, ParsingIncomplete CSVRecord]
parseCSV = \csvParser, csvData ->
    List.walkUntil csvData (Ok []) \collected, fields ->
        # A record only counts as parsed when no fields are left over.
        when parse csvParser fields (\unused -> unused == []) is
            Ok parsed ->
                Continue (Result.map collected (\soFar -> List.append soFar parsed))

            Err (ParsingIncomplete leftoverFields) ->
                Break (Err (ParsingIncomplete leftoverFields))

            Err (ParsingFailure problem) ->
                recordStr = Str.joinWith (List.map fields strFromRaw) ", "

                Break (Err (ParsingFailure "\(problem)\nWhile parsing record `\(recordStr)`."))
|
|
|
|
# Entry point for building a record parser: wraps the given value (typically
# a curried constructor for your desired `a`) with `Parser.Core.const`, so
# that subsequent `field` steps can feed it one argument each.
#
# ## Usage example
#
# >>> record (\firstName -> \lastName -> \age -> User {firstName, lastName, age})
# >>> |> field string
# >>> |> field string
# >>> |> field nat
#
# NOTE(review): `field` takes a single argument, so the `|> field string`
# steps above probably need a combinator around them, e.g.
# `|> apply (field string)` — confirm against `Parser.Core`.
#
record : a -> Parser CSVRecord a
record = \constructor -> Parser.Core.const constructor
|
|
|
|
# Lifts a parser for a single raw field into a parser over a whole record.
#
# The resulting parser takes the first field of the remaining record, runs
# `fieldParser` on it via `Parser.Str.parseRawStr`, and on success continues
# with the rest of the fields.
#
# Every failure is reported as `ParsingFailure` with a descriptive message:
# - the record has no fields left;
# - `fieldParser` rejected the field (its reason is appended);
# - `fieldParser` succeeded but did not consume the whole field (the
#   unconsumed remainder and the remaining fields are included).
field : Parser RawStr a -> Parser CSVRecord a
field = \fieldParser ->
    buildPrimitiveParser \fieldsList ->
        when List.get fieldsList 0 is
            Err OutOfBounds ->
                Err (ParsingFailure "expected another CSV field but there are no more fields in this record")

            Ok rawStr ->
                when Parser.Str.parseRawStr fieldParser rawStr is
                    Ok val ->
                        # Hand the remaining fields on to the next parser in the chain.
                        Ok { val: val, input: List.dropFirst fieldsList }

                    Err (ParsingFailure reason) ->
                        fieldStr = rawStr |> strFromRaw

                        Err (ParsingFailure "Field `\(fieldStr)` could not be parsed. \(reason)")

                    Err (ParsingIncomplete reason) ->
                        # Here `reason` is the part of the field that was left unparsed.
                        reasonStr = strFromRaw reason
                        fieldsStr = fieldsList |> List.map strFromRaw |> Str.joinWith ", "

                        Err (ParsingFailure "The field parser was unable to read the whole field: `\(reasonStr)` while parsing the first field of leftover \(fieldsStr)")
|
|
|
|
# Parser for a field containing a UTF8-encoded string.
# Plain alias for `Parser.Str.anyString`, which is defined outside this file.
# NOTE(review): presumably it accepts any field content and yields it as a `Str` — confirm in `Parser.Str`.
|
|
string : Parser CSVField Str
|
|
string = Parser.Str.anyString
|
|
|
|
# Parser for a field containing a natural number.
#
# Reads the field with `string`, converts it with `Str.toNat`, and uses
# `flatten` to turn a failed conversion into a parser error whose message
# quotes the offending field text.
nat : Parser CSVField Nat
nat =
    string
    |> map (\val ->
        when Str.toNat val is
            Ok num ->
                Ok num

            # The conversion error itself carries nothing we report, so it is ignored.
            Err _ ->
                Err "The field is not a valid Nat: \(val)"
        )
    |> flatten
|
|
|
|
# f64 : Parser CSVField F64
|
|
# f64 = string |> map Str.toF64 |> flatten
|
|
|
|
# Tokenises a whole CSV document: converts `input` to its UTF-8 bytes and runs
# the `file` parser on them. The parse only counts as complete when no bytes
# are left over.
parseStrToCSV : Str -> Result CSV [ParsingFailure Str, ParsingIncomplete RawStr]
parseStrToCSV = \input ->
    bytes = Str.toUtf8 input

    parse file bytes (\remaining -> remaining == [])
|
|
|
|
# Tokenises a single CSV record (without a line ending): converts `input` to
# its UTF-8 bytes and runs the `csvRecord` parser on them. The parse only
# counts as complete when no bytes are left over.
parseStrToCSVRecord : Str -> Result CSVRecord [ParsingFailure Str, ParsingIncomplete RawStr]
parseStrToCSVRecord = \input ->
    bytes = Str.toUtf8 input

    parse csvRecord bytes (\remaining -> remaining == [])
|
|
|
|
|
|
# The following are parsers to turn strings into CSV structures
|
|
|
|
# A CSV document: any number of records, each followed by a line ending
# (see `recordNewline`).
file : Parser RawStr CSV
file = recordNewline |> many
|
|
|
|
# The following compiles 6x slower, but follows the RFC to the letter (allowing the final CRLF to be omitted)
|
|
# file : Parser RawStr CSV
|
|
# file = map2 (many recordNewline) (alt csvRecord recordNewline) (\records, finalRecord -> List.concat records [finalRecord])
|
|
|
|
# One record followed by a line ending; the line ending itself is discarded.
recordNewline : Parser RawStr CSVRecord
recordNewline =
    csvRecord |> map2 endOfLine (\parsedRecord, _ -> parsedRecord)
|
|
|
|
# One record: one or more fields separated by commas.
csvRecord : Parser RawStr CSVRecord
csvRecord = csvField |> sepBy1 comma
|
|
|
|
# One field: the quoted (escaped) form is tried first, then the plain form.
csvField : Parser RawStr CSVField
csvField = escapedCsvField |> alt nonescapedCsvField
|
|
|
|
# A quoted field: `escapedContents` enclosed in a pair of double quotes.
escapedCsvField : Parser RawStr CSVField
escapedCsvField = escapedContents |> between dquote dquote
|
|
# Contents of a quoted field: any number of characters, where commas and
# line-break characters are allowed and a doubled double quote stands for a
# single one. The alternatives are tried in the order listed.
escapedContents =
    escapedQuote = map twodquotes (\_ -> 34) # An escaped double quote

    many (oneOf [escapedQuote, comma, cr, lf, textdata])
|
|
|
|
# Two consecutive double quotes; `escapedContents` maps this to a single `"` (34).
twodquotes = Parser.Str.string "\"\""
|
|
|
|
# An unquoted field: any number (possibly zero) of `textdata` characters.
nonescapedCsvField : Parser RawStr CSVField
nonescapedCsvField = textdata |> many
|
|
# The comma (code 44): field separator in `csvRecord`, also allowed inside quoted fields.
comma = codepoint 44 # ','
|
|
# The double quote (code 34): opens and closes a quoted field in `escapedCsvField`.
dquote = codepoint 34 # '"'
|
|
# Record terminator: CRLF is tried first, then a bare LF. The matched
# characters are discarded.
endOfLine = ignore crlf |> alt (ignore lf)
|
|
# Carriage return (code 13); in this file it is only used inside quoted field contents.
cr = codepoint 13 # '\r'
|
|
# Line feed (code 10): allowed inside quoted fields and, via `endOfLine`, as a bare record terminator.
lf = codepoint 10 # '\n'
|
|
# The two-character sequence CR LF, tried first by `endOfLine` as the record terminator.
crlf = Parser.Str.string "\r\n"
|
|
# A single plain-text character: any printable ASCII character (32..126)
# except the double quote (34) and the comma (44).
textdata =
    codepointSatisfies \code ->
        isPrintable = code >= 32 && code <= 126

        isPrintable && code != 34 && code != 44
|