BREAKING(std/encoding/csv): improve the definition of ParseOptions (#7714)

This commit is contained in:
uki00a 2020-09-28 03:20:46 +09:00 committed by GitHub
parent 5db72dcaf3
commit 94dcef714d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 101 additions and 72 deletions

View file

@ -52,7 +52,7 @@ export class ParseError extends Error {
}
/**
* @property comma - Character which separates values. Default: ','
* @property separator - Character which separates values. Default: ','
* @property comment - Character to start a comment. Default: '#'
* @property trimLeadingSpace - Flag to trim the leading space of the value.
* Default: 'false'
@ -62,7 +62,7 @@ export class ParseError extends Error {
* If == 0, first row is used as referral for the number of fields.
*/
export interface ReadOptions {
comma?: string;
separator?: string;
comment?: string;
trimLeadingSpace?: boolean;
lazyQuotes?: boolean;
@ -70,16 +70,16 @@ export interface ReadOptions {
}
function chkOptions(opt: ReadOptions): void {
if (!opt.comma) {
opt.comma = ",";
if (!opt.separator) {
opt.separator = ",";
}
if (!opt.trimLeadingSpace) {
opt.trimLeadingSpace = false;
}
if (
INVALID_RUNE.includes(opt.comma) ||
INVALID_RUNE.includes(opt.separator) ||
(typeof opt.comment === "string" && INVALID_RUNE.includes(opt.comment)) ||
opt.comma === opt.comment
opt.separator === opt.comment
) {
throw new Error(ERR_INVALID_DELIM);
}
@ -88,7 +88,7 @@ function chkOptions(opt: ReadOptions): void {
async function readRecord(
startLine: number,
reader: BufReader,
opt: ReadOptions = { comma: ",", trimLeadingSpace: false },
opt: ReadOptions = { separator: ",", trimLeadingSpace: false },
): Promise<string[] | null> {
const tp = new TextProtoReader(reader);
let line = await readLine(tp);
@ -103,13 +103,13 @@ async function readRecord(
return [];
}
assert(opt.comma != null);
assert(opt.separator != null);
let fullLine = line;
let quoteError: ParseError | null = null;
const quote = '"';
const quoteLen = quote.length;
const commaLen = opt.comma.length;
const separatorLen = opt.separator.length;
let recordBuffer = "";
const fieldIndexes = [] as number[];
parseField:
@ -120,7 +120,7 @@ async function readRecord(
if (line.length === 0 || !line.startsWith(quote)) {
// Non-quoted string field
const i = line.indexOf(opt.comma);
const i = line.indexOf(opt.separator);
let field = line;
if (i >= 0) {
field = field.substring(0, i);
@ -144,7 +144,7 @@ async function readRecord(
recordBuffer += field;
fieldIndexes.push(recordBuffer.length);
if (i >= 0) {
line = line.substring(i + commaLen);
line = line.substring(i + separatorLen);
continue parseField;
}
break parseField;
@ -161,9 +161,9 @@ async function readRecord(
// `""` sequence (append quote).
recordBuffer += quote;
line = line.substring(quoteLen);
} else if (line.startsWith(opt.comma)) {
} else if (line.startsWith(opt.separator)) {
// `","` sequence (end of field).
line = line.substring(commaLen);
line = line.substring(separatorLen);
fieldIndexes.push(recordBuffer.length);
continue parseField;
} else if (0 === line.length) {
@ -281,7 +281,7 @@ async function readLine(tp: TextProtoReader): Promise<string | null> {
export async function readMatrix(
reader: BufReader,
opt: ReadOptions = {
comma: ",",
separator: ",",
trimLeadingSpace: false,
lazyQuotes: false,
},
@ -324,13 +324,13 @@ export async function readMatrix(
/**
* Parse the CSV string/buffer with the options provided.
*
* HeaderOptions provides the column definition
* ColumnOptions provides the column definition
* and the parse function for each entry of the
* column.
*/
export interface HeaderOptions {
export interface ColumnOptions {
/**
* Name of the header to be used as property
* Name of the column to be used as property
*/
name: string;
/**
@ -343,14 +343,20 @@ export interface HeaderOptions {
export interface ParseOptions extends ReadOptions {
/**
* If a boolean is provided, the first line will be used as Header definitions.
* If `string[]` or `HeaderOptions[]` those names will be used for header definition.
* If you provide `skipFirstRow: true` and `columns`, the first line will be skipped.
* If you provide `skipFirstRow: true` but not `columns`, the first line will be skipped and used as header definitions.
*/
header: boolean | string[] | HeaderOptions[];
skipFirstRow?: boolean;
/**
* If you provide `string[]` or `ColumnOptions[]`, those names will be used for header definition.
*/
columns?: string[] | ColumnOptions[];
/** Parse function for rows.
* Example:
* const r = await parseFile('a,b,c\ne,f,g\n', {
* header: ["this", "is", "sparta"],
* columns: ["this", "is", "sparta"],
* parse: (e: Record<string, unknown>) => {
* return { super: e.this, street: e.is, fighter: e.sparta };
* }
@ -370,14 +376,14 @@ export interface ParseOptions extends ReadOptions {
* for columns and rows.
* @param input Input to parse. Can be a string or BufReader.
* @param opt options of the parser.
* @returns If you don't provide both `opt.header` and `opt.parse`, it returns `string[][]`.
* If you provide `opt.header` but not `opt.parse`, it returns `object[]`.
* @returns If you don't provide `opt.skipFirstRow`, `opt.parse`, and `opt.columns`, it returns `string[][]`.
* If you provide `opt.skipFirstRow` or `opt.columns` but not `opt.parse`, it returns `object[]`.
* If you provide `opt.parse`, it returns an array where each element is the value returned from `opt.parse`.
*/
export async function parse(
input: string | BufReader,
opt: ParseOptions = {
header: false,
skipFirstRow: false,
},
): Promise<unknown[]> {
let r: string[][];
@ -386,27 +392,15 @@ export async function parse(
} else {
r = await readMatrix(new BufReader(new StringReader(input)), opt);
}
if (opt.header) {
let headers: HeaderOptions[] = [];
if (opt.skipFirstRow || opt.columns) {
let headers: ColumnOptions[] = [];
let i = 0;
if (Array.isArray(opt.header)) {
if (typeof opt.header[0] !== "string") {
headers = opt.header as HeaderOptions[];
} else {
const h = opt.header as string[];
headers = h.map(
(e): HeaderOptions => {
return {
name: e,
};
},
);
}
} else {
if (opt.skipFirstRow) {
const head = r.shift();
assert(head != null);
headers = head.map(
(e): HeaderOptions => {
(e): ColumnOptions => {
return {
name: e,
};
@ -414,6 +408,21 @@ export async function parse(
);
i++;
}
if (opt.columns) {
if (typeof opt.columns[0] !== "string") {
headers = opt.columns as ColumnOptions[];
} else {
const h = opt.columns as string[];
headers = h.map(
(e): ColumnOptions => {
return {
name: e,
};
},
);
}
}
return r.map((e): unknown => {
if (e.length !== headers.length) {
throw `Error number of fields line:${i}`;