mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-31 11:17:23 +00:00
Add parse_multipart_identifier function to parser (#860)
* Add parse_multipart_identifier function to parser * Update doc for parse_multipart_identifier * Fix conflict
This commit is contained in:
parent
482a3ad417
commit
4559d87a82
1 changed files with 167 additions and 0 deletions
167
src/parser.rs
167
src/parser.rs
|
@ -4707,6 +4707,92 @@ impl<'a> Parser<'a> {
|
|||
Ok(idents)
|
||||
}
|
||||
|
||||
/// Parse identifiers of form ident1[.identN]*
|
||||
///
|
||||
/// Similar in functionality to [parse_identifiers], with difference
|
||||
/// being this function is much more strict about parsing a valid multipart identifier, not
|
||||
/// allowing extraneous tokens to be parsed, otherwise it fails.
|
||||
///
|
||||
/// For example:
|
||||
///
|
||||
/// ```rust
|
||||
/// use sqlparser::ast::Ident;
|
||||
/// use sqlparser::dialect::GenericDialect;
|
||||
/// use sqlparser::parser::Parser;
|
||||
///
|
||||
/// let dialect = GenericDialect {};
|
||||
/// let expected = vec![Ident::new("one"), Ident::new("two")];
|
||||
///
|
||||
/// // expected usage
|
||||
/// let sql = "one.two";
|
||||
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
|
||||
/// let actual = parser.parse_multipart_identifier().unwrap();
|
||||
/// assert_eq!(&actual, &expected);
|
||||
///
|
||||
/// // parse_identifiers is more loose on what it allows, parsing successfully
|
||||
/// let sql = "one + two";
|
||||
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
|
||||
/// let actual = parser.parse_identifiers().unwrap();
|
||||
/// assert_eq!(&actual, &expected);
|
||||
///
|
||||
/// // expected to strictly fail due to + separator
|
||||
/// let sql = "one + two";
|
||||
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
|
||||
/// let actual = parser.parse_multipart_identifier().unwrap_err();
|
||||
/// assert_eq!(
|
||||
/// actual.to_string(),
|
||||
/// "sql parser error: Unexpected token in identifier: +"
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// [parse_identifiers]: Parser::parse_identifiers
|
||||
pub fn parse_multipart_identifier(&mut self) -> Result<Vec<Ident>, ParserError> {
|
||||
let mut idents = vec![];
|
||||
|
||||
// expecting at least one word for identifier
|
||||
match self.next_token().token {
|
||||
Token::Word(w) => idents.push(w.to_ident()),
|
||||
Token::EOF => {
|
||||
return Err(ParserError::ParserError(
|
||||
"Empty input when parsing identifier".to_string(),
|
||||
))?
|
||||
}
|
||||
token => {
|
||||
return Err(ParserError::ParserError(format!(
|
||||
"Unexpected token in identifier: {token}"
|
||||
)))?
|
||||
}
|
||||
};
|
||||
|
||||
// parse optional next parts if exist
|
||||
loop {
|
||||
match self.next_token().token {
|
||||
// ensure that optional period is succeeded by another identifier
|
||||
Token::Period => match self.next_token().token {
|
||||
Token::Word(w) => idents.push(w.to_ident()),
|
||||
Token::EOF => {
|
||||
return Err(ParserError::ParserError(
|
||||
"Trailing period in identifier".to_string(),
|
||||
))?
|
||||
}
|
||||
token => {
|
||||
return Err(ParserError::ParserError(format!(
|
||||
"Unexpected token following period in identifier: {token}"
|
||||
)))?
|
||||
}
|
||||
},
|
||||
Token::EOF => break,
|
||||
token => {
|
||||
return Err(ParserError::ParserError(format!(
|
||||
"Unexpected token in identifier: {token}"
|
||||
)))?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(idents)
|
||||
}
|
||||
|
||||
/// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
|
||||
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
|
||||
let next_token = self.next_token();
|
||||
|
@ -7455,4 +7541,85 @@ mod tests {
|
|||
))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_parse_multipart_identifier_positive() {
    // These cases are only exercised against the generic dialect.
    let generic_only = TestedDialects {
        dialects: vec![Box::new(GenericDialect {})],
        options: None,
    };

    // A quoted part may contain parentheses, periods, spaces and an escaped
    // (doubled) quote; it must come back as a single identifier.
    let quoted_parts = vec![
        Ident::new("CATALOG"),
        Ident {
            value: "F(o)o. \"bar".to_string(),
            quote_style: Some('"'),
        },
        Ident::new("table"),
    ];
    generic_only.run_parser_method(r#"CATALOG."F(o)o. ""bar".table"#, |parser| {
        assert_eq!(quoted_parts, parser.parse_multipart_identifier().unwrap());
    });

    // Whitespace on either side of the separating period is tolerated.
    let spaced_parts = vec![Ident::new("CATALOG"), Ident::new("table")];
    generic_only.run_parser_method("CATALOG . table", |parser| {
        assert_eq!(spaced_parts, parser.parse_multipart_identifier().unwrap());
    });
}
|
||||
|
||||
#[test]
fn test_parse_multipart_identifier_negative() {
    // Each entry pairs an invalid input with the exact error message that
    // `parse_multipart_identifier` must report for it.
    let failing_cases = [
        // nothing to parse at all
        ("", "sql parser error: Empty input when parsing identifier"),
        // leading token that is not a word
        (
            "*schema.table",
            "sql parser error: Unexpected token in identifier: *",
        ),
        // extraneous token after a complete identifier
        (
            "schema.table*",
            "sql parser error: Unexpected token in identifier: *",
        ),
        // period with nothing after it
        (
            "schema.table.",
            "sql parser error: Trailing period in identifier",
        ),
        // period followed by a non-word token
        (
            "schema.*",
            "sql parser error: Unexpected token following period in identifier: *",
        ),
    ];

    for (sql, expected_err) in failing_cases {
        all_dialects().run_parser_method(sql, |parser| {
            let actual_err = parser.parse_multipart_identifier().unwrap_err();
            assert_eq!(actual_err.to_string(), expected_err);
        });
    }
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue