mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-10-13 23:42:05 +00:00
Improve parsing speed by avoiding some clones in parse_identifier (#1624)
This commit is contained in:
parent
539db9fb1a
commit
3db1b4430f
1 changed files with 36 additions and 24 deletions
|
@ -970,7 +970,7 @@ impl<'a> Parser<'a> {
|
||||||
t @ (Token::Word(_) | Token::SingleQuotedString(_)) => {
|
t @ (Token::Word(_) | Token::SingleQuotedString(_)) => {
|
||||||
if self.peek_token().token == Token::Period {
|
if self.peek_token().token == Token::Period {
|
||||||
let mut id_parts: Vec<Ident> = vec![match t {
|
let mut id_parts: Vec<Ident> = vec![match t {
|
||||||
Token::Word(w) => w.to_ident(next_token.span),
|
Token::Word(w) => w.into_ident(next_token.span),
|
||||||
Token::SingleQuotedString(s) => Ident::with_quote('\'', s),
|
Token::SingleQuotedString(s) => Ident::with_quote('\'', s),
|
||||||
_ => unreachable!(), // We matched above
|
_ => unreachable!(), // We matched above
|
||||||
}];
|
}];
|
||||||
|
@ -978,7 +978,7 @@ impl<'a> Parser<'a> {
|
||||||
while self.consume_token(&Token::Period) {
|
while self.consume_token(&Token::Period) {
|
||||||
let next_token = self.next_token();
|
let next_token = self.next_token();
|
||||||
match next_token.token {
|
match next_token.token {
|
||||||
Token::Word(w) => id_parts.push(w.to_ident(next_token.span)),
|
Token::Word(w) => id_parts.push(w.into_ident(next_token.span)),
|
||||||
Token::SingleQuotedString(s) => {
|
Token::SingleQuotedString(s) => {
|
||||||
// SQLite has single-quoted identifiers
|
// SQLite has single-quoted identifiers
|
||||||
id_parts.push(Ident::with_quote('\'', s))
|
id_parts.push(Ident::with_quote('\'', s))
|
||||||
|
@ -1108,7 +1108,7 @@ impl<'a> Parser<'a> {
|
||||||
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
|
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
|
||||||
{
|
{
|
||||||
Ok(Some(Expr::Function(Function {
|
Ok(Some(Expr::Function(Function {
|
||||||
name: ObjectName(vec![w.to_ident(w_span)]),
|
name: ObjectName(vec![w.clone().into_ident(w_span)]),
|
||||||
uses_odbc_syntax: false,
|
uses_odbc_syntax: false,
|
||||||
parameters: FunctionArguments::None,
|
parameters: FunctionArguments::None,
|
||||||
args: FunctionArguments::None,
|
args: FunctionArguments::None,
|
||||||
|
@ -1123,7 +1123,7 @@ impl<'a> Parser<'a> {
|
||||||
| Keyword::CURRENT_DATE
|
| Keyword::CURRENT_DATE
|
||||||
| Keyword::LOCALTIME
|
| Keyword::LOCALTIME
|
||||||
| Keyword::LOCALTIMESTAMP => {
|
| Keyword::LOCALTIMESTAMP => {
|
||||||
Ok(Some(self.parse_time_functions(ObjectName(vec![w.to_ident(w_span)]))?))
|
Ok(Some(self.parse_time_functions(ObjectName(vec![w.clone().into_ident(w_span)]))?))
|
||||||
}
|
}
|
||||||
Keyword::CASE => Ok(Some(self.parse_case_expr()?)),
|
Keyword::CASE => Ok(Some(self.parse_case_expr()?)),
|
||||||
Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)),
|
Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)),
|
||||||
|
@ -1148,7 +1148,7 @@ impl<'a> Parser<'a> {
|
||||||
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
|
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
|
||||||
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
|
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
|
||||||
Keyword::POSITION if self.peek_token_ref().token == Token::LParen => {
|
Keyword::POSITION if self.peek_token_ref().token == Token::LParen => {
|
||||||
Ok(Some(self.parse_position_expr(w.to_ident(w_span))?))
|
Ok(Some(self.parse_position_expr(w.clone().into_ident(w_span))?))
|
||||||
}
|
}
|
||||||
Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)),
|
Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)),
|
||||||
Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)),
|
Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)),
|
||||||
|
@ -1167,7 +1167,7 @@ impl<'a> Parser<'a> {
|
||||||
let query = self.parse_query()?;
|
let query = self.parse_query()?;
|
||||||
self.expect_token(&Token::RParen)?;
|
self.expect_token(&Token::RParen)?;
|
||||||
Ok(Some(Expr::Function(Function {
|
Ok(Some(Expr::Function(Function {
|
||||||
name: ObjectName(vec![w.to_ident(w_span)]),
|
name: ObjectName(vec![w.clone().into_ident(w_span)]),
|
||||||
uses_odbc_syntax: false,
|
uses_odbc_syntax: false,
|
||||||
parameters: FunctionArguments::None,
|
parameters: FunctionArguments::None,
|
||||||
args: FunctionArguments::Subquery(query),
|
args: FunctionArguments::Subquery(query),
|
||||||
|
@ -1203,11 +1203,12 @@ impl<'a> Parser<'a> {
|
||||||
w_span: Span,
|
w_span: Span,
|
||||||
) -> Result<Expr, ParserError> {
|
) -> Result<Expr, ParserError> {
|
||||||
match self.peek_token().token {
|
match self.peek_token().token {
|
||||||
Token::Period => {
|
Token::Period => self.parse_compound_field_access(
|
||||||
self.parse_compound_field_access(Expr::Identifier(w.to_ident(w_span)), vec![])
|
Expr::Identifier(w.clone().into_ident(w_span)),
|
||||||
}
|
vec![],
|
||||||
|
),
|
||||||
Token::LParen => {
|
Token::LParen => {
|
||||||
let id_parts = vec![w.to_ident(w_span)];
|
let id_parts = vec![w.clone().into_ident(w_span)];
|
||||||
if let Some(expr) = self.parse_outer_join_expr(&id_parts) {
|
if let Some(expr) = self.parse_outer_join_expr(&id_parts) {
|
||||||
Ok(expr)
|
Ok(expr)
|
||||||
} else {
|
} else {
|
||||||
|
@ -1220,7 +1221,7 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) =>
|
Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) =>
|
||||||
{
|
{
|
||||||
let ident = Expr::Identifier(w.to_ident(w_span));
|
let ident = Expr::Identifier(w.clone().into_ident(w_span));
|
||||||
let mut fields = vec![];
|
let mut fields = vec![];
|
||||||
self.parse_multi_dim_subscript(&mut fields)?;
|
self.parse_multi_dim_subscript(&mut fields)?;
|
||||||
self.parse_compound_field_access(ident, fields)
|
self.parse_compound_field_access(ident, fields)
|
||||||
|
@ -1250,11 +1251,11 @@ impl<'a> Parser<'a> {
|
||||||
Token::Arrow if self.dialect.supports_lambda_functions() => {
|
Token::Arrow if self.dialect.supports_lambda_functions() => {
|
||||||
self.expect_token(&Token::Arrow)?;
|
self.expect_token(&Token::Arrow)?;
|
||||||
Ok(Expr::Lambda(LambdaFunction {
|
Ok(Expr::Lambda(LambdaFunction {
|
||||||
params: OneOrManyWithParens::One(w.to_ident(w_span)),
|
params: OneOrManyWithParens::One(w.clone().into_ident(w_span)),
|
||||||
body: Box::new(self.parse_expr()?),
|
body: Box::new(self.parse_expr()?),
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
_ => Ok(Expr::Identifier(w.to_ident(w_span))),
|
_ => Ok(Expr::Identifier(w.clone().into_ident(w_span))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1438,7 +1439,7 @@ impl<'a> Parser<'a> {
|
||||||
} else {
|
} else {
|
||||||
let tok = self.next_token();
|
let tok = self.next_token();
|
||||||
let key = match tok.token {
|
let key = match tok.token {
|
||||||
Token::Word(word) => word.to_ident(tok.span),
|
Token::Word(word) => word.into_ident(tok.span),
|
||||||
_ => {
|
_ => {
|
||||||
return parser_err!(
|
return parser_err!(
|
||||||
format!("Expected identifier, found: {tok}"),
|
format!("Expected identifier, found: {tok}"),
|
||||||
|
@ -1490,7 +1491,7 @@ impl<'a> Parser<'a> {
|
||||||
let next_token = self.next_token();
|
let next_token = self.next_token();
|
||||||
match next_token.token {
|
match next_token.token {
|
||||||
Token::Word(w) => {
|
Token::Word(w) => {
|
||||||
let expr = Expr::Identifier(w.to_ident(next_token.span));
|
let expr = Expr::Identifier(w.into_ident(next_token.span));
|
||||||
chain.push(AccessExpr::Dot(expr));
|
chain.push(AccessExpr::Dot(expr));
|
||||||
if self.peek_token().token == Token::LBracket {
|
if self.peek_token().token == Token::LBracket {
|
||||||
if self.dialect.supports_partiql() {
|
if self.dialect.supports_partiql() {
|
||||||
|
@ -1670,7 +1671,7 @@ impl<'a> Parser<'a> {
|
||||||
while p.consume_token(&Token::Period) {
|
while p.consume_token(&Token::Period) {
|
||||||
let tok = p.next_token();
|
let tok = p.next_token();
|
||||||
let name = match tok.token {
|
let name = match tok.token {
|
||||||
Token::Word(word) => word.to_ident(tok.span),
|
Token::Word(word) => word.into_ident(tok.span),
|
||||||
_ => return p.expected("identifier", tok),
|
_ => return p.expected("identifier", tok),
|
||||||
};
|
};
|
||||||
let func = match p.parse_function(ObjectName(vec![name]))? {
|
let func = match p.parse_function(ObjectName(vec![name]))? {
|
||||||
|
@ -8242,7 +8243,7 @@ impl<'a> Parser<'a> {
|
||||||
// This because snowflake allows numbers as placeholders
|
// This because snowflake allows numbers as placeholders
|
||||||
let next_token = self.next_token();
|
let next_token = self.next_token();
|
||||||
let ident = match next_token.token {
|
let ident = match next_token.token {
|
||||||
Token::Word(w) => Ok(w.to_ident(next_token.span)),
|
Token::Word(w) => Ok(w.into_ident(next_token.span)),
|
||||||
Token::Number(w, false) => Ok(Ident::new(w)),
|
Token::Number(w, false) => Ok(Ident::new(w)),
|
||||||
_ => self.expected("placeholder", next_token),
|
_ => self.expected("placeholder", next_token),
|
||||||
}?;
|
}?;
|
||||||
|
@ -8753,7 +8754,7 @@ impl<'a> Parser<'a> {
|
||||||
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
|
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
|
||||||
// not an alias.)
|
// not an alias.)
|
||||||
Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
|
Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
|
||||||
Ok(Some(w.to_ident(next_token.span)))
|
Ok(Some(w.into_ident(next_token.span)))
|
||||||
}
|
}
|
||||||
// MSSQL supports single-quoted strings as aliases for columns
|
// MSSQL supports single-quoted strings as aliases for columns
|
||||||
// We accept them as table aliases too, although MSSQL does not.
|
// We accept them as table aliases too, although MSSQL does not.
|
||||||
|
@ -8920,7 +8921,7 @@ impl<'a> Parser<'a> {
|
||||||
loop {
|
loop {
|
||||||
match &self.peek_token_ref().token {
|
match &self.peek_token_ref().token {
|
||||||
Token::Word(w) => {
|
Token::Word(w) => {
|
||||||
idents.push(w.to_ident(self.peek_token_ref().span));
|
idents.push(w.clone().into_ident(self.peek_token_ref().span));
|
||||||
}
|
}
|
||||||
Token::EOF | Token::Eq => break,
|
Token::EOF | Token::Eq => break,
|
||||||
_ => {}
|
_ => {}
|
||||||
|
@ -8975,7 +8976,7 @@ impl<'a> Parser<'a> {
|
||||||
// expecting at least one word for identifier
|
// expecting at least one word for identifier
|
||||||
let next_token = self.next_token();
|
let next_token = self.next_token();
|
||||||
match next_token.token {
|
match next_token.token {
|
||||||
Token::Word(w) => idents.push(w.to_ident(next_token.span)),
|
Token::Word(w) => idents.push(w.into_ident(next_token.span)),
|
||||||
Token::EOF => {
|
Token::EOF => {
|
||||||
return Err(ParserError::ParserError(
|
return Err(ParserError::ParserError(
|
||||||
"Empty input when parsing identifier".to_string(),
|
"Empty input when parsing identifier".to_string(),
|
||||||
|
@ -8995,7 +8996,7 @@ impl<'a> Parser<'a> {
|
||||||
Token::Period => {
|
Token::Period => {
|
||||||
let next_token = self.next_token();
|
let next_token = self.next_token();
|
||||||
match next_token.token {
|
match next_token.token {
|
||||||
Token::Word(w) => idents.push(w.to_ident(next_token.span)),
|
Token::Word(w) => idents.push(w.into_ident(next_token.span)),
|
||||||
Token::EOF => {
|
Token::EOF => {
|
||||||
return Err(ParserError::ParserError(
|
return Err(ParserError::ParserError(
|
||||||
"Trailing period in identifier".to_string(),
|
"Trailing period in identifier".to_string(),
|
||||||
|
@ -9024,7 +9025,7 @@ impl<'a> Parser<'a> {
|
||||||
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
|
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
|
||||||
let next_token = self.next_token();
|
let next_token = self.next_token();
|
||||||
match next_token.token {
|
match next_token.token {
|
||||||
Token::Word(w) => Ok(w.to_ident(next_token.span)),
|
Token::Word(w) => Ok(w.into_ident(next_token.span)),
|
||||||
Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)),
|
Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)),
|
||||||
Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)),
|
Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)),
|
||||||
_ => self.expected("identifier", next_token),
|
_ => self.expected("identifier", next_token),
|
||||||
|
@ -9044,9 +9045,10 @@ impl<'a> Parser<'a> {
|
||||||
fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, bool), ParserError> {
|
fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, bool), ParserError> {
|
||||||
match self.peek_token().token {
|
match self.peek_token().token {
|
||||||
Token::Word(w) => {
|
Token::Word(w) => {
|
||||||
|
let quote_style_is_none = w.quote_style.is_none();
|
||||||
let mut requires_whitespace = false;
|
let mut requires_whitespace = false;
|
||||||
let mut ident = w.to_ident(self.next_token().span);
|
let mut ident = w.into_ident(self.next_token().span);
|
||||||
if w.quote_style.is_none() {
|
if quote_style_is_none {
|
||||||
while matches!(self.peek_token_no_skip().token, Token::Minus) {
|
while matches!(self.peek_token_no_skip().token, Token::Minus) {
|
||||||
self.next_token();
|
self.next_token();
|
||||||
ident.value.push('-');
|
ident.value.push('-');
|
||||||
|
@ -13475,6 +13477,7 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Word {
|
impl Word {
|
||||||
|
#[deprecated(since = "0.54.0", note = "please use `into_ident` instead")]
|
||||||
pub fn to_ident(&self, span: Span) -> Ident {
|
pub fn to_ident(&self, span: Span) -> Ident {
|
||||||
Ident {
|
Ident {
|
||||||
value: self.value.clone(),
|
value: self.value.clone(),
|
||||||
|
@ -13482,6 +13485,15 @@ impl Word {
|
||||||
span,
|
span,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Convert this word into an [`Ident`] identifier
|
||||||
|
pub fn into_ident(self, span: Span) -> Ident {
|
||||||
|
Ident {
|
||||||
|
value: self.value,
|
||||||
|
quote_style: self.quote_style,
|
||||||
|
span,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue