mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-12-23 11:12:51 +00:00
Extract source comments (#2107)
Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
This commit is contained in:
parent
1b842d3b6a
commit
9b8a2d1e22
5 changed files with 459 additions and 7 deletions
329
src/ast/comments.rs
Normal file
329
src/ast/comments.rs
Normal file
|
|
@ -0,0 +1,329 @@
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Provides a representation of source code comments in parsed SQL code.
|
||||
//!
|
||||
//! See [Comments::find] for an example.
|
||||
|
||||
#[cfg(not(feature = "std"))]
|
||||
use alloc::{string::String, vec::Vec};
|
||||
|
||||
use core::{
|
||||
ops::{Bound, Deref, RangeBounds},
|
||||
slice,
|
||||
};
|
||||
|
||||
use crate::tokenizer::{Location, Span};
|
||||
|
||||
/// An opaque container for comments from a parse SQL source code.
|
||||
#[derive(Default, Debug)]
|
||||
pub struct Comments(Vec<CommentWithSpan>);
|
||||
|
||||
impl Comments {
|
||||
/// Accepts `comment` if its the first or is located strictly after the
|
||||
/// last accepted comment. In other words, this method will skip the
|
||||
/// comment if its comming out of order (as encountered in the parsed
|
||||
/// source code.)
|
||||
pub(crate) fn offer(&mut self, comment: CommentWithSpan) {
|
||||
if self
|
||||
.0
|
||||
.last()
|
||||
.map(|last| last.span < comment.span)
|
||||
.unwrap_or(true)
|
||||
{
|
||||
self.0.push(comment);
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds comments starting within the given location range. The order of
|
||||
/// iterator reflects the order of the comments as encountered in the parsed
|
||||
/// source code.
|
||||
///
|
||||
/// # Example
|
||||
/// ```rust
|
||||
/// use sqlparser::{dialect::GenericDialect, parser::Parser, tokenizer::Location};
|
||||
///
|
||||
/// let sql = r#"/*
|
||||
/// header comment ...
|
||||
/// ... spanning multiple lines
|
||||
/// */
|
||||
///
|
||||
/// -- first statement
|
||||
/// SELECT 'hello' /* world */ FROM DUAL;
|
||||
///
|
||||
/// -- second statement
|
||||
/// SELECT 123 FROM DUAL;
|
||||
///
|
||||
/// -- trailing comment
|
||||
/// "#;
|
||||
///
|
||||
/// let (ast, comments) = Parser::parse_sql_with_comments(&GenericDialect, sql).unwrap();
|
||||
///
|
||||
/// // all comments appearing before line seven, i.e. before the first statement itself
|
||||
/// assert_eq!(
|
||||
/// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::<Vec<_>>(),
|
||||
/// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]);
|
||||
///
|
||||
/// // all comments appearing within the first statement
|
||||
/// assert_eq!(
|
||||
/// &comments.find(Location::new(7, 1)..Location::new(8,1)).map(|c| c.as_str()).collect::<Vec<_>>(),
|
||||
/// &[" world "]);
|
||||
///
|
||||
/// // all comments appearing within or after the first statement
|
||||
/// assert_eq!(
|
||||
/// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::<Vec<_>>(),
|
||||
/// &[" world ", " second statement\n", " trailing comment\n"]);
|
||||
/// ```
|
||||
///
|
||||
/// The [Spanned](crate::ast::Spanned) trait allows you to access location
|
||||
/// information for certain AST nodes.
|
||||
pub fn find<R: RangeBounds<Location>>(&self, range: R) -> Iter<'_> {
|
||||
let (start, end) = (
|
||||
self.start_index(range.start_bound()),
|
||||
self.end_index(range.end_bound()),
|
||||
);
|
||||
debug_assert!((0..=self.0.len()).contains(&start));
|
||||
debug_assert!((0..=self.0.len()).contains(&end));
|
||||
// in case the user specified a reverse range
|
||||
Iter(if start <= end {
|
||||
self.0[start..end].iter()
|
||||
} else {
|
||||
self.0[0..0].iter()
|
||||
})
|
||||
}
|
||||
|
||||
/// Find the index of the first comment starting "before" the given location.
|
||||
///
|
||||
/// The returned index is _inclusive_ and within the range of `0..=self.0.len()`.
|
||||
fn start_index(&self, location: Bound<&Location>) -> usize {
|
||||
match location {
|
||||
Bound::Included(location) => {
|
||||
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
|
||||
Ok(i) => i,
|
||||
Err(i) => i,
|
||||
}
|
||||
}
|
||||
Bound::Excluded(location) => {
|
||||
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
|
||||
Ok(i) => i + 1,
|
||||
Err(i) => i,
|
||||
}
|
||||
}
|
||||
Bound::Unbounded => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the index of the first comment starting "after" the given location.
|
||||
///
|
||||
/// The returned index is _exclusive_ and within the range of `0..=self.0.len()`.
|
||||
fn end_index(&self, location: Bound<&Location>) -> usize {
|
||||
match location {
|
||||
Bound::Included(location) => {
|
||||
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
|
||||
Ok(i) => i + 1,
|
||||
Err(i) => i,
|
||||
}
|
||||
}
|
||||
Bound::Excluded(location) => {
|
||||
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
|
||||
Ok(i) => i,
|
||||
Err(i) => i,
|
||||
}
|
||||
}
|
||||
Bound::Unbounded => self.0.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Unwraps the container, handing out the underlying vector of comments
/// unchanged (same elements, same order as stored).
impl From<Comments> for Vec<CommentWithSpan> {
|
||||
fn from(comments: Comments) -> Self {
|
||||
comments.0
|
||||
}
|
||||
}
|
||||
|
||||
/// A source code comment together with the span it occupies in the source.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct CommentWithSpan {
|
||||
/// The source code comment itself
|
||||
pub comment: Comment,
|
||||
/// The span of the comment including its markers
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
/// Dereferences to the wrapped [Comment], so that its methods (e.g.
/// `as_str`) can be called directly on a `CommentWithSpan`.
impl Deref for CommentWithSpan {
|
||||
type Target = Comment;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.comment
|
||||
}
|
||||
}
|
||||
|
||||
/// A unified type of the different source code comment formats.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Comment {
|
||||
/// A single line comment, typically introduced with a prefix and spanning
|
||||
/// until end-of-line or end-of-file in the source code.
|
||||
///
|
||||
/// `prefix` holds the marker that introduced the comment (e.g. `--`);
/// `content` holds the text following that marker.
///
/// Note: `content` will include the terminating new-line character, if any.
|
||||
SingleLine { content: String, prefix: String },
|
||||
|
||||
/// A multi-line comment, typically enclosed in `/* .. */` markers. The
|
||||
/// string represents the content excluding the markers.
|
||||
MultiLine(String),
|
||||
}
|
||||
|
||||
impl Comment {
|
||||
/// Retrieves the content of the comment as a string slice.
///
/// For a single-line comment this is its `content` only; the `prefix` is
/// not part of the returned slice.
|
||||
pub fn as_str(&self) -> &str {
|
||||
match self {
|
||||
Comment::SingleLine { content, prefix: _ } => content.as_str(),
|
||||
Comment::MultiLine(content) => content.as_str(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Dereferences to the comment's content, i.e. the same string slice that
/// `as_str` returns.
impl Deref for Comment {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque iterator implementation over comments served by [Comments::find].
|
||||
pub struct Iter<'a>(slice::Iter<'a, CommentWithSpan>);
|
||||
|
||||
impl<'a> Iterator for Iter<'a> {
|
||||
type Item = &'a CommentWithSpan;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.0.next()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `Comments::find` with open, half-open, closed and reversed
    /// location ranges over a small, hand-built set of comments.
    #[test]
    fn test_find() {
        let comments = {
            // ```
            // -- abc
            // /* hello */--, world
            // /* def
            // ghi
            // jkl
            // */
            // ```
            let mut c = Comments(Vec::new());
            c.offer(CommentWithSpan {
                comment: Comment::SingleLine {
                    content: " abc".into(),
                    prefix: "--".into(),
                },
                span: Span::new((1, 1).into(), (1, 7).into()),
            });
            c.offer(CommentWithSpan {
                comment: Comment::MultiLine(" hello ".into()),
                span: Span::new((2, 3).into(), (2, 14).into()),
            });
            c.offer(CommentWithSpan {
                comment: Comment::SingleLine {
                    content: ", world".into(),
                    prefix: "--".into(),
                },
                span: Span::new((2, 14).into(), (2, 21).into()),
            });
            c.offer(CommentWithSpan {
                comment: Comment::MultiLine(" def\n ghi\n jkl\n".into()),
                span: Span::new((3, 3).into(), (7, 1).into()),
            });
            c
        };

        // Collects the content of all comments found within `range`.
        fn find<R: RangeBounds<Location>>(comments: &Comments, range: R) -> Vec<&str> {
            comments.find(range).map(|c| c.as_str()).collect::<Vec<_>>()
        }

        // ~ end-points only --------------------------------------------------
        assert_eq!(find(&comments, ..Location::new(0, 0)), Vec::<&str>::new());
        assert_eq!(find(&comments, ..Location::new(2, 1)), vec![" abc"]);
        assert_eq!(find(&comments, ..Location::new(2, 3)), vec![" abc"]);
        assert_eq!(
            find(&comments, ..=Location::new(2, 3)),
            vec![" abc", " hello "]
        );
        // an inclusive end-point located in the middle of a comment
        assert_eq!(
            find(&comments, ..=Location::new(2, 10)),
            vec![" abc", " hello "]
        );
        assert_eq!(
            find(&comments, ..Location::new(2, 15)),
            vec![" abc", " hello ", ", world"]
        );

        // ~ start-points only ------------------------------------------------
        assert_eq!(
            find(&comments, Location::new(1000, 1000)..),
            Vec::<&str>::new()
        );
        assert_eq!(
            find(&comments, Location::new(2, 14)..),
            vec![", world", " def\n ghi\n jkl\n"]
        );
        assert_eq!(
            find(&comments, Location::new(2, 15)..),
            vec![" def\n ghi\n jkl\n"]
        );
        assert_eq!(
            find(&comments, Location::new(0, 0)..),
            vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..),
            vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
        );

        // ~ ranges -----------------------------------------------------------
        // a reversed range finds nothing
        assert_eq!(
            find(&comments, Location::new(2, 1)..Location::new(1, 1)),
            Vec::<&str>::new()
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..Location::new(2, 3)),
            vec![" abc"]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..=Location::new(2, 3)),
            vec![" abc", " hello "]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..=Location::new(2, 10)),
            vec![" abc", " hello "]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..=Location::new(2, 14)),
            vec![" abc", " hello ", ", world"]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..Location::new(2, 15)),
            vec![" abc", " hello ", ", world"]
        );

        // ~ find everything --------------------------------------------------
        assert_eq!(
            find(&comments, ..),
            vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
        );
    }
}
|
||||
|
|
@ -136,6 +136,7 @@ mod query;
|
|||
mod spans;
|
||||
pub use spans::Spanned;
|
||||
|
||||
pub mod comments;
|
||||
mod trigger;
|
||||
mod value;
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ use core::iter;
|
|||
use crate::tokenizer::Span;
|
||||
|
||||
use super::{
|
||||
dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation,
|
||||
comments, dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation,
|
||||
AlterIndexOperation, AlterTableOperation, Analyze, Array, Assignment, AssignmentTarget,
|
||||
AttachedToken, BeginEndStatements, CaseStatement, CloseCursor, ClusteredIndex, ColumnDef,
|
||||
ColumnOption, ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements,
|
||||
|
|
@ -2477,6 +2477,12 @@ impl Spanned for OutputClause {
|
|||
}
|
||||
}
|
||||
|
||||
/// The span of a comment is the one recorded in its `span` field.
impl Spanned for comments::CommentWithSpan {
|
||||
fn span(&self) -> Span {
|
||||
self.span
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect};
|
||||
|
|
|
|||
|
|
@ -32,14 +32,17 @@ use recursion::RecursionCounter;
|
|||
use IsLateral::*;
|
||||
use IsOptional::*;
|
||||
|
||||
use crate::ast::helpers::{
|
||||
key_value_options::{
|
||||
KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter,
|
||||
},
|
||||
stmt_create_table::{CreateTableBuilder, CreateTableConfiguration},
|
||||
};
|
||||
use crate::ast::Statement::CreatePolicy;
|
||||
use crate::ast::*;
|
||||
use crate::ast::{
|
||||
comments,
|
||||
helpers::{
|
||||
key_value_options::{
|
||||
KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter,
|
||||
},
|
||||
stmt_create_table::{CreateTableBuilder, CreateTableConfiguration},
|
||||
},
|
||||
};
|
||||
use crate::dialect::*;
|
||||
use crate::keywords::{Keyword, ALL_KEYWORDS};
|
||||
use crate::tokenizer::*;
|
||||
|
|
@ -530,6 +533,44 @@ impl<'a> Parser<'a> {
|
|||
Parser::new(dialect).try_with_sql(sql)?.parse_statements()
|
||||
}
|
||||
|
||||
/// Parses the given `sql` into an Abstract Syntax Tree (AST), also
|
||||
/// returning the source code comments encountered in it.
|
||||
///
|
||||
/// See [Parser::parse_sql].
///
/// # Errors
///
/// Returns the `ParserError` raised by `try_with_sql` or by
/// `parse_statements`; comments are collected only when parsing succeeded.
|
||||
pub fn parse_sql_with_comments(
|
||||
dialect: &'a dyn Dialect,
|
||||
sql: &str,
|
||||
) -> Result<(Vec<Statement>, comments::Comments), ParserError> {
|
||||
let mut p = Parser::new(dialect).try_with_sql(sql)?;
|
||||
p.parse_statements().map(|stmts| (stmts, p.into_comments()))
|
||||
}
|
||||
|
||||
/// Consumes this parser, returning the comments found in its token stream.
///
/// Walks all tokens in order and offers every single-line and multi-line
/// comment token, together with the token's span, to a fresh
/// `comments::Comments`; all other tokens are ignored.
|
||||
fn into_comments(self) -> comments::Comments {
|
||||
let mut comments = comments::Comments::default();
|
||||
for t in self.tokens.into_iter() {
|
||||
match t.token {
|
||||
Token::Whitespace(Whitespace::SingleLineComment { comment, prefix }) => {
|
||||
comments.offer(comments::CommentWithSpan {
|
||||
comment: comments::Comment::SingleLine {
|
||||
content: comment,
|
||||
prefix,
|
||||
},
|
||||
span: t.span,
|
||||
});
|
||||
}
|
||||
Token::Whitespace(Whitespace::MultiLineComment(comment)) => {
|
||||
comments.offer(comments::CommentWithSpan {
|
||||
comment: comments::Comment::MultiLine(comment),
|
||||
span: t.span,
|
||||
});
|
||||
}
|
||||
// not a comment token
_ => {}
|
||||
}
|
||||
}
|
||||
comments
|
||||
}
|
||||
|
||||
/// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.),
|
||||
/// stopping before the statement separator, if any.
|
||||
pub fn parse_statement(&mut self) -> Result<Statement, ParserError> {
|
||||
|
|
|
|||
75
tests/sqlparser_comments.rs
Normal file
75
tests/sqlparser_comments.rs
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#![warn(clippy::all)]
|
||||
//! Test comment extraction from SQL source code.
|
||||
|
||||
#[cfg(test)]
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use sqlparser::{
|
||||
ast::comments::{Comment, CommentWithSpan},
|
||||
dialect::GenericDialect,
|
||||
parser::Parser,
|
||||
tokenizer::Span,
|
||||
};
|
||||
|
||||
// Parses a small script containing single-line and multi-line comments and
// checks each extracted comment together with its span, including a final
// single-line comment that is not terminated by a new-line.
#[test]
|
||||
fn parse_sql_with_comments() {
|
||||
let sql = r#"
|
||||
-- second line comment
|
||||
select * from /* inline comment after `from` */ dual;
|
||||
|
||||
/*select
|
||||
some
|
||||
more*/
|
||||
|
||||
-- end-of-script-with-no-newline"#;
|
||||
|
||||
// the parsed statements are irrelevant here; only the comments are kept
let comments = match Parser::parse_sql_with_comments(&GenericDialect, sql) {
|
||||
Ok((_, comments)) => comments,
|
||||
Err(e) => panic!("Invalid sql script: {e}"),
|
||||
};
|
||||
|
||||
// comments are expected in source order
assert_eq!(
|
||||
Vec::from(comments),
|
||||
vec![
|
||||
CommentWithSpan {
|
||||
comment: Comment::SingleLine {
|
||||
content: " second line comment\n".into(),
|
||||
prefix: "--".into()
|
||||
},
|
||||
span: Span::new((2, 1).into(), (3, 1).into()),
|
||||
},
|
||||
CommentWithSpan {
|
||||
comment: Comment::MultiLine(" inline comment after `from` ".into()),
|
||||
span: Span::new((3, 15).into(), (3, 48).into()),
|
||||
},
|
||||
CommentWithSpan {
|
||||
comment: Comment::MultiLine("select\nsome\nmore".into()),
|
||||
span: Span::new((5, 1).into(), (7, 7).into())
|
||||
},
|
||||
CommentWithSpan {
|
||||
comment: Comment::SingleLine {
|
||||
content: " end-of-script-with-no-newline".into(),
|
||||
prefix: "--".into()
|
||||
},
|
||||
span: Span::new((9, 3).into(), (9, 35).into()),
|
||||
}
|
||||
]
|
||||
);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue