Use binary search to speed up matching keywords (#191)

This commit is contained in:
Daniël Heres 2020-06-07 19:25:10 +02:00 committed by GitHub
parent af54eb02b2
commit a42121de52
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 18 deletions

View file

@@ -49,11 +49,11 @@ macro_rules! define_keywords {
     }
 }
 
+// The following keywords should be sorted to be able to match using binary search
 define_keywords!(
     ABS,
     ACTION,
     ADD,
-    ASC,
     ALL,
     ALLOCATE,
     ALTER,
@@ -65,6 +65,7 @@ define_keywords!(
     ARRAY_AGG,
     ARRAY_MAX_CARDINALITY,
     AS,
+    ASC,
     ASENSITIVE,
     ASYMMETRIC,
     AT,
@@ -93,9 +94,9 @@ define_keywords!(
     CEILING,
     CHAIN,
     CHAR,
-    CHAR_LENGTH,
     CHARACTER,
     CHARACTER_LENGTH,
+    CHAR_LENGTH,
     CHECK,
     CLOB,
     CLOSE,
@@ -158,6 +159,7 @@ define_keywords!(
     ELEMENT,
     ELSE,
     END,
+    END_EXEC = "END-EXEC",
     END_FRAME,
     END_PARTITION,
     EQUALS,
@@ -175,8 +177,8 @@ define_keywords!(
     FALSE,
     FETCH,
     FIELDS,
-    FIRST,
     FILTER,
+    FIRST,
     FIRST_VALUE,
     FLOAT,
     FLOOR,
@@ -255,8 +257,8 @@ define_keywords!(
     NATURAL,
     NCHAR,
     NCLOB,
-    NEXT,
     NEW,
+    NEXT,
     NO,
     NONE,
     NORMALIZE,
@@ -268,8 +270,8 @@ define_keywords!(
     NULLS,
     NUMERIC,
     OBJECT,
-    OCTET_LENGTH,
     OCCURRENCES_REGEX,
+    OCTET_LENGTH,
     OF,
     OFFSET,
     OLD,
@@ -285,12 +287,12 @@ define_keywords!(
     OVERLAPS,
     OVERLAY,
     PARAMETER,
-    PARTITION,
     PARQUET,
+    PARTITION,
     PERCENT,
-    PERCENT_RANK,
     PERCENTILE_CONT,
     PERCENTILE_DISC,
+    PERCENT_RANK,
     PERIOD,
     PORTION,
     POSITION,
@@ -332,8 +334,8 @@ define_keywords!(
     ROLLBACK,
     ROLLUP,
     ROW,
-    ROW_NUMBER,
     ROWS,
+    ROW_NUMBER,
     SAVEPOINT,
     SCHEMA,
     SCOPE,
@@ -390,10 +392,10 @@ define_keywords!(
     TRANSLATION,
     TREAT,
     TRIGGER,
-    TRUNCATE,
     TRIM,
     TRIM_ARRAY,
     TRUE,
+    TRUNCATE,
     UESCAPE,
     UNBOUNDED,
     UNCOMMITTED,
@@ -409,11 +411,11 @@ define_keywords!(
     VALUE,
     VALUES,
     VALUE_OF,
-    VAR_POP,
-    VAR_SAMP,
     VARBINARY,
     VARCHAR,
     VARYING,
+    VAR_POP,
+    VAR_SAMP,
     VERSIONING,
     VIEW,
     WHEN,
@@ -424,11 +426,10 @@ define_keywords!(
     WITH,
     WITHIN,
     WITHOUT,
-    WRITE,
     WORK,
+    WRITE,
     YEAR,
-    ZONE,
-    END_EXEC = "END-EXEC"
+    ZONE
 );
 
 /// These keywords can't be used as a table alias, so that `FROM table_name alias`

View file

@@ -143,10 +143,9 @@ impl Token {
     }
     pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
         let word_uppercase = word.to_uppercase();
-        //TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is
-        // not fast but I want the simplicity for now while I experiment with pluggable
-        // dialects
-        let is_keyword = quote_style == None && ALL_KEYWORDS.contains(&word_uppercase.as_str());
+        //TODO: validate use of a hashset (e.g. FnvHashSet) compared to using binary search
+        let is_keyword =
+            quote_style == None && ALL_KEYWORDS.binary_search(&word_uppercase.as_str()).is_ok();
         Token::Word(Word {
             value: word.to_string(),
             quote_style,

View file

@@ -21,6 +21,7 @@
 use matches::assert_matches;
 
 use sqlparser::ast::*;
+use sqlparser::dialect::keywords::ALL_KEYWORDS;
 use sqlparser::parser::*;
 use sqlparser::test_utils::{all_dialects, expr_from_projection, number, only};

@@ -2851,6 +2852,14 @@ fn parse_drop_index() {
     }
 }

+#[test]
+fn keywords_sorted() {
+    // assert!(ALL_KEYWORDS.is_sorted())
+    let mut copy = Vec::from(ALL_KEYWORDS);
+    copy.sort();
+    assert!(copy == ALL_KEYWORDS)
+}
+
 fn parse_sql_statements(sql: &str) -> Result<Vec<Statement>, ParserError> {
     all_dialects().parse_sql_statements(sql)
 }