Use binary search to speed up matching keywords (#191)

This commit is contained in:
Daniël Heres 2020-06-07 19:25:10 +02:00 committed by GitHub
parent af54eb02b2
commit a42121de52
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 18 deletions

View file

@ -49,11 +49,11 @@ macro_rules! define_keywords {
}
}
// The following keywords must be kept sorted so they can be matched using binary search
define_keywords!(
ABS,
ACTION,
ADD,
ASC,
ALL,
ALLOCATE,
ALTER,
@ -65,6 +65,7 @@ define_keywords!(
ARRAY_AGG,
ARRAY_MAX_CARDINALITY,
AS,
ASC,
ASENSITIVE,
ASYMMETRIC,
AT,
@ -93,9 +94,9 @@ define_keywords!(
CEILING,
CHAIN,
CHAR,
CHAR_LENGTH,
CHARACTER,
CHARACTER_LENGTH,
CHAR_LENGTH,
CHECK,
CLOB,
CLOSE,
@ -158,6 +159,7 @@ define_keywords!(
ELEMENT,
ELSE,
END,
END_EXEC = "END-EXEC",
END_FRAME,
END_PARTITION,
EQUALS,
@ -175,8 +177,8 @@ define_keywords!(
FALSE,
FETCH,
FIELDS,
FIRST,
FILTER,
FIRST,
FIRST_VALUE,
FLOAT,
FLOOR,
@ -255,8 +257,8 @@ define_keywords!(
NATURAL,
NCHAR,
NCLOB,
NEXT,
NEW,
NEXT,
NO,
NONE,
NORMALIZE,
@ -268,8 +270,8 @@ define_keywords!(
NULLS,
NUMERIC,
OBJECT,
OCTET_LENGTH,
OCCURRENCES_REGEX,
OCTET_LENGTH,
OF,
OFFSET,
OLD,
@ -285,12 +287,12 @@ define_keywords!(
OVERLAPS,
OVERLAY,
PARAMETER,
PARTITION,
PARQUET,
PARTITION,
PERCENT,
PERCENT_RANK,
PERCENTILE_CONT,
PERCENTILE_DISC,
PERCENT_RANK,
PERIOD,
PORTION,
POSITION,
@ -332,8 +334,8 @@ define_keywords!(
ROLLBACK,
ROLLUP,
ROW,
ROW_NUMBER,
ROWS,
ROW_NUMBER,
SAVEPOINT,
SCHEMA,
SCOPE,
@ -390,10 +392,10 @@ define_keywords!(
TRANSLATION,
TREAT,
TRIGGER,
TRUNCATE,
TRIM,
TRIM_ARRAY,
TRUE,
TRUNCATE,
UESCAPE,
UNBOUNDED,
UNCOMMITTED,
@ -409,11 +411,11 @@ define_keywords!(
VALUE,
VALUES,
VALUE_OF,
VAR_POP,
VAR_SAMP,
VARBINARY,
VARCHAR,
VARYING,
VAR_POP,
VAR_SAMP,
VERSIONING,
VIEW,
WHEN,
@ -424,11 +426,10 @@ define_keywords!(
WITH,
WITHIN,
WITHOUT,
WRITE,
WORK,
WRITE,
YEAR,
ZONE,
END_EXEC = "END-EXEC"
ZONE
);
/// These keywords can't be used as a table alias, so that `FROM table_name alias`

View file

@ -143,10 +143,9 @@ impl Token {
}
pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
let word_uppercase = word.to_uppercase();
//TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is
// not fast but I want the simplicity for now while I experiment with pluggable
// dialects
let is_keyword = quote_style == None && ALL_KEYWORDS.contains(&word_uppercase.as_str());
//TODO: benchmark a hash set (e.g. FnvHashSet) against this binary search over sorted keywords
let is_keyword =
quote_style == None && ALL_KEYWORDS.binary_search(&word_uppercase.as_str()).is_ok();
Token::Word(Word {
value: word.to_string(),
quote_style,

View file

@ -21,6 +21,7 @@
use matches::assert_matches;
use sqlparser::ast::*;
use sqlparser::dialect::keywords::ALL_KEYWORDS;
use sqlparser::parser::*;
use sqlparser::test_utils::{all_dialects, expr_from_projection, number, only};
@ -2851,6 +2852,14 @@ fn parse_drop_index() {
}
}
#[test]
fn keywords_sorted() {
    // ALL_KEYWORDS is matched with `binary_search` in the tokenizer, which is
    // only correct if the list is sorted; this test guards that invariant.
    // TODO: replace with `assert!(ALL_KEYWORDS.is_sorted())` once `is_sorted`
    // is stabilized; until then, compare against a sorted copy.
    let mut copy = Vec::from(ALL_KEYWORDS);
    // `sort_unstable` is fine here: keywords are distinct, and it avoids the
    // allocation that stable `sort` performs.
    copy.sort_unstable();
    // `assert_eq!` (rather than `assert!(copy == ...)`) prints both sequences
    // on failure, making the out-of-order keyword easy to spot.
    assert_eq!(copy, ALL_KEYWORDS);
}
fn parse_sql_statements(sql: &str) -> Result<Vec<Statement>, ParserError> {
all_dialects().parse_sql_statements(sql)
}