Count characters instead of bytes (#529)

* Count characters instead of bytes

* cargo fmt

* add tests to PR #529
This commit is contained in:
Mykhailo Bondarenko 2022-07-07 20:45:59 +03:00 committed by GitHub
parent 68768530cd
commit c2ccc80c28
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -354,11 +354,15 @@ impl<'a> Tokenizer<'a> {
}
Token::Whitespace(Whitespace::Tab) => self.col += 4,
Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64,
Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2,
Token::Number(s, _) => self.col += s.len() as u64,
Token::SingleQuotedString(s) => self.col += s.len() as u64,
Token::Placeholder(s) => self.col += s.len() as u64,
Token::Word(w) if w.quote_style == None => {
self.col += w.value.chars().count() as u64
}
Token::Word(w) if w.quote_style != None => {
self.col += w.value.chars().count() as u64 + 2
}
Token::Number(s, _) => self.col += s.chars().count() as u64,
Token::SingleQuotedString(s) => self.col += s.chars().count() as u64,
Token::Placeholder(s) => self.col += s.chars().count() as u64,
_ => self.col += 1,
}
@ -1220,6 +1224,22 @@ mod tests {
);
}
/// An unterminated string literal that follows multi-byte quoted identifiers
/// must be reported at a column measured in characters, not UTF-8 bytes.
#[test]
fn tokenize_unterminated_string_literal_utf8() {
    // Each `なにか` is three characters, so the opening `'` of the
    // unterminated literal is the 35th character on the line.
    let query = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;");
    let generic = GenericDialect {};
    let expected_error = TokenizerError {
        message: String::from("Unterminated string literal"),
        line: 1,
        col: 35,
    };

    let mut lexer = Tokenizer::new(&generic, &query);
    let outcome = lexer.tokenize();

    assert_eq!(outcome, Err(expected_error));
}
#[test]
fn tokenize_invalid_string_cols() {
let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");