mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-16 12:10:15 +00:00
Count characters instead of bytes (#529)
* Count characters instead of bytes
* cargo fmt
* add tests to PR #529
This commit is contained in:
parent
68768530cd
commit
c2ccc80c28
1 changed file with 25 additions and 5 deletions
|
@@ -354,11 +354,15 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
|
||||
Token::Whitespace(Whitespace::Tab) => self.col += 4,
|
||||
Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64,
|
||||
Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2,
|
||||
Token::Number(s, _) => self.col += s.len() as u64,
|
||||
Token::SingleQuotedString(s) => self.col += s.len() as u64,
|
||||
Token::Placeholder(s) => self.col += s.len() as u64,
|
||||
Token::Word(w) if w.quote_style == None => {
|
||||
self.col += w.value.chars().count() as u64
|
||||
}
|
||||
Token::Word(w) if w.quote_style != None => {
|
||||
self.col += w.value.chars().count() as u64 + 2
|
||||
}
|
||||
Token::Number(s, _) => self.col += s.chars().count() as u64,
|
||||
Token::SingleQuotedString(s) => self.col += s.chars().count() as u64,
|
||||
Token::Placeholder(s) => self.col += s.chars().count() as u64,
|
||||
_ => self.col += 1,
|
||||
}
|
||||
|
||||
|
@@ -1220,6 +1224,22 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_unterminated_string_literal_utf8() {
|
||||
let sql = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;");
|
||||
|
||||
let dialect = GenericDialect {};
|
||||
let mut tokenizer = Tokenizer::new(&dialect, &sql);
|
||||
assert_eq!(
|
||||
tokenizer.tokenize(),
|
||||
Err(TokenizerError {
|
||||
message: "Unterminated string literal".to_string(),
|
||||
line: 1,
|
||||
col: 35
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_invalid_string_cols() {
|
||||
let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue