mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-18 21:20:15 +00:00
Count characters instead of bytes (#529)
* Count characters instead of bytes
* cargo fmt
* add tests to PR #529
This commit is contained in:
parent
68768530cd
commit
c2ccc80c28
1 changed file with 25 additions and 5 deletions
|
@@ -354,11 +354,15 @@ impl<'a> Tokenizer<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
Token::Whitespace(Whitespace::Tab) => self.col += 4,
|
Token::Whitespace(Whitespace::Tab) => self.col += 4,
|
||||||
Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64,
|
Token::Word(w) if w.quote_style == None => {
|
||||||
Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2,
|
self.col += w.value.chars().count() as u64
|
||||||
Token::Number(s, _) => self.col += s.len() as u64,
|
}
|
||||||
Token::SingleQuotedString(s) => self.col += s.len() as u64,
|
Token::Word(w) if w.quote_style != None => {
|
||||||
Token::Placeholder(s) => self.col += s.len() as u64,
|
self.col += w.value.chars().count() as u64 + 2
|
||||||
|
}
|
||||||
|
Token::Number(s, _) => self.col += s.chars().count() as u64,
|
||||||
|
Token::SingleQuotedString(s) => self.col += s.chars().count() as u64,
|
||||||
|
Token::Placeholder(s) => self.col += s.chars().count() as u64,
|
||||||
_ => self.col += 1,
|
_ => self.col += 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1220,6 +1224,22 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokenize_unterminated_string_literal_utf8() {
|
||||||
|
let sql = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;");
|
||||||
|
|
||||||
|
let dialect = GenericDialect {};
|
||||||
|
let mut tokenizer = Tokenizer::new(&dialect, &sql);
|
||||||
|
assert_eq!(
|
||||||
|
tokenizer.tokenize(),
|
||||||
|
Err(TokenizerError {
|
||||||
|
message: "Unterminated string literal".to_string(),
|
||||||
|
line: 1,
|
||||||
|
col: 35
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenize_invalid_string_cols() {
|
fn tokenize_invalid_string_cols() {
|
||||||
let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");
|
let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue