Implement zero-copy tokenization for Word, SingleQuotedString, and Whitespace

Convert token string fields to use Cow<'a, str> to enable zero-copy tokenization
  for commonly used tokens:
  - Word.value: Regular identifiers and keywords now borrow from source
  - SingleQuotedString: String literals borrow when no escape processing needed
  - Whitespace: Single-line and multi-line comments borrow from source

Also add a benchmark for measuring tokenization performance.
This commit is contained in:
Eyal Leshem 2025-12-03 16:05:06 +02:00
parent 0f17b327b9
commit 5458a2b21d
11 changed files with 1288 additions and 224 deletions

View file

@@ -2629,7 +2629,7 @@ fn test_export_data() {
body: Box::new(SetExpr::Select(Box::new(Select {
select_token: AttachedToken(TokenWithSpan::new(
Token::Word(Word {
-value: "SELECT".to_string(),
+value: "SELECT".to_string().into(),
quote_style: None,
keyword: Keyword::SELECT,
}),
@@ -2733,7 +2733,7 @@ fn test_export_data() {
body: Box::new(SetExpr::Select(Box::new(Select {
select_token: AttachedToken(TokenWithSpan::new(
Token::Word(Word {
-value: "SELECT".to_string(),
+value: "SELECT".to_string().into(),
quote_style: None,
keyword: Keyword::SELECT,
}),