Implement zero-copy tokenization for Word, SingleQuotedString, and Whitespace

Convert token string fields to use Cow<'a, str> to enable zero-copy tokenization
  for commonly used tokens:
  - Word.value: Regular identifiers and keywords now borrow from source
  - SingleQuotedString: String literals borrow when no escape processing needed
  - Whitespace: Single-line and multi-line comments borrow from source

Also adds a benchmark for measuring tokenization performance.
This commit is contained in:
Eyal Leshem 2025-12-03 16:05:06 +02:00
parent 0f17b327b9
commit 5458a2b21d
11 changed files with 1288 additions and 224 deletions

5
.gitignore vendored
View file

@ -18,4 +18,7 @@ Cargo.lock
*.swp
.DS_store
.DS_store
# dhat profiler output files
dhat*.json

View file

@ -48,6 +48,7 @@ visitor = ["sqlparser_derive"]
bigdecimal = { version = "0.4.1", features = ["serde"], optional = true }
log = "0.4"
recursive = { version = "0.1.1", optional = true}
unicase = "2.7"
serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
# serde_json is only used in examples/cli, but we have to put it outside
@ -60,7 +61,12 @@ sqlparser_derive = { version = "0.4.0", path = "derive", optional = true }
simple_logger = "5.0"
matches = "0.1"
pretty_assertions = "1"
sysinfo = "0.30"
dhat = "0.3.3"
criterion = "0.5"
[package.metadata.docs.rs]
# Document these features on docs.rs
features = ["serde", "visitor"]
features = ["serde", "visitor"]

View file

@ -31,3 +31,7 @@ criterion = "0.7"
[[bench]]
name = "sqlparser_bench"
harness = false
[[bench]]
name = "tokenize_bench"
harness = false

View file

@ -0,0 +1,862 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Benchmark tokenization performance
//!
//! This benchmark measures tokenization speed using a complex SQL query
//! with many identifiers, keywords, string literals, and comments.
use criterion::{criterion_group, criterion_main, Criterion};
use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::Tokenizer;
const COMPLEX_SQL: &str = r#"
-- ============================================================================
-- Enterprise Sales Analytics Dashboard Query
-- ============================================================================
-- Purpose: Comprehensive sales analysis across multiple dimensions
-- Author: Analytics Team
-- Last Modified: 2024-01-15
-- ============================================================================
/*
* This query aggregates sales data from multiple sources:
* - Customer transactions and lifetime value
* - Product performance across categories
* - Regional sales trends and patterns
* - Employee commission calculations
* - Inventory and fulfillment metrics
*/
WITH customer_segments AS (
-- Segment customers by purchase behavior and demographics
SELECT
customer_id,
customer_number,
customer_name,
customer_type,
customer_status,
customer_tier,
email_address,
phone_number,
mobile_number,
fax_number,
date_of_birth,
registration_date,
last_login_date,
account_status,
email_verified,
phone_verified,
-- Address information
billing_address_line1,
billing_address_line2,
billing_city,
billing_state,
billing_postal_code,
billing_country,
shipping_address_line1,
shipping_address_line2,
shipping_city,
shipping_state,
shipping_postal_code,
shipping_country,
-- Demographics
gender,
age_group,
income_bracket,
education_level,
occupation,
marital_status,
-- Marketing preferences
marketing_opt_in,
sms_opt_in,
email_frequency,
preferred_channel,
preferred_language,
-- Calculated fields
CASE
WHEN customer_status = 'active' AND last_login_date >= CURRENT_DATE - INTERVAL '30' DAY THEN 'highly_active'
WHEN customer_status = 'active' AND last_login_date >= CURRENT_DATE - INTERVAL '90' DAY THEN 'active'
WHEN customer_status = 'active' THEN 'inactive'
ELSE 'dormant'
END AS activity_level,
CASE
WHEN registration_date >= CURRENT_DATE - INTERVAL '1' YEAR THEN 'new'
WHEN registration_date >= CURRENT_DATE - INTERVAL '3' YEAR THEN 'established'
ELSE 'veteran'
END AS customer_tenure
FROM customers
WHERE customer_status IN ('active', 'pending', 'suspended')
AND registration_date >= '2020-01-01'
AND billing_country IN ('USA', 'Canada', 'Mexico', 'UK', 'Germany', 'France', 'Spain', 'Italy')
AND email_address NOT LIKE '%@test.com'
AND email_address NOT LIKE '%@example.com'
AND customer_name IS NOT NULL
),
product_catalog AS (
-- Product information with categories and attributes
SELECT
product_id,
product_sku,
product_name,
product_description,
product_category,
product_subcategory,
product_brand,
product_manufacturer,
product_supplier,
product_model,
product_series,
product_version,
-- Pricing
list_price,
cost_price,
sale_price,
wholesale_price,
minimum_price,
suggested_retail_price,
-- Attributes
product_color,
product_size,
product_weight,
product_length,
product_width,
product_height,
product_material,
product_warranty,
-- Inventory
stock_quantity,
reorder_level,
reorder_quantity,
warehouse_location,
bin_location,
aisle_number,
shelf_number,
-- Status
product_status,
availability_status,
is_featured,
is_new_arrival,
is_on_sale,
is_clearance,
is_discontinued,
launch_date,
discontinuation_date,
-- Ratings
average_rating,
review_count,
return_rate,
defect_rate,
-- Categories
CASE
WHEN product_category = 'electronics' THEN 'high_tech'
WHEN product_category IN ('clothing', 'shoes', 'accessories') THEN 'fashion'
WHEN product_category IN ('home', 'garden', 'furniture') THEN 'home_living'
WHEN product_category IN ('sports', 'outdoor', 'fitness') THEN 'active_lifestyle'
ELSE 'general_merchandise'
END AS category_group
FROM products
WHERE product_status = 'active'
AND availability_status IN ('in_stock', 'low_stock', 'backorder')
AND is_discontinued = FALSE
AND launch_date <= CURRENT_DATE
),
order_transactions AS (
-- Order and transaction details
SELECT
order_id,
order_number,
order_date,
order_time,
order_timestamp,
customer_id,
order_status,
order_type,
order_channel,
order_source,
-- Payment information
payment_method,
payment_status,
payment_date,
payment_reference,
transaction_id,
authorization_code,
-- Financial details
subtotal_amount,
tax_amount,
shipping_amount,
discount_amount,
coupon_amount,
gift_card_amount,
total_amount,
paid_amount,
refund_amount,
net_amount,
-- Shipping details
shipping_method,
shipping_carrier,
tracking_number,
shipped_date,
estimated_delivery_date,
actual_delivery_date,
delivery_status,
signature_required,
-- Location
ship_to_address_line1,
ship_to_address_line2,
ship_to_city,
ship_to_state,
ship_to_postal_code,
ship_to_country,
-- Fulfillment
warehouse_id,
fulfillment_center,
picker_id,
packer_id,
shipper_id,
-- Timestamps
created_at,
updated_at,
completed_at,
cancelled_at,
-- Flags
is_gift,
is_rush_order,
is_international,
requires_signature,
is_business_order,
-- Notes
customer_notes,
internal_notes,
gift_message,
special_instructions
FROM orders
WHERE order_date >= '2023-01-01'
AND order_date < '2024-12-31'
AND order_status IN ('pending', 'processing', 'shipped', 'delivered', 'completed')
AND order_type IN ('standard', 'express', 'overnight', 'international')
AND total_amount > 0
AND customer_id IS NOT NULL
),
order_line_items AS (
-- Individual line items from orders
SELECT
line_item_id,
order_id,
product_id,
line_number,
-- Quantities
quantity_ordered,
quantity_shipped,
quantity_cancelled,
quantity_returned,
quantity_damaged,
-- Pricing
unit_price,
unit_cost,
unit_discount,
line_subtotal,
line_tax,
line_shipping,
line_total,
-- Discounts
discount_type,
discount_code,
discount_percentage,
discount_reason,
-- Product details at time of order
product_sku_snapshot,
product_name_snapshot,
product_category_snapshot,
-- Status
line_status,
fulfillment_status,
return_status,
-- Warehouse
picked_from_warehouse,
picked_from_location,
picked_by_user,
picked_at_timestamp,
packed_by_user,
packed_at_timestamp,
-- Returns
return_reason,
return_date,
refund_amount,
restocking_fee,
-- Gift wrap
is_gift_wrapped,
gift_wrap_type,
gift_wrap_charge,
-- Calculated fields
unit_price * quantity_ordered AS line_revenue,
unit_cost * quantity_ordered AS line_cost,
(unit_price - unit_cost) * quantity_ordered AS line_profit,
CASE
WHEN quantity_returned > 0 THEN 'returned'
WHEN quantity_cancelled > 0 THEN 'cancelled'
WHEN quantity_shipped = quantity_ordered THEN 'fulfilled'
ELSE 'partial'
END AS fulfillment_type
FROM order_items
WHERE line_status NOT IN ('cancelled', 'voided')
AND quantity_ordered > 0
),
employee_data AS (
-- Employee and sales representative information
SELECT
employee_id,
employee_number,
employee_name,
first_name,
last_name,
middle_name,
email_address,
phone_extension,
mobile_phone,
-- Employment details
hire_date,
termination_date,
employment_status,
employment_type,
job_title,
job_level,
job_grade,
department_id,
department_name,
division_id,
division_name,
-- Management
manager_id,
manager_name,
reports_to,
-- Location
office_location,
office_building,
office_floor,
office_room,
work_city,
work_state,
work_country,
-- Compensation
base_salary,
commission_rate,
bonus_target,
commission_tier,
-- Performance
sales_quota,
current_sales,
quota_attainment,
performance_rating,
last_review_date,
next_review_date
FROM employees
WHERE employment_status = 'active'
AND employee_id IS NOT NULL
AND hire_date <= CURRENT_DATE
),
customer_lifetime_metrics AS (
-- Calculate customer lifetime value and metrics
SELECT
cs.customer_id,
cs.customer_name,
cs.customer_tier,
cs.activity_level,
-- Order counts
COUNT(DISTINCT ot.order_id) AS total_orders,
COUNT(DISTINCT CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '30' DAY THEN ot.order_id END) AS orders_last_30_days,
COUNT(DISTINCT CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '90' DAY THEN ot.order_id END) AS orders_last_90_days,
COUNT(DISTINCT CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '365' DAY THEN ot.order_id END) AS orders_last_year,
-- Revenue metrics
SUM(ot.total_amount) AS lifetime_revenue,
SUM(CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '30' DAY THEN ot.total_amount ELSE 0 END) AS revenue_last_30_days,
SUM(CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '90' DAY THEN ot.total_amount ELSE 0 END) AS revenue_last_90_days,
SUM(CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '365' DAY THEN ot.total_amount ELSE 0 END) AS revenue_last_year,
-- Average values
AVG(ot.total_amount) AS average_order_value,
AVG(CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '365' DAY THEN ot.total_amount END) AS avg_order_value_last_year,
-- Product metrics
COUNT(DISTINCT oli.product_id) AS unique_products_purchased,
SUM(oli.quantity_ordered) AS total_items_purchased,
-- Return metrics
SUM(oli.quantity_returned) AS total_items_returned,
SUM(CASE WHEN oli.quantity_returned > 0 THEN oli.refund_amount ELSE 0 END) AS total_refund_amount,
-- Date ranges
MIN(ot.order_date) AS first_order_date,
MAX(ot.order_date) AS last_order_date,
MAX(ot.order_date) - MIN(ot.order_date) AS customer_lifespan_days,
-- Recency
CURRENT_DATE - MAX(ot.order_date) AS days_since_last_order
FROM customer_segments cs
LEFT JOIN order_transactions ot ON cs.customer_id = ot.customer_id
LEFT JOIN order_line_items oli ON ot.order_id = oli.order_id
WHERE ot.order_status IN ('delivered', 'completed')
GROUP BY
cs.customer_id,
cs.customer_name,
cs.customer_tier,
cs.activity_level
),
product_performance AS (
-- Product sales performance metrics
SELECT
pc.product_id,
pc.product_sku,
pc.product_name,
pc.product_category,
pc.product_subcategory,
pc.product_brand,
pc.category_group,
-- Sales metrics
COUNT(DISTINCT oli.order_id) AS total_orders,
SUM(oli.quantity_ordered) AS total_quantity_sold,
SUM(oli.quantity_returned) AS total_quantity_returned,
SUM(oli.line_revenue) AS total_revenue,
SUM(oli.line_cost) AS total_cost,
SUM(oli.line_profit) AS total_profit,
-- Averages
AVG(oli.unit_price) AS average_selling_price,
AVG(oli.line_revenue) AS average_line_revenue,
-- Return rate
CAST(SUM(oli.quantity_returned) AS DECIMAL) / NULLIF(SUM(oli.quantity_ordered), 0) AS return_rate,
-- Profit margin
CAST(SUM(oli.line_profit) AS DECIMAL) / NULLIF(SUM(oli.line_revenue), 0) AS profit_margin,
-- Rankings
RANK() OVER (PARTITION BY pc.product_category ORDER BY SUM(oli.line_revenue) DESC) AS revenue_rank_in_category,
RANK() OVER (ORDER BY SUM(oli.quantity_ordered) DESC) AS quantity_rank_overall
FROM product_catalog pc
INNER JOIN order_line_items oli ON pc.product_id = oli.product_id
INNER JOIN order_transactions ot ON oli.order_id = ot.order_id
WHERE ot.order_status IN ('delivered', 'completed')
AND ot.order_date >= '2023-01-01'
GROUP BY
pc.product_id,
pc.product_sku,
pc.product_name,
pc.product_category,
pc.product_subcategory,
pc.product_brand,
pc.category_group
),
regional_sales AS (
-- Sales performance by region
SELECT
cs.billing_country,
cs.billing_state,
cs.billing_city,
-- Order metrics
COUNT(DISTINCT ot.order_id) AS total_orders,
COUNT(DISTINCT cs.customer_id) AS unique_customers,
-- Revenue
SUM(ot.total_amount) AS total_revenue,
SUM(ot.shipping_amount) AS total_shipping_revenue,
SUM(ot.tax_amount) AS total_tax_collected,
AVG(ot.total_amount) AS average_order_value,
-- Time periods
SUM(CASE WHEN ot.order_date >= '2024-01-01' THEN ot.total_amount ELSE 0 END) AS revenue_2024,
SUM(CASE WHEN ot.order_date >= '2023-01-01' AND ot.order_date < '2024-01-01' THEN ot.total_amount ELSE 0 END) AS revenue_2023,
-- Growth
(SUM(CASE WHEN ot.order_date >= '2024-01-01' THEN ot.total_amount ELSE 0 END) -
SUM(CASE WHEN ot.order_date >= '2023-01-01' AND ot.order_date < '2024-01-01' THEN ot.total_amount ELSE 0 END)) /
NULLIF(SUM(CASE WHEN ot.order_date >= '2023-01-01' AND ot.order_date < '2024-01-01' THEN ot.total_amount ELSE 0 END), 0) AS year_over_year_growth
FROM customer_segments cs
INNER JOIN order_transactions ot ON cs.customer_id = ot.customer_id
WHERE ot.order_status IN ('delivered', 'completed')
GROUP BY
cs.billing_country,
cs.billing_state,
cs.billing_city
HAVING SUM(ot.total_amount) > 1000
),
monthly_trends AS (
-- Monthly sales trends and seasonality
SELECT
DATE_TRUNC('month', ot.order_date) AS order_month,
EXTRACT(YEAR FROM ot.order_date) AS order_year,
EXTRACT(MONTH FROM ot.order_date) AS month_number,
EXTRACT(QUARTER FROM ot.order_date) AS quarter_number,
-- Volume metrics
COUNT(DISTINCT ot.order_id) AS orders,
COUNT(DISTINCT ot.customer_id) AS customers,
SUM(oli.quantity_ordered) AS items_sold,
-- Financial metrics
SUM(ot.subtotal_amount) AS subtotal,
SUM(ot.tax_amount) AS tax,
SUM(ot.shipping_amount) AS shipping,
SUM(ot.discount_amount) AS discounts,
SUM(ot.total_amount) AS revenue,
-- Averages
AVG(ot.total_amount) AS avg_order_value,
AVG(oli.quantity_ordered) AS avg_items_per_order,
-- Moving averages
AVG(SUM(ot.total_amount)) OVER (ORDER BY DATE_TRUNC('month', ot.order_date) ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS three_month_moving_avg,
AVG(SUM(ot.total_amount)) OVER (ORDER BY DATE_TRUNC('month', ot.order_date) ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS six_month_moving_avg
FROM order_transactions ot
INNER JOIN order_line_items oli ON ot.order_id = oli.order_id
WHERE ot.order_status IN ('delivered', 'completed')
AND ot.order_date >= '2022-01-01'
GROUP BY
DATE_TRUNC('month', ot.order_date),
EXTRACT(YEAR FROM ot.order_date),
EXTRACT(MONTH FROM ot.order_date),
EXTRACT(QUARTER FROM ot.order_date)
),
category_analysis AS (
-- Category performance analysis
SELECT
pc.product_category,
pc.product_subcategory,
pc.category_group,
-- Sales
COUNT(DISTINCT oli.order_id) AS orders,
SUM(oli.quantity_ordered) AS quantity,
SUM(oli.line_revenue) AS revenue,
SUM(oli.line_profit) AS profit,
-- Market share
SUM(oli.line_revenue) / SUM(SUM(oli.line_revenue)) OVER () AS revenue_share,
-- Pricing
AVG(oli.unit_price) AS avg_price,
MIN(oli.unit_price) AS min_price,
MAX(oli.unit_price) AS max_price,
-- Profitability
SUM(oli.line_profit) / NULLIF(SUM(oli.line_revenue), 0) AS profit_margin,
-- Returns
SUM(oli.quantity_returned) AS returns,
CAST(SUM(oli.quantity_returned) AS DECIMAL) / NULLIF(SUM(oli.quantity_ordered), 0) AS return_rate
FROM product_catalog pc
INNER JOIN order_line_items oli ON pc.product_id = oli.product_id
INNER JOIN order_transactions ot ON oli.order_id = ot.order_id
WHERE ot.order_status IN ('delivered', 'completed')
GROUP BY
pc.product_category,
pc.product_subcategory,
pc.category_group
)
-- Main query combining all CTEs
SELECT
-- Customer information
cs.customer_id,
cs.customer_number,
cs.customer_name,
cs.customer_type,
cs.customer_tier,
cs.activity_level,
cs.customer_tenure,
cs.email_address,
cs.phone_number,
cs.billing_city,
cs.billing_state,
cs.billing_country,
cs.age_group,
cs.gender,
cs.income_bracket,
-- Customer metrics
clm.total_orders,
clm.orders_last_30_days,
clm.orders_last_90_days,
clm.orders_last_year,
clm.lifetime_revenue,
clm.revenue_last_30_days,
clm.revenue_last_90_days,
clm.revenue_last_year,
clm.average_order_value,
clm.unique_products_purchased,
clm.total_items_purchased,
clm.total_items_returned,
clm.first_order_date,
clm.last_order_date,
clm.days_since_last_order,
-- Order details
ot.order_id,
ot.order_number,
ot.order_date,
ot.order_status,
ot.order_type,
ot.order_channel,
ot.payment_method,
ot.payment_status,
ot.subtotal_amount,
ot.tax_amount,
ot.shipping_amount,
ot.discount_amount,
ot.total_amount,
ot.shipping_method,
ot.shipping_carrier,
ot.tracking_number,
ot.delivery_status,
-- Line item details
oli.line_item_id,
oli.product_id,
oli.quantity_ordered,
oli.quantity_shipped,
oli.unit_price,
oli.line_total,
oli.discount_type,
oli.line_status,
-- Product information
pc.product_sku,
pc.product_name,
pc.product_category,
pc.product_subcategory,
pc.product_brand,
pc.product_manufacturer,
pc.category_group,
pc.list_price,
pc.product_color,
pc.product_size,
pc.average_rating,
pc.review_count,
-- Product performance
pp.total_quantity_sold AS product_total_quantity_sold,
pp.total_revenue AS product_total_revenue,
pp.total_profit AS product_total_profit,
pp.return_rate AS product_return_rate,
pp.profit_margin AS product_profit_margin,
pp.revenue_rank_in_category,
-- Employee information
ed.employee_id,
ed.employee_name,
ed.job_title,
ed.department_name,
ed.office_location,
ed.commission_rate,
ed.sales_quota,
-- Regional metrics
rs.total_orders AS region_total_orders,
rs.unique_customers AS region_unique_customers,
rs.total_revenue AS region_total_revenue,
rs.average_order_value AS region_avg_order_value,
rs.year_over_year_growth AS region_yoy_growth,
-- Category metrics
ca.revenue AS category_revenue,
ca.profit AS category_profit,
ca.revenue_share AS category_revenue_share,
ca.profit_margin AS category_profit_margin,
ca.return_rate AS category_return_rate,
-- Monthly trends
mt.order_month,
mt.three_month_moving_avg,
mt.six_month_moving_avg,
-- Calculated fields
CASE
WHEN clm.lifetime_revenue > 10000 THEN 'vip'
WHEN clm.lifetime_revenue > 5000 THEN 'premium'
WHEN clm.lifetime_revenue > 1000 THEN 'standard'
ELSE 'basic'
END AS calculated_tier,
CASE
WHEN clm.days_since_last_order <= 30 THEN 'very_recent'
WHEN clm.days_since_last_order <= 90 THEN 'recent'
WHEN clm.days_since_last_order <= 180 THEN 'moderate'
ELSE 'at_risk'
END AS recency_segment,
CASE
WHEN clm.total_orders >= 50 THEN 'frequent'
WHEN clm.total_orders >= 20 THEN 'regular'
WHEN clm.total_orders >= 5 THEN 'occasional'
ELSE 'rare'
END AS frequency_segment,
oli.unit_price * oli.quantity_ordered AS calculated_line_revenue,
(oli.unit_price * oli.quantity_ordered) * (ed.commission_rate / 100) AS calculated_commission,
ROUND(oli.unit_price * oli.quantity_ordered * 0.9, 2) AS discounted_line_total,
-- Window functions
ROW_NUMBER() OVER (PARTITION BY cs.customer_id ORDER BY ot.order_date DESC) AS order_recency_rank,
RANK() OVER (PARTITION BY cs.billing_country ORDER BY clm.lifetime_revenue DESC) AS customer_value_rank_in_country,
DENSE_RANK() OVER (PARTITION BY pc.product_category ORDER BY oli.quantity_ordered DESC) AS product_popularity_rank,
SUM(ot.total_amount) OVER (PARTITION BY cs.customer_id ORDER BY ot.order_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_customer_revenue,
AVG(ot.total_amount) OVER (PARTITION BY cs.customer_id ORDER BY ot.order_date ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS rolling_5_order_avg,
-- Aggregates
SUM(oli.quantity_ordered) OVER (PARTITION BY pc.product_category) AS category_total_quantity,
COUNT(DISTINCT ot.order_id) OVER (PARTITION BY cs.billing_country, DATE_TRUNC('month', ot.order_date)) AS monthly_orders_in_country,
MAX(ot.total_amount) OVER (PARTITION BY cs.customer_id) AS customer_largest_order,
MIN(ot.order_date) OVER (PARTITION BY pc.product_id) AS product_first_sale_date
FROM customer_segments cs
INNER JOIN customer_lifetime_metrics clm ON cs.customer_id = clm.customer_id
INNER JOIN order_transactions ot ON cs.customer_id = ot.customer_id
INNER JOIN order_line_items oli ON ot.order_id = oli.order_id
INNER JOIN product_catalog pc ON oli.product_id = pc.product_id
INNER JOIN product_performance pp ON pc.product_id = pp.product_id
LEFT JOIN employee_data ed ON ot.order_id IN (
SELECT order_id FROM employee_assignments WHERE employee_id = ed.employee_id
)
LEFT JOIN regional_sales rs ON cs.billing_country = rs.billing_country
AND cs.billing_state = rs.billing_state
AND cs.billing_city = rs.billing_city
LEFT JOIN category_analysis ca ON pc.product_category = ca.product_category
AND pc.product_subcategory = ca.product_subcategory
LEFT JOIN monthly_trends mt ON DATE_TRUNC('month', ot.order_date) = mt.order_month
WHERE
-- Date filters
ot.order_date >= '2023-01-01'
AND ot.order_date < '2024-12-31'
-- Status filters
AND ot.order_status IN ('processing', 'shipped', 'delivered', 'completed')
AND oli.line_status NOT IN ('cancelled', 'voided', 'rejected')
AND cs.customer_status = 'active'
AND pc.product_status = 'active'
-- Geographic filters
AND cs.billing_country IN ('USA', 'Canada', 'Mexico', 'UK', 'Germany', 'France', 'Spain', 'Italy', 'Japan', 'Australia')
AND cs.billing_state NOT IN ('test', 'demo', 'internal')
-- Category filters
AND pc.product_category IN ('electronics', 'clothing', 'home', 'sports', 'books', 'toys', 'automotive', 'health', 'beauty', 'grocery')
AND pc.product_subcategory NOT LIKE '%test%'
-- Amount filters
AND ot.total_amount > 0
AND ot.total_amount < 100000
AND oli.quantity_ordered > 0
AND oli.unit_price > 0
-- Quality filters
AND cs.email_address NOT LIKE '%@test.com'
AND cs.email_address NOT LIKE '%@example.com'
AND cs.email_address NOT LIKE '%@invalid.com'
AND cs.customer_name NOT LIKE '%test%'
AND cs.customer_name NOT LIKE '%demo%'
AND pc.product_name NOT LIKE '%sample%'
AND pc.product_name NOT LIKE '%demo%'
-- Tier filters
AND cs.customer_tier IN ('gold', 'silver', 'bronze', 'platinum')
AND cs.activity_level IN ('highly_active', 'active')
-- Payment filters
AND ot.payment_status = 'completed'
AND ot.payment_method IN ('credit_card', 'debit_card', 'paypal', 'apple_pay', 'google_pay', 'bank_transfer')
-- Shipping filters
AND ot.delivery_status IN ('delivered', 'in_transit', 'out_for_delivery')
AND ot.shipping_method IN ('standard', 'express', 'overnight', 'two_day')
-- Channel filters
AND ot.order_channel IN ('web', 'mobile', 'tablet', 'phone', 'store', 'marketplace')
-- Null checks
AND cs.customer_id IS NOT NULL
AND ot.order_id IS NOT NULL
AND oli.product_id IS NOT NULL
AND pc.product_sku IS NOT NULL
AND ot.total_amount IS NOT NULL
GROUP BY
cs.customer_id, cs.customer_number, cs.customer_name, cs.customer_type, cs.customer_tier,
cs.activity_level, cs.customer_tenure, cs.email_address, cs.phone_number,
cs.billing_city, cs.billing_state, cs.billing_country, cs.age_group, cs.gender, cs.income_bracket,
clm.total_orders, clm.orders_last_30_days, clm.orders_last_90_days, clm.orders_last_year,
clm.lifetime_revenue, clm.revenue_last_30_days, clm.revenue_last_90_days, clm.revenue_last_year,
clm.average_order_value, clm.unique_products_purchased, clm.total_items_purchased,
clm.total_items_returned, clm.first_order_date, clm.last_order_date, clm.days_since_last_order,
ot.order_id, ot.order_number, ot.order_date, ot.order_status, ot.order_type, ot.order_channel,
ot.payment_method, ot.payment_status, ot.subtotal_amount, ot.tax_amount, ot.shipping_amount,
ot.discount_amount, ot.total_amount, ot.shipping_method, ot.shipping_carrier, ot.tracking_number,
ot.delivery_status, oli.line_item_id, oli.product_id, oli.quantity_ordered, oli.quantity_shipped,
oli.unit_price, oli.line_total, oli.discount_type, oli.line_status,
pc.product_sku, pc.product_name, pc.product_category, pc.product_subcategory, pc.product_brand,
pc.product_manufacturer, pc.category_group, pc.list_price, pc.product_color, pc.product_size,
pc.average_rating, pc.review_count, pp.total_quantity_sold, pp.total_revenue, pp.total_profit,
pp.return_rate, pp.profit_margin, pp.revenue_rank_in_category,
ed.employee_id, ed.employee_name, ed.job_title, ed.department_name, ed.office_location,
ed.commission_rate, ed.sales_quota, rs.total_orders, rs.unique_customers, rs.total_revenue,
rs.average_order_value, rs.year_over_year_growth,
ca.revenue, ca.profit, ca.revenue_share, ca.profit_margin, ca.return_rate,
mt.order_month, mt.three_month_moving_avg, mt.six_month_moving_avg
HAVING
SUM(oli.quantity_ordered) > 0
AND SUM(oli.line_total) > 0
AND COUNT(DISTINCT ot.order_id) >= 1
ORDER BY
clm.lifetime_revenue DESC,
clm.total_orders DESC,
ot.order_date DESC,
cs.customer_name ASC,
pc.product_category ASC,
pc.product_name ASC,
oli.line_number ASC,
ot.order_id ASC
LIMIT 100000
OFFSET 0;
-- Additional analytics queries for dashboard
-- Top customers by revenue
SELECT
customer_id,
customer_name,
customer_tier,
total_orders,
lifetime_revenue,
average_order_value,
days_since_last_order
FROM customer_lifetime_metrics
WHERE lifetime_revenue > 1000
ORDER BY lifetime_revenue DESC
LIMIT 100;
-- Top products by sales
SELECT
product_sku,
product_name,
product_category,
product_brand,
total_quantity_sold,
total_revenue,
total_profit,
profit_margin,
return_rate
FROM product_performance
WHERE total_revenue > 5000
ORDER BY total_revenue DESC
LIMIT 50;
-- Regional performance summary
SELECT
billing_country,
billing_state,
total_orders,
unique_customers,
total_revenue,
average_order_value,
year_over_year_growth
FROM regional_sales
WHERE total_revenue > 10000
ORDER BY total_revenue DESC;
"#;
/// Benchmark tokenizing `COMPLEX_SQL` with the generic SQL dialect.
///
/// Each iteration measures the full tokenizer pass (tokenizer construction
/// plus `tokenize()`), so the reported time reflects end-to-end tokenization
/// cost for a large, comment- and literal-heavy query.
fn tokenization_benchmark(c: &mut Criterion) {
    use std::hint::black_box;

    let mut group = c.benchmark_group("tokenization");
    let dialect = GenericDialect {};
    group.bench_function("tokenize_complex_sql", |b| {
        b.iter(|| {
            // black_box on the input prevents the optimizer from const-folding
            // tokenization of the compile-time constant; black_box on the
            // output keeps the token stream from being discarded as dead code.
            // Either would skew the measured time.
            let mut tokenizer = Tokenizer::new(&dialect, black_box(COMPLEX_SQL));
            black_box(tokenizer.tokenize().unwrap())
        });
    });
    group.finish();
}
criterion_group!(benches, tokenization_benchmark);
criterion_main!(benches);

View file

@ -1251,7 +1251,7 @@ pub fn parse_copy_into(parser: &Parser) -> Result<Statement, ParserError> {
continue_loop = false;
let next_token = parser.next_token();
match next_token.token {
BorrowedToken::SingleQuotedString(s) => files.push(s),
BorrowedToken::SingleQuotedString(s) => files.push(s.into_owned()),
_ => parser.expected("file token", next_token)?,
};
if parser.next_token().token.eq(&BorrowedToken::Comma) {
@ -1266,7 +1266,7 @@ pub fn parse_copy_into(parser: &Parser) -> Result<Statement, ParserError> {
parser.expect_token(&BorrowedToken::Eq)?;
let next_token = parser.next_token();
pattern = Some(match next_token.token {
BorrowedToken::SingleQuotedString(s) => s,
BorrowedToken::SingleQuotedString(s) => s.into_owned(),
_ => parser.expected("pattern", next_token)?,
});
// VALIDATION MODE
@ -1417,7 +1417,7 @@ fn parse_stage_params(parser: &Parser) -> Result<StageParamsObject, ParserError>
if parser.parse_keyword(Keyword::URL) {
parser.expect_token(&BorrowedToken::Eq)?;
url = Some(match parser.next_token().token {
BorrowedToken::SingleQuotedString(word) => Ok(word),
BorrowedToken::SingleQuotedString(word) => Ok(word.into_owned()),
_ => parser.expected("a URL statement", parser.peek_token()),
}?)
}
@ -1432,7 +1432,7 @@ fn parse_stage_params(parser: &Parser) -> Result<StageParamsObject, ParserError>
if parser.parse_keyword(Keyword::ENDPOINT) {
parser.expect_token(&BorrowedToken::Eq)?;
endpoint = Some(match parser.next_token().token {
BorrowedToken::SingleQuotedString(word) => Ok(word),
BorrowedToken::SingleQuotedString(word) => Ok(word.into_owned()),
_ => parser.expected("an endpoint statement", parser.peek_token()),
}?)
}
@ -1486,7 +1486,7 @@ fn parse_session_options(parser: &Parser, set: bool) -> Result<Vec<KeyValueOptio
options.push(option);
} else {
options.push(KeyValueOption {
option_name: key.value,
option_name: key.value.to_string(),
option_value: KeyValueOptionKind::Single(Value::Placeholder(empty())),
});
}

View file

@ -1248,3 +1248,57 @@ pub const RESERVED_FOR_IDENTIFIER: &[Keyword] = &[
Keyword::STRUCT,
Keyword::TRIM,
];
#[cfg(feature = "std")]
use std::collections::HashMap;
#[cfg(feature = "std")]
use std::sync::OnceLock;
#[cfg(feature = "std")]
use unicase::UniCase;
/// Lazy-initialized HashMap for O(1) keyword lookups.
///
/// Keys are wrapped in [`UniCase`] so lookups are ASCII case-insensitive
/// (SQL keywords are all ASCII). Populated on first use by
/// `get_keyword_map` via `OnceLock::get_or_init`.
#[cfg(feature = "std")]
static KEYWORD_MAP: OnceLock<HashMap<UniCase<&'static str>, Keyword>> = OnceLock::new();
/// Return the shared keyword lookup map, building it on first access.
///
/// The map pairs each entry of `ALL_KEYWORDS` with the corresponding
/// `Keyword` variant from `ALL_KEYWORDS_INDEX`, keyed case-insensitively
/// via `UniCase::ascii`.
#[cfg(feature = "std")]
fn get_keyword_map() -> &'static HashMap<UniCase<&'static str>, Keyword> {
    KEYWORD_MAP.get_or_init(|| {
        ALL_KEYWORDS
            .iter()
            .zip(ALL_KEYWORDS_INDEX.iter())
            .map(|(text, kw)| (UniCase::ascii(*text), *kw))
            .collect()
    })
}
/// Look up a keyword by string, case-insensitively, with O(1) complexity.
///
/// # Arguments
/// * `word` - The word to look up (ASCII case-insensitive)
///
/// # Returns
/// * `Some(Keyword)` when `word` matches a SQL keyword
/// * `None` otherwise
///
/// # Example
/// ```
/// use sqlparser::keywords::{get_keyword, Keyword};
///
/// assert_eq!(get_keyword("SELECT"), Some(Keyword::SELECT));
/// assert_eq!(get_keyword("select"), Some(Keyword::SELECT));
/// assert_eq!(get_keyword("my_table"), None);
/// ```
#[cfg(feature = "std")]
pub fn get_keyword(word: &str) -> Option<Keyword> {
    // Wrap in UniCase so the hash/equality ignore ASCII case; copy the
    // Keyword out of the map (Keyword is a small Copy enum).
    let key = UniCase::ascii(word);
    get_keyword_map().get(&key).copied()
}
/// Fallback for no_std: use binary search (same as before)
///
/// Performs an O(log n) case-insensitive binary search over `ALL_KEYWORDS`
/// and maps a hit to its `Keyword` variant via the parallel
/// `ALL_KEYWORDS_INDEX` array.
///
/// NOTE(review): correctness depends on `ALL_KEYWORDS` being sorted in an
/// order consistent with `UniCase::ascii` comparison (case-folded ASCII
/// ordering) — presumably true since the list is uppercase and sorted, but
/// confirm if keywords with non-alphabetic characters are added.
#[cfg(not(feature = "std"))]
pub fn get_keyword(word: &str) -> Option<Keyword> {
    ALL_KEYWORDS
        // Compare each candidate against `word` case-insensitively; both sides
        // are wrapped in UniCase so the fold is applied symmetrically.
        .binary_search_by(|k| unicase::UniCase::ascii(k).cmp(&unicase::UniCase::ascii(&word)))
        .ok()
        // A hit at index `idx` maps to the enum variant at the same position.
        .map(|idx| ALL_KEYWORDS_INDEX[idx])
}

View file

@ -14,6 +14,7 @@
#[cfg(not(feature = "std"))]
use alloc::{
borrow::Cow,
boxed::Box,
format,
string::{String, ToString},
@ -25,6 +26,8 @@ use core::{
str::FromStr,
};
use helpers::attached_token::AttachedToken;
#[cfg(feature = "std")]
use std::borrow::Cow;
use log::debug;
@ -1793,8 +1796,11 @@ impl<'a> Parser<'a> {
break;
}
BorrowedToken::SingleQuotedString(s) => {
let expr =
Expr::Identifier(Ident::with_quote_and_span('\'', next_token.span, s));
let expr = Expr::Identifier(Ident::with_quote_and_span(
'\'',
next_token.span,
s.as_ref(),
));
chain.push(AccessExpr::Dot(expr));
self.advance_token(); // The consumed string
}
@ -3893,7 +3899,7 @@ impl<'a> Parser<'a> {
// any keyword here unquoted.
keyword: _,
}) => Ok(JsonPathElem::Dot {
key: value,
key: value.to_string(),
quoted: quote_style.is_some(),
}),
@ -7744,7 +7750,9 @@ impl<'a> Parser<'a> {
if dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::COMMENT) {
let next_token = self.next_token();
match next_token.token {
BorrowedToken::SingleQuotedString(str) => Some(CommentDef::WithoutEq(str)),
BorrowedToken::SingleQuotedString(str) => {
Some(CommentDef::WithoutEq(str.into_owned()))
}
_ => self.expected("comment", next_token)?,
}
} else {
@ -7965,11 +7973,11 @@ impl<'a> Parser<'a> {
let comment = match (has_eq, value.token) {
(true, BorrowedToken::SingleQuotedString(s)) => {
Ok(Some(SqlOption::Comment(CommentDef::WithEq(s))))
}
(false, BorrowedToken::SingleQuotedString(s)) => {
Ok(Some(SqlOption::Comment(CommentDef::WithoutEq(s))))
Ok(Some(SqlOption::Comment(CommentDef::WithEq(s.into_owned()))))
}
(false, BorrowedToken::SingleQuotedString(s)) => Ok(Some(SqlOption::Comment(
CommentDef::WithoutEq(s.into_owned()),
))),
(_, token) => self.expected(
"BorrowedToken::SingleQuotedString",
TokenWithSpan::wrap(token),
@ -8014,8 +8022,8 @@ impl<'a> Parser<'a> {
let value = self.next_token();
let tablespace = match value.token {
BorrowedToken::Word(Word { value: name, .. })
| BorrowedToken::SingleQuotedString(name) => {
BorrowedToken::Word(Word { value: name, .. }) => {
let name = name.to_string();
let storage = match self.parse_keyword(Keyword::STORAGE) {
true => {
let _ = self.consume_token(&BorrowedToken::Eq);
@ -8038,6 +8046,28 @@ impl<'a> Parser<'a> {
storage,
})))
}
BorrowedToken::SingleQuotedString(name) => {
let storage = match self.parse_keyword(Keyword::STORAGE) {
true => {
let _ = self.consume_token(&BorrowedToken::Eq);
let storage_token = self.next_token();
match &storage_token.token {
BorrowedToken::Word(w) => match w.value.to_uppercase().as_str() {
"DISK" => Some(StorageType::Disk),
"MEMORY" => Some(StorageType::Memory),
_ => self.expected("DISK or MEMORY", storage_token)?,
},
_ => self.expected("BorrowedToken::Word", storage_token)?,
}
}
false => None,
};
Ok(Some(SqlOption::TableSpace(TablespaceOption {
name: name.into_owned(),
storage,
})))
}
_ => {
return self.expected("BorrowedToken::Word", value)?;
}
@ -8176,7 +8206,7 @@ impl<'a> Parser<'a> {
pub fn parse_comment_value(&self) -> Result<String, ParserError> {
let next_token = self.next_token();
let value = match next_token.token {
BorrowedToken::SingleQuotedString(str) => str,
BorrowedToken::SingleQuotedString(str) => str.into_owned(),
BorrowedToken::DollarQuotedString(str) => str.value,
_ => self.expected("string literal", next_token)?,
};
@ -10381,8 +10411,8 @@ impl<'a> Parser<'a> {
}
Keyword::NULL => ok_value(Value::Null),
Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style {
Some('"') => ok_value(Value::DoubleQuotedString(w.value)),
Some('\'') => ok_value(Value::SingleQuotedString(w.value)),
Some('"') => ok_value(Value::DoubleQuotedString(w.value.into_owned())),
Some('\'') => ok_value(Value::SingleQuotedString(w.value.into_owned())),
_ => self.expected(
"A value?",
TokenWithSpan {
@ -10484,11 +10514,18 @@ impl<'a> Parser<'a> {
fn maybe_concat_string_literal(&self, mut str: String) -> String {
if self.dialect.supports_string_literal_concatenation() {
while let BorrowedToken::SingleQuotedString(ref s)
| BorrowedToken::DoubleQuotedString(ref s) = self.peek_token_ref().token
{
str.push_str(s.clone().as_str());
self.advance_token();
loop {
match &self.peek_token_ref().token {
BorrowedToken::SingleQuotedString(s) => {
str.push_str(s.as_ref());
self.advance_token();
}
BorrowedToken::DoubleQuotedString(s) => {
str.push_str(s);
self.advance_token();
}
_ => break,
}
}
}
str
@ -10584,8 +10621,8 @@ impl<'a> Parser<'a> {
value,
keyword: Keyword::NoKeyword,
..
}) => Ok(value),
BorrowedToken::SingleQuotedString(s) => Ok(s),
}) => Ok(value.into_owned()),
BorrowedToken::SingleQuotedString(s) => Ok(s.into_owned()),
BorrowedToken::DoubleQuotedString(s) => Ok(s),
BorrowedToken::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
Ok(s)
@ -11100,7 +11137,7 @@ impl<'a> Parser<'a> {
loop {
let next_token = self.next_token();
match next_token.token {
BorrowedToken::SingleQuotedString(value) => values.push(value),
BorrowedToken::SingleQuotedString(value) => values.push(value.into_owned()),
_ => self.expected("a string", next_token)?,
}
let next_token = self.next_token();
@ -12125,7 +12162,7 @@ impl<'a> Parser<'a> {
match next_token.token {
BorrowedToken::Word(w) => modifiers.push(w.to_string()),
BorrowedToken::Number(n, _) => modifiers.push(n),
BorrowedToken::SingleQuotedString(s) => modifiers.push(s),
BorrowedToken::SingleQuotedString(s) => modifiers.push(s.into_owned()),
BorrowedToken::Comma => {
continue;
@ -13261,7 +13298,7 @@ impl<'a> Parser<'a> {
if token2 == BorrowedToken::Period {
match token1.token {
BorrowedToken::Word(w) => {
schema_name = w.value;
schema_name = w.value.to_string();
}
_ => {
return self.expected("Schema name", token1);
@ -13269,7 +13306,7 @@ impl<'a> Parser<'a> {
}
match token3.token {
BorrowedToken::Word(w) => {
table_name = w.value;
table_name = w.value.to_string();
}
_ => {
return self.expected("Table name", token3);
@ -13282,7 +13319,7 @@ impl<'a> Parser<'a> {
} else {
match token1.token {
BorrowedToken::Word(w) => {
table_name = w.value;
table_name = w.value.to_string();
}
_ => {
return self.expected("Table name", token1);
@ -14408,7 +14445,9 @@ impl<'a> Parser<'a> {
None => {
let next_token = self.next_token();
if let BorrowedToken::Word(w) = next_token.token {
Expr::Value(Value::Placeholder(w.value).with_span(next_token.span))
Expr::Value(
Value::Placeholder(w.value.into_owned()).with_span(next_token.span),
)
} else {
return parser_err!(
"Expecting number or byte length e.g. 100M",
@ -14962,7 +15001,7 @@ impl<'a> Parser<'a> {
let r#type = self.parse_data_type()?;
let path = if let BorrowedToken::SingleQuotedString(path) = self.peek_token().token {
self.next_token();
Some(path)
Some(path.into_owned())
} else {
None
};
@ -16491,7 +16530,7 @@ impl<'a> Parser<'a> {
let opt_ilike = if self.parse_keyword(Keyword::ILIKE) {
let next_token = self.next_token();
let pattern = match next_token.token {
BorrowedToken::SingleQuotedString(s) => s,
BorrowedToken::SingleQuotedString(s) => s.into_owned(),
_ => return self.expected("ilike pattern", next_token),
};
Some(IlikeSelectItem { pattern })
@ -17128,7 +17167,11 @@ impl<'a> Parser<'a> {
(true, _) => BorrowedToken::RParen,
(false, BorrowedToken::EOF) => BorrowedToken::EOF,
(false, BorrowedToken::Word(w)) if end_kws.contains(&w.keyword) => {
BorrowedToken::Word(w)
BorrowedToken::Word(Word {
value: Cow::Owned(w.value.into_owned()),
quote_style: w.quote_style,
keyword: w.keyword,
})
}
(false, _) => BorrowedToken::SemiColon,
};
@ -18327,27 +18370,27 @@ impl<'a> Parser<'a> {
self.expect_token(&BorrowedToken::Eq)?;
match self.peek_token().token {
BorrowedToken::SingleQuotedString(_) => Ok(KeyValueOption {
option_name: key.value.clone(),
option_name: key.value.to_string(),
option_value: KeyValueOptionKind::Single(self.parse_value()?.into()),
}),
BorrowedToken::Word(word)
if word.keyword == Keyword::TRUE || word.keyword == Keyword::FALSE =>
{
Ok(KeyValueOption {
option_name: key.value.clone(),
option_name: key.value.to_string(),
option_value: KeyValueOptionKind::Single(self.parse_value()?.into()),
})
}
BorrowedToken::Number(..) => Ok(KeyValueOption {
option_name: key.value.clone(),
option_name: key.value.to_string(),
option_value: KeyValueOptionKind::Single(self.parse_value()?.into()),
}),
BorrowedToken::Word(word) => {
self.next_token();
Ok(KeyValueOption {
option_name: key.value.clone(),
option_name: key.value.to_string(),
option_value: KeyValueOptionKind::Single(Value::Placeholder(
word.value.clone(),
word.value.to_string(),
)),
})
}
@ -18365,12 +18408,12 @@ impl<'a> Parser<'a> {
Some(values) => {
let values = values.into_iter().map(|v| v.value).collect();
Ok(KeyValueOption {
option_name: key.value.clone(),
option_name: key.value.to_string(),
option_value: KeyValueOptionKind::Multi(values),
})
}
None => Ok(KeyValueOption {
option_name: key.value.clone(),
option_name: key.value.to_string(),
option_value: KeyValueOptionKind::KeyValueOptions(Box::new(
self.parse_key_value_options(true, &[])?,
)),
@ -18405,11 +18448,11 @@ fn maybe_prefixed_expr(expr: Expr, prefix: Option<Ident>) -> Expr {
}
}
impl Word {
impl Word<'_> {
#[deprecated(since = "0.54.0", note = "please use `into_ident` instead")]
pub fn to_ident(&self, span: Span) -> Ident {
Ident {
value: self.value.clone(),
value: self.value.to_string(),
quote_style: self.quote_style,
span,
}
@ -18418,7 +18461,7 @@ impl Word {
/// Convert this word into an [`Ident`] identifier
pub fn into_ident(self, span: Span) -> Ident {
Ident {
value: self.value,
value: self.value.into_owned(),
quote_style: self.quote_style,
span,
}

View file

@ -23,7 +23,7 @@
#[cfg(not(feature = "std"))]
use alloc::{
borrow::{Cow, ToOwned},
borrow::Cow,
format,
string::{String, ToString},
vec,
@ -48,7 +48,7 @@ use crate::dialect::{
BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect,
SnowflakeDialect,
};
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
use crate::keywords::Keyword;
use crate::{ast::DollarQuotedString, dialect::HiveDialect};
/// SQL Token enumeration with lifetime parameter for future zero-copy support
@ -59,13 +59,13 @@ pub enum BorrowedToken<'a> {
/// An end-of-file marker, not a real token
EOF,
/// A keyword (like SELECT) or an optionally quoted SQL identifier
Word(Word),
Word(Word<'a>),
/// An unsigned numeric literal
Number(String, bool),
/// A character that could not be tokenized
Char(char),
/// Single quoted string: i.e: 'string'
SingleQuotedString(String),
SingleQuotedString(Cow<'a, str>),
/// Double quoted string: i.e: "string"
DoubleQuotedString(String),
/// Triple single quoted strings: Example '''abc'''
@ -110,7 +110,7 @@ pub enum BorrowedToken<'a> {
/// Comma
Comma,
/// Whitespace (space, tab, etc)
Whitespace(Whitespace),
Whitespace(Whitespace<'a>),
/// Double equals sign `==`
DoubleEq,
/// Equality operator `=`
@ -280,8 +280,6 @@ pub enum BorrowedToken<'a> {
/// This is used to represent any custom binary operator that is not part of the SQL standard.
/// PostgreSQL allows defining custom binary operators using CREATE OPERATOR.
CustomBinaryOperator(String),
/// Marker to carry the lifetime parameter (never constructed)
_Phantom(Cow<'a, str>),
}
/// Type alias for backward compatibility - Token without explicit lifetime uses 'static
@ -399,7 +397,6 @@ impl<'a> fmt::Display for BorrowedToken<'a> {
BorrowedToken::QuestionAnd => write!(f, "?&"),
BorrowedToken::QuestionPipe => write!(f, "?|"),
BorrowedToken::CustomBinaryOperator(s) => f.write_str(s),
BorrowedToken::_Phantom(_) => unreachable!("_Phantom should never be constructed"),
}
}
}
@ -409,10 +406,16 @@ impl<'a> BorrowedToken<'a> {
pub fn to_static(self) -> Token {
match self {
BorrowedToken::EOF => BorrowedToken::EOF,
BorrowedToken::Word(w) => BorrowedToken::Word(w),
BorrowedToken::Word(w) => BorrowedToken::Word(Word {
value: Cow::Owned(w.value.into_owned()),
quote_style: w.quote_style,
keyword: w.keyword,
}),
BorrowedToken::Number(n, l) => BorrowedToken::Number(n, l),
BorrowedToken::Char(c) => BorrowedToken::Char(c),
BorrowedToken::SingleQuotedString(s) => BorrowedToken::SingleQuotedString(s),
BorrowedToken::SingleQuotedString(s) => {
BorrowedToken::SingleQuotedString(Cow::Owned(s.into_owned()))
}
BorrowedToken::DoubleQuotedString(s) => BorrowedToken::DoubleQuotedString(s),
BorrowedToken::TripleSingleQuotedString(s) => {
BorrowedToken::TripleSingleQuotedString(s)
@ -450,7 +453,20 @@ impl<'a> BorrowedToken<'a> {
BorrowedToken::UnicodeStringLiteral(s) => BorrowedToken::UnicodeStringLiteral(s),
BorrowedToken::HexStringLiteral(s) => BorrowedToken::HexStringLiteral(s),
BorrowedToken::Comma => BorrowedToken::Comma,
BorrowedToken::Whitespace(ws) => BorrowedToken::Whitespace(ws),
BorrowedToken::Whitespace(ws) => BorrowedToken::Whitespace(match ws {
Whitespace::Space => Whitespace::Space,
Whitespace::Newline => Whitespace::Newline,
Whitespace::Tab => Whitespace::Tab,
Whitespace::SingleLineComment { comment, prefix } => {
Whitespace::SingleLineComment {
comment: Cow::Owned(comment.into_owned()),
prefix: Cow::Owned(prefix.into_owned()),
}
}
Whitespace::MultiLineComment(s) => {
Whitespace::MultiLineComment(Cow::Owned(s.into_owned()))
}
}),
BorrowedToken::DoubleEq => BorrowedToken::DoubleEq,
BorrowedToken::Eq => BorrowedToken::Eq,
BorrowedToken::Neq => BorrowedToken::Neq,
@ -545,7 +561,6 @@ impl<'a> BorrowedToken<'a> {
BorrowedToken::QuestionAnd => BorrowedToken::QuestionAnd,
BorrowedToken::QuestionPipe => BorrowedToken::QuestionPipe,
BorrowedToken::CustomBinaryOperator(s) => BorrowedToken::CustomBinaryOperator(s),
BorrowedToken::_Phantom(_) => unreachable!("_Phantom should never be constructed"),
}
}
}
@ -556,13 +571,26 @@ impl BorrowedToken<'static> {
}
pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
let word_uppercase = word.to_uppercase();
BorrowedToken::Word(Word {
value: word.to_string(),
value: Cow::Owned(word.to_string()),
quote_style,
keyword: if quote_style.is_none() {
let keyword = ALL_KEYWORDS.binary_search(&word_uppercase.as_str());
keyword.map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x])
crate::keywords::get_keyword(word).unwrap_or(Keyword::NoKeyword)
} else {
Keyword::NoKeyword
},
})
}
}
impl<'a> BorrowedToken<'a> {
/// Create a Word token with a borrowed string (zero-copy)
pub fn make_word_borrowed(word: &'a str, quote_style: Option<char>) -> Self {
BorrowedToken::Word(Word {
value: Cow::Borrowed(word),
quote_style,
keyword: if quote_style.is_none() {
crate::keywords::get_keyword(word).unwrap_or(Keyword::NoKeyword)
} else {
Keyword::NoKeyword
},
@ -574,10 +602,10 @@ impl BorrowedToken<'static> {
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Word {
pub struct Word<'a> {
/// The value of the token, without the enclosing quotes, and with the
/// escape sequences (if any) processed (TODO: escapes are not handled)
pub value: String,
pub value: Cow<'a, str>,
/// An identifier can be "quoted" (&lt;delimited identifier> in ANSI parlance).
/// The standard and most implementations allow using double quotes for this,
/// but some implementations support other quoting styles as well (e.g. \[MS SQL])
@ -587,7 +615,7 @@ pub struct Word {
pub keyword: Keyword,
}
impl fmt::Display for Word {
impl fmt::Display for Word<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.quote_style {
Some(s) if s == '"' || s == '[' || s == '`' => {
@ -599,7 +627,7 @@ impl fmt::Display for Word {
}
}
impl Word {
impl Word<'_> {
fn matching_end_quote(ch: char) -> char {
match ch {
'"' => '"', // ANSI and most dialects
@ -613,15 +641,18 @@ impl Word {
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum Whitespace {
pub enum Whitespace<'a> {
Space,
Newline,
Tab,
SingleLineComment { comment: String, prefix: String },
MultiLineComment(String),
SingleLineComment {
comment: Cow<'a, str>,
prefix: Cow<'a, str>,
},
MultiLineComment(Cow<'a, str>),
}
impl fmt::Display for Whitespace {
impl fmt::Display for Whitespace<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Whitespace::Space => f.write_str(" "),
@ -1016,7 +1047,7 @@ impl<'a> Tokenizer<'a> {
/// assert_eq!(tokens, vec![
/// Token::make_word("SELECT", None),
/// Token::Whitespace(Whitespace::Space),
/// Token::SingleQuotedString("foo".to_string()),
/// Token::SingleQuotedString("foo".to_string().into()),
/// ]);
pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self {
Self {
@ -1117,15 +1148,18 @@ impl<'a> Tokenizer<'a> {
&self,
consumed_byte_len: usize,
chars: &mut State<'a>,
) -> Result<Option<Token>, TokenizerError> {
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
chars.next(); // consume the first char
let word = self.tokenize_word(consumed_byte_len, chars)?;
// Calculate where the first character started
let first_char_byte_pos = chars.byte_pos.saturating_sub(consumed_byte_len);
let word = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
// TODO: implement parsing of exponent here
if word.chars().all(|x| x.is_ascii_digit() || x == '.') {
let mut inner_state = State {
peekable: word.chars().peekable(),
source: &word,
source: word,
line: 0,
col: 0,
byte_pos: 0,
@ -1136,7 +1170,7 @@ impl<'a> Tokenizer<'a> {
return Ok(Some(Token::Number(s, false)));
}
Ok(Some(Token::make_word(&word, None)))
Ok(Some(BorrowedToken::make_word_borrowed(word, None)))
}
/// Get the next token or return None
@ -1144,7 +1178,7 @@ impl<'a> Tokenizer<'a> {
&self,
chars: &mut State<'a>,
prev_token: Option<&BorrowedToken<'a>>,
) -> Result<Option<Token>, TokenizerError> {
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
match chars.peek() {
Some(&ch) => match ch {
' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
@ -1166,12 +1200,12 @@ impl<'a> Tokenizer<'a> {
Some('\'') => {
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
chars,
'\'',
false,
Token::SingleQuotedByteStringLiteral,
Token::TripleSingleQuotedByteStringLiteral,
BorrowedToken::SingleQuotedByteStringLiteral,
BorrowedToken::TripleSingleQuotedByteStringLiteral,
);
}
let s = self.tokenize_single_quoted_string(chars, '\'', false)?;
@ -1180,12 +1214,12 @@ impl<'a> Tokenizer<'a> {
Some('\"') => {
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
chars,
'"',
false,
Token::DoubleQuotedByteStringLiteral,
Token::TripleDoubleQuotedByteStringLiteral,
BorrowedToken::DoubleQuotedByteStringLiteral,
BorrowedToken::TripleDoubleQuotedByteStringLiteral,
);
}
let s = self.tokenize_single_quoted_string(chars, '\"', false)?;
@ -1193,8 +1227,9 @@ impl<'a> Tokenizer<'a> {
}
_ => {
// regular identifier starting with an "b" or "B"
let s = self.tokenize_word(b.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
let first_char_byte_pos = chars.byte_pos.saturating_sub(b.len_utf8());
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
}
}
}
@ -1203,25 +1238,26 @@ impl<'a> Tokenizer<'a> {
chars.next(); // consume
match chars.peek() {
Some('\'') => self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
chars,
'\'',
false,
Token::SingleQuotedRawStringLiteral,
Token::TripleSingleQuotedRawStringLiteral,
BorrowedToken::SingleQuotedRawStringLiteral,
BorrowedToken::TripleSingleQuotedRawStringLiteral,
),
Some('\"') => self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
chars,
'"',
false,
Token::DoubleQuotedRawStringLiteral,
Token::TripleDoubleQuotedRawStringLiteral,
BorrowedToken::DoubleQuotedRawStringLiteral,
BorrowedToken::TripleDoubleQuotedRawStringLiteral,
),
_ => {
// regular identifier starting with an "r" or "R"
let s = self.tokenize_word(b.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
let first_char_byte_pos = chars.byte_pos.saturating_sub(b.len_utf8());
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
}
}
}
@ -1239,8 +1275,9 @@ impl<'a> Tokenizer<'a> {
}
_ => {
// regular identifier starting with an "N"
let s = self.tokenize_word(n.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
let first_char_byte_pos = chars.byte_pos.saturating_sub(n.len_utf8());
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
}
}
}
@ -1256,8 +1293,9 @@ impl<'a> Tokenizer<'a> {
}
_ => {
// regular identifier starting with an "E" or "e"
let s = self.tokenize_word(x.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
let first_char_byte_pos = chars.byte_pos.saturating_sub(x.len_utf8());
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
}
}
}
@ -1275,8 +1313,9 @@ impl<'a> Tokenizer<'a> {
}
}
// regular identifier starting with an "U" or "u"
let s = self.tokenize_word(x.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
let first_char_byte_pos = chars.byte_pos.saturating_sub(x.len_utf8());
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
}
// The spec only allows an uppercase 'X' to introduce a hex
// string, but PostgreSQL, at least, allows a lowercase 'x' too.
@ -1290,8 +1329,9 @@ impl<'a> Tokenizer<'a> {
}
_ => {
// regular identifier starting with an "X"
let s = self.tokenize_word(x.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
let first_char_byte_pos = chars.byte_pos.saturating_sub(x.len_utf8());
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
}
}
}
@ -1299,21 +1339,21 @@ impl<'a> Tokenizer<'a> {
'\'' => {
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
chars,
'\'',
self.dialect.supports_string_literal_backslash_escape(),
Token::SingleQuotedString,
Token::TripleSingleQuotedString,
|s| BorrowedToken::SingleQuotedString(Cow::Owned(s)),
BorrowedToken::TripleSingleQuotedString,
);
}
let s = self.tokenize_single_quoted_string(
let s = self.tokenize_single_quoted_string_borrowed(
chars,
'\'',
self.dialect.supports_string_literal_backslash_escape(),
)?;
Ok(Some(Token::SingleQuotedString(s)))
Ok(Some(BorrowedToken::SingleQuotedString(s)))
}
// double quoted string
'\"' if !self.dialect.is_delimited_identifier_start(ch)
@ -1321,12 +1361,12 @@ impl<'a> Tokenizer<'a> {
{
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
chars,
'"',
self.dialect.supports_string_literal_backslash_escape(),
Token::DoubleQuotedString,
Token::TripleDoubleQuotedString,
BorrowedToken::DoubleQuotedString,
BorrowedToken::TripleDoubleQuotedString,
);
}
let s = self.tokenize_single_quoted_string(
@ -1536,11 +1576,11 @@ impl<'a> Tokenizer<'a> {
if is_comment {
chars.next(); // consume second '-'
let comment = self.tokenize_single_line_comment(chars)?;
return Ok(Some(Token::Whitespace(
let comment = self.tokenize_single_line_comment_borrowed(chars)?;
return Ok(Some(BorrowedToken::Whitespace(
Whitespace::SingleLineComment {
prefix: "--".to_owned(),
comment,
prefix: Cow::Borrowed("--"),
comment: Cow::Borrowed(comment),
},
)));
}
@ -1567,11 +1607,13 @@ impl<'a> Tokenizer<'a> {
}
Some('/') if dialect_of!(self is SnowflakeDialect) => {
chars.next(); // consume the second '/', starting a snowflake single-line comment
let comment = self.tokenize_single_line_comment(chars)?;
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
prefix: "//".to_owned(),
comment,
})))
let comment = self.tokenize_single_line_comment_borrowed(chars)?;
Ok(Some(BorrowedToken::Whitespace(
Whitespace::SingleLineComment {
prefix: Cow::Borrowed("//"),
comment: Cow::Borrowed(comment),
},
)))
}
Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => {
self.consume_and_return(chars, Token::DuckIntDiv)
@ -1773,11 +1815,13 @@ impl<'a> Tokenizer<'a> {
'#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) =>
{
chars.next(); // consume the '#', starting a snowflake single-line comment
let comment = self.tokenize_single_line_comment(chars)?;
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
prefix: "#".to_owned(),
comment,
})))
let comment = self.tokenize_single_line_comment_borrowed(chars)?;
Ok(Some(BorrowedToken::Whitespace(
Whitespace::SingleLineComment {
prefix: Cow::Borrowed("#"),
comment: Cow::Borrowed(comment),
},
)))
}
'~' => {
chars.next(); // consume
@ -1923,10 +1967,10 @@ impl<'a> Tokenizer<'a> {
/// Consume the next character, then parse a custom binary operator. The next character should be included in the prefix
fn consume_for_binop(
&self,
chars: &mut State,
chars: &mut State<'a>,
prefix: &str,
default: Token,
) -> Result<Option<Token>, TokenizerError> {
default: BorrowedToken<'a>,
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
chars.next(); // consume the first char
self.start_binop_opt(chars, prefix, Some(default))
}
@ -1934,20 +1978,20 @@ impl<'a> Tokenizer<'a> {
/// parse a custom binary operator
fn start_binop(
&self,
chars: &mut State,
chars: &mut State<'a>,
prefix: &str,
default: Token,
) -> Result<Option<Token>, TokenizerError> {
default: BorrowedToken<'a>,
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
self.start_binop_opt(chars, prefix, Some(default))
}
/// parse a custom binary operator
fn start_binop_opt(
&self,
chars: &mut State,
chars: &mut State<'a>,
prefix: &str,
default: Option<Token>,
) -> Result<Option<Token>, TokenizerError> {
default: Option<BorrowedToken<'a>>,
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
let mut custom = None;
while let Some(&ch) = chars.peek() {
if !self.dialect.is_custom_operator_part(ch) {
@ -2132,16 +2176,6 @@ impl<'a> Tokenizer<'a> {
})
}
// Consume characters until newline
fn tokenize_single_line_comment(
&self,
chars: &mut State<'a>,
) -> Result<String, TokenizerError> {
Ok(self
.tokenize_single_line_comment_borrowed(chars)?
.to_string())
}
/// Tokenize a single-line comment, returning a borrowed slice.
/// Returns a slice that includes the terminating newline character.
fn tokenize_single_line_comment_borrowed(
@ -2167,29 +2201,6 @@ impl<'a> Tokenizer<'a> {
self.safe_slice(chars.source, start_pos, chars.byte_pos, error_loc)
}
/// Tokenize an identifier or keyword, after the first char(s) have already been consumed.
/// `consumed_byte_len` is the byte length of the consumed character(s).
fn tokenize_word(
&self,
consumed_byte_len: usize,
chars: &mut State<'a>,
) -> Result<String, TokenizerError> {
let error_loc = chars.location();
// Overflow check: ensure we can safely subtract
if consumed_byte_len > chars.byte_pos {
return self.tokenizer_error(error_loc, "Invalid byte position in tokenize_word");
}
// Calculate where the first character started
let first_char_byte_pos = chars.byte_pos - consumed_byte_len;
// Use the zero-copy version and convert to String
Ok(self
.tokenize_word_borrowed(first_char_byte_pos, chars)?
.to_string())
}
/// Tokenize an identifier or keyword, returning a borrowed slice when possible.
/// The first character position must be provided (before it was consumed).
/// Returns a slice with the same lifetime as the State's source.
@ -2245,14 +2256,14 @@ impl<'a> Tokenizer<'a> {
/// Examples: `'abc'`, `'''abc'''`, `"""abc"""`.
fn tokenize_single_or_triple_quoted_string<F>(
&self,
chars: &mut State,
chars: &mut State<'a>,
quote_style: char,
backslash_escape: bool,
single_quote_token: F,
triple_quote_token: F,
) -> Result<Option<Token>, TokenizerError>
) -> Result<Option<BorrowedToken<'a>>, TokenizerError>
where
F: Fn(String) -> Token,
F: Fn(String) -> BorrowedToken<'a>,
{
let error_loc = chars.location();
@ -2316,6 +2327,79 @@ impl<'a> Tokenizer<'a> {
)
}
/// Reads a string literal quoted by a single quote character, returning Cow for zero-copy.
/// Returns Cow::Borrowed when the string has no escape sequences or doubled quotes,
/// Cow::Owned when processing is required.
///
/// Strategy: a first pass scans the literal looking for anything that would
/// force the content to be rewritten — a doubled closing quote (`''`) or,
/// when `backslash_escape` is set, a `\` escape. If nothing needs rewriting,
/// the content is returned as a borrowed slice of the source (quotes
/// excluded, zero allocation). Otherwise the tokenizer state is rewound to
/// the opening quote and the existing allocating parser
/// `tokenize_single_quoted_string` re-reads the literal.
fn tokenize_single_quoted_string_borrowed(
    &self,
    chars: &mut State<'a>,
    quote_style: char,
    backslash_escape: bool,
) -> Result<Cow<'a, str>, TokenizerError> {
    // Remember where the opening quote starts so we can rewind to it, and
    // capture the location *before* consuming anything — it doubles as the
    // error location and as the line/col snapshot used by the rewind below.
    let start_byte_pos = chars.byte_pos;
    let error_loc = chars.location();
    // Consume opening quote
    if chars.next() != Some(quote_style) {
        return self.tokenizer_error(error_loc, "Expected opening quote");
    }
    // Content begins immediately after the opening quote.
    let content_start = chars.byte_pos;
    let mut needs_processing = false;
    // Scan the string to detect if processing is needed
    loop {
        match chars.peek() {
            None => {
                // Ran off the end of input before the closing quote.
                return self.tokenizer_error(error_loc, "Unterminated string literal");
            }
            Some(&ch) if ch == quote_style => {
                // Found a quote - check if it's doubled or the end
                let quote_pos = chars.byte_pos;
                chars.next(); // consume quote
                if chars.peek() == Some(&quote_style) {
                    // Doubled quote - an escaped quote in SQL ('' -> '),
                    // so the content cannot be returned verbatim.
                    needs_processing = true;
                    chars.next(); // consume second quote
                } else {
                    // End of string
                    if needs_processing {
                        // Reset and use the owned version: rewind the State
                        // to the opening quote and let the allocating parser
                        // handle escape/doubled-quote rewriting.
                        // NOTE(review): this assumes line/col only advance via
                        // next(), so restoring the snapshot taken at error_loc
                        // puts the State back exactly at the opening quote.
                        chars.byte_pos = start_byte_pos;
                        chars.line = error_loc.line;
                        chars.col = error_loc.column;
                        // Recreate peekable from current position
                        let remaining = &chars.source[start_byte_pos..];
                        chars.peekable = remaining.chars().peekable();
                        let s = self.tokenize_single_quoted_string(
                            chars,
                            quote_style,
                            backslash_escape,
                        )?;
                        return Ok(Cow::Owned(s));
                    } else {
                        // Can use borrowed slice (excluding quotes): content
                        // runs from just after the opening quote up to (not
                        // including) the closing quote.
                        return Ok(Cow::Borrowed(&chars.source[content_start..quote_pos]));
                    }
                }
            }
            Some(&'\\') if backslash_escape => {
                // Escape sequence - needs processing by the owned path.
                needs_processing = true;
                chars.next(); // consume backslash
                // Consume the escaped character too, so an escaped quote
                // (\') is not mistaken for the closing quote.
                if chars.next().is_none() {
                    return self.tokenizer_error(error_loc, "Unterminated string literal");
                }
            }
            Some(_) => {
                chars.next(); // consume regular character
            }
        }
    }
}
/// Read a quoted string.
fn tokenize_quoted_string(
&self,
@ -2426,11 +2510,11 @@ impl<'a> Tokenizer<'a> {
fn tokenize_multiline_comment(
&self,
chars: &mut State<'a>,
) -> Result<Option<Token>, TokenizerError> {
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
let s = self.tokenize_multiline_comment_borrowed(chars)?;
Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(
s.to_string(),
))))
Ok(Some(BorrowedToken::Whitespace(
Whitespace::MultiLineComment(Cow::Borrowed(s)),
)))
}
/// Tokenize a multi-line comment, returning a borrowed slice.
@ -2541,9 +2625,9 @@ impl<'a> Tokenizer<'a> {
#[allow(clippy::unnecessary_wraps)]
fn consume_and_return(
&self,
chars: &mut State,
t: Token,
) -> Result<Option<Token>, TokenizerError> {
chars: &mut State<'a>,
t: BorrowedToken<'a>,
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
chars.next();
Ok(Some(t))
}
@ -3062,12 +3146,12 @@ mod tests {
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Word(Word {
value: "foo".to_string(),
value: "foo".to_string().into(),
quote_style: None,
keyword: Keyword::NoKeyword,
}),
Token::DoubleEq,
Token::SingleQuotedString("1".to_string()),
Token::SingleQuotedString("1".to_string().into()),
];
compare(expected, tokens);
@ -3169,11 +3253,11 @@ mod tests {
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString(String::from("a")),
Token::SingleQuotedString(String::from("a").into()),
Token::Whitespace(Whitespace::Space),
Token::StringConcat,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString(String::from("b")),
Token::SingleQuotedString(String::from("b").into()),
];
compare(expected, tokens);
@ -3352,7 +3436,7 @@ mod tests {
Token::Whitespace(Whitespace::Space),
Token::Neq,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString(String::from("Not Provided")),
Token::SingleQuotedString(String::from("Not Provided").into()),
];
compare(expected, tokens);
@ -3379,7 +3463,9 @@ mod tests {
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())];
let expected = vec![Token::SingleQuotedString(
"foo\r\nbar\nbaz".to_string().into(),
)];
compare(expected, tokens);
}
@ -3669,8 +3755,8 @@ mod tests {
vec![
Token::Number("0".to_string(), false),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: "this is a comment\n".to_string(),
prefix: "--".to_string().into(),
comment: "this is a comment\n".to_string().into(),
}),
Token::Number("1".to_string(), false),
],
@ -3680,8 +3766,8 @@ mod tests {
vec![
Token::Number("0".to_string(), false),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: "this is a comment\r1".to_string(),
prefix: "--".to_string().into(),
comment: "this is a comment\r1".to_string().into(),
}),
],
),
@ -3690,8 +3776,8 @@ mod tests {
vec![
Token::Number("0".to_string(), false),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: "this is a comment\r\n".to_string(),
prefix: "--".to_string().into(),
comment: "this is a comment\r\n".to_string().into(),
}),
Token::Number("1".to_string(), false),
],
@ -3715,8 +3801,8 @@ mod tests {
let expected = vec![
Token::Number("1".to_string(), false),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: "\r".to_string(),
prefix: "--".to_string().into(),
comment: "\r".to_string().into(),
}),
Token::Number("0".to_string(), false),
];
@ -3730,8 +3816,8 @@ mod tests {
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: "this is a comment".to_string(),
prefix: "--".to_string().into(),
comment: "this is a comment".to_string().into(),
})];
compare(expected, tokens);
}
@ -3745,7 +3831,7 @@ mod tests {
let expected = vec![
Token::Number("0".to_string(), false),
Token::Whitespace(Whitespace::MultiLineComment(
"multi-line\n* /comment".to_string(),
"multi-line\n* /comment".to_string().into(),
)),
Token::Number("1".to_string(), false),
];
@ -3764,7 +3850,7 @@ mod tests {
Token::Whitespace(Whitespace::Space),
Token::Div,
Token::Word(Word {
value: "comment".to_string(),
value: "comment".to_string().into(),
quote_style: None,
keyword: Keyword::COMMENT,
}),
@ -3791,7 +3877,9 @@ mod tests {
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Number("1".to_string(), false),
Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())),
Token::Whitespace(Whitespace::MultiLineComment(
" a /* b */ c ".to_string().into(),
)),
Token::Number("0".to_string(), false),
],
);
@ -3805,7 +3893,7 @@ mod tests {
Token::make_keyword("select"),
Token::Whitespace(Whitespace::Space),
Token::Number("1".to_string(), false),
Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())),
Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string().into())),
Token::Number("0".to_string(), false),
],
);
@ -3820,7 +3908,7 @@ mod tests {
Token::Whitespace(Whitespace::Space),
Token::Number("1".to_string(), false),
Token::Whitespace(Whitespace::MultiLineComment(
"/* nested comment ".to_string(),
"/* nested comment ".to_string().into(),
)),
Token::Mul,
Token::Div,
@ -3837,7 +3925,9 @@ mod tests {
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::Whitespace(Whitespace::Newline),
Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())),
Token::Whitespace(Whitespace::MultiLineComment(
"* Comment *".to_string().into(),
)),
Token::Whitespace(Whitespace::Newline),
];
compare(expected, tokens);
@ -4221,14 +4311,16 @@ mod tests {
.with_unescape(false)
.tokenize()
.unwrap();
let expected = vec![Token::SingleQuotedString(expected.to_string())];
let expected = vec![Token::SingleQuotedString(expected.to_string().into())];
compare(expected, tokens);
let tokens = Tokenizer::new(&dialect, sql)
.with_unescape(true)
.tokenize()
.unwrap();
let expected = vec![Token::SingleQuotedString(expected_unescaped.to_string())];
let expected = vec![Token::SingleQuotedString(
expected_unescaped.to_string().into(),
)];
compare(expected, tokens);
}
@ -4245,7 +4337,7 @@ mod tests {
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![Token::SingleQuotedString(expected.to_string())];
let expected = vec![Token::SingleQuotedString(expected.to_string().into())];
compare(expected, tokens);
}
@ -4255,7 +4347,7 @@ mod tests {
let dialect = MySqlDialect {};
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![Token::SingleQuotedString(expected.to_string())];
let expected = vec![Token::SingleQuotedString(expected.to_string().into())];
compare(expected, tokens);
}
@ -4358,7 +4450,7 @@ mod tests {
.unwrap();
let expected = vec![
Token::DoubleQuotedString("".to_string()),
Token::SingleQuotedString("".to_string()),
Token::SingleQuotedString("".to_string().into()),
];
compare(expected, tokens);
@ -4368,7 +4460,7 @@ mod tests {
.tokenize()
.unwrap();
let expected = vec![
Token::SingleQuotedString("".to_string()),
Token::SingleQuotedString("".to_string().into()),
Token::DoubleQuotedString("".to_string()),
];
compare(expected, tokens);
@ -4377,7 +4469,7 @@ mod tests {
let dialect = SnowflakeDialect {};
let sql = r#"''''''"#;
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![Token::SingleQuotedString("''".to_string())];
let expected = vec![Token::SingleQuotedString("''".to_string().into())];
compare(expected, tokens);
}
@ -4409,7 +4501,7 @@ mod tests {
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::AtSign,
Token::SingleQuotedString("1".to_string()),
Token::SingleQuotedString("1".to_string().into()),
];
compare(expected, tokens);
}
@ -4467,7 +4559,7 @@ mod tests {
Token::make_keyword("select"),
Token::Whitespace(Whitespace::Space),
Token::make_word("e", None),
Token::SingleQuotedString("...".to_string()),
Token::SingleQuotedString("...".to_string().into()),
],
);
@ -4477,7 +4569,7 @@ mod tests {
Token::make_keyword("select"),
Token::Whitespace(Whitespace::Space),
Token::make_word("E", None),
Token::SingleQuotedString("...".to_string()),
Token::SingleQuotedString("...".to_string().into()),
],
);
}
@ -4513,7 +4605,7 @@ mod tests {
Token::Whitespace(Whitespace::Space),
Token::Minus,
Token::Minus,
Token::SingleQuotedString("abc".to_string()),
Token::SingleQuotedString("abc".to_string().into()),
],
);
@ -4524,8 +4616,8 @@ mod tests {
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: " 'abc'".to_string(),
prefix: "--".to_string().into(),
comment: " 'abc'".to_string().into(),
}),
],
);
@ -4551,8 +4643,8 @@ mod tests {
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: "'abc'".to_string(),
prefix: "--".to_string().into(),
comment: "'abc'".to_string().into(),
}),
],
);
@ -4564,8 +4656,8 @@ mod tests {
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: " 'abc'".to_string(),
prefix: "--".to_string().into(),
comment: " 'abc'".to_string().into(),
}),
],
);
@ -4577,8 +4669,8 @@ mod tests {
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: "".to_string(),
prefix: "--".to_string().into(),
comment: "".to_string().into(),
}),
],
);
@ -4622,13 +4714,13 @@ mod tests {
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Word(Word {
value: "table".to_string(),
value: "table".to_string().into(),
quote_style: None,
keyword: Keyword::TABLE,
}),
Token::Period,
Token::Word(Word {
value: "_col".to_string(),
value: "_col".to_string().into(),
quote_style: None,
keyword: Keyword::NoKeyword,
}),

View file

@ -2629,7 +2629,7 @@ fn test_export_data() {
body: Box::new(SetExpr::Select(Box::new(Select {
select_token: AttachedToken(TokenWithSpan::new(
Token::Word(Word {
value: "SELECT".to_string(),
value: "SELECT".to_string().into(),
quote_style: None,
keyword: Keyword::SELECT,
}),
@ -2733,7 +2733,7 @@ fn test_export_data() {
body: Box::new(SetExpr::Select(Box::new(Select {
select_token: AttachedToken(TokenWithSpan::new(
Token::Word(Word {
value: "SELECT".to_string(),
value: "SELECT".to_string().into(),
quote_style: None,
keyword: Keyword::SELECT,
}),

View file

@ -1581,7 +1581,7 @@ fn test_mssql_while_statement() {
while_block: ConditionalStatementBlock {
start_token: AttachedToken(TokenWithSpan {
token: Token::Word(Word {
value: "WHILE".to_string(),
value: "WHILE".to_string().into(),
quote_style: None,
keyword: Keyword::WHILE
}),

View file

@ -566,8 +566,8 @@ fn test_snowflake_single_line_tokenize() {
Token::Whitespace(Whitespace::Space),
Token::make_keyword("TABLE"),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "#".to_string(),
comment: " this is a comment \n".to_string(),
prefix: "#".to_string().into(),
comment: " this is a comment \n".to_string().into(),
}),
Token::make_word("table_1", None),
];
@ -583,8 +583,8 @@ fn test_snowflake_single_line_tokenize() {
Token::make_keyword("TABLE"),
Token::Whitespace(Whitespace::Space),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "//".to_string(),
comment: " this is a comment \n".to_string(),
prefix: "//".to_string().into(),
comment: " this is a comment \n".to_string().into(),
}),
Token::make_word("table_1", None),
];