mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-12-23 11:12:51 +00:00
Implement zero-copy tokenization for Word, SingleQuotedString, and Whitespace
Convert token string fields to use Cow<'a, str> to enable zero-copy tokenization for commonly used tokens:
- Word.value: regular identifiers and keywords now borrow from the source.
- SingleQuotedString: string literals borrow from the source when no escape processing is needed.
- Whitespace: single-line and multi-line comments borrow from the source.
Also add a benchmark for measuring tokenization performance.
This commit is contained in:
parent
0f17b327b9
commit
5458a2b21d
11 changed files with 1288 additions and 224 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -18,4 +18,7 @@ Cargo.lock
|
|||
|
||||
*.swp
|
||||
|
||||
.DS_store
|
||||
.DS_store
|
||||
|
||||
# dhat profiler output files
|
||||
dhat*.json
|
||||
|
|
@ -48,6 +48,7 @@ visitor = ["sqlparser_derive"]
|
|||
bigdecimal = { version = "0.4.1", features = ["serde"], optional = true }
|
||||
log = "0.4"
|
||||
recursive = { version = "0.1.1", optional = true}
|
||||
unicase = "2.7"
|
||||
|
||||
serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
|
||||
# serde_json is only used in examples/cli, but we have to put it outside
|
||||
|
|
@ -60,7 +61,12 @@ sqlparser_derive = { version = "0.4.0", path = "derive", optional = true }
|
|||
simple_logger = "5.0"
|
||||
matches = "0.1"
|
||||
pretty_assertions = "1"
|
||||
sysinfo = "0.30"
|
||||
dhat = "0.3.3"
|
||||
criterion = "0.5"
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
# Document these features on docs.rs
|
||||
features = ["serde", "visitor"]
|
||||
features = ["serde", "visitor"]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -31,3 +31,7 @@ criterion = "0.7"
|
|||
[[bench]]
|
||||
name = "sqlparser_bench"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "tokenize_bench"
|
||||
harness = false
|
||||
|
|
|
|||
862
sqlparser_bench/benches/tokenize_bench.rs
Normal file
862
sqlparser_bench/benches/tokenize_bench.rs
Normal file
|
|
@ -0,0 +1,862 @@
|
|||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
//! Benchmark tokenization performance
|
||||
//!
|
||||
//! This benchmark measures tokenization speed using a large, multi-statement SQL script
|
||||
//! with many identifiers, keywords, string literals, and comments.
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use sqlparser::dialect::GenericDialect;
|
||||
use sqlparser::tokenizer::Tokenizer;
|
||||
|
||||
const COMPLEX_SQL: &str = r#"
|
||||
-- ============================================================================
|
||||
-- Enterprise Sales Analytics Dashboard Query
|
||||
-- ============================================================================
|
||||
-- Purpose: Comprehensive sales analysis across multiple dimensions
|
||||
-- Author: Analytics Team
|
||||
-- Last Modified: 2024-01-15
|
||||
-- ============================================================================
|
||||
|
||||
/*
|
||||
* This query aggregates sales data from multiple sources:
|
||||
* - Customer transactions and lifetime value
|
||||
* - Product performance across categories
|
||||
* - Regional sales trends and patterns
|
||||
* - Employee commission calculations
|
||||
* - Inventory and fulfillment metrics
|
||||
*/
|
||||
|
||||
WITH customer_segments AS (
|
||||
-- Segment customers by purchase behavior and demographics
|
||||
SELECT
|
||||
customer_id,
|
||||
customer_number,
|
||||
customer_name,
|
||||
customer_type,
|
||||
customer_status,
|
||||
customer_tier,
|
||||
email_address,
|
||||
phone_number,
|
||||
mobile_number,
|
||||
fax_number,
|
||||
date_of_birth,
|
||||
registration_date,
|
||||
last_login_date,
|
||||
account_status,
|
||||
email_verified,
|
||||
phone_verified,
|
||||
-- Address information
|
||||
billing_address_line1,
|
||||
billing_address_line2,
|
||||
billing_city,
|
||||
billing_state,
|
||||
billing_postal_code,
|
||||
billing_country,
|
||||
shipping_address_line1,
|
||||
shipping_address_line2,
|
||||
shipping_city,
|
||||
shipping_state,
|
||||
shipping_postal_code,
|
||||
shipping_country,
|
||||
-- Demographics
|
||||
gender,
|
||||
age_group,
|
||||
income_bracket,
|
||||
education_level,
|
||||
occupation,
|
||||
marital_status,
|
||||
-- Marketing preferences
|
||||
marketing_opt_in,
|
||||
sms_opt_in,
|
||||
email_frequency,
|
||||
preferred_channel,
|
||||
preferred_language,
|
||||
-- Calculated fields
|
||||
CASE
|
||||
WHEN customer_status = 'active' AND last_login_date >= CURRENT_DATE - INTERVAL '30' DAY THEN 'highly_active'
|
||||
WHEN customer_status = 'active' AND last_login_date >= CURRENT_DATE - INTERVAL '90' DAY THEN 'active'
|
||||
WHEN customer_status = 'active' THEN 'inactive'
|
||||
ELSE 'dormant'
|
||||
END AS activity_level,
|
||||
CASE
|
||||
WHEN registration_date >= CURRENT_DATE - INTERVAL '1' YEAR THEN 'new'
|
||||
WHEN registration_date >= CURRENT_DATE - INTERVAL '3' YEAR THEN 'established'
|
||||
ELSE 'veteran'
|
||||
END AS customer_tenure
|
||||
FROM customers
|
||||
WHERE customer_status IN ('active', 'pending', 'suspended')
|
||||
AND registration_date >= '2020-01-01'
|
||||
AND billing_country IN ('USA', 'Canada', 'Mexico', 'UK', 'Germany', 'France', 'Spain', 'Italy')
|
||||
AND email_address NOT LIKE '%@test.com'
|
||||
AND email_address NOT LIKE '%@example.com'
|
||||
AND customer_name IS NOT NULL
|
||||
),
|
||||
|
||||
product_catalog AS (
|
||||
-- Product information with categories and attributes
|
||||
SELECT
|
||||
product_id,
|
||||
product_sku,
|
||||
product_name,
|
||||
product_description,
|
||||
product_category,
|
||||
product_subcategory,
|
||||
product_brand,
|
||||
product_manufacturer,
|
||||
product_supplier,
|
||||
product_model,
|
||||
product_series,
|
||||
product_version,
|
||||
-- Pricing
|
||||
list_price,
|
||||
cost_price,
|
||||
sale_price,
|
||||
wholesale_price,
|
||||
minimum_price,
|
||||
suggested_retail_price,
|
||||
-- Attributes
|
||||
product_color,
|
||||
product_size,
|
||||
product_weight,
|
||||
product_length,
|
||||
product_width,
|
||||
product_height,
|
||||
product_material,
|
||||
product_warranty,
|
||||
-- Inventory
|
||||
stock_quantity,
|
||||
reorder_level,
|
||||
reorder_quantity,
|
||||
warehouse_location,
|
||||
bin_location,
|
||||
aisle_number,
|
||||
shelf_number,
|
||||
-- Status
|
||||
product_status,
|
||||
availability_status,
|
||||
is_featured,
|
||||
is_new_arrival,
|
||||
is_on_sale,
|
||||
is_clearance,
|
||||
is_discontinued,
|
||||
launch_date,
|
||||
discontinuation_date,
|
||||
-- Ratings
|
||||
average_rating,
|
||||
review_count,
|
||||
return_rate,
|
||||
defect_rate,
|
||||
-- Categories
|
||||
CASE
|
||||
WHEN product_category = 'electronics' THEN 'high_tech'
|
||||
WHEN product_category IN ('clothing', 'shoes', 'accessories') THEN 'fashion'
|
||||
WHEN product_category IN ('home', 'garden', 'furniture') THEN 'home_living'
|
||||
WHEN product_category IN ('sports', 'outdoor', 'fitness') THEN 'active_lifestyle'
|
||||
ELSE 'general_merchandise'
|
||||
END AS category_group
|
||||
FROM products
|
||||
WHERE product_status = 'active'
|
||||
AND availability_status IN ('in_stock', 'low_stock', 'backorder')
|
||||
AND is_discontinued = FALSE
|
||||
AND launch_date <= CURRENT_DATE
|
||||
),
|
||||
|
||||
order_transactions AS (
|
||||
-- Order and transaction details
|
||||
SELECT
|
||||
order_id,
|
||||
order_number,
|
||||
order_date,
|
||||
order_time,
|
||||
order_timestamp,
|
||||
customer_id,
|
||||
order_status,
|
||||
order_type,
|
||||
order_channel,
|
||||
order_source,
|
||||
-- Payment information
|
||||
payment_method,
|
||||
payment_status,
|
||||
payment_date,
|
||||
payment_reference,
|
||||
transaction_id,
|
||||
authorization_code,
|
||||
-- Financial details
|
||||
subtotal_amount,
|
||||
tax_amount,
|
||||
shipping_amount,
|
||||
discount_amount,
|
||||
coupon_amount,
|
||||
gift_card_amount,
|
||||
total_amount,
|
||||
paid_amount,
|
||||
refund_amount,
|
||||
net_amount,
|
||||
-- Shipping details
|
||||
shipping_method,
|
||||
shipping_carrier,
|
||||
tracking_number,
|
||||
shipped_date,
|
||||
estimated_delivery_date,
|
||||
actual_delivery_date,
|
||||
delivery_status,
|
||||
signature_required,
|
||||
-- Location
|
||||
ship_to_address_line1,
|
||||
ship_to_address_line2,
|
||||
ship_to_city,
|
||||
ship_to_state,
|
||||
ship_to_postal_code,
|
||||
ship_to_country,
|
||||
-- Fulfillment
|
||||
warehouse_id,
|
||||
fulfillment_center,
|
||||
picker_id,
|
||||
packer_id,
|
||||
shipper_id,
|
||||
-- Timestamps
|
||||
created_at,
|
||||
updated_at,
|
||||
completed_at,
|
||||
cancelled_at,
|
||||
-- Flags
|
||||
is_gift,
|
||||
is_rush_order,
|
||||
is_international,
|
||||
requires_signature,
|
||||
is_business_order,
|
||||
-- Notes
|
||||
customer_notes,
|
||||
internal_notes,
|
||||
gift_message,
|
||||
special_instructions
|
||||
FROM orders
|
||||
WHERE order_date >= '2023-01-01'
|
||||
AND order_date < '2024-12-31'
|
||||
AND order_status IN ('pending', 'processing', 'shipped', 'delivered', 'completed')
|
||||
AND order_type IN ('standard', 'express', 'overnight', 'international')
|
||||
AND total_amount > 0
|
||||
AND customer_id IS NOT NULL
|
||||
),
|
||||
|
||||
order_line_items AS (
|
||||
-- Individual line items from orders
|
||||
SELECT
|
||||
line_item_id,
|
||||
order_id,
|
||||
product_id,
|
||||
line_number,
|
||||
-- Quantities
|
||||
quantity_ordered,
|
||||
quantity_shipped,
|
||||
quantity_cancelled,
|
||||
quantity_returned,
|
||||
quantity_damaged,
|
||||
-- Pricing
|
||||
unit_price,
|
||||
unit_cost,
|
||||
unit_discount,
|
||||
line_subtotal,
|
||||
line_tax,
|
||||
line_shipping,
|
||||
line_total,
|
||||
-- Discounts
|
||||
discount_type,
|
||||
discount_code,
|
||||
discount_percentage,
|
||||
discount_reason,
|
||||
-- Product details at time of order
|
||||
product_sku_snapshot,
|
||||
product_name_snapshot,
|
||||
product_category_snapshot,
|
||||
-- Status
|
||||
line_status,
|
||||
fulfillment_status,
|
||||
return_status,
|
||||
-- Warehouse
|
||||
picked_from_warehouse,
|
||||
picked_from_location,
|
||||
picked_by_user,
|
||||
picked_at_timestamp,
|
||||
packed_by_user,
|
||||
packed_at_timestamp,
|
||||
-- Returns
|
||||
return_reason,
|
||||
return_date,
|
||||
refund_amount,
|
||||
restocking_fee,
|
||||
-- Gift wrap
|
||||
is_gift_wrapped,
|
||||
gift_wrap_type,
|
||||
gift_wrap_charge,
|
||||
-- Calculated fields
|
||||
unit_price * quantity_ordered AS line_revenue,
|
||||
unit_cost * quantity_ordered AS line_cost,
|
||||
(unit_price - unit_cost) * quantity_ordered AS line_profit,
|
||||
CASE
|
||||
WHEN quantity_returned > 0 THEN 'returned'
|
||||
WHEN quantity_cancelled > 0 THEN 'cancelled'
|
||||
WHEN quantity_shipped = quantity_ordered THEN 'fulfilled'
|
||||
ELSE 'partial'
|
||||
END AS fulfillment_type
|
||||
FROM order_items
|
||||
WHERE line_status NOT IN ('cancelled', 'voided')
|
||||
AND quantity_ordered > 0
|
||||
),
|
||||
|
||||
employee_data AS (
|
||||
-- Employee and sales representative information
|
||||
SELECT
|
||||
employee_id,
|
||||
employee_number,
|
||||
employee_name,
|
||||
first_name,
|
||||
last_name,
|
||||
middle_name,
|
||||
email_address,
|
||||
phone_extension,
|
||||
mobile_phone,
|
||||
-- Employment details
|
||||
hire_date,
|
||||
termination_date,
|
||||
employment_status,
|
||||
employment_type,
|
||||
job_title,
|
||||
job_level,
|
||||
job_grade,
|
||||
department_id,
|
||||
department_name,
|
||||
division_id,
|
||||
division_name,
|
||||
-- Management
|
||||
manager_id,
|
||||
manager_name,
|
||||
reports_to,
|
||||
-- Location
|
||||
office_location,
|
||||
office_building,
|
||||
office_floor,
|
||||
office_room,
|
||||
work_city,
|
||||
work_state,
|
||||
work_country,
|
||||
-- Compensation
|
||||
base_salary,
|
||||
commission_rate,
|
||||
bonus_target,
|
||||
commission_tier,
|
||||
-- Performance
|
||||
sales_quota,
|
||||
current_sales,
|
||||
quota_attainment,
|
||||
performance_rating,
|
||||
last_review_date,
|
||||
next_review_date
|
||||
FROM employees
|
||||
WHERE employment_status = 'active'
|
||||
AND employee_id IS NOT NULL
|
||||
AND hire_date <= CURRENT_DATE
|
||||
),
|
||||
|
||||
customer_lifetime_metrics AS (
|
||||
-- Calculate customer lifetime value and metrics
|
||||
SELECT
|
||||
cs.customer_id,
|
||||
cs.customer_name,
|
||||
cs.customer_tier,
|
||||
cs.activity_level,
|
||||
-- Order counts
|
||||
COUNT(DISTINCT ot.order_id) AS total_orders,
|
||||
COUNT(DISTINCT CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '30' DAY THEN ot.order_id END) AS orders_last_30_days,
|
||||
COUNT(DISTINCT CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '90' DAY THEN ot.order_id END) AS orders_last_90_days,
|
||||
COUNT(DISTINCT CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '365' DAY THEN ot.order_id END) AS orders_last_year,
|
||||
-- Revenue metrics
|
||||
SUM(ot.total_amount) AS lifetime_revenue,
|
||||
SUM(CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '30' DAY THEN ot.total_amount ELSE 0 END) AS revenue_last_30_days,
|
||||
SUM(CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '90' DAY THEN ot.total_amount ELSE 0 END) AS revenue_last_90_days,
|
||||
SUM(CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '365' DAY THEN ot.total_amount ELSE 0 END) AS revenue_last_year,
|
||||
-- Average values
|
||||
AVG(ot.total_amount) AS average_order_value,
|
||||
AVG(CASE WHEN ot.order_date >= CURRENT_DATE - INTERVAL '365' DAY THEN ot.total_amount END) AS avg_order_value_last_year,
|
||||
-- Product metrics
|
||||
COUNT(DISTINCT oli.product_id) AS unique_products_purchased,
|
||||
SUM(oli.quantity_ordered) AS total_items_purchased,
|
||||
-- Return metrics
|
||||
SUM(oli.quantity_returned) AS total_items_returned,
|
||||
SUM(CASE WHEN oli.quantity_returned > 0 THEN oli.refund_amount ELSE 0 END) AS total_refund_amount,
|
||||
-- Date ranges
|
||||
MIN(ot.order_date) AS first_order_date,
|
||||
MAX(ot.order_date) AS last_order_date,
|
||||
MAX(ot.order_date) - MIN(ot.order_date) AS customer_lifespan_days,
|
||||
-- Recency
|
||||
CURRENT_DATE - MAX(ot.order_date) AS days_since_last_order
|
||||
FROM customer_segments cs
|
||||
LEFT JOIN order_transactions ot ON cs.customer_id = ot.customer_id
|
||||
LEFT JOIN order_line_items oli ON ot.order_id = oli.order_id
|
||||
WHERE ot.order_status IN ('delivered', 'completed')
|
||||
GROUP BY
|
||||
cs.customer_id,
|
||||
cs.customer_name,
|
||||
cs.customer_tier,
|
||||
cs.activity_level
|
||||
),
|
||||
|
||||
product_performance AS (
|
||||
-- Product sales performance metrics
|
||||
SELECT
|
||||
pc.product_id,
|
||||
pc.product_sku,
|
||||
pc.product_name,
|
||||
pc.product_category,
|
||||
pc.product_subcategory,
|
||||
pc.product_brand,
|
||||
pc.category_group,
|
||||
-- Sales metrics
|
||||
COUNT(DISTINCT oli.order_id) AS total_orders,
|
||||
SUM(oli.quantity_ordered) AS total_quantity_sold,
|
||||
SUM(oli.quantity_returned) AS total_quantity_returned,
|
||||
SUM(oli.line_revenue) AS total_revenue,
|
||||
SUM(oli.line_cost) AS total_cost,
|
||||
SUM(oli.line_profit) AS total_profit,
|
||||
-- Averages
|
||||
AVG(oli.unit_price) AS average_selling_price,
|
||||
AVG(oli.line_revenue) AS average_line_revenue,
|
||||
-- Return rate
|
||||
CAST(SUM(oli.quantity_returned) AS DECIMAL) / NULLIF(SUM(oli.quantity_ordered), 0) AS return_rate,
|
||||
-- Profit margin
|
||||
CAST(SUM(oli.line_profit) AS DECIMAL) / NULLIF(SUM(oli.line_revenue), 0) AS profit_margin,
|
||||
-- Rankings
|
||||
RANK() OVER (PARTITION BY pc.product_category ORDER BY SUM(oli.line_revenue) DESC) AS revenue_rank_in_category,
|
||||
RANK() OVER (ORDER BY SUM(oli.quantity_ordered) DESC) AS quantity_rank_overall
|
||||
FROM product_catalog pc
|
||||
INNER JOIN order_line_items oli ON pc.product_id = oli.product_id
|
||||
INNER JOIN order_transactions ot ON oli.order_id = ot.order_id
|
||||
WHERE ot.order_status IN ('delivered', 'completed')
|
||||
AND ot.order_date >= '2023-01-01'
|
||||
GROUP BY
|
||||
pc.product_id,
|
||||
pc.product_sku,
|
||||
pc.product_name,
|
||||
pc.product_category,
|
||||
pc.product_subcategory,
|
||||
pc.product_brand,
|
||||
pc.category_group
|
||||
),
|
||||
|
||||
regional_sales AS (
|
||||
-- Sales performance by region
|
||||
SELECT
|
||||
cs.billing_country,
|
||||
cs.billing_state,
|
||||
cs.billing_city,
|
||||
-- Order metrics
|
||||
COUNT(DISTINCT ot.order_id) AS total_orders,
|
||||
COUNT(DISTINCT cs.customer_id) AS unique_customers,
|
||||
-- Revenue
|
||||
SUM(ot.total_amount) AS total_revenue,
|
||||
SUM(ot.shipping_amount) AS total_shipping_revenue,
|
||||
SUM(ot.tax_amount) AS total_tax_collected,
|
||||
AVG(ot.total_amount) AS average_order_value,
|
||||
-- Time periods
|
||||
SUM(CASE WHEN ot.order_date >= '2024-01-01' THEN ot.total_amount ELSE 0 END) AS revenue_2024,
|
||||
SUM(CASE WHEN ot.order_date >= '2023-01-01' AND ot.order_date < '2024-01-01' THEN ot.total_amount ELSE 0 END) AS revenue_2023,
|
||||
-- Growth
|
||||
(SUM(CASE WHEN ot.order_date >= '2024-01-01' THEN ot.total_amount ELSE 0 END) -
|
||||
SUM(CASE WHEN ot.order_date >= '2023-01-01' AND ot.order_date < '2024-01-01' THEN ot.total_amount ELSE 0 END)) /
|
||||
NULLIF(SUM(CASE WHEN ot.order_date >= '2023-01-01' AND ot.order_date < '2024-01-01' THEN ot.total_amount ELSE 0 END), 0) AS year_over_year_growth
|
||||
FROM customer_segments cs
|
||||
INNER JOIN order_transactions ot ON cs.customer_id = ot.customer_id
|
||||
WHERE ot.order_status IN ('delivered', 'completed')
|
||||
GROUP BY
|
||||
cs.billing_country,
|
||||
cs.billing_state,
|
||||
cs.billing_city
|
||||
HAVING SUM(ot.total_amount) > 1000
|
||||
),
|
||||
|
||||
monthly_trends AS (
|
||||
-- Monthly sales trends and seasonality
|
||||
SELECT
|
||||
DATE_TRUNC('month', ot.order_date) AS order_month,
|
||||
EXTRACT(YEAR FROM ot.order_date) AS order_year,
|
||||
EXTRACT(MONTH FROM ot.order_date) AS month_number,
|
||||
EXTRACT(QUARTER FROM ot.order_date) AS quarter_number,
|
||||
-- Volume metrics
|
||||
COUNT(DISTINCT ot.order_id) AS orders,
|
||||
COUNT(DISTINCT ot.customer_id) AS customers,
|
||||
SUM(oli.quantity_ordered) AS items_sold,
|
||||
-- Financial metrics
|
||||
SUM(ot.subtotal_amount) AS subtotal,
|
||||
SUM(ot.tax_amount) AS tax,
|
||||
SUM(ot.shipping_amount) AS shipping,
|
||||
SUM(ot.discount_amount) AS discounts,
|
||||
SUM(ot.total_amount) AS revenue,
|
||||
-- Averages
|
||||
AVG(ot.total_amount) AS avg_order_value,
|
||||
AVG(oli.quantity_ordered) AS avg_items_per_order,
|
||||
-- Moving averages
|
||||
AVG(SUM(ot.total_amount)) OVER (ORDER BY DATE_TRUNC('month', ot.order_date) ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS three_month_moving_avg,
|
||||
AVG(SUM(ot.total_amount)) OVER (ORDER BY DATE_TRUNC('month', ot.order_date) ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS six_month_moving_avg
|
||||
FROM order_transactions ot
|
||||
INNER JOIN order_line_items oli ON ot.order_id = oli.order_id
|
||||
WHERE ot.order_status IN ('delivered', 'completed')
|
||||
AND ot.order_date >= '2022-01-01'
|
||||
GROUP BY
|
||||
DATE_TRUNC('month', ot.order_date),
|
||||
EXTRACT(YEAR FROM ot.order_date),
|
||||
EXTRACT(MONTH FROM ot.order_date),
|
||||
EXTRACT(QUARTER FROM ot.order_date)
|
||||
),
|
||||
|
||||
category_analysis AS (
|
||||
-- Category performance analysis
|
||||
SELECT
|
||||
pc.product_category,
|
||||
pc.product_subcategory,
|
||||
pc.category_group,
|
||||
-- Sales
|
||||
COUNT(DISTINCT oli.order_id) AS orders,
|
||||
SUM(oli.quantity_ordered) AS quantity,
|
||||
SUM(oli.line_revenue) AS revenue,
|
||||
SUM(oli.line_profit) AS profit,
|
||||
-- Market share
|
||||
SUM(oli.line_revenue) / SUM(SUM(oli.line_revenue)) OVER () AS revenue_share,
|
||||
-- Pricing
|
||||
AVG(oli.unit_price) AS avg_price,
|
||||
MIN(oli.unit_price) AS min_price,
|
||||
MAX(oli.unit_price) AS max_price,
|
||||
-- Profitability
|
||||
SUM(oli.line_profit) / NULLIF(SUM(oli.line_revenue), 0) AS profit_margin,
|
||||
-- Returns
|
||||
SUM(oli.quantity_returned) AS returns,
|
||||
CAST(SUM(oli.quantity_returned) AS DECIMAL) / NULLIF(SUM(oli.quantity_ordered), 0) AS return_rate
|
||||
FROM product_catalog pc
|
||||
INNER JOIN order_line_items oli ON pc.product_id = oli.product_id
|
||||
INNER JOIN order_transactions ot ON oli.order_id = ot.order_id
|
||||
WHERE ot.order_status IN ('delivered', 'completed')
|
||||
GROUP BY
|
||||
pc.product_category,
|
||||
pc.product_subcategory,
|
||||
pc.category_group
|
||||
)
|
||||
|
||||
-- Main query combining all CTEs
|
||||
SELECT
|
||||
-- Customer information
|
||||
cs.customer_id,
|
||||
cs.customer_number,
|
||||
cs.customer_name,
|
||||
cs.customer_type,
|
||||
cs.customer_tier,
|
||||
cs.activity_level,
|
||||
cs.customer_tenure,
|
||||
cs.email_address,
|
||||
cs.phone_number,
|
||||
cs.billing_city,
|
||||
cs.billing_state,
|
||||
cs.billing_country,
|
||||
cs.age_group,
|
||||
cs.gender,
|
||||
cs.income_bracket,
|
||||
-- Customer metrics
|
||||
clm.total_orders,
|
||||
clm.orders_last_30_days,
|
||||
clm.orders_last_90_days,
|
||||
clm.orders_last_year,
|
||||
clm.lifetime_revenue,
|
||||
clm.revenue_last_30_days,
|
||||
clm.revenue_last_90_days,
|
||||
clm.revenue_last_year,
|
||||
clm.average_order_value,
|
||||
clm.unique_products_purchased,
|
||||
clm.total_items_purchased,
|
||||
clm.total_items_returned,
|
||||
clm.first_order_date,
|
||||
clm.last_order_date,
|
||||
clm.days_since_last_order,
|
||||
-- Order details
|
||||
ot.order_id,
|
||||
ot.order_number,
|
||||
ot.order_date,
|
||||
ot.order_status,
|
||||
ot.order_type,
|
||||
ot.order_channel,
|
||||
ot.payment_method,
|
||||
ot.payment_status,
|
||||
ot.subtotal_amount,
|
||||
ot.tax_amount,
|
||||
ot.shipping_amount,
|
||||
ot.discount_amount,
|
||||
ot.total_amount,
|
||||
ot.shipping_method,
|
||||
ot.shipping_carrier,
|
||||
ot.tracking_number,
|
||||
ot.delivery_status,
|
||||
-- Line item details
|
||||
oli.line_item_id,
|
||||
oli.product_id,
|
||||
oli.quantity_ordered,
|
||||
oli.quantity_shipped,
|
||||
oli.unit_price,
|
||||
oli.line_total,
|
||||
oli.discount_type,
|
||||
oli.line_status,
|
||||
-- Product information
|
||||
pc.product_sku,
|
||||
pc.product_name,
|
||||
pc.product_category,
|
||||
pc.product_subcategory,
|
||||
pc.product_brand,
|
||||
pc.product_manufacturer,
|
||||
pc.category_group,
|
||||
pc.list_price,
|
||||
pc.product_color,
|
||||
pc.product_size,
|
||||
pc.average_rating,
|
||||
pc.review_count,
|
||||
-- Product performance
|
||||
pp.total_quantity_sold AS product_total_quantity_sold,
|
||||
pp.total_revenue AS product_total_revenue,
|
||||
pp.total_profit AS product_total_profit,
|
||||
pp.return_rate AS product_return_rate,
|
||||
pp.profit_margin AS product_profit_margin,
|
||||
pp.revenue_rank_in_category,
|
||||
-- Employee information
|
||||
ed.employee_id,
|
||||
ed.employee_name,
|
||||
ed.job_title,
|
||||
ed.department_name,
|
||||
ed.office_location,
|
||||
ed.commission_rate,
|
||||
ed.sales_quota,
|
||||
-- Regional metrics
|
||||
rs.total_orders AS region_total_orders,
|
||||
rs.unique_customers AS region_unique_customers,
|
||||
rs.total_revenue AS region_total_revenue,
|
||||
rs.average_order_value AS region_avg_order_value,
|
||||
rs.year_over_year_growth AS region_yoy_growth,
|
||||
-- Category metrics
|
||||
ca.revenue AS category_revenue,
|
||||
ca.profit AS category_profit,
|
||||
ca.revenue_share AS category_revenue_share,
|
||||
ca.profit_margin AS category_profit_margin,
|
||||
ca.return_rate AS category_return_rate,
|
||||
-- Monthly trends
|
||||
mt.order_month,
|
||||
mt.three_month_moving_avg,
|
||||
mt.six_month_moving_avg,
|
||||
-- Calculated fields
|
||||
CASE
|
||||
WHEN clm.lifetime_revenue > 10000 THEN 'vip'
|
||||
WHEN clm.lifetime_revenue > 5000 THEN 'premium'
|
||||
WHEN clm.lifetime_revenue > 1000 THEN 'standard'
|
||||
ELSE 'basic'
|
||||
END AS calculated_tier,
|
||||
CASE
|
||||
WHEN clm.days_since_last_order <= 30 THEN 'very_recent'
|
||||
WHEN clm.days_since_last_order <= 90 THEN 'recent'
|
||||
WHEN clm.days_since_last_order <= 180 THEN 'moderate'
|
||||
ELSE 'at_risk'
|
||||
END AS recency_segment,
|
||||
CASE
|
||||
WHEN clm.total_orders >= 50 THEN 'frequent'
|
||||
WHEN clm.total_orders >= 20 THEN 'regular'
|
||||
WHEN clm.total_orders >= 5 THEN 'occasional'
|
||||
ELSE 'rare'
|
||||
END AS frequency_segment,
|
||||
oli.unit_price * oli.quantity_ordered AS calculated_line_revenue,
|
||||
(oli.unit_price * oli.quantity_ordered) * (ed.commission_rate / 100) AS calculated_commission,
|
||||
ROUND(oli.unit_price * oli.quantity_ordered * 0.9, 2) AS discounted_line_total,
|
||||
-- Window functions
|
||||
ROW_NUMBER() OVER (PARTITION BY cs.customer_id ORDER BY ot.order_date DESC) AS order_recency_rank,
|
||||
RANK() OVER (PARTITION BY cs.billing_country ORDER BY clm.lifetime_revenue DESC) AS customer_value_rank_in_country,
|
||||
DENSE_RANK() OVER (PARTITION BY pc.product_category ORDER BY oli.quantity_ordered DESC) AS product_popularity_rank,
|
||||
SUM(ot.total_amount) OVER (PARTITION BY cs.customer_id ORDER BY ot.order_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_customer_revenue,
|
||||
AVG(ot.total_amount) OVER (PARTITION BY cs.customer_id ORDER BY ot.order_date ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS rolling_5_order_avg,
|
||||
-- Aggregates
|
||||
SUM(oli.quantity_ordered) OVER (PARTITION BY pc.product_category) AS category_total_quantity,
|
||||
COUNT(DISTINCT ot.order_id) OVER (PARTITION BY cs.billing_country, DATE_TRUNC('month', ot.order_date)) AS monthly_orders_in_country,
|
||||
MAX(ot.total_amount) OVER (PARTITION BY cs.customer_id) AS customer_largest_order,
|
||||
MIN(ot.order_date) OVER (PARTITION BY pc.product_id) AS product_first_sale_date
|
||||
|
||||
FROM customer_segments cs
|
||||
INNER JOIN customer_lifetime_metrics clm ON cs.customer_id = clm.customer_id
|
||||
INNER JOIN order_transactions ot ON cs.customer_id = ot.customer_id
|
||||
INNER JOIN order_line_items oli ON ot.order_id = oli.order_id
|
||||
INNER JOIN product_catalog pc ON oli.product_id = pc.product_id
|
||||
INNER JOIN product_performance pp ON pc.product_id = pp.product_id
|
||||
LEFT JOIN employee_data ed ON ot.order_id IN (
|
||||
SELECT order_id FROM employee_assignments WHERE employee_id = ed.employee_id
|
||||
)
|
||||
LEFT JOIN regional_sales rs ON cs.billing_country = rs.billing_country
|
||||
AND cs.billing_state = rs.billing_state
|
||||
AND cs.billing_city = rs.billing_city
|
||||
LEFT JOIN category_analysis ca ON pc.product_category = ca.product_category
|
||||
AND pc.product_subcategory = ca.product_subcategory
|
||||
LEFT JOIN monthly_trends mt ON DATE_TRUNC('month', ot.order_date) = mt.order_month
|
||||
|
||||
WHERE
|
||||
-- Date filters
|
||||
ot.order_date >= '2023-01-01'
|
||||
AND ot.order_date < '2024-12-31'
|
||||
-- Status filters
|
||||
AND ot.order_status IN ('processing', 'shipped', 'delivered', 'completed')
|
||||
AND oli.line_status NOT IN ('cancelled', 'voided', 'rejected')
|
||||
AND cs.customer_status = 'active'
|
||||
AND pc.product_status = 'active'
|
||||
-- Geographic filters
|
||||
AND cs.billing_country IN ('USA', 'Canada', 'Mexico', 'UK', 'Germany', 'France', 'Spain', 'Italy', 'Japan', 'Australia')
|
||||
AND cs.billing_state NOT IN ('test', 'demo', 'internal')
|
||||
-- Category filters
|
||||
AND pc.product_category IN ('electronics', 'clothing', 'home', 'sports', 'books', 'toys', 'automotive', 'health', 'beauty', 'grocery')
|
||||
AND pc.product_subcategory NOT LIKE '%test%'
|
||||
-- Amount filters
|
||||
AND ot.total_amount > 0
|
||||
AND ot.total_amount < 100000
|
||||
AND oli.quantity_ordered > 0
|
||||
AND oli.unit_price > 0
|
||||
-- Quality filters
|
||||
AND cs.email_address NOT LIKE '%@test.com'
|
||||
AND cs.email_address NOT LIKE '%@example.com'
|
||||
AND cs.email_address NOT LIKE '%@invalid.com'
|
||||
AND cs.customer_name NOT LIKE '%test%'
|
||||
AND cs.customer_name NOT LIKE '%demo%'
|
||||
AND pc.product_name NOT LIKE '%sample%'
|
||||
AND pc.product_name NOT LIKE '%demo%'
|
||||
-- Tier filters
|
||||
AND cs.customer_tier IN ('gold', 'silver', 'bronze', 'platinum')
|
||||
AND cs.activity_level IN ('highly_active', 'active')
|
||||
-- Payment filters
|
||||
AND ot.payment_status = 'completed'
|
||||
AND ot.payment_method IN ('credit_card', 'debit_card', 'paypal', 'apple_pay', 'google_pay', 'bank_transfer')
|
||||
-- Shipping filters
|
||||
AND ot.delivery_status IN ('delivered', 'in_transit', 'out_for_delivery')
|
||||
AND ot.shipping_method IN ('standard', 'express', 'overnight', 'two_day')
|
||||
-- Channel filters
|
||||
AND ot.order_channel IN ('web', 'mobile', 'tablet', 'phone', 'store', 'marketplace')
|
||||
-- Null checks
|
||||
AND cs.customer_id IS NOT NULL
|
||||
AND ot.order_id IS NOT NULL
|
||||
AND oli.product_id IS NOT NULL
|
||||
AND pc.product_sku IS NOT NULL
|
||||
AND ot.total_amount IS NOT NULL
|
||||
|
||||
GROUP BY
|
||||
cs.customer_id, cs.customer_number, cs.customer_name, cs.customer_type, cs.customer_tier,
|
||||
cs.activity_level, cs.customer_tenure, cs.email_address, cs.phone_number,
|
||||
cs.billing_city, cs.billing_state, cs.billing_country, cs.age_group, cs.gender, cs.income_bracket,
|
||||
clm.total_orders, clm.orders_last_30_days, clm.orders_last_90_days, clm.orders_last_year,
|
||||
clm.lifetime_revenue, clm.revenue_last_30_days, clm.revenue_last_90_days, clm.revenue_last_year,
|
||||
clm.average_order_value, clm.unique_products_purchased, clm.total_items_purchased,
|
||||
clm.total_items_returned, clm.first_order_date, clm.last_order_date, clm.days_since_last_order,
|
||||
ot.order_id, ot.order_number, ot.order_date, ot.order_status, ot.order_type, ot.order_channel,
|
||||
ot.payment_method, ot.payment_status, ot.subtotal_amount, ot.tax_amount, ot.shipping_amount,
|
||||
ot.discount_amount, ot.total_amount, ot.shipping_method, ot.shipping_carrier, ot.tracking_number,
|
||||
ot.delivery_status, oli.line_item_id, oli.product_id, oli.quantity_ordered, oli.quantity_shipped,
|
||||
oli.unit_price, oli.line_total, oli.discount_type, oli.line_status,
|
||||
pc.product_sku, pc.product_name, pc.product_category, pc.product_subcategory, pc.product_brand,
|
||||
pc.product_manufacturer, pc.category_group, pc.list_price, pc.product_color, pc.product_size,
|
||||
pc.average_rating, pc.review_count, pp.total_quantity_sold, pp.total_revenue, pp.total_profit,
|
||||
pp.return_rate, pp.profit_margin, pp.revenue_rank_in_category,
|
||||
ed.employee_id, ed.employee_name, ed.job_title, ed.department_name, ed.office_location,
|
||||
ed.commission_rate, ed.sales_quota, rs.total_orders, rs.unique_customers, rs.total_revenue,
|
||||
rs.average_order_value, rs.year_over_year_growth,
|
||||
ca.revenue, ca.profit, ca.revenue_share, ca.profit_margin, ca.return_rate,
|
||||
mt.order_month, mt.three_month_moving_avg, mt.six_month_moving_avg
|
||||
|
||||
HAVING
|
||||
SUM(oli.quantity_ordered) > 0
|
||||
AND SUM(oli.line_total) > 0
|
||||
AND COUNT(DISTINCT ot.order_id) >= 1
|
||||
|
||||
ORDER BY
|
||||
clm.lifetime_revenue DESC,
|
||||
clm.total_orders DESC,
|
||||
ot.order_date DESC,
|
||||
cs.customer_name ASC,
|
||||
pc.product_category ASC,
|
||||
pc.product_name ASC,
|
||||
oli.line_number ASC,
|
||||
ot.order_id ASC
|
||||
|
||||
LIMIT 100000
|
||||
OFFSET 0;
|
||||
|
||||
-- Additional analytics queries for dashboard
|
||||
|
||||
-- Top customers by revenue
|
||||
SELECT
|
||||
customer_id,
|
||||
customer_name,
|
||||
customer_tier,
|
||||
total_orders,
|
||||
lifetime_revenue,
|
||||
average_order_value,
|
||||
days_since_last_order
|
||||
FROM customer_lifetime_metrics
|
||||
WHERE lifetime_revenue > 1000
|
||||
ORDER BY lifetime_revenue DESC
|
||||
LIMIT 100;
|
||||
|
||||
-- Top products by sales
|
||||
SELECT
|
||||
product_sku,
|
||||
product_name,
|
||||
product_category,
|
||||
product_brand,
|
||||
total_quantity_sold,
|
||||
total_revenue,
|
||||
total_profit,
|
||||
profit_margin,
|
||||
return_rate
|
||||
FROM product_performance
|
||||
WHERE total_revenue > 5000
|
||||
ORDER BY total_revenue DESC
|
||||
LIMIT 50;
|
||||
|
||||
-- Regional performance summary
|
||||
SELECT
|
||||
billing_country,
|
||||
billing_state,
|
||||
total_orders,
|
||||
unique_customers,
|
||||
total_revenue,
|
||||
average_order_value,
|
||||
year_over_year_growth
|
||||
FROM regional_sales
|
||||
WHERE total_revenue > 10000
|
||||
ORDER BY total_revenue DESC;
|
||||
"#;
|
||||
|
||||
fn tokenization_benchmark(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("tokenization");
|
||||
let dialect = GenericDialect {};
|
||||
|
||||
group.bench_function("tokenize_complex_sql", |b| {
|
||||
b.iter(|| {
|
||||
let mut tokenizer = Tokenizer::new(&dialect, COMPLEX_SQL);
|
||||
tokenizer.tokenize().unwrap()
|
||||
});
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, tokenization_benchmark);
|
||||
criterion_main!(benches);
|
||||
|
|
@ -1251,7 +1251,7 @@ pub fn parse_copy_into(parser: &Parser) -> Result<Statement, ParserError> {
|
|||
continue_loop = false;
|
||||
let next_token = parser.next_token();
|
||||
match next_token.token {
|
||||
BorrowedToken::SingleQuotedString(s) => files.push(s),
|
||||
BorrowedToken::SingleQuotedString(s) => files.push(s.into_owned()),
|
||||
_ => parser.expected("file token", next_token)?,
|
||||
};
|
||||
if parser.next_token().token.eq(&BorrowedToken::Comma) {
|
||||
|
|
@ -1266,7 +1266,7 @@ pub fn parse_copy_into(parser: &Parser) -> Result<Statement, ParserError> {
|
|||
parser.expect_token(&BorrowedToken::Eq)?;
|
||||
let next_token = parser.next_token();
|
||||
pattern = Some(match next_token.token {
|
||||
BorrowedToken::SingleQuotedString(s) => s,
|
||||
BorrowedToken::SingleQuotedString(s) => s.into_owned(),
|
||||
_ => parser.expected("pattern", next_token)?,
|
||||
});
|
||||
// VALIDATION MODE
|
||||
|
|
@ -1417,7 +1417,7 @@ fn parse_stage_params(parser: &Parser) -> Result<StageParamsObject, ParserError>
|
|||
if parser.parse_keyword(Keyword::URL) {
|
||||
parser.expect_token(&BorrowedToken::Eq)?;
|
||||
url = Some(match parser.next_token().token {
|
||||
BorrowedToken::SingleQuotedString(word) => Ok(word),
|
||||
BorrowedToken::SingleQuotedString(word) => Ok(word.into_owned()),
|
||||
_ => parser.expected("a URL statement", parser.peek_token()),
|
||||
}?)
|
||||
}
|
||||
|
|
@ -1432,7 +1432,7 @@ fn parse_stage_params(parser: &Parser) -> Result<StageParamsObject, ParserError>
|
|||
if parser.parse_keyword(Keyword::ENDPOINT) {
|
||||
parser.expect_token(&BorrowedToken::Eq)?;
|
||||
endpoint = Some(match parser.next_token().token {
|
||||
BorrowedToken::SingleQuotedString(word) => Ok(word),
|
||||
BorrowedToken::SingleQuotedString(word) => Ok(word.into_owned()),
|
||||
_ => parser.expected("an endpoint statement", parser.peek_token()),
|
||||
}?)
|
||||
}
|
||||
|
|
@ -1486,7 +1486,7 @@ fn parse_session_options(parser: &Parser, set: bool) -> Result<Vec<KeyValueOptio
|
|||
options.push(option);
|
||||
} else {
|
||||
options.push(KeyValueOption {
|
||||
option_name: key.value,
|
||||
option_name: key.value.to_string(),
|
||||
option_value: KeyValueOptionKind::Single(Value::Placeholder(empty())),
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1248,3 +1248,57 @@ pub const RESERVED_FOR_IDENTIFIER: &[Keyword] = &[
|
|||
Keyword::STRUCT,
|
||||
Keyword::TRIM,
|
||||
];
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
use std::collections::HashMap;
|
||||
#[cfg(feature = "std")]
|
||||
use std::sync::OnceLock;
|
||||
#[cfg(feature = "std")]
|
||||
use unicase::UniCase;
|
||||
|
||||
/// Shared table from keyword spelling (wrapped in `UniCase::ascii`) to [`Keyword`].
///
/// Stays uninitialized until `get_keyword_map` is called for the first time.
#[cfg(feature = "std")]
static KEYWORD_MAP: OnceLock<HashMap<UniCase<&'static str>, Keyword>> = OnceLock::new();

/// Returns the shared keyword table, building it on the first call.
///
/// Every entry of `ALL_KEYWORDS` is paired with the entry of
/// `ALL_KEYWORDS_INDEX` at the same position.
#[cfg(feature = "std")]
fn get_keyword_map() -> &'static HashMap<UniCase<&'static str>, Keyword> {
    KEYWORD_MAP.get_or_init(|| {
        // Size the table up front so filling it does not rehash.
        let mut table = HashMap::with_capacity(ALL_KEYWORDS.len());
        table.extend(
            ALL_KEYWORDS
                .iter()
                .zip(ALL_KEYWORDS_INDEX.iter())
                .map(|(&spelling, &keyword)| (UniCase::ascii(spelling), keyword)),
        );
        table
    })
}
|
||||
|
||||
/// Resolves `word` to its [`Keyword`], ignoring ASCII case.
///
/// The lookup is a single hash-map probe against the table returned by
/// `get_keyword_map`.
///
/// # Arguments
/// * `word` - The candidate word, in any letter case
///
/// # Returns
/// * `Some(Keyword)` when `word` is a keyword
/// * `None` otherwise
///
/// # Example
/// ```
/// use sqlparser::keywords::{get_keyword, Keyword};
///
/// assert_eq!(get_keyword("SELECT"), Some(Keyword::SELECT));
/// assert_eq!(get_keyword("select"), Some(Keyword::SELECT));
/// assert_eq!(get_keyword("my_table"), None);
/// ```
#[cfg(feature = "std")]
pub fn get_keyword(word: &str) -> Option<Keyword> {
    let key = UniCase::ascii(word);
    let table = get_keyword_map();
    table.get(&key).copied()
}
|
||||
|
||||
/// Fallback for no_std: use binary search (same as before)
|
||||
#[cfg(not(feature = "std"))]
|
||||
pub fn get_keyword(word: &str) -> Option<Keyword> {
|
||||
ALL_KEYWORDS
|
||||
.binary_search_by(|k| unicase::UniCase::ascii(k).cmp(&unicase::UniCase::ascii(&word)))
|
||||
.ok()
|
||||
.map(|idx| ALL_KEYWORDS_INDEX[idx])
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#[cfg(not(feature = "std"))]
|
||||
use alloc::{
|
||||
borrow::Cow,
|
||||
boxed::Box,
|
||||
format,
|
||||
string::{String, ToString},
|
||||
|
|
@ -25,6 +26,8 @@ use core::{
|
|||
str::FromStr,
|
||||
};
|
||||
use helpers::attached_token::AttachedToken;
|
||||
#[cfg(feature = "std")]
|
||||
use std::borrow::Cow;
|
||||
|
||||
use log::debug;
|
||||
|
||||
|
|
@ -1793,8 +1796,11 @@ impl<'a> Parser<'a> {
|
|||
break;
|
||||
}
|
||||
BorrowedToken::SingleQuotedString(s) => {
|
||||
let expr =
|
||||
Expr::Identifier(Ident::with_quote_and_span('\'', next_token.span, s));
|
||||
let expr = Expr::Identifier(Ident::with_quote_and_span(
|
||||
'\'',
|
||||
next_token.span,
|
||||
s.as_ref(),
|
||||
));
|
||||
chain.push(AccessExpr::Dot(expr));
|
||||
self.advance_token(); // The consumed string
|
||||
}
|
||||
|
|
@ -3893,7 +3899,7 @@ impl<'a> Parser<'a> {
|
|||
// any keyword here unquoted.
|
||||
keyword: _,
|
||||
}) => Ok(JsonPathElem::Dot {
|
||||
key: value,
|
||||
key: value.to_string(),
|
||||
quoted: quote_style.is_some(),
|
||||
}),
|
||||
|
||||
|
|
@ -7744,7 +7750,9 @@ impl<'a> Parser<'a> {
|
|||
if dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::COMMENT) {
|
||||
let next_token = self.next_token();
|
||||
match next_token.token {
|
||||
BorrowedToken::SingleQuotedString(str) => Some(CommentDef::WithoutEq(str)),
|
||||
BorrowedToken::SingleQuotedString(str) => {
|
||||
Some(CommentDef::WithoutEq(str.into_owned()))
|
||||
}
|
||||
_ => self.expected("comment", next_token)?,
|
||||
}
|
||||
} else {
|
||||
|
|
@ -7965,11 +7973,11 @@ impl<'a> Parser<'a> {
|
|||
|
||||
let comment = match (has_eq, value.token) {
|
||||
(true, BorrowedToken::SingleQuotedString(s)) => {
|
||||
Ok(Some(SqlOption::Comment(CommentDef::WithEq(s))))
|
||||
}
|
||||
(false, BorrowedToken::SingleQuotedString(s)) => {
|
||||
Ok(Some(SqlOption::Comment(CommentDef::WithoutEq(s))))
|
||||
Ok(Some(SqlOption::Comment(CommentDef::WithEq(s.into_owned()))))
|
||||
}
|
||||
(false, BorrowedToken::SingleQuotedString(s)) => Ok(Some(SqlOption::Comment(
|
||||
CommentDef::WithoutEq(s.into_owned()),
|
||||
))),
|
||||
(_, token) => self.expected(
|
||||
"BorrowedToken::SingleQuotedString",
|
||||
TokenWithSpan::wrap(token),
|
||||
|
|
@ -8014,8 +8022,8 @@ impl<'a> Parser<'a> {
|
|||
let value = self.next_token();
|
||||
|
||||
let tablespace = match value.token {
|
||||
BorrowedToken::Word(Word { value: name, .. })
|
||||
| BorrowedToken::SingleQuotedString(name) => {
|
||||
BorrowedToken::Word(Word { value: name, .. }) => {
|
||||
let name = name.to_string();
|
||||
let storage = match self.parse_keyword(Keyword::STORAGE) {
|
||||
true => {
|
||||
let _ = self.consume_token(&BorrowedToken::Eq);
|
||||
|
|
@ -8038,6 +8046,28 @@ impl<'a> Parser<'a> {
|
|||
storage,
|
||||
})))
|
||||
}
|
||||
BorrowedToken::SingleQuotedString(name) => {
|
||||
let storage = match self.parse_keyword(Keyword::STORAGE) {
|
||||
true => {
|
||||
let _ = self.consume_token(&BorrowedToken::Eq);
|
||||
let storage_token = self.next_token();
|
||||
match &storage_token.token {
|
||||
BorrowedToken::Word(w) => match w.value.to_uppercase().as_str() {
|
||||
"DISK" => Some(StorageType::Disk),
|
||||
"MEMORY" => Some(StorageType::Memory),
|
||||
_ => self.expected("DISK or MEMORY", storage_token)?,
|
||||
},
|
||||
_ => self.expected("BorrowedToken::Word", storage_token)?,
|
||||
}
|
||||
}
|
||||
false => None,
|
||||
};
|
||||
|
||||
Ok(Some(SqlOption::TableSpace(TablespaceOption {
|
||||
name: name.into_owned(),
|
||||
storage,
|
||||
})))
|
||||
}
|
||||
_ => {
|
||||
return self.expected("BorrowedToken::Word", value)?;
|
||||
}
|
||||
|
|
@ -8176,7 +8206,7 @@ impl<'a> Parser<'a> {
|
|||
pub fn parse_comment_value(&self) -> Result<String, ParserError> {
|
||||
let next_token = self.next_token();
|
||||
let value = match next_token.token {
|
||||
BorrowedToken::SingleQuotedString(str) => str,
|
||||
BorrowedToken::SingleQuotedString(str) => str.into_owned(),
|
||||
BorrowedToken::DollarQuotedString(str) => str.value,
|
||||
_ => self.expected("string literal", next_token)?,
|
||||
};
|
||||
|
|
@ -10381,8 +10411,8 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
Keyword::NULL => ok_value(Value::Null),
|
||||
Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style {
|
||||
Some('"') => ok_value(Value::DoubleQuotedString(w.value)),
|
||||
Some('\'') => ok_value(Value::SingleQuotedString(w.value)),
|
||||
Some('"') => ok_value(Value::DoubleQuotedString(w.value.into_owned())),
|
||||
Some('\'') => ok_value(Value::SingleQuotedString(w.value.into_owned())),
|
||||
_ => self.expected(
|
||||
"A value?",
|
||||
TokenWithSpan {
|
||||
|
|
@ -10484,11 +10514,18 @@ impl<'a> Parser<'a> {
|
|||
|
||||
fn maybe_concat_string_literal(&self, mut str: String) -> String {
|
||||
if self.dialect.supports_string_literal_concatenation() {
|
||||
while let BorrowedToken::SingleQuotedString(ref s)
|
||||
| BorrowedToken::DoubleQuotedString(ref s) = self.peek_token_ref().token
|
||||
{
|
||||
str.push_str(s.clone().as_str());
|
||||
self.advance_token();
|
||||
loop {
|
||||
match &self.peek_token_ref().token {
|
||||
BorrowedToken::SingleQuotedString(s) => {
|
||||
str.push_str(s.as_ref());
|
||||
self.advance_token();
|
||||
}
|
||||
BorrowedToken::DoubleQuotedString(s) => {
|
||||
str.push_str(s);
|
||||
self.advance_token();
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
str
|
||||
|
|
@ -10584,8 +10621,8 @@ impl<'a> Parser<'a> {
|
|||
value,
|
||||
keyword: Keyword::NoKeyword,
|
||||
..
|
||||
}) => Ok(value),
|
||||
BorrowedToken::SingleQuotedString(s) => Ok(s),
|
||||
}) => Ok(value.into_owned()),
|
||||
BorrowedToken::SingleQuotedString(s) => Ok(s.into_owned()),
|
||||
BorrowedToken::DoubleQuotedString(s) => Ok(s),
|
||||
BorrowedToken::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
|
||||
Ok(s)
|
||||
|
|
@ -11100,7 +11137,7 @@ impl<'a> Parser<'a> {
|
|||
loop {
|
||||
let next_token = self.next_token();
|
||||
match next_token.token {
|
||||
BorrowedToken::SingleQuotedString(value) => values.push(value),
|
||||
BorrowedToken::SingleQuotedString(value) => values.push(value.into_owned()),
|
||||
_ => self.expected("a string", next_token)?,
|
||||
}
|
||||
let next_token = self.next_token();
|
||||
|
|
@ -12125,7 +12162,7 @@ impl<'a> Parser<'a> {
|
|||
match next_token.token {
|
||||
BorrowedToken::Word(w) => modifiers.push(w.to_string()),
|
||||
BorrowedToken::Number(n, _) => modifiers.push(n),
|
||||
BorrowedToken::SingleQuotedString(s) => modifiers.push(s),
|
||||
BorrowedToken::SingleQuotedString(s) => modifiers.push(s.into_owned()),
|
||||
|
||||
BorrowedToken::Comma => {
|
||||
continue;
|
||||
|
|
@ -13261,7 +13298,7 @@ impl<'a> Parser<'a> {
|
|||
if token2 == BorrowedToken::Period {
|
||||
match token1.token {
|
||||
BorrowedToken::Word(w) => {
|
||||
schema_name = w.value;
|
||||
schema_name = w.value.to_string();
|
||||
}
|
||||
_ => {
|
||||
return self.expected("Schema name", token1);
|
||||
|
|
@ -13269,7 +13306,7 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
match token3.token {
|
||||
BorrowedToken::Word(w) => {
|
||||
table_name = w.value;
|
||||
table_name = w.value.to_string();
|
||||
}
|
||||
_ => {
|
||||
return self.expected("Table name", token3);
|
||||
|
|
@ -13282,7 +13319,7 @@ impl<'a> Parser<'a> {
|
|||
} else {
|
||||
match token1.token {
|
||||
BorrowedToken::Word(w) => {
|
||||
table_name = w.value;
|
||||
table_name = w.value.to_string();
|
||||
}
|
||||
_ => {
|
||||
return self.expected("Table name", token1);
|
||||
|
|
@ -14408,7 +14445,9 @@ impl<'a> Parser<'a> {
|
|||
None => {
|
||||
let next_token = self.next_token();
|
||||
if let BorrowedToken::Word(w) = next_token.token {
|
||||
Expr::Value(Value::Placeholder(w.value).with_span(next_token.span))
|
||||
Expr::Value(
|
||||
Value::Placeholder(w.value.into_owned()).with_span(next_token.span),
|
||||
)
|
||||
} else {
|
||||
return parser_err!(
|
||||
"Expecting number or byte length e.g. 100M",
|
||||
|
|
@ -14962,7 +15001,7 @@ impl<'a> Parser<'a> {
|
|||
let r#type = self.parse_data_type()?;
|
||||
let path = if let BorrowedToken::SingleQuotedString(path) = self.peek_token().token {
|
||||
self.next_token();
|
||||
Some(path)
|
||||
Some(path.into_owned())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
|
@ -16491,7 +16530,7 @@ impl<'a> Parser<'a> {
|
|||
let opt_ilike = if self.parse_keyword(Keyword::ILIKE) {
|
||||
let next_token = self.next_token();
|
||||
let pattern = match next_token.token {
|
||||
BorrowedToken::SingleQuotedString(s) => s,
|
||||
BorrowedToken::SingleQuotedString(s) => s.into_owned(),
|
||||
_ => return self.expected("ilike pattern", next_token),
|
||||
};
|
||||
Some(IlikeSelectItem { pattern })
|
||||
|
|
@ -17128,7 +17167,11 @@ impl<'a> Parser<'a> {
|
|||
(true, _) => BorrowedToken::RParen,
|
||||
(false, BorrowedToken::EOF) => BorrowedToken::EOF,
|
||||
(false, BorrowedToken::Word(w)) if end_kws.contains(&w.keyword) => {
|
||||
BorrowedToken::Word(w)
|
||||
BorrowedToken::Word(Word {
|
||||
value: Cow::Owned(w.value.into_owned()),
|
||||
quote_style: w.quote_style,
|
||||
keyword: w.keyword,
|
||||
})
|
||||
}
|
||||
(false, _) => BorrowedToken::SemiColon,
|
||||
};
|
||||
|
|
@ -18327,27 +18370,27 @@ impl<'a> Parser<'a> {
|
|||
self.expect_token(&BorrowedToken::Eq)?;
|
||||
match self.peek_token().token {
|
||||
BorrowedToken::SingleQuotedString(_) => Ok(KeyValueOption {
|
||||
option_name: key.value.clone(),
|
||||
option_name: key.value.to_string(),
|
||||
option_value: KeyValueOptionKind::Single(self.parse_value()?.into()),
|
||||
}),
|
||||
BorrowedToken::Word(word)
|
||||
if word.keyword == Keyword::TRUE || word.keyword == Keyword::FALSE =>
|
||||
{
|
||||
Ok(KeyValueOption {
|
||||
option_name: key.value.clone(),
|
||||
option_name: key.value.to_string(),
|
||||
option_value: KeyValueOptionKind::Single(self.parse_value()?.into()),
|
||||
})
|
||||
}
|
||||
BorrowedToken::Number(..) => Ok(KeyValueOption {
|
||||
option_name: key.value.clone(),
|
||||
option_name: key.value.to_string(),
|
||||
option_value: KeyValueOptionKind::Single(self.parse_value()?.into()),
|
||||
}),
|
||||
BorrowedToken::Word(word) => {
|
||||
self.next_token();
|
||||
Ok(KeyValueOption {
|
||||
option_name: key.value.clone(),
|
||||
option_name: key.value.to_string(),
|
||||
option_value: KeyValueOptionKind::Single(Value::Placeholder(
|
||||
word.value.clone(),
|
||||
word.value.to_string(),
|
||||
)),
|
||||
})
|
||||
}
|
||||
|
|
@ -18365,12 +18408,12 @@ impl<'a> Parser<'a> {
|
|||
Some(values) => {
|
||||
let values = values.into_iter().map(|v| v.value).collect();
|
||||
Ok(KeyValueOption {
|
||||
option_name: key.value.clone(),
|
||||
option_name: key.value.to_string(),
|
||||
option_value: KeyValueOptionKind::Multi(values),
|
||||
})
|
||||
}
|
||||
None => Ok(KeyValueOption {
|
||||
option_name: key.value.clone(),
|
||||
option_name: key.value.to_string(),
|
||||
option_value: KeyValueOptionKind::KeyValueOptions(Box::new(
|
||||
self.parse_key_value_options(true, &[])?,
|
||||
)),
|
||||
|
|
@ -18405,11 +18448,11 @@ fn maybe_prefixed_expr(expr: Expr, prefix: Option<Ident>) -> Expr {
|
|||
}
|
||||
}
|
||||
|
||||
impl Word {
|
||||
impl Word<'_> {
|
||||
#[deprecated(since = "0.54.0", note = "please use `into_ident` instead")]
|
||||
pub fn to_ident(&self, span: Span) -> Ident {
|
||||
Ident {
|
||||
value: self.value.clone(),
|
||||
value: self.value.to_string(),
|
||||
quote_style: self.quote_style,
|
||||
span,
|
||||
}
|
||||
|
|
@ -18418,7 +18461,7 @@ impl Word {
|
|||
/// Convert this word into an [`Ident`] identifier
|
||||
pub fn into_ident(self, span: Span) -> Ident {
|
||||
Ident {
|
||||
value: self.value,
|
||||
value: self.value.into_owned(),
|
||||
quote_style: self.quote_style,
|
||||
span,
|
||||
}
|
||||
|
|
|
|||
434
src/tokenizer.rs
434
src/tokenizer.rs
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
#[cfg(not(feature = "std"))]
|
||||
use alloc::{
|
||||
borrow::{Cow, ToOwned},
|
||||
borrow::Cow,
|
||||
format,
|
||||
string::{String, ToString},
|
||||
vec,
|
||||
|
|
@ -48,7 +48,7 @@ use crate::dialect::{
|
|||
BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect,
|
||||
SnowflakeDialect,
|
||||
};
|
||||
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
|
||||
use crate::keywords::Keyword;
|
||||
use crate::{ast::DollarQuotedString, dialect::HiveDialect};
|
||||
|
||||
/// SQL Token enumeration with lifetime parameter for future zero-copy support
|
||||
|
|
@ -59,13 +59,13 @@ pub enum BorrowedToken<'a> {
|
|||
/// An end-of-file marker, not a real token
|
||||
EOF,
|
||||
/// A keyword (like SELECT) or an optionally quoted SQL identifier
|
||||
Word(Word),
|
||||
Word(Word<'a>),
|
||||
/// An unsigned numeric literal
|
||||
Number(String, bool),
|
||||
/// A character that could not be tokenized
|
||||
Char(char),
|
||||
/// Single quoted string: i.e: 'string'
|
||||
SingleQuotedString(String),
|
||||
SingleQuotedString(Cow<'a, str>),
|
||||
/// Double quoted string: i.e: "string"
|
||||
DoubleQuotedString(String),
|
||||
/// Triple single quoted strings: Example '''abc'''
|
||||
|
|
@ -110,7 +110,7 @@ pub enum BorrowedToken<'a> {
|
|||
/// Comma
|
||||
Comma,
|
||||
/// Whitespace (space, tab, etc)
|
||||
Whitespace(Whitespace),
|
||||
Whitespace(Whitespace<'a>),
|
||||
/// Double equals sign `==`
|
||||
DoubleEq,
|
||||
/// Equality operator `=`
|
||||
|
|
@ -280,8 +280,6 @@ pub enum BorrowedToken<'a> {
|
|||
/// This is used to represent any custom binary operator that is not part of the SQL standard.
|
||||
/// PostgreSQL allows defining custom binary operators using CREATE OPERATOR.
|
||||
CustomBinaryOperator(String),
|
||||
/// Marker to carry the lifetime parameter (never constructed)
|
||||
_Phantom(Cow<'a, str>),
|
||||
}
|
||||
|
||||
/// Type alias for backward compatibility - Token without explicit lifetime uses 'static
|
||||
|
|
@ -399,7 +397,6 @@ impl<'a> fmt::Display for BorrowedToken<'a> {
|
|||
BorrowedToken::QuestionAnd => write!(f, "?&"),
|
||||
BorrowedToken::QuestionPipe => write!(f, "?|"),
|
||||
BorrowedToken::CustomBinaryOperator(s) => f.write_str(s),
|
||||
BorrowedToken::_Phantom(_) => unreachable!("_Phantom should never be constructed"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -409,10 +406,16 @@ impl<'a> BorrowedToken<'a> {
|
|||
pub fn to_static(self) -> Token {
|
||||
match self {
|
||||
BorrowedToken::EOF => BorrowedToken::EOF,
|
||||
BorrowedToken::Word(w) => BorrowedToken::Word(w),
|
||||
BorrowedToken::Word(w) => BorrowedToken::Word(Word {
|
||||
value: Cow::Owned(w.value.into_owned()),
|
||||
quote_style: w.quote_style,
|
||||
keyword: w.keyword,
|
||||
}),
|
||||
BorrowedToken::Number(n, l) => BorrowedToken::Number(n, l),
|
||||
BorrowedToken::Char(c) => BorrowedToken::Char(c),
|
||||
BorrowedToken::SingleQuotedString(s) => BorrowedToken::SingleQuotedString(s),
|
||||
BorrowedToken::SingleQuotedString(s) => {
|
||||
BorrowedToken::SingleQuotedString(Cow::Owned(s.into_owned()))
|
||||
}
|
||||
BorrowedToken::DoubleQuotedString(s) => BorrowedToken::DoubleQuotedString(s),
|
||||
BorrowedToken::TripleSingleQuotedString(s) => {
|
||||
BorrowedToken::TripleSingleQuotedString(s)
|
||||
|
|
@ -450,7 +453,20 @@ impl<'a> BorrowedToken<'a> {
|
|||
BorrowedToken::UnicodeStringLiteral(s) => BorrowedToken::UnicodeStringLiteral(s),
|
||||
BorrowedToken::HexStringLiteral(s) => BorrowedToken::HexStringLiteral(s),
|
||||
BorrowedToken::Comma => BorrowedToken::Comma,
|
||||
BorrowedToken::Whitespace(ws) => BorrowedToken::Whitespace(ws),
|
||||
BorrowedToken::Whitespace(ws) => BorrowedToken::Whitespace(match ws {
|
||||
Whitespace::Space => Whitespace::Space,
|
||||
Whitespace::Newline => Whitespace::Newline,
|
||||
Whitespace::Tab => Whitespace::Tab,
|
||||
Whitespace::SingleLineComment { comment, prefix } => {
|
||||
Whitespace::SingleLineComment {
|
||||
comment: Cow::Owned(comment.into_owned()),
|
||||
prefix: Cow::Owned(prefix.into_owned()),
|
||||
}
|
||||
}
|
||||
Whitespace::MultiLineComment(s) => {
|
||||
Whitespace::MultiLineComment(Cow::Owned(s.into_owned()))
|
||||
}
|
||||
}),
|
||||
BorrowedToken::DoubleEq => BorrowedToken::DoubleEq,
|
||||
BorrowedToken::Eq => BorrowedToken::Eq,
|
||||
BorrowedToken::Neq => BorrowedToken::Neq,
|
||||
|
|
@ -545,7 +561,6 @@ impl<'a> BorrowedToken<'a> {
|
|||
BorrowedToken::QuestionAnd => BorrowedToken::QuestionAnd,
|
||||
BorrowedToken::QuestionPipe => BorrowedToken::QuestionPipe,
|
||||
BorrowedToken::CustomBinaryOperator(s) => BorrowedToken::CustomBinaryOperator(s),
|
||||
BorrowedToken::_Phantom(_) => unreachable!("_Phantom should never be constructed"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -556,13 +571,26 @@ impl BorrowedToken<'static> {
|
|||
}
|
||||
|
||||
pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
|
||||
let word_uppercase = word.to_uppercase();
|
||||
BorrowedToken::Word(Word {
|
||||
value: word.to_string(),
|
||||
value: Cow::Owned(word.to_string()),
|
||||
quote_style,
|
||||
keyword: if quote_style.is_none() {
|
||||
let keyword = ALL_KEYWORDS.binary_search(&word_uppercase.as_str());
|
||||
keyword.map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x])
|
||||
crate::keywords::get_keyword(word).unwrap_or(Keyword::NoKeyword)
|
||||
} else {
|
||||
Keyword::NoKeyword
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BorrowedToken<'a> {
|
||||
/// Create a Word token with a borrowed string (zero-copy)
|
||||
pub fn make_word_borrowed(word: &'a str, quote_style: Option<char>) -> Self {
|
||||
BorrowedToken::Word(Word {
|
||||
value: Cow::Borrowed(word),
|
||||
quote_style,
|
||||
keyword: if quote_style.is_none() {
|
||||
crate::keywords::get_keyword(word).unwrap_or(Keyword::NoKeyword)
|
||||
} else {
|
||||
Keyword::NoKeyword
|
||||
},
|
||||
|
|
@ -574,10 +602,10 @@ impl BorrowedToken<'static> {
|
|||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub struct Word {
|
||||
pub struct Word<'a> {
|
||||
/// The value of the token, without the enclosing quotes, and with the
|
||||
/// escape sequences (if any) processed (TODO: escapes are not handled)
|
||||
pub value: String,
|
||||
pub value: Cow<'a, str>,
|
||||
/// An identifier can be "quoted" (<delimited identifier> in ANSI parlance).
|
||||
/// The standard and most implementations allow using double quotes for this,
|
||||
/// but some implementations support other quoting styles as well (e.g. \[MS SQL])
|
||||
|
|
@ -587,7 +615,7 @@ pub struct Word {
|
|||
pub keyword: Keyword,
|
||||
}
|
||||
|
||||
impl fmt::Display for Word {
|
||||
impl fmt::Display for Word<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.quote_style {
|
||||
Some(s) if s == '"' || s == '[' || s == '`' => {
|
||||
|
|
@ -599,7 +627,7 @@ impl fmt::Display for Word {
|
|||
}
|
||||
}
|
||||
|
||||
impl Word {
|
||||
impl Word<'_> {
|
||||
fn matching_end_quote(ch: char) -> char {
|
||||
match ch {
|
||||
'"' => '"', // ANSI and most dialects
|
||||
|
|
@ -613,15 +641,18 @@ impl Word {
|
|||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum Whitespace {
|
||||
pub enum Whitespace<'a> {
|
||||
Space,
|
||||
Newline,
|
||||
Tab,
|
||||
SingleLineComment { comment: String, prefix: String },
|
||||
MultiLineComment(String),
|
||||
SingleLineComment {
|
||||
comment: Cow<'a, str>,
|
||||
prefix: Cow<'a, str>,
|
||||
},
|
||||
MultiLineComment(Cow<'a, str>),
|
||||
}
|
||||
|
||||
impl fmt::Display for Whitespace {
|
||||
impl fmt::Display for Whitespace<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Whitespace::Space => f.write_str(" "),
|
||||
|
|
@ -1016,7 +1047,7 @@ impl<'a> Tokenizer<'a> {
|
|||
/// assert_eq!(tokens, vec![
|
||||
/// Token::make_word("SELECT", None),
|
||||
/// Token::Whitespace(Whitespace::Space),
|
||||
/// Token::SingleQuotedString("foo".to_string()),
|
||||
/// Token::SingleQuotedString("foo".to_string().into()),
|
||||
/// ]);
|
||||
pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self {
|
||||
Self {
|
||||
|
|
@ -1117,15 +1148,18 @@ impl<'a> Tokenizer<'a> {
|
|||
&self,
|
||||
consumed_byte_len: usize,
|
||||
chars: &mut State<'a>,
|
||||
) -> Result<Option<Token>, TokenizerError> {
|
||||
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
|
||||
chars.next(); // consume the first char
|
||||
let word = self.tokenize_word(consumed_byte_len, chars)?;
|
||||
|
||||
// Calculate where the first character started
|
||||
let first_char_byte_pos = chars.byte_pos.saturating_sub(consumed_byte_len);
|
||||
let word = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
|
||||
|
||||
// TODO: implement parsing of exponent here
|
||||
if word.chars().all(|x| x.is_ascii_digit() || x == '.') {
|
||||
let mut inner_state = State {
|
||||
peekable: word.chars().peekable(),
|
||||
source: &word,
|
||||
source: word,
|
||||
line: 0,
|
||||
col: 0,
|
||||
byte_pos: 0,
|
||||
|
|
@ -1136,7 +1170,7 @@ impl<'a> Tokenizer<'a> {
|
|||
return Ok(Some(Token::Number(s, false)));
|
||||
}
|
||||
|
||||
Ok(Some(Token::make_word(&word, None)))
|
||||
Ok(Some(BorrowedToken::make_word_borrowed(word, None)))
|
||||
}
|
||||
|
||||
/// Get the next token or return None
|
||||
|
|
@ -1144,7 +1178,7 @@ impl<'a> Tokenizer<'a> {
|
|||
&self,
|
||||
chars: &mut State<'a>,
|
||||
prev_token: Option<&BorrowedToken<'a>>,
|
||||
) -> Result<Option<Token>, TokenizerError> {
|
||||
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
|
||||
match chars.peek() {
|
||||
Some(&ch) => match ch {
|
||||
' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
|
||||
|
|
@ -1166,12 +1200,12 @@ impl<'a> Tokenizer<'a> {
|
|||
Some('\'') => {
|
||||
if self.dialect.supports_triple_quoted_string() {
|
||||
return self
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
|
||||
chars,
|
||||
'\'',
|
||||
false,
|
||||
Token::SingleQuotedByteStringLiteral,
|
||||
Token::TripleSingleQuotedByteStringLiteral,
|
||||
BorrowedToken::SingleQuotedByteStringLiteral,
|
||||
BorrowedToken::TripleSingleQuotedByteStringLiteral,
|
||||
);
|
||||
}
|
||||
let s = self.tokenize_single_quoted_string(chars, '\'', false)?;
|
||||
|
|
@ -1180,12 +1214,12 @@ impl<'a> Tokenizer<'a> {
|
|||
Some('\"') => {
|
||||
if self.dialect.supports_triple_quoted_string() {
|
||||
return self
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
|
||||
chars,
|
||||
'"',
|
||||
false,
|
||||
Token::DoubleQuotedByteStringLiteral,
|
||||
Token::TripleDoubleQuotedByteStringLiteral,
|
||||
BorrowedToken::DoubleQuotedByteStringLiteral,
|
||||
BorrowedToken::TripleDoubleQuotedByteStringLiteral,
|
||||
);
|
||||
}
|
||||
let s = self.tokenize_single_quoted_string(chars, '\"', false)?;
|
||||
|
|
@ -1193,8 +1227,9 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
_ => {
|
||||
// regular identifier starting with an "b" or "B"
|
||||
let s = self.tokenize_word(b.len_utf8(), chars)?;
|
||||
Ok(Some(Token::make_word(&s, None)))
|
||||
let first_char_byte_pos = chars.byte_pos.saturating_sub(b.len_utf8());
|
||||
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
|
||||
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1203,25 +1238,26 @@ impl<'a> Tokenizer<'a> {
|
|||
chars.next(); // consume
|
||||
match chars.peek() {
|
||||
Some('\'') => self
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
|
||||
chars,
|
||||
'\'',
|
||||
false,
|
||||
Token::SingleQuotedRawStringLiteral,
|
||||
Token::TripleSingleQuotedRawStringLiteral,
|
||||
BorrowedToken::SingleQuotedRawStringLiteral,
|
||||
BorrowedToken::TripleSingleQuotedRawStringLiteral,
|
||||
),
|
||||
Some('\"') => self
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
|
||||
chars,
|
||||
'"',
|
||||
false,
|
||||
Token::DoubleQuotedRawStringLiteral,
|
||||
Token::TripleDoubleQuotedRawStringLiteral,
|
||||
BorrowedToken::DoubleQuotedRawStringLiteral,
|
||||
BorrowedToken::TripleDoubleQuotedRawStringLiteral,
|
||||
),
|
||||
_ => {
|
||||
// regular identifier starting with an "r" or "R"
|
||||
let s = self.tokenize_word(b.len_utf8(), chars)?;
|
||||
Ok(Some(Token::make_word(&s, None)))
|
||||
let first_char_byte_pos = chars.byte_pos.saturating_sub(b.len_utf8());
|
||||
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
|
||||
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1239,8 +1275,9 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
_ => {
|
||||
// regular identifier starting with an "N"
|
||||
let s = self.tokenize_word(n.len_utf8(), chars)?;
|
||||
Ok(Some(Token::make_word(&s, None)))
|
||||
let first_char_byte_pos = chars.byte_pos.saturating_sub(n.len_utf8());
|
||||
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
|
||||
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1256,8 +1293,9 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
_ => {
|
||||
// regular identifier starting with an "E" or "e"
|
||||
let s = self.tokenize_word(x.len_utf8(), chars)?;
|
||||
Ok(Some(Token::make_word(&s, None)))
|
||||
let first_char_byte_pos = chars.byte_pos.saturating_sub(x.len_utf8());
|
||||
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
|
||||
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1275,8 +1313,9 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
}
|
||||
// regular identifier starting with an "U" or "u"
|
||||
let s = self.tokenize_word(x.len_utf8(), chars)?;
|
||||
Ok(Some(Token::make_word(&s, None)))
|
||||
let first_char_byte_pos = chars.byte_pos.saturating_sub(x.len_utf8());
|
||||
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
|
||||
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
|
||||
}
|
||||
// The spec only allows an uppercase 'X' to introduce a hex
|
||||
// string, but PostgreSQL, at least, allows a lowercase 'x' too.
|
||||
|
|
@ -1290,8 +1329,9 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
_ => {
|
||||
// regular identifier starting with an "X"
|
||||
let s = self.tokenize_word(x.len_utf8(), chars)?;
|
||||
Ok(Some(Token::make_word(&s, None)))
|
||||
let first_char_byte_pos = chars.byte_pos.saturating_sub(x.len_utf8());
|
||||
let s = self.tokenize_word_borrowed(first_char_byte_pos, chars)?;
|
||||
Ok(Some(BorrowedToken::make_word_borrowed(s, None)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1299,21 +1339,21 @@ impl<'a> Tokenizer<'a> {
|
|||
'\'' => {
|
||||
if self.dialect.supports_triple_quoted_string() {
|
||||
return self
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
|
||||
chars,
|
||||
'\'',
|
||||
self.dialect.supports_string_literal_backslash_escape(),
|
||||
Token::SingleQuotedString,
|
||||
Token::TripleSingleQuotedString,
|
||||
|s| BorrowedToken::SingleQuotedString(Cow::Owned(s)),
|
||||
BorrowedToken::TripleSingleQuotedString,
|
||||
);
|
||||
}
|
||||
let s = self.tokenize_single_quoted_string(
|
||||
let s = self.tokenize_single_quoted_string_borrowed(
|
||||
chars,
|
||||
'\'',
|
||||
self.dialect.supports_string_literal_backslash_escape(),
|
||||
)?;
|
||||
|
||||
Ok(Some(Token::SingleQuotedString(s)))
|
||||
Ok(Some(BorrowedToken::SingleQuotedString(s)))
|
||||
}
|
||||
// double quoted string
|
||||
'\"' if !self.dialect.is_delimited_identifier_start(ch)
|
||||
|
|
@ -1321,12 +1361,12 @@ impl<'a> Tokenizer<'a> {
|
|||
{
|
||||
if self.dialect.supports_triple_quoted_string() {
|
||||
return self
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
|
||||
.tokenize_single_or_triple_quoted_string::<fn(String) -> BorrowedToken<'a>>(
|
||||
chars,
|
||||
'"',
|
||||
self.dialect.supports_string_literal_backslash_escape(),
|
||||
Token::DoubleQuotedString,
|
||||
Token::TripleDoubleQuotedString,
|
||||
BorrowedToken::DoubleQuotedString,
|
||||
BorrowedToken::TripleDoubleQuotedString,
|
||||
);
|
||||
}
|
||||
let s = self.tokenize_single_quoted_string(
|
||||
|
|
@ -1536,11 +1576,11 @@ impl<'a> Tokenizer<'a> {
|
|||
|
||||
if is_comment {
|
||||
chars.next(); // consume second '-'
|
||||
let comment = self.tokenize_single_line_comment(chars)?;
|
||||
return Ok(Some(Token::Whitespace(
|
||||
let comment = self.tokenize_single_line_comment_borrowed(chars)?;
|
||||
return Ok(Some(BorrowedToken::Whitespace(
|
||||
Whitespace::SingleLineComment {
|
||||
prefix: "--".to_owned(),
|
||||
comment,
|
||||
prefix: Cow::Borrowed("--"),
|
||||
comment: Cow::Borrowed(comment),
|
||||
},
|
||||
)));
|
||||
}
|
||||
|
|
@ -1567,11 +1607,13 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
Some('/') if dialect_of!(self is SnowflakeDialect) => {
|
||||
chars.next(); // consume the second '/', starting a snowflake single-line comment
|
||||
let comment = self.tokenize_single_line_comment(chars)?;
|
||||
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "//".to_owned(),
|
||||
comment,
|
||||
})))
|
||||
let comment = self.tokenize_single_line_comment_borrowed(chars)?;
|
||||
Ok(Some(BorrowedToken::Whitespace(
|
||||
Whitespace::SingleLineComment {
|
||||
prefix: Cow::Borrowed("//"),
|
||||
comment: Cow::Borrowed(comment),
|
||||
},
|
||||
)))
|
||||
}
|
||||
Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => {
|
||||
self.consume_and_return(chars, Token::DuckIntDiv)
|
||||
|
|
@ -1773,11 +1815,13 @@ impl<'a> Tokenizer<'a> {
|
|||
'#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) =>
|
||||
{
|
||||
chars.next(); // consume the '#', starting a snowflake single-line comment
|
||||
let comment = self.tokenize_single_line_comment(chars)?;
|
||||
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "#".to_owned(),
|
||||
comment,
|
||||
})))
|
||||
let comment = self.tokenize_single_line_comment_borrowed(chars)?;
|
||||
Ok(Some(BorrowedToken::Whitespace(
|
||||
Whitespace::SingleLineComment {
|
||||
prefix: Cow::Borrowed("#"),
|
||||
comment: Cow::Borrowed(comment),
|
||||
},
|
||||
)))
|
||||
}
|
||||
'~' => {
|
||||
chars.next(); // consume
|
||||
|
|
@ -1923,10 +1967,10 @@ impl<'a> Tokenizer<'a> {
|
|||
/// Consume the next character, then parse a custom binary operator. The next character should be included in the prefix
|
||||
fn consume_for_binop(
|
||||
&self,
|
||||
chars: &mut State,
|
||||
chars: &mut State<'a>,
|
||||
prefix: &str,
|
||||
default: Token,
|
||||
) -> Result<Option<Token>, TokenizerError> {
|
||||
default: BorrowedToken<'a>,
|
||||
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
|
||||
chars.next(); // consume the first char
|
||||
self.start_binop_opt(chars, prefix, Some(default))
|
||||
}
|
||||
|
|
@ -1934,20 +1978,20 @@ impl<'a> Tokenizer<'a> {
|
|||
/// parse a custom binary operator
|
||||
fn start_binop(
|
||||
&self,
|
||||
chars: &mut State,
|
||||
chars: &mut State<'a>,
|
||||
prefix: &str,
|
||||
default: Token,
|
||||
) -> Result<Option<Token>, TokenizerError> {
|
||||
default: BorrowedToken<'a>,
|
||||
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
|
||||
self.start_binop_opt(chars, prefix, Some(default))
|
||||
}
|
||||
|
||||
/// parse a custom binary operator
|
||||
fn start_binop_opt(
|
||||
&self,
|
||||
chars: &mut State,
|
||||
chars: &mut State<'a>,
|
||||
prefix: &str,
|
||||
default: Option<Token>,
|
||||
) -> Result<Option<Token>, TokenizerError> {
|
||||
default: Option<BorrowedToken<'a>>,
|
||||
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
|
||||
let mut custom = None;
|
||||
while let Some(&ch) = chars.peek() {
|
||||
if !self.dialect.is_custom_operator_part(ch) {
|
||||
|
|
@ -2132,16 +2176,6 @@ impl<'a> Tokenizer<'a> {
|
|||
})
|
||||
}
|
||||
|
||||
// Consume characters until newline
|
||||
fn tokenize_single_line_comment(
|
||||
&self,
|
||||
chars: &mut State<'a>,
|
||||
) -> Result<String, TokenizerError> {
|
||||
Ok(self
|
||||
.tokenize_single_line_comment_borrowed(chars)?
|
||||
.to_string())
|
||||
}
|
||||
|
||||
/// Tokenize a single-line comment, returning a borrowed slice.
|
||||
/// Returns a slice that includes the terminating newline character.
|
||||
fn tokenize_single_line_comment_borrowed(
|
||||
|
|
@ -2167,29 +2201,6 @@ impl<'a> Tokenizer<'a> {
|
|||
self.safe_slice(chars.source, start_pos, chars.byte_pos, error_loc)
|
||||
}
|
||||
|
||||
/// Tokenize an identifier or keyword, after the first char(s) have already been consumed.
|
||||
/// `consumed_byte_len` is the byte length of the consumed character(s).
|
||||
fn tokenize_word(
|
||||
&self,
|
||||
consumed_byte_len: usize,
|
||||
chars: &mut State<'a>,
|
||||
) -> Result<String, TokenizerError> {
|
||||
let error_loc = chars.location();
|
||||
|
||||
// Overflow check: ensure we can safely subtract
|
||||
if consumed_byte_len > chars.byte_pos {
|
||||
return self.tokenizer_error(error_loc, "Invalid byte position in tokenize_word");
|
||||
}
|
||||
|
||||
// Calculate where the first character started
|
||||
let first_char_byte_pos = chars.byte_pos - consumed_byte_len;
|
||||
|
||||
// Use the zero-copy version and convert to String
|
||||
Ok(self
|
||||
.tokenize_word_borrowed(first_char_byte_pos, chars)?
|
||||
.to_string())
|
||||
}
|
||||
|
||||
/// Tokenize an identifier or keyword, returning a borrowed slice when possible.
|
||||
/// The first character position must be provided (before it was consumed).
|
||||
/// Returns a slice with the same lifetime as the State's source.
|
||||
|
|
@ -2245,14 +2256,14 @@ impl<'a> Tokenizer<'a> {
|
|||
/// Examples: `'abc'`, `'''abc'''`, `"""abc"""`.
|
||||
fn tokenize_single_or_triple_quoted_string<F>(
|
||||
&self,
|
||||
chars: &mut State,
|
||||
chars: &mut State<'a>,
|
||||
quote_style: char,
|
||||
backslash_escape: bool,
|
||||
single_quote_token: F,
|
||||
triple_quote_token: F,
|
||||
) -> Result<Option<Token>, TokenizerError>
|
||||
) -> Result<Option<BorrowedToken<'a>>, TokenizerError>
|
||||
where
|
||||
F: Fn(String) -> Token,
|
||||
F: Fn(String) -> BorrowedToken<'a>,
|
||||
{
|
||||
let error_loc = chars.location();
|
||||
|
||||
|
|
@ -2316,6 +2327,79 @@ impl<'a> Tokenizer<'a> {
|
|||
)
|
||||
}
|
||||
|
||||
/// Reads a string literal quoted by a single quote character, returning Cow for zero-copy.
|
||||
/// Returns Cow::Borrowed when the string has no escape sequences or doubled quotes,
|
||||
/// Cow::Owned when processing is required.
|
||||
fn tokenize_single_quoted_string_borrowed(
|
||||
&self,
|
||||
chars: &mut State<'a>,
|
||||
quote_style: char,
|
||||
backslash_escape: bool,
|
||||
) -> Result<Cow<'a, str>, TokenizerError> {
|
||||
let start_byte_pos = chars.byte_pos;
|
||||
let error_loc = chars.location();
|
||||
|
||||
// Consume opening quote
|
||||
if chars.next() != Some(quote_style) {
|
||||
return self.tokenizer_error(error_loc, "Expected opening quote");
|
||||
}
|
||||
|
||||
let content_start = chars.byte_pos;
|
||||
let mut needs_processing = false;
|
||||
|
||||
// Scan the string to detect if processing is needed
|
||||
loop {
|
||||
match chars.peek() {
|
||||
None => {
|
||||
return self.tokenizer_error(error_loc, "Unterminated string literal");
|
||||
}
|
||||
Some(&ch) if ch == quote_style => {
|
||||
// Found a quote - check if it's doubled or the end
|
||||
let quote_pos = chars.byte_pos;
|
||||
chars.next(); // consume quote
|
||||
|
||||
if chars.peek() == Some(&quote_style) {
|
||||
// Doubled quote - needs processing
|
||||
needs_processing = true;
|
||||
chars.next(); // consume second quote
|
||||
} else {
|
||||
// End of string
|
||||
if needs_processing {
|
||||
// Reset and use the owned version
|
||||
chars.byte_pos = start_byte_pos;
|
||||
chars.line = error_loc.line;
|
||||
chars.col = error_loc.column;
|
||||
// Recreate peekable from current position
|
||||
let remaining = &chars.source[start_byte_pos..];
|
||||
chars.peekable = remaining.chars().peekable();
|
||||
|
||||
let s = self.tokenize_single_quoted_string(
|
||||
chars,
|
||||
quote_style,
|
||||
backslash_escape,
|
||||
)?;
|
||||
return Ok(Cow::Owned(s));
|
||||
} else {
|
||||
// Can use borrowed slice (excluding quotes)
|
||||
return Ok(Cow::Borrowed(&chars.source[content_start..quote_pos]));
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(&'\\') if backslash_escape => {
|
||||
// Escape sequence - needs processing
|
||||
needs_processing = true;
|
||||
chars.next(); // consume backslash
|
||||
if chars.next().is_none() {
|
||||
return self.tokenizer_error(error_loc, "Unterminated string literal");
|
||||
}
|
||||
}
|
||||
Some(_) => {
|
||||
chars.next(); // consume regular character
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Read a quoted string.
|
||||
fn tokenize_quoted_string(
|
||||
&self,
|
||||
|
|
@ -2426,11 +2510,11 @@ impl<'a> Tokenizer<'a> {
|
|||
fn tokenize_multiline_comment(
|
||||
&self,
|
||||
chars: &mut State<'a>,
|
||||
) -> Result<Option<Token>, TokenizerError> {
|
||||
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
|
||||
let s = self.tokenize_multiline_comment_borrowed(chars)?;
|
||||
Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(
|
||||
s.to_string(),
|
||||
))))
|
||||
Ok(Some(BorrowedToken::Whitespace(
|
||||
Whitespace::MultiLineComment(Cow::Borrowed(s)),
|
||||
)))
|
||||
}
|
||||
|
||||
/// Tokenize a multi-line comment, returning a borrowed slice.
|
||||
|
|
@ -2541,9 +2625,9 @@ impl<'a> Tokenizer<'a> {
|
|||
#[allow(clippy::unnecessary_wraps)]
|
||||
fn consume_and_return(
|
||||
&self,
|
||||
chars: &mut State,
|
||||
t: Token,
|
||||
) -> Result<Option<Token>, TokenizerError> {
|
||||
chars: &mut State<'a>,
|
||||
t: BorrowedToken<'a>,
|
||||
) -> Result<Option<BorrowedToken<'a>>, TokenizerError> {
|
||||
chars.next();
|
||||
Ok(Some(t))
|
||||
}
|
||||
|
|
@ -3062,12 +3146,12 @@ mod tests {
|
|||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Word(Word {
|
||||
value: "foo".to_string(),
|
||||
value: "foo".to_string().into(),
|
||||
quote_style: None,
|
||||
keyword: Keyword::NoKeyword,
|
||||
}),
|
||||
Token::DoubleEq,
|
||||
Token::SingleQuotedString("1".to_string()),
|
||||
Token::SingleQuotedString("1".to_string().into()),
|
||||
];
|
||||
|
||||
compare(expected, tokens);
|
||||
|
|
@ -3169,11 +3253,11 @@ mod tests {
|
|||
let expected = vec![
|
||||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::SingleQuotedString(String::from("a")),
|
||||
Token::SingleQuotedString(String::from("a").into()),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::StringConcat,
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::SingleQuotedString(String::from("b")),
|
||||
Token::SingleQuotedString(String::from("b").into()),
|
||||
];
|
||||
|
||||
compare(expected, tokens);
|
||||
|
|
@ -3352,7 +3436,7 @@ mod tests {
|
|||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Neq,
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::SingleQuotedString(String::from("Not Provided")),
|
||||
Token::SingleQuotedString(String::from("Not Provided").into()),
|
||||
];
|
||||
|
||||
compare(expected, tokens);
|
||||
|
|
@ -3379,7 +3463,9 @@ mod tests {
|
|||
|
||||
let dialect = GenericDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
|
||||
let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())];
|
||||
let expected = vec![Token::SingleQuotedString(
|
||||
"foo\r\nbar\nbaz".to_string().into(),
|
||||
)];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
|
|
@ -3669,8 +3755,8 @@ mod tests {
|
|||
vec![
|
||||
Token::Number("0".to_string(), false),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: "this is a comment\n".to_string(),
|
||||
prefix: "--".to_string().into(),
|
||||
comment: "this is a comment\n".to_string().into(),
|
||||
}),
|
||||
Token::Number("1".to_string(), false),
|
||||
],
|
||||
|
|
@ -3680,8 +3766,8 @@ mod tests {
|
|||
vec![
|
||||
Token::Number("0".to_string(), false),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: "this is a comment\r1".to_string(),
|
||||
prefix: "--".to_string().into(),
|
||||
comment: "this is a comment\r1".to_string().into(),
|
||||
}),
|
||||
],
|
||||
),
|
||||
|
|
@ -3690,8 +3776,8 @@ mod tests {
|
|||
vec![
|
||||
Token::Number("0".to_string(), false),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: "this is a comment\r\n".to_string(),
|
||||
prefix: "--".to_string().into(),
|
||||
comment: "this is a comment\r\n".to_string().into(),
|
||||
}),
|
||||
Token::Number("1".to_string(), false),
|
||||
],
|
||||
|
|
@ -3715,8 +3801,8 @@ mod tests {
|
|||
let expected = vec![
|
||||
Token::Number("1".to_string(), false),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: "\r".to_string(),
|
||||
prefix: "--".to_string().into(),
|
||||
comment: "\r".to_string().into(),
|
||||
}),
|
||||
Token::Number("0".to_string(), false),
|
||||
];
|
||||
|
|
@ -3730,8 +3816,8 @@ mod tests {
|
|||
let dialect = GenericDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
|
||||
let expected = vec![Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: "this is a comment".to_string(),
|
||||
prefix: "--".to_string().into(),
|
||||
comment: "this is a comment".to_string().into(),
|
||||
})];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
|
@ -3745,7 +3831,7 @@ mod tests {
|
|||
let expected = vec![
|
||||
Token::Number("0".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(
|
||||
"multi-line\n* /comment".to_string(),
|
||||
"multi-line\n* /comment".to_string().into(),
|
||||
)),
|
||||
Token::Number("1".to_string(), false),
|
||||
];
|
||||
|
|
@ -3764,7 +3850,7 @@ mod tests {
|
|||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Div,
|
||||
Token::Word(Word {
|
||||
value: "comment".to_string(),
|
||||
value: "comment".to_string().into(),
|
||||
quote_style: None,
|
||||
keyword: Keyword::COMMENT,
|
||||
}),
|
||||
|
|
@ -3791,7 +3877,9 @@ mod tests {
|
|||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Number("1".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(
|
||||
" a /* b */ c ".to_string().into(),
|
||||
)),
|
||||
Token::Number("0".to_string(), false),
|
||||
],
|
||||
);
|
||||
|
|
@ -3805,7 +3893,7 @@ mod tests {
|
|||
Token::make_keyword("select"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Number("1".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())),
|
||||
Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string().into())),
|
||||
Token::Number("0".to_string(), false),
|
||||
],
|
||||
);
|
||||
|
|
@ -3820,7 +3908,7 @@ mod tests {
|
|||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Number("1".to_string(), false),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(
|
||||
"/* nested comment ".to_string(),
|
||||
"/* nested comment ".to_string().into(),
|
||||
)),
|
||||
Token::Mul,
|
||||
Token::Div,
|
||||
|
|
@ -3837,7 +3925,9 @@ mod tests {
|
|||
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
|
||||
let expected = vec![
|
||||
Token::Whitespace(Whitespace::Newline),
|
||||
Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())),
|
||||
Token::Whitespace(Whitespace::MultiLineComment(
|
||||
"* Comment *".to_string().into(),
|
||||
)),
|
||||
Token::Whitespace(Whitespace::Newline),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
|
|
@ -4221,14 +4311,16 @@ mod tests {
|
|||
.with_unescape(false)
|
||||
.tokenize()
|
||||
.unwrap();
|
||||
let expected = vec![Token::SingleQuotedString(expected.to_string())];
|
||||
let expected = vec![Token::SingleQuotedString(expected.to_string().into())];
|
||||
compare(expected, tokens);
|
||||
|
||||
let tokens = Tokenizer::new(&dialect, sql)
|
||||
.with_unescape(true)
|
||||
.tokenize()
|
||||
.unwrap();
|
||||
let expected = vec![Token::SingleQuotedString(expected_unescaped.to_string())];
|
||||
let expected = vec![Token::SingleQuotedString(
|
||||
expected_unescaped.to_string().into(),
|
||||
)];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
|
|
@ -4245,7 +4337,7 @@ mod tests {
|
|||
let dialect = GenericDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
|
||||
|
||||
let expected = vec![Token::SingleQuotedString(expected.to_string())];
|
||||
let expected = vec![Token::SingleQuotedString(expected.to_string().into())];
|
||||
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
|
@ -4255,7 +4347,7 @@ mod tests {
|
|||
let dialect = MySqlDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
|
||||
|
||||
let expected = vec![Token::SingleQuotedString(expected.to_string())];
|
||||
let expected = vec![Token::SingleQuotedString(expected.to_string().into())];
|
||||
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
|
@ -4358,7 +4450,7 @@ mod tests {
|
|||
.unwrap();
|
||||
let expected = vec![
|
||||
Token::DoubleQuotedString("".to_string()),
|
||||
Token::SingleQuotedString("".to_string()),
|
||||
Token::SingleQuotedString("".to_string().into()),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
|
||||
|
|
@ -4368,7 +4460,7 @@ mod tests {
|
|||
.tokenize()
|
||||
.unwrap();
|
||||
let expected = vec![
|
||||
Token::SingleQuotedString("".to_string()),
|
||||
Token::SingleQuotedString("".to_string().into()),
|
||||
Token::DoubleQuotedString("".to_string()),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
|
|
@ -4377,7 +4469,7 @@ mod tests {
|
|||
let dialect = SnowflakeDialect {};
|
||||
let sql = r#"''''''"#;
|
||||
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
|
||||
let expected = vec![Token::SingleQuotedString("''".to_string())];
|
||||
let expected = vec![Token::SingleQuotedString("''".to_string().into())];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
|
|
@ -4409,7 +4501,7 @@ mod tests {
|
|||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::AtSign,
|
||||
Token::SingleQuotedString("1".to_string()),
|
||||
Token::SingleQuotedString("1".to_string().into()),
|
||||
];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
|
@ -4467,7 +4559,7 @@ mod tests {
|
|||
Token::make_keyword("select"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::make_word("e", None),
|
||||
Token::SingleQuotedString("...".to_string()),
|
||||
Token::SingleQuotedString("...".to_string().into()),
|
||||
],
|
||||
);
|
||||
|
||||
|
|
@ -4477,7 +4569,7 @@ mod tests {
|
|||
Token::make_keyword("select"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::make_word("E", None),
|
||||
Token::SingleQuotedString("...".to_string()),
|
||||
Token::SingleQuotedString("...".to_string().into()),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
|
@ -4513,7 +4605,7 @@ mod tests {
|
|||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Minus,
|
||||
Token::Minus,
|
||||
Token::SingleQuotedString("abc".to_string()),
|
||||
Token::SingleQuotedString("abc".to_string().into()),
|
||||
],
|
||||
);
|
||||
|
||||
|
|
@ -4524,8 +4616,8 @@ mod tests {
|
|||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: " 'abc'".to_string(),
|
||||
prefix: "--".to_string().into(),
|
||||
comment: " 'abc'".to_string().into(),
|
||||
}),
|
||||
],
|
||||
);
|
||||
|
|
@ -4551,8 +4643,8 @@ mod tests {
|
|||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: "'abc'".to_string(),
|
||||
prefix: "--".to_string().into(),
|
||||
comment: "'abc'".to_string().into(),
|
||||
}),
|
||||
],
|
||||
);
|
||||
|
|
@ -4564,8 +4656,8 @@ mod tests {
|
|||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: " 'abc'".to_string(),
|
||||
prefix: "--".to_string().into(),
|
||||
comment: " 'abc'".to_string().into(),
|
||||
}),
|
||||
],
|
||||
);
|
||||
|
|
@ -4577,8 +4669,8 @@ mod tests {
|
|||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "--".to_string(),
|
||||
comment: "".to_string(),
|
||||
prefix: "--".to_string().into(),
|
||||
comment: "".to_string().into(),
|
||||
}),
|
||||
],
|
||||
);
|
||||
|
|
@ -4622,13 +4714,13 @@ mod tests {
|
|||
Token::make_keyword("SELECT"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Word(Word {
|
||||
value: "table".to_string(),
|
||||
value: "table".to_string().into(),
|
||||
quote_style: None,
|
||||
keyword: Keyword::TABLE,
|
||||
}),
|
||||
Token::Period,
|
||||
Token::Word(Word {
|
||||
value: "_col".to_string(),
|
||||
value: "_col".to_string().into(),
|
||||
quote_style: None,
|
||||
keyword: Keyword::NoKeyword,
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -2629,7 +2629,7 @@ fn test_export_data() {
|
|||
body: Box::new(SetExpr::Select(Box::new(Select {
|
||||
select_token: AttachedToken(TokenWithSpan::new(
|
||||
Token::Word(Word {
|
||||
value: "SELECT".to_string(),
|
||||
value: "SELECT".to_string().into(),
|
||||
quote_style: None,
|
||||
keyword: Keyword::SELECT,
|
||||
}),
|
||||
|
|
@ -2733,7 +2733,7 @@ fn test_export_data() {
|
|||
body: Box::new(SetExpr::Select(Box::new(Select {
|
||||
select_token: AttachedToken(TokenWithSpan::new(
|
||||
Token::Word(Word {
|
||||
value: "SELECT".to_string(),
|
||||
value: "SELECT".to_string().into(),
|
||||
quote_style: None,
|
||||
keyword: Keyword::SELECT,
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -1581,7 +1581,7 @@ fn test_mssql_while_statement() {
|
|||
while_block: ConditionalStatementBlock {
|
||||
start_token: AttachedToken(TokenWithSpan {
|
||||
token: Token::Word(Word {
|
||||
value: "WHILE".to_string(),
|
||||
value: "WHILE".to_string().into(),
|
||||
quote_style: None,
|
||||
keyword: Keyword::WHILE
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -566,8 +566,8 @@ fn test_snowflake_single_line_tokenize() {
|
|||
Token::Whitespace(Whitespace::Space),
|
||||
Token::make_keyword("TABLE"),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "#".to_string(),
|
||||
comment: " this is a comment \n".to_string(),
|
||||
prefix: "#".to_string().into(),
|
||||
comment: " this is a comment \n".to_string().into(),
|
||||
}),
|
||||
Token::make_word("table_1", None),
|
||||
];
|
||||
|
|
@ -583,8 +583,8 @@ fn test_snowflake_single_line_tokenize() {
|
|||
Token::make_keyword("TABLE"),
|
||||
Token::Whitespace(Whitespace::Space),
|
||||
Token::Whitespace(Whitespace::SingleLineComment {
|
||||
prefix: "//".to_string(),
|
||||
comment: " this is a comment \n".to_string(),
|
||||
prefix: "//".to_string().into(),
|
||||
comment: " this is a comment \n".to_string().into(),
|
||||
}),
|
||||
Token::make_word("table_1", None),
|
||||
];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue