From 5cab18963e5d6eda5bc61c13b22991000b82b8c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Tue, 14 Jul 2020 21:48:07 +0200 Subject: [PATCH] Add TPCH reggression tests (#221) * Add TPC-H reggression tests --- tests/queries/tpch/1.sql | 21 +++++++++++++++ tests/queries/tpch/10.sql | 34 +++++++++++++++++++++++++ tests/queries/tpch/11.sql | 30 ++++++++++++++++++++++ tests/queries/tpch/12.sql | 31 ++++++++++++++++++++++ tests/queries/tpch/13.sql | 23 +++++++++++++++++ tests/queries/tpch/14.sql | 16 ++++++++++++ tests/queries/tpch/15.sql | 36 ++++++++++++++++++++++++++ tests/queries/tpch/16.sql | 33 ++++++++++++++++++++++++ tests/queries/tpch/17.sql | 20 +++++++++++++++ tests/queries/tpch/18.sql | 35 +++++++++++++++++++++++++ tests/queries/tpch/19.sql | 38 +++++++++++++++++++++++++++ tests/queries/tpch/2.sql | 46 +++++++++++++++++++++++++++++++++ tests/queries/tpch/20.sql | 40 +++++++++++++++++++++++++++++ tests/queries/tpch/21.sql | 42 ++++++++++++++++++++++++++++++ tests/queries/tpch/22.sql | 40 +++++++++++++++++++++++++++++ tests/queries/tpch/3.sql | 25 ++++++++++++++++++ tests/queries/tpch/4.sql | 24 ++++++++++++++++++ tests/queries/tpch/5.sql | 27 ++++++++++++++++++++ tests/queries/tpch/6.sql | 12 +++++++++ tests/queries/tpch/7.sql | 42 ++++++++++++++++++++++++++++++ tests/queries/tpch/8.sql | 40 +++++++++++++++++++++++++++++ tests/queries/tpch/9.sql | 35 +++++++++++++++++++++++++ tests/sqlparser_regression.rs | 48 +++++++++++++++++++++++++++++++++++ 23 files changed, 738 insertions(+) create mode 100644 tests/queries/tpch/1.sql create mode 100644 tests/queries/tpch/10.sql create mode 100644 tests/queries/tpch/11.sql create mode 100644 tests/queries/tpch/12.sql create mode 100644 tests/queries/tpch/13.sql create mode 100644 tests/queries/tpch/14.sql create mode 100644 tests/queries/tpch/15.sql create mode 100644 tests/queries/tpch/16.sql create mode 100644 tests/queries/tpch/17.sql create mode 100644 tests/queries/tpch/18.sql create mode 100644 tests/queries/tpch/19.sql create mode 100644 tests/queries/tpch/2.sql create mode 100644 tests/queries/tpch/20.sql create mode 100644 tests/queries/tpch/21.sql create mode 100644 tests/queries/tpch/22.sql create mode 100644 tests/queries/tpch/3.sql create mode 100644 tests/queries/tpch/4.sql create mode 100644 tests/queries/tpch/5.sql create mode 100644 tests/queries/tpch/6.sql create mode 100644 tests/queries/tpch/7.sql create mode 100644 tests/queries/tpch/8.sql create mode 100644 tests/queries/tpch/9.sql create mode 100644 tests/sqlparser_regression.rs diff --git a/tests/queries/tpch/1.sql b/tests/queries/tpch/1.sql new file mode 100644 index 00000000..ae44c94d --- /dev/null +++ b/tests/queries/tpch/1.sql @@ -0,0 +1,21 @@ +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval '90' day (3) +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; diff --git a/tests/queries/tpch/10.sql b/tests/queries/tpch/10.sql new file mode 100644 index 00000000..a8de1299 --- /dev/null +++ b/tests/queries/tpch/10.sql @@ -0,0 +1,34 @@ +-- using default substitutions + + +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date '1993-10-01' + and o_orderdate < date '1993-10-01' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc; diff --git a/tests/queries/tpch/11.sql b/tests/queries/tpch/11.sql new file mode 100644 index 00000000..f9cf254b --- /dev/null +++ b/tests/queries/tpch/11.sql @@ -0,0 +1,30 @@ +-- using default substitutions + + +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0001000000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' + ) +order by + value desc; diff --git a/tests/queries/tpch/12.sql b/tests/queries/tpch/12.sql new file mode 100644 index 00000000..ca9c494e --- /dev/null +++ b/tests/queries/tpch/12.sql @@ -0,0 +1,31 @@ +-- using default substitutions + + +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('MAIL', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date '1994-01-01' + and l_receiptdate < date '1994-01-01' + interval '1' year +group by + l_shipmode +order by + l_shipmode; diff --git a/tests/queries/tpch/13.sql b/tests/queries/tpch/13.sql new file mode 100644 index 00000000..32b0ebeb --- /dev/null +++ b/tests/queries/tpch/13.sql @@ -0,0 +1,23 @@ +-- using default substitutions + + +select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%special%requests%' + group by + c_custkey + ) as c_orders (c_custkey, c_count) +group by + c_count +order by + custdist desc, + c_count desc; diff --git a/tests/queries/tpch/14.sql b/tests/queries/tpch/14.sql new file mode 100644 index 00000000..74f9643e --- /dev/null +++ b/tests/queries/tpch/14.sql @@ -0,0 +1,16 @@ +-- using default substitutions + + +select + 100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= date '1995-09-01' + and l_shipdate < date '1995-09-01' + interval '1' month; diff --git a/tests/queries/tpch/15.sql b/tests/queries/tpch/15.sql new file mode 100644 index 00000000..8b3b8c1e --- /dev/null +++ b/tests/queries/tpch/15.sql @@ -0,0 +1,36 @@ +-- using default substitutions + +create view revenue0 (supplier_no, total_revenue) as + select + l_suppkey, + sum(l_extendedprice * (1 - l_discount)) + from + lineitem + where + l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '3' month + group by + l_suppkey; + + +select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +from + supplier, + revenue0 +where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue0 + ) +order by + s_suppkey; + +drop view revenue0; diff --git a/tests/queries/tpch/16.sql b/tests/queries/tpch/16.sql new file mode 100644 index 00000000..a0412fcb --- /dev/null +++ b/tests/queries/tpch/16.sql @@ -0,0 +1,33 @@ +-- using default substitutions + + +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; diff --git a/tests/queries/tpch/17.sql b/tests/queries/tpch/17.sql new file mode 100644 index 00000000..d59bc18a --- /dev/null +++ b/tests/queries/tpch/17.sql @@ -0,0 +1,20 @@ +-- using default substitutions + + +select + sum(l_extendedprice) / 7.0 as avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container = 'MED BOX' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ); diff --git a/tests/queries/tpch/18.sql b/tests/queries/tpch/18.sql new file mode 100644 index 00000000..e07956fe --- /dev/null +++ b/tests/queries/tpch/18.sql @@ -0,0 +1,35 @@ +-- using default substitutions + + +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 300 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate; diff --git a/tests/queries/tpch/19.sql b/tests/queries/tpch/19.sql new file mode 100644 index 00000000..908e0829 --- /dev/null +++ b/tests/queries/tpch/19.sql @@ -0,0 +1,38 @@ +-- using default substitutions + + +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 1 and l_quantity <= 1 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 10 and l_quantity <= 10 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#34' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 20 and l_quantity <= 20 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ); diff --git a/tests/queries/tpch/2.sql b/tests/queries/tpch/2.sql new file mode 100644 index 00000000..f04c1d49 --- /dev/null +++ b/tests/queries/tpch/2.sql @@ -0,0 +1,46 @@ +-- using default substitutions + + +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 15 + and p_type like '%BRASS' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey; diff --git a/tests/queries/tpch/20.sql b/tests/queries/tpch/20.sql new file mode 100644 index 00000000..7aaabc2d --- /dev/null +++ b/tests/queries/tpch/20.sql @@ -0,0 +1,40 @@ +-- using default substitutions + + +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'forest%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'CANADA' +order by + s_name; diff --git a/tests/queries/tpch/21.sql b/tests/queries/tpch/21.sql new file mode 100644 index 00000000..5a287f9a --- /dev/null +++ b/tests/queries/tpch/21.sql @@ -0,0 +1,42 @@ +-- using default substitutions + + +select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name; diff --git a/tests/queries/tpch/22.sql b/tests/queries/tpch/22.sql new file mode 100644 index 00000000..1fc8523a --- /dev/null +++ b/tests/queries/tpch/22.sql @@ -0,0 +1,40 @@ +-- using default substitutions + + +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode; diff --git a/tests/queries/tpch/3.sql b/tests/queries/tpch/3.sql new file mode 100644 index 00000000..710aac52 --- /dev/null +++ b/tests/queries/tpch/3.sql @@ -0,0 +1,25 @@ +-- using default substitutions + + +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate; diff --git a/tests/queries/tpch/4.sql b/tests/queries/tpch/4.sql new file mode 100644 index 00000000..e5adb934 --- /dev/null +++ b/tests/queries/tpch/4.sql @@ -0,0 +1,24 @@ +-- using default substitutions + + +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; diff --git a/tests/queries/tpch/5.sql b/tests/queries/tpch/5.sql new file mode 100644 index 00000000..ea376576 --- /dev/null +++ b/tests/queries/tpch/5.sql @@ -0,0 +1,27 @@ +-- using default substitutions + + +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year +group by + n_name +order by + revenue desc; diff --git a/tests/queries/tpch/6.sql b/tests/queries/tpch/6.sql new file mode 100644 index 00000000..949b7b16 --- /dev/null +++ b/tests/queries/tpch/6.sql @@ -0,0 +1,12 @@ +-- using default substitutions + + +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + and l_discount between .06 - 0.01 and .06 + 0.01 + and l_quantity < 24; diff --git a/tests/queries/tpch/7.sql b/tests/queries/tpch/7.sql new file mode 100644 index 00000000..85dd8f9c --- /dev/null +++ b/tests/queries/tpch/7.sql @@ -0,0 +1,42 @@ +-- using default substitutions + + +select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + extract(year from l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') + or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year; diff --git a/tests/queries/tpch/8.sql b/tests/queries/tpch/8.sql new file mode 100644 index 00000000..e6e4d30b --- /dev/null +++ b/tests/queries/tpch/8.sql @@ -0,0 +1,40 @@ +-- using default substitutions + + +select + o_year, + sum(case + when nation = 'BRAZIL' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'AMERICA' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'ECONOMY ANODIZED STEEL' + ) as all_nations +group by + o_year +order by + o_year; diff --git a/tests/queries/tpch/9.sql b/tests/queries/tpch/9.sql new file mode 100644 index 00000000..f9eaf65e --- /dev/null +++ b/tests/queries/tpch/9.sql @@ -0,0 +1,35 @@ +-- using default substitutions + + +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%green%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc; diff --git a/tests/sqlparser_regression.rs b/tests/sqlparser_regression.rs new file mode 100644 index 00000000..26a5e7d4 --- /dev/null +++ b/tests/sqlparser_regression.rs @@ -0,0 +1,48 @@ +use sqlparser::dialect::GenericDialect; +use sqlparser::parser::Parser; + +macro_rules! tpch_tests { + ($($name:ident: $value:expr,)*) => { + const QUERIES: &[&str] = &[ + $(include_str!(concat!("queries/tpch/", $value, ".sql"))),* + ]; + $( + + #[test] + fn $name() { + let dialect = GenericDialect {}; + + let res = Parser::parse_sql(&dialect, QUERIES[$value -1]); + // Ignore 6.sql and 22.sql + if $value != 6 && $value != 22 { + assert!(res.is_ok()); + } + } + )* + } +} + +tpch_tests! { + tpch_1: 1, + tpch_2: 2, + tpch_3: 3, + tpch_4: 4, + tpch_5: 5, + tpch_6: 6, + tpch_7: 7, + tpch_8: 8, + tpch_9: 9, + tpch_10: 10, + tpch_11: 11, + tpch_12: 12, + tpch_13: 13, + tpch_14: 14, + tpch_15: 15, + tpch_16: 16, + tpch_17: 17, + tpch_18: 18, + tpch_19: 19, + tpch_20: 20, + tpch_21: 21, + tpch_22: 22, +}