Skip to content

[SPARK-22800][TEST][SQL] Add a SSB query suite #19990

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions sql/core/src/test/resources/ssb/1.1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- SSB query 1.1: sum of lo_extendedprice * lo_discount over line orders placed
-- in 1993 with a discount of 1 to 3 and a quantity below 25.
SELECT sum(lo_extendedprice * lo_discount) AS revenue
FROM lineorder, date
WHERE lo_orderdate = d_datekey
    AND d_year = 1993
    AND lo_discount BETWEEN 1 AND 3
    AND lo_quantity < 25
6 changes: 6 additions & 0 deletions sql/core/src/test/resources/ssb/1.2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- SSB query 1.2: sum of lo_extendedprice * lo_discount over line orders whose
-- order date falls in year-month 199401, with a discount of 4 to 6 and a
-- quantity of 26 to 35.
SELECT sum(lo_extendedprice * lo_discount) AS revenue
FROM lineorder, date
WHERE lo_orderdate = d_datekey
    AND d_yearmonthnum = 199401
    AND lo_discount BETWEEN 4 AND 6
    AND lo_quantity BETWEEN 26 AND 35
6 changes: 6 additions & 0 deletions sql/core/src/test/resources/ssb/1.3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- SSB query 1.3: sum of lo_extendedprice * lo_discount over line orders placed
-- in week 6 of 1994, with a discount of 5 to 7 and a quantity of 36 to 40.
SELECT sum(lo_extendedprice * lo_discount) AS revenue
FROM lineorder, date
WHERE lo_orderdate = d_datekey
    AND d_weeknuminyear = 6
    AND d_year = 1994
    AND lo_discount BETWEEN 5 AND 7
    AND lo_quantity BETWEEN 36 AND 40
9 changes: 9 additions & 0 deletions sql/core/src/test/resources/ssb/2.1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- SSB query 2.1: lo_revenue totals per year and brand for parts in category
-- 'MFGR#12' supplied from the 'AMERICA' region.
SELECT
    sum(lo_revenue),
    d_year,
    p_brand1
FROM lineorder, date, part, supplier
WHERE lo_orderdate = d_datekey
    AND lo_partkey = p_partkey
    AND lo_suppkey = s_suppkey
    AND p_category = 'MFGR#12'
    AND s_region = 'AMERICA'
GROUP BY d_year, p_brand1
ORDER BY d_year, p_brand1
9 changes: 9 additions & 0 deletions sql/core/src/test/resources/ssb/2.2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- SSB query 2.2: lo_revenue totals per year and brand for brands between
-- 'MFGR#2221' and 'MFGR#2228' supplied from the 'ASIA' region.
SELECT
    sum(lo_revenue),
    d_year,
    p_brand1
FROM lineorder, date, part, supplier
WHERE lo_orderdate = d_datekey
    AND lo_partkey = p_partkey
    AND lo_suppkey = s_suppkey
    AND p_brand1 BETWEEN 'MFGR#2221' AND 'MFGR#2228'
    AND s_region = 'ASIA'
GROUP BY d_year, p_brand1
ORDER BY d_year, p_brand1
9 changes: 9 additions & 0 deletions sql/core/src/test/resources/ssb/2.3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- SSB query 2.3: lo_revenue totals per year for the single brand 'MFGR#2221'
-- supplied from the 'EUROPE' region.
SELECT
    sum(lo_revenue),
    d_year,
    p_brand1
FROM lineorder, date, part, supplier
WHERE lo_orderdate = d_datekey
    AND lo_partkey = p_partkey
    AND lo_suppkey = s_suppkey
    AND p_brand1 = 'MFGR#2221'
    AND s_region = 'EUROPE'
GROUP BY d_year, p_brand1
ORDER BY d_year, p_brand1
10 changes: 10 additions & 0 deletions sql/core/src/test/resources/ssb/3.1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- SSB query 3.1: lo_revenue totals per customer nation, supplier nation and
-- year, for customers and suppliers both in the 'ASIA' region, 1992 to 1997.
SELECT
    c_nation,
    s_nation,
    d_year,
    sum(lo_revenue) AS revenue
FROM customer, lineorder, supplier, date
WHERE lo_custkey = c_custkey
    AND lo_suppkey = s_suppkey
    AND lo_orderdate = d_datekey
    AND c_region = 'ASIA'
    AND s_region = 'ASIA'
    AND d_year >= 1992
    AND d_year <= 1997
GROUP BY c_nation, s_nation, d_year
ORDER BY d_year ASC, revenue DESC
10 changes: 10 additions & 0 deletions sql/core/src/test/resources/ssb/3.2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- SSB query 3.2: lo_revenue totals per customer city, supplier city and year,
-- for customers and suppliers both in 'UNITED STATES', 1992 to 1997.
SELECT
    c_city,
    s_city,
    d_year,
    sum(lo_revenue) AS revenue
FROM customer, lineorder, supplier, date
WHERE lo_custkey = c_custkey
    AND lo_suppkey = s_suppkey
    AND lo_orderdate = d_datekey
    AND c_nation = 'UNITED STATES'
    AND s_nation = 'UNITED STATES'
    AND d_year >= 1992
    AND d_year <= 1997
GROUP BY c_city, s_city, d_year
ORDER BY d_year ASC, revenue DESC
12 changes: 12 additions & 0 deletions sql/core/src/test/resources/ssb/3.3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- SSB query 3.3: lo_revenue totals per customer city, supplier city and year,
-- restricted to the cities 'UNITED KI1' and 'UNITED KI5' on both the customer
-- and supplier side, 1992 to 1997.
SELECT
    c_city,
    s_city,
    d_year,
    sum(lo_revenue) AS revenue
FROM customer, lineorder, supplier, date
WHERE lo_custkey = c_custkey
    AND lo_suppkey = s_suppkey
    AND lo_orderdate = d_datekey
    AND c_nation = 'UNITED KINGDOM'
    AND (c_city = 'UNITED KI1' OR c_city = 'UNITED KI5')
    AND (s_city = 'UNITED KI1' OR s_city = 'UNITED KI5')
    AND s_nation = 'UNITED KINGDOM'
    AND d_year >= 1992
    AND d_year <= 1997
GROUP BY c_city, s_city, d_year
ORDER BY d_year ASC, revenue DESC
12 changes: 12 additions & 0 deletions sql/core/src/test/resources/ssb/3.4.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- SSB query 3.4: lo_revenue totals per customer city, supplier city and year,
-- restricted to the cities 'UNITED KI1' and 'UNITED KI5' on both the customer
-- and supplier side, for the single year-month 'Dec1997'.
SELECT
    c_city,
    s_city,
    d_year,
    sum(lo_revenue) AS revenue
FROM customer, lineorder, supplier, date
WHERE lo_custkey = c_custkey
    AND lo_suppkey = s_suppkey
    AND lo_orderdate = d_datekey
    AND c_nation = 'UNITED KINGDOM'
    AND (c_city = 'UNITED KI1' OR c_city = 'UNITED KI5')
    AND (s_city = 'UNITED KI1' OR s_city = 'UNITED KI5')
    AND s_nation = 'UNITED KINGDOM'
    AND d_yearmonth = 'Dec1997'
GROUP BY c_city, s_city, d_year
ORDER BY d_year ASC, revenue DESC
11 changes: 11 additions & 0 deletions sql/core/src/test/resources/ssb/4.1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- SSB query 4.1: sum of (lo_revenue - lo_supplycost) per year and customer
-- nation, for customers and suppliers in the 'AMERICA' region and parts made
-- by 'MFGR#1' or 'MFGR#2'.
SELECT
    d_year,
    c_nation,
    sum(lo_revenue - lo_supplycost) AS profit1
FROM date, customer, supplier, part, lineorder
WHERE lo_custkey = c_custkey
    AND lo_suppkey = s_suppkey
    AND lo_partkey = p_partkey
    AND lo_orderdate = d_datekey
    AND c_region = 'AMERICA'
    AND s_region = 'AMERICA'
    AND (p_mfgr = 'MFGR#1' OR p_mfgr = 'MFGR#2')
GROUP BY d_year, c_nation
ORDER BY d_year, c_nation
12 changes: 12 additions & 0 deletions sql/core/src/test/resources/ssb/4.2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- SSB query 4.2: sum of (lo_revenue - lo_supplycost) per year, supplier nation
-- and part category, for customers and suppliers in the 'AMERICA' region,
-- years 1997 and 1998, and parts made by 'MFGR#1' or 'MFGR#2'.
SELECT
    d_year,
    s_nation,
    p_category,
    sum(lo_revenue - lo_supplycost) AS profit1
FROM date, customer, supplier, part, lineorder
WHERE lo_custkey = c_custkey
    AND lo_suppkey = s_suppkey
    AND lo_partkey = p_partkey
    AND lo_orderdate = d_datekey
    AND c_region = 'AMERICA'
    AND s_region = 'AMERICA'
    AND (d_year = 1997 OR d_year = 1998)
    AND (p_mfgr = 'MFGR#1' OR p_mfgr = 'MFGR#2')
GROUP BY d_year, s_nation, p_category
ORDER BY d_year, s_nation, p_category
12 changes: 12 additions & 0 deletions sql/core/src/test/resources/ssb/4.3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- SSB query 4.3: sum of (lo_revenue - lo_supplycost) per year, supplier city
-- and brand, for customers in the 'AMERICA' region, suppliers in
-- 'UNITED STATES', years 1997 and 1998, and part category 'MFGR#14'.
SELECT
    d_year,
    s_city,
    p_brand1,
    sum(lo_revenue - lo_supplycost) AS profit1
FROM date, customer, supplier, part, lineorder
WHERE lo_custkey = c_custkey
    AND lo_suppkey = s_suppkey
    AND lo_partkey = p_partkey
    AND lo_orderdate = d_datekey
    AND c_region = 'AMERICA'
    AND s_nation = 'UNITED STATES'
    AND (d_year = 1997 OR d_year = 1998)
    AND p_category = 'MFGR#14'
GROUP BY d_year, s_city, p_brand1
ORDER BY d_year, s_city, p_brand1
87 changes: 87 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql

import org.apache.spark.sql.catalyst.util.resourceToString

/**
 * This test suite ensures all the Star Schema Benchmark queries can be successfully analyzed,
 * optimized and compiled without hitting the max iteration threshold.
 *
 * Before any test runs, the five tables referenced by the queries (`part`, `supplier`,
 * `customer`, `date` and `lineorder`) are created as parquet tables; no rows are inserted here.
 * One test is then registered per entry in `ssbQueries`. Each test loads `ssb/<name>.sql` from
 * the test resources, builds the executed plan for it and passes that plan to
 * `checkGeneratedCode`.
 */
class SSBQuerySuite extends BenchmarkQueryTest {

  // Declared with an explicit `(): Unit =` and calling `super.beforeAll()` with parentheses:
  // procedure syntax and paren-less calls of side-effecting methods are deprecated in Scala.
  override def beforeAll(): Unit = {
    super.beforeAll()

    sql(
      """
        |CREATE TABLE `part` (`p_partkey` INT, `p_name` STRING, `p_mfgr` STRING,
        |`p_category` STRING, `p_brand1` STRING, `p_color` STRING, `p_type` STRING, `p_size` INT,
        |`p_container` STRING)
        |USING parquet
      """.stripMargin)

    sql(
      """
        |CREATE TABLE `supplier` (`s_suppkey` INT, `s_name` STRING, `s_address` STRING,
        |`s_city` STRING, `s_nation` STRING, `s_region` STRING, `s_phone` STRING)
        |USING parquet
      """.stripMargin)

    sql(
      """
        |CREATE TABLE `customer` (`c_custkey` INT, `c_name` STRING, `c_address` STRING,
        |`c_city` STRING, `c_nation` STRING, `c_region` STRING, `c_phone` STRING,
        |`c_mktsegment` STRING)
        |USING parquet
      """.stripMargin)

    sql(
      """
        |CREATE TABLE `date` (`d_datekey` INT, `d_date` STRING, `d_dayofweek` STRING,
        |`d_month` STRING, `d_year` INT, `d_yearmonthnum` INT, `d_yearmonth` STRING,
        |`d_daynuminweek` INT, `d_daynuminmonth` INT, `d_daynuminyear` INT, `d_monthnuminyear` INT,
        |`d_weeknuminyear` INT, `d_sellingseason` STRING, `d_lastdayinweekfl` STRING,
        |`d_lastdayinmonthfl` STRING, `d_holidayfl` STRING, `d_weekdayfl` STRING)
        |USING parquet
      """.stripMargin)

    sql(
      """
        |CREATE TABLE `lineorder` (`lo_orderkey` INT, `lo_linenumber` INT, `lo_custkey` INT,
        |`lo_partkey` INT, `lo_suppkey` INT, `lo_orderdate` INT, `lo_orderpriority` STRING,
        |`lo_shippriority` STRING, `lo_quantity` INT, `lo_extendedprice` INT,
        |`lo_ordertotalprice` INT, `lo_discount` INT, `lo_revenue` INT, `lo_supplycost` INT,
        |`lo_tax` INT, `lo_commitdate` INT, `lo_shipmode` STRING)
        |USING parquet
      """.stripMargin)
  }

  // Names of the query files under the `ssb/` resource directory, without the `.sql` suffix.
  val ssbQueries = Seq(
    "1.1", "1.2", "1.3", "2.1", "2.2", "2.3", "3.1", "3.2", "3.3", "3.4", "4.1", "4.2", "4.3")

  ssbQueries.foreach { name =>
    // The query text is read while the suite is being constructed, i.e. outside the test body.
    val queryString = resourceToString(s"ssb/$name.sql",
      classLoader = Thread.currentThread.getContextClassLoader)
    test(name) {
      // check the plans can be properly generated
      val plan = sql(queryString).queryExecution.executedPlan
      checkGeneratedCode(plan)
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@

package org.apache.spark.sql

import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.catalyst.util.resourceToString
import org.apache.spark.util.Utils

/**
* This test suite ensures all the TPC-H queries can be successfully analyzed, optimized
Expand Down