
Commit 4f2630e

[SPARK-40789][PYTHON][TESTS] Separate tests under pyspark.sql.tests
### What changes were proposed in this pull request?

This PR proposes to split the tests into sub-packages:

**Before**

```
tests
├── __init__.py
├── test_arrow.py
├── test_arrow_map.py
├── test_catalog.py
├── test_column.py
├── test_conf.py
├── test_connect_basic.py
├── test_connect_column_expressions.py
├── test_connect_plan_only.py
├── test_connect_select_ops.py
├── test_context.py
├── test_dataframe.py
├── test_datasources.py
├── test_functions.py
├── test_group.py
├── test_pandas_cogrouped_map.py
├── test_pandas_grouped_map.py
├── test_pandas_grouped_map_with_state.py
├── test_pandas_map.py
├── test_pandas_udf.py
├── test_pandas_udf_grouped_agg.py
├── test_pandas_udf_scalar.py
├── test_pandas_udf_typehints.py
├── test_pandas_udf_typehints_with_future_annotations.py
├── test_pandas_udf_window.py
├── test_readwriter.py
├── test_serde.py
├── test_session.py
├── test_streaming.py
├── test_streaming_listener.py
├── test_types.py
├── test_udf.py
├── test_udf_profiler.py
├── test_utils.py
└── typing
    ├── ...
```

**After**

```
tests
├── __init__.py
├── connect
│   ├── __init__.py
│   ├── test_connect_basic.py
│   ├── test_connect_column_expressions.py
│   ├── test_connect_plan_only.py
│   └── test_connect_select_ops.py
├── pandas
│   ├── __init__.py
│   ├── test_pandas_cogrouped_map.py
│   ├── test_pandas_grouped_map.py
│   ├── test_pandas_grouped_map_with_state.py
│   ├── test_pandas_map.py
│   ├── test_pandas_udf.py
│   ├── test_pandas_udf_grouped_agg.py
│   ├── test_pandas_udf_scalar.py
│   ├── test_pandas_udf_typehints.py
│   ├── test_pandas_udf_typehints_with_future_annotations.py
│   └── test_pandas_udf_window.py
├── streaming
│   ├── __init__.py
│   ├── test_streaming.py
│   └── test_streaming_listener.py
├── test_arrow.py
├── test_arrow_map.py
├── test_catalog.py
├── test_column.py
├── test_conf.py
├── test_context.py
├── test_dataframe.py
├── test_datasources.py
├── test_functions.py
├── test_group.py
├── test_readwriter.py
├── test_serde.py
├── test_session.py
├── test_types.py
├── test_udf.py
├── test_udf_profiler.py
├── test_utils.py
└── typing
    ├── ...
```

This layout is consistent with `pyspark.pandas.tests`.

### Why are the changes needed?

To make the tests easier to maintain, track, and extend.

### Does this PR introduce _any_ user-facing change?

No, dev-only.

### How was this patch tested?

CI in this PR should test it out.

Closes #38239 from HyukjinKwon/SPARK-40789.

Lead-authored-by: Hyukjin Kwon <gurwls223@apache.org>
Co-authored-by: Hyukjin Kwon <gurwls223@gmail.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
1 parent c4ef9ae commit 4f2630e

22 files changed (+86 / -38 lines)
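Because only the package layout of the tests changes, picking up this commit locally just means switching to the new dotted module names. Below is a minimal sketch, not part of the commit, of loading one relocated module with the standard `unittest` loader; it assumes a working PySpark development environment is already importable.

```python
import unittest

# Old name: pyspark.sql.tests.test_connect_basic
# New name after this PR: pyspark.sql.tests.connect.test_connect_basic
suite = unittest.defaultTestLoader.loadTestsFromName(
    "pyspark.sql.tests.connect.test_connect_basic"
)
unittest.TextTestRunner(verbosity=2).run(suite)
```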

.github/labeler.yml

Lines changed: 5 additions & 5 deletions
@@ -84,12 +84,12 @@ SPARK SHELL:
   - "repl/**/*"
   - "bin/spark-shell*"
 SQL:
-  #- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/test_streaming.py"]
+  #- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/streaming/test_streaming.py"]
   - "**/sql/**/*"
   - "common/unsafe/**/*"
   #- "!python/pyspark/sql/avro/**/*"
   #- "!python/pyspark/sql/streaming/**/*"
-  #- "!python/pyspark/sql/tests/test_streaming.py"
+  #- "!python/pyspark/sql/tests/streaming/test_streaming.py"
   - "bin/spark-sql*"
   - "bin/beeline*"
   - "sbin/*thriftserver*.sh"
@@ -125,7 +125,7 @@ STRUCTURED STREAMING:
   - "**/sql/**/streaming/**/*"
   - "connector/kafka-0-10-sql/**/*"
   - "python/pyspark/sql/streaming/**/*"
-  - "python/pyspark/sql/tests/test_streaming.py"
+  - "python/pyspark/sql/tests/streaming/test_streaming.py"
   - "**/*streaming.R"
 PYTHON:
   - "bin/pyspark*"
@@ -156,5 +156,5 @@ CONNECT:
   - "**/sql/sparkconnect/**/*"
   - "python/pyspark/sql/**/connect/**/*"
 PROTOBUF:
-  - "connector/protobuf/**/*"
-  - "python/pyspark/sql/protobuf/**/*"
+  - "connector/protobuf/**/*"
+  - "python/pyspark/sql/protobuf/**/*"

dev/sparktestsupport/modules.py

Lines changed: 16 additions & 16 deletions
@@ -461,22 +461,22 @@ def __hash__(self):
         "pyspark.sql.tests.test_datasources",
         "pyspark.sql.tests.test_functions",
         "pyspark.sql.tests.test_group",
-        "pyspark.sql.tests.test_pandas_cogrouped_map",
-        "pyspark.sql.tests.test_pandas_grouped_map",
-        "pyspark.sql.tests.test_pandas_grouped_map_with_state",
-        "pyspark.sql.tests.test_pandas_map",
+        "pyspark.sql.tests.pandas.test_pandas_cogrouped_map",
+        "pyspark.sql.tests.pandas.test_pandas_grouped_map",
+        "pyspark.sql.tests.pandas.test_pandas_grouped_map_with_state",
+        "pyspark.sql.tests.pandas.test_pandas_map",
         "pyspark.sql.tests.test_arrow_map",
-        "pyspark.sql.tests.test_pandas_udf",
-        "pyspark.sql.tests.test_pandas_udf_grouped_agg",
-        "pyspark.sql.tests.test_pandas_udf_scalar",
-        "pyspark.sql.tests.test_pandas_udf_typehints",
-        "pyspark.sql.tests.test_pandas_udf_typehints_with_future_annotations",
-        "pyspark.sql.tests.test_pandas_udf_window",
+        "pyspark.sql.tests.pandas.test_pandas_udf",
+        "pyspark.sql.tests.pandas.test_pandas_udf_grouped_agg",
+        "pyspark.sql.tests.pandas.test_pandas_udf_scalar",
+        "pyspark.sql.tests.pandas.test_pandas_udf_typehints",
+        "pyspark.sql.tests.pandas.test_pandas_udf_typehints_with_future_annotations",
+        "pyspark.sql.tests.pandas.test_pandas_udf_window",
         "pyspark.sql.tests.test_readwriter",
         "pyspark.sql.tests.test_serde",
         "pyspark.sql.tests.test_session",
-        "pyspark.sql.tests.test_streaming",
-        "pyspark.sql.tests.test_streaming_listener",
+        "pyspark.sql.tests.streaming.test_streaming",
+        "pyspark.sql.tests.streaming.test_streaming_listener",
         "pyspark.sql.tests.test_types",
         "pyspark.sql.tests.test_udf",
         "pyspark.sql.tests.test_udf_profiler",
@@ -492,10 +492,10 @@ def __hash__(self):
         # doctests
         # No doctests yet.
         # unittests
-        "pyspark.sql.tests.test_connect_column_expressions",
-        "pyspark.sql.tests.test_connect_plan_only",
-        "pyspark.sql.tests.test_connect_select_ops",
-        "pyspark.sql.tests.test_connect_basic",
+        "pyspark.sql.tests.connect.test_connect_column_expressions",
+        "pyspark.sql.tests.connect.test_connect_plan_only",
+        "pyspark.sql.tests.connect.test_connect_select_ops",
+        "pyspark.sql.tests.connect.test_connect_basic",
     ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and

python/pyspark/sql/connect/README.md

Lines changed: 1 addition & 1 deletion
@@ -46,6 +46,6 @@ To use the release version of Spark Connect:
 ## Run Tests
 
 ```bash
-./python/run-tests --testnames 'pyspark.sql.tests.test_connect_basic'
+./python/run-tests --testnames 'pyspark.sql.tests.connect.test_connect_basic'
 ```
 
python/pyspark/sql/tests/connect/__init__.py

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
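The new file is a license-header-only `__init__.py`; its only job is to make the new directory a regular Python package so the relocated modules resolve under their dotted paths. A small, hypothetical check, not part of the commit, assuming a PySpark development environment:

```python
import importlib

# With the new package __init__.py in place, the moved test module can be
# imported by its new dotted name.
mod = importlib.import_module("pyspark.sql.tests.connect.test_connect_basic")
print(mod.__file__)  # .../pyspark/sql/tests/connect/test_connect_basic.py
```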

python/pyspark/sql/tests/test_connect_basic.py renamed to python/pyspark/sql/tests/connect/test_connect_basic.py

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ def test_simple_explain_string(self):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_connect_basic import *  # noqa: F401
+    from pyspark.sql.tests.connect.test_connect_basic import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore
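Every renamed test module is touched in the same single place: its `__main__` block re-imports the module by its own (now new) dotted path before handing off to `unittest`, as in the hunk above and the renames that follow. Below is a condensed sketch of that boilerplate; the tail after `import xmlrunner` is truncated in these diffs, so the runner setup here assumes the usual PySpark pattern rather than quoting the file.

```python
import unittest

if __name__ == "__main__":
    # The module re-imports itself by its (new) dotted path so its test
    # classes are collected when the file is executed directly.
    from pyspark.sql.tests.connect.test_connect_basic import *  # noqa: F401

    try:
        # xmlrunner, when installed, writes JUnit-style XML reports for CI.
        import xmlrunner  # type: ignore

        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
    except ImportError:
        # Fall back to the default text runner when xmlrunner is absent.
        testRunner = None
    unittest.main(testRunner=testRunner, verbosity=2)
```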

python/pyspark/sql/tests/test_connect_column_expressions.py renamed to python/pyspark/sql/tests/connect/test_connect_column_expressions.py

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ def test_column_literals(self):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.sql.tests.test_connect_column_expressions import *  # noqa: F401
+    from pyspark.sql.tests.connect.test_connect_column_expressions import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore

python/pyspark/sql/tests/test_connect_plan_only.py renamed to python/pyspark/sql/tests/connect/test_connect_plan_only.py

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ def read_table(x):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_connect_plan_only import *  # noqa: F401
+    from pyspark.sql.tests.connect.test_connect_plan_only import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore

python/pyspark/sql/tests/test_connect_select_ops.py renamed to python/pyspark/sql/tests/connect/test_connect_select_ops.py

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ def test_select_with_literal(self):
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.sql.tests.test_connect_select_ops import *  # noqa: F401
+    from pyspark.sql.tests.connect.test_connect_select_ops import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore
python/pyspark/sql/tests/pandas/__init__.py

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#

python/pyspark/sql/tests/test_pandas_cogrouped_map.py renamed to python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py

Lines changed: 1 addition & 1 deletion
@@ -404,7 +404,7 @@ def merge_pandas(lft, rgt):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_pandas_cogrouped_map import *  # noqa: F401
+    from pyspark.sql.tests.pandas.test_pandas_cogrouped_map import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]
