
Commit c837288: Try this on CI
lorenabalan committed Nov 29, 2021
1 parent 56ea747 commit c837288
Showing 3 changed files with 102 additions and 86 deletions.
80 changes: 40 additions & 40 deletions .circleci/config.yml
@@ -449,29 +449,29 @@ workflows:
regular:
jobs:
- unit_tests_36
- linters_36
- e2e_tests_36
- docs_36
- docs_linkcheck_37
# - linters_36
# - e2e_tests_36
# - docs_36
# - docs_linkcheck_37
- unit_tests_37
- linters_37
- e2e_tests_37
- docs_37
# - linters_37
# - e2e_tests_37
# - docs_37
- unit_tests_38
- linters_38
- e2e_tests_38
- pip_compile_36
- pip_compile_37
- pip_compile_38
- win_unit_tests_36
- win_unit_tests_37
- win_unit_tests_38
- win_pip_compile_36
- win_pip_compile_37
- win_pip_compile_38
- win_e2e_tests_36
- win_e2e_tests_37
- win_e2e_tests_38
# - linters_38
# - e2e_tests_38
# - pip_compile_36
# - pip_compile_37
# - pip_compile_38
# - win_unit_tests_36
# - win_unit_tests_37
# - win_unit_tests_38
# - win_pip_compile_36
# - win_pip_compile_37
# - win_pip_compile_38
# - win_e2e_tests_36
# - win_e2e_tests_37
# - win_e2e_tests_38
- run_kedro_viz:
filters:
branches:
@@ -480,28 +480,28 @@ workflows:
- all_circleci_checks_succeeded:
requires:
- unit_tests_36
- linters_36
- e2e_tests_36
- docs_36
# - linters_36
# - e2e_tests_36
# - docs_36
- unit_tests_37
- linters_37
- e2e_tests_37
- docs_37
- docs_linkcheck_37
# - linters_37
# - e2e_tests_37
# - docs_37
# - docs_linkcheck_37
- unit_tests_38
- linters_38
- e2e_tests_38
- pip_compile_36
- pip_compile_37
- pip_compile_38
- win_pip_compile_36
- win_pip_compile_37
- win_pip_compile_38
- win_unit_tests_36
- win_unit_tests_37
# - linters_38
# - e2e_tests_38
# - pip_compile_36
# - pip_compile_37
# - pip_compile_38
# - win_pip_compile_36
# - win_pip_compile_37
# - win_pip_compile_38
# - win_unit_tests_36
# - win_unit_tests_37
# Skipped due to `pywin32 is in an unsupported or invalid wheel`
# - win_e2e_tests_36
# Skipped due to Windows fatal exception: stack overflow
# - win_unit_tests_38
- win_e2e_tests_37
- win_e2e_tests_38
# - win_e2e_tests_37
# - win_e2e_tests_38
13 changes: 12 additions & 1 deletion tests/extras/datasets/spark/conftest.py
@@ -8,6 +8,7 @@
from subprocess import Popen

import pytest
from delta import configure_spark_with_delta_pip

try:
from pyspark import SparkContext
@@ -35,7 +36,17 @@ def replace_spark_default_getorcreate():
@pytest.fixture(scope="module")
def spark_session(): # SKIP_IF_NO_SPARK
SparkSession.builder.getOrCreate = the_real_getOrCreate
spark = SparkSession.builder.getOrCreate()
builder = (
SparkSession.builder.appName("MyApp")
.config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
.config(
"spark.sql.catalog.spark_catalog",
"org.apache.spark.sql.delta.catalog.DeltaCatalog",
)
)

spark = configure_spark_with_delta_pip(builder).getOrCreate()
# spark = SparkSession.builder.getOrCreate()
yield spark
spark.stop()
SparkSession.builder.getOrCreate = UseTheSparkSessionFixtureOrMock
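For reference, the fixture change above follows the pattern recommended in the Delta Lake quick-start: configure_spark_with_delta_pip injects the delta-core package into the builder's spark.jars.packages before getOrCreate() is called, so the resulting session can read and write Delta tables. Below is a minimal standalone sketch of that same pattern outside pytest; the app name and temporary table path are illustrative and not part of this commit.

import shutil
import tempfile

from delta import configure_spark_with_delta_pip
from pyspark.sql import SparkSession

# Same builder configuration as the spark_session fixture above.
builder = (
    SparkSession.builder.appName("DeltaSmokeTest")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)
spark = configure_spark_with_delta_pip(builder).getOrCreate()

# Write a small DataFrame as a Delta table and read it back to confirm
# the session is Delta-enabled.
path = tempfile.mkdtemp()
spark.createDataFrame([("Alex", 31), ("Bob", 12)], ["name", "age"]).write.format(
    "delta"
).mode("overwrite").save(path)
assert spark.read.format("delta").load(path).count() == 2

spark.stop()
shutil.rmtree(path, ignore_errors=True)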
95 changes: 50 additions & 45 deletions tests/extras/datasets/spark/test_deltatable_dataset.py
Expand Up @@ -16,53 +16,58 @@


# clean up pyspark after the test module finishes
@pytest.fixture(scope="module", autouse=True)
def delta_spark_session(replace_spark_default_getorcreate):
SparkSession.builder.getOrCreate = replace_spark_default_getorcreate

try:
# As recommended in https://docs.delta.io/latest/quick-start.html#python
builder = (
SparkSession.builder.appName("MyApp")
.config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
.config(
"spark.sql.catalog.spark_catalog",
"org.apache.spark.sql.delta.catalog.DeltaCatalog",
)
)

spark = configure_spark_with_delta_pip(builder).getOrCreate()

yield spark

# This fixture should be a dependency of other fixtures dealing with spark delta data
# in this module so that it always exits last and stops the spark session
# after tests are finished.
spark.stop()
except PermissionError: # pragma: no cover
# On Windows machine TemporaryDirectory can't be removed because some
# files are still used by Java process.
pass

SparkSession.builder.getOrCreate = UseTheSparkSessionFixtureOrMock

# remove the cached JVM vars
SparkContext._jvm = None # pylint: disable=protected-access
SparkContext._gateway = None # pylint: disable=protected-access

# py4j doesn't shutdown properly so kill the actual JVM process
for obj in gc.get_objects():
try:
if isinstance(obj, Popen) and "pyspark" in obj.args[0]:
obj.terminate() # pragma: no cover
except ReferenceError: # pragma: no cover
# gc.get_objects may return dead weak proxy objects that will raise
# ReferenceError when you isinstance them
pass
# @pytest.fixture(scope="module", autouse=True)
# def delta_spark_session(replace_spark_default_getorcreate):
# SparkSession.builder.getOrCreate = replace_spark_default_getorcreate
#
# try:
# # As recommended in https://docs.delta.io/latest/quick-start.html#python
# builder = (
# SparkSession.builder.appName("MyApp")
# .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
# .config(
# "spark.sql.catalog.spark_catalog",
# "org.apache.spark.sql.delta.catalog.DeltaCatalog",
# )
# )
#
# spark = configure_spark_with_delta_pip(builder).getOrCreate()
#
# yield spark
#
# # This fixture should be a dependency of other fixtures dealing with spark delta data
# # in this module so that it always exits last and stops the spark session
# # after tests are finished.
# spark.stop()
# except PermissionError: # pragma: no cover
# # On Windows machine TemporaryDirectory can't be removed because some
# # files are still used by Java process.
# pass
#
# SparkSession.builder.getOrCreate = UseTheSparkSessionFixtureOrMock
#
# # remove the cached JVM vars
# SparkContext._jvm = None # pylint: disable=protected-access
# SparkContext._gateway = None # pylint: disable=protected-access
#
# # py4j doesn't shutdown properly so kill the actual JVM process
# for obj in gc.get_objects():
# try:
# if isinstance(obj, Popen) and "pyspark" in obj.args[0]:
# obj.terminate() # pragma: no cover
# except ReferenceError: # pragma: no cover
# # gc.get_objects may return dead weak proxy objects that will raise
# # ReferenceError when you isinstance them
# pass

@pytest.fixture(autouse=True)
def spark_session_autouse(spark_session):
# all the tests in this file require Spark
return spark_session


@pytest.fixture
def sample_spark_df(delta_spark_session):
def sample_spark_df():
schema = StructType(
[
StructField("name", StringType(), True),
@@ -72,7 +77,7 @@ def sample_spark_df(delta_spark_session):

data = [("Alex", 31), ("Bob", 12), ("Clarke", 65), ("Dave", 29)]

return delta_spark_session.createDataFrame(data, schema)
return SparkSession.builder.getOrCreate().createDataFrame(data, schema)


class TestDeltaTableDataSet:
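To illustrate how the rearranged fixtures interact after this change: spark_session_autouse is autouse, so every test in the module now implicitly depends on the module-scoped, Delta-enabled spark_session fixture from conftest.py, and sample_spark_df can simply call SparkSession.builder.getOrCreate(). The sketch below is a hypothetical test written in the style of this module, not part of the commit; it assumes the SparkDataSet(file_format="delta") and DeltaTableDataSet(filepath=...) usage found elsewhere in this test file.

from delta.tables import DeltaTable
from kedro.extras.datasets.spark import DeltaTableDataSet, SparkDataSet


def test_delta_roundtrip(tmp_path, sample_spark_df):
    filepath = (tmp_path / "test_data").as_posix()

    # Save the sample DataFrame as a Delta table via SparkDataSet.
    SparkDataSet(filepath=filepath, file_format="delta").save(sample_spark_df)

    # Load it back through DeltaTableDataSet and compare row counts.
    delta_table = DeltaTableDataSet(filepath=filepath).load()
    assert isinstance(delta_table, DeltaTable)
    assert delta_table.toDF().count() == sample_spark_df.count()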
