Skip to content

Commit

Permalink
chore: Remove gcp requirement for local tests (#2972)
Browse files Browse the repository at this point in the history
* Remove unused objects

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Switch from bigquery to file sources

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Switch tests using example_feature_repo_1 to use file offline store

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Disable tests that require gcp

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Remove duplicate test

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Remove integration marker

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Fix snowflake config

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Fix import

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Add empty feature repo

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Fix comments

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Add new example feature repo

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Add new feature repo with just feature service

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Move tests from integration to unit

Signed-off-by: Felix Wang <wangfelix98@gmail.com>
  • Loading branch information
felixwang9817 authored Aug 1, 2022
1 parent 651ce34 commit c611eb8
Show file tree
Hide file tree
Showing 15 changed files with 298 additions and 276 deletions.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@ test-python-integration-local:
-k "not test_apply_entity_integration and \
not test_apply_feature_view_integration and \
not test_apply_data_source_integration and \
not test_lambda_materialization" \
not test_lambda_materialization and \
not test_feature_view_inference_success and \
not test_update_file_data_source_with_inferred_event_timestamp_col and \
not test_nullable_online_store" \
sdk/python/tests \
) || echo "This script uses Docker, and it isn't running - please start the Docker Daemon and try again!";

Expand Down
3 changes: 3 additions & 0 deletions sdk/python/tests/example_repos/empty_feature_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This example feature repo is deliberately left empty. It should be used for tests that do not need
# any feature views or other objects (for example, a test that checks that a feature service can be
# applied and retrieved correctly).
28 changes: 10 additions & 18 deletions sdk/python/tests/example_repos/example_feature_repo_1.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,26 @@
from datetime import timedelta

from feast import BigQuerySource, Entity, FeatureService, FeatureView, Field, PushSource
from feast import Entity, FeatureService, FeatureView, Field, FileSource, PushSource
from feast.types import Float32, Int64, String

driver_locations_source = BigQuerySource(
table="feast-oss.public.drivers",
timestamp_field="event_timestamp",
created_timestamp_column="created_timestamp",
)

driver_locations_source_query = BigQuerySource(
query="SELECT * from feast-oss.public.drivers",
timestamp_field="event_timestamp",
created_timestamp_column="created_timestamp",
)
# Note that file source paths are not validated, so there doesn't actually need to be any data
# at the paths for these file sources. Since these paths are effectively fake, this example
# feature repo should not be used for historical retrieval.

driver_locations_source_query_2 = BigQuerySource(
query="SELECT lat * 2 FROM feast-oss.public.drivers",
driver_locations_source = FileSource(
path="data/driver_locations.parquet",
timestamp_field="event_timestamp",
created_timestamp_column="created_timestamp",
)

customer_profile_source = BigQuerySource(
customer_profile_source = FileSource(
name="customer_profile_source",
table="feast-oss.public.customers",
path="data/customer_profiles.parquet",
timestamp_field="event_timestamp",
)

customer_driver_combined_source = BigQuerySource(
table="feast-oss.public.customer_driver",
customer_driver_combined_source = FileSource(
path="data/customer_driver_combined.parquet",
timestamp_field="event_timestamp",
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from datetime import timedelta

from feast import Entity, FeatureView, Field, FileSource
from feast.types import Float32, Int32, Int64

# Batch source for hourly driver statistics. File source paths are not validated at
# apply time, so no parquet file needs to exist at this path; this repo is only meant
# for registry/apply tests, not for historical retrieval.
driver_hourly_stats = FileSource(
    path="data/driver_stats.parquet",  # Fake path
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)

# Entity keyed on the driver id; referenced by the feature view below.
driver = Entity(
    name="driver_id",
    description="driver id",
)

# Feature view over the fake file source above. The join key column ("driver_id")
# is listed in the schema alongside the feature columns.
driver_hourly_stats_view = FeatureView(
    name="driver_hourly_stats",
    entities=[driver],
    ttl=timedelta(days=1),
    schema=[
        Field(name="conv_rate", dtype=Float32),
        Field(name="acc_rate", dtype=Float32),
        Field(name="avg_daily_trips", dtype=Int64),
        Field(name="driver_id", dtype=Int32),
    ],
    online=True,
    source=driver_hourly_stats,
    tags={},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from datetime import timedelta

from feast import Entity, FeatureService, FeatureView, Field, FileSource
from feast.types import Float32, Int64, String

# Batch source for driver locations. The path is fake: file source paths are not
# validated at apply time, so this repo supports apply/registry tests only.
driver_locations_source = FileSource(
    path="data/driver_locations.parquet",
    timestamp_field="event_timestamp",
    created_timestamp_column="created_timestamp",
)

driver = Entity(
    name="driver",  # The name is derived from this argument, not object name.
    join_keys=["driver_id"],
    description="driver id",
)

# Feature view consumed by the feature service below.
# NOTE(review): this uses the `batch_source=` keyword while the sibling example repo
# uses `source=` — presumably `batch_source` is an older alias; confirm against the
# FeatureView API and consider unifying on `source=`.
driver_locations = FeatureView(
    name="driver_locations",
    entities=[driver],
    ttl=timedelta(days=1),
    schema=[
        Field(name="lat", dtype=Float32),
        Field(name="lon", dtype=String),
        Field(name="driver_id", dtype=Int64),
    ],
    online=True,
    batch_source=driver_locations_source,
    tags={},
)

# Feature service bundling the single feature view; tests look this up by the
# "driver_locations_service" name and assert on the "release" tag.
all_drivers_feature_service = FeatureService(
    name="driver_locations_service",
    features=[driver_locations],
    tags={"release": "production"},
)
70 changes: 0 additions & 70 deletions sdk/python/tests/integration/e2e/test_universal_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,8 @@

import pytest

from feast import BigQuerySource, Entity, FeatureView, Field
from feast.feature_service import FeatureService
from feast.types import Float32, String
from tests.integration.feature_repos.universal.entities import driver
from tests.integration.feature_repos.universal.feature_views import driver_feature_view
from tests.utils.basic_read_write_test import basic_rw_test
from tests.utils.cli_repo_creator import CliRunner, get_example_repo
from tests.utils.e2e_test_validation import validate_offline_online_store_consistency


Expand All @@ -32,68 +27,3 @@ def test_e2e_consistency(environment, e2e_data_sources, infer_features):
split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1)

validate_offline_online_store_consistency(fs, fv, split_dt)


@pytest.mark.integration
def test_partial() -> None:
"""
Add another table to existing repo using partial apply API. Make sure both the table
applied via CLI apply and the new table are passing RW test.
"""

runner = CliRunner()
with runner.local_repo(
get_example_repo("example_feature_repo_1.py"), "bigquery"
) as store:
driver = Entity(name="driver", join_keys=["test"])

driver_locations_source = BigQuerySource(
table="feast-oss.public.drivers",
timestamp_field="event_timestamp",
created_timestamp_column="created_timestamp",
)

driver_locations_100 = FeatureView(
name="driver_locations_100",
entities=[driver],
ttl=timedelta(days=1),
schema=[
Field(name="lat", dtype=Float32),
Field(name="lon", dtype=String),
Field(name="name", dtype=String),
Field(name="test", dtype=String),
],
online=True,
batch_source=driver_locations_source,
tags={},
)

store.apply([driver_locations_100])

basic_rw_test(store, view_name="driver_locations")
basic_rw_test(store, view_name="driver_locations_100")


@pytest.mark.integration
def test_read_pre_applied() -> None:
"""
Read feature values from the FeatureStore using a FeatureService.
"""
runner = CliRunner()
with runner.local_repo(
get_example_repo("example_feature_repo_1.py"), "bigquery"
) as store:

assert len(store.list_feature_services()) == 1
fs = store.get_feature_service("driver_locations_service")
assert len(fs.tags) == 1
assert fs.tags["release"] == "production"

fv = store.get_feature_view("driver_locations")

fs = FeatureService(name="new_feature_service", features=[fv[["lon"]]])

store.apply([fs])

assert len(store.list_feature_services()) == 2
store.get_feature_service("new_feature_service")
10 changes: 5 additions & 5 deletions sdk/python/tests/integration/feature_repos/repo_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@

SNOWFLAKE_CONFIG = {
"type": "snowflake.online",
"account": os.environ["SNOWFLAKE_CI_DEPLOYMENT"],
"user": os.environ["SNOWFLAKE_CI_USER"],
"password": os.environ["SNOWFLAKE_CI_PASSWORD"],
"role": os.environ["SNOWFLAKE_CI_ROLE"],
"warehouse": os.environ["SNOWFLAKE_CI_WAREHOUSE"],
"account": os.environ.get("SNOWFLAKE_CI_DEPLOYMENT", ""),
"user": os.environ.get("SNOWFLAKE_CI_USER", ""),
"password": os.environ.get("SNOWFLAKE_CI_PASSWORD", ""),
"role": os.environ.get("SNOWFLAKE_CI_ROLE", ""),
"warehouse": os.environ.get("SNOWFLAKE_CI_WAREHOUSE", ""),
"database": "FEAST",
"schema": "ONLINE",
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def test_nullable_online_store(test_nullable_online_store) -> None:
repo_config.write_text(dedent(feature_store_yaml))

repo_example = repo_path / "example.py"
repo_example.write_text(get_example_repo("example_feature_repo_1.py"))
repo_example.write_text(get_example_repo("empty_feature_repo.py"))
result = runner.run(["apply"], cwd=repo_path)
assertpy.assert_that(result.returncode).is_equal_to(0)
finally:
Expand Down
24 changes: 16 additions & 8 deletions sdk/python/tests/unit/cli/test_cli_apply_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,8 @@ def run_simple_apply_test(example_repo_file_name: str, expected_error: bytes):

def test_cli_apply_imported_featureview() -> None:
"""
Test apply feature views with duplicated names and single py file in a feature repo using CLI
Tests that applying a feature view imported from a separate Python file is successful.
"""

with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
runner = CliRunner()
# Construct an example repo in a temporary dir
Expand All @@ -72,8 +71,11 @@ def test_cli_apply_imported_featureview() -> None:
)
)

# Import feature view from an existing file so it exists in two files.
repo_example = repo_path / "example.py"
repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
repo_example.write_text(
get_example_repo("example_feature_repo_with_driver_stats_feature_view.py")
)
repo_example_2 = repo_path / "example_2.py"
repo_example_2.write_text(
"from example import driver_hourly_stats_view\n"
Expand All @@ -92,9 +94,9 @@ def test_cli_apply_imported_featureview() -> None:

def test_cli_apply_imported_featureview_with_duplication() -> None:
"""
Test apply feature views with duplicated names and single py file in a feature repo using CLI
Tests that applying feature views with duplicated names is not possible, even if one of the
duplicated feature views is imported from another file.
"""

with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
runner = CliRunner()
# Construct an example repo in a temporary dir
Expand All @@ -115,8 +117,11 @@ def test_cli_apply_imported_featureview_with_duplication() -> None:
)
)

# Import feature view with duplicated name to try breaking the deduplication logic.
repo_example = repo_path / "example.py"
repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
repo_example.write_text(
get_example_repo("example_feature_repo_with_driver_stats_feature_view.py")
)
repo_example_2 = repo_path / "example_2.py"
repo_example_2.write_text(
"from datetime import timedelta\n"
Expand Down Expand Up @@ -147,7 +152,6 @@ def test_cli_apply_duplicated_featureview_names_multiple_py_files() -> None:
"""
Test apply feature views with duplicated names from multiple py files in a feature repo using CLI
"""

with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
runner = CliRunner()
# Construct an example repo in a temporary dir
Expand All @@ -170,7 +174,11 @@ def test_cli_apply_duplicated_featureview_names_multiple_py_files() -> None:
# Create multiple py files containing the same feature view name
for i in range(3):
repo_example = repo_path / f"example{i}.py"
repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
repo_example.write_text(
get_example_repo(
"example_feature_repo_with_driver_stats_feature_view.py"
)
)
rc, output = runner.run_with_output(["apply"], cwd=repo_path)

assert (
Expand Down
41 changes: 41 additions & 0 deletions sdk/python/tests/unit/local_feast_tests/test_e2e_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@

import pandas as pd

from feast import Entity, FeatureView, Field, FileSource
from feast.driver_test_data import (
create_driver_hourly_stats_df,
create_global_daily_stats_df,
)
from feast.feature_store import FeatureStore
from feast.types import Float32, String
from tests.utils.basic_read_write_test import basic_rw_test
from tests.utils.cli_repo_creator import CliRunner, get_example_repo
from tests.utils.feature_records import validate_online_features

Expand Down Expand Up @@ -120,3 +123,41 @@ def _test_materialize_and_online_retrieval(

assert r.returncode == 0, f"stdout: {r.stdout}\n stderr: {r.stderr}"
validate_online_features(store, driver_df, end_date)


def test_partial() -> None:
    """
    Add another table to existing repo using partial apply API. Make sure both the table
    applied via CLI apply and the new table are passing RW test.
    """
    runner = CliRunner()
    repo_source = get_example_repo("example_feature_repo_1.py")
    with runner.local_repo(repo_source, "file") as store:
        # Entity and source for the additional feature view applied via the SDK
        # (as opposed to the views already applied through the CLI).
        test_entity = Entity(name="driver", join_keys=["test"])

        fake_source = FileSource(
            path="data/driver_locations.parquet",  # Fake path
            timestamp_field="event_timestamp",
            created_timestamp_column="created_timestamp",
        )

        extra_schema = [
            Field(name="lat", dtype=Float32),
            Field(name="lon", dtype=String),
            Field(name="name", dtype=String),
            Field(name="test", dtype=String),
        ]
        extra_view = FeatureView(
            name="driver_locations_100",
            entities=[test_entity],
            ttl=timedelta(days=1),
            schema=extra_schema,
            online=True,
            batch_source=fake_source,
            tags={},
        )

        # Partial apply: only the new view is passed; existing objects remain.
        store.apply([extra_view])

        # Both the CLI-applied view and the SDK-applied view must pass the
        # basic read/write test.
        for view_name in ("driver_locations", "driver_locations_100"):
            basic_rw_test(store, view_name=view_name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from feast.feature_service import FeatureService
from tests.utils.cli_repo_creator import CliRunner, get_example_repo


def test_read_pre_applied() -> None:
    """
    Read feature values from the FeatureStore using a FeatureService.
    """
    runner = CliRunner()
    example = get_example_repo("example_feature_repo_with_feature_service.py")
    with runner.local_repo(example, "file") as store:
        # The example repo declares exactly one feature service; verify it was
        # registered with its tags intact.
        services = store.list_feature_services()
        assert len(services) == 1
        service = store.get_feature_service("driver_locations_service")
        assert len(service.tags) == 1
        assert service.tags["release"] == "production"

        # Build and apply a second feature service referencing a projection
        # (single feature) of the pre-applied feature view.
        view = store.get_feature_view("driver_locations")
        new_service = FeatureService(
            name="new_feature_service", features=[view[["lon"]]]
        )
        store.apply([new_service])

        # Both services must now be retrievable from the registry.
        assert len(store.list_feature_services()) == 2
        store.get_feature_service("new_feature_service")
Loading

0 comments on commit c611eb8

Please sign in to comment.