Skip to content

Commit

Permalink
chore: Remove gcp requirement for local tests (#2972)
Browse files Browse the repository at this point in the history
* Remove unused objects

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Switch from bigquery to file sources

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Switch tests using example_feature_repo_1 to use file offline store

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Disable tests that require gcp

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Remove duplicate test

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Remove integration marker

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Fix snowflake config

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Fix import

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Add empty feature repo

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Fix comments

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Add new example feature repo

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Add new feature repo with just feature service

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Move tests from integration to unit

Signed-off-by: Felix Wang <wangfelix98@gmail.com>
  • Loading branch information
felixwang9817 authored Aug 1, 2022
1 parent 651ce34 commit c611eb8
Show file tree
Hide file tree
Showing 15 changed files with 298 additions and 276 deletions.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@ test-python-integration-local:
-k "not test_apply_entity_integration and \
not test_apply_feature_view_integration and \
not test_apply_data_source_integration and \
not test_lambda_materialization" \
not test_lambda_materialization and \
not test_feature_view_inference_success and \
not test_update_file_data_source_with_inferred_event_timestamp_col and \
not test_nullable_online_store" \
sdk/python/tests \
) || echo "This script uses Docker, and it isn't running - please start the Docker Daemon and try again!";

Expand Down
3 changes: 3 additions & 0 deletions sdk/python/tests/example_repos/empty_feature_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This example feature repo is deliberately left empty. It should be used for tests that do not need
# any feature views or other objects (for example, a test that checks that a feature service can be
# applied and retrieved correctly).
28 changes: 10 additions & 18 deletions sdk/python/tests/example_repos/example_feature_repo_1.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,26 @@
from datetime import timedelta

from feast import BigQuerySource, Entity, FeatureService, FeatureView, Field, PushSource
from feast import Entity, FeatureService, FeatureView, Field, FileSource, PushSource
from feast.types import Float32, Int64, String

driver_locations_source = BigQuerySource(
table="feast-oss.public.drivers",
timestamp_field="event_timestamp",
created_timestamp_column="created_timestamp",
)

driver_locations_source_query = BigQuerySource(
query="SELECT * from feast-oss.public.drivers",
timestamp_field="event_timestamp",
created_timestamp_column="created_timestamp",
)
# Note that file source paths are not validated, so there doesn't actually need to be any data
# at the paths for these file sources. Since these paths are effectively fake, this example
# feature repo should not be used for historical retrieval.

driver_locations_source_query_2 = BigQuerySource(
query="SELECT lat * 2 FROM feast-oss.public.drivers",
driver_locations_source = FileSource(
path="data/driver_locations.parquet",
timestamp_field="event_timestamp",
created_timestamp_column="created_timestamp",
)

customer_profile_source = BigQuerySource(
customer_profile_source = FileSource(
name="customer_profile_source",
table="feast-oss.public.customers",
path="data/customer_profiles.parquet",
timestamp_field="event_timestamp",
)

customer_driver_combined_source = BigQuerySource(
table="feast-oss.public.customer_driver",
customer_driver_combined_source = FileSource(
path="data/customer_driver_combined.parquet",
timestamp_field="event_timestamp",
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from datetime import timedelta

from feast import Entity, FeatureView, Field, FileSource
from feast.types import Float32, Int32, Int64

# Batch source for hourly driver statistics. File source paths are not validated at
# apply time, so no parquet file needs to exist at this path; this repo is only meant
# for registry/apply tests, not for historical retrieval.
driver_hourly_stats = FileSource(
    path="data/driver_stats.parquet",  # Fake path
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)

# Entity keyed on the driver id; referenced by the feature view below.
driver = Entity(
    name="driver_id",
    description="driver id",
)

# Feature view over the fake file source above. The join key column ("driver_id")
# is listed in the schema alongside the feature columns.
driver_hourly_stats_view = FeatureView(
    name="driver_hourly_stats",
    entities=[driver],
    ttl=timedelta(days=1),
    schema=[
        Field(name="conv_rate", dtype=Float32),
        Field(name="acc_rate", dtype=Float32),
        Field(name="avg_daily_trips", dtype=Int64),
        Field(name="driver_id", dtype=Int32),
    ],
    online=True,
    source=driver_hourly_stats,
    tags={},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from datetime import timedelta

from feast import Entity, FeatureService, FeatureView, Field, FileSource
from feast.types import Float32, Int64, String

# Batch source for driver locations. The path is fake: file source paths are not
# validated at apply time, so this repo supports apply/registry tests only.
driver_locations_source = FileSource(
    path="data/driver_locations.parquet",
    timestamp_field="event_timestamp",
    created_timestamp_column="created_timestamp",
)

driver = Entity(
    name="driver",  # The name is derived from this argument, not object name.
    join_keys=["driver_id"],
    description="driver id",
)

# Feature view consumed by the feature service below.
# NOTE(review): this uses the `batch_source=` keyword while the sibling example repo
# uses `source=` — presumably `batch_source` is an older alias; confirm against the
# FeatureView API and consider unifying on `source=`.
driver_locations = FeatureView(
    name="driver_locations",
    entities=[driver],
    ttl=timedelta(days=1),
    schema=[
        Field(name="lat", dtype=Float32),
        Field(name="lon", dtype=String),
        Field(name="driver_id", dtype=Int64),
    ],
    online=True,
    batch_source=driver_locations_source,
    tags={},
)

# Feature service bundling the single feature view; tests look this up by the
# "driver_locations_service" name and assert on the "release" tag.
all_drivers_feature_service = FeatureService(
    name="driver_locations_service",
    features=[driver_locations],
    tags={"release": "production"},
)
70 changes: 0 additions & 70 deletions sdk/python/tests/integration/e2e/test_universal_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,8 @@

import pytest

from feast import BigQuerySource, Entity, FeatureView, Field
from feast.feature_service import FeatureService
from feast.types import Float32, String
from tests.integration.feature_repos.universal.entities import driver
from tests.integration.feature_repos.universal.feature_views import driver_feature_view
from tests.utils.basic_read_write_test import basic_rw_test
from tests.utils.cli_repo_creator import CliRunner, get_example_repo
from tests.utils.e2e_test_validation import validate_offline_online_store_consistency


Expand All @@ -32,68 +27,3 @@ def test_e2e_consistency(environment, e2e_data_sources, infer_features):
split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1)

validate_offline_online_store_consistency(fs, fv, split_dt)


@pytest.mark.integration
def test_partial() -> None:
"""
Add another table to existing repo using partial apply API. Make sure both the table
applied via CLI apply and the new table are passing RW test.
"""

runner = CliRunner()
with runner.local_repo(
get_example_repo("example_feature_repo_1.py"), "bigquery"
) as store:
driver = Entity(name="driver", join_keys=["test"])

driver_locations_source = BigQuerySource(
table="feast-oss.public.drivers",
timestamp_field="event_timestamp",
created_timestamp_column="created_timestamp",
)

driver_locations_100 = FeatureView(
name="driver_locations_100",
entities=[driver],
ttl=timedelta(days=1),
schema=[
Field(name="lat", dtype=Float32),
Field(name="lon", dtype=String),
Field(name="name", dtype=String),
Field(name="test", dtype=String),
],
online=True,
batch_source=driver_locations_source,
tags={},
)

store.apply([driver_locations_100])

basic_rw_test(store, view_name="driver_locations")
basic_rw_test(store, view_name="driver_locations_100")


@pytest.mark.integration
def test_read_pre_applied() -> None:
"""
Read feature values from the FeatureStore using a FeatureService.
"""
runner = CliRunner()
with runner.local_repo(
get_example_repo("example_feature_repo_1.py"), "bigquery"
) as store:

assert len(store.list_feature_services()) == 1
fs = store.get_feature_service("driver_locations_service")
assert len(fs.tags) == 1
assert fs.tags["release"] == "production"

fv = store.get_feature_view("driver_locations")

fs = FeatureService(name="new_feature_service", features=[fv[["lon"]]])

store.apply([fs])

assert len(store.list_feature_services()) == 2
store.get_feature_service("new_feature_service")
10 changes: 5 additions & 5 deletions sdk/python/tests/integration/feature_repos/repo_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@

SNOWFLAKE_CONFIG = {
"type": "snowflake.online",
"account": os.environ["SNOWFLAKE_CI_DEPLOYMENT"],
"user": os.environ["SNOWFLAKE_CI_USER"],
"password": os.environ["SNOWFLAKE_CI_PASSWORD"],
"role": os.environ["SNOWFLAKE_CI_ROLE"],
"warehouse": os.environ["SNOWFLAKE_CI_WAREHOUSE"],
"account": os.environ.get("SNOWFLAKE_CI_DEPLOYMENT", ""),
"user": os.environ.get("SNOWFLAKE_CI_USER", ""),
"password": os.environ.get("SNOWFLAKE_CI_PASSWORD", ""),
"role": os.environ.get("SNOWFLAKE_CI_ROLE", ""),
"warehouse": os.environ.get("SNOWFLAKE_CI_WAREHOUSE", ""),
"database": "FEAST",
"schema": "ONLINE",
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def test_nullable_online_store(test_nullable_online_store) -> None:
repo_config.write_text(dedent(feature_store_yaml))

repo_example = repo_path / "example.py"
repo_example.write_text(get_example_repo("example_feature_repo_1.py"))
repo_example.write_text(get_example_repo("empty_feature_repo.py"))
result = runner.run(["apply"], cwd=repo_path)
assertpy.assert_that(result.returncode).is_equal_to(0)
finally:
Expand Down
24 changes: 16 additions & 8 deletions sdk/python/tests/unit/cli/test_cli_apply_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,8 @@ def run_simple_apply_test(example_repo_file_name: str, expected_error: bytes):

def test_cli_apply_imported_featureview() -> None:
"""
Test apply feature views with duplicated names and single py file in a feature repo using CLI
Tests that applying a feature view imported from a separate Python file is successful.
"""

with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
runner = CliRunner()
# Construct an example repo in a temporary dir
Expand All @@ -72,8 +71,11 @@ def test_cli_apply_imported_featureview() -> None:
)
)

# Import feature view from an existing file so it exists in two files.
repo_example = repo_path / "example.py"
repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
repo_example.write_text(
get_example_repo("example_feature_repo_with_driver_stats_feature_view.py")
)
repo_example_2 = repo_path / "example_2.py"
repo_example_2.write_text(
"from example import driver_hourly_stats_view\n"
Expand All @@ -92,9 +94,9 @@ def test_cli_apply_imported_featureview() -> None:

def test_cli_apply_imported_featureview_with_duplication() -> None:
"""
Test apply feature views with duplicated names and single py file in a feature repo using CLI
Tests that applying feature views with duplicated names is not possible, even if one of the
duplicated feature views is imported from another file.
"""

with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
runner = CliRunner()
# Construct an example repo in a temporary dir
Expand All @@ -115,8 +117,11 @@ def test_cli_apply_imported_featureview_with_duplication() -> None:
)
)

# Import feature view with duplicated name to try breaking the deduplication logic.
repo_example = repo_path / "example.py"
repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
repo_example.write_text(
get_example_repo("example_feature_repo_with_driver_stats_feature_view.py")
)
repo_example_2 = repo_path / "example_2.py"
repo_example_2.write_text(
"from datetime import timedelta\n"
Expand Down Expand Up @@ -147,7 +152,6 @@ def test_cli_apply_duplicated_featureview_names_multiple_py_files() -> None:
"""
Test apply feature views with duplicated names from multiple py files in a feature repo using CLI
"""

with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
runner = CliRunner()
# Construct an example repo in a temporary dir
Expand All @@ -170,7 +174,11 @@ def test_cli_apply_duplicated_featureview_names_multiple_py_files() -> None:
# Create multiple py files containing the same feature view name
for i in range(3):
repo_example = repo_path / f"example{i}.py"
repo_example.write_text(get_example_repo("example_feature_repo_2.py"))
repo_example.write_text(
get_example_repo(
"example_feature_repo_with_driver_stats_feature_view.py"
)
)
rc, output = runner.run_with_output(["apply"], cwd=repo_path)

assert (
Expand Down
41 changes: 41 additions & 0 deletions sdk/python/tests/unit/local_feast_tests/test_e2e_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@

import pandas as pd

from feast import Entity, FeatureView, Field, FileSource
from feast.driver_test_data import (
create_driver_hourly_stats_df,
create_global_daily_stats_df,
)
from feast.feature_store import FeatureStore
from feast.types import Float32, String
from tests.utils.basic_read_write_test import basic_rw_test
from tests.utils.cli_repo_creator import CliRunner, get_example_repo
from tests.utils.feature_records import validate_online_features

Expand Down Expand Up @@ -120,3 +123,41 @@ def _test_materialize_and_online_retrieval(

assert r.returncode == 0, f"stdout: {r.stdout}\n stderr: {r.stderr}"
validate_online_features(store, driver_df, end_date)


def test_partial() -> None:
    """
    Add another table to existing repo using partial apply API. Make sure both the table
    applied via CLI apply and the new table are passing RW test.
    """
    runner = CliRunner()
    repo_source = get_example_repo("example_feature_repo_1.py")
    with runner.local_repo(repo_source, "file") as store:
        # Entity and source for the additional feature view applied via the SDK
        # (as opposed to the views already applied through the CLI).
        test_entity = Entity(name="driver", join_keys=["test"])

        fake_source = FileSource(
            path="data/driver_locations.parquet",  # Fake path
            timestamp_field="event_timestamp",
            created_timestamp_column="created_timestamp",
        )

        extra_schema = [
            Field(name="lat", dtype=Float32),
            Field(name="lon", dtype=String),
            Field(name="name", dtype=String),
            Field(name="test", dtype=String),
        ]
        extra_view = FeatureView(
            name="driver_locations_100",
            entities=[test_entity],
            ttl=timedelta(days=1),
            schema=extra_schema,
            online=True,
            batch_source=fake_source,
            tags={},
        )

        # Partial apply: only the new view is passed; existing objects remain.
        store.apply([extra_view])

        # Both the CLI-applied view and the SDK-applied view must pass the
        # basic read/write test.
        for view_name in ("driver_locations", "driver_locations_100"):
            basic_rw_test(store, view_name=view_name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from feast.feature_service import FeatureService
from tests.utils.cli_repo_creator import CliRunner, get_example_repo


def test_read_pre_applied() -> None:
    """
    Read feature values from the FeatureStore using a FeatureService.
    """
    runner = CliRunner()
    example = get_example_repo("example_feature_repo_with_feature_service.py")
    with runner.local_repo(example, "file") as store:
        # The example repo declares exactly one feature service; verify it was
        # registered with its tags intact.
        services = store.list_feature_services()
        assert len(services) == 1
        service = store.get_feature_service("driver_locations_service")
        assert len(service.tags) == 1
        assert service.tags["release"] == "production"

        # Build and apply a second feature service referencing a projection
        # (single feature) of the pre-applied feature view.
        view = store.get_feature_view("driver_locations")
        new_service = FeatureService(
            name="new_feature_service", features=[view[["lon"]]]
        )
        store.apply([new_service])

        # Both services must now be retrievable from the registry.
        assert len(store.list_feature_services()) == 2
        store.get_feature_service("new_feature_service")
Loading

0 comments on commit c611eb8

Please sign in to comment.