diff --git a/Makefile b/Makefile index 41041d7c08..61549d4d0f 100644 --- a/Makefile +++ b/Makefile @@ -74,6 +74,19 @@ test-python-integration-container: test-python-universal-contrib: PYTHONPATH='.' FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.contrib_repo_configuration FEAST_USAGE=False IS_TEST=True python -m pytest -n 8 --integration --universal sdk/python/tests
+test-python-universal-postgres:
+	PYTHONPATH='.' \
+	FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.postgres_repo_configuration \
+	FEAST_USAGE=False \
+	IS_TEST=True \
+	python -m pytest --integration --universal \
+	-k "not test_historical_retrieval_fails_on_validation and \
+	not test_historical_retrieval_with_validation and \
+	not test_historical_features_persisting and \
+	not test_universal_cli" \
+	sdk/python/tests
+
test-python-universal-local: FEAST_USAGE=False IS_TEST=True FEAST_IS_LOCAL_TEST=True python -m pytest -n 8 --integration --universal sdk/python/tests diff --git a/README.md b/README.md index af4df06175..a65cb9ce27 100644 --- a/README.md +++ b/README.md @@ -146,8 +146,8 @@ The list below contains the functionality that contributors are planning to deve * [x] [Parquet file source](https://docs.feast.dev/reference/data-sources/file) * [x] [Synapse source (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) - * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) - * [x] [Spark (community plugin)](https://docs.feast.dev/reference/data-sources/spark) + * [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/data-sources/postgres) + * [x] [Spark (contrib plugin)](https://docs.feast.dev/reference/data-sources/spark) * [x] Kafka / Kinesis sources (via [push support into the online store](https://docs.feast.dev/reference/data-sources/push)) * [ ] HTTP source * **Offline Stores** @@ -156,9 +156,9 @@ The list below contains the functionality that contributors are planning to deve * [x] [BigQuery](https://docs.feast.dev/reference/offline-stores/bigquery) * [x] [Synapse (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) - * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) - * [x] [Trino (community plugin)](https://github.com/Shopify/feast-trino) - * [x] [Spark (community plugin)](https://docs.feast.dev/reference/offline-stores/spark) + * [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/offline-stores/postgres) + * [x] [Trino (contrib plugin)](https://github.com/Shopify/feast-trino) + * [x] [Spark (contrib plugin)](https://docs.feast.dev/reference/offline-stores/spark) * [x] [In-memory / Pandas](https://docs.feast.dev/reference/offline-stores/file) * [x] [Custom offline store support](https://docs.feast.dev/how-to-guides/adding-a-new-offline-store) * **Online Stores** @@ -167,7 +167,7 @@ The list below contains the functionality that contributors are planning to deve * [x] [Datastore](https://docs.feast.dev/reference/online-stores/datastore) * [x] [SQLite](https://docs.feast.dev/reference/online-stores/sqlite) * [x] [Azure Cache for Redis (community plugin)](https://github.com/Azure/feast-azure) - * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) + * [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/online-stores/postgres) * [x]
[Custom online store support](https://docs.feast.dev/how-to-guides/adding-support-for-a-new-online-store) * [ ] Bigtable (in progress) * [ ] Cassandra diff --git a/docs/getting-started/third-party-integrations.md b/docs/getting-started/third-party-integrations.md index 0c233d7b69..ab92668266 100644 --- a/docs/getting-started/third-party-integrations.md +++ b/docs/getting-started/third-party-integrations.md @@ -19,8 +19,8 @@ Don't see your offline store or online store of choice here? Check out our guide * [x] [Parquet file source](https://docs.feast.dev/reference/data-sources/file) * [x] [Synapse source (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) -* [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) -* [x] [Spark (community plugin)](https://docs.feast.dev/reference/data-sources/spark) +* [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/data-sources/postgres) +* [x] [Spark (contrib plugin)](https://docs.feast.dev/reference/data-sources/spark) * [x] Kafka / Kinesis sources (via [push support into the online store](https://docs.feast.dev/reference/data-sources/push)) * [ ] HTTP source @@ -31,9 +31,9 @@ Don't see your offline store or online store of choice here? Check out our guide * [x] [BigQuery](https://docs.feast.dev/reference/offline-stores/bigquery) * [x] [Synapse (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) -* [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) -* [x] [Trino (community plugin)](https://github.com/Shopify/feast-trino) -* [x] [Spark (community plugin)](https://docs.feast.dev/reference/offline-stores/spark) +* [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/offline-stores/postgres) +* [x] [Trino (contrib plugin)](https://github.com/Shopify/feast-trino) +* [x] [Spark (contrib plugin)](https://docs.feast.dev/reference/offline-stores/spark) * [x] [In-memory / Pandas](https://docs.feast.dev/reference/offline-stores/file) * [x] [Custom offline store support](https://docs.feast.dev/how-to-guides/adding-a-new-offline-store) @@ -44,7 +44,7 @@ Don't see your offline store or online store of choice here? Check out our guide * [x] [Datastore](https://docs.feast.dev/reference/online-stores/datastore) * [x] [SQLite](https://docs.feast.dev/reference/online-stores/sqlite) * [x] [Azure Cache for Redis (community plugin)](https://github.com/Azure/feast-azure) -* [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) +* [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/online-stores/postgres) * [x] [Custom online store support](https://docs.feast.dev/how-to-guides/adding-support-for-a-new-online-store) * [ ] Bigtable (in progress) * [ ] Cassandra diff --git a/docs/reference/data-sources/postgres.md b/docs/reference/data-sources/postgres.md new file mode 100644 index 0000000000..759cb50bbd --- /dev/null +++ b/docs/reference/data-sources/postgres.md @@ -0,0 +1,25 @@ +# PostgreSQL + +## Description + +**NOTE**: The Postgres plugin is a contrib plugin. This means it may not be fully stable. + + +The PostgreSQL data source allows for the retrieval of historical feature values from a PostgreSQL database for building training datasets as well as materializing features into an online store. 
+
+## Examples
+
+Defining a Postgres source:
+
+```python
+from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import (
+    PostgreSQLSource,
+)
+
+driver_stats_source = PostgreSQLSource(
+    name="feast_driver_hourly_stats",
+    query="SELECT * FROM feast_driver_hourly_stats",
+    timestamp_field="event_timestamp",
+    created_timestamp_column="created",
+)
+```
diff --git a/docs/reference/data-sources/spark.md b/docs/reference/data-sources/spark.md index 2c1d1ec879..266a401a51 100644 --- a/docs/reference/data-sources/spark.md +++ b/docs/reference/data-sources/spark.md @@ -1,4 +1,4 @@ -# Spark +# Spark (contrib) ## Description diff --git a/docs/reference/offline-stores/postgres.md b/docs/reference/offline-stores/postgres.md new file mode 100644 index 0000000000..b64b2350ca --- /dev/null +++ b/docs/reference/offline-stores/postgres.md @@ -0,0 +1,34 @@
+# PostgreSQL (contrib)
+
+## Description
+
+The PostgreSQL offline store provides support for reading [PostgreSQL](../data-sources/postgres.md) data sources.
+
+**DISCLAIMER**: This PostgreSQL offline store does not yet have full test coverage.
+
+* Entity dataframes can be provided as a SQL query or as a Pandas dataframe. A Pandas dataframe is uploaded to PostgreSQL as a temporary table, which is dropped after the query completes.
+* A `PostgreSQLRetrievalJob` is returned when calling `get_historical_features()`.
+  * This allows you to call
+    * `to_df` to retrieve the result as a pandas dataframe.
+    * `to_arrow` to retrieve the result as a PyArrow table.
+
+## Example
+
+{% code title="feature_store.yaml" %}
+```yaml
+project: my_project
+registry: data/registry.db
+provider: local
+offline_store:
+  type: postgres
+  host: DB_HOST
+  port: DB_PORT
+  database: DB_NAME
+  db_schema: DB_SCHEMA
+  user: DB_USERNAME
+  password: DB_PASSWORD
+online_store:
+  path: data/online_store.db
+```
+{% endcode %}
diff --git a/docs/reference/offline-stores/spark.md b/docs/reference/offline-stores/spark.md index 48ddf46d17..7eec8d7b73 100644 --- a/docs/reference/offline-stores/spark.md +++ b/docs/reference/offline-stores/spark.md @@ -1,4 +1,4 @@ -# Spark +# Spark (contrib) ## Description diff --git a/docs/reference/online-stores/postgres.md b/docs/reference/online-stores/postgres.md new file mode 100644 index 0000000000..917673f278 --- /dev/null +++ b/docs/reference/online-stores/postgres.md @@ -0,0 +1,27 @@
+# PostgreSQL (contrib)
+
+## Description
+
+The PostgreSQL online store provides support for materializing feature values into a PostgreSQL database for serving online features.
+
+* Only the latest feature value is persisted per entity key and feature name.
+
+## Example
+
+{% code title="feature_store.yaml" %}
+```yaml
+project: my_feature_repo
+registry: data/registry.db
+provider: local
+online_store:
+  type: postgres
+  host: DB_HOST
+  port: DB_PORT
+  database: DB_NAME
+  db_schema: DB_SCHEMA
+  user: DB_USERNAME
+  password: DB_PASSWORD
+```
+{% endcode %}
+
+Configuration options are defined by [`PostgreSQLOnlineStoreConfig`](https://rtd.feast.dev/en/latest/).
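+
+Once materialized, feature values can be read back through the standard Feast Python API. The snippet below is a minimal sketch; the `driver_hourly_stats` feature view and `driver_id` entity are illustrative names from the quickstart, not something this store provides:
+
+```python
+from feast import FeatureStore
+
+store = FeatureStore(repo_path=".")
+
+# Fetches the latest values for the requested features from the PostgreSQL
+# online store.
+feature_vector = store.get_online_features(
+    features=["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"],
+    entity_rows=[{"driver_id": 1001}],
+).to_dict()
+```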
diff --git a/docs/roadmap.md b/docs/roadmap.md index 3eb181c0da..bfee5b155c 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -14,8 +14,8 @@ The list below contains the functionality that contributors are planning to deve * [x] [Parquet file source](https://docs.feast.dev/reference/data-sources/file) * [x] [Synapse source (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) - * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) - * [x] [Spark (community plugin)](https://docs.feast.dev/reference/data-sources/spark) + * [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/data-sources/postgres) + * [x] [Spark (contrib plugin)](https://docs.feast.dev/reference/data-sources/spark) * [x] Kafka / Kinesis sources (via [push support into the online store](https://docs.feast.dev/reference/data-sources/push)) * [ ] HTTP source * **Offline Stores** @@ -24,9 +24,9 @@ The list below contains the functionality that contributors are planning to deve * [x] [BigQuery](https://docs.feast.dev/reference/offline-stores/bigquery) * [x] [Synapse (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) - * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) - * [x] [Trino (community plugin)](https://github.com/Shopify/feast-trino) - * [x] [Spark (community plugin)](https://docs.feast.dev/reference/offline-stores/spark) + * [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/offline-stores/postgres) + * [x] [Trino (contrib plugin)](https://github.com/Shopify/feast-trino) + * [x] [Spark (contrib plugin)](https://docs.feast.dev/reference/offline-stores/spark) * [x] [In-memory / Pandas](https://docs.feast.dev/reference/offline-stores/file) * [x] [Custom offline store support](https://docs.feast.dev/how-to-guides/adding-a-new-offline-store) * **Online Stores** @@ -35,7 +35,7 @@ The list below contains the functionality that contributors are planning to deve * [x] [Datastore](https://docs.feast.dev/reference/online-stores/datastore) * [x] [SQLite](https://docs.feast.dev/reference/online-stores/sqlite) * [x] [Azure Cache for Redis (community plugin)](https://github.com/Azure/feast-azure) - * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) + * [x] [Postgres (contrib plugin)](https://docs.feast.dev/reference/online-stores/postgres) * [x] [Custom online store support](https://docs.feast.dev/how-to-guides/adding-support-for-a-new-online-store) * [ ] Bigtable (in progress) * [ ] Cassandra diff --git a/sdk/python/docs/index.rst b/sdk/python/docs/index.rst index 52783b40e3..ca746707ea 100644 --- a/sdk/python/docs/index.rst +++ b/sdk/python/docs/index.rst @@ -59,6 +59,12 @@ Trino Source :members: :exclude-members: TrinoOptions +PostgreSQL Source +------------------ + +.. automodule:: feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source + :members: + :exclude-members: PostgreSQLOptions File Source ------------------ @@ -192,6 +198,12 @@ Trino Offline Store .. automodule:: feast.infra.offline_stores.contrib.trino_offline_store.trino :members: +PostgreSQL Offline Store +------------------------ + +.. automodule:: feast.infra.offline_stores.contrib.postgres_offline_store.postgres + :members: + Online Store ================== @@ -223,4 +235,11 @@ Redis Online Store .. 
automodule:: feast.infra.online_stores.redis :members: - :noindex: \ No newline at end of file + :noindex: + +PostgreSQL Online Store +----------------------- + +.. automodule:: feast.infra.online_stores.contrib.postgres + :members: + :noindex: diff --git a/sdk/python/docs/source/feast.infra.offline_stores.contrib.postgres_offline_store.rst b/sdk/python/docs/source/feast.infra.offline_stores.contrib.postgres_offline_store.rst new file mode 100644 index 0000000000..95afafb5b8 --- /dev/null +++ b/sdk/python/docs/source/feast.infra.offline_stores.contrib.postgres_offline_store.rst @@ -0,0 +1,29 @@ +feast.infra.offline\_stores.contrib.postgres\_offline\_store package +==================================================================== + +Submodules +---------- + +feast.infra.offline\_stores.contrib.postgres\_offline\_store.postgres module +---------------------------------------------------------------------------- + +.. automodule:: feast.infra.offline_stores.contrib.postgres_offline_store.postgres + :members: + :undoc-members: + :show-inheritance: + +feast.infra.offline\_stores.contrib.postgres\_offline\_store.postgres\_source module +------------------------------------------------------------------------------------ + +.. automodule:: feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: feast.infra.offline_stores.contrib.postgres_offline_store + :members: + :undoc-members: + :show-inheritance: diff --git a/sdk/python/docs/source/feast.infra.offline_stores.contrib.rst b/sdk/python/docs/source/feast.infra.offline_stores.contrib.rst index 4fabad6844..39902da130 100644 --- a/sdk/python/docs/source/feast.infra.offline_stores.contrib.rst +++ b/sdk/python/docs/source/feast.infra.offline_stores.contrib.rst @@ -7,6 +7,7 @@ Subpackages .. toctree:: :maxdepth: 4 + feast.infra.offline_stores.contrib.postgres_offline_store feast.infra.offline_stores.contrib.spark_offline_store feast.infra.offline_stores.contrib.trino_offline_store @@ -21,6 +22,14 @@ feast.infra.offline\_stores.contrib.contrib\_repo\_configuration module :undoc-members: :show-inheritance: +feast.infra.offline\_stores.contrib.postgres\_repo\_configuration module +------------------------------------------------------------------------ + +.. automodule:: feast.infra.offline_stores.contrib.postgres_repo_configuration + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- diff --git a/sdk/python/docs/source/feast.infra.online_stores.contrib.rst b/sdk/python/docs/source/feast.infra.online_stores.contrib.rst new file mode 100644 index 0000000000..6ee9b1a532 --- /dev/null +++ b/sdk/python/docs/source/feast.infra.online_stores.contrib.rst @@ -0,0 +1,21 @@ +feast.infra.online\_stores.contrib package +========================================== + +Submodules +---------- + +feast.infra.online\_stores.contrib.postgres module +-------------------------------------------------- + +.. automodule:: feast.infra.online_stores.contrib.postgres + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: feast.infra.online_stores.contrib + :members: + :undoc-members: + :show-inheritance: diff --git a/sdk/python/docs/source/feast.infra.online_stores.rst b/sdk/python/docs/source/feast.infra.online_stores.rst index 5c23796bf8..842522c9d7 100644 --- a/sdk/python/docs/source/feast.infra.online_stores.rst +++ b/sdk/python/docs/source/feast.infra.online_stores.rst @@ -1,6 +1,14 @@ feast.infra.online\_stores package ================================== +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + feast.infra.online_stores.contrib + Submodules ---------- diff --git a/sdk/python/docs/source/feast.infra.utils.postgres.rst b/sdk/python/docs/source/feast.infra.utils.postgres.rst new file mode 100644 index 0000000000..119c8c1dee --- /dev/null +++ b/sdk/python/docs/source/feast.infra.utils.postgres.rst @@ -0,0 +1,29 @@ +feast.infra.utils.postgres package +================================== + +Submodules +---------- + +feast.infra.utils.postgres.connection\_utils module +--------------------------------------------------- + +.. automodule:: feast.infra.utils.postgres.connection_utils + :members: + :undoc-members: + :show-inheritance: + +feast.infra.utils.postgres.postgres\_config module +-------------------------------------------------- + +.. automodule:: feast.infra.utils.postgres.postgres_config + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: feast.infra.utils.postgres + :members: + :undoc-members: + :show-inheritance: diff --git a/sdk/python/docs/source/feast.infra.utils.rst b/sdk/python/docs/source/feast.infra.utils.rst index 9655acc206..30ec6166bf 100644 --- a/sdk/python/docs/source/feast.infra.utils.rst +++ b/sdk/python/docs/source/feast.infra.utils.rst @@ -1,6 +1,14 @@ feast.infra.utils package ========================= +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + feast.infra.utils.postgres + Submodules ---------- diff --git a/sdk/python/docs/source/feast.rst b/sdk/python/docs/source/feast.rst index 35220913be..fb1e77a373 100644 --- a/sdk/python/docs/source/feast.rst +++ b/sdk/python/docs/source/feast.rst @@ -24,6 +24,14 @@ feast.base\_feature\_view module :undoc-members: :show-inheritance: +feast.batch\_feature\_view module +--------------------------------- + +.. automodule:: feast.batch_feature_view + :members: + :undoc-members: + :show-inheritance: + feast.cli module ---------------- @@ -264,6 +272,14 @@ feast.saved\_dataset module :undoc-members: :show-inheritance: +feast.stream\_feature\_view module +---------------------------------- + +.. automodule:: feast.stream_feature_view + :members: + :undoc-members: + :show-inheritance: + feast.transformation\_server module ----------------------------------- diff --git a/sdk/python/docs/source/index.rst b/sdk/python/docs/source/index.rst index 52783b40e3..ca746707ea 100644 --- a/sdk/python/docs/source/index.rst +++ b/sdk/python/docs/source/index.rst @@ -59,6 +59,12 @@ Trino Source :members: :exclude-members: TrinoOptions +PostgreSQL Source +------------------ + +.. automodule:: feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source + :members: + :exclude-members: PostgreSQLOptions File Source ------------------ @@ -192,6 +198,12 @@ Trino Offline Store .. automodule:: feast.infra.offline_stores.contrib.trino_offline_store.trino :members: +PostgreSQL Offline Store +------------------------ + +.. 
automodule:: feast.infra.offline_stores.contrib.postgres_offline_store.postgres + :members: + Online Store ================== @@ -223,4 +235,11 @@ Redis Online Store .. automodule:: feast.infra.online_stores.redis :members: - :noindex: \ No newline at end of file + :noindex: + +PostgreSQL Online Store +----------------------- + +.. automodule:: feast.infra.online_stores.contrib.postgres + :members: + :noindex: diff --git a/sdk/python/docs/source/modules.rst b/sdk/python/docs/source/modules.rst new file mode 100644 index 0000000000..3a6f8333ab --- /dev/null +++ b/sdk/python/docs/source/modules.rst @@ -0,0 +1,7 @@ +feast +===== + +.. toctree:: + :maxdepth: 4 + + feast diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 80cd1844b6..ec5b6cd1b6 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -539,7 +539,7 @@ def materialize_incremental_command(ctx: click.Context, end_ts: str, views: List "--template", "-t", type=click.Choice( - ["local", "gcp", "aws", "snowflake", "spark"], case_sensitive=False + ["local", "gcp", "aws", "snowflake", "spark", "postgres"], case_sensitive=False ), help="Specify a template for the created project", default="local", diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/__init__.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py new file mode 100644 index 0000000000..5e99addcb4 --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py @@ -0,0 +1,541 @@ +import contextlib +from dataclasses import asdict +from datetime import datetime +from typing import ( + Any, + Callable, + ContextManager, + Iterator, + KeysView, + List, + Optional, + Tuple, + Union, +) + +import pandas as pd +import pyarrow as pa +from jinja2 import BaseLoader, Environment +from psycopg2 import sql +from pydantic.typing import Literal +from pytz import utc + +from feast.data_source import DataSource +from feast.errors import InvalidEntityType +from feast.feature_view import DUMMY_ENTITY_ID, DUMMY_ENTITY_VAL, FeatureView +from feast.infra.offline_stores import offline_utils +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) +from feast.infra.utils.postgres.connection_utils import ( + _get_conn, + df_to_postgres_table, + get_query_schema, +) +from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig +from feast.on_demand_feature_view import OnDemandFeatureView +from feast.registry import Registry +from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage +from feast.type_map import pg_type_code_to_arrow + +from .postgres_source import PostgreSQLSource + + +class PostgreSQLOfflineStoreConfig(PostgreSQLConfig): + type: Literal["postgres"] = "postgres" + + +class PostgreSQLOfflineStore(OfflineStore): + @staticmethod + def pull_latest_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + created_timestamp_column: Optional[str], + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(data_source, PostgreSQLSource) + from_expression = data_source.get_table_query_string() + + 
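+        # The query below keeps only the newest row per entity: ROW_NUMBER() is
+        # computed within each join-key partition, ordered by the event (and,
+        # if present, created) timestamp descending, and only rows with
+        # _feast_row = 1 survive the outer filter.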
partition_by_join_key_string = ", ".join(_append_alias(join_key_columns, "a")) + if partition_by_join_key_string != "": + partition_by_join_key_string = ( + "PARTITION BY " + partition_by_join_key_string + ) + timestamps = [event_timestamp_column] + if created_timestamp_column: + timestamps.append(created_timestamp_column) + timestamp_desc_string = " DESC, ".join(_append_alias(timestamps, "a")) + " DESC" + a_field_string = ", ".join( + _append_alias(join_key_columns + feature_name_columns + timestamps, "a") + ) + b_field_string = ", ".join( + _append_alias(join_key_columns + feature_name_columns + timestamps, "b") + ) + + query = f""" + SELECT + {b_field_string} + {f", {repr(DUMMY_ENTITY_VAL)} AS {DUMMY_ENTITY_ID}" if not join_key_columns else ""} + FROM ( + SELECT {a_field_string}, + ROW_NUMBER() OVER({partition_by_join_key_string} ORDER BY {timestamp_desc_string}) AS _feast_row + FROM ({from_expression}) a + WHERE a."{event_timestamp_column}" BETWEEN '{start_date}'::timestamptz AND '{end_date}'::timestamptz + ) b + WHERE _feast_row = 1 + """ + + return PostgreSQLRetrievalJob( + query=query, + config=config, + full_feature_names=False, + on_demand_feature_views=None, + ) + + @staticmethod + def get_historical_features( + config: RepoConfig, + feature_views: List[FeatureView], + feature_refs: List[str], + entity_df: Union[pd.DataFrame, str], + registry: Registry, + project: str, + full_feature_names: bool = False, + ) -> RetrievalJob: + @contextlib.contextmanager + def query_generator() -> Iterator[str]: + table_name = None + if isinstance(entity_df, pd.DataFrame): + table_name = offline_utils.get_temp_entity_table_name() + entity_schema = df_to_postgres_table( + config.offline_store, entity_df, table_name + ) + df_query = table_name + elif isinstance(entity_df, str): + df_query = f"({entity_df}) AS sub" + entity_schema = get_query_schema(config.offline_store, df_query) + else: + raise TypeError(entity_df) + + entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( + entity_schema + ) + + expected_join_keys = offline_utils.get_expected_join_keys( + project, feature_views, registry + ) + + offline_utils.assert_expected_columns_in_entity_df( + entity_schema, expected_join_keys, entity_df_event_timestamp_col + ) + + entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, entity_df_event_timestamp_col, config, df_query, + ) + + query_context = offline_utils.get_feature_view_query_context( + feature_refs, + feature_views, + registry, + project, + entity_df_event_timestamp_range, + ) + + query_context_dict = [asdict(context) for context in query_context] + # Hack for query_context.entity_selections to support uppercase in columns + for context in query_context_dict: + context["entity_selections"] = [ + f'''"{entity_selection.replace(' AS ', '" AS "')}\"''' + for entity_selection in context["entity_selections"] + ] + + try: + yield build_point_in_time_query( + query_context_dict, + left_table_query_string=df_query, + entity_df_event_timestamp_col=entity_df_event_timestamp_col, + entity_df_columns=entity_schema.keys(), + query_template=MULTIPLE_FEATURE_VIEW_POINT_IN_TIME_JOIN, + full_feature_names=full_feature_names, + ) + finally: + if table_name: + with _get_conn(config.offline_store) as conn, conn.cursor() as cur: + cur.execute( + sql.SQL( + """ + DROP TABLE IF EXISTS {}; + """ + ).format(sql.Identifier(table_name)), + ) + + return PostgreSQLRetrievalJob( + query=query_generator, + config=config, + full_feature_names=full_feature_names, + 
on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs( + feature_refs, project, registry + ), + ) + + @staticmethod + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(data_source, PostgreSQLSource) + from_expression = data_source.get_table_query_string() + + field_string = ", ".join( + join_key_columns + feature_name_columns + [event_timestamp_column] + ) + + start_date = start_date.astimezone(tz=utc) + end_date = end_date.astimezone(tz=utc) + + query = f""" + SELECT {field_string} + FROM {from_expression} + WHERE "{event_timestamp_column}" BETWEEN '{start_date}'::timestamptz AND '{end_date}'::timestamptz + """ + + return PostgreSQLRetrievalJob( + query=query, + config=config, + full_feature_names=False, + on_demand_feature_views=None, + ) + + +class PostgreSQLRetrievalJob(RetrievalJob): + def __init__( + self, + query: Union[str, Callable[[], ContextManager[str]]], + config: RepoConfig, + full_feature_names: bool, + on_demand_feature_views: Optional[List[OnDemandFeatureView]], + metadata: Optional[RetrievalMetadata] = None, + ): + if not isinstance(query, str): + self._query_generator = query + else: + + @contextlib.contextmanager + def query_generator() -> Iterator[str]: + assert isinstance(query, str) + yield query + + self._query_generator = query_generator + self.config = config + self._full_feature_names = full_feature_names + self._on_demand_feature_views = on_demand_feature_views + self._metadata = metadata + + @property + def full_feature_names(self) -> bool: + return self._full_feature_names + + @property + def on_demand_feature_views(self) -> Optional[List[OnDemandFeatureView]]: + return self._on_demand_feature_views + + def _to_df_internal(self) -> pd.DataFrame: + # We use arrow format because it gives better control of the table schema + return self._to_arrow_internal().to_pandas() + + def to_sql(self) -> str: + with self._query_generator() as query: + return query + + def _to_arrow_internal(self) -> pa.Table: + with self._query_generator() as query: + with _get_conn(self.config.offline_store) as conn, conn.cursor() as cur: + conn.set_session(readonly=True) + cur.execute(query) + fields = [ + (c.name, pg_type_code_to_arrow(c.type_code)) + for c in cur.description + ] + data = cur.fetchall() + schema = pa.schema(fields) + # TODO: Fix... 
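+            # psycopg2 returns the result set as a list of row tuples, while
+            # pa.Table.from_arrays expects one array per column, so the rows
+            # are transposed into columns in pure Python first.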
+            data_transposed: List[List[Any]] = []
+            for col in range(len(fields)):
+                data_transposed.append([])
+                for row in range(len(data)):
+                    data_transposed[col].append(data[row][col])
+
+            table = pa.Table.from_arrays(
+                [pa.array(row) for row in data_transposed], schema=schema
+            )
+            return table
+
+    @property
+    def metadata(self) -> Optional[RetrievalMetadata]:
+        return self._metadata
+
+    def persist(self, storage: SavedDatasetStorage):
+        pass
+
+
+def _get_entity_df_event_timestamp_range(
+    entity_df: Union[pd.DataFrame, str],
+    entity_df_event_timestamp_col: str,
+    config: RepoConfig,
+    table_name: str,
+) -> Tuple[datetime, datetime]:
+    if isinstance(entity_df, pd.DataFrame):
+        entity_df_event_timestamp = entity_df.loc[
+            :, entity_df_event_timestamp_col
+        ].infer_objects()
+        if pd.api.types.is_string_dtype(entity_df_event_timestamp):
+            entity_df_event_timestamp = pd.to_datetime(
+                entity_df_event_timestamp, utc=True
+            )
+        entity_df_event_timestamp_range = (
+            entity_df_event_timestamp.min(),
+            entity_df_event_timestamp.max(),
+        )
+    elif isinstance(entity_df, str):
+        # If the entity_df is a string (SQL query), determine range
+        # from table
+        with _get_conn(config.offline_store) as conn, conn.cursor() as cur:
+            cur.execute(
+                f"SELECT MIN({entity_df_event_timestamp_col}) AS min, MAX({entity_df_event_timestamp_col}) AS max FROM {table_name}"
+            )
+            res = cur.fetchone()
+        entity_df_event_timestamp_range = (res[0], res[1])
+    else:
+        raise InvalidEntityType(type(entity_df))
+
+    return entity_df_event_timestamp_range
+
+
+def _append_alias(field_names: List[str], alias: str) -> List[str]:
+    return [f'{alias}."{field_name}"' for field_name in field_names]
+
+
+def build_point_in_time_query(
+    feature_view_query_contexts: List[dict],
+    left_table_query_string: str,
+    entity_df_event_timestamp_col: str,
+    entity_df_columns: KeysView[str],
+    query_template: str,
+    full_feature_names: bool = False,
+) -> str:
+    """Build point-in-time query between each feature view table and the entity dataframe for PostgreSQL"""
+    template = Environment(loader=BaseLoader()).from_string(source=query_template)
+
+    final_output_feature_names = list(entity_df_columns)
+    final_output_feature_names.extend(
+        [
+            (
+                f'{fv["name"]}__{fv["field_mapping"].get(feature, feature)}'
+                if full_feature_names
+                else fv["field_mapping"].get(feature, feature)
+            )
+            for fv in feature_view_query_contexts
+            for feature in fv["features"]
+        ]
+    )
+
+    # Add additional fields to dict
+    template_context = {
+        "left_table_query_string": left_table_query_string,
+        "entity_df_event_timestamp_col": entity_df_event_timestamp_col,
+        "unique_entity_keys": set(
+            [entity for fv in feature_view_query_contexts for entity in fv["entities"]]
+        ),
+        "featureviews": feature_view_query_contexts,
+        "full_feature_names": full_feature_names,
+        "final_output_feature_names": final_output_feature_names,
+    }
+
+    query = template.render(template_context)
+    return query
+
+
+# Copied from the Feast Redshift offline store implementation
+# Note: Keep this in sync with sdk/python/feast/infra/offline_stores/redshift.py:
+# MULTIPLE_FEATURE_VIEW_POINT_IN_TIME_JOIN
+# https://github.com/feast-dev/feast/blob/master/sdk/python/feast/infra/offline_stores/redshift.py

+MULTIPLE_FEATURE_VIEW_POINT_IN_TIME_JOIN = """
+/*
+ Compute a deterministic row id for each row of the `left_table_query_string` that will be
+ used throughout all the logic as the field to GROUP BY the data
+*/
+WITH entity_dataframe AS (
+    SELECT *,
+        {{entity_df_event_timestamp_col}} AS entity_timestamp
+        {% for
featureview in featureviews %}
+        {% if featureview.entities %}
+        ,(
+            {% for entity in featureview.entities %}
+            CAST("{{entity}}" as VARCHAR) ||
+            {% endfor %}
+            CAST("{{entity_df_event_timestamp_col}}" AS VARCHAR)
+        ) AS "{{featureview.name}}__entity_row_unique_id"
+        {% else %}
+        ,CAST("{{entity_df_event_timestamp_col}}" AS VARCHAR) AS "{{featureview.name}}__entity_row_unique_id"
+        {% endif %}
+        {% endfor %}
+    FROM {{ left_table_query_string }}
+),
+
+{% for featureview in featureviews %}
+
+"{{ featureview.name }}__entity_dataframe" AS (
+    SELECT
+        {% if featureview.entities %}"{{ featureview.entities | join('", "') }}",{% endif %}
+        entity_timestamp,
+        "{{featureview.name}}__entity_row_unique_id"
+    FROM entity_dataframe
+    GROUP BY
+        {% if featureview.entities %}"{{ featureview.entities | join('", "')}}",{% endif %}
+        entity_timestamp,
+        "{{featureview.name}}__entity_row_unique_id"
+),
+
+/*
+ This query template performs the point-in-time correctness join for a single feature set table
+ to the provided entity table.
+
+ 1. We first join the current feature_view to the entity dataframe that has been passed.
+ This JOIN has the following logic:
+    - For each row of the entity dataframe, only keep the rows where the `event_timestamp_column`
+    is no later than the one provided in the entity dataframe
+    - If there is a TTL for the current feature_view, also keep the rows where the `event_timestamp_column`
+    is no earlier than the one provided minus the TTL
+    - For each row, join on the entity key and retrieve the `entity_row_unique_id` that has been
+    computed previously
+
+ The output of this CTE will contain all the necessary information, with most of the
+ irrelevant data already filtered out.
+*/
+
+"{{ featureview.name }}__subquery" AS (
+    SELECT
+        "{{ featureview.event_timestamp_column }}" as event_timestamp,
+        {{ '"' ~ featureview.created_timestamp_column ~ '" as created_timestamp,' if featureview.created_timestamp_column else '' }}
+        {{ featureview.entity_selections | join(', ')}}{% if featureview.entity_selections %},{% else %}{% endif %}
+        {% for feature in featureview.features %}
+            "{{ feature }}" as {% if full_feature_names %}"{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}"{% else %}"{{ featureview.field_mapping.get(feature, feature) }}"{% endif %}{% if loop.last %}{% else %}, {% endif %}
+        {% endfor %}
+    FROM {{ featureview.table_subquery }} AS sub
+    WHERE "{{ featureview.event_timestamp_column }}" <= (SELECT MAX(entity_timestamp) FROM entity_dataframe)
+    {% if featureview.ttl == 0 %}{% else %}
+    AND "{{ featureview.event_timestamp_column }}" >= (SELECT MIN(entity_timestamp) FROM entity_dataframe) - {{ featureview.ttl }} * interval '1' second
+    {% endif %}
+),
+
+"{{ featureview.name }}__base" AS (
+    SELECT
+        subquery.*,
+        entity_dataframe.entity_timestamp,
+        entity_dataframe."{{featureview.name}}__entity_row_unique_id"
+    FROM "{{ featureview.name }}__subquery" AS subquery
+    INNER JOIN "{{ featureview.name }}__entity_dataframe" AS entity_dataframe
+    ON TRUE
+        AND subquery.event_timestamp <= entity_dataframe.entity_timestamp
+
+        {% if featureview.ttl == 0 %}{% else %}
+        AND subquery.event_timestamp >= entity_dataframe.entity_timestamp - {{ featureview.ttl }} * interval '1' second
+        {% endif %}
+
+        {% for entity in featureview.entities %}
+        AND subquery."{{ entity }}" = entity_dataframe."{{ entity }}"
+        {% endfor %}
+),
+
+/*
+ 2. If the `created_timestamp_column` has been set, we need to
+ deduplicate the data first. This is done by calculating the
+ `MAX(created_timestamp)` for each event_timestamp.
+ We then join it back to the data in the next CTE
+*/
+{% if featureview.created_timestamp_column %}
+"{{ featureview.name }}__dedup" AS (
+    SELECT
+        "{{featureview.name}}__entity_row_unique_id",
+        event_timestamp,
+        MAX(created_timestamp) as created_timestamp
+    FROM "{{ featureview.name }}__base"
+    GROUP BY "{{featureview.name}}__entity_row_unique_id", event_timestamp
+),
+{% endif %}
+
+/*
+ 3. The data has been filtered during the first CTE "*__base".
+ Thus we only need to compute the latest timestamp of each feature.
+*/
+"{{ featureview.name }}__latest" AS (
+    SELECT
+        event_timestamp,
+        {% if featureview.created_timestamp_column %}created_timestamp,{% endif %}
+        "{{featureview.name}}__entity_row_unique_id"
+    FROM
+    (
+        SELECT *,
+            ROW_NUMBER() OVER(
+                PARTITION BY "{{featureview.name}}__entity_row_unique_id"
+                ORDER BY event_timestamp DESC{% if featureview.created_timestamp_column %},created_timestamp DESC{% endif %}
+            ) AS row_number
+        FROM "{{ featureview.name }}__base"
+        {% if featureview.created_timestamp_column %}
+            INNER JOIN "{{ featureview.name }}__dedup"
+            USING ("{{featureview.name}}__entity_row_unique_id", event_timestamp, created_timestamp)
+        {% endif %}
+    ) AS sub
+    WHERE row_number = 1
+),
+
+/*
+ 4. Once we know the latest value of each feature for a given timestamp,
+ we can join the data back to the original "base" dataset again
+*/
+"{{ featureview.name }}__cleaned" AS (
+    SELECT base.*
+    FROM "{{ featureview.name }}__base" as base
+    INNER JOIN "{{ featureview.name }}__latest"
+    USING(
+        "{{featureview.name}}__entity_row_unique_id",
+        event_timestamp
+        {% if featureview.created_timestamp_column %}
+        ,created_timestamp
+        {% endif %}
+    )
+){% if loop.last %}{% else %}, {% endif %}
+
+
+{% endfor %}
+/*
+ Joins the outputs of multiple time travel joins to a single table.
+ The entity_dataframe dataset is our source of truth here.
+ */ + +SELECT "{{ final_output_feature_names | join('", "')}}" +FROM entity_dataframe +{% for featureview in featureviews %} +LEFT JOIN ( + SELECT + "{{featureview.name}}__entity_row_unique_id" + {% for feature in featureview.features %} + ,"{% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %}" + {% endfor %} + FROM "{{ featureview.name }}__cleaned" +) AS "{{featureview.name}}" USING ("{{featureview.name}}__entity_row_unique_id") +{% endfor %} +""" diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py new file mode 100644 index 0000000000..c76bd7d2f9 --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py @@ -0,0 +1,117 @@ +import json +from typing import Callable, Dict, Iterable, Optional, Tuple + +from feast.data_source import DataSource +from feast.infra.utils.postgres.connection_utils import _get_conn +from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.repo_config import RepoConfig +from feast.type_map import pg_type_code_to_pg_type, pg_type_to_feast_value_type +from feast.value_type import ValueType + + +class PostgreSQLSource(DataSource): + def __init__( + self, + name: str, + query: str, + timestamp_field: Optional[str] = "", + created_timestamp_column: Optional[str] = "", + field_mapping: Optional[Dict[str, str]] = None, + date_partition_column: Optional[str] = "", + ): + self._postgres_options = PostgreSQLOptions(name=name, query=query) + + super().__init__( + name=name, + timestamp_field=timestamp_field, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + date_partition_column=date_partition_column, + ) + + def __hash__(self): + return super().__hash__() + + def __eq__(self, other): + if not isinstance(other, PostgreSQLSource): + raise TypeError( + "Comparisons should only involve PostgreSQLSource class objects." 
+ ) + + return ( + self._postgres_options._query == other._postgres_options._query + and self.timestamp_field == other.timestamp_field + and self.created_timestamp_column == other.created_timestamp_column + and self.field_mapping == other.field_mapping + ) + + @staticmethod + def from_proto(data_source: DataSourceProto): + assert data_source.HasField("custom_options") + + postgres_options = json.loads(data_source.custom_options.configuration) + return PostgreSQLSource( + name=postgres_options["name"], + query=postgres_options["query"], + field_mapping=dict(data_source.field_mapping), + timestamp_field=data_source.timestamp_field, + created_timestamp_column=data_source.created_timestamp_column, + date_partition_column=data_source.date_partition_column, + ) + + def to_proto(self) -> DataSourceProto: + data_source_proto = DataSourceProto( + type=DataSourceProto.CUSTOM_SOURCE, + data_source_class_type="feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source.PostgreSQLSource", + field_mapping=self.field_mapping, + custom_options=self._postgres_options.to_proto(), + ) + + data_source_proto.timestamp_field = self.timestamp_field + data_source_proto.created_timestamp_column = self.created_timestamp_column + data_source_proto.date_partition_column = self.date_partition_column + + return data_source_proto + + def validate(self, config: RepoConfig): + pass + + @staticmethod + def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: + return pg_type_to_feast_value_type + + def get_table_column_names_and_types( + self, config: RepoConfig + ) -> Iterable[Tuple[str, str]]: + with _get_conn(config.offline_store) as conn, conn.cursor() as cur: + cur.execute( + f"SELECT * FROM ({self.get_table_query_string()}) AS sub LIMIT 0" + ) + return ( + (c.name, pg_type_code_to_pg_type(c.type_code)) for c in cur.description + ) + + def get_table_query_string(self) -> str: + return f"({self._postgres_options._query})" + + +class PostgreSQLOptions: + def __init__(self, name: str, query: Optional[str]): + self._name = name + self._query = query + + @classmethod + def from_proto(cls, postgres_options_proto: DataSourceProto.CustomSourceOptions): + config = json.loads(postgres_options_proto.configuration.decode("utf8")) + postgres_options = cls(name=config["name"], query=config["query"]) + + return postgres_options + + def to_proto(self) -> DataSourceProto.CustomSourceOptions: + postgres_options_proto = DataSourceProto.CustomSourceOptions( + configuration=json.dumps( + {"name": self._name, "query": self._query} + ).encode() + ) + + return postgres_options_proto diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_repo_configuration.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_repo_configuration.py new file mode 100644 index 0000000000..91ab686895 --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_repo_configuration.py @@ -0,0 +1,24 @@ +from tests.integration.feature_repos.integration_test_repo_config import ( + IntegrationTestRepoConfig, +) +from tests.integration.feature_repos.universal.data_sources.postgres import ( + PostgreSQLDataSourceCreator, +) + +POSTGRES_ONLINE_CONFIG = { + "type": "postgres", + "host": "localhost", + "port": "5432", + "database": "postgres", + "db_schema": "feature_store", + "user": "postgres", + "password": "docker", +} + +FULL_REPO_CONFIGS = [ + IntegrationTestRepoConfig( + provider="local", + offline_store_creator=PostgreSQLDataSourceCreator, + online_store=POSTGRES_ONLINE_CONFIG, + ), +] diff --git 
a/sdk/python/feast/infra/online_stores/contrib/__init__.py b/sdk/python/feast/infra/online_stores/contrib/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/infra/online_stores/contrib/postgres.py b/sdk/python/feast/infra/online_stores/contrib/postgres.py new file mode 100644 index 0000000000..65b904c97b --- /dev/null +++ b/sdk/python/feast/infra/online_stores/contrib/postgres.py @@ -0,0 +1,234 @@
+import logging
+from collections import defaultdict
+from datetime import datetime
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
+
+import psycopg2
+import pytz
+from psycopg2 import sql
+from psycopg2.extras import execute_values
+from pydantic.schema import Literal
+
+from feast import Entity
+from feast.feature_view import FeatureView
+from feast.infra.key_encoding_utils import serialize_entity_key
+from feast.infra.online_stores.online_store import OnlineStore
+from feast.infra.utils.postgres.connection_utils import _get_conn
+from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig
+from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
+from feast.protos.feast.types.Value_pb2 import Value as ValueProto
+from feast.repo_config import RepoConfig
+
+
+class PostgreSQLOnlineStoreConfig(PostgreSQLConfig):
+    type: Literal["postgres"] = "postgres"
+
+
+class PostgreSQLOnlineStore(OnlineStore):
+    _conn: Optional[psycopg2._psycopg.connection] = None
+
+    def _get_conn(self, config: RepoConfig):
+        if not self._conn:
+            assert config.online_store.type == "postgres"
+            self._conn = _get_conn(config.online_store)
+        return self._conn
+
+    def online_write_batch(
+        self,
+        config: RepoConfig,
+        table: FeatureView,
+        data: List[
+            Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
+        ],
+        progress: Optional[Callable[[int], Any]],
+    ) -> None:
+        project = config.project
+
+        with self._get_conn(config) as conn, conn.cursor() as cur:
+            insert_values = []
+            for entity_key, values, timestamp, created_ts in data:
+                entity_key_bin = serialize_entity_key(entity_key)
+                timestamp = _to_naive_utc(timestamp)
+                if created_ts is not None:
+                    created_ts = _to_naive_utc(created_ts)
+
+                for feature_name, val in values.items():
+                    insert_values.append(
+                        (
+                            entity_key_bin,
+                            feature_name,
+                            val.SerializeToString(),
+                            timestamp,
+                            created_ts,
+                        )
+                    )
+            # Control the batch size so that we can update the progress
+            batch_size = 5000
+            for i in range(0, len(insert_values), batch_size):
+                cur_batch = insert_values[i : i + batch_size]
+                execute_values(
+                    cur,
+                    sql.SQL(
+                        """
+                        INSERT INTO {}
+                        (entity_key, feature_name, value, event_ts, created_ts)
+                        VALUES %s
+                        ON CONFLICT (entity_key, feature_name) DO
+                        UPDATE SET
+                            value = EXCLUDED.value,
+                            event_ts = EXCLUDED.event_ts,
+                            created_ts = EXCLUDED.created_ts;
+                        """,
+                    ).format(sql.Identifier(_table_id(project, table))),
+                    cur_batch,
+                    page_size=batch_size,
+                )
+                if progress:
+                    progress(len(cur_batch))
+
+    def online_read(
+        self,
+        config: RepoConfig,
+        table: FeatureView,
+        entity_keys: List[EntityKeyProto],
+        requested_features: Optional[List[str]] = None,
+    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
+        result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
+
+        project = config.project
+        with self._get_conn(config) as conn, conn.cursor() as cur:
+            # Collecting all the keys to a list allows us to make fewer round trips
+            # to PostgreSQL
+            keys = []
+            for entity_key in entity_keys:
+
keys.append(serialize_entity_key(entity_key)) + + cur.execute( + sql.SQL( + """ + SELECT entity_key, feature_name, value, event_ts + FROM {} WHERE entity_key = ANY(%s); + """ + ).format(sql.Identifier(_table_id(project, table)),), + (keys,), + ) + + rows = cur.fetchall() + + # Since we don't know the order returned from PostgreSQL we'll need + # to construct a dict to be able to quickly look up the correct row + # when we iterate through the keys since they are in the correct order + values_dict = defaultdict(list) + for row in rows if rows is not None else []: + values_dict[row[0].tobytes()].append(row[1:]) + + for key in keys: + if key in values_dict: + value = values_dict[key] + res = {} + for feature_name, value_bin, event_ts in value: + val = ValueProto() + val.ParseFromString(value_bin) + res[feature_name] = val + result.append((event_ts, res)) + else: + result.append((None, None)) + + return result + + def update( + self, + config: RepoConfig, + tables_to_delete: Sequence[FeatureView], + tables_to_keep: Sequence[FeatureView], + entities_to_delete: Sequence[Entity], + entities_to_keep: Sequence[Entity], + partial: bool, + ): + project = config.project + schema_name = config.online_store.db_schema or config.online_store.user + with self._get_conn(config) as conn, conn.cursor() as cur: + # If a db_schema is provided, then that schema gets created if it doesn't + # exist. Else a schema is created for the feature store user. + + cur.execute( + """ + SELECT schema_name + FROM information_schema.schemata + WHERE schema_name = %s + """, + (schema_name,), + ) + schema_exists = cur.fetchone() + if not schema_exists: + cur.execute( + sql.SQL("CREATE SCHEMA IF NOT EXISTS {} AUTHORIZATION {}").format( + sql.Identifier(schema_name), + sql.Identifier(config.online_store.user), + ), + ) + + for table in tables_to_delete: + table_name = _table_id(project, table) + cur.execute(_drop_table_and_index(table_name)) + + for table in tables_to_keep: + table_name = _table_id(project, table) + cur.execute( + sql.SQL( + """ + CREATE TABLE IF NOT EXISTS {} + ( + entity_key BYTEA, + feature_name TEXT, + value BYTEA, + event_ts TIMESTAMPTZ, + created_ts TIMESTAMPTZ, + PRIMARY KEY(entity_key, feature_name) + ); + CREATE INDEX IF NOT EXISTS {} ON {} (entity_key); + """ + ).format( + sql.Identifier(table_name), + sql.Identifier(f"{table_name}_ek"), + sql.Identifier(table_name), + ) + ) + + conn.commit() + + def teardown( + self, + config: RepoConfig, + tables: Sequence[FeatureView], + entities: Sequence[Entity], + ): + project = config.project + try: + with self._get_conn(config) as conn, conn.cursor() as cur: + for table in tables: + table_name = _table_id(project, table) + cur.execute(_drop_table_and_index(table_name)) + except Exception: + logging.exception("Teardown failed") + raise + + +def _table_id(project: str, table: FeatureView) -> str: + return f"{project}_{table.name}" + + +def _drop_table_and_index(table_name): + return sql.SQL( + """ + DROP TABLE IF EXISTS {}; + DROP INDEX IF EXISTS {}; + """ + ).format(sql.Identifier(table_name), sql.Identifier(f"{table_name}_ek"),) + + +def _to_naive_utc(ts: datetime): + if ts.tzinfo is None: + return ts + else: + return ts.astimezone(pytz.utc).replace(tzinfo=None) diff --git a/sdk/python/feast/infra/registry_stores/contrib/postgres/registry_store.py b/sdk/python/feast/infra/registry_stores/contrib/postgres/registry_store.py new file mode 100644 index 0000000000..6b0ae1910d --- /dev/null +++ b/sdk/python/feast/infra/registry_stores/contrib/postgres/registry_store.py 
@@ -0,0 +1,112 @@ +import psycopg2 +from psycopg2 import sql + +from feast.infra.utils.postgres.connection_utils import _get_conn +from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig +from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto +from feast.registry_store import RegistryStore +from feast.repo_config import RegistryConfig + + +class PostgresRegistryConfig(RegistryConfig): + host: str + port: int + database: str + db_schema: str + user: str + password: str + + +class PostgreSQLRegistryStore(RegistryStore): + def __init__(self, config: PostgresRegistryConfig, registry_path: str): + self.db_config = PostgreSQLConfig( + host=config.host, + port=config.port, + database=config.database, + db_schema=config.db_schema, + user=config.user, + password=config.password, + ) + self.table_name = config.path + self.cache_ttl_seconds = config.cache_ttl_seconds + + def get_registry_proto(self) -> RegistryProto: + registry_proto = RegistryProto() + try: + with _get_conn(self.db_config) as conn, conn.cursor() as cur: + cur.execute( + sql.SQL( + """ + SELECT registry + FROM {} + WHERE version = (SELECT max(version) FROM {}) + """ + ).format( + sql.Identifier(self.table_name), + sql.Identifier(self.table_name), + ) + ) + row = cur.fetchone() + if row: + registry_proto = registry_proto.FromString(row[0]) + except psycopg2.errors.UndefinedTable: + pass + return registry_proto + + def update_registry_proto(self, registry_proto: RegistryProto): + """ + Overwrites the current registry proto with the proto passed in. This method + writes to the registry path. + + Args: + registry_proto: the new RegistryProto + """ + schema_name = self.db_config.db_schema or self.db_config.user + with _get_conn(self.db_config) as conn, conn.cursor() as cur: + cur.execute( + """ + SELECT schema_name + FROM information_schema.schemata + WHERE schema_name = %s + """, + (schema_name,), + ) + schema_exists = cur.fetchone() + if not schema_exists: + cur.execute( + sql.SQL("CREATE SCHEMA IF NOT EXISTS {} AUTHORIZATION {}").format( + sql.Identifier(schema_name), + sql.Identifier(self.db_config.user), + ), + ) + + cur.execute( + sql.SQL( + """ + CREATE TABLE IF NOT EXISTS {} ( + version BIGSERIAL PRIMARY KEY, + registry BYTEA NOT NULL + ); + """ + ).format(sql.Identifier(self.table_name)), + ) + # Do we want to keep track of the history or just keep the latest? 
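+            # As written, every update inserts a new row with a higher serial
+            # version, and get_registry_proto() reads only the highest one, so
+            # older registry snapshots accumulate until they are pruned manually.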
+ cur.execute( + sql.SQL( + """ + INSERT INTO {} (registry) + VALUES (%s); + """ + ).format(sql.Identifier(self.table_name)), + [registry_proto.SerializeToString()], + ) + + def teardown(self): + with _get_conn(self.db_config) as conn, conn.cursor() as cur: + cur.execute( + sql.SQL( + """ + DROP TABLE IF EXISTS {}; + """ + ).format(sql.Identifier(self.table_name)) + ) diff --git a/sdk/python/feast/infra/utils/postgres/__init__.py b/sdk/python/feast/infra/utils/postgres/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/infra/utils/postgres/connection_utils.py b/sdk/python/feast/infra/utils/postgres/connection_utils.py new file mode 100644 index 0000000000..c3a7c913d4 --- /dev/null +++ b/sdk/python/feast/infra/utils/postgres/connection_utils.py @@ -0,0 +1,64 @@ +from typing import Dict + +import numpy as np +import pandas as pd +import psycopg2 +import psycopg2.extras +import pyarrow as pa + +from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig +from feast.type_map import arrow_to_pg_type + + +def _get_conn(config: PostgreSQLConfig): + conn = psycopg2.connect( + dbname=config.database, + host=config.host, + port=int(config.port), + user=config.user, + password=config.password, + options="-c search_path={}".format(config.db_schema or config.user), + ) + return conn + + +def _df_to_create_table_sql(entity_df, table_name) -> str: + pa_table = pa.Table.from_pandas(entity_df) + columns = [ + f""""{f.name}" {arrow_to_pg_type(str(f.type))}""" for f in pa_table.schema + ] + return f""" + CREATE TABLE "{table_name}" ( + {", ".join(columns)} + ); + """ + + +def df_to_postgres_table( + config: PostgreSQLConfig, df: pd.DataFrame, table_name: str +) -> Dict[str, np.dtype]: + """ + Create a table for the data frame, insert all the values, and return the table schema + """ + with _get_conn(config) as conn, conn.cursor() as cur: + cur.execute(_df_to_create_table_sql(df, table_name)) + psycopg2.extras.execute_values( + cur, + f""" + INSERT INTO {table_name} + VALUES %s + """, + df.replace({np.NaN: None}).to_numpy(), + ) + return dict(zip(df.columns, df.dtypes)) + + +def get_query_schema(config: PostgreSQLConfig, sql_query: str) -> Dict[str, np.dtype]: + """ + We'll use the statement when we perform the query rather than copying data to a + new table + """ + with _get_conn(config) as conn: + conn.set_session(readonly=True) + df = pd.read_sql(f"SELECT * FROM {sql_query} LIMIT 0", conn,) + return dict(zip(df.columns, df.dtypes)) diff --git a/sdk/python/feast/infra/utils/postgres/postgres_config.py b/sdk/python/feast/infra/utils/postgres/postgres_config.py new file mode 100644 index 0000000000..c7a581f072 --- /dev/null +++ b/sdk/python/feast/infra/utils/postgres/postgres_config.py @@ -0,0 +1,12 @@ +from pydantic import StrictStr + +from feast.repo_config import FeastConfigBaseModel + + +class PostgreSQLConfig(FeastConfigBaseModel): + host: StrictStr + port: int = 5432 + database: StrictStr + db_schema: StrictStr = "public" + user: StrictStr + password: StrictStr diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index 5f5d27318a..be009566d0 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -58,6 +58,7 @@ "GCSRegistryStore": "feast.infra.gcp.GCSRegistryStore", "S3RegistryStore": "feast.infra.aws.S3RegistryStore", "LocalRegistryStore": "feast.infra.local.LocalRegistryStore", + "PostgreSQLRegistryStore": "feast.infra.registry_stores.contrib.postgres.registry_store.PostgreSQLRegistryStore", } 
REGISTRY_STORE_CLASS_FOR_SCHEME = { diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index c86a42a8bd..5ea7a8979f 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -35,6 +35,7 @@ "redis": "feast.infra.online_stores.redis.RedisOnlineStore", "dynamodb": "feast.infra.online_stores.dynamodb.DynamoDBOnlineStore", "snowflake.online": "feast.infra.online_stores.snowflake.SnowflakeOnlineStore", + "postgres": "feast.infra.online_stores.contrib.postgres.PostgreSQLOnlineStore", } OFFLINE_STORE_CLASS_FOR_TYPE = { @@ -44,6 +45,7 @@ "snowflake.offline": "feast.infra.offline_stores.snowflake.SnowflakeOfflineStore", "spark": "feast.infra.offline_stores.contrib.spark_offline_store.spark.SparkOfflineStore", "trino": "feast.infra.offline_stores.contrib.trino_offline_store.trino.TrinoOfflineStore", + "postgres": "feast.infra.offline_stores.contrib.postgres_offline_store.postgres.PostgreSQLOfflineStore", } FEATURE_SERVER_CONFIG_CLASS_FOR_TYPE = { diff --git a/sdk/python/feast/templates/postgres/__init__.py b/sdk/python/feast/templates/postgres/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/templates/postgres/bootstrap.py b/sdk/python/feast/templates/postgres/bootstrap.py new file mode 100644 index 0000000000..078d7cdc68 --- /dev/null +++ b/sdk/python/feast/templates/postgres/bootstrap.py @@ -0,0 +1,78 @@ +import click +import psycopg2 + +from feast.infra.utils.postgres.connection_utils import df_to_postgres_table +from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig + + +def bootstrap(): + # Bootstrap() will automatically be called from the init_repo() during `feast init` + + import pathlib + from datetime import datetime, timedelta + + from feast.driver_test_data import create_driver_hourly_stats_df + + repo_path = pathlib.Path(__file__).parent.absolute() + config_file = repo_path / "feature_store.yaml" + + end_date = datetime.now().replace(microsecond=0, second=0, minute=0) + start_date = end_date - timedelta(days=15) + + driver_entities = [1001, 1002, 1003, 1004, 1005] + driver_df = create_driver_hourly_stats_df(driver_entities, start_date, end_date) + + postgres_host = click.prompt("Postgres host", default="localhost") + postgres_port = click.prompt("Postgres port", default="5432") + postgres_database = click.prompt("Postgres DB name", default="postgres") + postgres_schema = click.prompt("Postgres schema", default="public") + postgres_user = click.prompt("Postgres user") + postgres_password = click.prompt("Postgres password", hide_input=True) + + if click.confirm( + 'Should I upload example data to Postgres (overwriting "feast_driver_hourly_stats" table)?', + default=True, + ): + db_connection = psycopg2.connect( + dbname=postgres_database, + host=postgres_host, + port=int(postgres_port), + user=postgres_user, + password=postgres_password, + options=f"-c search_path={postgres_schema}", + ) + + with db_connection as conn, conn.cursor() as cur: + cur.execute('DROP TABLE IF EXISTS "feast_driver_hourly_stats"') + + df_to_postgres_table( + config=PostgreSQLConfig( + host=postgres_host, + port=int(postgres_port), + database=postgres_database, + db_schema=postgres_schema, + user=postgres_user, + password=postgres_password, + ), + df=driver_df, + table_name="feast_driver_hourly_stats", + ) + + replace_str_in_file(config_file, "DB_HOST", postgres_host) + replace_str_in_file(config_file, "DB_PORT", postgres_port) + replace_str_in_file(config_file, "DB_NAME", postgres_database) + 
replace_str_in_file(config_file, "DB_SCHEMA", postgres_schema) + replace_str_in_file(config_file, "DB_USERNAME", postgres_user) + replace_str_in_file(config_file, "DB_PASSWORD", postgres_password) + + +def replace_str_in_file(file_path, match_str, sub_str): + with open(file_path, "r") as f: + contents = f.read() + contents = contents.replace(match_str, sub_str) + with open(file_path, "wt") as f: + f.write(contents) + + +if __name__ == "__main__": + bootstrap() diff --git a/sdk/python/feast/templates/postgres/driver_repo.py b/sdk/python/feast/templates/postgres/driver_repo.py new file mode 100644 index 0000000000..34bc0022e2 --- /dev/null +++ b/sdk/python/feast/templates/postgres/driver_repo.py @@ -0,0 +1,29 @@ +from datetime import timedelta + +from feast import Entity, FeatureView, Field +from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import ( + PostgreSQLSource, +) +from feast.types import Float32, Int64 + +driver = Entity(name="driver_id", join_keys=["driver_id"],) + + +driver_stats_source = PostgreSQLSource( + name="feast_driver_hourly_stats", + query="SELECT * FROM feast_driver_hourly_stats", + timestamp_field="event_timestamp", + created_timestamp_column="created", +) + +driver_stats_fv = FeatureView( + name="driver_hourly_stats", + entities=["driver_id"], + ttl=timedelta(weeks=52), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), + ], + source=driver_stats_source, +) diff --git a/sdk/python/feast/templates/postgres/feature_store.yaml b/sdk/python/feast/templates/postgres/feature_store.yaml new file mode 100644 index 0000000000..53b86b7064 --- /dev/null +++ b/sdk/python/feast/templates/postgres/feature_store.yaml @@ -0,0 +1,27 @@ +project: my_project +provider: local +registry: + registry_store_type: PostgreSQLRegistryStore + path: feast_registry + host: DB_HOST + port: DB_PORT + database: DB_NAME + db_schema: DB_SCHEMA + user: DB_USERNAME + password: DB_PASSWORD +online_store: + type: postgres + host: DB_HOST + port: DB_PORT + database: DB_NAME + db_schema: DB_SCHEMA + user: DB_USERNAME + password: DB_PASSWORD +offline_store: + type: postgres + host: DB_HOST + port: DB_PORT + database: DB_NAME + db_schema: DB_SCHEMA + user: DB_USERNAME + password: DB_PASSWORD diff --git a/sdk/python/feast/templates/postgres/test.py b/sdk/python/feast/templates/postgres/test.py new file mode 100644 index 0000000000..81ac299698 --- /dev/null +++ b/sdk/python/feast/templates/postgres/test.py @@ -0,0 +1,63 @@ +from datetime import datetime, timedelta + +import pandas as pd +from driver_repo import driver, driver_stats_fv + +from feast import FeatureStore + + +def main(): + pd.set_option("display.max_columns", None) + pd.set_option("display.width", 1000) + + # Load the feature store from the current path + fs = FeatureStore(repo_path=".") + + print("Deploying feature store to Postgres...") + fs.apply([driver, driver_stats_fv]) + + # Select features + features = ["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"] + + # Create an entity dataframe. 
This is the dataframe that will be enriched with historical features
+    entity_df = pd.DataFrame(
+        {
+            "event_timestamp": [
+                pd.Timestamp(dt, unit="ms", tz="UTC").round("ms")
+                for dt in pd.date_range(
+                    start=datetime.now() - timedelta(days=3),
+                    end=datetime.now(),
+                    periods=3,
+                )
+            ],
+            "driver_id": [1001, 1002, 1003],
+        }
+    )
+
+    print("Retrieving training data...")
+
+    training_df = fs.get_historical_features(
+        features=features, entity_df=entity_df
+    ).to_df()
+
+    print()
+    print(training_df)
+
+    print()
+    print("Loading features into the online store...")
+    fs.materialize_incremental(end_date=datetime.now())
+
+    print()
+    print("Retrieving online features...")
+
+    # Retrieve features from the online store
+    online_features = fs.get_online_features(
+        features=features, entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}],
+    ).to_dict()
+
+    print()
+    print(pd.DataFrame.from_dict(online_features))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py
index a94d8aa59b..d22c51d596 100644
--- a/sdk/python/feast/type_map.py
+++ b/sdk/python/feast/type_map.py
@@ -639,3 +639,143 @@ def spark_schema_to_np_dtypes(dtypes: List[Tuple[str, str]]) -> Iterator[np.dtyp
     )
 
     return (type_map[t] for _, t in dtypes)
+
+
+def arrow_to_pg_type(t_str: str) -> str:
+    try:
+        if t_str.startswith("timestamp") or t_str.startswith("datetime"):
+            return "timestamptz" if "tz=" in t_str else "timestamp"
+        return {
+            "null": "null",
+            "bool": "boolean",
+            "int8": "tinyint",
+            "int16": "smallint",
+            "int32": "int",
+            "int64": "bigint",
+            "list<item: int32>": "int[]",
+            "list<item: int64>": "bigint[]",
+            "list<item: bool>": "boolean[]",
+            "list<item: double>": "double precision[]",
+            "list<item: timestamp[us]>": "timestamp[]",
+            "uint8": "smallint",
+            "uint16": "int",
+            "uint32": "bigint",
+            "uint64": "bigint",
+            "float": "float",
+            "double": "double precision",
+            "binary": "binary",
+            "string": "text",
+        }[t_str]
+    except KeyError:
+        raise ValueError(f"Unsupported type: {t_str}")
+
+
+def pg_type_to_feast_value_type(type_str: str) -> ValueType:
+    type_map: Dict[str, ValueType] = {
+        "boolean": ValueType.BOOL,
+        "bytea": ValueType.BYTES,
+        "char": ValueType.STRING,
+        "bigint": ValueType.INT64,
+        "smallint": ValueType.INT32,
+        "integer": ValueType.INT32,
+        "real": ValueType.DOUBLE,
+        "double precision": ValueType.DOUBLE,
+        "boolean[]": ValueType.BOOL_LIST,
+        "bytea[]": ValueType.BYTES_LIST,
+        "char[]": ValueType.STRING_LIST,
+        "smallint[]": ValueType.INT32_LIST,
+        "integer[]": ValueType.INT32_LIST,
+        "text": ValueType.STRING,
+        "text[]": ValueType.STRING_LIST,
+        "character[]": ValueType.STRING_LIST,
+        "bigint[]": ValueType.INT64_LIST,
+        "real[]": ValueType.DOUBLE_LIST,
+        "double precision[]": ValueType.DOUBLE_LIST,
+        "character": ValueType.STRING,
+        "character varying": ValueType.STRING,
+        "date": ValueType.UNIX_TIMESTAMP,
+        "time without time zone": ValueType.UNIX_TIMESTAMP,
+        "timestamp without time zone": ValueType.UNIX_TIMESTAMP,
+        "timestamp without time zone[]": ValueType.UNIX_TIMESTAMP_LIST,
+        "date[]": ValueType.UNIX_TIMESTAMP_LIST,
+        "time without time zone[]": ValueType.UNIX_TIMESTAMP_LIST,
+        "timestamp with time zone": ValueType.UNIX_TIMESTAMP,
+        "timestamp with time zone[]": ValueType.UNIX_TIMESTAMP_LIST,
+        "numeric[]": ValueType.DOUBLE_LIST,
+        "numeric": ValueType.DOUBLE,
+        "uuid": ValueType.STRING,
+        "uuid[]": ValueType.STRING_LIST,
+    }
+    value = (
+        type_map[type_str.lower()]
+        if type_str.lower() in type_map
+        else ValueType.UNKNOWN
+    )
+    if value == ValueType.UNKNOWN:
+        print("unknown type:", type_str)
+    return value
+
+
+def feast_value_type_to_pa(feast_type: ValueType) -> pyarrow.DataType:
+    type_map = {
+        ValueType.INT32: pyarrow.int32(),
+        ValueType.INT64: pyarrow.int64(),
+        ValueType.DOUBLE: pyarrow.float64(),
+        ValueType.FLOAT: pyarrow.float32(),
+        ValueType.STRING: pyarrow.string(),
+        ValueType.BYTES: pyarrow.binary(),
+        ValueType.BOOL: pyarrow.bool_(),
+        ValueType.UNIX_TIMESTAMP: pyarrow.timestamp("us"),
+        ValueType.INT32_LIST: pyarrow.list_(pyarrow.int32()),
+        ValueType.INT64_LIST: pyarrow.list_(pyarrow.int64()),
+        ValueType.DOUBLE_LIST: pyarrow.list_(pyarrow.float64()),
+        ValueType.FLOAT_LIST: pyarrow.list_(pyarrow.float32()),
+        ValueType.STRING_LIST: pyarrow.list_(pyarrow.string()),
+        ValueType.BYTES_LIST: pyarrow.list_(pyarrow.binary()),
+        ValueType.BOOL_LIST: pyarrow.list_(pyarrow.bool_()),
+        ValueType.UNIX_TIMESTAMP_LIST: pyarrow.list_(pyarrow.timestamp("us")),
+        ValueType.NULL: pyarrow.null(),
+    }
+    return type_map[feast_type]
+
+
+def pg_type_code_to_pg_type(code: int) -> str:
+    return {
+        16: "boolean",
+        17: "bytea",
+        20: "bigint",
+        21: "smallint",
+        23: "integer",
+        25: "text",
+        700: "real",
+        701: "double precision",
+        1000: "boolean[]",
+        1001: "bytea[]",
+        1005: "smallint[]",
+        1007: "integer[]",
+        1009: "text[]",
+        1014: "character[]",
+        1016: "bigint[]",
+        1021: "real[]",
+        1022: "double precision[]",
+        1042: "character",
+        1043: "character varying",
+        1082: "date",
+        1083: "time without time zone",
+        1114: "timestamp without time zone",
+        1115: "timestamp without time zone[]",
+        1182: "date[]",
+        1183: "time without time zone[]",
+        1184: "timestamp with time zone",
+        1185: "timestamp with time zone[]",
+        1231: "numeric[]",
+        1700: "numeric",
+        2950: "uuid",
+        2951: "uuid[]",
+    }[code]
+
+
+def pg_type_code_to_arrow(code: int) -> pyarrow.DataType:
+    return feast_value_type_to_pa(
+        pg_type_to_feast_value_type(pg_type_code_to_pg_type(code))
+    )
diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt
index 346aa6da47..597ef0397f 100644
--- a/sdk/python/requirements/py3.10-ci-requirements.txt
+++ b/sdk/python/requirements/py3.10-ci-requirements.txt
@@ -63,22 +63,22 @@ azure-identity==1.9.0
     # via adlfs
 azure-storage-blob==12.11.0
     # via adlfs
-babel==2.9.1
+babel==2.10.0
     # via sphinx
 backcall==0.2.0
     # via ipython
 black==19.10b0
     # via feast (setup.py)
-boto3==1.21.41
+boto3==1.21.43
     # via
     #   feast (setup.py)
     #   moto
-botocore==1.24.41
+botocore==1.24.43
     # via
     #   boto3
     #   moto
     #   s3transfer
-cachecontrol==0.12.10
+cachecontrol==0.12.11
     # via firebase-admin
 cachetools==4.2.4
     # via google-auth
@@ -100,7 +100,7 @@ charset-normalizer==2.0.12
     #   aiohttp
     #   requests
     #   snowflake-connector-python
-click==8.1.2
+click==8.0.1
     # via
     #   black
     #   feast (setup.py)
@@ -156,7 +156,7 @@ execnet==1.9.0
     # via pytest-xdist
 executing==0.8.3
     # via stack-data
-fastapi==0.75.1
+fastapi==0.75.2
     # via feast (setup.py)
 fastavro==1.4.10
     # via
@@ -191,7 +191,7 @@ google-api-core[grpc]==1.31.5
     #   google-cloud-bigquery
     #   google-cloud-core
    #   google-cloud-datastore
     #   google-cloud-firestore
-google-api-python-client==2.44.0
+google-api-python-client==2.45.0
     # via firebase-admin
 google-auth==1.35.0
     # via
@@ -306,7 +306,7 @@ jsonschema==4.4.0
     #   feast (setup.py)
     #   great-expectations
     #   nbformat
-jupyter-core==4.9.2
+jupyter-core==4.10.0
     # via nbformat
 locket==0.2.1
     # via partd
@@ -326,7 +326,7 @@ mmh3==3.0.0
     # via feast (setup.py)
 mock==2.0.0
     # via feast (setup.py)
-moto==3.1.4
+moto==3.1.5
     # via feast (setup.py)
 msal==1.17.0
     # via
@@ -403,7 +403,7 @@ pickleshare==0.7.5
     # via ipython
 pip-tools==6.6.0
# via feast (setup.py) -platformdirs==2.5.1 +platformdirs==2.5.2 # via virtualenv pluggy==1.0.0 # via pytest @@ -434,6 +434,8 @@ protobuf==3.19.4 # tensorflow-metadata psutil==5.9.0 # via feast (setup.py) +psycopg2-binary==2.9.3 + # via feast (setup.py) ptyprocess==0.7.0 # via pexpect pure-eval==0.2.2 @@ -666,29 +668,29 @@ traitlets==5.1.1 # nbformat trino==0.312.0 # via feast (setup.py) -typed-ast==1.5.2 +typed-ast==1.5.3 # via black -types-protobuf==3.19.15 +types-protobuf==3.19.17 # via # feast (setup.py) # mypy-protobuf -types-python-dateutil==2.8.10 +types-python-dateutil==2.8.11 # via feast (setup.py) types-pytz==2021.3.6 # via feast (setup.py) -types-pyyaml==6.0.5 +types-pyyaml==6.0.6 # via feast (setup.py) -types-redis==4.1.19 +types-redis==4.1.21 # via feast (setup.py) -types-requests==2.27.16 +types-requests==2.27.19 # via feast (setup.py) -types-setuptools==57.4.12 +types-setuptools==57.4.14 # via feast (setup.py) -types-tabulate==0.8.6 +types-tabulate==0.8.7 # via feast (setup.py) -types-urllib3==1.26.11 +types-urllib3==1.26.13 # via types-requests -typing-extensions==4.1.1 +typing-extensions==4.2.0 # via # azure-core # great-expectations @@ -720,7 +722,7 @@ wcwidth==0.2.5 # via prompt-toolkit websocket-client==1.3.2 # via docker -websockets==10.2 +websockets==10.3 # via uvicorn werkzeug==2.1.1 # via moto diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 455d80b790..44589f680a 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -20,7 +20,7 @@ certifi==2021.10.8 # via requests charset-normalizer==2.0.12 # via requests -click==8.1.2 +click==8.0.1 # via # feast (setup.py) # uvicorn @@ -32,7 +32,7 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.75.1 +fastapi==0.75.2 # via feast (setup.py) fastavro==1.4.10 # via @@ -154,7 +154,7 @@ toolz==0.11.2 # partd tqdm==4.64.0 # via feast (setup.py) -typing-extensions==4.1.1 +typing-extensions==4.2.0 # via pydantic urllib3==1.26.9 # via requests @@ -164,5 +164,5 @@ uvloop==0.16.0 # via uvicorn watchgod==0.8.2 # via uvicorn -websockets==10.2 +websockets==10.3 # via uvicorn diff --git a/sdk/python/requirements/py3.7-ci-requirements.txt b/sdk/python/requirements/py3.7-ci-requirements.txt index 224840a6f7..2fe543c901 100644 --- a/sdk/python/requirements/py3.7-ci-requirements.txt +++ b/sdk/python/requirements/py3.7-ci-requirements.txt @@ -23,20 +23,16 @@ alabaster==0.7.12 altair==4.2.0 # via great-expectations anyio==3.5.0 - # via starlette + # via + # starlette + # watchgod appdirs==1.4.4 # via black -appnope==0.1.2 - # via - # ipykernel - # ipython -argon2-cffi==21.3.0 - # via notebook -argon2-cffi-bindings==21.2.0 - # via argon2-cffi +appnope==0.1.3 + # via ipython asgiref==3.5.0 # via uvicorn -asn1crypto==1.4.0 +asn1crypto==1.5.1 # via # oscrypto # snowflake-connector-python @@ -56,18 +52,18 @@ attrs==21.4.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.23.0 +azure-core==1.23.1 # via # adlfs # azure-identity # azure-storage-blob azure-datalake-store==0.0.52 # via adlfs -azure-identity==1.8.0 +azure-identity==1.9.0 # via adlfs -azure-storage-blob==12.9.0 +azure-storage-blob==12.11.0 # via adlfs -babel==2.9.1 +babel==2.10.0 # via sphinx backcall==0.2.0 # via ipython @@ -77,18 +73,16 @@ backports-zoneinfo==0.2.1 # tzlocal black==19.10b0 # via feast (setup.py) -bleach==4.1.0 - # via nbconvert -boto3==1.21.11 +boto3==1.21.43 # via # feast (setup.py) # moto 
-botocore==1.24.11 +botocore==1.24.43 # via # boto3 # moto # s3transfer -cachecontrol==0.12.10 +cachecontrol==0.12.11 # via firebase-admin cachetools==4.2.4 # via google-auth @@ -100,7 +94,6 @@ certifi==2021.10.8 # snowflake-connector-python cffi==1.15.0 # via - # argon2-cffi-bindings # azure-datalake-store # cryptography # snowflake-connector-python @@ -111,7 +104,7 @@ charset-normalizer==2.0.12 # aiohttp # requests # snowflake-connector-python -click==8.0.4 +click==8.0.1 # via # black # feast (setup.py) @@ -121,7 +114,9 @@ click==8.0.4 cloudpickle==2.0.0 # via dask colorama==0.4.4 - # via feast (setup.py) + # via + # feast (setup.py) + # great-expectations coverage[toml]==6.3.2 # via pytest-cov cryptography==3.4.8 @@ -130,21 +125,19 @@ cryptography==3.4.8 # azure-identity # azure-storage-blob # feast (setup.py) + # great-expectations # moto # msal - # pyjwt # pyopenssl # snowflake-connector-python dask==2022.1.1 # via feast (setup.py) -debugpy==1.5.1 - # via ipykernel +dataclasses==0.6 + # via great-expectations decorator==5.1.1 # via # gcsfs # ipython -defusedxml==0.7.1 - # via nbconvert deprecated==1.2.13 # via redis deprecation==2.1.0 @@ -162,18 +155,17 @@ docutils==0.17.1 # sphinx # sphinx-rtd-theme entrypoints==0.4 - # via - # altair - # jupyter-client - # nbconvert + # via altair execnet==1.9.0 # via pytest-xdist -fastapi==0.74.1 +fastapi==0.75.2 # via feast (setup.py) -fastavro==1.4.9 +fastavro==1.4.10 # via # feast (setup.py) # pandavro +fastjsonschema==2.15.3 + # via nbformat filelock==3.6.0 # via virtualenv firebase-admin==4.5.2 @@ -184,12 +176,12 @@ frozenlist==1.3.0 # via # aiohttp # aiosignal -fsspec==2022.2.0 +fsspec==2022.3.0 # via # adlfs # dask # gcsfs -gcsfs==2022.2.0 +gcsfs==2022.3.0 # via feast (setup.py) google-api-core[grpc]==1.31.5 # via @@ -201,7 +193,7 @@ google-api-core[grpc]==1.31.5 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.39.0 +google-api-python-client==2.45.0 # via firebase-admin google-auth==1.35.0 # via @@ -214,11 +206,11 @@ google-auth==1.35.0 # google-cloud-storage google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.5.0 +google-auth-oauthlib==0.5.1 # via gcsfs -google-cloud-bigquery==2.34.1 +google-cloud-bigquery==2.34.3 # via feast (setup.py) -google-cloud-bigquery-storage==2.12.0 +google-cloud-bigquery-storage==2.13.1 # via feast (setup.py) google-cloud-core==1.7.2 # via @@ -227,9 +219,9 @@ google-cloud-core==1.7.2 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.5.0 +google-cloud-datastore==2.5.1 # via feast (setup.py) -google-cloud-firestore==2.3.4 +google-cloud-firestore==2.4.0 # via firebase-admin google-cloud-storage==1.40.0 # via @@ -247,7 +239,7 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -great-expectations==0.14.8 +great-expectations==0.14.13 # via feast (setup.py) grpcio==1.44.0 # via @@ -271,9 +263,9 @@ httplib2==0.20.4 # via # google-api-python-client # google-auth-httplib2 -httptools==0.3.0 +httptools==0.4.0 # via uvicorn -identify==2.4.11 +identify==2.4.12 # via pre-commit idna==3.3 # via @@ -296,24 +288,11 @@ importlib-metadata==4.2.0 # pytest # redis # virtualenv -importlib-resources==5.4.0 +importlib-resources==5.7.1 # via jsonschema iniconfig==1.1.1 # via pytest -ipykernel==6.9.1 - # via - # ipywidgets - # notebook ipython==7.32.0 - # via - # ipykernel - # ipywidgets -ipython-genutils==0.2.0 - # via - # ipywidgets - # nbformat - # notebook 
-ipywidgets==7.6.5 # via great-expectations isodate==0.6.1 # via msrest @@ -327,16 +306,14 @@ jinja2==3.0.3 # feast (setup.py) # great-expectations # moto - # nbconvert - # notebook # sphinx -jmespath==0.10.0 +jmespath==1.0.0 # via # boto3 # botocore jsonpatch==1.32 # via great-expectations -jsonpointer==2.2 +jsonpointer==2.3 # via jsonpatch jsonschema==4.4.0 # via @@ -344,44 +321,27 @@ jsonschema==4.4.0 # feast (setup.py) # great-expectations # nbformat -jupyter-client==7.1.2 - # via - # ipykernel - # nbclient - # notebook -jupyter-core==4.9.2 - # via - # jupyter-client - # nbconvert - # nbformat - # notebook -jupyterlab-pygments==0.1.2 - # via nbconvert -jupyterlab-widgets==1.0.2 - # via ipywidgets +jupyter-core==4.10.0 + # via nbformat locket==0.2.1 # via partd -markupsafe==2.1.0 +markupsafe==2.1.1 # via # jinja2 # moto matplotlib-inline==0.1.3 - # via - # ipykernel - # ipython + # via ipython mccabe==0.6.1 # via flake8 minio==7.1.0 # via feast (setup.py) -mistune==0.8.4 - # via - # great-expectations - # nbconvert +mistune==2.0.2 + # via great-expectations mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -moto==3.0.5 +moto==3.1.5 # via feast (setup.py) msal==1.17.0 # via @@ -407,27 +367,11 @@ mypy-extensions==0.4.3 # via mypy mypy-protobuf==3.1 # via feast (setup.py) -nbclient==0.5.11 - # via nbconvert -nbconvert==6.4.2 - # via notebook -nbformat==5.1.3 - # via - # ipywidgets - # nbclient - # nbconvert - # notebook -nest-asyncio==1.5.4 - # via - # ipykernel - # jupyter-client - # nbclient - # notebook +nbformat==5.3.0 + # via great-expectations nodeenv==1.6.0 # via pre-commit -notebook==6.4.10 - # via widgetsnbextension -numpy==1.21.5 +numpy==1.21.6 # via # altair # great-expectations @@ -437,16 +381,15 @@ numpy==1.21.5 # scipy oauthlib==3.2.0 # via requests-oauthlib -oscrypto==1.2.1 +oscrypto==1.3.0 # via snowflake-connector-python packaging==21.3 # via - # bleach # dask # deprecation # google-api-core # google-cloud-bigquery - # google-cloud-firestore + # great-expectations # pytest # redis # sphinx @@ -459,8 +402,6 @@ pandas==1.3.5 # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) -pandocfilters==1.5.0 - # via nbconvert parso==0.8.3 # via jedi partd==1.2.0 @@ -475,19 +416,17 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pip-tools==6.5.1 +pip-tools==6.6.0 # via feast (setup.py) -platformdirs==2.5.1 +platformdirs==2.5.2 # via virtualenv pluggy==1.0.0 # via pytest portalocker==2.4.0 # via msal-extensions -pre-commit==2.17.0 +pre-commit==2.18.1 # via feast (setup.py) -prometheus-client==0.13.1 - # via notebook -prompt-toolkit==3.0.28 +prompt-toolkit==3.0.29 # via ipython proto-plus==1.19.6 # via @@ -510,10 +449,10 @@ protobuf==3.19.4 # tensorflow-metadata psutil==5.9.0 # via feast (setup.py) +psycopg2-binary==2.9.3 + # via feast (setup.py) ptyprocess==0.7.0 - # via - # pexpect - # terminado + # via pexpect py==1.11.0 # via # pytest @@ -532,7 +471,7 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pybindgen==0.22.0 +pybindgen==0.22.1 # via feast (setup.py) pycodestyle==2.8.0 # via flake8 @@ -549,8 +488,6 @@ pyflakes==2.4.0 pygments==2.11.2 # via # ipython - # jupyterlab-pygments - # nbconvert # sphinx pyjwt[crypto]==2.3.0 # via @@ -568,7 +505,7 @@ pyrsistent==0.18.1 # via jsonschema pyspark==3.2.1 # via feast (setup.py) -pytest==7.0.1 +pytest==7.1.1 # via # feast (setup.py) # pytest-benchmark @@ -601,12 +538,11 @@ python-dateutil==2.8.2 # botocore # google-cloud-bigquery # great-expectations - # jupyter-client # 
moto # pandas -python-dotenv==0.19.2 +python-dotenv==0.20.0 # via uvicorn -pytz==2021.3 +pytz==2022.1 # via # babel # google-api-core @@ -623,13 +559,9 @@ pyyaml==6.0 # feast (setup.py) # pre-commit # uvicorn -pyzmq==22.3.0 - # via - # jupyter-client - # notebook redis==4.2.2 # via feast (setup.py) -regex==2022.3.2 +regex==2022.3.15 # via black requests==2.27.1 # via @@ -656,7 +588,7 @@ requests-oauthlib==1.3.1 # via # google-auth-oauthlib # msrest -responses==0.18.0 +responses==0.20.0 # via moto rsa==4.8 # via google-auth @@ -668,21 +600,17 @@ s3transfer==0.5.2 # via boto3 scipy==1.7.3 # via great-expectations -send2trash==1.8.0 - # via notebook six==1.16.0 # via # absl-py # azure-core # azure-identity - # bleach # google-api-core # google-auth # google-auth-httplib2 # google-cloud-core # google-resumable-media # grpcio - # isodate # mock # msrestazure # pandavro @@ -693,7 +621,7 @@ sniffio==1.2.0 # via anyio snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==2.7.4 +snowflake-connector-python[pandas]==2.7.6 # via feast (setup.py) sphinx==4.3.2 # via @@ -723,12 +651,8 @@ tensorflow-metadata==1.7.0 # via feast (setup.py) termcolor==1.1.0 # via great-expectations -terminado==0.13.2 - # via notebook testcontainers==3.5.3 # via feast (setup.py) -testpath==0.6.0 - # via nbconvert toml==0.10.2 # via # black @@ -745,59 +669,46 @@ toolz==0.11.2 # altair # dask # partd -tornado==6.1 - # via - # ipykernel - # jupyter-client - # notebook - # terminado -tqdm==4.63.0 +tqdm==4.64.0 # via # feast (setup.py) # great-expectations traitlets==5.1.1 # via - # ipykernel # ipython - # ipywidgets - # jupyter-client # jupyter-core # matplotlib-inline - # nbclient - # nbconvert # nbformat - # notebook trino==0.312.0 # via feast (setup.py) -typed-ast==1.5.2 +typed-ast==1.5.3 # via # black # mypy -types-protobuf==3.19.12 +types-protobuf==3.19.17 # via # feast (setup.py) # mypy-protobuf -types-python-dateutil==2.8.9 +types-python-dateutil==2.8.11 # via feast (setup.py) -types-pytz==2021.3.5 +types-pytz==2021.3.6 # via feast (setup.py) -types-pyyaml==6.0.4 +types-pyyaml==6.0.6 # via feast (setup.py) -types-redis==4.1.17 +types-redis==4.1.21 # via feast (setup.py) -types-requests==2.27.11 +types-requests==2.27.19 # via feast (setup.py) -types-setuptools==57.4.9 +types-setuptools==57.4.14 # via feast (setup.py) -types-tabulate==0.8.5 +types-tabulate==0.8.7 # via feast (setup.py) -types-urllib3==1.26.10 +types-urllib3==1.26.13 # via types-requests -typing-extensions==4.1.1 +typing-extensions==4.2.0 # via # aiohttp # anyio - # argon2-cffi # asgiref # async-timeout # azure-core @@ -811,42 +722,39 @@ typing-extensions==4.1.1 # starlette # uvicorn # yarl -tzdata==2021.5 +tzdata==2022.1 # via pytz-deprecation-shim -tzlocal==4.1 +tzlocal==4.2 # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.8 +urllib3==1.26.9 # via # botocore # feast (setup.py) + # great-expectations # minio # requests # responses -uvicorn[standard]==0.17.5 +uvicorn[standard]==0.17.6 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.13.2 +virtualenv==20.14.1 # via pre-commit -watchgod==0.7 +watchgod==0.8.2 # via uvicorn wcwidth==0.2.5 # via prompt-toolkit -webencodings==0.5.1 - # via bleach -websocket-client==1.3.1 +websocket-client==1.3.2 # via docker -websockets==10.2 +websockets==10.3 # via uvicorn -werkzeug==2.0.3 +werkzeug==2.1.1 # via moto wheel==0.37.1 # via pip-tools -widgetsnbextension==3.5.2 - # via ipywidgets -wrapt==1.13.3 +wrapt==1.14.0 # via # deprecated # 
testcontainers @@ -854,7 +762,7 @@ xmltodict==0.12.0 # via moto yarl==1.7.2 # via aiohttp -zipp==3.7.0 +zipp==3.8.0 # via # importlib-metadata # importlib-resources diff --git a/sdk/python/requirements/py3.7-requirements.txt b/sdk/python/requirements/py3.7-requirements.txt index f3c90a6e3b..693486bcbf 100644 --- a/sdk/python/requirements/py3.7-requirements.txt +++ b/sdk/python/requirements/py3.7-requirements.txt @@ -7,7 +7,9 @@ absl-py==1.0.0 # via tensorflow-metadata anyio==3.5.0 - # via starlette + # via + # starlette + # watchgod asgiref==3.5.0 # via uvicorn attrs==21.4.0 @@ -18,7 +20,7 @@ certifi==2021.10.8 # via requests charset-normalizer==2.0.12 # via requests -click==8.0.4 +click==8.0.1 # via # feast (setup.py) # uvicorn @@ -30,17 +32,17 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.74.1 +fastapi==0.75.2 # via feast (setup.py) -fastavro==1.4.9 +fastavro==1.4.10 # via # feast (setup.py) # pandavro -fsspec==2022.2.0 +fsspec==2022.3.0 # via dask -google-api-core==2.5.0 +google-api-core==2.7.2 # via feast (setup.py) -google-auth==2.6.0 +google-auth==2.6.5 # via google-api-core googleapis-common-protos==1.52.0 # via @@ -55,29 +57,29 @@ grpcio-reflection==1.44.0 # via feast (setup.py) h11==0.13.0 # via uvicorn -httptools==0.3.0 +httptools==0.4.0 # via uvicorn idna==3.3 # via # anyio # requests -importlib-metadata==4.11.1 +importlib-metadata==4.11.3 # via # click # jsonschema -importlib-resources==5.4.0 +importlib-resources==5.7.1 # via jsonschema -jinja2==3.0.3 +jinja2==3.1.1 # via feast (setup.py) jsonschema==4.4.0 # via feast (setup.py) locket==0.2.1 # via partd -markupsafe==2.1.0 +markupsafe==2.1.1 # via jinja2 mmh3==3.0.0 # via feast (setup.py) -numpy==1.21.5 +numpy==1.21.6 # via # pandas # pandavro @@ -114,15 +116,15 @@ pydantic==1.9.0 # via # fastapi # feast (setup.py) -pyparsing==3.0.7 +pyparsing==3.0.8 # via packaging pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas -python-dotenv==0.19.2 +python-dotenv==0.20.0 # via uvicorn -pytz==2021.3 +pytz==2022.1 # via pandas pyyaml==6.0 # via @@ -148,7 +150,7 @@ tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.6.0 +tensorflow-metadata==1.7.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -156,9 +158,9 @@ toolz==0.11.2 # via # dask # partd -tqdm==4.62.3 +tqdm==4.64.0 # via feast (setup.py) -typing-extensions==4.1.1 +typing-extensions==4.2.0 # via # anyio # asgiref @@ -168,17 +170,17 @@ typing-extensions==4.1.1 # pydantic # starlette # uvicorn -urllib3==1.26.8 +urllib3==1.26.9 # via requests -uvicorn[standard]==0.17.5 +uvicorn[standard]==0.17.6 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -watchgod==0.7 +watchgod==0.8.2 # via uvicorn -websockets==10.2 +websockets==10.3 # via uvicorn -zipp==3.7.0 +zipp==3.8.0 # via # importlib-metadata # importlib-resources diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 34032ef7d2..07bf635a51 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -63,7 +63,7 @@ azure-identity==1.9.0 # via adlfs azure-storage-blob==12.11.0 # via adlfs -babel==2.9.1 +babel==2.10.0 # via sphinx backcall==0.2.0 # via ipython @@ -73,16 +73,16 @@ backports-zoneinfo==0.2.1 # tzlocal black==19.10b0 # via feast (setup.py) -boto3==1.21.41 +boto3==1.21.43 # via # feast (setup.py) # moto -botocore==1.24.41 +botocore==1.24.43 # via # boto3 # moto # s3transfer 
-cachecontrol==0.12.10 +cachecontrol==0.12.11 # via firebase-admin cachetools==4.2.4 # via google-auth @@ -104,7 +104,7 @@ charset-normalizer==2.0.12 # aiohttp # requests # snowflake-connector-python -click==8.1.2 +click==8.0.1 # via # black # feast (setup.py) @@ -160,7 +160,7 @@ execnet==1.9.0 # via pytest-xdist executing==0.8.3 # via stack-data -fastapi==0.75.1 +fastapi==0.75.2 # via feast (setup.py) fastavro==1.4.10 # via @@ -195,7 +195,7 @@ google-api-core[grpc]==1.31.5 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.44.0 +google-api-python-client==2.45.0 # via firebase-admin google-auth==1.35.0 # via @@ -279,7 +279,7 @@ imagesize==1.3.0 # via sphinx importlib-metadata==4.11.3 # via great-expectations -importlib-resources==5.7.0 +importlib-resources==5.7.1 # via jsonschema iniconfig==1.1.1 # via pytest @@ -312,7 +312,7 @@ jsonschema==4.4.0 # feast (setup.py) # great-expectations # nbformat -jupyter-core==4.9.2 +jupyter-core==4.10.0 # via nbformat locket==0.2.1 # via partd @@ -332,7 +332,7 @@ mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -moto==3.1.4 +moto==3.1.5 # via feast (setup.py) msal==1.17.0 # via @@ -409,7 +409,7 @@ pickleshare==0.7.5 # via ipython pip-tools==6.6.0 # via feast (setup.py) -platformdirs==2.5.1 +platformdirs==2.5.2 # via virtualenv pluggy==1.0.0 # via pytest @@ -440,6 +440,8 @@ protobuf==3.19.4 # tensorflow-metadata psutil==5.9.0 # via feast (setup.py) +psycopg2-binary==2.9.3 + # via feast (setup.py) ptyprocess==0.7.0 # via pexpect pure-eval==0.2.2 @@ -674,29 +676,29 @@ traitlets==5.1.1 # nbformat trino==0.312.0 # via feast (setup.py) -typed-ast==1.5.2 +typed-ast==1.5.3 # via black -types-protobuf==3.19.15 +types-protobuf==3.19.17 # via # feast (setup.py) # mypy-protobuf -types-python-dateutil==2.8.10 +types-python-dateutil==2.8.11 # via feast (setup.py) types-pytz==2021.3.6 # via feast (setup.py) -types-pyyaml==6.0.5 +types-pyyaml==6.0.6 # via feast (setup.py) -types-redis==4.1.19 +types-redis==4.1.21 # via feast (setup.py) -types-requests==2.27.16 +types-requests==2.27.19 # via feast (setup.py) -types-setuptools==57.4.12 +types-setuptools==57.4.14 # via feast (setup.py) -types-tabulate==0.8.6 +types-tabulate==0.8.7 # via feast (setup.py) -types-urllib3==1.26.11 +types-urllib3==1.26.13 # via types-requests -typing-extensions==4.1.1 +typing-extensions==4.2.0 # via # azure-core # great-expectations @@ -728,7 +730,7 @@ wcwidth==0.2.5 # via prompt-toolkit websocket-client==1.3.2 # via docker -websockets==10.2 +websockets==10.3 # via uvicorn werkzeug==2.1.1 # via moto diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 9000c7b1f7..92445b305b 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -20,7 +20,7 @@ certifi==2021.10.8 # via requests charset-normalizer==2.0.12 # via requests -click==8.1.2 +click==8.0.1 # via # feast (setup.py) # uvicorn @@ -32,7 +32,7 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.75.1 +fastapi==0.75.2 # via feast (setup.py) fastavro==1.4.10 # via @@ -63,7 +63,7 @@ idna==3.3 # via # anyio # requests -importlib-resources==5.7.0 +importlib-resources==5.7.1 # via jsonschema jinja2==3.1.1 # via feast (setup.py) @@ -156,7 +156,7 @@ toolz==0.11.2 # partd tqdm==4.64.0 # via feast (setup.py) -typing-extensions==4.1.1 +typing-extensions==4.2.0 # via pydantic urllib3==1.26.9 # via requests @@ -166,7 +166,7 @@ 
uvloop==0.16.0 # via uvicorn watchgod==0.8.2 # via uvicorn -websockets==10.2 +websockets==10.3 # via uvicorn zipp==3.8.0 # via importlib-resources diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 1ab910a16d..42d7b35335 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -63,22 +63,22 @@ azure-identity==1.9.0 # via adlfs azure-storage-blob==12.11.0 # via adlfs -babel==2.9.1 +babel==2.10.0 # via sphinx backcall==0.2.0 # via ipython black==19.10b0 # via feast (setup.py) -boto3==1.21.41 +boto3==1.21.43 # via # feast (setup.py) # moto -botocore==1.24.41 +botocore==1.24.43 # via # boto3 # moto # s3transfer -cachecontrol==0.12.10 +cachecontrol==0.12.11 # via firebase-admin cachetools==4.2.4 # via google-auth @@ -100,7 +100,7 @@ charset-normalizer==2.0.12 # aiohttp # requests # snowflake-connector-python -click==8.1.2 +click==8.0.1 # via # black # feast (setup.py) @@ -156,7 +156,7 @@ execnet==1.9.0 # via pytest-xdist executing==0.8.3 # via stack-data -fastapi==0.75.1 +fastapi==0.75.2 # via feast (setup.py) fastavro==1.4.10 # via @@ -191,7 +191,7 @@ google-api-core[grpc]==1.31.5 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.44.0 +google-api-python-client==2.45.0 # via firebase-admin google-auth==1.35.0 # via @@ -306,7 +306,7 @@ jsonschema==4.4.0 # feast (setup.py) # great-expectations # nbformat -jupyter-core==4.9.2 +jupyter-core==4.10.0 # via nbformat locket==0.2.1 # via partd @@ -326,7 +326,7 @@ mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -moto==3.1.4 +moto==3.1.5 # via feast (setup.py) msal==1.17.0 # via @@ -403,7 +403,7 @@ pickleshare==0.7.5 # via ipython pip-tools==6.6.0 # via feast (setup.py) -platformdirs==2.5.1 +platformdirs==2.5.2 # via virtualenv pluggy==1.0.0 # via pytest @@ -434,6 +434,8 @@ protobuf==3.19.4 # tensorflow-metadata psutil==5.9.0 # via feast (setup.py) +psycopg2-binary==2.9.3 + # via feast (setup.py) ptyprocess==0.7.0 # via pexpect pure-eval==0.2.2 @@ -577,10 +579,10 @@ responses==0.20.0 # via moto rsa==4.8 # via google-auth -ruamel-yaml==0.17.17 +ruamel.yaml==0.17.17 # via great-expectations -ruamel-yaml-clib==0.2.6 - # via ruamel-yaml +ruamel.yaml.clib==0.2.6 + # via ruamel.yaml s3transfer==0.5.2 # via boto3 scipy==1.8.0 @@ -668,29 +670,29 @@ traitlets==5.1.1 # nbformat trino==0.312.0 # via feast (setup.py) -typed-ast==1.5.2 +typed-ast==1.5.3 # via black -types-protobuf==3.19.15 +types-protobuf==3.19.17 # via # feast (setup.py) # mypy-protobuf -types-python-dateutil==2.8.10 +types-python-dateutil==2.8.11 # via feast (setup.py) types-pytz==2021.3.6 # via feast (setup.py) -types-pyyaml==6.0.5 +types-pyyaml==6.0.6 # via feast (setup.py) -types-redis==4.1.19 +types-redis==4.1.21 # via feast (setup.py) -types-requests==2.27.16 +types-requests==2.27.19 # via feast (setup.py) -types-setuptools==57.4.12 +types-setuptools==57.4.14 # via feast (setup.py) -types-tabulate==0.8.6 +types-tabulate==0.8.7 # via feast (setup.py) -types-urllib3==1.26.11 +types-urllib3==1.26.13 # via types-requests -typing-extensions==4.1.1 +typing-extensions==4.2.0 # via # azure-core # great-expectations @@ -722,7 +724,7 @@ wcwidth==0.2.5 # via prompt-toolkit websocket-client==1.3.2 # via docker -websockets==10.2 +websockets==10.3 # via uvicorn werkzeug==2.1.1 # via moto diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 
6413886c5b..d277f60689 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -20,7 +20,7 @@ certifi==2021.10.8 # via requests charset-normalizer==2.0.12 # via requests -click==8.1.2 +click==8.0.1 # via # feast (setup.py) # uvicorn @@ -32,7 +32,7 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.75.1 +fastapi==0.75.2 # via feast (setup.py) fastavro==1.4.10 # via @@ -154,7 +154,7 @@ toolz==0.11.2 # partd tqdm==4.64.0 # via feast (setup.py) -typing-extensions==4.1.1 +typing-extensions==4.2.0 # via pydantic urllib3==1.26.9 # via requests @@ -164,5 +164,5 @@ uvloop==0.16.0 # via uvicorn watchgod==0.8.2 # via uvicorn -websockets==10.2 +websockets==10.3 # via uvicorn diff --git a/sdk/python/setup.py b/sdk/python/setup.py index ed1a1a7f9f..8527d2f8c1 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -46,7 +46,7 @@ REQUIRES_PYTHON = ">=3.7.0" REQUIRED = [ - "click>=7.0.0", + "click>=7.0.0,<8.0.2", "colorama>=0.3.9", "dill==0.3.*", "fastavro>=1.1.0", @@ -105,6 +105,10 @@ "trino>=0.305.0,<0.400.0", ] +POSTGRES_REQUIRED = [ + "psycopg2-binary>=2.8.3", +] + GE_REQUIRED = [ "great_expectations>=0.14.0,<0.15.0" ] @@ -157,6 +161,7 @@ + AWS_REQUIRED + SNOWFLAKE_REQUIRED + SPARK_REQUIRED + + POSTGRES_REQUIRED + TRINO_REQUIRED + GE_REQUIRED ) @@ -433,6 +438,7 @@ def copy_extensions_to_source(self): "snowflake": SNOWFLAKE_REQUIRED, "spark": SPARK_REQUIRED, "trino": TRINO_REQUIRED, + "postgres": POSTGRES_REQUIRED, "ge": GE_REQUIRED, }, include_package_data=True, diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/__init__.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/postgres.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/postgres.py new file mode 100644 index 0000000000..de5df6496f --- /dev/null +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/postgres.py @@ -0,0 +1,71 @@ +from typing import Dict, List, Optional + +import pandas as pd + +from feast.data_source import DataSource +from feast.infra.offline_stores.contrib.postgres_offline_store.postgres import ( + PostgreSQLOfflineStoreConfig, + PostgreSQLSource, +) +from feast.infra.utils.postgres.connection_utils import _get_conn, df_to_postgres_table +from feast.repo_config import FeastConfigBaseModel +from tests.integration.feature_repos.universal.data_source_creator import ( + DataSourceCreator, +) + + +class PostgreSQLDataSourceCreator(DataSourceCreator): + tables: List[str] = [] + + def __init__(self, project_name: str, *args, **kwargs): + super().__init__(project_name) + self.project_name = project_name + + self.offline_store_config = PostgreSQLOfflineStoreConfig( + type="postgres", + host="localhost", + port=5432, + database="postgres", + db_schema="public", + user="postgres", + password="docker", + ) + + def create_data_source( + self, + df: pd.DataFrame, + destination_name: str, + suffix: Optional[str] = None, + timestamp_field="ts", + created_timestamp_column="created_ts", + field_mapping: Dict[str, str] = None, + ) -> DataSource: + + destination_name = self.get_prefixed_table_name(destination_name) + + df_to_postgres_table(self.offline_store_config, df, destination_name) + + self.tables.append(destination_name) + + return PostgreSQLSource( + name=destination_name, + query=f"SELECT * FROM 
{destination_name}",
+            timestamp_field=timestamp_field,
+            created_timestamp_column=created_timestamp_column,
+            field_mapping=field_mapping or {"ts_1": "ts"},
+        )
+
+    def create_offline_store_config(self) -> FeastConfigBaseModel:
+        return self.offline_store_config
+
+    def get_prefixed_table_name(self, suffix: str) -> str:
+        return f"{self.project_name}_{suffix}"
+
+    def create_saved_dataset_destination(self):
+        # FIXME: ...
+        return None
+
+    def teardown(self):
+        # Drop every table created by create_data_source. Plain string interpolation
+        # is acceptable here because the table names are generated internally by
+        # get_prefixed_table_name, not taken from user input.
+        with _get_conn(self.offline_store_config) as conn, conn.cursor() as cur:
+            for table in self.tables:
+                cur.execute("DROP TABLE IF EXISTS " + table)
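Reviewer note: a minimal sketch of a repo configuration module that would register the
PostgreSQLDataSourceCreator above with the universal test suite; the
IntegrationTestRepoConfig import path and field names are assumptions modeled on the
existing contrib configurations, not part of this diff.

# postgres_repo_configuration.py (hypothetical sketch)
from tests.integration.feature_repos.integration_test_repo_config import (
    IntegrationTestRepoConfig,
)
from tests.integration.feature_repos.universal.data_sources.postgres import (
    PostgreSQLDataSourceCreator,
)

# Run the universal suite against the local provider, with offline store tables
# created and torn down by PostgreSQLDataSourceCreator.
FULL_REPO_CONFIGS = [
    IntegrationTestRepoConfig(
        provider="local",
        offline_store_creator=PostgreSQLDataSourceCreator,
    ),
]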