chore: Add docstrings (feast-dev#3128)
* Fix base feature view docstring

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Add docstring to trino source

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Fix docstrings for bfv and sfv

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Temp work

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Update data source docstrings

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

Signed-off-by: Felix Wang <wangfelix98@gmail.com>
felixwang9817 authored Aug 24, 2022
1 parent f0594e1 commit e610f62
Showing 9 changed files with 100 additions and 30 deletions.
4 changes: 2 additions & 2 deletions sdk/python/feast/base_feature_view.py
@@ -35,8 +35,8 @@ class BaseFeatureView(ABC):
maintainer.
projection: The feature view projection storing modifications to be applied to
this base feature view at retrieval time.
created_timestamp (optional): The time when the base feature view was created.
last_updated_timestamp (optional): The time when the base feature view was last
created_timestamp: The time when the base feature view was created.
last_updated_timestamp: The time when the base feature view was last
updated.
"""

52 changes: 46 additions & 6 deletions sdk/python/feast/batch_feature_view.py
@@ -1,12 +1,16 @@
from datetime import timedelta
from typing import Dict, List, Optional, Union
import warnings
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Union

from feast import flags_helper
from feast.data_source import DataSource
from feast.entity import Entity
from feast.feature_view import FeatureView
from feast.field import Field
from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto

warnings.simplefilter("once", RuntimeWarning)

SUPPORTED_BATCH_SOURCES = {
"BigQuerySource",
"FileSource",
@@ -19,22 +23,58 @@


class BatchFeatureView(FeatureView):
"""
A batch feature view defines a logical group of features that has only a batch data source.
Attributes:
name: The unique name of the batch feature view.
entities: List of entities or entity join keys.
ttl: The amount of time this group of features lives. A ttl of 0 indicates that
this group of features lives forever. Note that large ttl's or a ttl of 0
can result in extremely computationally intensive queries.
schema: The schema of the feature view, including feature, timestamp, and entity
columns. If not specified, can be inferred from the underlying data source.
source: The batch source of data where this group of features is stored.
online: A boolean indicating whether online retrieval is enabled for this feature view.
description: A human-readable description.
tags: A dictionary of key-value pairs to store arbitrary metadata.
owner: The owner of the batch feature view, typically the email of the primary maintainer.
"""

name: str
entities: List[str]
ttl: Optional[timedelta]
source: DataSource
schema: List[Field]
entity_columns: List[Field]
features: List[Field]
online: bool
description: str
tags: Dict[str, str]
owner: str
timestamp_field: str
materialization_intervals: List[Tuple[datetime, datetime]]

def __init__(
self,
*,
name: Optional[str] = None,
name: str,
source: DataSource,
entities: Optional[Union[List[Entity], List[str]]] = None,
ttl: Optional[timedelta] = None,
tags: Optional[Dict[str, str]] = None,
online: bool = True,
description: str = "",
owner: str = "",
schema: Optional[List[Field]] = None,
source: Optional[DataSource] = None,
):
if not flags_helper.is_test():
warnings.warn(
"Batch feature views are experimental features in alpha development. "
"Some functionality may still be unstable so functionality can change in the future.",
RuntimeWarning,
)

if source is None:
raise ValueError("Feature views need a source specified")
if (
type(source).__name__ not in SUPPORTED_BATCH_SOURCES
and source.to_proto().type != DataSourceProto.SourceType.CUSTOM_SOURCE
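For illustration, a hedged sketch of how a `BatchFeatureView` might be defined now that `name` and `source` are required keyword-only arguments. The entity, file path, field names, and owner email are placeholders, and the top-level imports assume Feast's public API around this release.

```python
from datetime import timedelta

from feast import BatchFeatureView, Entity, Field, FileSource
from feast.types import Float32

# Placeholder entity and batch source; FileSource is one of the supported batch sources.
driver = Entity(name="driver", join_keys=["driver_id"])

driver_stats = FileSource(
    name="driver_stats_source",
    path="data/driver_stats.parquet",   # hypothetical parquet file
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)

driver_daily_features = BatchFeatureView(
    name="driver_daily_features",       # now required, keyword-only
    source=driver_stats,                # now required; must be a supported batch source
    entities=[driver],
    ttl=timedelta(days=1),              # a ttl of 0 means the features live forever
    schema=[Field(name="conv_rate", dtype=Float32)],
    online=True,
    description="Daily driver statistics",
    owner="driver-team@example.com",
)
```

Outside of tests, constructing one also emits the experimental-feature RuntimeWarning added above.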
8 changes: 6 additions & 2 deletions sdk/python/feast/infra/offline_stores/bigquery_source.py
@@ -36,14 +36,18 @@ def __init__(
"""Create a BigQuerySource from an existing table or query.
Args:
name (optional): Name for the source. Defaults to the table if not specified.
name (optional): Name for the source. Defaults to the table if not specified, in which
case the table must be specified.
timestamp_field (optional): Event timestamp field used for point in time
joins of feature values.
table (optional): BigQuery table where the features are stored. Exactly one of 'table'
and 'query' must be specified.
table (optional): The BigQuery table where features can be found.
created_timestamp_column (optional): Timestamp column when row was created, used for deduplicating rows.
field_mapping (optional): A dictionary mapping of column names in this data source to feature names in a feature table
or view. Only used for feature columns, not entities or timestamp columns.
query (optional): SQL query to execute to generate data for this data source.
query (optional): The query to be executed to obtain the features. Exactly one of 'table'
and 'query' must be specified.
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
owner (optional): The owner of the bigquery source, typically the email of the primary
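A hedged sketch of the two construction styles the updated `BigQuerySource` docstring describes: table-backed (where `name` defaults to the table) and query-backed (where an explicit `name` is needed). The project, dataset, and table names are placeholders.

```python
from feast import BigQuerySource

# Table-backed source: `name` may be omitted and defaults to the table.
driver_stats_bq = BigQuerySource(
    table="my_project.my_dataset.driver_stats",   # exactly one of `table` / `query`
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)

# Query-backed source: there is no table to fall back on, so a name is given.
driver_stats_query = BigQuerySource(
    name="driver_stats_query",
    query="SELECT * FROM my_project.my_dataset.driver_stats",
    timestamp_field="event_timestamp",
)
```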
@@ -38,24 +38,24 @@ def __init__(
Args:
timestamp_field : event timestamp column.
table (optional): Athena table where the features are stored.
table (optional): Athena table where the features are stored. Exactly one of 'table'
and 'query' must be specified.
database: Athena Database Name
data_source (optional): Athena data source
created_timestamp_column (optional): Timestamp column indicating when the
row was created, used for deduplicating rows.
field_mapping (optional): A dictionary mapping of column names in this data
source to column names in a feature table or view.
date_partition_column : Timestamp column used for partitioning.
query (optional): The query to be executed to obtain the features.
name (optional): Name for the source. Defaults to the table_ref if not specified.
query (optional): The query to be executed to obtain the features. Exactly one of 'table'
and 'query' must be specified.
name (optional): Name for the source. Defaults to the table if not specified, in which
case the table must be specified.
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
owner (optional): The owner of the athena source, typically the email of the primary
maintainer.
"""

_database = "default" if table and not database else database
self.athena_options = AthenaOptions(
table=table, query=query, database=_database, data_source=data_source
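A hedged sketch of an `AthenaSource` following the docstring above. The database, data source, and table names are placeholders, and the import path assumes the Athena source lives under Feast's contrib offline stores, which may differ across versions.

```python
from feast.infra.offline_stores.contrib.athena_offline_store.athena_source import (
    AthenaSource,
)

driver_stats_athena = AthenaSource(
    name="driver_stats_athena",        # optional; defaults to the table
    table="driver_stats",              # exactly one of `table` / `query`
    database="feast_db",               # placeholder Athena database
    data_source="AwsDataCatalog",      # placeholder Athena data source
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)
```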
@@ -97,6 +97,27 @@ def __init__(
tags: Optional[Dict[str, str]] = None,
owner: Optional[str] = "",
):
"""
Creates a TrinoSource object.
Args:
name (optional): Name for the source. Defaults to the table if not specified, in which
case the table must be specified.
timestamp_field (optional): Event timestamp field used for point in time
joins of feature values.
table (optional): Trino table where the features are stored. Exactly one of 'table' and
'query' must be specified.
created_timestamp_column (optional): Timestamp column indicating when the
row was created, used for deduplicating rows.
field_mapping (optional): A dictionary mapping of column names in this data
source to column names in a feature table or view.
query (optional): The query to be executed to obtain the features. Exactly one of 'table'
and 'query' must be specified.
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
owner (optional): The owner of the trino source, typically the email of the primary
maintainer.
"""
# If no name, use the table as the default name.
if name is None and table is None:
raise DataSourceNoNameException()
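A hedged sketch matching the new `TrinoSource` docstring. The fully qualified table name is a placeholder, and the contrib import path is an assumption that may vary by Feast version.

```python
from feast.infra.offline_stores.contrib.trino_offline_store.trino_source import (
    TrinoSource,
)

# Table-backed: `name` defaults to the table; a query-backed source would need
# an explicit name (the DataSourceNoNameException check above enforces this).
driver_stats_trino = TrinoSource(
    table="hive.feast.driver_stats",   # exactly one of `table` / `query`
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)
```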
9 changes: 6 additions & 3 deletions sdk/python/feast/infra/offline_stores/redshift_source.py
@@ -43,16 +43,19 @@ def __init__(
Creates a RedshiftSource object.
Args:
name (optional): Name for the source. Defaults to the table_ref if not specified.
name (optional): Name for the source. Defaults to the table if not specified, in which
case the table must be specified.
timestamp_field (optional): Event timestamp field used for point in time
joins of feature values.
table (optional): Redshift table where the features are stored.
table (optional): Redshift table where the features are stored. Exactly one of 'table'
and 'query' must be specified.
schema (optional): Redshift schema in which the table is located.
created_timestamp_column (optional): Timestamp column indicating when the
row was created, used for deduplicating rows.
field_mapping (optional): A dictionary mapping of column names in this data
source to column names in a feature table or view.
query (optional): The query to be executed to obtain the features.
query (optional): The query to be executed to obtain the features. Exactly one of 'table'
and 'query' must be specified.
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
owner (optional): The owner of the redshift source, typically the email of the primary
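A hedged sketch of a table-backed `RedshiftSource` per the docstring above; the table and schema names are placeholders.

```python
from feast import RedshiftSource

driver_stats_redshift = RedshiftSource(
    name="driver_stats_redshift",      # optional; defaults to the table
    table="driver_stats",              # exactly one of `table` / `query`
    schema="spectrum",                 # placeholder Redshift schema
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)
```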
9 changes: 6 additions & 3 deletions sdk/python/feast/infra/offline_stores/snowflake_source.py
@@ -40,14 +40,17 @@ def __init__(
Creates a SnowflakeSource object.
Args:
name (optional): Name for the source. Defaults to the table if not specified.
name (optional): Name for the source. Defaults to the table if not specified, in which
case the table must be specified.
timestamp_field (optional): Event timestamp field used for point in time
joins of feature values.
database (optional): Snowflake database where the features are stored.
warehouse (optional): Snowflake warehouse where the database is stored.
schema (optional): Snowflake schema in which the table is located.
table (optional): Snowflake table where the features are stored.
query (optional): The query to be executed to obtain the features.
table (optional): Snowflake table where the features are stored. Exactly one of 'table'
and 'query' must be specified.
query (optional): The query to be executed to obtain the features. Exactly one of 'table'
and 'query' must be specified.
created_timestamp_column (optional): Timestamp column indicating when the
row was created, used for deduplicating rows.
field_mapping (optional): A dictionary mapping of column names in this data
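A hedged sketch of a table-backed `SnowflakeSource` per the docstring above; the database, schema, and table names are placeholders.

```python
from feast import SnowflakeSource

driver_stats_snowflake = SnowflakeSource(
    database="FEAST",                  # placeholder database / schema / table
    schema="PUBLIC",
    table="DRIVER_STATS",              # exactly one of `table` / `query`
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)
```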
13 changes: 6 additions & 7 deletions sdk/python/feast/stream_feature_view.py
@@ -33,8 +33,8 @@
@typechecked
class StreamFeatureView(FeatureView):
"""
NOTE: Stream Feature Views are not yet fully implemented and exist to allow users to register their stream sources and
schemas with Feast.
A stream feature view defines a logical group of features that has both a stream data source and
a batch data source.
Attributes:
name: The unique name of the stream feature view.
Expand All @@ -44,15 +44,14 @@ class StreamFeatureView(FeatureView):
can result in extremely computationally intensive queries.
schema: The schema of the feature view, including feature, timestamp, and entity
columns. If not specified, can be inferred from the underlying data source.
source: DataSource. The stream source of data where this group of features is stored.
source: The stream source of data where this group of features is stored.
aggregations: List of aggregations registered with the stream feature view.
mode: The mode of execution.
timestamp_field: Must be specified if aggregations are specified. Defines the timestamp column on which to aggregate windows.
online: Defines whether this stream feature view is used in online feature retrieval.
online: A boolean indicating whether online retrieval is enabled for this feature view.
description: A human-readable description.
tags: A dictionary of key-value pairs to store arbitrary metadata.
owner: The owner of the on demand feature view, typically the email of the primary
maintainer.
owner: The owner of the stream feature view, typically the email of the primary maintainer.
udf: The user defined transformation function. This transformation function should have all of the corresponding imports imported within the function.
"""

@@ -92,7 +91,7 @@ def __init__(
):
if not flags_helper.is_test():
warnings.warn(
"Stream Feature Views are experimental features in alpha development. "
"Stream feature views are experimental features in alpha development. "
"Some functionality may still be unstable so functionality can change in the future.",
RuntimeWarning,
)
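A hedged sketch of a stream feature view wired to a Kafka source with a file-based batch source behind it, using the attributes listed in the docstring above. The topic, bootstrap servers, message schema, and file path are placeholders, and the `KafkaSource`/`JsonFormat` imports and parameter names assume Feast's public API around this release.

```python
from datetime import timedelta

from feast import Entity, Field, FileSource, KafkaSource, StreamFeatureView
from feast.data_format import JsonFormat
from feast.types import Float32

driver = Entity(name="driver", join_keys=["driver_id"])

# Batch source used for backfills; the stream source wraps it.
driver_stats_batch = FileSource(
    path="data/driver_stats.parquet",
    timestamp_field="event_timestamp",
)

driver_stats_stream = KafkaSource(
    name="driver_stats_stream",
    kafka_bootstrap_servers="localhost:9092",
    topic="driver_stats",
    timestamp_field="event_timestamp",
    batch_source=driver_stats_batch,
    message_format=JsonFormat(
        schema_json="driver_id integer, conv_rate double, event_timestamp timestamp"
    ),
)

driver_hourly_stats = StreamFeatureView(
    name="driver_hourly_stats",
    entities=[driver],
    ttl=timedelta(days=1),
    source=driver_stats_stream,
    schema=[Field(name="conv_rate", dtype=Float32)],
    online=True,
    description="Hourly driver statistics from the stream",
    owner="driver-team@example.com",
)
```

As with batch feature views, constructing one outside of tests emits the experimental-feature RuntimeWarning.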
2 changes: 1 addition & 1 deletion sdk/python/tests/unit/test_feature_views.py
@@ -24,7 +24,7 @@ def test_create_batch_feature_view():
source=batch_source,
)

with pytest.raises(ValueError):
with pytest.raises(TypeError):
BatchFeatureView(
name="test batch feature view", entities=[], ttl=timedelta(days=30)
)
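A note on the exception change (my reading of the diff above, not stated in the commit message): because `source` (like `name`) is now a required keyword-only parameter of `BatchFeatureView.__init__`, omitting it fails during Python's argument binding with a `TypeError`, before the explicit `ValueError` check for a missing source is ever reached. A minimal sketch:

```python
from datetime import timedelta

import pytest

from feast import BatchFeatureView

# The `source` keyword is missing, so Python raises TypeError while binding
# arguments; the ValueError branch inside __init__ is never executed.
with pytest.raises(TypeError):
    BatchFeatureView(
        name="test batch feature view", entities=[], ttl=timedelta(days=30)
    )
```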
