Skip to content

Commit

Permalink
feat(telemetry): Implement telemetry message notification (kedro-org#760)
Browse files Browse the repository at this point in the history

* Implement telemetry message notification

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

* Fix docs build

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

---------

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>
Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com>
  • Loading branch information
DimedS authored and merelcht committed Aug 27, 2024
1 parent 7303e1f commit 021cb73
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 75 deletions.
106 changes: 53 additions & 53 deletions kedro-datasets/docs/source/api/kedro_datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,56 +11,56 @@ kedro_datasets
:toctree:
:template: autosummary/class.rst

kedro_datasets.api.APIDataset
kedro_datasets.biosequence.BioSequenceDataset
kedro_datasets.dask.CSVDataset
kedro_datasets.dask.ParquetDataset
kedro_datasets.databricks.ManagedTableDataset
kedro_datasets.email.EmailMessageDataset
kedro_datasets.geopandas.GeoJSONDataset
kedro_datasets.holoviews.HoloviewsWriter
kedro_datasets.huggingface.HFDataset
kedro_datasets.huggingface.HFTransformerPipelineDataset
kedro_datasets.ibis.TableDataset
kedro_datasets.json.JSONDataset
kedro_datasets.matlab.MatlabDataset
kedro_datasets.matplotlib.MatplotlibWriter
kedro_datasets.networkx.GMLDataset
kedro_datasets.networkx.GraphMLDataset
kedro_datasets.networkx.JSONDataset
kedro_datasets.pandas.CSVDataset
kedro_datasets.pandas.DeltaTableDataset
kedro_datasets.pandas.ExcelDataset
kedro_datasets.pandas.FeatherDataset
kedro_datasets.pandas.GBQQueryDataset
kedro_datasets.pandas.GBQTableDataset
kedro_datasets.pandas.GenericDataset
kedro_datasets.pandas.HDFDataset
kedro_datasets.pandas.JSONDataset
kedro_datasets.pandas.ParquetDataset
kedro_datasets.pandas.SQLQueryDataset
kedro_datasets.pandas.SQLTableDataset
kedro_datasets.pandas.XMLDataset
kedro_datasets.partitions.IncrementalDataset
kedro_datasets.partitions.PartitionedDataset
kedro_datasets.pickle.PickleDataset
kedro_datasets.pillow.ImageDataset
kedro_datasets.plotly.JSONDataset
kedro_datasets.plotly.PlotlyDataset
kedro_datasets.polars.CSVDataset
kedro_datasets.polars.EagerPolarsDataset
kedro_datasets.polars.LazyPolarsDataset
kedro_datasets.redis.PickleDataset
kedro_datasets.snowflake.SnowparkTableDataset
kedro_datasets.spark.DeltaTableDataset
kedro_datasets.spark.SparkDataset
kedro_datasets.spark.SparkHiveDataset
kedro_datasets.spark.SparkJDBCDataset
kedro_datasets.spark.SparkStreamingDataset
kedro_datasets.svmlight.SVMLightDataset
kedro_datasets.tensorflow.TensorFlowModelDataset
kedro_datasets.text.TextDataset
kedro_datasets.tracking.JSONDataset
kedro_datasets.tracking.MetricsDataset
kedro_datasets.video.VideoDataset
kedro_datasets.yaml.YAMLDataset
api.APIDataset
biosequence.BioSequenceDataset
dask.CSVDataset
dask.ParquetDataset
databricks.ManagedTableDataset
email.EmailMessageDataset
geopandas.GeoJSONDataset
holoviews.HoloviewsWriter
huggingface.HFDataset
huggingface.HFTransformerPipelineDataset
ibis.TableDataset
json.JSONDataset
matlab.MatlabDataset
matplotlib.MatplotlibWriter
networkx.GMLDataset
networkx.GraphMLDataset
networkx.JSONDataset
pandas.CSVDataset
pandas.DeltaTableDataset
pandas.ExcelDataset
pandas.FeatherDataset
pandas.GBQQueryDataset
pandas.GBQTableDataset
pandas.GenericDataset
pandas.HDFDataset
pandas.JSONDataset
pandas.ParquetDataset
pandas.SQLQueryDataset
pandas.SQLTableDataset
pandas.XMLDataset
partitions.IncrementalDataset
partitions.PartitionedDataset
pickle.PickleDataset
pillow.ImageDataset
plotly.JSONDataset
plotly.PlotlyDataset
polars.CSVDataset
polars.EagerPolarsDataset
polars.LazyPolarsDataset
redis.PickleDataset
snowflake.SnowparkTableDataset
spark.DeltaTableDataset
spark.SparkDataset
spark.SparkHiveDataset
spark.SparkJDBCDataset
spark.SparkStreamingDataset
svmlight.SVMLightDataset
tensorflow.TensorFlowModelDataset
text.TextDataset
tracking.JSONDataset
tracking.MetricsDataset
video.VideoDataset
yaml.YAMLDataset
12 changes: 6 additions & 6 deletions kedro-datasets/docs/source/api/kedro_datasets_experimental.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ kedro_datasets_experimental
:toctree:
:template: autosummary/class.rst

kedro_datasets_experimental.langchain.ChatAnthropicDataset
kedro_datasets_experimental.langchain.ChatCohereDataset
kedro_datasets_experimental.langchain.ChatOpenAIDataset
kedro_datasets_experimental.langchain.OpenAIEmbeddingsDataset
kedro_datasets_experimental.netcdf.NetCDFDataset
kedro_datasets_experimental.rioxarray.GeoTIFFDataset
langchain.ChatAnthropicDataset
langchain.ChatCohereDataset
langchain.ChatOpenAIDataset
langchain.OpenAIEmbeddingsDataset
netcdf.NetCDFDataset
rioxarray.GeoTIFFDataset
4 changes: 4 additions & 0 deletions kedro-telemetry/kedro_telemetry/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""Kedro plugin for collecting Kedro usage data."""

__version__ = "0.5.0"

import logging

# Pin this package's logger to INFO so that INFO-level records logged through
# it (or through child loggers that inherit its level) are not filtered out by
# a stricter level inherited from further up the logger hierarchy.
_package_logger = logging.getLogger(__name__)
_package_logger.setLevel(logging.INFO)
18 changes: 10 additions & 8 deletions kedro-telemetry/kedro_telemetry/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,20 @@ def before_command_run(

consent = _check_for_telemetry_consent(project_metadata.project_path)
if not consent:
logger.debug(
logger.info(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected.",
)
return

logger.info(
"Kedro is sending anonymous usage data with the sole purpose of improving the product. "
"No personal data or IP addresses are stored on our side. "
"If you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK` environment variables, "
"or create a `.telemetry` file in the current working directory with the contents `consent: false`. "
"Read more at https://docs.kedro.org/en/stable/configuration/telemetry.html"
)

# get KedroCLI and its structure from actual project root
cli = KedroCLI(project_path=project_metadata.project_path)
cli_struct = _get_cli_structure(cli_obj=cli, get_help=False)
Expand All @@ -177,7 +185,6 @@ def before_command_run(
)
main_command = masked_command_args[0] if masked_command_args else "kedro"

logger.debug("You have opted into product usage analytics.")
user_uuid = _get_or_create_uuid()
project_properties = _get_project_properties(
user_uuid, project_metadata.project_path / PYPROJECT_CONFIG_NAME
Expand Down Expand Up @@ -219,15 +226,10 @@ def after_context_created(self, context):

@hook_impl
def after_catalog_created(self, catalog):
# The user notification message is sent only once per command during the before_command_run hook
if not self.consent:
logger.debug(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected.",
)
return

logger.debug("You have opted into product usage analytics.")

default_pipeline = pipelines.get("__default__") # __default__
user_uuid = _get_or_create_uuid()

Expand Down
47 changes: 39 additions & 8 deletions kedro-telemetry/tests/test_plugin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import sys
from pathlib import Path

Expand Down Expand Up @@ -121,7 +122,7 @@ def fake_sub_pipeline():


class TestKedroTelemetryCLIHooks:
def test_before_command_run(self, mocker, fake_metadata):
def test_before_command_run(self, mocker, fake_metadata, caplog):
mocker.patch(
"kedro_telemetry.plugin._check_for_telemetry_consent", return_value=True
)
Expand All @@ -139,9 +140,10 @@ def test_before_command_run(self, mocker, fake_metadata):
)

mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
telemetry_hook = KedroTelemetryCLIHooks()
command_args = ["--version"]
telemetry_hook.before_command_run(fake_metadata, command_args)
with caplog.at_level(logging.INFO):
telemetry_hook = KedroTelemetryCLIHooks()
command_args = ["--version"]
telemetry_hook.before_command_run(fake_metadata, command_args)
expected_properties = {
"username": "user_uuid",
"project_id": "digested",
Expand Down Expand Up @@ -170,6 +172,20 @@ def test_before_command_run(self, mocker, fake_metadata):
),
]
assert mocked_heap_call.call_args_list == expected_calls
assert any(
"Kedro is sending anonymous usage data with the sole purpose of improving the product. "
"No personal data or IP addresses are stored on our side. "
"If you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK` environment variables, "
"or create a `.telemetry` file in the current working directory with the contents `consent: false`. "
"Read more at https://docs.kedro.org/en/stable/configuration/telemetry.html"
in record.message
for record in caplog.records
)
assert not any(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected." in record.message
for record in caplog.records
)

def test_before_command_run_with_tools(self, mocker, fake_metadata):
mocker.patch(
Expand Down Expand Up @@ -276,17 +292,32 @@ def test_before_command_run_empty_args(self, mocker, fake_metadata):

assert mocked_heap_call.call_args_list == expected_calls

def test_before_command_run_no_consent_given(self, mocker, fake_metadata):
def test_before_command_run_no_consent_given(self, mocker, fake_metadata, caplog):
mocker.patch(
"kedro_telemetry.plugin._check_for_telemetry_consent", return_value=False
)

mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
telemetry_hook = KedroTelemetryCLIHooks()
command_args = ["--version"]
telemetry_hook.before_command_run(fake_metadata, command_args)
with caplog.at_level(logging.INFO):
telemetry_hook = KedroTelemetryCLIHooks()
command_args = ["--version"]
telemetry_hook.before_command_run(fake_metadata, command_args)

mocked_heap_call.assert_not_called()
assert not any(
"Kedro is sending anonymous usage data with the sole purpose of improving the product. "
"No personal data or IP addresses are stored on our side. "
"If you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK` environment variables, "
"or create a `.telemetry` file in the current working directory with the contents `consent: false`. "
"Read more at https://docs.kedro.org/en/stable/configuration/telemetry.html"
in record.message
for record in caplog.records
)
assert any(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected." in record.message
for record in caplog.records
)

def test_before_command_run_connection_error(self, mocker, fake_metadata, caplog):
mocker.patch(
Expand Down

0 comments on commit 021cb73

Please sign in to comment.