-
Notifications
You must be signed in to change notification settings - Fork 90
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(telemetry): Unique User IDs in kedro-telemetry - merge only for kedro-telemetry release 0.4.0 #596
feat(telemetry): Unique User IDs in kedro-telemetry - merge only for kedro-telemetry release 0.4.0 #596
Changes from 14 commits
2b36ddd
2735206
a17da4a
5c1f2be
75480b6
11c28b1
4c9427c
a20d0d8
ea96a13
e16cde6
ec0594e
3bc7546
9795b5e
ae5694a
0c706f0
b1bca73
74cf22d
313f448
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,11 @@ | ||
"""Kedro Telemetry plugin for collecting Kedro usage data.""" | ||
|
||
import getpass | ||
import hashlib | ||
import json | ||
import logging | ||
import os | ||
import sys | ||
import uuid | ||
from copy import deepcopy | ||
from datetime import datetime | ||
from pathlib import Path | ||
|
@@ -15,6 +15,7 @@ | |
import requests | ||
import toml | ||
import yaml | ||
from appdirs import user_config_dir | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this a new dependency? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Yes, we added it to the dependencies list. |
||
from kedro import __version__ as KEDRO_VERSION | ||
from kedro.framework.cli.cli import KedroCLI | ||
from kedro.framework.cli.hooks import cli_hook_impl | ||
|
@@ -41,6 +42,7 @@ | |
"BUILDKITE", # https://buildkite.com/docs/pipelines/environment-variables | ||
} | ||
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" | ||
CONFIG_FILENAME = "telemetry.toml" | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
@@ -49,15 +51,45 @@ def _hash(string: str) -> str: | |
return hashlib.sha512(bytes(string, encoding="utf8")).hexdigest() | ||
|
||
|
||
def _get_hashed_username(): | ||
def _get_or_create_uuid() -> str: | ||
""" | ||
Reads a UUID from a configuration file or generates and saves a new one if not present. | ||
""" | ||
config_path = user_config_dir("kedro") | ||
astrojuanlu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
full_path = os.path.join(config_path, CONFIG_FILENAME) | ||
|
||
try: | ||
username = getpass.getuser() | ||
return _hash(username) | ||
except Exception as exc: | ||
logger.warning( | ||
"Something went wrong with getting the username. Exception: %s", | ||
exc, | ||
) | ||
if os.path.exists(full_path): | ||
with open(full_path) as f: | ||
config = toml.load(f) | ||
|
||
if "telemetry" in config and "uuid" in config["telemetry"]: | ||
return uuid.UUID(config["telemetry"]["uuid"]).hex | ||
|
||
# Generate a new UUID and save it to the config file | ||
new_uuid = _generate_new_uuid(full_path) | ||
|
||
return new_uuid | ||
|
||
except Exception as e: | ||
logging.error(f"Failed to retrieve UUID: {e}") | ||
return "" | ||
|
||
|
||
def _generate_new_uuid(full_path: str) -> str: | ||
try: | ||
config = {} | ||
config["telemetry"] = {} | ||
new_uuid = uuid.uuid4().hex | ||
config["telemetry"]["uuid"] = new_uuid | ||
|
||
os.makedirs(os.path.dirname(full_path), exist_ok=True) | ||
with open(full_path, "w") as f: | ||
toml.dump(config, f) | ||
|
||
return new_uuid | ||
except Exception as e: | ||
logging.error(f"Failed to create UUID: {e}") | ||
return "" | ||
|
||
|
||
|
@@ -90,17 +122,17 @@ def before_command_run( | |
main_command = masked_command_args[0] if masked_command_args else "kedro" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I can't comment on the line above since it hasn't been changed, but could line 117 be changed to - |
||
|
||
logger.debug("You have opted into product usage analytics.") | ||
hashed_username = _get_hashed_username() | ||
user_uuid = _get_or_create_uuid() | ||
project_properties = _get_project_properties( | ||
hashed_username, project_metadata.project_path | ||
user_uuid, project_metadata.project_path | ||
) | ||
cli_properties = _format_user_cli_data( | ||
project_properties, masked_command_args | ||
) | ||
|
||
_send_heap_event( | ||
event_name=f"Command run: {main_command}", | ||
identity=hashed_username, | ||
identity=user_uuid, | ||
properties=cli_properties, | ||
) | ||
|
||
|
@@ -109,7 +141,7 @@ def before_command_run( | |
generic_properties["main_command"] = main_command | ||
_send_heap_event( | ||
event_name="CLI command", | ||
identity=hashed_username, | ||
identity=user_uuid, | ||
properties=generic_properties, | ||
) | ||
except Exception as exc: | ||
|
@@ -141,16 +173,16 @@ def after_catalog_created(self, catalog): | |
logger.debug("You have opted into product usage analytics.") | ||
|
||
default_pipeline = pipelines.get("__default__") # __default__ | ||
hashed_username = _get_hashed_username() | ||
user_uuid = _get_or_create_uuid() | ||
|
||
project_properties = _get_project_properties(hashed_username, self.project_path) | ||
project_properties = _get_project_properties(user_uuid, self.project_path) | ||
|
||
project_statistics_properties = _format_project_statistics_data( | ||
project_properties, catalog, default_pipeline, pipelines | ||
) | ||
_send_heap_event( | ||
event_name="Kedro Project Statistics", | ||
identity=hashed_username, | ||
identity=user_uuid, | ||
properties=project_statistics_properties, | ||
) | ||
|
||
|
@@ -163,10 +195,10 @@ def _is_known_ci_env(known_ci_env_var_keys=KNOWN_CI_ENV_VAR_KEYS): | |
return any(os.getenv(key) for key in known_ci_env_var_keys) | ||
|
||
|
||
def _get_project_properties(hashed_username: str, project_path: str) -> Dict: | ||
def _get_project_properties(user_uuid: str, project_path: str) -> Dict: | ||
hashed_package_name = _hash(PACKAGE_NAME) if PACKAGE_NAME else "undefined" | ||
properties = { | ||
"username": hashed_username, | ||
"username": user_uuid, | ||
"package_name": hashed_package_name, | ||
"project_version": KEDRO_VERSION, | ||
"telemetry_version": TELEMETRY_VERSION, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The heading will say the release number anyway