Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 0 additions & 24 deletions airflow/utils/usage_data_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@

from airflow import __version__ as airflow_version, settings
from airflow.configuration import conf
from airflow.plugins_manager import get_plugin_info


def usage_data_collection():
Expand Down Expand Up @@ -97,26 +96,3 @@ def get_executor() -> str:
def get_python_version() -> str:
# Cut only major+minor from the python version string (e.g. 3.10.12 --> 3.10)
return ".".join(platform.python_version().split(".")[0:2])


def get_plugin_counts() -> dict[str, int]:
plugin_info = get_plugin_info()

return {
"plugins": len(plugin_info),
"flask_blueprints": sum(len(x["flask_blueprints"]) for x in plugin_info),
"appbuilder_views": sum(len(x["appbuilder_views"]) for x in plugin_info),
"appbuilder_menu_items": sum(len(x["appbuilder_menu_items"]) for x in plugin_info),
"timetables": sum(len(x["timetables"]) for x in plugin_info),
}


def to_bucket(counter: int) -> str:
    """As we don't want to have precise numbers, turn the number into a bucket."""
if counter == 0:
return "0"
buckets = [0, 5, 10, 20, 50, 100, 200, 500, 1000, 2000]
for idx, val in enumerate(buckets[1:]):
if buckets[idx] < counter and counter <= val:
return f"{buckets[idx] + 1}-{val}"
return f"{buckets[-1]}+"
3 changes: 0 additions & 3 deletions airflow/www/templates/airflow/dags.html
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,4 @@ <h2>{{ page_title }}</h2>
return false;
}
</script>
{% if scarf_url %}
<img referrerpolicy="no-referrer" src="{{ scarf_url }}" width="0" height="0" alt="" style="display:none;" />
{% endif %}
{% endblock %}
47 changes: 1 addition & 46 deletions airflow/www/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
from airflow.timetables._cron import CronMixin
from airflow.timetables.base import DataInterval, TimeRestriction
from airflow.timetables.simple import ContinuousTimetable
from airflow.utils import json as utils_json, timezone, usage_data_collection, yaml
from airflow.utils import json as utils_json, timezone, yaml
from airflow.utils.airflow_flask_app import get_airflow_app
from airflow.utils.api_migration import mark_fastapi_migration_done
from airflow.utils.dag_edges import dag_edges
Expand Down Expand Up @@ -219,45 +219,6 @@ def get_safe_url(url):
return redirect_url.geturl()


def build_scarf_url(dags_count: int) -> str:
"""
Build the URL for the Scarf usage data collection.

:meta private:
"""
if not settings.is_usage_data_collection_enabled():
return ""

scarf_domain = "https://apacheairflow.gateway.scarf.sh"
platform_sys, platform_arch = usage_data_collection.get_platform_info()
db_version = usage_data_collection.get_database_version()
db_name = usage_data_collection.get_database_name()
executor = usage_data_collection.get_executor()
python_version = usage_data_collection.get_python_version()
plugin_counts = usage_data_collection.get_plugin_counts()
plugins_count = plugin_counts["plugins"]
flask_blueprints_count = plugin_counts["flask_blueprints"]
appbuilder_views_count = plugin_counts["appbuilder_views"]
appbuilder_menu_items_count = plugin_counts["appbuilder_menu_items"]
timetables_count = plugin_counts["timetables"]
dag_bucket = usage_data_collection.to_bucket(dags_count)
plugins_bucket = usage_data_collection.to_bucket(plugins_count)
timetable_bucket = usage_data_collection.to_bucket(timetables_count)

# Path Format:
# /{version}/{python_version}/{platform}/{arch}/{database}/{db_version}/{executor}/{num_dags}/{plugin_count}/{flask_blueprint_count}/{appbuilder_view_count}/{appbuilder_menu_item_count}/{timetables}
#
# This path redirects to a Pixel tracking URL
scarf_url = (
f"{scarf_domain}/webserver"
f"/{version}/{python_version}"
f"/{platform_sys}/{platform_arch}/{db_name}/{db_version}/{executor}/{dag_bucket}"
f"/{plugins_bucket}/{flask_blueprints_count}/{appbuilder_views_count}/{appbuilder_menu_items_count}/{timetable_bucket}"
)

return scarf_url


def get_date_time_num_runs_dag_runs_form_data(www_request, session, dag):
"""Get Execution Data, Base Date & Number of runs from a Request."""
date_time = www_request.args.get("execution_date")
Expand Down Expand Up @@ -1125,11 +1086,6 @@ def _iter_parsed_moved_data_table_names():
"warning",
)

try:
scarf_url = build_scarf_url(dags_count=all_dags_count)
except Exception:
scarf_url = ""

return self.render_template(
"airflow/dags.html",
dags=dags,
Expand Down Expand Up @@ -1169,7 +1125,6 @@ def _iter_parsed_moved_data_table_names():
sorting_direction=arg_sorting_direction,
auto_refresh_interval=conf.getint("webserver", "auto_refresh_interval"),
asset_triggered_next_run_info=asset_triggered_next_run_info,
scarf_url=scarf_url,
file_tokens=file_tokens,
)

Expand Down
3 changes: 0 additions & 3 deletions docs/apache-airflow/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,3 @@ The telemetry data collected is limited to the following:
- Operating system & machine architecture
- Executor
- Metadata DB type & its version
- Number of DAGs
- Number of Airflow plugins
- Number of timetables, Flask blueprints, Flask AppBuilder views, and Flask AppBuilder menu items from Airflow plugins
18 changes: 0 additions & 18 deletions tests/utils/test_usage_data_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from airflow.utils.usage_data_collection import (
get_database_version,
get_python_version,
to_bucket,
usage_data_collection,
)

Expand Down Expand Up @@ -101,20 +100,3 @@ def test_get_database_version(version_info, expected_version):
def test_get_python_version(version_info, expected_version):
with mock.patch("platform.python_version", return_value=version_info):
assert get_python_version() == expected_version


@pytest.mark.parametrize(
"counter, expected_bucket",
[
(0, "0"),
(1, "1-5"),
(5, "1-5"),
(6, "6-10"),
(11, "11-20"),
(20, "11-20"),
(21, "21-50"),
(10000, "2000+"),
],
)
def test_to_bucket(counter, expected_bucket):
assert to_bucket(counter) == expected_bucket
38 changes: 0 additions & 38 deletions tests/www/views/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import pytest
from markupsafe import Markup

from airflow import __version__ as airflow_version
from airflow.configuration import (
initialize_config,
write_default_airflow_configuration_if_needed,
Expand All @@ -36,7 +35,6 @@
from airflow.utils.task_group import TaskGroup
from airflow.www.views import (
ProviderView,
build_scarf_url,
get_key_paths,
get_safe_url,
get_task_stats_from_query,
Expand Down Expand Up @@ -608,39 +606,3 @@ def test_invalid_dates(app, admin_client, url, content):

assert resp.status_code == 400
assert re.search(content, resp.get_data().decode())


@pytest.mark.parametrize("enabled", [False, True])
@patch("airflow.utils.usage_data_collection.get_platform_info", return_value=("Linux", "x86_64"))
@patch("airflow.utils.usage_data_collection.get_database_version", return_value="12.3")
@patch("airflow.utils.usage_data_collection.get_database_name", return_value="postgres")
@patch("airflow.utils.usage_data_collection.get_executor", return_value="SequentialExecutor")
@patch("airflow.utils.usage_data_collection.get_python_version", return_value="3.9")
@patch("airflow.utils.usage_data_collection.get_plugin_counts")
def test_build_scarf_url(
get_plugin_counts,
get_python_version,
get_executor,
get_database_name,
get_database_version,
get_platform_info,
enabled,
):
get_plugin_counts.return_value = {
"plugins": 10,
"flask_blueprints": 15,
"appbuilder_views": 20,
"appbuilder_menu_items": 25,
"timetables": 30,
}
with patch("airflow.settings.is_usage_data_collection_enabled", return_value=enabled):
result = build_scarf_url(5)
expected_url = (
"https://apacheairflow.gateway.scarf.sh/webserver/"
f"{airflow_version}/3.9/Linux/x86_64/postgres/12.3/SequentialExecutor/1-5"
f"/6-10/15/20/25/21-50"
)
if enabled:
assert result == expected_url
else:
assert result == ""
14 changes: 0 additions & 14 deletions tests/www/views/test_views_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,20 +459,6 @@ def test_sorting_home_view(url, lower_key, greater_key, user_client, _working_da
assert lower_index < greater_index


@pytest.mark.parametrize("is_enabled, should_have_pixel", [(False, False), (True, True)])
def test_analytics_pixel(user_client, is_enabled, should_have_pixel):
"""
Test that the analytics pixel is not included when the feature is disabled
"""
with mock.patch("airflow.settings.is_usage_data_collection_enabled", return_value=is_enabled):
resp = user_client.get("home", follow_redirects=True)

if should_have_pixel:
check_content_in_response("apacheairflow.gateway.scarf.sh", resp)
else:
check_content_not_in_response("apacheairflow.gateway.scarf.sh", resp)


@pytest.mark.parametrize(
"url, filter_tags_cookie_val, filter_lastrun_cookie_val, expected_filter_tags, expected_filter_lastrun",
[
Expand Down