Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions mozilla_schema_generator/generic_ping.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ class GenericPing(object):
cache_dir = pathlib.Path(os.environ.get("MSG_PROBE_CACHE_DIR", ".probe_cache"))

def __init__(self, schema_url, env_url, probes_url, mps_branch="main"):
self.schema_url = schema_url.format(branch=mps_branch)
self.env_url = env_url.format(branch=mps_branch)
self.branch_name = mps_branch
self.schema_url = schema_url.format(branch=self.branch_name)
self.env_url = env_url.format(branch=self.branch_name)
self.probes_url = probes_url

def get_schema(self) -> Schema:
Expand Down
42 changes: 36 additions & 6 deletions mozilla_schema_generator/glean_ping.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,18 @@

logger = logging.getLogger(__name__)

# Directory holding the Glean schemas in the mozilla-pipeline-schemas repo.
# The "{branch}" placeholder is filled in later via ``str.format(branch=...)``.
_GLEAN_SCHEMA_DIR_URL = (
    "https://raw.githubusercontent.com/mozilla-services/mozilla-pipeline-schemas"
    "/{branch}/schemas/glean/glean"
)

# URL template of the full Glean ping schema.
DEFAULT_SCHEMA_URL = _GLEAN_SCHEMA_DIR_URL + "/glean.1.schema.json"

# URL template of the "glean-min" schema variant.
MINIMUM_SCHEMA_URL = _GLEAN_SCHEMA_DIR_URL + "/glean-min.1.schema.json"


class GleanPing(GenericPing):
schema_url = (
"https://raw.githubusercontent.com/mozilla-services/mozilla-pipeline-schemas"
"/{branch}/schemas/glean/glean/glean.1.schema.json"
)
probes_url_template = GenericPing.probe_info_base_url + "/glean/{}/metrics"
ping_url_template = GenericPing.probe_info_base_url + "/glean/{}/pings"
repos_url = GenericPing.probe_info_base_url + "/glean/repositories"
Expand All @@ -45,8 +51,8 @@ def __init__(self, repo, **kwargs): # TODO: Make env-url optional
self.repo_name = repo["name"]
self.app_id = repo["app_id"]
super().__init__(
self.schema_url,
self.schema_url,
DEFAULT_SCHEMA_URL,
DEFAULT_SCHEMA_URL,
self.probes_url_template.format(self.repo_name),
**kwargs,
)
Expand Down Expand Up @@ -249,6 +255,7 @@ def reorder_metadata(metadata):
"bq_dataset_family",
"bq_table",
"bq_metadata_format",
"include_info_sections",
"submission_timestamp_granularity",
"expiration_policy",
"override_attributes",
Expand Down Expand Up @@ -279,6 +286,7 @@ def get_pings_and_pipeline_metadata(self) -> Dict[str, Dict]:
pings = self._get_ping_data_and_dependencies_with_default_metadata()
for ping_name, ping_data in pings.items():
metadata = ping_data.get("moz_pipeline_metadata")
metadata["include_info_sections"] = self._include_info_sections(ping_data)

# While technically unnecessary, the dictionary elements are re-ordered to match the
# currently deployed order and used to verify no difference in output.
Expand All @@ -290,6 +298,26 @@ def get_ping_descriptions(self) -> Dict[str, str]:
k: v["history"][-1]["description"] for k, v in self._get_ping_data().items()
}

def _include_info_sections(self, ping_data) -> bool:
    """
    Return whether the ping should include the info sections.

    The flag is read from the most recent entry of ``ping_data["history"]``.
    It defaults to True when there is no history (missing, ``None`` or empty)
    or when the latest entry does not carry ``include_info_sections``.
    The stored value is coerced so the result is always a real ``bool``.
    """
    history = ping_data.get("history")
    if not history:
        # Nothing recorded for this ping: fall back to the default.
        return True
    # Only the latest history entry reflects the current ping definition.
    return bool(history[-1].get("include_info_sections", True))

def set_schema_url(self, metadata):
    """
    Switch between the glean-min and glean schemas if the ping does not require
    info sections as specified in the parsed ping info in probe scraper.

    ``metadata`` is the ping's pipeline metadata mapping. When it has no
    ``include_info_sections`` entry the info sections are assumed to be
    required, so the default glean schema is selected.
    """
    include_info = metadata.get("include_info_sections", True)
    url_template = DEFAULT_SCHEMA_URL if include_info else MINIMUM_SCHEMA_URL
    self.schema_url = url_template.format(branch=self.branch_name)

def generate_schema(self, config, generic_schema=False) -> Dict[str, Schema]:
pings = self.get_pings_and_pipeline_metadata()
schemas = {}
Expand Down Expand Up @@ -322,6 +350,8 @@ def generate_schema(self, config, generic_schema=False) -> Dict[str, Schema]:

defaults = {"mozPipelineMetadata": pipeline_meta}

# Adjust the schema path if the ping does not require info sections
self.set_schema_url(pipeline_meta)
if generic_schema: # Use the generic glean ping schema
schema = self.get_schema(generic_schema=True)
schema.schema.update(defaults)
Expand Down
69 changes: 62 additions & 7 deletions tests/test_glean.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest adding a test to cover the include_info_sections = False case as well.

Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def get_dependencies(self):
current_repo = next((x for x in repos if x.get("app_id") == self.repo_name), {})
return current_repo.get("dependencies", [])

def _get_history(self):
    """Return the ping history placed under the "history" key of the fake ping data.

    Empty in this stub; a subclass can override it to supply history entries.
    """
    return list()

def _get_dependency_pings(self, dependency):
return {
"dependency_ping": {
Expand All @@ -83,6 +86,7 @@ def _get_ping_data_without_dependencies(self) -> Dict[str, Dict]:
"in-source": True,
"moz_pipeline_metadata": self.ping_metadata,
"name": "ping1",
"history": self._get_history(),
}
}

Expand All @@ -96,6 +100,7 @@ class GleanPingWithExpirationPolicy(GleanPingStub):
"delete_after_days": 12,
"collect_through_date": "2022-06-10",
},
"include_info_sections": True,
}


Expand All @@ -104,6 +109,7 @@ class GleanPingWithEncryption(GleanPingStub):
"bq_dataset_family": "app1",
"bq_metadata_format": "structured",
"bq_table": "ping1_v1",
"include_info_sections": True,
"jwe_mappings": [
{
"decrypted_field_path": "",
Expand All @@ -118,6 +124,7 @@ class GleanPingNoMetadata(GleanPingStub):
"bq_dataset_family": "app1",
"bq_metadata_format": "structured",
"bq_table": "ping1_v1",
"include_info_sections": True,
}


Expand All @@ -126,6 +133,7 @@ class GleanPingWithOverrideAttributes(GleanPingStub):
"bq_dataset_family": "app1",
"bq_metadata_format": "structured",
"bq_table": "ping1_v1",
"include_info_sections": True,
"override_attributes": [{"name": "geo_city", "value": None}],
}

Expand All @@ -135,16 +143,30 @@ class GleanPingWithGranularity(GleanPingStub):
"bq_dataset_family": "app1",
"bq_metadata_format": "structured",
"bq_table": "ping1_v1",
"include_info_sections": True,
"submission_timestamp_granularity": "seconds",
}


class GleanPingNoInfoSection(GleanPingStub):
    """Stub ping whose latest history entry disables the info sections."""

    # Metadata the generated schema is expected to carry for this ping.
    ping_metadata = dict(
        bq_dataset_family="app1",
        bq_metadata_format="structured",
        bq_table="ping1_v1",
        include_info_sections=False,
    )

    def _get_history(self):
        # Single history entry that explicitly turns the info sections off.
        return [dict(include_info_sections=False)]


class GleanPingWithMultiplePings(GleanPingStub):
ping1_metadata = {
"bq_dataset_family": "app1",
"bq_metadata_format": "structured",
"bq_table": "ping1_v1",
"expiration_policy": {"delete_after_days": 30},
"include_info_sections": True,
"override_attributes": [{"name": "geo_city", "value": None}],
"submission_timestamp_granularity": "seconds",
}
Expand All @@ -154,6 +176,7 @@ class GleanPingWithMultiplePings(GleanPingStub):
"bq_metadata_format": "structured",
"bq_table": "ping2_v1",
"expiration_policy": {"delete_after_days": 45},
"include_info_sections": True,
"submission_timestamp_granularity": "millis",
}

Expand Down Expand Up @@ -318,6 +341,7 @@ def test_generic_schema(self, glean, config):
"bq_dataset_family": "glean_core",
"bq_metadata_format": "structured",
"bq_table": name.replace("-", "_") + "_v1",
"include_info_sections": True,
}
print_and_test(generic_schema, schema)

Expand Down Expand Up @@ -363,7 +387,7 @@ def test_expiration_policy(self, mock_get_repos, config):
)
if name == "dependency_ping":
# Need to do individual comparison due to update of value based on app_id
assert len(schema["mozPipelineMetadata"]) == 4
assert len(schema["mozPipelineMetadata"]) == 5
assert schema["mozPipelineMetadata"]["bq_dataset_family"] == "app1"
assert (
schema["mozPipelineMetadata"]["bq_metadata_format"] == "structured"
Expand Down Expand Up @@ -415,7 +439,7 @@ def test_jwe_mappings(self, mock_get_repos, config):
)
if name == "dependency_ping":
# Need to do individual comparison due to update of value based on app_id
assert len(schema["mozPipelineMetadata"]) == 4
assert len(schema["mozPipelineMetadata"]) == 5
assert schema["mozPipelineMetadata"]["bq_dataset_family"] == "app1"
assert (
schema["mozPipelineMetadata"]["bq_metadata_format"] == "structured"
Expand Down Expand Up @@ -455,7 +479,7 @@ def test_no_metadata_defaults(self, mock_get_repos, config):
)
if name == "dependency_ping":
# Need to do individual comparison due to update of value based on app_id
assert len(schema["mozPipelineMetadata"]) == 3
assert len(schema["mozPipelineMetadata"]) == 4
assert schema["mozPipelineMetadata"]["bq_dataset_family"] == "app1"
assert (
schema["mozPipelineMetadata"]["bq_metadata_format"] == "structured"
Expand Down Expand Up @@ -492,7 +516,7 @@ def test_override_attributes(self, mock_get_repos, config):
)
if name == "dependency_ping":
# Need to do individual comparison due to update of value based on app_id
assert len(schema["mozPipelineMetadata"]) == 4
assert len(schema["mozPipelineMetadata"]) == 5
assert schema["mozPipelineMetadata"]["bq_dataset_family"] == "app1"
assert (
schema["mozPipelineMetadata"]["bq_metadata_format"] == "structured"
Expand Down Expand Up @@ -537,7 +561,7 @@ def test_submission_timestamp_granularity(self, mock_get_repos, config):
)
if name == "dependency_ping":
# Need to do individual comparison due to update of value based on app_id
assert len(schema["mozPipelineMetadata"]) == 4
assert len(schema["mozPipelineMetadata"]) == 5
assert schema["mozPipelineMetadata"]["bq_dataset_family"] == "app1"
assert (
schema["mozPipelineMetadata"]["bq_metadata_format"] == "structured"
Expand Down Expand Up @@ -579,6 +603,36 @@ def test_metadata_no_dependency(self, mock_get_repos, config):
== GleanPingWithGranularity.ping_metadata
)

@patch.object(glean_ping.GleanPing, "get_repos")
def test_ping_no_info_sections(self, mock_get_repos, config):
    """
    A ping whose history sets ``include_info_sections`` to False produces a
    single generic schema without a "required" list, and its pipeline
    metadata carries ``include_info_sections: False``.
    """
    mock_get_repos.return_value = [
        {
            "app_id": "app1",
            "dependencies": [],
            "moz_pipeline_metadata": {},
            "moz_pipeline_metadata_defaults": {
                "bq_dataset_family": "app1",
                "bq_metadata_format": "structured",
            },
            "name": "app1",
        }
    ]

    glean = GleanPingNoInfoSection(
        {"name": "app1", "app_id": "app1"},
    )
    schemas = glean.generate_schema(config, generic_schema=True)
    final_schemas = {name: generated.schema for name, generated in schemas.items()}

    # Look the ping up by name instead of guarding the assertions with
    # ``if name == "ping1"`` inside a loop: a guarded assertion would be
    # silently skipped (and the test would pass) if the key ever changed.
    assert list(final_schemas) == ["ping1"]
    ping_schema = final_schemas["ping1"]
    assert "required" not in ping_schema
    assert ping_schema["mozPipelineMetadata"] == GleanPingNoInfoSection.ping_metadata

# Unit test covering case where 2 pings have specific metadata and default metadata is applied
# to the dependency ping
@patch.object(glean_ping.GleanPing, "get_repos")
Expand Down Expand Up @@ -620,7 +674,7 @@ def test_metadata_multiple_pings(self, mock_get_repos, config):
)
if name == "dependency_ping":
# Need to do individual comparison due to update of value based on app_id
assert len(schema["mozPipelineMetadata"]) == 5
assert len(schema["mozPipelineMetadata"]) == 6
assert schema["mozPipelineMetadata"]["bq_dataset_family"] == "app1"
assert (
schema["mozPipelineMetadata"]["bq_metadata_format"] == "structured"
Expand Down Expand Up @@ -763,7 +817,7 @@ def test_override_nested_defaults(self, config):
# This should continue to work
# even if the upstream schema actually gains those fields.
json = generic_ping.GenericPing._get_json(
glean_ping.GleanPing.schema_url.format(branch="main")
glean_ping.DEFAULT_SCHEMA_URL.format(branch="main")
)
json.update(
{
Expand All @@ -789,6 +843,7 @@ def test_override_nested_defaults(self, config):
"bq_table": "metrics_v1",
"bq_metadata_format": "structured",
"json_object_path_regex": "metrics\\.object\\..*",
"include_info_sections": True,
}
for name, schema in final_schemas.items():
if name == "metrics":
Expand Down