Skip to content

Commit dfc541c

Browse files
jklukas authored and acmiyaguchi committed
Bug 1672448 Persist Glean retention_days setting to generated schema (#166)
This translates the `retention_days` setting from Glean repositories.yaml to `mozPipelineMetadata.expiration_policy.delete_after_days` in all generated JSON schemas for the application. We will need a further step in the ops logic to act on this schema-level metadata, setting the appropriate table-level retention policy in BQ.
1 parent 7d6ae86 commit dfc541c

File tree

7 files changed: +121 -43 lines changed

bin/generate_commit

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,10 @@ function _generate_schemas() {
3838
--mps-branch "$mps_branch_source" \
3939
--out-dir ./telemetry
4040

41+
mozilla-schema-generator generate-bhr-ping \
42+
--mps-branch "$mps_branch_source" \
43+
--out-dir ./telemetry
44+
4145
mozilla-schema-generator generate-common-pings \
4246
--common-pings-config "$COMMON_PINGS_PATH" \
4347
--mps-branch "$mps_branch_source" \

mozilla_schema_generator/__main__.py

Lines changed: 21 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import click
1313
import yaml
1414

15+
from .bhr_ping import BhrPing
1516
from .common_ping import CommonPing
1617
from .config import Config
1718
from .glean_ping import GleanPing
@@ -88,6 +89,19 @@ def generate_main_ping(config, out_dir, split, pretty, mps_branch):
8889
config_data = yaml.safe_load(f)
8990

9091
config = Config("main", config_data)
92+
schemas = schema_generator.generate_schema(config, split=False)
93+
# schemas introduces an extra layer to the actual schema
94+
dump_schema(schemas, out_dir, pretty, version=4)
95+
96+
97+
@click.command()
98+
@common_options
99+
def generate_bhr_ping(out_dir, split, pretty, mps_branch):
100+
schema_generator = BhrPing(mps_branch=mps_branch)
101+
if out_dir:
102+
out_dir = Path(out_dir)
103+
104+
config = Config("bhr", {})
91105
schemas = schema_generator.generate_schema(config, split=split)
92106
dump_schema(schemas, out_dir, pretty, version=4)
93107

@@ -178,34 +192,23 @@ def generate_glean_pings(
178192
repos = GleanPing.get_repos()
179193

180194
if repo is not None:
181-
repos = [(r_name, r_id) for r_name, r_id in repos if r_id == repo]
195+
repos = [r for r in repos if r["app_id"] == repo]
182196

183197
with open(config, "r") as f:
184198
config_data = yaml.safe_load(f)
185199

186200
config = Config("glean", config_data)
187201

188-
for repo_name, repo_id in repos:
189-
write_schema(
190-
repo_name,
191-
repo_id,
192-
config,
193-
out_dir,
194-
split,
195-
pretty,
196-
generic_schema,
197-
mps_branch,
198-
)
202+
for repo in repos:
203+
write_schema(repo, config, out_dir, split, pretty, generic_schema, mps_branch)
199204

200205

201-
def write_schema(
202-
repo, repo_id, config, out_dir, split, pretty, generic_schema, mps_branch
203-
):
204-
schema_generator = GleanPing(repo, repo_id, mps_branch=mps_branch)
206+
def write_schema(repo, config, out_dir, split, pretty, generic_schema, mps_branch):
207+
schema_generator = GleanPing(repo, mps_branch=mps_branch)
205208
schemas = schema_generator.generate_schema(
206209
config, split=False, generic_schema=generic_schema
207210
)
208-
dump_schema(schemas, out_dir and out_dir.joinpath(repo_id), pretty)
211+
dump_schema(schemas, out_dir and out_dir.joinpath(repo["app_id"]), pretty)
209212

210213

211214
def dump_schema(schemas, out_dir, pretty, *, version=1):
@@ -245,6 +248,7 @@ def main(args=None):
245248

246249

247250
main.add_command(generate_main_ping)
251+
main.add_command(generate_bhr_ping)
248252
main.add_command(generate_glean_pings)
249253
main.add_command(generate_common_pings)
250254

mozilla_schema_generator/bhr_ping.py

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This Source Code Form is subject to the terms of the Mozilla Public
4+
# License, v. 2.0. If a copy of the MPL was not distributed with this
5+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
7+
from .common_ping import CommonPing
8+
from .utils import prepend_properties
9+
10+
11+
class BhrPing(CommonPing):
12+
schema_url = (
13+
"https://raw.githubusercontent.com/mozilla-services/mozilla-pipeline-schemas"
14+
"/{branch}/schemas/telemetry/bhr/bhr.4.schema.json"
15+
)
16+
17+
def __init__(self, **kwargs):
18+
super().__init__(self.schema_url, **kwargs)
19+
20+
def _update_env(self, schema):
21+
# hangs is an array of objects
22+
stack = prepend_properties(("payload", "hangs")) + (
23+
"items",
24+
"properties",
25+
"stack",
26+
)
27+
schema.set_schema_elem(
28+
stack,
29+
{
30+
"type": "string",
31+
"description": (
32+
"JSON representation of the stack field."
33+
" Injected by mozilla-schema-generator."
34+
),
35+
},
36+
# this may otherwise overwrite the "items" fields
37+
propagate=False,
38+
)
39+
40+
return super()._update_env(schema)

mozilla_schema_generator/glean_ping.py

Lines changed: 23 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -39,13 +39,14 @@ class GleanPing(GenericPing):
3939
"glean_client_info",
4040
}
4141

42-
def __init__(self, repo, app_id, **kwargs): # TODO: Make env-url optional
42+
def __init__(self, repo, **kwargs): # TODO: Make env-url optional
4343
self.repo = repo
44-
self.app_id = app_id
44+
self.repo_name = repo["name"]
45+
self.app_id = repo["app_id"]
4546
super().__init__(
4647
self.schema_url,
4748
self.schema_url,
48-
self.probes_url_template.format(repo),
49+
self.probes_url_template.format(self.repo_name),
4950
**kwargs,
5051
)
5152

@@ -57,10 +58,10 @@ def get_dependencies(self):
5758
# map those back to the name of the repository in the repository file.
5859
try:
5960
dependencies = self._get_json(
60-
self.dependencies_url_template.format(self.repo)
61+
self.dependencies_url_template.format(self.repo_name)
6162
)
6263
except HTTPError:
63-
logging.info(f"For {self.repo}, using default Glean dependencies")
64+
logging.info(f"For {self.repo_name}, using default Glean dependencies")
6465
return self.default_dependencies
6566

6667
dependency_library_names = list(dependencies.keys())
@@ -77,10 +78,10 @@ def get_dependencies(self):
7778
dependencies.append(repos_by_dependency_name[name])
7879

7980
if len(dependencies) == 0:
80-
logging.info(f"For {self.repo}, using default Glean dependencies")
81+
logging.info(f"For {self.repo_name}, using default Glean dependencies")
8182
return self.default_dependencies
8283

83-
logging.info(f"For {self.repo}, found Glean dependencies: {dependencies}")
84+
logging.info(f"For {self.repo_name}, found Glean dependencies: {dependencies}")
8485
return dependencies
8586

8687
def get_probes(self) -> List[GleanProbe]:
@@ -109,7 +110,7 @@ def get_probes(self) -> List[GleanProbe]:
109110
"firefox-android-release",
110111
}
111112
if (
112-
self.repo in issue_118_affected
113+
self.repo_name in issue_118_affected
113114
and probe.get_name() == "installation.timestamp"
114115
):
115116
logging.info(f"Writing column {probe.get_name()} for compatibility.")
@@ -134,7 +135,7 @@ def get_probes(self) -> List[GleanProbe]:
134135
return processed
135136

136137
def get_pings(self) -> Set[str]:
137-
url = self.ping_url_template.format(self.repo)
138+
url = self.ping_url_template.format(self.repo_name)
138139
pings = GleanPing._get_json(url).keys()
139140

140141
for dependency in self.get_dependencies():
@@ -156,14 +157,19 @@ def generate_schema(
156157
for matcher in matchers.values():
157158
matcher.matcher["send_in_pings"]["contains"] = ping
158159
new_config = Config(ping, matchers=matchers)
160+
retention_days = self.repo.get("retention_days", None)
159161

160-
defaults = {
161-
"mozPipelineMetadata": {
162-
"bq_dataset_family": self.app_id.replace("-", "_"),
163-
"bq_table": ping.replace("-", "_") + "_v1",
164-
"bq_metadata_format": "structured",
165-
}
162+
pipeline_meta = {
163+
"bq_dataset_family": self.app_id.replace("-", "_"),
164+
"bq_table": ping.replace("-", "_") + "_v1",
165+
"bq_metadata_format": "structured",
166166
}
167+
if retention_days is not None:
168+
expiration = pipeline_meta.get("expiration_policy", {})
169+
expiration["delete_after_days"] = int(retention_days)
170+
pipeline_meta["expiration_policy"] = expiration
171+
172+
defaults = {"mozPipelineMetadata": pipeline_meta}
167173

168174
if generic_schema: # Use the generic glean ping schema
169175
schema = self.get_schema()
@@ -181,11 +187,7 @@ def generate_schema(
181187
@staticmethod
182188
def get_repos():
183189
"""
184-
Retrieve name and app_id for Glean repositories
190+
Retrieve metadata for all non-library Glean repositories
185191
"""
186192
repos = GleanPing._get_json(GleanPing.repos_url)
187-
return [
188-
(repo["name"], repo["app_id"])
189-
for repo in repos
190-
if "library_names" not in repo
191-
]
193+
return [repo for repo in repos if "library_names" not in repo]

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
setup(
2121
name="mozilla-schema-generator",
2222
python_requires=">=3.6.0",
23-
version="0.2.0",
23+
version="0.3.0",
2424
description="Create full representations of schemas using the probe info service.",
2525
long_description=readme,
2626
long_description_content_type="text/markdown",

tests/test_bhr.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This Source Code Form is subject to the terms of the Mozilla Public
4+
# License, v. 2.0. If a copy of the MPL was not distributed with this
5+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
7+
from mozilla_schema_generator.bhr_ping import BhrPing
8+
from mozilla_schema_generator.config import Config
9+
10+
11+
def test_schema_contains_hangs_stacks():
12+
schema = BhrPing().generate_schema(Config("bhr", {}))["bhr"][0].schema
13+
hangs = schema["properties"]["payload"]["properties"]["hangs"]
14+
assert hangs["items"]["properties"]["stack"]["type"] == "string"

tests/test_glean.py

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
@pytest.fixture
2121
def glean():
22-
return glean_ping.GleanPing("glean", "org-mozilla-glean")
22+
return glean_ping.GleanPing({"name": "glean", "app_id": "org-mozilla-glean"})
2323

2424

2525
@pytest.fixture
@@ -61,7 +61,8 @@ def test_single_schema(self, glean, config):
6161

6262
def test_get_repos(self):
6363
repos = glean_ping.GleanPing.get_repos()
64-
assert ("fenix", "org-mozilla-fenix") in repos
64+
names_ids = [(r["name"], r["app_id"]) for r in repos]
65+
assert ("fenix", "org-mozilla-fenix") in names_ids
6566

6667
def test_generic_schema(self, glean, config):
6768
schemas = glean.generate_schema(config, split=False, generic_schema=True)
@@ -79,7 +80,20 @@ def test_generic_schema(self, glean, config):
7980

8081
def test_missing_data(self, config):
8182
# When there are no files, this should error
82-
83-
not_glean = NoProbeGleanPing("LeanGleanPingNoIding", "org-mozilla-lean")
83+
repo = {"name": "LeanGleanPingNoIding", "app_id": "org-mozilla-lean"}
84+
not_glean = NoProbeGleanPing(repo)
8485
with pytest.raises(requests.exceptions.HTTPError):
8586
not_glean.generate_schema(config, split=False)
87+
88+
def test_retention_days(self, config):
89+
glean = glean_ping.GleanPing(
90+
{"name": "glean", "app_id": "org-mozilla-glean", "retention_days": 90}
91+
)
92+
schemas = glean.generate_schema(config, split=False, generic_schema=True)
93+
94+
final_schemas = {k: schemas[k][0].schema for k in schemas}
95+
for name, schema in final_schemas.items():
96+
assert (
97+
schema["mozPipelineMetadata"]["expiration_policy"]["delete_after_days"]
98+
== 90
99+
)

0 commit comments

Comments
 (0)