Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add windows recording rules to azure monitor metrics addon (managed prometheus) #5992

Merged
merged 12 commits into from
Mar 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions linter_exclusions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ aks update:
ksm_metric_labels_allow_list:
rule_exclusions:
- option_length_too_long
enable_windows_recording_rules:
rule_exclusions:
- option_length_too_long
custom_ca_trust_certificates:
rule_exclusions:
- option_length_too_long
Expand Down
3 changes: 3 additions & 0 deletions src/aks-preview/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ To release a new version, please select a new version number (usually plus 1 to
Pending
+++++++

0.5.132
+++++++
* Change the short name of option `--source-resource-id` in command `az aks trustedaccess rolebinding create` from `-s` to `-r`.
* Add parameter `--enable-windows-recording-rules` to enable Windows recording rules for the Azure Monitor Metrics addon.

0.5.131
+++++++
Expand Down
3 changes: 3 additions & 0 deletions src/aks-preview/azext_aks_preview/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,9 @@
- name: --grafana-resource-id
type: string
short-summary: Resource ID of the Azure Managed Grafana Workspace
- name: --enable-windows-recording-rules
type: bool
short-summary: Enable Windows Recording Rules when enabling the Azure Monitor Metrics addon
- name: --disable-azuremonitormetrics
type: bool
short-summary: Disable Azure Monitor Metrics Profile
Expand Down
1 change: 1 addition & 0 deletions src/aks-preview/azext_aks_preview/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ def load_arguments(self, _):
c.argument('ksm_metric_labels_allow_list', validator=validate_ksm_labels, is_preview=True)
c.argument('ksm_metric_annotations_allow_list', validator=validate_ksm_annotations, is_preview=True)
c.argument('grafana_resource_id', validator=validate_grafanaresourceid, is_preview=True)
c.argument('enable_windows_recording_rules', action='store_true', is_preview=True)
c.argument('disable_azuremonitormetrics', action='store_true', is_preview=True)
c.argument('enable_vpa', action='store_true', is_preview=True, help="enable vertical pod autoscaler for cluster")
c.argument('disable_vpa', action='store_true', is_preview=True, help="disable vertical pod autoscaler for cluster")
Expand Down
93 changes: 51 additions & 42 deletions src/aks-preview/azext_aks_preview/azuremonitorprofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from six import with_metaclass
from azure.core import CaseInsensitiveEnumMeta
from azure.core.exceptions import HttpResponseError
from azure.cli.core.util import get_az_user_agent

AKS_CLUSTER_API = "2022-07-02-preview"
MAC_API = "2021-06-03-preview"
Expand Down Expand Up @@ -508,20 +507,8 @@ def link_grafana_instance(cmd, raw_parameters, azure_monitor_workspace_resource_
return GrafanaLink.SUCCESS


def create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_resource_id, mac_region):
def put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, i):
from azure.cli.core.util import send_raw_request
with urllib.request.urlopen("https://defaultrulessc.blob.core.windows.net/defaultrules/ManagedPrometheusDefaultRecordingRules.json") as url:
default_rules_template = json.loads(url.read().decode())
default_rule_group_name = "NodeRecordingRulesRuleGroup-{0}".format(cluster_name)
default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format(
cluster_subscription,
cluster_resource_group_name,
default_rule_group_name
)
url = "https://management.azure.com{0}?api-version={1}".format(
default_rule_group_id,
RULES_API
)
body = json.dumps({
"id": default_rule_group_id,
"name": default_rule_group_name,
Expand All @@ -531,14 +518,15 @@ def create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster
"scopes": [
azure_monitor_workspace_resource_id
],
"enabled": True,
"clusterName": cluster_name,
"interval": "PT1M",
"rules": default_rules_template["resources"][0]["properties"]["rules"]
"rules": default_rules_template["resources"][i]["properties"]["rules"]
}
})
for _ in range(3):
try:
headers = ['User-Agent=azuremonitormetrics.create_rules_node']
headers = ['User-Agent=azuremonitormetrics.put_rules.' + default_rule_group_name]
send_raw_request(cmd.cli_ctx, "PUT", url,
body=body, headers=headers)
error = None
Expand All @@ -547,6 +535,24 @@ def create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster
error = e
else:
raise error


def create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_resource_id, mac_region, raw_parameters):
from azure.cli.core.util import send_raw_request
with urllib.request.urlopen("https://defaultrulessc.blob.core.windows.net/defaultrules/ManagedPrometheusDefaultRecordingRules.json") as url:
default_rules_template = json.loads(url.read().decode())
default_rule_group_name = "NodeRecordingRulesRuleGroup-{0}".format(cluster_name)
default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format(
cluster_subscription,
cluster_resource_group_name,
default_rule_group_name
)
url = "https://management.azure.com{0}?api-version={1}".format(
default_rule_group_id,
RULES_API
)
put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, 0)

default_rule_group_name = "KubernetesRecordingRulesRuleGroup-{0}".format(cluster_name)
default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format(
cluster_subscription,
Expand All @@ -557,31 +563,34 @@ def create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster
default_rule_group_id,
RULES_API
)
body = json.dumps({
"id": default_rule_group_id,
"name": default_rule_group_name,
"type": "Microsoft.AlertsManagement/prometheusRuleGroups",
"location": mac_region,
"properties": {
"scopes": [
azure_monitor_workspace_resource_id
],
"clusterName": cluster_name,
"rules": default_rules_template["resources"][1]["properties"]["rules"]
}
})
for _ in range(3):
try:
headers = ['User-Agent=azuremonitormetrics.create_rules_kubernetes']
send_raw_request(cmd.cli_ctx, "PUT", url,
body=body, headers=headers)
error = None
break
except CLIError as e:
print(e)
error = e
else:
raise error
put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, 1)

enable_windows_recording_rules = raw_parameters.get("enable_windows_recording_rules")

if enable_windows_recording_rules is True:
default_rule_group_name = "NodeRecordingRulesRuleGroup-Win-{0}".format(cluster_name)
default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format(
cluster_subscription,
cluster_resource_group_name,
default_rule_group_name
)
url = "https://management.azure.com{0}?api-version={1}".format(
default_rule_group_id,
RULES_API
)
put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, 2)

default_rule_group_name = "NodeAndKubernetesRecordingRulesRuleGroup-Win-{0}".format(cluster_name)
default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format(
cluster_subscription,
cluster_resource_group_name,
default_rule_group_name
)
url = "https://management.azure.com{0}?api-version={1}".format(
default_rule_group_id,
RULES_API
)
put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, 3)


def delete_dcra(cmd, cluster_region, cluster_subscription, cluster_resource_group_name, cluster_name):
Expand Down Expand Up @@ -667,7 +676,7 @@ def link_azure_monitor_profile_artifacts(cmd, cluster_subscription, cluster_reso
# Link grafana
link_grafana_instance(cmd, raw_parameters, azure_monitor_workspace_resource_id)
# create recording rules and alerts
create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_resource_id, mac_region)
create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_resource_id, mac_region, raw_parameters)


def unlink_azure_monitor_profile_artifacts(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, cluster_region):
Expand Down
1 change: 1 addition & 0 deletions src/aks-preview/azext_aks_preview/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,7 @@ def aks_update(
ksm_metric_labels_allow_list=None,
ksm_metric_annotations_allow_list=None,
grafana_resource_id=None,
enable_windows_recording_rules=False,
disable_azuremonitormetrics=False,
enable_vpa=False,
disable_vpa=False,
Expand Down
Loading