diff --git a/MonitoringTools/tests/__init__.py b/MonitoringTools/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/MonitoringTools/tests/test_limits_to_influx.py b/MonitoringTools/tests/test_limits_to_influx.py new file mode 100644 index 00000000..1d03ab0a --- /dev/null +++ b/MonitoringTools/tests/test_limits_to_influx.py @@ -0,0 +1,201 @@ +from unittest.mock import patch, call, NonCallableMock +from limits_to_influx import ( + convert_to_data_string, + get_limit_prop_string, + extract_limits, + get_limits_for_project, + get_all_limits, + main, +) +import pytest + + +def test_convert_to_data_string_no_items(): + """ + Tests convert_to_data_string returns empty string when given empty dict as limit_details + """ + assert convert_to_data_string(NonCallableMock(), {}) == "" + + +@patch("limits_to_influx.get_limit_prop_string") +def test_convert_to_data_string_one_item(mock_get_limit_prop_string): + """ + Tests convert_to_data_string works with single entry in dict for limit_details + """ + mock_instance = "prod" + mock_project_details = NonCallableMock() + mock_limit_details = {"project foo": mock_project_details} + mock_get_limit_prop_string.return_value = "prop1=val1" + + res = convert_to_data_string(mock_instance, mock_limit_details) + assert res == 'Limits,Project="project\ foo",instance=Prod prop1=val1\n' + mock_get_limit_prop_string.assert_called_once_with(mock_project_details) + + +@patch("limits_to_influx.get_limit_prop_string") +def test_convert_to_data_string_multi_item(mock_get_limit_prop_string): + """ + Tests convert_to_data_string works with multiple entries in dict for limit_details + """ + mock_instance = "prod" + mock_project_details = NonCallableMock() + + mock_limit_details = { + "project foo": mock_project_details, + "project bar": mock_project_details, + } + mock_get_limit_prop_string.side_effect = ["prop1=val1", "prop1=val2"] + assert ( + convert_to_data_string(mock_instance, mock_limit_details) + == 
'Limits,Project="project\ foo",instance=Prod prop1=val1\n' + 'Limits,Project="project\ bar",instance=Prod prop1=val2\n' + ) + + +@pytest.mark.parametrize( + "details, expected", + [ + ({}, ""), + ({"key1": "123"}, "key1=123i"), + ( + {"key1": "123", "key2": "456", "key3": "789"}, + "key1=123i,key2=456i,key3=789i", + ), + ], +) +def test_limit_prop_string(details, expected): + """ + tests get_limit_prop_string converts dict into data string properly + """ + assert get_limit_prop_string(details) == expected + + +def test_extract_limits_invalid(): + """ + tests extract_limits when given limits dict that is invalid + """ + with pytest.raises(RuntimeError): + extract_limits({}) + + +def test_extract_limits_valid(): + """ + test extract_limits function extracts proper limits and outputs in correct format + """ + mock_project_limits_dict = { + "server_meta": NonCallableMock(), + "personality": NonCallableMock(), + "server_groups_used": NonCallableMock(), + "image_meta": NonCallableMock(), + "personality_size": NonCallableMock(), + "keypairs": NonCallableMock(), + "security_group_rules": NonCallableMock(), + "server_groups": NonCallableMock(), + "total_cores_used": NonCallableMock(), + "total_ram_used": NonCallableMock(), + "instances_used": NonCallableMock(), + "security_groups": NonCallableMock(), + "floating_ips_used": NonCallableMock(), + "total_cores": NonCallableMock(), + "server_group_members": NonCallableMock(), + "floating_ips": NonCallableMock(), + "security_groups_used": NonCallableMock(), + "instances": NonCallableMock(), + "total_ram": NonCallableMock(), + } + assert extract_limits(mock_project_limits_dict) == { + "maxServerMeta": mock_project_limits_dict["server_meta"], + "maxPersonality": mock_project_limits_dict["personality"], + "totalServerGroupsUsed": mock_project_limits_dict["server_groups_used"], + "maxImageMeta": mock_project_limits_dict["image_meta"], + "maxPersonalitySize": mock_project_limits_dict["personality_size"], + "maxTotalKeypairs": 
mock_project_limits_dict["keypairs"], + "maxSecurityGroupRules": mock_project_limits_dict["security_group_rules"], + "maxServerGroups": mock_project_limits_dict["server_groups"], + "totalCoresUsed": mock_project_limits_dict["total_cores_used"], + "totalRAMUsed": mock_project_limits_dict["total_ram_used"], + "totalInstancesUsed": mock_project_limits_dict["instances_used"], + "maxSecurityGroups": mock_project_limits_dict["security_groups"], + "totalFloatingIpsUsed": mock_project_limits_dict["floating_ips_used"], + "maxTotalCores": mock_project_limits_dict["total_cores"], + "maxServerGroupMembers": mock_project_limits_dict["server_group_members"], + "maxTotalFloatingIps": mock_project_limits_dict["floating_ips"], + "totalSecurityGroupsUsed": mock_project_limits_dict["security_groups_used"], + "maxTotalInstances": mock_project_limits_dict["instances"], + "maxTotalRAMSize": mock_project_limits_dict["total_ram"], + } + + +@patch("limits_to_influx.extract_limits") +@patch("limits_to_influx.openstack") +def test_get_limits_for_project(mock_openstack, mock_extract_limits): + """ + tests get_limits_for_project gets the limits for a project by calling appropriate functions + """ + mock_instance = NonCallableMock() + mock_project_id = NonCallableMock() + + mock_conn = mock_openstack.connect.return_value + mock_conn.get_volume_limits.return_value = {"absolute": {"lim1": "val1"}} + mock_extract_limits.return_value = {"lim2": "val2"} + + res = get_limits_for_project(mock_instance, mock_project_id) + mock_openstack.connect.assert_called_once_with(mock_instance) + mock_conn.get_compute_limits.assert_called_once_with(mock_project_id) + mock_conn.get_volume_limits.assert_called_once_with(mock_project_id) + mock_extract_limits.assert_called_once_with(mock_conn.get_compute_limits.return_value) + assert res == {"lim1": "val1", "lim2": "val2"} + + +@patch("limits_to_influx.openstack") +@patch("limits_to_influx.get_limits_for_project") +@patch("limits_to_influx.convert_to_data_string") 
+def test_get_all_limits( + mock_convert_to_data_string, mock_get_limits_for_project, mock_openstack +): + """ + tests get_all_limits function gets the limits of project appropriately + """ + mock_project_list = [ + # to be ignored + {"name": "xyz_rally", "id": "foo"}, + {"name": "844_xyz", "id": "bar"}, + # not to be ignored + {"name": "proj1", "id": "proj1-id"}, + {"name": "proj2", "id": "proj2-id"}, + ] + mock_conn_obj = mock_openstack.connect.return_value + mock_conn_obj.list_projects.return_value = mock_project_list + + mock_instance = NonCallableMock() + res = get_all_limits(mock_instance) + mock_openstack.connect.assert_called_once_with(cloud=mock_instance) + mock_conn_obj.list_projects.assert_called_once() + mock_get_limits_for_project.assert_has_calls( + [call(mock_instance, "proj1-id"), call(mock_instance, "proj2-id")] + ) + + mock_convert_to_data_string.assert_called_once_with( + mock_instance, + { + "proj1": mock_get_limits_for_project.return_value, + "proj2": mock_get_limits_for_project.return_value, + }, + ) + assert res == mock_convert_to_data_string.return_value + + +@patch("limits_to_influx.run_scrape") +@patch("limits_to_influx.parse_args") +def test_main(mock_parse_args, mock_run_scrape): + """ + tests main function calls run_scrape utility function properly + """ + mock_user_args = NonCallableMock() + main(mock_user_args) + mock_run_scrape.assert_called_once_with( + mock_parse_args.return_value, get_all_limits + ) + mock_parse_args.assert_called_once_with( + mock_user_args, description="Get All Project Limits" + ) diff --git a/MonitoringTools/tests/test_send_metric_utils.py b/MonitoringTools/tests/test_send_metric_utils.py new file mode 100644 index 00000000..02aecc90 --- /dev/null +++ b/MonitoringTools/tests/test_send_metric_utils.py @@ -0,0 +1,144 @@ +import configparser +from pathlib import Path +from unittest.mock import patch, call, NonCallableMock, MagicMock + +import pytest + +from send_metric_utils import read_config_file, 
post_to_influxdb, parse_args, run_scrape + + +@patch("send_metric_utils.ConfigParser") +def test_read_config_file_valid(mock_config_parser): + """ + tests read_config_file function when given a valid config file + """ + mock_config_obj = mock_config_parser.return_value + mock_config_obj.sections.return_value = ["auth", "cloud", "db"] + mock_config_obj.items.side_effect = [ + [("password", "pass"), ("username", "user")], + [("instance", "prod")], + [("database", "cloud"), ("host", "localhost:8086")], + ] + mock_filepath = NonCallableMock() + res = read_config_file(mock_filepath) + mock_config_parser.assert_called_once() + mock_config_obj.sections.assert_called_once() + mock_config_obj.items.assert_has_calls([call("auth"), call("cloud"), call("db")]) + + assert res == { + "auth.password": "pass", + "auth.username": "user", + "cloud.instance": "prod", + "db.database": "cloud", + "db.host": "localhost:8086", + } + + +@patch("send_metric_utils.ConfigParser") +def test_read_config_file_empty(mock_config_parser): + """ + tests read_config_file function when given a emtpy config file + """ + mock_config_parser.return_value.sections.return_value = [] + with pytest.raises(AssertionError): + read_config_file(NonCallableMock()) + + +@patch("send_metric_utils.requests") +def test_post_to_influxdb_valid(mock_requests): + """ + tests post_to_influxdb function uses requests.post to post data correctly + """ + mock_data_string = NonCallableMock() + mock_host = "localhost:8086" + mock_db_name = "cloud" + mock_pass = NonCallableMock() + mock_user = NonCallableMock() + + post_to_influxdb(mock_data_string, mock_host, mock_db_name, (mock_user, mock_pass)) + mock_requests.post.assert_called_once_with( + "http://localhost:8086/write?db=cloud&precision=s", + data=mock_data_string, + auth=(mock_user, mock_pass), + ) + mock_response = mock_requests.post.return_value + mock_response.raise_for_status.assert_called_once() + + +@patch("send_metric_utils.requests") +def 
test_post_to_influxdb_empty_string(mock_requests): + """ + tests post_to_influxdb function when datastring is empty, should do nothing + """ + post_to_influxdb( + "", NonCallableMock(), NonCallableMock(), (NonCallableMock(), NonCallableMock()) + ) + mock_requests.post.assert_not_called() + + +@patch("send_metric_utils.read_config_file") +def test_parse_args_valid_args(mock_read_config_file): + """ + tests parse_args function with a valid filepath + """ + res = parse_args(["../usr/local/bin/influxdb.conf"]) + assert res == mock_read_config_file.return_value + + +def test_parse_args_filepath_does_not_exist(): + """ + tests parse_args function with invalid filepath (doesn't exist) + """ + with pytest.raises(RuntimeError): + parse_args(["./invalid-filepath"]) + + +def test_parse_args_filepath_invalid_dir_fp(): + """ + tests parse_args function with invalid filepath (points to directory) + """ + with pytest.raises(RuntimeError): + parse_args(["."]) + + +@patch("send_metric_utils.read_config_file") +def test_parse_args_filepath_read_config_fails(mock_read_config_file): + """ + tests parse_args function fails when read_config_file returns config error + """ + mock_read_config_file.side_effect = configparser.Error + with pytest.raises(RuntimeError): + parse_args(["../usr/local/bin/influxdb.conf"]) + + mock_read_config_file.assert_called_once_with( + Path("../usr/local/bin/influxdb.conf") + ) + + +@patch("send_metric_utils.post_to_influxdb") +def test_run_scrape(mock_post_to_influxdb): + """ + Tests run_scrape function. 
+ """ + mock_pass = NonCallableMock() + mock_user = NonCallableMock() + mock_host = NonCallableMock() + mock_db = NonCallableMock() + mock_instance = NonCallableMock() + + mock_influxdb_args = { + "auth.password": mock_pass, + "auth.username": mock_user, + "cloud.instance": mock_instance, + "db.database": mock_db, + "db.host": mock_host, + } + mock_scrape_func = MagicMock() + + run_scrape(mock_influxdb_args, mock_scrape_func) + mock_post_to_influxdb.assert_called_once_with( + mock_scrape_func.return_value, + host=mock_host, + db_name=mock_db, + auth=(mock_user, mock_pass), + ) diff --git a/MonitoringTools/tests/test_service_status_to_influx.py b/MonitoringTools/tests/test_service_status_to_influx.py new file mode 100644 index 00000000..b95e41d2 --- /dev/null +++ b/MonitoringTools/tests/test_service_status_to_influx.py @@ -0,0 +1,446 @@ +from unittest.mock import patch, call, NonCallableMock, MagicMock +import pytest + +from service_status_to_influx import ( + get_hypervisor_properties, + get_service_properties, + get_agent_properties, + convert_to_data_string, + get_service_prop_string, + get_all_hv_details, + update_with_service_statuses, + update_with_agent_statuses, + get_all_service_statuses, + main, +) + + +def test_get_hypervisor_properties_state_up(): + """ + tests that get_hypervisor_properties parses a valid hypervisor entry properly and extracts + useful information and returns the result in correct format - when hv state is up + """ + mock_hv = { + "state": "up", + "memory_size": 1, + "memory_used": 2, + "memory_free": 3, + "vcpus_used": 4, + "vcpus": 5, + } + expected_result = { + "hv": { + "aggregate": "no-aggregate", + "memorymax": 1, + "memoryused": 2, + "memoryavailable": 3, + "cpuused": 4, + "cpumax": 5, + "cpuavailable": 1, + "agent": 1, + "state": 1, + "statetext": "Up", + } + } + assert get_hypervisor_properties(mock_hv) == expected_result + + +def test_get_hypervisor_properties_state_down(): + """ + tests that get_hypervisor_properties parses a 
valid hypervisor entry properly and extracts + useful information and returns the result in correct format - when hv state is down + :return: + """ + mock_hv = { + "state": "down", + "memory_size": 1, + "memory_used": 2, + "memory_free": 3, + "vcpus_used": 4, + "vcpus": 5, + } + expected_result = { + "hv": { + "aggregate": "no-aggregate", + "memorymax": 1, + "memoryused": 2, + "memoryavailable": 3, + "cpuused": 4, + "cpumax": 5, + "cpuavailable": 1, + "agent": 1, + "state": 0, + "statetext": "Down", + } + } + assert get_hypervisor_properties(mock_hv) == expected_result + + +def test_get_service_properties_enabled_up(): + """ + tests that get_service_properties parses a valid service entry properly and extracts + useful information and returns the result in correct format - when status=enabled, state=up + """ + mock_service = {"binary": "foo", "status": "enabled", "state": "up"} + expected_result = { + "foo": { + "status": 1, + "statustext": "Enabled", + "state": 1, + "statetext": "Up", + "agent": 1, + } + } + assert get_service_properties(mock_service) == expected_result + + +def test_get_service_properties_disabled_down(): + """ + tests that get_service_properties parses a valid service entry properly and extracts + useful information and returns the result in correct format - when status=disabled, state=down + """ + mock_service = {"binary": "bar", "status": "disabled", "state": "down"} + expected_result = { + "bar": { + "status": 0, + "statustext": "Disabled", + "state": 0, + "statetext": "Down", + "agent": 1, + } + } + assert get_service_properties(mock_service) == expected_result + + +def test_get_agent_properties_alive_admin_up(): + """ + tests that get_agent_properties parses a valid network agent entry properly and extracts + useful information and returns the result in correct format + - when is_alive=True, is_admin_state_up=True + """ + mock_agent = { + "binary": "foo", + "is_alive": True, + "is_admin_state_up": True, + } + expected_result = { + "foo": { 
+ "state": 1, + "statetext": "Up", + "status": 1, + "statustext": "Enabled", + "agent": 1, + } + } + assert get_agent_properties(mock_agent) == expected_result + + +def test_get_agent_properties_disabled_down(): + """ + tests that get_agent_properties parses a valid network agent entry properly and extracts + useful information and returns the result in correct format + - when is_alive=False, is_admin_state_up=False + """ + mock_agent = { + "binary": "bar", + "is_alive": False, + "is_admin_state_up": False, + } + expected_result = { + "bar": { + "state": 0, + "statetext": "Down", + "status": 0, + "statustext": "Disabled", + "agent": 1, + } + } + assert get_agent_properties(mock_agent) == expected_result + + +def test_convert_to_data_string_no_items(): + """ + Tests convert_to_data_string returns empty string when given no details + """ + assert convert_to_data_string(NonCallableMock(), {}) == "" + + +@patch("service_status_to_influx.get_service_prop_string") +def test_convert_to_data_string_one_hv_one_service(mock_get_service_prop_string): + """ + Tests convert_to_data_string works with single entry in details + """ + mock_instance = "prod" + mock_service_details = NonCallableMock() + mock_details = {"hv1": {"service1": mock_service_details}} + + mock_get_service_prop_string.return_value = "prop1=val1" + + res = convert_to_data_string(mock_instance, mock_details) + assert ( + res == 'ServiceStatus,host="hv1",service="service1",instance=Prod prop1=val1\n' + ) + mock_get_service_prop_string.assert_called_once_with(mock_service_details) + + +@patch("service_status_to_influx.get_service_prop_string") +def test_convert_to_data_string_one_hv_multi_service(mock_get_service_prop_string): + """ + Tests convert_to_data_string works with single entry in details with multiple service binaries + """ + mock_instance = "prod" + mock_service_details_1 = NonCallableMock() + mock_service_details_2 = NonCallableMock() + mock_details = { + "hv1": {"service1": mock_service_details_1, 
"service2": mock_service_details_2} + } + + mock_get_service_prop_string.side_effect = ["prop1=val1", "prop1=val2"] + + res = convert_to_data_string(mock_instance, mock_details) + assert res == ( + 'ServiceStatus,host="hv1",service="service1",instance=Prod prop1=val1\n' + 'ServiceStatus,host="hv1",service="service2",instance=Prod prop1=val2\n' + ) + mock_get_service_prop_string.assert_has_calls( + [call(mock_service_details_1), call(mock_service_details_2)] + ) + + +@patch("service_status_to_influx.get_service_prop_string") +def test_convert_to_data_string_multi_item(mock_get_service_prop_string): + """ + Tests convert_to_data_string works with multiple entries in dict for details + """ + mock_instance = "prod" + mock_service_details_1 = NonCallableMock() + mock_service_details_2 = NonCallableMock() + mock_service_details_3 = NonCallableMock() + mock_details = { + "hv1": { + "service1": mock_service_details_1, + "service2": mock_service_details_2, + }, + "hv2": {"service3": mock_service_details_3}, + } + + mock_get_service_prop_string.side_effect = [ + "prop1=val1", + "prop1=val2", + "prop1=val3", + ] + + res = convert_to_data_string(mock_instance, mock_details) + assert res == ( + 'ServiceStatus,host="hv1",service="service1",instance=Prod prop1=val1\n' + 'ServiceStatus,host="hv1",service="service2",instance=Prod prop1=val2\n' + 'ServiceStatus,host="hv2",service="service3",instance=Prod prop1=val3\n' + ) + mock_get_service_prop_string.assert_has_calls( + [ + call(mock_service_details_1), + call(mock_service_details_2), + call(mock_service_details_3), + ] + ) + + +def test_get_service_prop_string_empty_dict(): + """ + tests get_service_prop_string returns nothing when given empty service_dict + """ + assert get_service_prop_string({}) == "" + + +def test_get_service_prop_string_with_string_props(): + """ + tests get_service_prop_string returns correct prop string + when given string props it should not suffix each property value with i + """ + props = {"statetext": 
"foo", "statustext": "bar", "aggregate": "baz"} + expected_result = 'statetext="foo",statustext="bar",aggregate="baz"' + assert get_service_prop_string(props) == expected_result + + +def test_get_service_prop_string_with_int_props(): + """ + tests get_service_prop_string returns correct prop string + when given int props it should suffix each property value with i + """ + props = {"prop1": 1, "prop2": 2, "prop3": 3} + expected_result = 'prop1="1i",prop2="2i",prop3="3i"' + assert get_service_prop_string(props) == expected_result + + +@patch("service_status_to_influx.get_hypervisor_properties") +def test_get_all_hv_details(mock_get_hypervisor_properties): + """ + tests get_all_hv_details returns dict of hypervisor status information + - for each hypervisor, call get_hypervisor_properties and store in a dict, + - then for each aggregate update the aggregate property for each hv with the aggregate name + that the hv belongs to + """ + mock_conn = MagicMock() + mock_hvs = [{"name": "hv1"}, {"name": "hv2"}, {"name": "hv3"}] + + mock_aggregates = [ + {"name": "ag1", "hosts": ["hv1", "hv2"]}, + {"name": "ag2", "hosts": ["hv3", "hv4"]}, + {"name": "ag3", "hosts": ["hv5"]}, + ] + + # stubs out getting props + mock_get_hypervisor_properties.side_effect = [{"hv": {}}, {"hv": {}}, {"hv": {}}] + mock_conn.list_hypervisors.return_value = mock_hvs + mock_conn.compute.aggregates.return_value = mock_aggregates + res = get_all_hv_details(mock_conn) + + mock_conn.list_hypervisors.assert_called_once() + mock_conn.compute.aggregates.assert_called_once() + + mock_get_hypervisor_properties.assert_has_calls([call(hv) for hv in mock_hvs]) + + assert res == { + "hv1": {"hv": {"aggregate": "ag1"}}, + "hv2": {"hv": {"aggregate": "ag1"}}, + "hv3": {"hv": {"aggregate": "ag2"}}, + } + + +@patch("service_status_to_influx.get_service_properties") +def test_update_with_service_statuses(mock_get_service_properties): + """ + tests update_with_service_statuses, for each service found, get its 
properties + and update provided dictionary status_details dict with service info + """ + mock_conn = MagicMock() + mock_status_details = { + "hv1": {"hv": {}, "foo": {}, "bar": {}}, + "hv2": {"hv": {}}, + } + + mock_services = [ + {"host": "hv1", "binary": "nova-compute"}, + {"host": "hv1", "binary": "other-svc"}, + {"host": "hv2", "binary": "other-svc"}, + {"host": "hv3", "binary": "nova-compute"}, + ] + mock_conn.compute.services.return_value = mock_services + + # stubs out actually getting properties + mock_get_service_properties.side_effect = [ + {"nova-compute": {"status": "enabled"}}, + {"other-service": {}}, + {"other-service": {"status": "enabled"}}, + {"nova-compute": {"status": "disabled"}}, + ] + + res = update_with_service_statuses(mock_conn, mock_status_details) + + mock_conn.compute.services.assert_called_once() + mock_get_service_properties.assert_has_calls([call(svc) for svc in mock_services]) + assert res == { + # shouldn't override what's already there + # add hv status == nova-compute svc status + "hv1": { + "hv": {"status": "enabled"}, + "nova-compute": {"status": "enabled"}, + "foo": {}, + "bar": {}, + "other-service": {}, + }, + # only nova-compute status adds hv status + "hv2": {"hv": {}, "other-service": {"status": "enabled"}}, + # adds what doesn't exist, no "hv" so no setting status + "hv3": {"nova-compute": {"status": "disabled"}}, + } + + +@patch("service_status_to_influx.get_agent_properties") +def test_update_with_agent_statuses(mock_get_agent_properties): + """ + tests update_with_agent_statuses, for each network agent found, get its properties + and update provided dictionary status_details dict with agent info + """ + mock_conn = MagicMock() + mock_status_details = {"hv1": {"foo": {}}, "hv2": {}} + + mock_agents = [ + {"host": "hv1", "binary": "ag1"}, + {"host": "hv1", "binary": "ag2"}, + {"host": "hv2", "binary": "ag1"}, + {"host": "hv3", "binary": "ag3"}, + ] + mock_conn.network.agents.return_value = mock_agents + + # stubs out 
actually getting properties + mock_get_agent_properties.side_effect = [ + {"ag1": {}}, + {"ag2": {}}, + {"ag1": {}}, + {"ag3": {}}, + ] + + res = update_with_agent_statuses(mock_conn, mock_status_details) + + mock_conn.network.agents.assert_called_once() + mock_get_agent_properties.assert_has_calls([call(agent) for agent in mock_agents]) + assert res == { + # shouldn't override what's already there + "hv1": {"foo": {}, "ag1": {}, "ag2": {}}, + "hv2": {"ag1": {}}, + # adds what doesn't exist + "hv3": {"ag3": {}}, + } + + +@patch("service_status_to_influx.openstack") +@patch("service_status_to_influx.get_all_hv_details") +@patch("service_status_to_influx.update_with_service_statuses") +@patch("service_status_to_influx.update_with_agent_statuses") +@patch("service_status_to_influx.convert_to_data_string") +def test_get_all_service_statuses( + mock_convert, + mock_get_agent_statuses, + mock_get_service_statuses, + mock_get_hv_statuses, + mock_openstack, +): + """ + Tests get_all_service_statuses calls appropriate functions: + - get hv status info + - update with service status info + - update with agent status info + - calls convert_to_data_string on result and output + """ + mock_instance = NonCallableMock() + mock_conn = mock_openstack.connect.return_value + res = get_all_service_statuses(mock_instance) + mock_openstack.connect.assert_called_once_with(mock_instance) + mock_get_hv_statuses.assert_called_once_with(mock_conn) + mock_get_service_statuses.assert_called_once_with( + mock_conn, mock_get_hv_statuses.return_value + ) + mock_get_agent_statuses.assert_called_once_with( + mock_conn, mock_get_service_statuses.return_value + ) + mock_convert.assert_called_once_with( + mock_instance, mock_get_agent_statuses.return_value + ) + assert res == mock_convert.return_value + + +@patch("service_status_to_influx.run_scrape") +@patch("service_status_to_influx.parse_args") +def test_main(mock_parse_args, mock_run_scrape): + """ + tests main function calls run_scrape utility 
function properly + """ + mock_user_args = NonCallableMock() + main(mock_user_args) + mock_run_scrape.assert_called_once_with( + mock_parse_args.return_value, get_all_service_statuses + ) + mock_parse_args.assert_called_once_with( + mock_user_args, description="Get All Service Statuses" + ) diff --git a/MonitoringTools/tests/test_slottifier.py b/MonitoringTools/tests/test_slottifier.py new file mode 100644 index 00000000..1ed9936d --- /dev/null +++ b/MonitoringTools/tests/test_slottifier.py @@ -0,0 +1,655 @@ +from unittest.mock import NonCallableMock, MagicMock, patch, call +from slottifier import ( + get_hv_info, + get_flavor_requirements, + get_valid_flavors_for_aggregate, + convert_to_data_string, + calculate_slots_on_hv, + get_openstack_resources, + get_all_hv_info_for_aggregate, + update_slots, + get_slottifier_details, + main, +) +import pytest + +from slottifier_entry import SlottifierEntry + + +@pytest.fixture(name="mock_hypervisors") +def mock_hypervisors_fixture(): + """ fixture for setting up various mock hvs""" + return { + "hv1": { + "name": "hv1", + "status": "enabled", + "vcpus": 8, + "vcpus_used": 2, + "memory_size": 8192, + "memory_used": 2048, + }, + "hv2": { + "name": "hv2", + "status": "enabled", + "vcpus": 4, + "vcpus_used": 6, + "memory_size": 2048, + "memory_used": 4096, + }, + "hv3": { + "name": "hv3", + "status": "disabled" + }, + } + + +@pytest.fixture(name="mock_compute_services") +def mock_service_fixture(): + return { + "svc1": {"host": "hv1", "name": "svc1"}, + "svc2": {"host": "hv2", "name": "svc2"}, + "svc3": {"host": "hv4", "name": "svc3"}, + } + + +@pytest.fixture(name="mock_aggregate") +def mock_aggregate_fixture(): + """ fixture for setting up a mock aggregate""" + def _mock_aggregate(hosttype=None, gpu_num=None): + """ + helper function for setting up mock aggregate + :param hosttype: optional hosttype to set + :param gpu_num: optional gpu_num to set + """ + ag = {"metadata": {}} + if hosttype: + ag["metadata"]["hosttype"] = 
hosttype + if gpu_num: + ag["metadata"]["gpunum"] = gpu_num + return ag + + return _mock_aggregate + + +@pytest.fixture(name="mock_flavors_list") +def mock_flavors_fixture(): + """ fixture for setting up various mock flavors """ + return [ + {"id": 1, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "A"}}, + {"id": 2, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "B"}}, + {"id": 3, "extra_specs": {}}, + {"id": 4, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "A"}}, + {"id": 5, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "C"}}, + ] + + +def test_get_hv_info_exists_and_enabled( + mock_hypervisors, mock_aggregate +): + """ tests get_hv_info when hv exists and enabled - should parse results properly """ + + assert get_hv_info( + mock_hypervisors["hv1"], mock_aggregate(gpu_num="1"), {"status": "enabled"} + ) == { + "cores_available": 6, + "mem_available": 6144, + "gpu_capacity": 1, + "core_capacity": 8, + "mem_capacity": 8192, + "compute_service_status": "enabled", + } + + +def test_get_hv_info_negative_results_floored( + mock_hypervisors, mock_aggregate +): + """ + tests get_hv_info when results for available mem/cores are negative + - should set it to 0 instead + """ + + assert get_hv_info( + mock_hypervisors["hv2"], mock_aggregate(), {"status": "enabled"} + ) == { + "cores_available": 0, + "mem_available": 0, + "gpu_capacity": 0, + "core_capacity": 4, + "mem_capacity": 2048, + "compute_service_status": "enabled", + } + + +def test_get_hv_info_exists_but_disabled( + mock_hypervisors, mock_aggregate +): + """ + tests get_hv_info when hv is disabled - should return default results + """ + assert get_hv_info( + mock_hypervisors["hv3"], mock_aggregate(), {"status": "disabled"} + ) == { + "cores_available": 0, + "mem_available": 0, + "gpu_capacity": 0, + "core_capacity": 0, + "mem_capacity": 0, + "compute_service_status": "disabled", + } + + +def test_get_flavor_requirements_with_valid_flavor(): + """ + tests 
+    get_flavor_requirements with valid flavor
+    """
+    mock_flavor = {
+        "extra_specs": {"accounting:gpu_num": "2"},
+        "vcpus": "4",
+        "ram": "8192",
+    }
+    assert get_flavor_requirements(mock_flavor) == {
+        "gpus_required": 2,
+        "cores_required": 4,
+        "mem_required": 8192,
+    }
+
+
+def test_get_flavor_requirements_with_missing_values():
+    """
+    tests get_flavor_requirements with all missing values
+    - should raise RuntimeError since requirements cannot be read
+    """
+    with pytest.raises(RuntimeError):
+        get_flavor_requirements({})
+
+
+def test_get_flavor_requirements_with_partial_values():
+    """
+    tests get_flavor_requirements with missing gpu_num attr
+    should default it to 0
+    """
+    req_dict = {"ram": "8192", "vcpus": 8}
+    assert get_flavor_requirements(req_dict) == {
+        "gpus_required": 0,
+        "cores_required": 8,
+        "mem_required": 8192,
+    }
+
+
+def test_get_valid_flavors_with_matching_type(mock_flavors_list, mock_aggregate):
+    """
+    test get_valid_flavors_for_aggregate should find all flavors with matching
+    aggregate hosttype
+    """
+    assert get_valid_flavors_for_aggregate(mock_flavors_list, mock_aggregate("A")) == [
+        {"id": 1, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "A"}},
+        {"id": 4, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "A"}},
+    ]
+
+
+def test_get_valid_flavors_with_empty_flavors_list(mock_aggregate):
+    """
+    test get_valid_flavors_for_aggregate should return empty list if no flavors given
+    """
+    assert get_valid_flavors_for_aggregate([], mock_aggregate("A")) == []
+
+
+def test_get_valid_flavors_with_non_matching_hosttype(
+    mock_flavors_list, mock_aggregate
+):
+    """
+    test get_valid_flavors_for_aggregate should return empty list if no flavors found with
+    matching aggregate hosttype
+    """
+    assert get_valid_flavors_for_aggregate(mock_flavors_list, mock_aggregate("D")) == []
+
+
+def test_convert_to_data_string_no_items():
+    """
+    Tests convert_to_data_string returns empty string when given empty dict as slots_dict
+    """
+    assert convert_to_data_string(NonCallableMock(), {}) == ""
+
+
+def test_convert_to_data_string_one_item():
+    """
+    Tests convert_to_data_string works with single entry in dict for slots_dict
+    """
+    mock_instance = "prod"
+
+    mock_slot_info_dataclass = MagicMock()
+    mock_slot_info_dataclass.slots_available = "1"
+    mock_slot_info_dataclass.max_gpu_slots_capacity = "2"
+    mock_slot_info_dataclass.estimated_gpu_slots_used = "3"
+    mock_slot_info_dataclass.max_gpu_slots_capacity_enabled = "4"
+
+    mock_slots_dict = {"flavor1": mock_slot_info_dataclass}
+
+    res = convert_to_data_string(mock_instance, mock_slots_dict)
+    assert res == (
+        "SlotsAvailable,instance=Prod,flavor=flavor1 "
+        "SlotsAvailable=1i,maxSlotsAvailable=2i,usedSlots=3i,enabledSlots=4i\n"
+    )
+
+
+def test_convert_to_data_string_multi_item():
+    """
+    Tests convert_to_data_string works with multiple entries in dict for slots_dict
+    """
+    mock_instance = "prod"
+    mock_slot_info_dataclass = MagicMock()
+    mock_slot_info_dataclass.slots_available = "1"
+    mock_slot_info_dataclass.max_gpu_slots_capacity = "2"
+    mock_slot_info_dataclass.estimated_gpu_slots_used = "3"
+    mock_slot_info_dataclass.max_gpu_slots_capacity_enabled = "4"
+
+    mock_slots_dict = {
+        "flavor1": mock_slot_info_dataclass,
+        "flavor2": mock_slot_info_dataclass,
+    }
+
+    res = convert_to_data_string(mock_instance, mock_slots_dict)
+    assert res == (
+        "SlotsAvailable,instance=Prod,flavor=flavor1 "
+        "SlotsAvailable=1i,maxSlotsAvailable=2i,usedSlots=3i,enabledSlots=4i\n"
+        "SlotsAvailable,instance=Prod,flavor=flavor2 "
+        "SlotsAvailable=1i,maxSlotsAvailable=2i,usedSlots=3i,enabledSlots=4i\n"
+    )
+
+
+def test_calculate_slots_on_hv_non_gpu_disabled():
+    """
+    tests calculate_slots_on_hv calculates slots properly for non-gpu flavor
+    - should return 0s since hv is disabled
+    """
+    res = calculate_slots_on_hv(
+        "flavor1",
+        {"cores_required": 10, "mem_required": 10},
+        {
+            "compute_service_status": "disabled",
+            # can fit 10 slots, but should be 0 since compute service disabled
+            "cores_available": 100,
+            "mem_available": 100,
+        },
+    )
+    assert res.slots_available == 0
+    assert res.max_gpu_slots_capacity == 0
+    assert res.estimated_gpu_slots_used == 0
+    assert res.max_gpu_slots_capacity_enabled == 0
+
+
+def test_calculate_slots_on_hv_gpu_no_gpunum():
+    """
+    tests calculate_slots_on_hv when provided a gpu flavor but gpus_required is set to 0
+    should raise error
+    """
+    with pytest.raises(RuntimeError):
+        calculate_slots_on_hv(
+            # g- specifies gpu flavor
+            "g-flavor1",
+            {"gpus_required": 0, "cores_required": 10, "mem_required": 10},
+            {
+                "compute_service_status": "disabled",
+                # can fit 10 slots, but should be 0 since compute service disabled
+                "cores_available": 100,
+                "mem_available": 100,
+            }
+        )
+
+
+def test_calculate_slots_on_hv_gpu_disabled():
+    """
+    tests calculate_slots_on_hv calculates slots properly for gpu flavor
+    - should return 0s since hv is disabled, but keep track of max gpu slots capacity
+    """
+
+    res = calculate_slots_on_hv(
+        # g- specifies gpu flavor
+        "g-flavor1",
+        {"gpus_required": 1, "cores_required": 10, "mem_required": 10},
+        {
+            "compute_service_status": "disabled",
+            # can fit 10 slots, but should be 0 since compute service disabled
+            "cores_available": 100,
+            "mem_available": 100,
+            "core_capacity": 100,
+            "mem_capacity": 100,
+            "gpu_capacity": 10,
+        },
+    )
+    assert res.slots_available == 0
+    # still want capacity to be updated
+    assert res.max_gpu_slots_capacity == 10
+    assert res.estimated_gpu_slots_used == 0
+    assert res.max_gpu_slots_capacity_enabled == 0
+
+
+def test_calculate_slots_on_hv_mem_available_max():
+    """
+    tests calculate_slots_on_hv calculates slots properly for non-gpu flavor
+    - where memory available is limiting factor
+    """
+
+    res = calculate_slots_on_hv(
+        "flavor1",
+        {"cores_required": 10, "mem_required": 10},
+        {
+            "compute_service_status": "enabled",
+            "cores_available": 100,
+            # can fit only one slot
+            "mem_available": 10,
+        },
+    )
+    assert res.slots_available == 1
+    assert res.max_gpu_slots_capacity == 0
+    assert res.estimated_gpu_slots_used == 0
+    assert res.max_gpu_slots_capacity_enabled == 0
+
+
+def test_calculate_slots_on_hv_cores_available_max():
+    """
+    tests calculate_slots_on_hv calculates slots properly for non-gpu flavor
+    - where cores available is limiting factor
+    """
+    res = calculate_slots_on_hv(
+        "flavor1",
+        {"cores_required": 10, "mem_required": 10},
+        {
+            "compute_service_status": "enabled",
+            # can fit 10 cpu slots
+            "cores_available": 100,
+            "mem_available": 1000,
+        },
+    )
+    assert res.slots_available == 10
+    assert res.max_gpu_slots_capacity == 0
+    assert res.estimated_gpu_slots_used == 0
+    assert res.max_gpu_slots_capacity_enabled == 0
+
+
+def test_calculate_slots_on_hv_gpu_available_max():
+    """
+    tests calculate_slots_on_hv calculates slots properly for gpu flavor
+    - where gpus available is limiting factor
+    """
+    res = calculate_slots_on_hv(
+        # specifies a gpu flavor
+        "g-flavor1",
+        {"gpus_required": 1, "cores_required": 10, "mem_required": 10},
+        {
+            "compute_service_status": "enabled",
+            # should find only 5 slots available since gpus are the limiting factor
+            "gpu_capacity": 5,
+            "cores_available": 100,
+            "mem_available": 100,
+            "core_capacity": 100,
+            "mem_capacity": 100,
+        },
+    )
+    assert res.slots_available == 5
+    assert res.max_gpu_slots_capacity == 5
+    assert res.estimated_gpu_slots_used == 0
+    assert res.max_gpu_slots_capacity_enabled == 5
+
+
+def test_calculate_slots_on_hv_calculates_used_gpu_capacity():
+    """
+    tests calculate_slots_on_hv calculates slots properly for gpu flavor
+    - should calculate estimated used gpus slots properly
+    """
+    res = calculate_slots_on_hv(
+        # specifies a gpu flavor
+        "g-flavor1",
+        {"gpus_required": 1, "cores_required": 10, "mem_required": 10},
+        {
+            "compute_service_status": "enabled",
+            # should find only 5 slots available since gpus are the limiting factor
+            "gpu_capacity": 5,
+            "cores_available": 10,
+            "mem_available": 10,
+            # there's 4 flavor slots that could have already been used
+            "core_capacity": 50,
+            "mem_capacity": 50,
+        },
+    )
+    assert res.slots_available == 1
+    assert res.max_gpu_slots_capacity == 5
+    assert res.estimated_gpu_slots_used == 4
+    assert res.max_gpu_slots_capacity_enabled == 5
+
+
+@patch("slottifier.openstack")
+def test_get_openstack_resources(mock_openstack):
+    """
+    tests get_openstack_resources gets all required resources via openstacksdk
+    and outputs them properly
+    """
+    mock_conn = mock_openstack.connect.return_value
+
+    mock_conn.list_hypervisors.return_value = [{"name": "hv1", "id": 1}]
+    mock_conn.compute.aggregates.return_value = [{"name": "ag1", "id": 2}]
+    mock_conn.compute.services.return_value = [{"name": "svc1", "id": 3}]
+    mock_conn.compute.flavors.return_value = [{"name": "flv1", "id": 4}]
+
+    mock_instance = NonCallableMock()
+    res = get_openstack_resources(mock_instance)
+
+    mock_openstack.connect.assert_called_once_with(cloud=mock_instance)
+    mock_conn.compute.services.assert_called_once()
+    mock_conn.compute.aggregates.assert_called_once()
+    mock_conn.list_hypervisors.assert_called_once()
+    mock_conn.compute.flavors.assert_called_once_with(get_extra_specs=True)
+
+    assert res == {
+        "compute_services": [{"name": "svc1", "id": 3}],
+        "aggregates": [{"name": "ag1", "id": 2}],
+        "hypervisors": [{"name": "hv1", "id": 1}],
+        "flavors": [{"name": "flv1", "id": 4}],
+    }
+
+
+@patch("slottifier.get_hv_info")
+def test_get_all_hv_info_for_aggregate_with_valid_data(
+    mock_get_hv_info, mock_hypervisors, mock_compute_services
+):
+    """
+    Tests get_all_hv_info_for_aggregate with valid data.
+    should call get_hv_info with correct hv and service object that match aggregate and
+    add results to list
+    """
+    mock_aggregate = {"hosts": ["hv1", "hv2"]}
+    res = get_all_hv_info_for_aggregate(
+        mock_aggregate, mock_compute_services.values(), mock_hypervisors.values()
+    )
+    mock_get_hv_info.assert_has_calls(
+        [
+            # svc1 holds host: hv1
+            call(mock_hypervisors["hv1"], mock_aggregate, mock_compute_services["svc1"]),
+            # svc2 holds host: hv2
+            call(mock_hypervisors["hv2"], mock_aggregate, mock_compute_services["svc2"]),
+        ]
+    )
+    assert res == [mock_get_hv_info.return_value, mock_get_hv_info.return_value]
+
+
+def test_get_all_hv_info_for_aggregate_with_invalid_data(
+    mock_hypervisors, mock_compute_services
+):
+    """
+    Tests get_all_hv_info_for_aggregate with invalid data.
+    should not add hv with invalid data to the resulting list
+    """
+    mock_aggregate = {
+        "hosts": [
+            # hv4 has service but not found in list of hvs
+            "hv4",
+            # hv5 has no service and not in list of hvs
+            "hv5",
+        ]
+    }
+    assert (
+        get_all_hv_info_for_aggregate(
+            mock_aggregate,
+            mock_compute_services.values(),
+            mock_hypervisors.values()
+        ) == []
+    )
+
+
+def test_get_all_hv_info_for_aggregate_with_empty_aggregate(
+    mock_hypervisors, mock_compute_services
+):
+    """
+    Tests get_all_hv_info_for_aggregate with aggregate with no hosts.
+    should do nothing and return empty list
+    """
+    mock_aggregate = {"hosts": []}
+    assert (
+        get_all_hv_info_for_aggregate(
+            mock_aggregate,
+            mock_hypervisors.values(),
+            mock_compute_services.values()
+        ) == []
+    )
+
+
+@patch("slottifier.get_flavor_requirements")
+@patch("slottifier.calculate_slots_on_hv")
+def test_update_slots_one_flavor_one_hv(
+    mock_calculate_slots_on_hv, mock_get_flavor_requirements
+):
+    """
+    Tests update_slots with one flavor and one hv.
+    should call calculate_slots_on_hv once with the given flavor and hv
+    """
+    mock_flavor = {"name": "flv1"}
+    mock_host = NonCallableMock()
+
+    slots_dict = {"flv1": 1}
+    mock_calculate_slots_on_hv.return_value = 1
+    res = update_slots([mock_flavor], [mock_host], slots_dict=slots_dict)
+    mock_get_flavor_requirements.assert_called_once_with(mock_flavor)
+    mock_calculate_slots_on_hv.assert_called_once_with(
+        "flv1", mock_get_flavor_requirements.return_value, mock_host
+    )
+    assert res == {"flv1": 2}
+
+
+@patch("slottifier.get_flavor_requirements")
+@patch("slottifier.calculate_slots_on_hv")
+def test_update_slots_one_flavor_multi_hv(
+    mock_calculate_slots_on_hv, mock_get_flavor_requirements
+):
+    """
+    Tests update_slots with one flavor and multiple hvs.
+    should call calculate_slots_on_hv on each hv with the same flavor
+    """
+    mock_flavor = {"name": "flv1"}
+    mock_host_1 = NonCallableMock()
+    mock_host_2 = NonCallableMock()
+    slots_dict = {"flv1": 1}
+    mock_calculate_slots_on_hv.side_effect = [1, 2]
+    res = update_slots([mock_flavor], [mock_host_1, mock_host_2], slots_dict=slots_dict)
+    mock_get_flavor_requirements.assert_called_once_with(mock_flavor)
+    mock_calculate_slots_on_hv.assert_has_calls(
+        [
+            call("flv1", mock_get_flavor_requirements.return_value, mock_host_1),
+            call("flv1", mock_get_flavor_requirements.return_value, mock_host_2),
+        ]
+    )
+    assert res == {"flv1": 4}
+
+
+@patch("slottifier.get_flavor_requirements")
+@patch("slottifier.calculate_slots_on_hv")
+def test_update_slots_multi_flavor_multi_hv(
+    mock_calculate_slots_on_hv, mock_get_flavor_requirements
+):
+    """
+    Tests update_slots with multiple flavors and multiple hvs.
+    should call calculate_slots_on_hv with each unique hv-flavor pairing
+    """
+    mock_flavor_1 = {"name": "flv1"}
+    mock_flavor_2 = {"name": "flv2"}
+    mock_host_1 = NonCallableMock()
+    mock_host_2 = NonCallableMock()
+    slots_dict = {"flv1": 1, "flv2": 0}
+    mock_calculate_slots_on_hv.side_effect = [1, 2, 0, 0]
+    res = update_slots(
+        [mock_flavor_1, mock_flavor_2],
+        [mock_host_1, mock_host_2],
+        slots_dict=slots_dict,
+    )
+    mock_get_flavor_requirements.assert_has_calls(
+        [call(mock_flavor_1), call(mock_flavor_2)]
+    )
+    mock_calculate_slots_on_hv.assert_has_calls(
+        [
+            call("flv1", mock_get_flavor_requirements.return_value, mock_host_1),
+            call("flv1", mock_get_flavor_requirements.return_value, mock_host_2),
+            call("flv2", mock_get_flavor_requirements.return_value, mock_host_1),
+            call("flv2", mock_get_flavor_requirements.return_value, mock_host_2),
+        ]
+    )
+    assert res == {"flv1": 4, "flv2": 0}
+
+
+@patch("slottifier.get_openstack_resources")
+@patch("slottifier.get_valid_flavors_for_aggregate")
+@patch("slottifier.get_all_hv_info_for_aggregate")
+@patch("slottifier.update_slots")
+@patch("slottifier.convert_to_data_string")
+def test_get_slottifier_details_one_aggregate(
+    mock_convert_to_data_string,
+    mock_update_slots,
+    mock_get_all_hv_info_for_aggregate,
+    mock_get_valid_flavors_for_aggregate,
+    mock_get_openstack_resources,
+):
+    """
+    Tests get_slottifier_details with one aggregate.
+    """
+    mock_instance = NonCallableMock()
+    mock_flavors = [{"name": "flv1"}, {"name": "flv2"}]
+    mock_compute_services = NonCallableMock()
+    mock_hypervisors = NonCallableMock()
+
+    mock_get_openstack_resources.return_value = {
+        "aggregates": ["ag1"],
+        "flavors": mock_flavors,
+        "compute_services": mock_compute_services,
+        "hypervisors": mock_hypervisors,
+    }
+    res = get_slottifier_details(mock_instance)
+    mock_get_openstack_resources.assert_called_once_with(mock_instance)
+    mock_get_valid_flavors_for_aggregate.assert_called_once_with(mock_flavors, "ag1")
+    mock_get_all_hv_info_for_aggregate.assert_called_once_with(
+        "ag1", mock_compute_services, mock_hypervisors
+    )
+
+    mock_update_slots.assert_called_once_with(
+        mock_get_valid_flavors_for_aggregate.return_value,
+        mock_get_all_hv_info_for_aggregate.return_value,
+        {"flv1": SlottifierEntry(), "flv2": SlottifierEntry()},
+    )
+
+    mock_convert_to_data_string.assert_called_once_with(
+        mock_instance, mock_update_slots.return_value
+    )
+    assert res == mock_convert_to_data_string.return_value
+
+
+@patch("slottifier.run_scrape")
+@patch("slottifier.parse_args")
+def test_main(mock_parse_args, mock_run_scrape):
+    """
+    tests main function calls run_scrape utility function properly
+    """
+    mock_user_args = NonCallableMock()
+    main(mock_user_args)
+    mock_run_scrape.assert_called_once_with(
+        mock_parse_args.return_value, get_slottifier_details
+    )
+    mock_parse_args.assert_called_once_with(
+        mock_user_args, description="Get All Service Statuses"
+    )
diff --git a/MonitoringTools/tests/test_slottifier_entry.py b/MonitoringTools/tests/test_slottifier_entry.py
new file mode 100644
index 00000000..e64bbe57
--- /dev/null
+++ b/MonitoringTools/tests/test_slottifier_entry.py
@@ -0,0 +1,27 @@
+from slottifier_entry import SlottifierEntry
+
+
+def test_add():
+    """
+    test that adding two SlottifierEntry dataclasses works properly
+    """
+    a = SlottifierEntry(
+        slots_available=1,
+        estimated_gpu_slots_used=1,
+        max_gpu_slots_capacity=1,
+        max_gpu_slots_capacity_enabled=1
+    )
+
+    b = SlottifierEntry(
+        slots_available=2,
+        estimated_gpu_slots_used=3,
+        max_gpu_slots_capacity=4,
+        max_gpu_slots_capacity_enabled=5
+    )
+
+    assert a + b == SlottifierEntry(
+        slots_available=3,
+        estimated_gpu_slots_used=4,
+        max_gpu_slots_capacity=5,
+        max_gpu_slots_capacity_enabled=6
+    )
diff --git a/MonitoringTools/usr/local/bin/influxdb.conf b/MonitoringTools/usr/local/bin/influxdb.conf
new file mode 100644
index 00000000..41b14cdd
--- /dev/null
+++ b/MonitoringTools/usr/local/bin/influxdb.conf
@@ -0,0 +1,12 @@
+[auth]
+# auth for influxdb
+password=admin
+username=admin
+
+[cloud]
+# requires /etc/openstack/clouds.yaml with "prod" cloud account
+instance=prod
+
+[db]
+database=cloud
+host=localhost:8086
diff --git a/MonitoringTools/usr/local/bin/limits_to_influx.py b/MonitoringTools/usr/local/bin/limits_to_influx.py
new file mode 100755
index 00000000..f2c0b8ac
--- /dev/null
+++ b/MonitoringTools/usr/local/bin/limits_to_influx.py
@@ -0,0 +1,128 @@
+#!/usr/bin/python
+import sys
+from typing import Dict, List
+import openstack
+from openstack.identity.v3.project import Project
+from send_metric_utils import run_scrape, parse_args
+
+
+def convert_to_data_string(instance: str, limit_details: Dict) -> str:
+    """
+    converts a dictionary of values into a data-string influxdb can read
+    :param instance: which cloud the info was scraped from (prod or dev)
+    :param limit_details: a dictionary of values to convert to string
+    :return: a comma-separated string of key=value taken from input dictionary
+    """
+    data_string = ""
+    for project_name, limit_entry in limit_details.items():
+        parsed_project_name = project_name.replace(" ", "\ ")
+        data_string += (
+            f'Limits,Project="{parsed_project_name}",'
+            f"instance={instance.capitalize()} "
+            f"{get_limit_prop_string(limit_entry)}\n"
+        )
+    return data_string
+
+
+def get_limit_prop_string(limit_details):
+    """
+    This function is a helper function that creates a partial data string of just the
+    properties scraped for a single service
+    :param limit_details: properties scraped for a single project
+    :return: a data string of scraped info
+    """
+    # all limit properties are integers so add 'i' for each value
+    limit_strings = []
+    for limit, val in limit_details.items():
+        limit_strings.append(f"{limit}={val}i")
+    return ",".join(limit_strings)
+
+
+def extract_limits(limits_dict) -> Dict:
+    """
+    helper function to extract useful limit properties from a raw limits dict
+    :param limits_dict: a dictionary of project limits to extract useful properties from
+    :return: a dictionary of useful properties with keys that match expected keys in influxdb
+    """
+    # the keys need changing to match legacy data when we used the openstack-cli
+    mappings = {
+        "server_meta": "maxServerMeta",
+        "personality": "maxPersonality",
+        "server_groups_used": "totalServerGroupsUsed",
+        "image_meta": "maxImageMeta",
+        "personality_size": "maxPersonalitySize",
+        "keypairs": "maxTotalKeypairs",
+        "security_group_rules": "maxSecurityGroupRules",
+        "server_groups": "maxServerGroups",
+        "total_cores_used": "totalCoresUsed",
+        "total_ram_used": "totalRAMUsed",
+        "instances_used": "totalInstancesUsed",
+        "security_groups": "maxSecurityGroups",
+        "floating_ips_used": "totalFloatingIpsUsed",
+        "total_cores": "maxTotalCores",
+        "server_group_members": "maxServerGroupMembers",
+        "floating_ips": "maxTotalFloatingIps",
+        "security_groups_used": "totalSecurityGroupsUsed",
+        "instances": "maxTotalInstances",
+        "total_ram": "maxTotalRAMSize",
+    }
+    parsed_limits = {}
+    for key, val in mappings.items():
+        try:
+            parsed_limits[val] = limits_dict[key]
+        except KeyError as exp:
+            raise RuntimeError(f"could not find {key} in project limits") from exp
+    return parsed_limits
+
+
+def get_limits_for_project(instance: str, project_id) -> Dict:
+    """
+    Get limits for a project using openstacksdk
+    compute limits and volume limits are merged into a single dict
+    :param instance: cloud we want to scrape from
+    :param project_id: project id we want to collect limits for
+    :return: a set of limit properties for project we want
+    """
+    conn = openstack.connect(instance)
+    project_details = {
+        **extract_limits(conn.get_compute_limits(project_id)),
+        **conn.get_volume_limits(project_id)["absolute"]
+    }
+    return project_details
+
+
+def is_valid_project(project: Project) -> bool:
+    """
+    helper function which returns if project is valid to get limits for
+    :param project: project to check
+    :return: boolean, True if project should be accounted for in limits
+    """
+    invalid_strings = ["_rally", "844"]
+    return all(string not in project["name"] for string in invalid_strings)
+
+
+def get_all_limits(instance: str) -> str:
+    """
+    This function gets limits for each project on openstack
+    :param instance: which cloud to scrape from (prod or dev)
+    :return: A data string of scraped info
+    """
+    conn = openstack.connect(cloud=instance)
+    limit_details = {}
+    for project in conn.list_projects():
+        if is_valid_project(project):
+            limit_details[project["name"]] = get_limits_for_project(instance, project["id"])
+    return convert_to_data_string(instance, limit_details)
+
+
+def main(user_args: List):
+    """
+    send limits to influx
+    :param user_args: args passed into script by user
+    """
+    influxdb_args = parse_args(user_args, description="Get All Project Limits")
+    run_scrape(influxdb_args, get_all_limits)
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/MonitoringTools/usr/local/bin/send_metric_utils.py b/MonitoringTools/usr/local/bin/send_metric_utils.py
new file mode 100644
index 00000000..fe5ee516
--- /dev/null
+++ b/MonitoringTools/usr/local/bin/send_metric_utils.py
@@ -0,0 +1,94 @@
+from typing import Dict, Tuple, Callable, List
+from pathlib import Path
+import configparser
+import requests
+import argparse
+from configparser import ConfigParser
+
+
+def read_config_file(config_filepath: Path) -> Dict:
+    """
+    This function reads a config file and puts it into a dictionary
+    :param config_filepath: path to the influxdb config file to read
+    :return: A flattened dictionary containing key-value pairs from config file
+    """
+    config = ConfigParser()
+    config.read(config_filepath)
+    config_dict = {}
+    for section in config.sections():
+        for key, value in config.items(section):
+            config_dict[f"{section}.{key}"] = value
+
+    required_values = [
+        "auth.password",
+        "auth.username",
+        "cloud.instance",
+        "db.database",
+        "db.host",
+    ]
+    assert all(
+        val in config_dict for val in required_values
+    ), "Config file is missing required values."
+    return config_dict
+
+
+def post_to_influxdb(
+    data_string: str, host: str, db_name: str, auth: Tuple[str, str]
+) -> None:
+    """
+    This function posts information to influxdb
+    :param data_string: data to write
+    :param host: hostname and port where influxdb can be accessed
+    :param db_name: database name to write to
+    :param auth: tuple of (username, password) to authenticate with influxdb
+    """
+    if not data_string:
+        return
+
+    url = f"http://{host}/write?db={db_name}&precision=s"
+    response = requests.post(url, data=data_string, auth=auth)
+    response.raise_for_status()
+
+
+def parse_args(inp_args, description: str = "scrape metrics script") -> Dict:
+    """
+    This function parses influxdb args from a filepath passed into script when its run.
+    The only thing the scripts takes as input is the path to the config file.
+    :param description: The description of the script to print on help command
+    :param inp_args: input arguments passed when a 'gather metrics' script is run
+    :return: a dictionary of config values parsed from the given config file
+    """
+
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument(
+        "config_filepath", type=Path, help="Path to influxdb config file"
+    )
+    try:
+        args = parser.parse_args(inp_args)
+    except argparse.ArgumentTypeError as exp:
+        raise RuntimeError("Error reading input arguments") from exp
+
+    if not args.config_filepath.is_file():
+        raise RuntimeError(f"Invalid filepath given '{args.config_filepath}'")
+
+    try:
+        return read_config_file(args.config_filepath)
+    except configparser.Error as exp:
+        raise RuntimeError(
+            f"could not read influxdb config file '{args.config_filepath}'"
+        ) from exp
+
+
+def run_scrape(influxdb_args, scrape_func: Callable[[str], str]):
+    """
+    run script to scrape info and post to influxdb
+    :param influxdb_args: set of args passed in by user upon running script
+    :param scrape_func: function to use to scrape info
+    """
+    scrape_res = scrape_func(influxdb_args["cloud.instance"])
+    post_to_influxdb(
+        scrape_res,
+        host=influxdb_args["db.host"],
+        db_name=influxdb_args["db.database"],
+        auth=(influxdb_args["auth.username"], influxdb_args["auth.password"]),
+    )
diff --git a/MonitoringTools/usr/local/bin/service_status_to_influx.py b/MonitoringTools/usr/local/bin/service_status_to_influx.py
new file mode 100644
index 00000000..b46e9a11
--- /dev/null
+++ b/MonitoringTools/usr/local/bin/service_status_to_influx.py
@@ -0,0 +1,185 @@
+#!/usr/bin/python
+import sys
+from typing import Dict, List
+import openstack
+from openstack.compute.v2.hypervisor import Hypervisor
+from openstack.compute.v2.service import Service
+from openstack.network.v2.agent import Agent
+from send_metric_utils import run_scrape, parse_args
+
+
+def get_hypervisor_properties(hypervisor: Hypervisor) -> Dict:
+    """
+    This function parses an openstacksdk Hypervisor object to get properties in the correct format
+    to feed into influxdb
+    :param hypervisor: hypervisor to extract properties from
+    :return: A dictionary of useful properties
+    """
+    hv_prop_dict = {
+        "hv": {
+            # this is populated by another command
+            "aggregate": "no-aggregate",
+            "memorymax": hypervisor["memory_size"],
+            "memoryused": hypervisor["memory_used"],
+            "memoryavailable": hypervisor["memory_free"],
+            "cpumax": hypervisor["vcpus"],
+            "cpuused": hypervisor["vcpus_used"],
+            "cpuavailable": hypervisor["vcpus"] - hypervisor["vcpus_used"],
+            "agent": 1,
+            "state": 1 if hypervisor["state"] == "up" else 0,
+            "statetext": hypervisor["state"].capitalize(),
+        }
+    }
+    return hv_prop_dict
+
+
+def get_service_properties(service: Service) -> Dict:
+    """
+    This function parses an openstacksdk Service object to get properties in the correct format
+    to feed into influxdb
+    :param service: service to extract properties from
+    :return: A dictionary of useful properties
+    """
+    service_prop_dict = {
+        service["binary"]: {
+            "agent": 1,
+            "status": 1 if service["status"] == "enabled" else 0,
+            "statustext": service["status"].capitalize(),
+            "state": 1 if service["state"] == "up" else 0,
+            "statetext": service["state"].capitalize(),
+        }
+    }
+    return service_prop_dict
+
+
+def get_agent_properties(agent: Agent) -> Dict:
+    """
+    This function parses an openstacksdk Agent object to get properties in the correct format
+    to feed into influxdb
+    :param agent: agent to extract properties from
+    :return: A dictionary of useful properties
+    """
+    agent_prop_dict = {
+        agent["binary"]: {
+            "agent": 1,
+            "state": 1 if agent["is_alive"] else 0,
+            "statetext": "Up" if agent["is_alive"] else "Down",
+            "status": 1 if agent["is_admin_state_up"] else 0,
+            "statustext": "Enabled" if agent["is_admin_state_up"] else "Disabled",
+        }
+    }
+    return agent_prop_dict
+
+
+def convert_to_data_string(instance: str, service_details: Dict) -> str:
+    """
+    This function creates a data string from service properties to feed into influxdb
+    :param instance: the cloud instance (prod or dev) that details were scraped from
+    :param service_details: a set of service properties to parse
+    :return: A data string of scraped info
+    """
+    data_string = ""
+    for hypervisor_name, services in service_details.items():
+        for service_binary, service_stats in services.items():
+            data_string += (
+                f'ServiceStatus,host="{hypervisor_name}",'
+                f'service="{service_binary}",instance={instance.capitalize()} '
+                f"{get_service_prop_string(service_stats)}\n"
+            )
+    return data_string
+
+
+def get_service_prop_string(service_dict: Dict) -> str:
+    """
+    This function is a helper function that creates a partial data string of just the
+    properties scraped for a single service
+    :param service_dict: properties scraped for a single service
+    :return: a data string of scraped info
+    """
+    stats_strings = []
+    for stat, val in service_dict.items():
+        parsed_val = val
+        if stat not in ["statetext", "statustext", "aggregate"]:
+            parsed_val = f"{val}i"
+        stats_strings.append(f'{stat}="{parsed_val}"')
+    return ",".join(stats_strings)
+
+
+def get_all_hv_details(conn) -> Dict:
+    """
+    Get all hypervisor status information from openstack
+    :param conn: openstack connection object
+    :return: a dictionary of hypervisor status information
+    """
+    hv_details = {}
+    for hypervisor in conn.list_hypervisors():
+        hv_details[hypervisor["name"]] = get_hypervisor_properties(hypervisor)
+
+    # populate found hypervisors with what aggregate they belong to - so we can filter by aggregate in grafana
+    for aggregate in conn.compute.aggregates():
+        for host_name in aggregate["hosts"]:
+            if host_name in hv_details.keys():
+                hv_details[host_name]["hv"]["aggregate"] = aggregate["name"]
+    return hv_details
+
+
+def update_with_service_statuses(conn, status_details: Dict) -> Dict:
+    """
+    update status details with service status information from openstack
+    :param conn: openstack connection object
+    :param status_details: status details dictionary to update
+    :return: a dictionary of updated status information with service statuses
+    """
+    for service in conn.compute.services():
+        if service["host"] not in status_details.keys():
+            status_details[service["host"]] = {}
+
+        service_host = status_details[service["host"]]
+        service_host.update(get_service_properties(service))
+        if "hv" in service_host and service["binary"] == "nova-compute":
+            service_host["hv"]["status"] = service_host["nova-compute"]["status"]
+
+    return status_details
+
+
+def update_with_agent_statuses(conn, status_details: Dict) -> Dict:
+    """
+    update status details with network agent status information from openstack
+    :param conn: openstack connection object
+    :param status_details: status details dictionary to update
+    :return: a dictionary of updated status information with network agent statuses
+    """
+    for agent in conn.network.agents():
+        if agent["host"] not in status_details.keys():
+            status_details[agent["host"]] = {}
+
+        status_details[agent["host"]].update(get_agent_properties(agent))
+
+    return status_details
+
+
+def get_all_service_statuses(instance: str) -> str:
+    """
+    This function gets status information for each service node, hypervisor and network
+    agent in openstack.
+    :param instance: which cloud to scrape from (prod or dev)
+    :return: A data string of scraped info
+    """
+    conn = openstack.connect(instance)
+    all_details = get_all_hv_details(conn)
+    all_details = update_with_service_statuses(conn, all_details)
+    all_details = update_with_agent_statuses(conn, all_details)
+    return convert_to_data_string(instance, all_details)
+
+
+def main(user_args: List):
+    """
+    send service status info to influx
+    :param user_args: args passed into script by user
+    """
+    influxdb_args = parse_args(user_args, description="Get All Service Statuses")
+    run_scrape(influxdb_args, get_all_service_statuses)
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/MonitoringTools/usr/local/bin/slottifier.py b/MonitoringTools/usr/local/bin/slottifier.py
new file mode 100644
index 00000000..4b4010d6
--- /dev/null
+++ b/MonitoringTools/usr/local/bin/slottifier.py
@@ -0,0 +1,294 @@
+#!/usr/bin/python
+from typing import List
+import openstack
+import sys
+from typing import Dict
+from slottifier_entry import SlottifierEntry
+from send_metric_utils import parse_args, run_scrape
+
+
+def get_hv_info(hypervisor: Dict, aggregate_info: Dict, service_info: Dict) -> Dict:
+    """
+    Helper function to get hv information on cores/memory available
+    :param hypervisor: a dictionary holding info on hypervisor
+    :param aggregate_info: a dictionary holding info on aggregate hypervisor belongs to
+    :param service_info: a dictionary holding info on nova compute service running on hypervisor
+    :return: a dictionary of cores/memory available for given hv
+    """
+    hv_info = {
+        "cores_available": 0,
+        "mem_available": 0,
+        "gpu_capacity": 0,
+        "core_capacity": 0,
+        "mem_capacity": 0,
+        "compute_service_status": "disabled",
+    }
+    if hypervisor and hypervisor["status"] != "disabled":
+        hv_info["cores_available"] = max(
+            0, hypervisor["vcpus"] - hypervisor["vcpus_used"]
+        )
+        hv_info["mem_available"] = max(
+            0, hypervisor["memory_size"] - hypervisor["memory_used"]
+        )
+        hv_info["core_capacity"] = hypervisor["vcpus"]
+        hv_info["mem_capacity"] = hypervisor["memory_size"]
+
+    hv_info["gpu_capacity"] = int(aggregate_info["metadata"].get("gpunum", 0))
+    hv_info["compute_service_status"] = service_info["status"]
+
+    return hv_info
+
+
+def get_flavor_requirements(flavor: Dict) -> Dict:
+    """
+    Helper function to get flavor memory/ram/gpu requirements for a VM of that type to be built on a hv
+    :param flavor: flavor to get requirements from
+    :return: dictionary of requirements
+    """
+    try:
+        flavor_reqs = {
+            "cores_required": int(flavor["vcpus"]),
+            "mem_required": int(flavor["ram"]),
+        }
+    except (ValueError, KeyError) as exp:
+        flavor_name = flavor.get("name", "Name Not Found")
+        raise RuntimeError(
+            f"could not get flavor requirements for flavor {flavor_name}"
+        ) from exp
+
+    flavor_reqs.update({
+        "gpus_required": int(
+            flavor.get("extra_specs", {}).get("accounting:gpu_num", 0)
+        ),
+    })
+    return flavor_reqs
+
+
+def get_valid_flavors_for_aggregate(flavor_list: List, aggregate: Dict) -> List:
+    """
+    Helper function that filters a list of flavors to find those that can be built on a hv belonging to a given aggregate
+    :param flavor_list: a list of flavors to check
+    :param aggregate: specifies the aggregate to find compatible flavors for
+    :return: a list of valid flavors for hosttype
+    """
+    valid_flavors = []
+    hypervisor_hosttype = aggregate["metadata"].get("hosttype", None)
+
+    if not hypervisor_hosttype:
+        return valid_flavors
+
+    for flavor in flavor_list:
+        # validate that flavor can be used on host aggregate
+        if (
+            "aggregate_instance_extra_specs:hosttype"
+            not in flavor["extra_specs"].keys()
+        ):
+            continue
+        if (
+            flavor["extra_specs"]["aggregate_instance_extra_specs:hosttype"]
+            != hypervisor_hosttype
+        ):
+            continue
+        valid_flavors.append(flavor)
+    return valid_flavors
+
+
+def convert_to_data_string(instance: str, slots_dict: Dict) -> str:
+    """
+    converts a dictionary of values into a data-string influxdb can read
+    :param slots_dict: a dictionary of slots available for each flavor
+    :param instance: which cloud the info was scraped from (prod or dev)
+    :return: a comma-separated string of key=value taken from input dictionary
+    """
+    data_string = ""
+    for flavor, slot_info in slots_dict.items():
+        data_string += (
+            f"SlotsAvailable,instance={instance.capitalize()},flavor={flavor}"
+            f" SlotsAvailable={slot_info.slots_available}i"
+            f",maxSlotsAvailable={slot_info.max_gpu_slots_capacity}i"
+            f",usedSlots={slot_info.estimated_gpu_slots_used}i"
+            f",enabledSlots={slot_info.max_gpu_slots_capacity_enabled}i\n"
+        )
+    return data_string
+
+
+def calculate_slots_on_hv(flavor_name: str, flavor_reqs: Dict, hv_info: Dict) -> SlottifierEntry:
+    """
+    Helper function that calculates available slots for a flavor on a given hypervisor
+    :param flavor_name: name of flavor
+    :param flavor_reqs: dictionary of memory, cpu, and gpu requirements of flavor
+    :param hv_info: dictionary of memory, cpu, and gpu capacity/availability on hypervisor
+    and whether hv compute service is enabled
+    :return: A dataclass holding slottifier information to update with
+    """
+    slots_dataclass = SlottifierEntry()
+
+    slots_available = min(
+        hv_info["cores_available"] // flavor_reqs["cores_required"],
+        hv_info["mem_available"] // flavor_reqs["mem_required"],
+    )
+
+    if "g-" in flavor_name:
+        # workaround for bugs where gpu number not specified
+        if flavor_reqs["gpus_required"] == 0:
+            raise RuntimeError(f"gpu flavor {flavor_name} does not have 'gpunum' metadata")
+
+        theoretical_gpu_slots_available = (
+            hv_info["gpu_capacity"] // flavor_reqs["gpus_required"]
+        )
+
+        estimated_slots_used = (
+            min(
+                hv_info["core_capacity"] // flavor_reqs["cores_required"],
+                hv_info["mem_capacity"] // flavor_reqs["mem_required"],
+            )
+            - slots_available
+        )
+
+        # estimated number of GPU slots used - based off of how much cpu/mem is currently being used
+        # assumes that all VMs on the HV contain only this flavor - which may not be true
+        # if slots used is greater than gpu slots available we assume all gpus are being used
+        slots_dataclass.estimated_gpu_slots_used = min(
+            theoretical_gpu_slots_available, estimated_slots_used
+        )
+
+        slots_dataclass.max_gpu_slots_capacity = hv_info["gpu_capacity"]
+
+        if hv_info["compute_service_status"] == "enabled":
+            slots_dataclass.max_gpu_slots_capacity_enabled = hv_info["gpu_capacity"]
+
+        slots_available = min(
+            slots_available,
+            theoretical_gpu_slots_available - slots_dataclass.estimated_gpu_slots_used,
+        )
+
+    if hv_info["compute_service_status"] == "enabled":
+        slots_dataclass.slots_available = slots_available
+    return slots_dataclass
+
+
+def get_openstack_resources(instance: str) -> Dict:
+    """
+    This is a helper function that gets information from openstack in one go to calculate flavor slots. This
+    is quicker than getting resources one at a time
+    :param instance: which cloud to calculate slots for
+    :return: a dictionary containing 4 entries, key is an openstack component, value is a list of all components of that
+    type: compute_services, aggregates, hypervisors and flavors
+    """
+    conn = openstack.connect(cloud=instance)
+
+    # we get all openstack info first because it is quicker than getting them one at a time
+    # dictionaries prevent duplicates
+
+    all_compute_services = {
+        service["id"]: service for service in conn.compute.services()
+    }
+    all_aggregates = {
+        aggregate["id"]: aggregate for aggregate in conn.compute.aggregates()
+    }
+
+    # needs to be list_hypervisors and not conn.compute.hypervisors otherwise vcpu/mem info is empty for some reason
+    all_hypervisors = {h["id"]: h for h in conn.list_hypervisors()}
+    all_flavors = {
+        flavor["id"]: flavor for flavor in conn.compute.flavors(get_extra_specs=True)
+    }
+
+    return {
+        "compute_services": list(all_compute_services.values()),
+        "aggregates": list(all_aggregates.values()),
+        "hypervisors": list(all_hypervisors.values()),
+        "flavors": list(all_flavors.values()),
    }


def get_all_hv_info_for_aggregate(
    aggregate: Dict, all_compute_services: List, all_hypervisors: List
) -> List:
    """
    helper function to get all useful info from hypervisors belonging to a given aggregate
    :param aggregate: aggregate that we want to get hvs for
    :param all_compute_services: all compute services to validate hvs against
        - ensure they have a nova_compute service attached
    :param all_hypervisors: all hypervisors to get hv info from
    :return: list of dictionaries of hypervisor information for calculating slots
    """

    valid_hvs = []
    for host in aggregate["hosts"]:

        # find the compute service entry for this aggregate host
        # NOTE(review): no break - if several services share a host name the last
        # match wins; assumes host names are unique across compute services
        host_compute_service = None
        for cs in all_compute_services:
            if cs["host"] == host:
                host_compute_service = cs

        # hosts without a nova-compute service cannot host VMs - skip them
        if not host_compute_service:
            continue

        # match the compute service back to its hypervisor record by name
        hv_obj = None
        for hv in all_hypervisors:
            if host_compute_service["host"] == hv["name"]:
                hv_obj = hv
        if not hv_obj:
            continue

        valid_hvs.append(get_hv_info(hv_obj, aggregate, host_compute_service))
    return valid_hvs


def update_slots(flavors: List, host_info_list: List, slots_dict: Dict) -> Dict:
    """
    update total slots by calculating slots available for a set of flavors on a set of hosts
    :param flavors: a list of flavors
    :param host_info_list: a list of dictionaries holding info about a hypervisor capacity/availability
    :param slots_dict: dictionary of slot info to update
    :return: the updated slots_dict, mapping flavor name to its accumulated SlottifierEntry
    """

    for flavor in flavors:
        flavor_reqs = get_flavor_requirements(flavor)
        for hv in host_info_list:
            # SlottifierEntry defines __add__, so per-hypervisor results accumulate
            slots_dict[flavor["name"]] += calculate_slots_on_hv(
                flavor["name"], flavor_reqs, hv
            )
    return slots_dict


def get_slottifier_details(instance: str) -> str:
    """
    This function calculates slots available for each flavor in openstack and outputs results in
    data string format which can be posted to InfluxDB
    :param instance: which cloud to calculate slots for
    :return: A data string of scraped info
    """
    all_openstack_info = get_openstack_resources(instance)

    slots_dict \
= {
        flavor["name"]: SlottifierEntry() for flavor in all_openstack_info["flavors"]
    }
    for aggregate in all_openstack_info["aggregates"]:
        # only consider flavors that can actually be scheduled on this aggregate
        valid_flavors = get_valid_flavors_for_aggregate(
            all_openstack_info["flavors"], aggregate
        )

        aggregate_host_info = get_all_hv_info_for_aggregate(
            aggregate,
            all_openstack_info["compute_services"],
            all_openstack_info["hypervisors"],
        )

        slots_dict = update_slots(valid_flavors, aggregate_host_info, slots_dict)

    return convert_to_data_string(instance, slots_dict)


def main(user_args: List):
    """
    send slottifier info to influx
    :param user_args: args passed into script by user
    """
    influxdb_args = parse_args(user_args, description="Get All Service Statuses")
    run_scrape(influxdb_args, get_slottifier_details)


if __name__ == "__main__":
    main(sys.argv[1:])
diff --git a/MonitoringTools/usr/local/bin/slottifier_entry.py b/MonitoringTools/usr/local/bin/slottifier_entry.py
new file mode 100644
index 00000000..ccf82a6a
--- /dev/null
+++ b/MonitoringTools/usr/local/bin/slottifier_entry.py
@@ -0,0 +1,44 @@
from dataclasses import dataclass


@dataclass
class SlottifierEntry:
    """
    A dataclass to hold slottifier information
    :param slots_available: Number of slots available for a flavor
    :param estimated_gpu_slots_used: Number of gpu slots currently used that could host this flavor
        - estimated by amount of cores/mem already used by hvs as there's no way in openstack to find this out directly
    :param max_gpu_slots_capacity: Number of gpus available on all compatible hypervisors to build this flavor on
    :param max_gpu_slots_capacity_enabled: like max_gpu_slots_capacity, but only counting hosts with nova-compute
        service enabled
    """

    # all counters default to zero so a fresh entry is the identity for __add__
    slots_available: int = 0
    estimated_gpu_slots_used: int = 0
    max_gpu_slots_capacity: int = 0
    max_gpu_slots_capacity_enabled: int = 0

    def __add__(self, other):
        """
        dunder method to add two SlottifierEntry values together.
        :param other: Another SlottifierEntry dataclass to add
        :return: A SlottifierEntry dataclass where each attribute value from current dataclass and given dataclass are
            added together
        :raises TypeError: if other is not a SlottifierEntry
        """
        # NOTE(review): the usual dunder convention is to return NotImplemented for
        # foreign types; raising TypeError directly is kept as existing tests may pin it
        if not isinstance(other, SlottifierEntry):
            raise TypeError(
                f"Unsupported operand type for +: '{type(self)}' and '{type(other)}'"
            )

        # attribute-wise sum; does not mutate either operand
        return SlottifierEntry(
            slots_available=self.slots_available + other.slots_available,
            estimated_gpu_slots_used=self.estimated_gpu_slots_used
            + other.estimated_gpu_slots_used,
            max_gpu_slots_capacity=self.max_gpu_slots_capacity
            + other.max_gpu_slots_capacity,
            max_gpu_slots_capacity_enabled=self.max_gpu_slots_capacity_enabled
            + other.max_gpu_slots_capacity_enabled,
        )