Skip to content

Commit

Permalink
Add connection refresh option (#11507)
Browse files Browse the repository at this point in the history
* Plan out vsphere timeout logic

* Add code

* remove cached connection on failure

* validate models and configs

* fix typing

* add test

* debug log and style fix

* adjust test times

* move type

* remove unneeded test case and fixtures

* shorten sleep time

* lengthen test time

Co-authored-by: steveny91 <steven.yuen@datadoghq.com>
  • Loading branch information
yzhan289 and steveny91 authored Sep 15, 2022
1 parent b2f176c commit acf16e1
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 1 deletion.
6 changes: 6 additions & 0 deletions vsphere/assets/configuration/spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,12 @@ files:
when `collect_tags` instance config is enabled.
options:
- template: instances/http
- name: connection_reset_timeout
description: |
The time interval, in seconds, at which the Agent re-authenticates using the provided credentials.
value:
type: integer
example: 900
- template: instances/default
overrides:
empty_default_hostname.display_priority: 1
Expand Down
1 change: 1 addition & 0 deletions vsphere/datadog_checks/vsphere/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(self, instance, init_config, log):
self.refresh_metrics_metadata_cache_interval = instance.get(
'refresh_metrics_metadata_cache_interval', DEFAULT_REFRESH_METRICS_METADATA_CACHE_INTERVAL
)
self.connection_reset_timeout = instance.get("connection_reset_timeout", 900)

# Always collect events if `collect_events_only` is true
if self.collect_events_only:
Expand Down
4 changes: 4 additions & 0 deletions vsphere/datadog_checks/vsphere/config_models/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def instance_collection_type(field, value):
return 'realtime'


def instance_connection_reset_timeout(field, value):
    """Return the default ``connection_reset_timeout`` for an instance, in seconds.

    Both ``field`` and ``value`` are accepted for signature compatibility with the
    other default providers in this module; neither is consulted.
    """
    return 900


def instance_disable_generic_tags(field, value):
    """Return the default for the ``disable_generic_tags`` instance option.

    Both ``field`` and ``value`` are accepted for signature compatibility with the
    other default providers in this module; neither is consulted.
    """
    return False

Expand Down
1 change: 1 addition & 0 deletions vsphere/datadog_checks/vsphere/config_models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ class Config:
collect_tags: Optional[bool]
collection_level: Optional[int]
collection_type: Optional[str]
connection_reset_timeout: Optional[int]
disable_generic_tags: Optional[bool]
empty_default_hostname: bool
excluded_host_tags: Optional[Sequence[str]]
Expand Down
5 changes: 5 additions & 0 deletions vsphere/datadog_checks/vsphere/data/conf.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,11 @@ instances:
#
# allow_redirects: true

## @param connection_reset_timeout - integer - optional - default: 900
## The time interval, in seconds, at which the Agent re-authenticates using the provided credentials.
#
# connection_reset_timeout: 900

## @param tags - list of strings - optional
## A list of tags to attach to every metric and service check emitted by this instance.
##
Expand Down
13 changes: 12 additions & 1 deletion vsphere/datadog_checks/vsphere/vsphere.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from datadog_checks.base import AgentCheck, is_affirmative, to_string
from datadog_checks.base.checks.libs.timer import Timer
from datadog_checks.base.utils.time import get_current_datetime
from datadog_checks.base.utils.time import get_current_datetime, get_timestamp
from datadog_checks.vsphere.api import APIConnectionError, VSphereAPI
from datadog_checks.vsphere.api_rest import VSphereRestAPI
from datadog_checks.vsphere.cache import InfrastructureCache, MetricsMetadataCache
Expand Down Expand Up @@ -90,6 +90,8 @@ def __init__(self, *args, **kwargs):
self.thread_pool = ThreadPoolExecutor(max_workers=self._config.threads_count)
self.check_initializations.append(self.initiate_api_connection)

self.last_connection_time = get_timestamp()

def initiate_api_connection(self):
# type: () -> None
try:
Expand All @@ -99,6 +101,8 @@ def initiate_api_connection(self):
self.api = VSphereAPI(self._config, self.log)
self.log.debug("Connected")
except APIConnectionError:
# Clear the API connection object if the authentication fails
self.api = cast(VSphereAPI, None)
self.log.error("Cannot authenticate to vCenter API. The check will not run.")
self.service_check(SERVICE_CHECK_NAME, AgentCheck.CRITICAL, tags=self._config.base_tags, hostname=None)
raise
Expand Down Expand Up @@ -606,6 +610,13 @@ def check(self, _):
# type: (Any) -> None
self._hostname = datadog_agent.get_hostname()
# Assert the health of the vCenter API by getting the version, and submit the service_check accordingly

now = get_timestamp()
if self.last_connection_time + self._config.connection_reset_timeout <= now or self.api is None:
self.last_connection_time = now
self.log.debug("Refreshing vCenter connection")
self.initiate_api_connection()

try:
version_info = self.api.get_version()
if self.is_metadata_collection_enabled():
Expand Down
30 changes: 30 additions & 0 deletions vsphere/tests/test_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import datetime as dt
import json
import os
import time

import mock
import pytest
Expand Down Expand Up @@ -431,3 +432,32 @@ def test_specs_start_time(aggregator, dd_run_check, historical_instance):
assert len(start_times) != 0
for start_time in start_times:
assert start_time == (mock_time - dt.timedelta(hours=2))


@pytest.mark.parametrize(
    'test_timeout, expected_result',
    [
        (1, False),
        (2, False),
        (20, True),
    ],
)
@pytest.mark.usefixtures('mock_type', 'mock_api')
def test_connection_refresh(aggregator, dd_run_check, realtime_instance, test_timeout, expected_result):
    # Ensure the connection is refreshed once `connection_reset_timeout` seconds have elapsed.
    # Run the check once to obtain a connection object, wait, run the check again, and then
    # verify whether the check still holds the very same connection object.
    realtime_instance['connection_reset_timeout'] = test_timeout
    check = VSphereCheck('vsphere', {}, [realtime_instance])
    dd_run_check(check)
    first_connection = check.api

    # Must outlast the shorter timeouts above while staying well under the longest one.
    time.sleep(2)

    dd_run_check(check)

    # Identity, not equality: a refreshed connection is a distinct object even if an
    # `__eq__` implementation were to consider the old and new connections equal.
    same_object = first_connection is check.api

    assert same_object == expected_result

0 comments on commit acf16e1

Please sign in to comment.