From f298f87f0ccc6555b8622c6b4b7d555613526575 Mon Sep 17 00:00:00 2001 From: Daniel Nelson Date: Mon, 11 Mar 2019 11:55:20 -0700 Subject: [PATCH] Update changelog; vsphere readme --- CHANGELOG.md | 4 ++- plugins/inputs/vsphere/README.md | 58 ++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eac3abe752a5b..6411fe8bff159 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,10 @@ #### Bugfixes -- [#5448](https://github.com/influxdata/telegraf/pull/5448): Show error when TLS configuration cannot be loaded. +- [#5448](https://github.com/influxdata/telegraf/issues/5448): Show error when TLS configuration cannot be loaded. - [#5543](https://github.com/influxdata/telegraf/pull/5543): Add Base64-encoding/decoding for Google Cloud PubSub plugins. +- [#5565](https://github.com/influxdata/telegraf/issues/5565): Fix type compatibility in vsphere plugin with use_int_samples option. +- [#5492](https://github.com/influxdata/telegraf/issues/5492): Fix vsphere input shows failed task in vCenter. ## v1.10 [2019-03-05] diff --git a/plugins/inputs/vsphere/README.md b/plugins/inputs/vsphere/README.md index e14de2cee5ae8..ae7cdc37b05fe 100644 --- a/plugins/inputs/vsphere/README.md +++ b/plugins/inputs/vsphere/README.md @@ -1,7 +1,7 @@ # VMware vSphere Input Plugin The VMware vSphere plugin uses the vSphere API to gather metrics from multiple vCenter servers. - + * Clusters * Hosts * VMs @@ -9,7 +9,7 @@ The VMware vSphere plugin uses the vSphere API to gather metrics from multiple v ## Configuration -NOTE: To disable collection of a specific resource type, simply exclude all metrics using the XX_metric_exclude. +NOTE: To disable collection of a specific resource type, simply exclude all metrics using the XX_metric_exclude. 
For example, to disable collection of VMs, add this: ``` @@ -52,7 +52,7 @@ vm_metric_exclude = [ "*" ] "net.droppedRx.summation", "net.droppedTx.summation", "net.usage.average", - "power.power.average", + "power.power.average", "virtualDisk.numberReadAveraged.average", "virtualDisk.numberWriteAveraged.average", "virtualDisk.read.average", @@ -67,7 +67,7 @@ vm_metric_exclude = [ "*" ] # vm_metric_exclude = [] ## Nothing is excluded by default # vm_instances = true ## true by default - ## Hosts + ## Hosts ## Typical host metrics (if omitted or empty, all metrics are collected) # host_include = [ "/*/host/**"] # Inventory path to hosts to collect (by default all are collected) host_metric_include = [ @@ -121,25 +121,25 @@ vm_metric_exclude = [ "*" ] # host_metric_exclude = [] ## Nothing excluded by default # host_instances = true ## true by default - ## Clusters + ## Clusters # cluster_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected) # cluster_metric_include = [] ## if omitted or empty, all metrics are collected # cluster_metric_exclude = [] ## Nothing excluded by default - # cluster_instances = false ## false by default + # cluster_instances = false ## false by default - ## Datastores + ## Datastores # cluster_include = [ "/*/datastore/**"] # Inventory path to datastores to collect (by default all are collected) # datastore_metric_include = [] ## if omitted or empty, all metrics are collected # datastore_metric_exclude = [] ## Nothing excluded by default - # datastore_instances = false ## false by default + # datastore_instances = false ## false by default ## Datacenters # datacenter_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected) datacenter_metric_include = [] ## if omitted or empty, all metrics are collected datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default. 
- # datacenter_instances = false ## false by default + # datacenter_instances = false ## false by default - ## Plugin Settings + ## Plugin Settings ## separator character to use for measurement and field names (default: "_") # separator = "_" @@ -166,17 +166,25 @@ vm_metric_exclude = [ "*" ] ## timeout applies to any of the api request made to vcenter # timeout = "60s" + ## When set to true, all samples are sent as integers. This makes the output + ## data types backwards compatible with Telegraf 1.9 or lower. Normally all + ## samples from vCenter, with the exception of percentages, are integer + ## values, but under some conditions, some averaging takes place internally in + ## the plugin. Setting this flag to "false" will send values as floats to + ## preserve the full precision when averaging takes place. + # use_int_samples = true + ## Optional SSL Config # ssl_ca = "/path/to/cafile" # ssl_cert = "/path/to/certfile" # ssl_key = "/path/to/keyfile" ## Use SSL but skip chain & host verification # insecure_skip_verify = false -``` +``` ### Objects and Metrics Per Query -By default, in vCenter's configuration a limit is set to the number of entities that are included in a performance chart query. Default settings for vCenter 6.5 and above is 256. Prior versions of vCenter have this set to 64. +By default, in vCenter's configuration a limit is set to the number of entities that are included in a performance chart query. Default settings for vCenter 6.5 and above is 256. Prior versions of vCenter have this set to 64. A vCenter administrator can change this setting, see this [VMware KB article](https://kb.vmware.com/s/article/2107096) for more information. Any modification should be reflected in this plugin by modifying the parameter `max_query_objects` @@ -233,23 +241,23 @@ to a file system. A vSphere inventory has a structure similar to this: #### Using Inventory Paths Using familiar UNIX-style paths, one could select e.g. VM2 with the path ```/DC0/vm/VM2```. 
-Often, we want to select a group of resource, such as all the VMs in a folder. We could use the path ```/DC0/vm/Folder1/*``` for that. +Often, we want to select a group of resources, such as all the VMs in a folder. We could use the path ```/DC0/vm/Folder1/*``` for that. Another possibility is to select objects using a partial name, such as ```/DC0/vm/Folder1/hadoop*``` yielding all vms in Folder1 with a name starting with "hadoop". Finally, due to the arbitrary nesting of the folder structure, we need a "recursive wildcard" for traversing multiple folders. We use the "**" symbol for that. If we want to look for a VM with a name starting with "hadoop" in any folder, we could use the following path: ```/DC0/vm/**/hadoop*``` #### Multiple paths to VMs -As we can see from the example tree above, VMs appear both in its on folder under the datacenter, as well as under the hosts. This is useful when you like to select VMs on a specific host. For example, ```/DC0/host/Cluster1/Host1/hadoop*``` selects all VMs with a name starting with "hadoop" that are running on Host1. +As we can see from the example tree above, VMs appear both in their own folder under the datacenter, as well as under the hosts. This is useful when you would like to select VMs on a specific host. For example, ```/DC0/host/Cluster1/Host1/hadoop*``` selects all VMs with a name starting with "hadoop" that are running on Host1. We can extend this to looking at a cluster level: ```/DC0/host/Cluster1/*/hadoop*```. This selects any VM matching "hadoop*" on any host in Cluster1. ## Performance Considerations ### Realtime vs. historical metrics -vCenter keeps two different kinds of metrics, known as realtime and historical metrics. +vCenter keeps two different kinds of metrics, known as realtime and historical metrics. -* Realtime metrics: Avaialable at a 20 second granularity. These metrics are stored in memory and are very fast and cheap to query. 
Our tests have shown that a complete set of realtime metrics for 7000 virtual machines can be obtained in less than 20 seconds. Realtime metrics are only available on **ESXi hosts** and **virtual machine** resources. Realtime metrics are only stored for 1 hour in vCenter. +* Realtime metrics: Available at a 20 second granularity. These metrics are stored in memory and are very fast and cheap to query. Our tests have shown that a complete set of realtime metrics for 7000 virtual machines can be obtained in less than 20 seconds. Realtime metrics are only available on **ESXi hosts** and **virtual machine** resources. Realtime metrics are only stored for 1 hour in vCenter. * Historical metrics: Available at a 5 minute, 30 minutes, 2 hours and 24 hours rollup levels. The vSphere Telegraf plugin only uses the 5 minute rollup. These metrics are stored in the vCenter database and can be expensive and slow to query. Historical metrics are the only type of metrics available for **clusters**, **datastores** and **datacenters**. For more information, refer to the vSphere documentation here: https://pubs.vmware.com/vsphere-50/index.jsp?topic=%2Fcom.vmware.wssdk.pg.doc_50%2FPG_Ch16_Performance.18.2.html @@ -293,7 +301,7 @@ This will disrupt the metric collection and can result in missed samples. The be host_metric_exclude = ["*"] # Exclude realtime metrics vm_metric_exclude = ["*"] # Exclude realtime metrics - max_query_metrics = 256 + max_query_metrics = 256 collect_concurrency = 3 ``` @@ -303,7 +311,7 @@ The ```max_query_metrics``` determines the maximum number of metrics to attempt ```2019-01-21T03:24:18Z W! [input.vsphere] Configured max_query_metrics is 256, but server limits it to 64. Reducing.``` -You may ask a vCenter administrator to increase this limit to help boost performance. +You may ask a vCenter administrator to increase this limit to help boost performance. 
### Cluster metrics and the max_query_metrics setting @@ -313,7 +321,7 @@ Cluster metrics are handled a bit differently by vCenter. They are aggregated fr There are two ways of addressing this: * Ask your vCenter administrator to set ```config.vpxd.stats.maxQueryMetrics``` to a number that's higher than the total number of virtual machines managed by a vCenter instance. -* Exclude the cluster metrics and use either the basicstats aggregator to calculate sums and averages per cluster or use queries in the visualization tool to obtain the same result. +* Exclude the cluster metrics and use either the basicstats aggregator to calculate sums and averages per cluster or use queries in the visualization tool to obtain the same result. ### Concurrency settings @@ -321,7 +329,7 @@ The vSphere plugin allows you to specify two concurrency settings: * ```collect_concurrency```: The maximum number of simultaneous queries for performance metrics allowed per resource type. * ```discover_concurrency```: The maximum number of simultaneous queries for resource discovery allowed. -While a higher level of concurrency typically has a positive impact on performance, increasing these numbers too much can cause performance issues at the vCenter server. A rule of thumb is to set these parameters to the number of virtual machines divided by 1500 and rounded up to the nearest integer. +While a higher level of concurrency typically has a positive impact on performance, increasing these numbers too much can cause performance issues at the vCenter server. A rule of thumb is to set these parameters to the number of virtual machines divided by 1500 and rounded up to the nearest integer. 
## Measurements & Fields @@ -342,7 +350,7 @@ While a higher level of concurrency typically has a positive impact on performan - Storage Path: commands, latency, # reads/writes - System Resources: cpu active, cpu max, cpu running, cpu usage, mem allocated, mem consumed, mem shared, swap - System: uptime - - Flash Module: active VMDKs + - Flash Module: active VMDKs - VM Stats: - CPU: demand, usage, readiness, cost, mhz - Datastore: latency, # reads/writes @@ -352,12 +360,12 @@ While a higher level of concurrency typically has a positive impact on performan - Power: energy, usage - Res CPU: active, max, running - System: operating system uptime, uptime - - Virtual Disk: seeks, # reads/writes, latency, load + - Virtual Disk: seeks, # reads/writes, latency, load - Datastore stats: - - Disk: Capacity, provisioned, used + - Disk: Capacity, provisioned, used For a detailed list of commonly available metrics, please refer to [METRICS.md](METRICS.md) - + ## Tags - all metrics @@ -380,7 +388,7 @@ For a detailed list of commonly available metrics, please refer to [METRICS.md]( - interface (name of network interface) - storageAdapter stats for Host - adapter (name of storage adapter) -- storagePath stats for Host +- storagePath stats for Host - path (id of storage path) - sys.resource* stats for Host - resource (resource type)