forked from cerndb/SparkPlugins
-
Notifications
You must be signed in to change notification settings - Fork 0
/
HDFSMetrics.scala
69 lines (58 loc) · 2.41 KB
/
HDFSMetrics.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
package ch.cern
import java.util.{Map => JMap}
import scala.collection.JavaConverters._
import com.codahale.metrics.{Gauge, MetricRegistry}
import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin, PluginContext, SparkPlugin}
import org.apache.spark.SparkContext
import org.apache.hadoop.fs.FileSystem.getGlobalStorageStatistics
/**
* Monitor HDFS I/O metrics using Hadoop's API FileSystem.getGlobalStorageStatistics
* FileSystem.getGlobalStorageStatistics has been introduced in Hadoop (client) version 2.8
*
* Note: use with Spark 3.x built with Hadoop 3.2 profile (or higher, when available)
* or use it with Spark built without Hadoop and use your own Hadoop client version 2.8 or higher.
*/
class HDFSMetrics extends SparkPlugin {

  // Name under which the file system's statistics are looked up in
  // Hadoop's global storage statistics.
  val fsName: String = "hdfs"

  // Names of the statistics exposed as gauges, one gauge per entry.
  val fsMetrics: Seq[String] = Seq("bytesRead", "bytesWritten", "readOps", "writeOps", "largeReadOps",
    "bytesReadLocalHost", "bytesReadDistanceOfOneOrTwo", "bytesReadDistanceOfThreeOrFour",
    "bytesReadDistanceOfFiveOrLarger", "bytesReadErasureCoded")

  /**
   * Registers one gauge per entry of `fsMetrics` in the plugin's metric registry.
   *
   * Each gauge reads its value lazily, every time it is polled:
   *  - getGlobalStorageStatistics.get(fsName) returns null until the first use of
   *    the file system, in which case the gauge reports 0;
   *  - StorageStatistics.getLong(name) returns null when the statistic is not
   *    tracked by the Hadoop client in use; the gauge reports 0 in that case too,
   *    instead of failing with a NullPointerException while unboxing the value.
   *
   * @param myContext plugin context providing the metric registry to register into
   */
  def hdfsMetrics(myContext: PluginContext): Unit = {
    val metricRegistry = myContext.metricRegistry
    fsMetrics.foreach { name =>
      metricRegistry.register(MetricRegistry.name(name), new Gauge[Long] {
        override def getValue: Long =
          Option(getGlobalStorageStatistics.get(fsName))
            // Wrap the boxed java.lang.Long before unboxing: it may be null.
            .flatMap(stats => Option[java.lang.Long](stats.getLong(name)))
            .fold(0L)(_.longValue)
      })
    }
  }

  /**
   * Returns the plugin's driver-side component.
   *
   * The metrics are registered on the driver unless
   * --conf spark.cernSparkPlugin.registerOnDriver=false is set (the default is true).
   */
  override def driverPlugin(): DriverPlugin = {
    new DriverPlugin() {
      override def init(sc: SparkContext, myContext: PluginContext): JMap[String, String] = {
        val registerOnDriver =
          myContext.conf.getBoolean("spark.cernSparkPlugin.registerOnDriver", true)
        if (registerOnDriver) {
          hdfsMetrics(myContext)
        }
        // No extra configuration is passed on to the executor-side plugins.
        Map.empty[String, String].asJava
      }
    }
  }

  /**
   * Returns the plugin's executor-side component, which always registers the metrics.
   */
  override def executorPlugin(): ExecutorPlugin = {
    new ExecutorPlugin() {
      override def init(myContext: PluginContext, extraConf: JMap[String, String]): Unit = {
        hdfsMetrics(myContext)
      }
    }
  }
}