Add version for config of History
beliefer committed Mar 1, 2020
1 parent b2a7410 commit 68a0aa1
Showing 2 changed files with 53 additions and 1 deletion.
28 changes: 28 additions & 0 deletions core/src/main/scala/org/apache/spark/internal/config/History.scala
@@ -26,53 +26,64 @@ private[spark] object History {
val DEFAULT_LOG_DIR = "file:/tmp/spark-events"

val HISTORY_LOG_DIR = ConfigBuilder("spark.history.fs.logDirectory")
.version("1.1.0")
.stringConf
.createWithDefault(DEFAULT_LOG_DIR)

val SAFEMODE_CHECK_INTERVAL_S = ConfigBuilder("spark.history.fs.safemodeCheck.interval")
.version("1.6.0")
.timeConf(TimeUnit.SECONDS)
.createWithDefaultString("5s")

val UPDATE_INTERVAL_S = ConfigBuilder("spark.history.fs.update.interval")
.version("1.4.0")
.timeConf(TimeUnit.SECONDS)
.createWithDefaultString("10s")

val CLEANER_ENABLED = ConfigBuilder("spark.history.fs.cleaner.enabled")
.version("1.3.0")
.booleanConf
.createWithDefault(false)

val CLEANER_INTERVAL_S = ConfigBuilder("spark.history.fs.cleaner.interval")
.version("1.4.0")
.timeConf(TimeUnit.SECONDS)
.createWithDefaultString("1d")

val MAX_LOG_AGE_S = ConfigBuilder("spark.history.fs.cleaner.maxAge")
.version("1.4.0")
.timeConf(TimeUnit.SECONDS)
.createWithDefaultString("7d")

val MAX_LOG_NUM = ConfigBuilder("spark.history.fs.cleaner.maxNum")
.doc("The maximum number of log files in the event log directory.")
.version("3.0.0")
.intConf
.createWithDefault(Int.MaxValue)

val LOCAL_STORE_DIR = ConfigBuilder("spark.history.store.path")
.doc("Local directory where to cache application history information. By default this is " +
"not set, meaning all history information will be kept in memory.")
.version("2.3.0")
.stringConf
.createOptional

val MAX_LOCAL_DISK_USAGE = ConfigBuilder("spark.history.store.maxDiskUsage")
.version("2.3.0")
.bytesConf(ByteUnit.BYTE)
.createWithDefaultString("10g")

val HISTORY_SERVER_UI_PORT = ConfigBuilder("spark.history.ui.port")
.doc("Web UI port to bind Spark History Server")
.version("1.0.0")
.intConf
.createWithDefault(18080)

val FAST_IN_PROGRESS_PARSING =
ConfigBuilder("spark.history.fs.inProgressOptimization.enabled")
.doc("Enable optimized handling of in-progress logs. This option may leave finished " +
"applications that fail to rename their event logs listed as in-progress.")
.version("2.4.0")
.booleanConf
.createWithDefault(true)

@@ -81,6 +92,7 @@ private[spark] object History {
.doc("How many bytes to parse at the end of log files looking for the end event. " +
"This is used to speed up generation of application listings by skipping unnecessary " +
"parts of event log files. It can be disabled by setting this config to 0.")
.version("2.4.0")
.bytesConf(ByteUnit.BYTE)
.createWithDefaultString("1m")

@@ -90,6 +102,7 @@ private[spark] object History {
"By default, all event log files will be retained. Please set the configuration " +
s"and ${EVENT_LOG_ROLLING_MAX_FILE_SIZE.key} accordingly if you want to control " +
"the overall size of event log files.")
.version("3.0.0")
.intConf
.checkValue(_ > 0, "Max event log files to retain should be higher than 0.")
.createWithDefault(Integer.MAX_VALUE)
@@ -99,54 +112,67 @@ private[spark] object History {
.doc("The threshold score to determine whether it's good to do the compaction or not. " +
"The compaction score is calculated in analyzing, and being compared to this value. " +
"Compaction will proceed only when the score is higher than the threshold value.")
.version("3.0.0")
.internal()
.doubleConf
.createWithDefault(0.7d)

val DRIVER_LOG_CLEANER_ENABLED = ConfigBuilder("spark.history.fs.driverlog.cleaner.enabled")
.version("3.0.0")
.fallbackConf(CLEANER_ENABLED)

val DRIVER_LOG_CLEANER_INTERVAL = ConfigBuilder("spark.history.fs.driverlog.cleaner.interval")
.version("3.0.0")
.fallbackConf(CLEANER_INTERVAL_S)

val MAX_DRIVER_LOG_AGE_S = ConfigBuilder("spark.history.fs.driverlog.cleaner.maxAge")
.version("3.0.0")
.fallbackConf(MAX_LOG_AGE_S)

val HISTORY_SERVER_UI_ACLS_ENABLE = ConfigBuilder("spark.history.ui.acls.enable")
.version("1.0.1")
.booleanConf
.createWithDefault(false)

val HISTORY_SERVER_UI_ADMIN_ACLS = ConfigBuilder("spark.history.ui.admin.acls")
.version("2.1.1")
.stringConf
.toSequence
.createWithDefault(Nil)

val HISTORY_SERVER_UI_ADMIN_ACLS_GROUPS = ConfigBuilder("spark.history.ui.admin.acls.groups")
.version("2.1.1")
.stringConf
.toSequence
.createWithDefault(Nil)

val NUM_REPLAY_THREADS = ConfigBuilder("spark.history.fs.numReplayThreads")
.version("2.0.0")
.intConf
.createWithDefaultFunction(() => Math.ceil(Runtime.getRuntime.availableProcessors() / 4f).toInt)

val RETAINED_APPLICATIONS = ConfigBuilder("spark.history.retainedApplications")
.version("1.0.0")
.intConf
.createWithDefault(50)

val PROVIDER = ConfigBuilder("spark.history.provider")
.version("1.1.0")
.stringConf
.createOptional

val KERBEROS_ENABLED = ConfigBuilder("spark.history.kerberos.enabled")
.version("1.0.1")
.booleanConf
.createWithDefault(false)

val KERBEROS_PRINCIPAL = ConfigBuilder("spark.history.kerberos.principal")
.version("1.0.1")
.stringConf
.createOptional

val KERBEROS_KEYTAB = ConfigBuilder("spark.history.kerberos.keytab")
.version("1.0.1")
.stringConf
.createOptional

@@ -156,6 +182,7 @@ private[spark] object History {
"some path variables via patterns which can vary on cluster manager. Please check the " +
"documentation for your cluster manager to see which patterns are supported, if any. " +
"This configuration has no effect on a live application, it only affects the history server.")
.version("3.0.0")
.stringConf
.createOptional

@@ -165,6 +192,7 @@ private[spark] object History {
s"${CUSTOM_EXECUTOR_LOG_URL.key}, to incomplete application as well. " +
"Even if this is true, this still only affects the behavior of the history server, " +
"not running spark applications.")
.version("3.0.0")
.booleanConf
.createWithDefault(true)
}
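
Every hunk above follows the same shape: an existing config entry gains a `.version()` call recording the Spark release that first shipped the property. A minimal sketch of that builder chain, using a hypothetical property name (`ConfigBuilder` is `private[spark]`, so this only compiles inside the `org.apache.spark` namespace):

```scala
package org.apache.spark.internal.config

import java.util.concurrent.TimeUnit

private[spark] object ExampleConfig {
  // Hypothetical entry mirroring the pattern in History.scala above:
  // .doc() describes the property, .version() records the release that
  // introduced it, and the typed terminator (.intConf, .timeConf(...),
  // .booleanConf, ...) fixes the value type and its default.
  val EXAMPLE_INTERVAL = ConfigBuilder("spark.history.example.interval")
    .doc("Illustrative property; not a real Spark config.")
    .version("3.0.0")
    .timeConf(TimeUnit.SECONDS)
    .createWithDefaultString("10s")
}
```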
26 changes: 25 additions & 1 deletion docs/monitoring.md
@@ -143,13 +143,14 @@
Security options for the Spark History Server are covered in more detail in the
[Security](security.html#web-ui) page.

<table class="table">
<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>
<tr>
<td>spark.history.provider</td>
<td><code>org.apache.spark.deploy.history.FsHistoryProvider</code></td>
<td>Name of the class implementing the application history backend. Currently there is only
one implementation, provided by Spark, which looks for application logs stored in the
file system.</td>
<td>1.1.0</td>
</tr>
<tr>
<td>spark.history.fs.logDirectory</td>
@@ -160,6 +161,7 @@
an HDFS path <code>hdfs://namenode/shared/spark-logs</code>
or that of an alternative filesystem supported by the Hadoop APIs.
</td>
<td>1.1.0</td>
</tr>
<tr>
<td>spark.history.fs.update.interval</td>
@@ -171,6 +173,7 @@
As soon as an update has completed, listings of the completed and incomplete applications
will reflect the changes.
</td>
<td>1.4.0</td>
</tr>
<tr>
<td>spark.history.retainedApplications</td>
@@ -180,6 +183,7 @@
the oldest applications will be removed from the cache. If an application is not in the cache,
it will have to be loaded from disk if it is accessed from the UI.
</td>
<td>1.0.0</td>
</tr>
<tr>
<td>spark.history.ui.maxApplications</td>
@@ -188,13 +192,15 @@
The number of applications to display on the history summary page. Application UIs are still
available by accessing their URLs directly even if they are not displayed on the history summary page.
</td>
<td>2.0.1</td>
</tr>
<tr>
<td>spark.history.ui.port</td>
<td>18080</td>
<td>
The port to which the web interface of the history server binds.
</td>
<td>1.0.0</td>
</tr>
<tr>
<td>spark.history.kerberos.enabled</td>
@@ -203,27 +209,31 @@
Indicates whether the history server should use kerberos to log in. This is required
if the history server is accessing HDFS files on a secure Hadoop cluster.
</td>
<td>1.0.1</td>
</tr>
<tr>
<td>spark.history.kerberos.principal</td>
<td>(none)</td>
<td>
When <code>spark.history.kerberos.enabled=true</code>, specifies the kerberos principal name for the History Server.
</td>
<td>1.0.1</td>
</tr>
<tr>
<td>spark.history.kerberos.keytab</td>
<td>(none)</td>
<td>
When <code>spark.history.kerberos.enabled=true</code>, specifies the location of the kerberos keytab file for the History Server.
</td>
<td>1.0.1</td>
</tr>
<tr>
<td>spark.history.fs.cleaner.enabled</td>
<td>false</td>
<td>
Specifies whether the History Server should periodically clean up event logs from storage.
</td>
<td>1.3.0</td>
</tr>
<tr>
<td>spark.history.fs.cleaner.interval</td>
@@ -236,13 +246,15 @@
<code>spark.history.fs.cleaner.maxNum</code>, Spark tries to clean up the completed attempts
from the applications based on the order of their oldest attempt time.
</td>
<td>1.4.0</td>
</tr>
<tr>
<td>spark.history.fs.cleaner.maxAge</td>
<td>7d</td>
<td>
When <code>spark.history.fs.cleaner.enabled=true</code>, job history files older than this will be deleted when the filesystem history cleaner runs.
</td>
<td>1.4.0</td>
</tr>
<tr>
<td>spark.history.fs.cleaner.maxNum</td>
@@ -253,6 +265,7 @@
This should be smaller than the underlying file system limit like
`dfs.namenode.fs-limits.max-directory-items` in HDFS.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td>spark.history.fs.endEventReparseChunkSize</td>
@@ -262,6 +275,7 @@
This is used to speed up generation of application listings by skipping unnecessary
parts of event log files. It can be disabled by setting this config to 0.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td>spark.history.fs.inProgressOptimization.enabled</td>
@@ -270,13 +284,15 @@
Enable optimized handling of in-progress logs. This option may leave finished
applications that fail to rename their event logs listed as in-progress.
</td>
<td>2.4.0</td>
</tr>
<tr>
<td>spark.history.fs.driverlog.cleaner.enabled</td>
<td><code>spark.history.fs.cleaner.enabled</code></td>
<td>
Specifies whether the History Server should periodically clean up driver logs from storage.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td>spark.history.fs.driverlog.cleaner.interval</td>
@@ -285,20 +301,23 @@
When <code>spark.history.fs.driverlog.cleaner.enabled=true</code>, specifies how often the filesystem driver log cleaner checks for files to delete.
Files are only deleted if they are older than <code>spark.history.fs.driverlog.cleaner.maxAge</code>.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td>spark.history.fs.driverlog.cleaner.maxAge</td>
<td><code>spark.history.fs.cleaner.maxAge</code></td>
<td>
When <code>spark.history.fs.driverlog.cleaner.enabled=true</code>, driver log files older than this will be deleted when the driver log cleaner runs.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td>spark.history.fs.numReplayThreads</td>
<td>25% of available cores</td>
<td>
Number of threads that will be used by history server to process event logs.
</td>
<td>2.0.0</td>
</tr>
<tr>
<td>spark.history.store.maxDiskUsage</td>
@@ -307,6 +326,7 @@
Maximum disk usage for the local directory where the cached application history information
is stored.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td>spark.history.store.path</td>
@@ -316,6 +336,7 @@
server will store application data on disk instead of keeping it in memory. The data
written to disk will be re-used in the event of a history server restart.
</td>
<td>2.3.0</td>
</tr>
<tr>
<td>spark.history.custom.executor.log.url</td>
@@ -329,6 +350,7 @@
<p/>
For now, only YARN mode supports this configuration.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td>spark.history.custom.executor.log.url.applyIncompleteApplication</td>
@@ -339,6 +361,7 @@
Please note that incomplete applications may include applications which didn't shut down gracefully.
Even if this is set to `true`, this configuration has no effect on a live application; it only affects the history server.
</td>
<td>3.0.0</td>
</tr>
<tr>
<td>spark.history.fs.eventLog.rolling.maxFilesToRetain</td>
@@ -348,6 +371,7 @@
all event log files will be retained. The lowest value is 1 for technical reasons.<br/>
Please read the section on "Applying compaction of old event log files" for more details.
</td>
<td>3.0.0</td>
</tr>
</table>
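
For context, the `spark.history.*` keys documented above configure the History Server daemon, so they are normally supplied through `spark-defaults.conf` or the `SPARK_HISTORY_OPTS` environment variable rather than application code. A minimal sketch of the key/value form the table documents, assuming only the public `SparkConf` API and reusing the example log directory from the `spark.history.fs.logDirectory` row:

```scala
import org.apache.spark.SparkConf

// Sketch only: SparkConf is used here just to show the key/value pairs.
// The History Server process reads these from its own configuration
// (spark-defaults.conf or SPARK_HISTORY_OPTS), not from a running
// application's SparkConf.
val historyConf = new SparkConf()
  .set("spark.history.fs.logDirectory", "hdfs://namenode/shared/spark-logs")
  .set("spark.history.fs.cleaner.enabled", "true")
  .set("spark.history.fs.cleaner.maxAge", "7d")
```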
