Skip to content

Commit 653b31e

Browse files
committed
[SPARK-45771][CORE] Enable spark.eventLog.rolling.enabled by default
### What changes were proposed in this pull request?

This PR aims to enable `spark.eventLog.rolling.enabled` by default for Apache Spark 4.0.0.

### Why are the changes needed?

Since Apache Spark 3.0.0, we have been using event log rolling not only for **long-running jobs**, but also for **some failed jobs** to archive the partial event logs incrementally.
- #25670

### Does this PR introduce _any_ user-facing change?

- No, because `spark.eventLog.enabled` is disabled by default.
- For the users with `spark.eventLog.enabled=true`, yes, the `spark-events` directory will have different layouts. However, all 3.3+ `Spark History Server` versions can read both old and new event logs. I believe that the event log users are already using this configuration to avoid the loss of event logs for long-running jobs and some failed jobs.

### How was this patch tested?

Pass the CIs.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #43638 from dongjoon-hyun/SPARK-45771.

Authored-by: Dongjoon Hyun <dhyun@apple.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
1 parent 5970d35 commit 653b31e

File tree

6 files changed

+8
-3
lines changed

6 files changed

+8
-3
lines changed

core/src/main/scala/org/apache/spark/internal/config/package.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ package object config {
238238
"each event log file to the configured size.")
239239
.version("3.0.0")
240240
.booleanConf
241-
.createWithDefault(false)
241+
.createWithDefault(true)
242242

243243
private[spark] val EVENT_LOG_ROLLING_MAX_FILE_SIZE =
244244
ConfigBuilder("spark.eventLog.rolling.maxFileSize")

core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,7 @@ class SparkSubmitSuite
731731
"--conf", "spark.master.rest.enabled=false",
732732
"--conf", "spark.executorEnv.HADOOP_CREDSTORE_PASSWORD=secret_password",
733733
"--conf", "spark.eventLog.enabled=true",
734+
"--conf", "spark.eventLog.rolling.enabled=false",
734735
"--conf", "spark.eventLog.testing=true",
735736
"--conf", s"spark.eventLog.dir=${testDirPath.toUri.toString}",
736737
"--conf", "spark.hadoop.fs.defaultFS=unsupported://example.com",

core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ abstract class EventLogFileWritersSuite extends SparkFunSuite with LocalSparkCon
6666
conf.set(EVENT_LOG_DIR, testDir.toString)
6767

6868
// default config
69-
buildWriterAndVerify(conf, classOf[SingleEventLogFileWriter])
69+
buildWriterAndVerify(conf, classOf[RollingEventLogFilesWriter])
7070

7171
conf.set(EVENT_LOG_ENABLE_ROLLING, true)
7272
buildWriterAndVerify(conf, classOf[RollingEventLogFilesWriter])

core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ object EventLogTestHelper {
3838
def getLoggingConf(logDir: Path, compressionCodec: Option[String] = None): SparkConf = {
3939
val conf = new SparkConf
4040
conf.set(EVENT_LOG_ENABLED, true)
41+
conf.set(EVENT_LOG_ENABLE_ROLLING, false)
4142
conf.set(EVENT_LOG_BLOCK_UPDATES, true)
4243
conf.set(EVENT_LOG_TESTING, true)
4344
conf.set(EVENT_LOG_DIR, logDir.toString)

core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import org.apache.spark.deploy.SparkHadoopUtil
3333
import org.apache.spark.deploy.history.{EventLogFileReader, SingleEventLogFileWriter}
3434
import org.apache.spark.deploy.history.EventLogTestHelper._
3535
import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics}
36-
import org.apache.spark.internal.config.{EVENT_LOG_COMPRESS, EVENT_LOG_DIR, EVENT_LOG_ENABLED}
36+
import org.apache.spark.internal.config.{EVENT_LOG_COMPRESS, EVENT_LOG_DIR, EVENT_LOG_ENABLE_ROLLING, EVENT_LOG_ENABLED}
3737
import org.apache.spark.io._
3838
import org.apache.spark.metrics.{ExecutorMetricType, MetricsSystem}
3939
import org.apache.spark.resource.ResourceProfile
@@ -163,6 +163,7 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit
163163
test("SPARK-31764: isBarrier should be logged in event log") {
164164
val conf = new SparkConf()
165165
conf.set(EVENT_LOG_ENABLED, true)
166+
conf.set(EVENT_LOG_ENABLE_ROLLING, false)
166167
conf.set(EVENT_LOG_COMPRESS, false)
167168
conf.set(EVENT_LOG_DIR, testDirPath.toString)
168169
val sc = new SparkContext("local", "test-SPARK-31764", conf)

docs/core-migration-guide.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ license: |
2424

2525
## Upgrading from Core 3.5 to 4.0
2626

27+
- Since Spark 4.0, Spark will roll event logs to archive them incrementally. To restore the behavior before Spark 4.0, you can set `spark.eventLog.rolling.enabled` to `false`.
28+
2729
- Since Spark 4.0, Spark will compress event logs. To restore the behavior before Spark 4.0, you can set `spark.eventLog.compress` to `false`.
2830

2931
- Since Spark 4.0, `spark.shuffle.service.db.backend` is set to `ROCKSDB` by default which means Spark will use RocksDB store for shuffle service. To restore the behavior before Spark 4.0, you can set `spark.shuffle.service.db.backend` to `LEVELDB`.

0 commit comments

Comments (0)