@@ -38,10 +38,13 @@ package object config {
     ConfigBuilder("spark.driver.userClassPathFirst").booleanConf.createWithDefault(false)
 
   private[spark] val DRIVER_MEMORY = ConfigBuilder("spark.driver.memory")
+    .doc("Amount of memory to use for the driver process, in MiB unless otherwise specified.")
     .bytesConf(ByteUnit.MiB)
     .createWithDefaultString("1g")
 
   private[spark] val DRIVER_MEMORY_OVERHEAD = ConfigBuilder("spark.driver.memoryOverhead")
+    .doc("The amount of off-heap memory to be allocated per driver in cluster mode, " +
+      "in MiB unless otherwise specified.")
     .bytesConf(ByteUnit.MiB)
     .createOptional
 
@@ -62,6 +65,7 @@ package object config {
       .createWithDefault(false)
 
   private[spark] val EVENT_LOG_OUTPUT_BUFFER_SIZE = ConfigBuilder("spark.eventLog.buffer.kb")
+    .doc("Buffer size to use when writing to output streams, in KiB unless otherwise specified.")
     .bytesConf(ByteUnit.KiB)
     .createWithDefaultString("100k")
 
@@ -81,10 +85,13 @@ package object config {
     ConfigBuilder("spark.executor.userClassPathFirst").booleanConf.createWithDefault(false)
 
   private[spark] val EXECUTOR_MEMORY = ConfigBuilder("spark.executor.memory")
+    .doc("Amount of memory to use per executor process, in MiB unless otherwise specified.")
     .bytesConf(ByteUnit.MiB)
     .createWithDefaultString("1g")
 
   private[spark] val EXECUTOR_MEMORY_OVERHEAD = ConfigBuilder("spark.executor.memoryOverhead")
+    .doc("The amount of off-heap memory to be allocated per executor in cluster mode, " +
+      "in MiB unless otherwise specified.")
     .bytesConf(ByteUnit.MiB)
     .createOptional
 
@@ -353,7 +360,7 @@ package object config {
   private[spark] val BUFFER_WRITE_CHUNK_SIZE =
     ConfigBuilder("spark.buffer.write.chunkSize")
       .internal()
-      .doc("The chunk size during writing out the bytes of ChunkedByteBuffer.")
+      .doc("The chunk size in bytes during writing out the bytes of ChunkedByteBuffer.")
       .bytesConf(ByteUnit.BYTE)
       .checkValue(_ <= Int.MaxValue, "The chunk size during writing out the bytes of" +
         " ChunkedByteBuffer should not larger than Int.MaxValue.")
@@ -368,9 +375,9 @@ package object config {
 
   private[spark] val SHUFFLE_ACCURATE_BLOCK_THRESHOLD =
     ConfigBuilder("spark.shuffle.accurateBlockThreshold")
-      .doc("When we compress the size of shuffle blocks in HighlyCompressedMapStatus, we will " +
-        "record the size accurately if it's above this config. This helps to prevent OOM by " +
-        "avoiding underestimating shuffle block size when fetch shuffle blocks.")
+      .doc("Threshold in bytes above which the size of shuffle blocks in " +
+        "HighlyCompressedMapStatus is accurately recorded. This helps to prevent OOM " +
+        "by avoiding underestimating shuffle block size when fetch shuffle blocks.")
       .bytesConf(ByteUnit.BYTE)
       .createWithDefault(100 * 1024 * 1024)
 
@@ -389,23 +396,23 @@ package object config {
 
   private[spark] val REDUCER_MAX_BLOCKS_IN_FLIGHT_PER_ADDRESS =
     ConfigBuilder("spark.reducer.maxBlocksInFlightPerAddress")
-      .doc("This configuration limits the number of remote blocks being fetched per reduce task" +
-        " from a given host port. When a large number of blocks are being requested from a given" +
-        " address in a single fetch or simultaneously, this could crash the serving executor or" +
-        " Node Manager. This is especially useful to reduce the load on the Node Manager when" +
-        " external shuffle is enabled. You can mitigate the issue by setting it to a lower value.")
+      .doc("This configuration limits the number of remote blocks being fetched per reduce task " +
+        "from a given host port. When a large number of blocks are being requested from a given " +
+        "address in a single fetch or simultaneously, this could crash the serving executor or " +
+        "Node Manager. This is especially useful to reduce the load on the Node Manager when " +
+        "external shuffle is enabled. You can mitigate the issue by setting it to a lower value.")
       .intConf
       .checkValue(_ > 0, "The max no. of blocks in flight cannot be non-positive.")
       .createWithDefault(Int.MaxValue)
 
   private[spark] val MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM =
     ConfigBuilder("spark.maxRemoteBlockSizeFetchToMem")
-      .doc("Remote block will be fetched to disk when size of the block is " +
-        "above this threshold. This is to avoid a giant request takes too much memory. We can " +
-        "enable this config by setting a specific value(e.g. 200m). Note this configuration will " +
-        "affect both shuffle fetch and block manager remote block fetch. For users who " +
-        "enabled external shuffle service, this feature can only be worked when external shuffle" +
-        " service is newer than Spark 2.2.")
+      .doc("Remote block will be fetched to disk when size of the block is above this threshold " +
+        "in bytes. This is to avoid a giant request takes too much memory. We can enable this " +
+        "config by setting a specific value(e.g. 200m). Note this configuration will affect " +
+        "both shuffle fetch and block manager remote block fetch. For users who enabled " +
+        "external shuffle service, this feature can only be worked when external shuffle" +
+        " service is newer than Spark 2.2.")
       .bytesConf(ByteUnit.BYTE)
       .createWithDefault(Long.MaxValue)
 
@@ -419,9 +426,9 @@ package object config {
 
   private[spark] val SHUFFLE_FILE_BUFFER_SIZE =
     ConfigBuilder("spark.shuffle.file.buffer")
-      .doc("Size of the in-memory buffer for each shuffle file output stream. " +
-        "These buffers reduce the number of disk seeks and system calls made " +
-        "in creating intermediate shuffle files.")
+      .doc("Size of the in-memory buffer for each shuffle file output stream, in KiB unless " +
+        "otherwise specified. These buffers reduce the number of disk seeks and system calls " +
+        "made in creating intermediate shuffle files.")
       .bytesConf(ByteUnit.KiB)
       .checkValue(v => v > 0 && v <= Int.MaxValue / 1024,
         s"The file buffer size must be greater than 0 and less than ${Int.MaxValue / 1024}.")
@@ -430,15 +437,15 @@ package object config {
   private[spark] val SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE =
     ConfigBuilder("spark.shuffle.unsafe.file.output.buffer")
       .doc("The file system for this buffer size after each partition " +
-        "is written in unsafe shuffle writer.")
+        "is written in unsafe shuffle writer. In KiB unless otherwise specified.")
       .bytesConf(ByteUnit.KiB)
       .checkValue(v => v > 0 && v <= Int.MaxValue / 1024,
         s"The buffer size must be greater than 0 and less than ${Int.MaxValue / 1024}.")
       .createWithDefaultString("32k")
 
   private[spark] val SHUFFLE_DISK_WRITE_BUFFER_SIZE =
     ConfigBuilder("spark.shuffle.spill.diskWriteBufferSize")
-      .doc("The buffer size to use when writing the sorted records to an on-disk file.")
+      .doc("The buffer size, in bytes, to use when writing the sorted records to an on-disk file.")
      .bytesConf(ByteUnit.BYTE)
      .checkValue(v => v > 0 && v <= Int.MaxValue,
        s"The buffer size must be greater than 0 and less than ${Int.MaxValue}.")
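For context on how the entries touched by this diff are consumed: inside Spark's own code (both the config vals and `SparkConf.get(ConfigEntry)` are `private[spark]`), an entry declared with `.bytesConf(ByteUnit.MiB)` resolves a user-supplied string such as "2g" to a `Long` number of MiB, which is why the added `.doc()` strings spell out the default unit. A minimal sketch, assuming a file placed in Spark-internal package scope (the `ConfigDocExample` object is hypothetical, not part of Spark):

```scala
package org.apache.spark

import org.apache.spark.internal.config._

object ConfigDocExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .set("spark.driver.memory", "2g") // user-facing string value

    // DRIVER_MEMORY was declared with .bytesConf(ByteUnit.MiB), so the value
    // comes back as a Long number of MiB; "2g" resolves to 2048.
    val driverMemoryMiB: Long = conf.get(DRIVER_MEMORY)

    // Unset entries fall back to the builder default ("1g" -> 1024 MiB for
    // DRIVER_MEMORY), or to None for .createOptional entries such as
    // DRIVER_MEMORY_OVERHEAD.
    val overheadMiB: Option[Long] = conf.get(DRIVER_MEMORY_OVERHEAD)

    println(s"driver memory = $driverMemoryMiB MiB, overhead = $overheadMiB")
  }
}
```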