Skip to content

Commit ca3efd9

Browse files
committed
Add docs for shuffle manager properties, and allow short names for them
1 parent d0ae3c5 commit ca3efd9

File tree

2 files changed

+27
-4
lines changed

2 files changed

+27
-4
lines changed

core/src/main/scala/org/apache/spark/SparkEnv.scala

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,8 @@ object SparkEnv extends Logging {
158158

159159
// Create an instance of the class named by the given SparkConf property, or by
160160
// defaultClassName if the property is not set, and return it as a T
161-
def instantiateClass[T](propertyName: String, defaultClassName: String): T = {
162-
val name = conf.get(propertyName, defaultClassName)
161+
def instantiateClass[T](propertyName: String, defaultClassName: String = null): T = {
162+
val name = conf.get(propertyName, defaultClassName)
163163
val cls = Class.forName(name, true, Utils.getContextOrSparkClassLoader)
164164
// Look for a constructor taking a SparkConf and a boolean isDriver, then one taking just
165165
// SparkConf, then one taking no arguments
@@ -246,8 +246,13 @@ object SparkEnv extends Logging {
246246
"."
247247
}
248248

249-
val shuffleManager = instantiateClass[ShuffleManager](
250-
"spark.shuffle.manager", "org.apache.spark.shuffle.hash.HashShuffleManager")
249+
// Let the user specify short names for shuffle managers
250+
val shortShuffleMgrNames = Map(
251+
"HASH" -> "org.apache.spark.shuffle.hash.HashShuffleManager",
252+
"SORT" -> "org.apache.spark.shuffle.sort.SortShuffleManager")
253+
val shuffleMgrName = conf.get("spark.shuffle.manager", "HASH")
254+
val shuffleMgrClass = shortShuffleMgrNames.getOrElse(shuffleMgrName, shuffleMgrName)
255+
val shuffleManager = instantiateClass[ShuffleManager](shuffleMgrClass)
251256

252257
val shuffleMemoryManager = new ShuffleMemoryManager(conf)
253258

docs/configuration.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,24 @@ Apart from these, the following properties are also available, and may be useful
281281
overhead per reduce task, so keep it small unless you have a large amount of memory.
282282
</td>
283283
</tr>
284+
<tr>
285+
<td><code>spark.shuffle.manager</code></td>
286+
<td>HASH</td>
287+
<td>
288+
Implementation to use for shuffling data. A hash-based shuffle manager is the default, but
289+
starting in Spark 1.1 there is an experimental sort-based shuffle manager that is more
290+
memory-efficient in environments with small executors, such as YARN. To use that, change
291+
this value to <code>SORT</code>.
292+
</td>
293+
</tr>
294+
<tr>
295+
<td><code>spark.shuffle.sort.bypassMergeThreshold</code></td>
296+
<td>200</td>
297+
<td>
298+
(Advanced) In the sort-based shuffle manager, avoid merge-sorting data if there is no
299+
map-side aggregation and there are at most this many reduce partitions.
300+
</td>
301+
</tr>
284302
</table>
285303

286304
#### Spark UI

0 commit comments

Comments
 (0)