@@ -98,6 +98,26 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long)
98
98
}
99
99
}
100
100
101
/**
 * Attempt to store a serialized block of `size` bytes without materializing it first.
 *
 * The ByteBuffer is produced lazily via `_bytes`: space accounting is performed against
 * `size` before the buffer is ever created, so if the MemoryStore cannot hold the block
 * the (potentially large) buffer is never allocated.
 *
 * The caller must guarantee that `size` matches the limit of the buffer `_bytes` returns.
 *
 * @param blockId id under which the bytes are stored
 * @param size    exact serialized size in bytes, known up front
 * @param _bytes  thunk producing the serialized data; invoked at most once
 * @return a [[PutResult]] whose data is the stored buffer on success, or null on failure
 */
def putBytes(blockId: BlockId, size: Long, _bytes: () => ByteBuffer): PutResult = {
  // Duplicate before use so the caller's buffer position/limit are never disturbed.
  // `rewind()` returns the base Buffer type, hence the cast back to ByteBuffer.
  lazy val bytes = _bytes().duplicate().rewind().asInstanceOf[ByteBuffer]
  val attempt = tryToPut(blockId, () => bytes, size, deserialized = false)
  val storedData = if (attempt.success) {
    // The thunk has run by now; sanity-check the caller's size claim.
    assert(bytes.limit == size)
    Right(bytes.duplicate())
  } else {
    // NOTE: PutResult tolerates null data when the put did not succeed.
    null
  }
  PutResult(size, storedData, attempt.droppedBlocks)
}
101
121
override def putArray (
102
122
blockId : BlockId ,
103
123
values : Array [Any ],
@@ -312,11 +332,22 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long)
312
332
blockId.asRDDId.map(_.rddId)
313
333
}
314
334
335
/**
 * Convenience overload of `tryToPut` for values that already exist in memory.
 * Wraps the eager `value` in a constant thunk and delegates to the lazy variant,
 * which performs the actual space accounting and insertion.
 */
private def tryToPut(
    blockId: BlockId,
    value: Any,
    size: Long,
    deserialized: Boolean): ResultWithDroppedBlocks =
  tryToPut(blockId, () => value, size, deserialized)
315
343
/**
316
344
* Try to put in a set of values, if we can free up enough space. The value should either be
317
345
* an Array if deserialized is true or a ByteBuffer otherwise. Its (possibly estimated) size
318
346
* must also be passed by the caller.
319
347
*
348
+ * `value` will be lazily created. If it cannot be put into MemoryStore or disk, `value` won't be
349
+ * created to avoid OOM since it may be a big ByteBuffer.
350
+ *
320
351
* Synchronize on `accountingLock` to ensure that all the put requests and its associated block
321
352
* dropping is done by only on thread at a time. Otherwise while one thread is dropping
322
353
* blocks to free memory for one block, another thread may use up the freed space for
@@ -326,7 +357,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long)
326
357
*/
327
358
private def tryToPut (
328
359
blockId : BlockId ,
329
- value : Any ,
360
+ value : () => Any ,
330
361
size : Long ,
331
362
deserialized : Boolean ): ResultWithDroppedBlocks = {
332
363
@@ -345,7 +376,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long)
345
376
droppedBlocks ++= freeSpaceResult.droppedBlocks
346
377
347
378
if (enoughFreeSpace) {
348
- val entry = new MemoryEntry (value, size, deserialized)
379
+ val entry = new MemoryEntry (value() , size, deserialized)
349
380
entries.synchronized {
350
381
entries.put(blockId, entry)
351
382
currentMemory += size
@@ -357,12 +388,12 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long)
357
388
} else {
358
389
// Tell the block manager that we couldn't put it in memory so that it can drop it to
359
390
// disk if the block allows disk storage.
360
- val data = if (deserialized) {
361
- Left (value.asInstanceOf [Array [Any ]])
391
+ lazy val data = if (deserialized) {
392
+ Left (value() .asInstanceOf [Array [Any ]])
362
393
} else {
363
- Right (value.asInstanceOf [ByteBuffer ].duplicate())
394
+ Right (value() .asInstanceOf [ByteBuffer ].duplicate())
364
395
}
365
- val droppedBlockStatus = blockManager.dropFromMemory(blockId, data)
396
+ val droppedBlockStatus = blockManager.dropFromMemory(blockId, () => data)
366
397
droppedBlockStatus.foreach { status => droppedBlocks += ((blockId, status)) }
367
398
}
368
399
// Release the unroll memory used because we no longer need the underlying Array
0 commit comments