
Commit 633d63a

JoshRosen authored and Andrew Or committed
[SPARK-12757] Add block-level read/write locks to BlockManager
## Motivation

As a pre-requisite to off-heap caching of blocks, we need a mechanism to prevent pages / blocks from being evicted while they are being read. With on-heap objects, evicting a block while it is being read merely leads to memory-accounting problems (because we assume that an evicted block is a candidate for garbage collection, which will not be true during a read), but with off-heap memory this will lead to either data corruption or segmentation faults.

## Changes

### BlockInfoManager and reader/writer locks

This patch adds block-level read/write locks to the BlockManager. It introduces a new `BlockInfoManager` component, which is contained within the `BlockManager`, holds the `BlockInfo` objects that the `BlockManager` uses for tracking block metadata, and exposes APIs for locking blocks in either shared read or exclusive write modes. `BlockManager`'s `get*()` and `put*()` methods now implicitly acquire the necessary locks. After a `get()` call successfully retrieves a block, that block is locked in shared read mode. A `put()` call will block until it acquires an exclusive write lock. If the write succeeds, the write lock is downgraded to a shared read lock before returning to the caller. This `put()` locking behavior allows us to store a block and then immediately turn around and read it without having to worry about it having been evicted between the write and the read, which will allow us to significantly simplify `CacheManager` in the future (see #10748). See `BlockInfoManagerSuite`'s test cases for a more detailed specification of the locking semantics.

### Auto-release of locks at the end of tasks

Our locking APIs support explicit release of locks (by calling `unlock()`), but it's not always possible to guarantee that locks will be released prior to the end of the task. One reason for this is our iterator interface: since our iterators don't support an explicit `close()` operator to signal that no more records will be consumed, operations like `take()` or `limit()` don't have a good means to release locks on their input iterators' blocks. Another example is broadcast variables, whose block locks can only be released at the end of the task. To address this, `BlockInfoManager` uses a pair of maps to track the set of locks acquired by each task. Lock acquisitions automatically record the current task attempt id by obtaining it from `TaskContext`. When a task finishes, code in `Executor` calls `BlockInfoManager.unlockAllLocksForTask(taskAttemptId)` to free locks.

### Locking and the MemoryStore

In order to prevent in-memory blocks from being evicted while they are being read, the `MemoryStore`'s `evictBlocksToFreeSpace()` method acquires write locks on blocks which it is considering as candidates for eviction. These lock acquisitions are non-blocking, so a block which is being read will not be evicted. By holding write locks until the eviction is performed or skipped (in case evicting the blocks would not free enough memory), we avoid a race where a new reader starts to read a block after the block has been marked as an eviction candidate but before it has been removed.

### Locking and remote block transfer

This patch makes small changes to block transfer and network layer code so that locks acquired by the BlockTransferService are released as soon as block transfer messages are consumed and released by Netty. This builds on top of #11193, a bug fix related to freeing of network layer ManagedBuffers.
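
To make the read/write-lock semantics above concrete, here is a minimal, self-contained sketch of a per-block lock manager in the spirit of `BlockInfoManager`. It is illustrative only: the names (`SketchBlockLockManager`, `lockForReading`, `lockForWriting`, `downgradeLock`, `unlock`) mirror the behavior described in this commit message but are not taken from the patch, and per-task bookkeeping is omitted.

```scala
import scala.collection.mutable

// Hypothetical sketch of per-block read/write locking: many concurrent readers
// OR a single writer per block. Not the actual BlockInfoManager implementation.
class SketchBlockLockManager {
  private class LockState { var readerCount = 0; var writerActive = false }
  private val states = mutable.HashMap.empty[String, LockState]
  private val monitor = new Object

  /** Shared read lock; with blocking = false, fail fast instead of waiting for a writer. */
  def lockForReading(blockId: String, blocking: Boolean = true): Boolean = monitor.synchronized {
    val s = states.getOrElseUpdate(blockId, new LockState)
    if (!blocking && s.writerActive) {
      false
    } else {
      while (s.writerActive) monitor.wait()
      s.readerCount += 1
      true
    }
  }

  /**
   * Exclusive write lock. A non-blocking attempt that fails is how an eviction pass
   * would skip blocks that are currently being read.
   */
  def lockForWriting(blockId: String, blocking: Boolean = true): Boolean = monitor.synchronized {
    val s = states.getOrElseUpdate(blockId, new LockState)
    if (!blocking && (s.writerActive || s.readerCount > 0)) {
      false
    } else {
      while (s.writerActive || s.readerCount > 0) monitor.wait()
      s.writerActive = true
      true
    }
  }

  /** After a successful put, downgrade the write lock so the writer can keep reading its block. */
  def downgradeLock(blockId: String): Unit = monitor.synchronized {
    val s = states(blockId)
    require(s.writerActive, s"no write lock held for $blockId")
    s.writerActive = false
    s.readerCount += 1
    monitor.notifyAll()
  }

  /** Release one read or write lock on the block. */
  def unlock(blockId: String): Unit = monitor.synchronized {
    val s = states(blockId)
    if (s.writerActive) s.writerActive = false else s.readerCount -= 1
    monitor.notifyAll()
  }
}
```

Under this model, a `put()`-style operation would take the write lock, store the block, and call `downgradeLock` before returning; a `get()`-style operation would return with the read lock held and rely on the caller (or end-of-task cleanup) to `unlock`; and an eviction pass would call `lockForWriting(blockId, blocking = false)` and skip any block it cannot lock.
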
## FAQ

- **Why not use Java's built-in [`ReadWriteLock`](https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/locks/ReadWriteLock.html)?** Our locks operate at a per-task rather than per-thread level. Under certain circumstances a task may consist of multiple threads, so using `ReadWriteLock` would mean that we might call `unlock()` from a thread which didn't hold the lock in question, an operation which has undefined semantics. If we could rely on Java 8 classes, we might be able to use [`StampedLock`](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/locks/StampedLock.html) to work around this issue.
- **Why not detect "leaked" locks in tests?** See the notes above about `take()` and `limit()`.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #10705 from JoshRosen/pin-pages.
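
As a rough illustration of the per-task (rather than per-thread) lock ownership discussed in the FAQ above, a registry keyed by task attempt id might look like the hypothetical sketch below. The names (`TaskLockRegistry`, `registerTask`, `recordLock`, `releaseAllLocksForTask`) are illustrative, not the actual `BlockInfoManager` API.

```scala
import java.util.concurrent.ConcurrentHashMap
import scala.collection.JavaConverters._

// Hypothetical sketch: track which blocks each task attempt has locked so that the locks
// can be released by task attempt id at task end, regardless of which thread acquired them.
// This is the property that java.util.concurrent.locks.ReadWriteLock (thread-owned locks)
// cannot provide directly.
class TaskLockRegistry {
  private val locksByTask = new ConcurrentHashMap[Long, java.util.Set[String]]()

  /** Called once when a task attempt starts. Idempotent. */
  def registerTask(taskAttemptId: Long): Unit =
    locksByTask.putIfAbsent(taskAttemptId, ConcurrentHashMap.newKeySet[String]())

  /** Record a lock acquisition made on behalf of the given task attempt. */
  def recordLock(taskAttemptId: Long, blockId: String): Unit = {
    registerTask(taskAttemptId)
    locksByTask.get(taskAttemptId).add(blockId)
  }

  /**
   * Called at task completion: forget the task and return the block ids whose locks were
   * still held, so the caller can release them and report them as "leaked".
   */
  def releaseAllLocksForTask(taskAttemptId: Long): Seq[String] =
    Option(locksByTask.remove(taskAttemptId)).map(_.asScala.toSeq).getOrElse(Seq.empty)
}
```

In the actual patch this bookkeeping lives inside `BlockInfoManager`, and `Executor` drives the release in the task's `finally` block (see the `Executor.scala` diff below).
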
1 parent 7129957 commit 633d63a

File tree

22 files changed, +1,384 −438 lines changed


core/src/main/scala/org/apache/spark/CacheManager.scala

Lines changed: 5 additions & 1 deletion

```diff
@@ -21,6 +21,7 @@ import scala.collection.mutable
 
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage._
+import org.apache.spark.util.CompletionIterator
 
 /**
  * Spark class responsible for passing RDDs partition contents to the BlockManager and making
@@ -47,6 +48,7 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
           existingMetrics.incBytesReadInternal(blockResult.bytes)
 
           val iter = blockResult.data.asInstanceOf[Iterator[T]]
+
           new InterruptibleIterator[T](context, iter) {
             override def next(): T = {
               existingMetrics.incRecordsReadInternal(1)
@@ -156,7 +158,9 @@
         case Left(arr) =>
           // We have successfully unrolled the entire partition, so cache it in memory
           blockManager.putArray(key, arr, level, tellMaster = true, effectiveStorageLevel)
-          arr.iterator.asInstanceOf[Iterator[T]]
+          CompletionIterator[T, Iterator[T]](
+            arr.iterator.asInstanceOf[Iterator[T]],
+            blockManager.releaseLock(key))
         case Right(it) =>
           // There is not enough space to cache this partition in memory
          val returnValues = it.asInstanceOf[Iterator[T]]
```
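
The `CompletionIterator` used above is what defers releasing the read lock until the caller has fully consumed the cached partition. A minimal sketch of that pattern (not Spark's actual `CompletionIterator`; the `releaseLock` call in the usage comment is the hypothetical cleanup action) could look like:

```scala
// Hypothetical sketch of a completion-callback iterator: runs a cleanup action exactly once,
// as soon as the wrapped iterator reports it has no more elements.
class CompletingIterator[A](sub: Iterator[A], completion: () => Unit) extends Iterator[A] {
  private var completed = false

  override def hasNext: Boolean = {
    val more = sub.hasNext
    if (!more && !completed) {
      completed = true
      completion() // e.g. release the block's read lock once the partition is exhausted
    }
    more
  }

  override def next(): A = sub.next()
}

// Usage (mirroring the CacheManager change above):
//   new CompletingIterator(arr.iterator, () => blockManager.releaseLock(key))
```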

core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala

Lines changed: 51 additions & 17 deletions

```diff
@@ -24,10 +24,10 @@ import scala.collection.JavaConverters._
 import scala.reflect.ClassTag
 import scala.util.Random
 
-import org.apache.spark.{Logging, SparkConf, SparkEnv, SparkException}
+import org.apache.spark._
 import org.apache.spark.io.CompressionCodec
 import org.apache.spark.serializer.Serializer
-import org.apache.spark.storage.{BroadcastBlockId, StorageLevel}
+import org.apache.spark.storage.{BlockId, BroadcastBlockId, StorageLevel}
 import org.apache.spark.util.{ByteBufferInputStream, Utils}
 import org.apache.spark.util.io.ByteArrayChunkOutputStream
 
@@ -90,22 +90,29 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
 
   /**
    * Divide the object into multiple blocks and put those blocks in the block manager.
+   *
    * @param value the object to divide
    * @return number of blocks this broadcast variable is divided into
    */
   private def writeBlocks(value: T): Int = {
+    import StorageLevel._
     // Store a copy of the broadcast variable in the driver so that tasks run on the driver
     // do not create a duplicate copy of the broadcast variable's value.
-    SparkEnv.get.blockManager.putSingle(broadcastId, value, StorageLevel.MEMORY_AND_DISK,
-      tellMaster = false)
+    val blockManager = SparkEnv.get.blockManager
+    if (blockManager.putSingle(broadcastId, value, MEMORY_AND_DISK, tellMaster = false)) {
+      blockManager.releaseLock(broadcastId)
+    } else {
+      throw new SparkException(s"Failed to store $broadcastId in BlockManager")
+    }
     val blocks =
       TorrentBroadcast.blockifyObject(value, blockSize, SparkEnv.get.serializer, compressionCodec)
     blocks.zipWithIndex.foreach { case (block, i) =>
-      SparkEnv.get.blockManager.putBytes(
-        BroadcastBlockId(id, "piece" + i),
-        block,
-        StorageLevel.MEMORY_AND_DISK_SER,
-        tellMaster = true)
+      val pieceId = BroadcastBlockId(id, "piece" + i)
+      if (blockManager.putBytes(pieceId, block, MEMORY_AND_DISK_SER, tellMaster = true)) {
+        blockManager.releaseLock(pieceId)
+      } else {
+        throw new SparkException(s"Failed to store $pieceId of $broadcastId in local BlockManager")
+      }
     }
     blocks.length
   }
@@ -127,16 +134,18 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
       def getRemote: Option[ByteBuffer] = bm.getRemoteBytes(pieceId).map { block =>
         // If we found the block from remote executors/driver's BlockManager, put the block
         // in this executor's BlockManager.
-        SparkEnv.get.blockManager.putBytes(
-          pieceId,
-          block,
-          StorageLevel.MEMORY_AND_DISK_SER,
-          tellMaster = true)
+        if (!bm.putBytes(pieceId, block, StorageLevel.MEMORY_AND_DISK_SER, tellMaster = true)) {
+          throw new SparkException(
+            s"Failed to store $pieceId of $broadcastId in local BlockManager")
+        }
         block
       }
       val block: ByteBuffer = getLocal.orElse(getRemote).getOrElse(
         throw new SparkException(s"Failed to get $pieceId of $broadcastId"))
+      // At this point we are guaranteed to hold a read lock, since we either got the block locally
+      // or stored the remotely-fetched block and automatically downgraded the write lock.
      blocks(pid) = block
+      releaseLock(pieceId)
     }
     blocks
   }
@@ -165,8 +174,10 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
   private def readBroadcastBlock(): T = Utils.tryOrIOException {
     TorrentBroadcast.synchronized {
       setConf(SparkEnv.get.conf)
-      SparkEnv.get.blockManager.getLocal(broadcastId).map(_.data.next()) match {
+      val blockManager = SparkEnv.get.blockManager
+      blockManager.getLocal(broadcastId).map(_.data.next()) match {
         case Some(x) =>
+          releaseLock(broadcastId)
           x.asInstanceOf[T]
 
         case None =>
@@ -179,13 +190,36 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
             blocks, SparkEnv.get.serializer, compressionCodec)
           // Store the merged copy in BlockManager so other tasks on this executor don't
           // need to re-fetch it.
-          SparkEnv.get.blockManager.putSingle(
-            broadcastId, obj, StorageLevel.MEMORY_AND_DISK, tellMaster = false)
+          val storageLevel = StorageLevel.MEMORY_AND_DISK
+          if (blockManager.putSingle(broadcastId, obj, storageLevel, tellMaster = false)) {
+            releaseLock(broadcastId)
+          } else {
+            throw new SparkException(s"Failed to store $broadcastId in BlockManager")
+          }
           obj
       }
     }
   }
 
+  /**
+   * If running in a task, register the given block's locks for release upon task completion.
+   * Otherwise, if not running in a task then immediately release the lock.
+   */
+  private def releaseLock(blockId: BlockId): Unit = {
+    val blockManager = SparkEnv.get.blockManager
+    Option(TaskContext.get()) match {
+      case Some(taskContext) =>
+        taskContext.addTaskCompletionListener(_ => blockManager.releaseLock(blockId))
+      case None =>
+        // This should only happen on the driver, where broadcast variables may be accessed
+        // outside of running tasks (e.g. when computing rdd.partitions()). In order to allow
+        // broadcast variables to be garbage collected we need to free the reference here
+        // which is slightly unsafe but is technically okay because broadcast variables aren't
+        // stored off-heap.
+        blockManager.releaseLock(blockId)
+    }
+  }
+
 }
```

core/src/main/scala/org/apache/spark/executor/Executor.scala

Lines changed: 17 additions & 1 deletion

```diff
@@ -218,7 +218,9 @@ private[spark] class Executor(
         threwException = false
         res
       } finally {
+        val releasedLocks = env.blockManager.releaseAllLocksForTask(taskId)
         val freedMemory = taskMemoryManager.cleanUpAllAllocatedMemory()
+
         if (freedMemory > 0) {
           val errMsg = s"Managed memory leak detected; size = $freedMemory bytes, TID = $taskId"
           if (conf.getBoolean("spark.unsafe.exceptionOnMemoryLeak", false) && !threwException) {
@@ -227,6 +229,17 @@
             logError(errMsg)
           }
         }
+
+        if (releasedLocks.nonEmpty) {
+          val errMsg =
+            s"${releasedLocks.size} block locks were not released by TID = $taskId:\n" +
+              releasedLocks.mkString("[", ", ", "]")
+          if (conf.getBoolean("spark.storage.exceptionOnPinLeak", false) && !threwException) {
+            throw new SparkException(errMsg)
+          } else {
+            logError(errMsg)
+          }
+        }
       }
       val taskFinish = System.currentTimeMillis()
 
@@ -266,8 +279,11 @@
           ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize))
         } else if (resultSize >= maxRpcMessageSize) {
           val blockId = TaskResultBlockId(taskId)
-          env.blockManager.putBytes(
+          val putSucceeded = env.blockManager.putBytes(
             blockId, serializedDirectResult, StorageLevel.MEMORY_AND_DISK_SER)
+          if (putSucceeded) {
+            env.blockManager.releaseLock(blockId)
+          }
           logInfo(
             s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)")
           ser.serialize(new IndirectTaskResult[Any](blockId, resultSize))
```
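
As with the existing managed-memory leak check, the new lock-leak check above only logs an error by default. A quick sketch of opting into hard failures (e.g. in a test suite's `SparkConf`), using the flags shown in this diff:

```scala
import org.apache.spark.SparkConf

// Fail the task instead of only logging when it finishes without releasing its block locks
// (or its managed memory). Both flags default to false.
val conf = new SparkConf()
  .set("spark.storage.exceptionOnPinLeak", "true")
  .set("spark.unsafe.exceptionOnMemoryLeak", "true")
```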

core/src/main/scala/org/apache/spark/network/BlockDataManager.scala

Lines changed: 9 additions & 1 deletion

```diff
@@ -31,6 +31,14 @@ trait BlockDataManager {
 
   /**
    * Put the block locally, using the given storage level.
+   *
+   * Returns true if the block was stored and false if the put operation failed or the block
+   * already existed.
    */
-  def putBlockData(blockId: BlockId, data: ManagedBuffer, level: StorageLevel): Unit
+  def putBlockData(blockId: BlockId, data: ManagedBuffer, level: StorageLevel): Boolean
+
+  /**
+   * Release locks acquired by [[putBlockData()]] and [[getBlockData()]].
+   */
+  def releaseLock(blockId: BlockId): Unit
 }
```

core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala

Lines changed: 5 additions & 1 deletion

```diff
@@ -65,7 +65,11 @@ class NettyBlockRpcServer(
         val level: StorageLevel =
           serializer.newInstance().deserialize(ByteBuffer.wrap(uploadBlock.metadata))
         val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
-        blockManager.putBlockData(BlockId(uploadBlock.blockId), data, level)
+        val blockId = BlockId(uploadBlock.blockId)
+        val putSucceeded = blockManager.putBlockData(blockId, data, level)
+        if (putSucceeded) {
+          blockManager.releaseLock(blockId)
+        }
         responseContext.onSuccess(ByteBuffer.allocate(0))
       }
   }
```

core/src/main/scala/org/apache/spark/scheduler/Task.scala

Lines changed: 1 addition & 0 deletions

```diff
@@ -64,6 +64,7 @@ private[spark] abstract class Task[T](
       taskAttemptId: Long,
       attemptNumber: Int,
       metricsSystem: MetricsSystem): T = {
+    SparkEnv.get.blockManager.registerTask(taskAttemptId)
     context = new TaskContextImpl(
       stageId,
       partitionId,
```

core/src/main/scala/org/apache/spark/storage/BlockInfo.scala

Lines changed: 0 additions & 83 deletions
This file was deleted.
