Skip to content

Commit 8237df8

Browse files
witgopwendell
authored andcommitted
Avoid Option while generating call site
This is an update on #180, which changes the solution from blacklisting "Option.scala" to avoiding the Option code path while generating the call path. Also includes a unit test to prevent this issue in the future, and some minor refactoring. Thanks @witgo for reporting this issue and working on the initial solution! Author: witgo <witgo@qq.com> Author: Aaron Davidson <aaron@databricks.com> Closes #222 from aarondav/180 and squashes the following commits: f74aad1 [Aaron Davidson] Avoid Option while generating call site & add unit tests d2b4980 [witgo] Modify the position of the filter 1bc22d7 [witgo] Fix Stage.name return "apply at Option.scala:120"
1 parent f8111ea commit 8237df8

File tree

4 files changed

+47
-12
lines changed

4 files changed

+47
-12
lines changed

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,8 @@ class SparkContext(
877877
* has overridden the call site, this will return the user's version.
878878
*/
879879
private[spark] def getCallSite(): String = {
880-
Option(getLocalProperty("externalCallSite")).getOrElse(Utils.formatCallSiteInfo())
880+
val defaultCallSite = Utils.getCallSiteInfo
881+
Option(getLocalProperty("externalCallSite")).getOrElse(defaultCallSite.toString)
881882
}
882883

883884
/**

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,7 @@ abstract class RDD[T: ClassTag](
10411041

10421042
/** User code that created this RDD (e.g. `textFile`, `parallelize`). */
10431043
@transient private[spark] val creationSiteInfo = Utils.getCallSiteInfo
1044-
private[spark] def getCreationSite = Utils.formatCallSiteInfo(creationSiteInfo)
1044+
private[spark] def getCreationSite: String = creationSiteInfo.toString
10451045

10461046
private[spark] def elementClassTag: ClassTag[T] = classTag[T]
10471047

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -679,16 +679,22 @@ private[spark] object Utils extends Logging {
679679
private val SPARK_CLASS_REGEX = """^org\.apache\.spark(\.api\.java)?(\.util)?(\.rdd)?\.[A-Z]""".r
680680

681681
private[spark] class CallSiteInfo(val lastSparkMethod: String, val firstUserFile: String,
682-
val firstUserLine: Int, val firstUserClass: String)
682+
val firstUserLine: Int, val firstUserClass: String) {
683+
684+
/** Returns a printable version of the call site info suitable for logs. */
685+
override def toString = {
686+
"%s at %s:%s".format(lastSparkMethod, firstUserFile, firstUserLine)
687+
}
688+
}
683689

684690
/**
685691
* When called inside a class in the spark package, returns the name of the user code class
686692
* (outside the spark package) that called into Spark, as well as which Spark method they called.
687693
* This is used, for example, to tell users where in their code each RDD got created.
688694
*/
689695
def getCallSiteInfo: CallSiteInfo = {
690-
val trace = Thread.currentThread.getStackTrace().filter( el =>
691-
(!el.getMethodName.contains("getStackTrace")))
696+
val trace = Thread.currentThread.getStackTrace()
697+
.filterNot(_.getMethodName.contains("getStackTrace"))
692698

693699
// Keep crawling up the stack trace until we find the first function not inside of the spark
694700
// package. We track the last (shallowest) contiguous Spark method. This might be an RDD
@@ -721,12 +727,6 @@ private[spark] object Utils extends Logging {
721727
new CallSiteInfo(lastSparkMethod, firstUserFile, firstUserLine, firstUserClass)
722728
}
723729

724-
/** Returns a printable version of the call site info suitable for logs. */
725-
def formatCallSiteInfo(callSiteInfo: CallSiteInfo = Utils.getCallSiteInfo) = {
726-
"%s at %s:%s".format(callSiteInfo.lastSparkMethod, callSiteInfo.firstUserFile,
727-
callSiteInfo.firstUserLine)
728-
}
729-
730730
/** Return a string containing part of a file from byte 'start' to 'end'. */
731731
def offsetBytes(path: String, start: Long, end: Long): String = {
732732
val file = new File(path)

core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark
1919

20-
import org.scalatest.FunSuite
20+
import org.scalatest.{Assertions, FunSuite}
2121

2222
class SparkContextInfoSuite extends FunSuite with LocalSparkContext {
2323
test("getPersistentRDDs only returns RDDs that are marked as cached") {
@@ -56,4 +56,38 @@ class SparkContextInfoSuite extends FunSuite with LocalSparkContext {
5656
rdd.collect()
5757
assert(sc.getRDDStorageInfo.size === 1)
5858
}
59+
60+
test("call sites report correct locations") {
61+
sc = new SparkContext("local", "test")
62+
testPackage.runCallSiteTest(sc)
63+
}
64+
}
65+
66+
/** Call site must be outside of usual org.apache.spark packages (see Utils#SPARK_CLASS_REGEX). */
67+
package object testPackage extends Assertions {
68+
private val CALL_SITE_REGEX = "(.+) at (.+):([0-9]+)".r
69+
70+
def runCallSiteTest(sc: SparkContext) {
71+
val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2)
72+
val rddCreationSite = rdd.getCreationSite
73+
val curCallSite = sc.getCallSite() // note: 2 lines after definition of "rdd"
74+
75+
val rddCreationLine = rddCreationSite match {
76+
case CALL_SITE_REGEX(func, file, line) => {
77+
assert(func === "makeRDD")
78+
assert(file === "SparkContextInfoSuite.scala")
79+
line.toInt
80+
}
81+
case _ => fail("Did not match expected call site format")
82+
}
83+
84+
curCallSite match {
85+
case CALL_SITE_REGEX(func, file, line) => {
86+
assert(func === "getCallSite") // this is correct because we called it from outside of Spark
87+
assert(file === "SparkContextInfoSuite.scala")
88+
assert(line.toInt === rddCreationLine.toInt + 2)
89+
}
90+
case _ => fail("Did not match expected call site format")
91+
}
92+
}
5993
}

0 commit comments

Comments
 (0)