Skip to content

Commit fa4234c

Browse files
committed
Address reviews
1 parent 1b27080 commit fa4234c

File tree

4 files changed

+49
-18
lines changed

4 files changed

+49
-18
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,15 +1214,11 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
12141214
/**
12151215
* Java bytecode statistics of a compiled class by Janino.
12161216
*/
1217-
case class ByteCodeStats(maxClassCodeSize: Int, maxMethodCodeSize: Int, maxConstPoolSize: Int)
1217+
case class ByteCodeStats(
1218+
maxClassCodeSize: Int, maxMethodCodeSize: Int, maxConstPoolSize: Int, numInnerClasses: Int)
12181219

12191220
object ByteCodeStats {
1220-
1221-
val unavailable = ByteCodeStats(-1, -1, -1)
1222-
1223-
def apply(codeStats: (Int, Int, Int)): ByteCodeStats = {
1224-
ByteCodeStats(codeStats._1, codeStats._2, codeStats._3)
1225-
}
1221+
val UNAVAILABLE = ByteCodeStats(-1, -1, -1, -1)
12261222
}
12271223

12281224
object CodeGenerator extends Logging {
@@ -1332,9 +1328,9 @@ object CodeGenerator extends Logging {
13321328
}
13331329

13341330
/**
1335-
* Returns the bytecode statistics (max class bytecode size, max method bytecode size, and
1336-
* max constant pool size) of generated classes by inspecting janino private fields.inspecting
1337-
* janino private fields. Also, this method updates the metrics information.
1331+
* Returns the bytecode statistics (max class bytecode size, max method bytecode size,
1332+
* max constant pool size, and # of inner classes) of generated classes
1333+
* by inspecting Janino classes. Also, this method updates the metrics information.
13381334
*/
13391335
private def updateAndGetCompilationStats(evaluator: ClassBodyEvaluator): ByteCodeStats = {
13401336
// First retrieve the generated classes.
@@ -1378,9 +1374,17 @@ object CodeGenerator extends Logging {
13781374
}
13791375
}
13801376

1381-
ByteCodeStats(codeStats.reduce[(Int, Int, Int)] { case (v1, v2) =>
1377+
// Computes the max values of the three metrics: class code size, method code size,
1378+
// and constant pool size.
1379+
val maxCodeStats = codeStats.reduce[(Int, Int, Int)] { case (v1, v2) =>
13821380
(Math.max(v1._1, v2._1), Math.max(v1._2, v2._2), Math.max(v1._3, v2._3))
1383-
})
1381+
}
1382+
ByteCodeStats(
1383+
maxClassCodeSize = maxCodeStats._1,
1384+
maxMethodCodeSize = maxCodeStats._2,
1385+
maxConstPoolSize = maxCodeStats._3,
1386+
// Minus 2 for `GeneratedClass` and an outer-most generated class
1387+
numInnerClasses = classes.size - 2)
13841388
}
13851389

13861390
/**

sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,10 @@ package object debug {
8383
val codegenSeq = codegenStringSeq(plan)
8484
append(s"Found ${codegenSeq.size} WholeStageCodegen subtrees.\n")
8585
for (((subtree, code, codeStats), i) <- codegenSeq.zipWithIndex) {
86-
val codeStatsStr = s"maxClassCodeSize:${codeStats.maxClassCodeSize} " +
87-
s"maxMethodCodeSize:${codeStats.maxMethodCodeSize} " +
88-
s"maxConstantPoolSize:${codeStats.maxConstPoolSize}"
86+
val codeStatsStr = s"maxClassCodeSize:${codeStats.maxClassCodeSize}; " +
87+
s"maxMethodCodeSize:${codeStats.maxMethodCodeSize}; " +
88+
s"maxConstantPoolSize:${codeStats.maxConstPoolSize}; " +
89+
s"numInnerClasses:${codeStats.numInnerClasses}"
8990
append(s"== Subtree ${i + 1} / ${codegenSeq.size} ($codeStatsStr) ==\n")
9091
append(subtree)
9192
append("\nGenerated code:\n")
@@ -113,7 +114,7 @@ package object debug {
113114
CodeGenerator.compile(source)._2
114115
} catch {
115116
case NonFatal(_) =>
116-
ByteCodeStats.unavailable
117+
ByteCodeStats.UNAVAILABLE
117118
}
118119
(subtree.toString, CodeFormatter.format(source), codeStats)
119120
}

sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,10 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession {
213213

214214
ignore("SPARK-21871 check if we can get large code size when compiling too long functions") {
215215
val codeWithShortFunctions = genGroupByCode(3)
216-
val (_, ByteCodeStats(_, maxCodeSize1, _)) = CodeGenerator.compile(codeWithShortFunctions)
216+
val (_, ByteCodeStats(_, maxCodeSize1, _, _)) = CodeGenerator.compile(codeWithShortFunctions)
217217
assert(maxCodeSize1 < SQLConf.WHOLESTAGE_HUGE_METHOD_LIMIT.defaultValue.get)
218218
val codeWithLongFunctions = genGroupByCode(50)
219-
val (_, ByteCodeStats(_, maxCodeSize2, _)) = CodeGenerator.compile(codeWithLongFunctions)
219+
val (_, ByteCodeStats(_, maxCodeSize2, _, _)) = CodeGenerator.compile(codeWithLongFunctions)
220220
assert(maxCodeSize2 > SQLConf.WHOLESTAGE_HUGE_METHOD_LIMIT.defaultValue.get)
221221
}
222222

sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,30 @@ class DebuggingSuite extends SharedSparkSession {
9090
| id LongType: {}
9191
|""".stripMargin))
9292
}
93+
94+
test("Prints bytecode statistics in debugCodegen") {
95+
Seq(("SELECT sum(v) FROM VALUES(1) t(v)", (0, 0)),
96+
// We expect HashAggregate uses an inner class for fast hash maps
97+
// in partial aggregates with keys.
98+
("SELECT k, avg(v) FROM VALUES((1, 1)) t(k, v) GROUP BY k", (0, 1)))
99+
.foreach { case (query, (expectedNumInnerClasses0, expectedNumInnerClasses1)) =>
100+
101+
val executedPlan = sql(query).queryExecution.executedPlan
102+
val res = codegenStringSeq(executedPlan)
103+
assert(res.length == 2)
104+
assert(res.forall { case (_, _, codeStats) =>
105+
codeStats.maxClassCodeSize > 0 &&
106+
codeStats.maxMethodCodeSize > 0 &&
107+
codeStats.maxConstPoolSize > 0
108+
})
109+
assert(res(0)._3.numInnerClasses == expectedNumInnerClasses0)
110+
assert(res(1)._3.numInnerClasses == expectedNumInnerClasses1)
111+
112+
val debugCodegenStr = codegenString(executedPlan)
113+
assert(debugCodegenStr.contains("maxClassCodeSize:"))
114+
assert(debugCodegenStr.contains("maxMethodCodeSize:"))
115+
assert(debugCodegenStr.contains("maxConstantPoolSize:"))
116+
assert(debugCodegenStr.contains("numInnerClasses:"))
117+
}
118+
}
93119
}

0 commit comments

Comments
 (0)