Skip to content

Commit 740341b

Browse files
committed
Optimize method dispatch for primitive type conversions
1 parent befc613 commit 740341b

File tree

1 file changed

+55
-31
lines changed

1 file changed

+55
-31
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala

Lines changed: 55 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,19 @@ object CatalystTypeConverters {
3737
// Since the map values can be mutable, we explicitly import scala.collection.Map at here.
3838
import scala.collection.Map
3939

40+
private def isPrimitive(dataType: DataType): Boolean = {
41+
dataType match {
42+
case BooleanType => true
43+
case ByteType => true
44+
case ShortType => true
45+
case IntegerType => true
46+
case LongType => true
47+
case FloatType => true
48+
case DoubleType => true
49+
case _ => false
50+
}
51+
}
52+
4053
private def getConverterForType(dataType: DataType): CatalystTypeConverter[Any, Any, Any] = {
4154
val converter = dataType match {
4255
case udt: UserDefinedType[_] => UDTConverter(udt)
@@ -73,15 +86,17 @@ object CatalystTypeConverters {
7386
* and Options.
7487
*/
7588
final def toCatalyst(@Nullable maybeScalaValue: Any): CatalystType = {
76-
maybeScalaValue match {
77-
case opt: Option[ScalaInputType] =>
78-
if (opt.isDefined) {
79-
toCatalystImpl(opt.get)
80-
} else {
81-
null.asInstanceOf[CatalystType]
82-
}
83-
case null => null.asInstanceOf[CatalystType]
84-
case scalaValue: ScalaInputType => toCatalystImpl(scalaValue)
89+
if (maybeScalaValue == null) {
90+
null.asInstanceOf[CatalystType]
91+
} else if (maybeScalaValue.isInstanceOf[Option[ScalaInputType]]) {
92+
val opt = maybeScalaValue.asInstanceOf[Option[ScalaInputType]]
93+
if (opt.isDefined) {
94+
toCatalystImpl(opt.get)
95+
} else {
96+
null.asInstanceOf[CatalystType]
97+
}
98+
} else {
99+
toCatalystImpl(maybeScalaValue.asInstanceOf[ScalaInputType])
85100
}
86101
}
87102

@@ -272,46 +287,37 @@ object CatalystTypeConverters {
272287
}
273288
}
274289

275-
private object BooleanConverter extends CatalystTypeConverter[Boolean, Any, Any] {
290+
private abstract class PrimitiveConverter[T] extends CatalystTypeConverter[T, Any, Any] {
291+
final override def toScala(catalystValue: Any): Any = catalystValue
292+
final override def toCatalystImpl(scalaValue: T): Any = scalaValue
293+
}
294+
295+
private object BooleanConverter extends PrimitiveConverter[Boolean] {
276296
override def toScalaImpl(row: Row, column: Int): Boolean = row.getBoolean(column)
277-
override def toScala(catalystValue: Any): Any = catalystValue
278-
override def toCatalystImpl(scalaValue: Boolean): Boolean = scalaValue
279297
}
280298

281-
private object ByteConverter extends CatalystTypeConverter[Byte, Any, Any] {
299+
private object ByteConverter extends PrimitiveConverter[Byte] {
282300
override def toScalaImpl(row: Row, column: Int): Byte = row.getByte(column)
283-
override def toScala(catalystValue: Any): Any = catalystValue
284-
override def toCatalystImpl(scalaValue: Byte): Byte = scalaValue
285301
}
286302

287-
private object ShortConverter extends CatalystTypeConverter[Short, Any, Any] {
303+
private object ShortConverter extends PrimitiveConverter[Short] {
288304
override def toScalaImpl(row: Row, column: Int): Short = row.getShort(column)
289-
override def toScala(catalystValue: Any): Any = catalystValue
290-
override def toCatalystImpl(scalaValue: Short): Short = scalaValue
291305
}
292306

293-
private object IntConverter extends CatalystTypeConverter[Int, Any, Any] {
307+
private object IntConverter extends PrimitiveConverter[Int] {
294308
override def toScalaImpl(row: Row, column: Int): Int = row.getInt(column)
295-
override def toScala(catalystValue: Any): Any = catalystValue
296-
override def toCatalystImpl(scalaValue: Int): Int = scalaValue
297309
}
298310

299-
private object LongConverter extends CatalystTypeConverter[Long, Any, Any] {
311+
private object LongConverter extends PrimitiveConverter[Long] {
300312
override def toScalaImpl(row: Row, column: Int): Long = row.getLong(column)
301-
override def toScala(catalystValue: Any): Any = catalystValue
302-
override def toCatalystImpl(scalaValue: Long): Long = scalaValue
303313
}
304314

305-
private object FloatConverter extends CatalystTypeConverter[Float, Any, Any] {
315+
private object FloatConverter extends PrimitiveConverter[Float] {
306316
override def toScalaImpl(row: Row, column: Int): Float = row.getFloat(column)
307-
override def toScala(catalystValue: Any): Any = catalystValue
308-
override def toCatalystImpl(scalaValue: Float): Float = scalaValue
309317
}
310318

311-
private object DoubleConverter extends CatalystTypeConverter[Double, Any, Any] {
319+
private object DoubleConverter extends PrimitiveConverter[Double] {
312320
override def toScalaImpl(row: Row, column: Int): Double = row.getDouble(column)
313-
override def toScala(catalystValue: Any): Any = catalystValue
314-
override def toCatalystImpl(scalaValue: Double): Double = scalaValue
315321
}
316322

317323
/**
@@ -330,7 +336,25 @@ object CatalystTypeConverters {
330336
* call this function once to get a converter, and apply it to every row.
331337
*/
332338
private[sql] def createToCatalystConverter(dataType: DataType): Any => Any = {
333-
getConverterForType(dataType).toCatalyst
339+
if (isPrimitive(dataType)) {
340+
// Although the `else` branch here is capable of handling inbound conversion of primitives,
341+
// we add some special-case handling for those types here. The motivation for this relates to
342+
// Java method invocation costs: if we have rows that consist entirely of primitive columns,
343+
// then returning the same conversion function for all of the columns means that the call site
344+
// will be monomorphic instead of polymorphic. In microbenchmarks, this actually resulted in
345+
// a measurable performance impact. Note that this optimization will be unnecessary if we
346+
// use code generation to construct Scala Row -> Catalyst Row converters.
347+
def convert(maybeScalaValue: Any): Any = {
348+
if (maybeScalaValue.isInstanceOf[Option[Any]]) {
349+
maybeScalaValue.asInstanceOf[Option[Any]].orNull
350+
} else {
351+
maybeScalaValue
352+
}
353+
}
354+
convert
355+
} else {
356+
getConverterForType(dataType).toCatalyst
357+
}
334358
}
335359

336360
/**

0 commit comments

Comments
 (0)