Commit 0b412b0

Amortize map construction.
1 parent 38e8a99

File tree

4 files changed: +27 -19 lines


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala

Lines changed: 4 additions & 16 deletions
@@ -82,31 +82,19 @@ object BindReferences extends Logging {
   def bindReference[A <: Expression](
       expression: A,
-      input: Seq[Attribute],
+      input: AttributeSeq,
       allowFailures: Boolean = false): A = {
-    val inputArr = input.toArray
-    val inputToOrdinal = {
-      val map = new java.util.HashMap[ExprId, Int](inputArr.length * 2)
-      var index = 0
-      input.foreach { attr =>
-        if (!map.containsKey(attr.exprId)) {
-          map.put(attr.exprId, index)
-        }
-        index += 1
-      }
-      map
-    }
     expression.transform { case a: AttributeReference =>
       attachTree(a, "Binding attribute") {
-        val ordinal = Option(inputToOrdinal.get(a.exprId)).getOrElse(-1)
+        val ordinal = input.getOrdinal(a.exprId)
         if (ordinal == -1) {
           if (allowFailures) {
             a
           } else {
-            sys.error(s"Couldn't find $a in ${input.mkString("[", ",", "]")}")
+            sys.error(s"Couldn't find $a in ${input.attrs.mkString("[", ",", "]")}")
           }
         } else {
-          BoundReference(ordinal, a.dataType, inputArr(ordinal).nullable)
+          BoundReference(ordinal, a.dataType, input(ordinal).nullable)
         }
       }
     }.asInstanceOf[A] // Kind of a hack, but safe. TODO: Tighten return type when possible.
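
For context on what is being amortized: `bindReference` is invoked once per expression, so the deleted block rebuilt the same `ExprId`-to-ordinal map every time a batch of expressions was bound against one attribute list. After the change, the map lives with the attribute list and is built at most once. A minimal sketch of the two shapes, using plain Scala maps and illustrative names (`Schema`, `bindEach`, `bindAll` are not Spark's API):

// Before: every call pays to build the ordinal index from scratch.
def bindEach(schema: Seq[String], exprs: Seq[String]): Seq[Int] =
  exprs.map { e =>
    // reverse + toMap keeps the FIRST index for duplicate names,
    // mirroring the `containsKey` guard in the deleted block.
    val ordinals = schema.zipWithIndex.reverse.toMap
    ordinals.getOrElse(e, -1)
  }

// After: the index is built lazily, once, and shared by all calls.
final class Schema(attrs: Seq[String]) {
  private lazy val ordinals: Map[String, Int] = attrs.zipWithIndex.reverse.toMap
  def getOrdinal(name: String): Int = ordinals.getOrElse(name, -1)
}

def bindAll(schema: Schema, exprs: Seq[String]): Seq[Int] =
  exprs.map(schema.getOrdinal) // reuses the one cached map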

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala

Lines changed: 21 additions & 1 deletion
@@ -86,11 +86,31 @@ package object expressions {
   /**
    * Helper functions for working with `Seq[Attribute]`.
    */
-  implicit class AttributeSeq(attrs: Seq[Attribute]) {
+  implicit class AttributeSeq(val attrs: Seq[Attribute]) {
     /** Creates a StructType with a schema matching this `Seq[Attribute]`. */
     def toStructType: StructType = {
       StructType(attrs.map(a => StructField(a.name, a.dataType, a.nullable)))
     }
+
+    private lazy val inputArr = attrs.toArray
+
+    private lazy val inputToOrdinal = {
+      val map = new java.util.HashMap[ExprId, Int](inputArr.length * 2)
+      var index = 0
+      attrs.foreach { attr =>
+        if (!map.containsKey(attr.exprId)) {
+          map.put(attr.exprId, index)
+        }
+        index += 1
+      }
+      map
+    }
+
+    def apply(ordinal: Int): Attribute = inputArr(ordinal)
+
+    def getOrdinal(exprId: ExprId): Int = {
+      Option(inputToOrdinal.get(exprId)).getOrElse(-1)
+    }
   }

   /**
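
Two details of the new members are worth noting. `AttributeSeq` is an implicit class, so any `Seq[Attribute]` picks up `apply` and `getOrdinal` without explicit wrapping, and promoting `attrs` to a `val` is what lets `BoundAttribute.scala` reach the underlying sequence for its error message. The initial `HashMap` capacity of `inputArr.length * 2` also keeps the map under its default load factor, so it should not rehash while being filled. A small sketch of the implicit-class pattern, simplified to `String` elements (the names here are illustrative, not Spark's):

object ImplicitSeqExample {
  implicit class NamedSeq(val names: Seq[String]) {
    private lazy val ordinals: Map[String, Int] =
      names.zipWithIndex.reverse.toMap // first occurrence wins on duplicates
    def getOrdinal(name: String): Int = ordinals.getOrElse(name, -1)
  }

  def main(args: Array[String]): Unit = {
    val cols = Seq("id", "name", "id")
    println(cols.getOrdinal("id"))  // 0: the compiler inserts the conversion
    println(cols.getOrdinal("age")) // -1 for a missing name
  }
}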

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala

Lines changed: 1 addition & 1 deletion
@@ -296,7 +296,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanType] {
   /**
    * All the attributes that are used for this plan.
    */
-  lazy val allAttributes: Seq[Attribute] = children.flatMap(_.output)
+  lazy val allAttributes: AttributeSeq = children.flatMap(_.output)

   private def cleanExpression(e: Expression): Expression = e match {
     case a: Alias =>
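
Changing the declared type, rather than leaving it as `Seq[Attribute]` and relying on the implicit conversion at each use site, is what makes the caching stick: the conversion allocates a fresh wrapper every time it fires, so a lazy map inside it would be rebuilt per use, whereas a field typed `AttributeSeq` holds one wrapper (and one map) for the lifetime of the plan node. Continuing the simplified sketch from above, with `NamedSeq` standing in for `AttributeSeq`:

// Conversion fires on every call: a fresh NamedSeq each time, so the
// lazy map inside it is rebuilt per invocation.
def perCall(cols: Seq[String], name: String): Int = cols.getOrdinal(name)

// Conversion fires once, at assignment: all later calls share one wrapper
// and therefore one lazily built map.
val cached: NamedSeq = Seq("id", "name", "score")
def amortized(name: String): Int = cached.getOrdinal(name)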

sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ case class HashAggregateExec(

   require(HashAggregateExec.supportsAggregate(aggregateBufferAttributes))

-  override lazy val allAttributes: Seq[Attribute] =
+  override lazy val allAttributes: AttributeSeq =
     child.output ++ aggregateBufferAttributes ++ aggregateAttributes ++
       aggregateExpressions.flatMap(_.aggregateFunction.inputAggBufferAttributes)
