 
 package org.apache.spark.sql.comet
 
-import java.io.{ByteArrayOutputStream, DataInputStream}
-import java.nio.channels.Channels
+import java.io.ByteArrayOutputStream
 
 import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 
-import org.apache.spark.{SparkEnv, TaskContext}
-import org.apache.spark.io.CompressionCodec
+import org.apache.spark.TaskContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, Expression, NamedExpression, SortOrder}
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateMode}
 import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide}
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, PartitioningCollection, UnknownPartitioning}
-import org.apache.spark.sql.comet.execution.shuffle.{ArrowReaderIterator, CometShuffleExchangeExec}
+import org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec
 import org.apache.spark.sql.comet.plans.PartitioningPreservingUnaryExecNode
 import org.apache.spark.sql.comet.util.Utils
 import org.apache.spark.sql.execution.{BinaryExecNode, ColumnarToRowExec, ExecSubqueryExpression, ExplainUtils, LeafExecNode, ScalarSubquery, SparkPlan, UnaryExecNode}
@@ -78,18 +76,6 @@ abstract class CometExec extends CometPlan {
   // outputPartitioning of SparkPlan, e.g., AQEShuffleReadExec.
   override def outputPartitioning: Partitioning = originalPlan.outputPartitioning
 
-  /**
-   * Executes the Comet operator and returns the result as an iterator of ColumnarBatch.
-   */
-  def executeColumnarCollectIterator(): (Long, Iterator[ColumnarBatch]) = {
-    val countsAndBytes = CometExec.getByteArrayRdd(this).collect()
-    val total = countsAndBytes.map(_._1).sum
-    val rows = countsAndBytes.iterator
-      .flatMap(countAndBytes =>
-        CometExec.decodeBatches(countAndBytes._2, this.getClass.getSimpleName))
-    (total, rows)
-  }
-
   protected def setSubqueries(planId: Long, sparkPlan: SparkPlan): Unit = {
     sparkPlan.children.foreach(setSubqueries(planId, _))
 
@@ -161,21 +147,6 @@ object CometExec {
       Utils.serializeBatches(iter)
     }
   }
-
-  /**
-   * Decodes the byte arrays back to ColumnarBatchs and put them into buffer.
-   */
-  def decodeBatches(bytes: ChunkedByteBuffer, source: String): Iterator[ColumnarBatch] = {
-    if (bytes.size == 0) {
-      return Iterator.empty
-    }
-
-    val codec = CompressionCodec.createCodec(SparkEnv.get.conf)
-    val cbbis = bytes.toInputStream()
-    val ins = new DataInputStream(codec.compressedInputStream(cbbis))
-
-    new ArrowReaderIterator(Channels.newChannel(ins), source)
-  }
 }
 
 /**
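The removed helpers above collected a Comet operator's output as compressed, Arrow-serialized byte arrays and decoded them back into `ColumnarBatch`es. For reference, a minimal sketch of that decode pattern is shown below, reconstructed only from the deleted lines; the object and method names are hypothetical, and it assumes the `ArrowReaderIterator`, `CompressionCodec`, and `ChunkedByteBuffer` APIs used by the removed code are still accessible from the `org.apache.spark.sql.comet` package.

```scala
// Hypothetical sketch, not part of this change: reproduces the decode pattern of
// the removed CometExec.decodeBatches, using only calls seen in the deleted lines.
package org.apache.spark.sql.comet

import java.io.DataInputStream
import java.nio.channels.Channels

import org.apache.spark.SparkEnv
import org.apache.spark.io.CompressionCodec
import org.apache.spark.sql.comet.execution.shuffle.ArrowReaderIterator
import org.apache.spark.sql.vectorized.ColumnarBatch
import org.apache.spark.util.io.ChunkedByteBuffer

object ColumnarBatchDecodeSketch {

  /** Decodes compressed, Arrow-serialized bytes back into ColumnarBatches. */
  def decode(bytes: ChunkedByteBuffer, source: String): Iterator[ColumnarBatch] = {
    if (bytes.size == 0) {
      Iterator.empty
    } else {
      // Decompress with the codec configured for this SparkEnv, then let Comet's
      // Arrow reader iterate the batches, as the removed helper did.
      val codec = CompressionCodec.createCodec(SparkEnv.get.conf)
      val ins = new DataInputStream(codec.compressedInputStream(bytes.toInputStream()))
      new ArrowReaderIterator(Channels.newChannel(ins), source)
    }
  }
}
```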