Merged
Changes from all commits (82 commits)
9657b75
feat: support array_append (#1072)
NoeB Nov 13, 2024
c32bf0c
chore: Simplify CometShuffleMemoryAllocator to use Spark unified memo…
viirya Nov 14, 2024
f3da844
docs: Update benchmarking.md (#1085)
rluvaton-flarion Nov 14, 2024
2c832b4
feat: Require offHeap memory to be enabled (always use unified memory…
andygrove Nov 14, 2024
7cec285
test: Restore one test in CometExecSuite by adding COMET_SHUFFLE_MODE…
viirya Nov 15, 2024
10ef62a
Add changelog for 0.4.0 (#1089)
andygrove Nov 15, 2024
0c9a403
chore: Prepare for 0.5.0 development (#1090)
andygrove Nov 15, 2024
406ffef
build: Skip installation of spark-integration and fuzz testing modul…
parthchandra Nov 15, 2024
bfd7054
Add hint for finding the GPG key to use when publishing to maven (#1093)
andygrove Nov 15, 2024
59da6ce
docs: Update documentation for 0.4.0 release (#1096)
andygrove Nov 18, 2024
ca3a529
fix: Unsigned type related bugs (#1095)
kazuyukitanimura Nov 19, 2024
b64c13d
chore: Include first ScanExec batch in metrics (#1105)
andygrove Nov 20, 2024
19dd58d
chore: Improve CometScan metrics (#1100)
andygrove Nov 20, 2024
e602305
chore: Add custom metric for native shuffle fetching batches from JVM…
andygrove Nov 21, 2024
9990b34
feat: support array_insert (#1073)
SemyonSinchenko Nov 22, 2024
500895d
feat: enable decimal to decimal cast of different precision and scale…
himadripal Nov 22, 2024
7b1a290
docs: fix readme FGPA/FPGA typo (#1117)
gstvg Nov 24, 2024
5400fd7
fix: Use RDD partition index (#1112)
viirya Nov 25, 2024
ebdde77
fix: Various metrics bug fixes and improvements (#1111)
andygrove Dec 2, 2024
9b250c4
fix: Don't create CometScanExec for subclasses of ParquetFileFormat (…
Kimahriman Dec 2, 2024
95727aa
fix: Fix metrics regressions (#1132)
andygrove Dec 3, 2024
36a2307
docs: Add more technical detail and new diagram to Comet plugin overv…
andygrove Dec 3, 2024
2671e0c
Stop passing Java config map into native createPlan (#1101)
andygrove Dec 4, 2024
8d7bcb8
feat: Improve ScanExec native metrics (#1133)
andygrove Dec 6, 2024
587c29b
chore: Remove unused StringView struct (#1143)
andygrove Dec 6, 2024
b95dc1d
docs: Add some documentation explaining how shuffle works (#1148)
andygrove Dec 6, 2024
1c6c7a9
test: enable more Spark 4.0 tests (#1145)
kazuyukitanimura Dec 6, 2024
8d83cc1
chore: Refactor cast to use SparkCastOptions param (#1146)
andygrove Dec 6, 2024
21503ca
Enable more scenarios in CometExecBenchmark. (#1151)
mbutrovich Dec 7, 2024
73f1405
chore: Move more expressions from core crate to spark-expr crate (#1152)
andygrove Dec 9, 2024
5c45fdc
remove dead code (#1155)
andygrove Dec 10, 2024
2c1a6b9
fix: Spark 4.0-preview1 SPARK-47120 (#1156)
kazuyukitanimura Dec 11, 2024
49cf0d7
chore: Move string kernels and expressions to spark-expr crate (#1164)
andygrove Dec 12, 2024
7db9aa6
chore: Move remaining expressions to spark-expr crate + some minor re…
andygrove Dec 12, 2024
f1d0879
chore: Add ignored tests for reading complex types from Parquet (#1167)
andygrove Dec 12, 2024
b9ac78b
feat: Add Spark-compatible implementation of SchemaAdapterFactory (#1…
andygrove Dec 17, 2024
46a28db
fix: Document enabling comet explain plan usage in Spark (4.0) (#1176)
parthchandra Dec 17, 2024
655081b
test: enabling Spark tests with offHeap requirement (#1177)
kazuyukitanimura Dec 18, 2024
e297d23
feat: Improve shuffle metrics (second attempt) (#1175)
andygrove Dec 18, 2024
8f4a8a5
fix: stddev_pop should not directly return 0.0 when count is 1.0 (#1184)
viirya Dec 19, 2024
ea6d205
feat: Make native shuffle compression configurable and respect `spark…
andygrove Dec 20, 2024
053b7cc
minor: move shuffle classes from common to spark (#1193)
andygrove Dec 22, 2024
639fa2f
minor: refactor decodeBatches to make private in broadcast exchange (…
andygrove Dec 22, 2024
58dee73
minor: refactor prepare_output so that it does not require an Executi…
andygrove Dec 22, 2024
5432e03
fix: fix missing explanation for then branch in case when (#1200)
rluvaton Dec 27, 2024
103f82f
minor: remove unused source files (#1202)
andygrove Dec 28, 2024
5d2c909
chore: Upgrade to DataFusion 44.0.0-rc2 (#1154)
andygrove Dec 28, 2024
4f8ce75
feat: add support for array_contains expression (#1163)
dharanad Jan 2, 2025
9320aed
feat: Add a `spark.comet.exec.memoryPool` configuration for experimen…
Kontinuation Jan 3, 2025
2e0f00a
feat: Reenable tests for filtered SMJ anti join (#1211)
comphead Jan 3, 2025
4333dce
chore: Add safety check to CometBuffer (#1050)
viirya Jan 3, 2025
4b56c52
remove unreachable code (#1213)
andygrove Jan 4, 2025
5f1e998
test: Enable Comet by default except some tests in SparkSessionExten…
kazuyukitanimura Jan 4, 2025
e39ffa6
extract struct expressions to folders based on spark grouping (#1216)
rluvaton Jan 6, 2025
5c389d1
chore: extract static invoke expressions to folders based on spark gr…
rluvaton Jan 6, 2025
e72beb1
chore: Follow-on PR to fully enable onheap memory usage (#1210)
andygrove Jan 6, 2025
74a6a8d
feat: Move shuffle block decompression and decoding to native code an…
andygrove Jan 7, 2025
3f0d442
chore: extract agg_funcs expressions to folders based on spark groupi…
rluvaton Jan 7, 2025
4cf840f
extract datetime_funcs expressions to folders based on spark grouping…
rluvaton Jan 7, 2025
508db06
chore: use datafusion from crates.io (#1232)
rluvaton Jan 7, 2025
c19202c
chore: extract strings file to `strings_func` like in spark grouping …
rluvaton Jan 8, 2025
fbcf025
chore: extract predicate_functions expressions to folders based on sp…
rluvaton Jan 8, 2025
ca7b4a8
build(deps): bump protobuf version to 3.21.12 (#1234)
wForget Jan 8, 2025
c6acc9d
extract json_funcs expressions to folders based on spark grouping (#1…
rluvaton Jan 8, 2025
0a68f1c
test: Enable shuffle by default in Spark tests (#1240)
kazuyukitanimura Jan 9, 2025
e731b6e
chore: extract hash_funcs expressions to folders based on spark group…
rluvaton Jan 9, 2025
be48839
fix: Fall back to Spark for unsupported partition or sort expressions…
andygrove Jan 9, 2025
d15d051
perf: Improve query planning to more reliably fall back to columnar s…
andygrove Jan 9, 2025
d52038e
fix regression (#1259)
andygrove Jan 10, 2025
c25060e
feat: add support for array_remove expression (#1179)
jatin510 Jan 12, 2025
e8261fb
fix: Fall back to Spark for distinct aggregates (#1262)
andygrove Jan 13, 2025
d7a7812
feat: Implement custom RecordBatch serde for shuffle for improved per…
andygrove Jan 13, 2025
1eb932a
docs: Update TPC-H benchmark results (#1257)
andygrove Jan 13, 2025
9fe5420
fix: disable initCap by default (#1276)
kazuyukitanimura Jan 14, 2025
cbe50e1
chore: Add changelog for 0.5.0 (#1278)
andygrove Jan 14, 2025
08d892a
update TPC-DS results for 0.5.0 (#1277)
andygrove Jan 14, 2025
9c1f0ee
fix: cast timestamp to decimal is unsupported (#1281)
wForget Jan 14, 2025
017963a
Merge branch 'main' into comet-parquet-exec
parthchandra Jan 14, 2025
285396c
Fix build after merge
parthchandra Jan 14, 2025
b3703f5
Fix tests after merge
parthchandra Jan 15, 2025
2c83bdd
Fix plans after merge
parthchandra Jan 15, 2025
79717b8
fix partition id in execute plan after merge (from Andy Grove)
parthchandra Jan 15, 2025
19 changes: 6 additions & 13 deletions README.md
@@ -46,30 +46,23 @@ The following chart shows the time it takes to run the 22 TPC-H queries against
using a single executor with 8 cores. See the [Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html)
for details of the environment used for these benchmarks.

When using Comet, the overall run time is reduced from 615 seconds to 364 seconds, a 1.7x speedup, with query 1
running 9x faster than Spark.
When using Comet, the overall run time is reduced from 640 seconds to 331 seconds, very close to a 2x speedup.

Running the same queries with DataFusion standalone (without Spark) using the same number of cores results in a 3.6x
speedup compared to Spark.
![](docs/source/_static/images/benchmark-results/0.5.0/tpch_allqueries.png)

Comet is not yet achieving full DataFusion speeds in all cases, but with future work we aim to provide a 2x-4x speedup
for a broader set of queries.
Here is a breakdown showing relative performance of Spark and Comet for each TPC-H query.

![](docs/source/_static/images/benchmark-results/0.4.0/tpch_allqueries.png)

Here is a breakdown showing relative performance of Spark, Comet, and DataFusion for each TPC-H query.

![](docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_compare.png)
![](docs/source/_static/images/benchmark-results/0.5.0/tpch_queries_compare.png)

The following charts show how much Comet currently accelerates each query from the benchmark.

### Relative speedup

![](docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_rel.png)
![](docs/source/_static/images/benchmark-results/0.5.0/tpch_queries_speedup_rel.png)

### Absolute speedup

![](docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_abs.png)
![](docs/source/_static/images/benchmark-results/0.5.0/tpch_queries_speedup_abs.png)

These benchmarks can be reproduced in any environment using the documentation in the
[Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html). We encourage
22 changes: 0 additions & 22 deletions common/src/main/java/org/apache/comet/parquet/ColumnReader.java
@@ -172,28 +172,6 @@ public void close() {

/** Returns a decoded {@link CometDecodedVector Comet vector}. */
public CometDecodedVector loadVector() {
// Only re-use the Comet vector iff:
// 1. we're not using dictionary encoding, since with dictionary encoding the native
// side may fall back to plain encoding and the underlying memory address for the vector
// will change as a result.
// 2. the column type is of fixed width; in other words, string/binary are not supported,
// since the native side may resize the vector and therefore change the memory address.
// 3. the last loaded vector contains null values: if the values of the last vector are
// all non-null, the Arrow C data API will skip loading the native validity buffer, so we
// should not re-use the vector in that case.
// 4. or the last loaded vector doesn't contain any null value and the current values are
// also all non-null, which means we can likewise re-use the loaded vector.
// 5. the new number of values is the same or smaller.
if ((hadNull || currentNumNulls == 0)
&& currentVector != null
&& dictionary == null
&& currentVector.isFixedLength()
&& currentVector.numValues() >= currentNumValues) {
currentVector.setNumNulls(currentNumNulls);
currentVector.setNumValues(currentNumValues);
return currentVector;
}

LOG.debug("Reloading vector");

// Close the previous vector first to release struct memory allocated to import Arrow array &
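The comment removed above describes exactly when the previously imported Arrow vector can be handed back without a reload. A minimal Scala sketch of that predicate, using stand-in names that mirror the Java fields (illustrative only, not the actual Comet API):

```scala
// Stand-in for org.apache.comet.vector.CometDecodedVector so the sketch is
// self-contained; only the members the predicate needs are declared.
trait DecodedVector {
  def isFixedLength: Boolean
  def numValues: Int
}

object VectorReuse {
  // Condensed form of the removed re-use check; parameter names mirror the
  // Java fields in ColumnReader above.
  def canReuse(
      currentVector: DecodedVector, // last vector imported from native, may be null
      dictionary: AnyRef, // non-null when dictionary encoding is active
      hadNull: Boolean, // last vector contained nulls, so its validity buffer was loaded
      currentNumNulls: Int, // null count of the batch about to be loaded
      currentNumValues: Int // value count of the batch about to be loaded
  ): Boolean =
    (hadNull || currentNumNulls == 0) && // conditions 3/4: validity buffer present or not needed
    currentVector != null &&
    dictionary == null && // condition 1: plain encoding only
    currentVector.isFixedLength && // condition 2: fixed-width types only
    currentVector.numValues >= currentNumValues // condition 5: existing buffers are large enough
}
```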
common/src/main/java/org/apache/comet/parquet/ConstantColumnReader.java
@@ -53,13 +53,13 @@ public ConstantColumnReader(

public ConstantColumnReader(
DataType type, ColumnDescriptor descriptor, Object value, boolean useDecimal128) {
super(type, descriptor, useDecimal128);
super(type, descriptor, useDecimal128, true);
this.value = value;
}

ConstantColumnReader(
DataType type, ColumnDescriptor descriptor, int batchSize, boolean useDecimal128) {
super(type, descriptor, useDecimal128);
super(type, descriptor, useDecimal128, true);
this.batchSize = batchSize;
initNative();
}
common/src/main/java/org/apache/comet/parquet/MetadataColumnReader.java
@@ -40,9 +40,14 @@ public class MetadataColumnReader extends AbstractColumnReader {
private ArrowArray array = null;
private ArrowSchema schema = null;

public MetadataColumnReader(DataType type, ColumnDescriptor descriptor, boolean useDecimal128) {
private boolean isConstant;

public MetadataColumnReader(
DataType type, ColumnDescriptor descriptor, boolean useDecimal128, boolean isConstant) {
// TODO: should we handle legacy dates & timestamps for metadata columns?
super(type, descriptor, useDecimal128, false);

this.isConstant = isConstant;
}

@Override
@@ -62,7 +67,7 @@ public void readBatch(int total) {

Native.currentBatch(nativeHandle, arrayAddr, schemaAddr);
FieldVector fieldVector = Data.importVector(allocator, array, schema, null);
vector = new CometPlainVector(fieldVector, useDecimal128);
vector = new CometPlainVector(fieldVector, useDecimal128, false, isConstant);
}

vector.setNumValues(total);
common/src/main/java/org/apache/comet/parquet/RowIndexColumnReader.java
@@ -33,7 +33,7 @@ public class RowIndexColumnReader extends MetadataColumnReader {
private long offset;

public RowIndexColumnReader(StructField field, int batchSize, long[] indices) {
super(field.dataType(), TypeUtil.convertToParquet(field), false);
super(field.dataType(), TypeUtil.convertToParquet(field), false, false);
this.indices = indices;
setBatchSize(batchSize);
}
16 changes: 16 additions & 0 deletions common/src/main/java/org/apache/comet/vector/CometPlainVector.java
@@ -38,11 +38,18 @@ public class CometPlainVector extends CometDecodedVector {
private byte booleanByteCache;
private int booleanByteCacheIndex = -1;

private boolean isReused;

public CometPlainVector(ValueVector vector, boolean useDecimal128) {
this(vector, useDecimal128, false);
}

public CometPlainVector(ValueVector vector, boolean useDecimal128, boolean isUuid) {
this(vector, useDecimal128, isUuid, false);
}

public CometPlainVector(
ValueVector vector, boolean useDecimal128, boolean isUuid, boolean isReused) {
super(vector, vector.getField(), useDecimal128, isUuid);
// NullType doesn't have data buffer.
if (vector instanceof NullVector) {
@@ -52,6 +59,15 @@ public CometPlainVector(ValueVector vector, boolean useDecimal128, boolean isUuid) {
}

isBaseFixedWidthVector = valueVector instanceof BaseFixedWidthVector;
this.isReused = isReused;
}

public boolean isReused() {
return isReused;
}

public void setReused(boolean isReused) {
this.isReused = isReused;
}

@Override
56 changes: 41 additions & 15 deletions common/src/main/scala/org/apache/comet/CometConf.scala
@@ -210,6 +210,8 @@ object CometConf extends ShimCometConf {
createExecEnabledConfig("window", defaultValue = true)
val COMET_EXEC_TAKE_ORDERED_AND_PROJECT_ENABLED: ConfigEntry[Boolean] =
createExecEnabledConfig("takeOrderedAndProject", defaultValue = true)
val COMET_EXEC_INITCAP_ENABLED: ConfigEntry[Boolean] =
createExecEnabledConfig("initCap", defaultValue = false)

val COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED: ConfigEntry[Boolean] =
conf("spark.comet.exec.sortMergeJoinWithJoinFilter.enabled")
@@ -275,6 +277,13 @@
.checkValues(Set("native", "jvm", "auto"))
.createWithDefault("auto")

val COMET_SHUFFLE_FALLBACK_TO_COLUMNAR: ConfigEntry[Boolean] =
conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.fallbackToColumnar")
.doc("Whether to try falling back to columnar shuffle when native shuffle is not supported")
.internal()
.booleanConf
.createWithDefault(false)

val COMET_EXEC_BROADCAST_FORCE_ENABLED: ConfigEntry[Boolean] =
conf(s"$COMET_EXEC_CONFIG_PREFIX.broadcast.enabled")
.doc(
@@ -293,12 +302,29 @@
.booleanConf
.createWithDefault(false)

val COMET_EXEC_SHUFFLE_CODEC: ConfigEntry[String] = conf(
s"$COMET_EXEC_CONFIG_PREFIX.shuffle.codec")
.doc(
"The codec of Comet native shuffle used to compress shuffle data. Only zstd is supported.")
.stringConf
.createWithDefault("zstd")
val COMET_EXEC_SHUFFLE_COMPRESSION_CODEC: ConfigEntry[String] =
conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.compression.codec")
.doc(
"The codec of Comet native shuffle used to compress shuffle data. lz4, zstd, and " +
"snappy are supported. Compression can be disabled by setting " +
"spark.shuffle.compress=false.")
.stringConf
.checkValues(Set("zstd", "lz4", "snappy"))
.createWithDefault("lz4")

val COMET_EXEC_SHUFFLE_COMPRESSION_ZSTD_LEVEL: ConfigEntry[Int] =
conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.compression.zstd.level")
.doc("The compression level to use when compressing shuffle files with zstd.")
.intConf
.createWithDefault(1)

val COMET_SHUFFLE_ENABLE_FAST_ENCODING: ConfigEntry[Boolean] =
conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.enableFastEncoding")
.doc("Whether to enable Comet's faster proprietary encoding for shuffle blocks " +
"rather than using Arrow IPC.")
.internal()
.booleanConf
.createWithDefault(true)

val COMET_COLUMNAR_SHUFFLE_ASYNC_ENABLED: ConfigEntry[Boolean] =
conf("spark.comet.columnar.shuffle.async.enabled")
@@ -465,21 +491,21 @@
.intConf
.createWithDefault(8192)

val COMET_EXEC_MEMORY_FRACTION: ConfigEntry[Double] = conf("spark.comet.exec.memoryFraction")
.doc(
"The fraction of memory from Comet memory overhead that the native memory " +
"manager can use for execution. The purpose of this config is to set aside memory for " +
"untracked data structures, as well as imprecise size estimation during memory " +
"acquisition.")
.doubleConf
.createWithDefault(0.7)

val COMET_PARQUET_ENABLE_DIRECT_BUFFER: ConfigEntry[Boolean] =
conf("spark.comet.parquet.enable.directBuffer")
.doc("Whether to use Java direct byte buffer when reading Parquet.")
.booleanConf
.createWithDefault(false)

val COMET_EXEC_MEMORY_POOL_TYPE: ConfigEntry[String] = conf("spark.comet.exec.memoryPool")
.doc(
"The type of memory pool to be used for Comet native execution. " +
"Available memory pool types are 'greedy', 'fair_spill', 'greedy_task_shared', " +
"'fair_spill_task_shared', 'greedy_global' and 'fair_spill_global', By default, " +
"this config is 'greedy_task_shared'.")
.stringConf
.createWithDefault("greedy_task_shared")

val COMET_SCAN_PREFETCH_ENABLED: ConfigEntry[Boolean] =
conf("spark.comet.scan.preFetch.enabled")
.doc("Whether to enable pre-fetching feature of CometScan.")
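The new entries above are ordinary Spark configs. A minimal sketch of a session that exercises them — assuming `COMET_EXEC_CONFIG_PREFIX` resolves to `spark.comet.exec` (consistent with the explicit `spark.comet.exec.memoryPool` key above); the app name and sizes are placeholders:

```scala
import org.apache.spark.sql.SparkSession

// Illustrative settings only; key names follow the config entries above.
val spark = SparkSession
  .builder()
  .appName("comet-shuffle-config-demo") // placeholder name
  // Comet now requires Spark's unified off-heap memory (see commit 2c832b4 above).
  .config("spark.memory.offHeap.enabled", "true")
  .config("spark.memory.offHeap.size", "4g") // placeholder size
  // Native shuffle compression codec: lz4 (default), zstd, or snappy.
  .config("spark.comet.exec.shuffle.compression.codec", "zstd")
  .config("spark.comet.exec.shuffle.compression.zstd.level", "1")
  // Experimental native memory pool selection; greedy_task_shared is the default.
  .config("spark.comet.exec.memoryPool", "greedy_task_shared")
  .getOrCreate()
```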
common/src/main/scala/org/apache/comet/vector/NativeUtil.scala
@@ -163,8 +163,6 @@ class NativeUtil {
case numRows =>
val cometVectors = importVector(arrays, schemas)
Some(new ColumnarBatch(cometVectors.toArray, numRows.toInt))
case flag =>
throw new IllegalStateException(s"Invalid native flag: $flag")
}
}


This file was deleted.
