apache · snleee · Jan 24, 2024 · Dec 15, 2023 · Dec 21, 2023 · Dec 22, 2023
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java b/pinot-core/src/main/java/org/apache/pinot/core/common/MinionConstants.java
@@ -104,6 +104,7 @@ public static abstract class MergeTask {
     // Segment config
     public static final String MAX_NUM_RECORDS_PER_TASK_KEY = "maxNumRecordsPerTask";
     public static final String MAX_NUM_RECORDS_PER_SEGMENT_KEY = "maxNumRecordsPerSegment";
+    public static final String SEGMENT_MAPPER_FILE_SIZE_IN_BYTES = "segmentMapperFileSizeThresholdInBytes";
     public static final String MAX_NUM_PARALLEL_BUCKETS = "maxNumParallelBuckets";
     public static final String SEGMENT_NAME_PREFIX_KEY = "segmentNamePrefix";
     public static final String SEGMENT_NAME_POSTFIX_KEY = "segmentNamePostfix";

diff --git a/...-core/src/main/java/org/apache/pinot/core/segment/processing/framework/SegmentConfig.java b/...-core/src/main/java/org/apache/pinot/core/segment/processing/framework/SegmentConfig.java
@@ -31,22 +31,28 @@
 @JsonIgnoreProperties(ignoreUnknown = true)
 public class SegmentConfig {
   public static final int DEFAULT_MAX_NUM_RECORDS_PER_SEGMENT = 5_000_000;
+  public static final long DEFAULT_SEGMENT_MAPPER_FILE_SIZE_IN_BYTES = Long.MAX_VALUE;
 
   private final int _maxNumRecordsPerSegment;
   private final String _segmentNamePrefix;
   private final String _segmentNamePostfix;
   private final String _fixedSegmentName;
+  private final long _segmentMapperFileSizeThresholdInBytes;
 
   @JsonCreator
   private SegmentConfig(@JsonProperty(value = "maxNumRecordsPerSegment", required = true) int maxNumRecordsPerSegment,
       @JsonProperty("segmentNamePrefix") @Nullable String segmentNamePrefix,
       @JsonProperty("segmentNamePostfix") @Nullable String segmentNamePostfix,
-      @JsonProperty("fixedSegmentName") @Nullable String fixedSegmentName) {
+      @JsonProperty("fixedSegmentName") @Nullable String fixedSegmentName,
+      @JsonProperty(value = "segmentMapperFileSizeThresholdInBytes", required = true)
+      long segmentMapperFileSizeThresholdInBytes) {
     Preconditions.checkState(maxNumRecordsPerSegment > 0, "Max num records per segment must be > 0");
+    Preconditions.checkState(segmentMapperFileSizeThresholdInBytes > 0, "Intermediate file size threshold must be > 0");
     _maxNumRecordsPerSegment = maxNumRecordsPerSegment;
     _segmentNamePrefix = segmentNamePrefix;
     _segmentNamePostfix = segmentNamePostfix;
     _fixedSegmentName = fixedSegmentName;
+    _segmentMapperFileSizeThresholdInBytes = segmentMapperFileSizeThresholdInBytes;
   }
 
   /**
@@ -71,15 +77,21 @@ public String getFixedSegmentName() {
     return _fixedSegmentName;
   }
 
+  public long getIntermediateFileSizeThreshold() {
+    return _segmentMapperFileSizeThresholdInBytes;
+  }
+
   /**
    * Builder for SegmentConfig
    */
   public static class Builder {
     private int _maxNumRecordsPerSegment = DEFAULT_MAX_NUM_RECORDS_PER_SEGMENT;
+    private long _segmentMapperFileSizeThresholdInBytes = DEFAULT_SEGMENT_MAPPER_FILE_SIZE_IN_BYTES;
     private String _segmentNamePrefix;
     private String _segmentNamePostfix;
     private String _fixedSegmentName;
 
+
     public Builder setMaxNumRecordsPerSegment(int maxNumRecordsPerSegment) {
       _maxNumRecordsPerSegment = maxNumRecordsPerSegment;
       return this;
@@ -99,17 +111,25 @@ public Builder setFixedSegmentName(String fixedSegmentName) {
       _fixedSegmentName = fixedSegmentName;
       return this;
     }
+    public Builder setIntermediateFileSizeThreshold(long segmentMapperFileSizeThresholdInBytes) {
+      _segmentMapperFileSizeThresholdInBytes = segmentMapperFileSizeThresholdInBytes;
+      return this;
+    }
 
     public SegmentConfig build() {
       Preconditions.checkState(_maxNumRecordsPerSegment > 0, "Max num records per segment must be > 0");
-      return new SegmentConfig(_maxNumRecordsPerSegment, _segmentNamePrefix, _segmentNamePostfix, _fixedSegmentName);
+      Preconditions.checkState(_segmentMapperFileSizeThresholdInBytes > 0,
+          "Intermediate file size threshold must be > 0");
+      return new SegmentConfig(_maxNumRecordsPerSegment, _segmentNamePrefix, _segmentNamePostfix, _fixedSegmentName,
+          _segmentMapperFileSizeThresholdInBytes);
     }
   }
 
   @Override
   public String toString() {
-    return "SegmentConfig{" + "_maxNumRecordsPerSegment=" + _maxNumRecordsPerSegment + ", _segmentNamePrefix='"
-        + _segmentNamePrefix + '\'' + ", _segmentNamePostfix='" + _segmentNamePostfix + '\'' + ", _fixedSegmentName='"
-        + _fixedSegmentName + '\'' + '}';
+    return "SegmentConfig{" + "_maxNumRecordsPerSegment=" + _maxNumRecordsPerSegment
+        + ", _segmentMapperFileSizeThresholdInBytes=" + _segmentMapperFileSizeThresholdInBytes
+        + ", _segmentNamePrefix='" + _segmentNamePrefix + '\'' + ", _segmentNamePostfix='" + _segmentNamePostfix + '\''
+        + ", _fixedSegmentName='" + _fixedSegmentName + '\'' + '}';
   }
 }
diff --git a/...in/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFramework.java b/...in/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFramework.java
@@ -66,8 +66,8 @@ public class SegmentProcessorFramework {
   private final File _mapperOutputDir;
   private final File _reducerOutputDir;
   private final File _segmentsOutputDir;
-  private Map<String, GenericRowFileManager> _partitionToFileManagerMap;
   private final SegmentNumRowProvider _segmentNumRowProvider;
+  private int _segmentSequenceId = 0;
 
   /**
    * Initializes the SegmentProcessorFramework with record readers, config and working directory. We will now rely on
@@ -124,14 +124,8 @@ public List<File> process()
     try {
       return doProcess();
     } catch (Exception e) {
-      // Cleaning up file managers no matter they are from map phase or reduce phase. For those from reduce phase, the
-      // reducers should have cleaned up the corresponding file managers from map phase already.
-      if (_partitionToFileManagerMap != null) {
-        for (GenericRowFileManager fileManager : _partitionToFileManagerMap.values()) {
-          fileManager.cleanUp();
-        }
-      }
-      // Cleaning up output dir as processing has failed.
+      // Cleaning up output dir as processing has failed. file managers left from map or reduce phase will be cleaned
+      // up in the respective phases.
       FileUtils.deleteQuietly(_segmentsOutputDir);
       throw e;
     } finally {
@@ -142,24 +136,103 @@ public List<File> process()
 
   private List<File> doProcess()
       throws Exception {
-    // Map phase
-    LOGGER.info("Beginning map phase on {} record readers", _recordReaderFileConfigs.size());
-    SegmentMapper mapper = new SegmentMapper(_recordReaderFileConfigs, _customRecordTransformers,
-        _segmentProcessorConfig, _mapperOutputDir);
-    _partitionToFileManagerMap = mapper.map();
-
-    // Check for mapper output files
-    if (_partitionToFileManagerMap.isEmpty()) {
-      LOGGER.info("No partition generated from mapper phase, skipping the reducer phase");
-      return Collections.emptyList();
+    List<File> outputSegmentDirs = new ArrayList<>();
+    int numRecordReaders = _recordReaderFileConfigs.size();
+    int nextRecordReaderIndexToBeProcessed = 0;
+    int iterationCount = 1;
+    Consumer<Object> observer = _segmentProcessorConfig.getProgressObserver();
+    boolean isMapperOutputSizeThresholdEnabled =
+        _segmentProcessorConfig.getSegmentConfig().getIntermediateFileSizeThreshold() != Long.MAX_VALUE;
+
+    while (nextRecordReaderIndexToBeProcessed < numRecordReaders) {
+      // Initialise the mapper. Eliminate the record readers that have been processed in the previous iterations.
+      SegmentMapper mapper =
+          new SegmentMapper(_recordReaderFileConfigs.subList(nextRecordReaderIndexToBeProcessed, numRecordReaders),
+              _customRecordTransformers, _segmentProcessorConfig, _mapperOutputDir);
+
+      // Log start of iteration details only if intermediate file size threshold is set.
+      if (isMapperOutputSizeThresholdEnabled) {
+        String logMessage =
+            String.format("Starting iteration %d with %d record readers. Starting index = %d, end index = %d",
+                iterationCount,
+                _recordReaderFileConfigs.subList(nextRecordReaderIndexToBeProcessed, numRecordReaders).size(),
+                nextRecordReaderIndexToBeProcessed + 1, numRecordReaders);
+        LOGGER.info(logMessage);
+        observer.accept(logMessage);
+      }
+
+      // Map phase.
+      long mapStartTimeInMs = System.currentTimeMillis();
+      Map<String, GenericRowFileManager> partitionToFileManagerMap = mapper.map();
+
+      // Log the time taken to map.
+      LOGGER.info("Finished iteration {} in {}ms", iterationCount, System.currentTimeMillis() - mapStartTimeInMs);
+
+      // Check for mapper output files, if no files are generated, skip the reducer phase and move on to the next
+      // iteration.
+      if (partitionToFileManagerMap.isEmpty()) {
+        LOGGER.info("No mapper output files generated, skipping reduce phase");
+        nextRecordReaderIndexToBeProcessed = getNextRecordReaderIndexToBeProcessed(nextRecordReaderIndexToBeProcessed);
+        continue;
+      }
+
+      // Reduce phase.
+      doReduce(partitionToFileManagerMap);
+
+      // Segment creation phase. Add the created segments to the final list.
+      outputSegmentDirs.addAll(generateSegment(partitionToFileManagerMap));
+
+      // Store the starting index of the record readers that were processed in this iteration for logging purposes.
+      int startingProcessedRecordReaderIndex = nextRecordReaderIndexToBeProcessed;
+
+      // Update next record reader index to be processed.
+      nextRecordReaderIndexToBeProcessed = getNextRecordReaderIndexToBeProcessed(nextRecordReaderIndexToBeProcessed);
+
+      // Log the details between iteration only if intermediate file size threshold is set.
+      if (isMapperOutputSizeThresholdEnabled) {
+        // Take care of logging the proper RecordReader index in case of the last iteration.
+        int boundaryIndexToLog =
+            nextRecordReaderIndexToBeProcessed == numRecordReaders ? nextRecordReaderIndexToBeProcessed
+                : nextRecordReaderIndexToBeProcessed + 1;
+
+        // We are sure that the last RecordReader is completely processed in the last iteration else it may or may not
+        // have completed processing. Log it accordingly.
+        String logMessage;
+        if (nextRecordReaderIndexToBeProcessed == numRecordReaders) {
+          logMessage = String.format("Finished processing all of %d RecordReaders", numRecordReaders);
+        } else {
+          logMessage = String.format(
+              "Finished processing RecordReaders %d to %d (RecordReader %d might be partially processed) out of %d in "
+                  + "iteration %d", startingProcessedRecordReaderIndex + 1, boundaryIndexToLog,
+              nextRecordReaderIndexToBeProcessed + 1, numRecordReaders, iterationCount);
+        }
+
+        observer.accept(logMessage);
+        LOGGER.info(logMessage);
+      }
+
+      iterationCount++;
     }
+    return outputSegmentDirs;
+  }
 
-    // Reduce phase
-    LOGGER.info("Beginning reduce phase on partitions: {}", _partitionToFileManagerMap.keySet());
+  private int getNextRecordReaderIndexToBeProcessed(int currentRecordIndex) {
+    for (int i = currentRecordIndex; i < _recordReaderFileConfigs.size(); i++) {
+      RecordReaderFileConfig recordReaderFileConfig = _recordReaderFileConfigs.get(i);
+      if (!recordReaderFileConfig.isRecordReaderDone()) {
+        return i;
+      }
+    }
+    return _recordReaderFileConfigs.size();
+  }
+
+  private void doReduce(Map<String, GenericRowFileManager> partitionToFileManagerMap)
+      throws Exception {
+    LOGGER.info("Beginning reduce phase on partitions: {}", partitionToFileManagerMap.keySet());
     Consumer<Object> observer = _segmentProcessorConfig.getProgressObserver();
-    int totalCount = _partitionToFileManagerMap.keySet().size();
+    int totalCount = partitionToFileManagerMap.keySet().size();
     int count = 1;
-    for (Map.Entry<String, GenericRowFileManager> entry : _partitionToFileManagerMap.entrySet()) {
+    for (Map.Entry<String, GenericRowFileManager> entry : partitionToFileManagerMap.entrySet()) {
       String partitionId = entry.getKey();
       observer.accept(
           String.format("Doing reduce phase on data from partition: %s (%d out of %d)", partitionId, count++,
@@ -168,9 +241,11 @@ private List<File> doProcess()
       Reducer reducer = ReducerFactory.getReducer(partitionId, fileManager, _segmentProcessorConfig, _reducerOutputDir);
       entry.setValue(reducer.reduce());
     }
+  }
 
-    // Segment creation phase
-    LOGGER.info("Beginning segment creation phase on partitions: {}", _partitionToFileManagerMap.keySet());
+  private List<File> generateSegment(Map<String, GenericRowFileManager> partitionToFileManagerMap)
+      throws Exception {
+    LOGGER.info("Beginning segment creation phase on partitions: {}", partitionToFileManagerMap.keySet());
     List<File> outputSegmentDirs = new ArrayList<>();
     TableConfig tableConfig = _segmentProcessorConfig.getTableConfig();
     Schema schema = _segmentProcessorConfig.getSchema();
@@ -179,6 +254,7 @@ private List<File> doProcess()
     String fixedSegmentName = _segmentProcessorConfig.getSegmentConfig().getFixedSegmentName();
     SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(tableConfig, schema);
     generatorConfig.setOutDir(_segmentsOutputDir.getPath());
+    Consumer<Object> observer = _segmentProcessorConfig.getProgressObserver();
 
     if (tableConfig.getIndexingConfig().getSegmentNameGeneratorType() != null) {
       generatorConfig.setSegmentNameGenerator(
@@ -190,8 +266,7 @@ private List<File> doProcess()
       generatorConfig.setSegmentNamePostfix(segmentNamePostfix);
     }
 
-    int sequenceId = 0;
-    for (Map.Entry<String, GenericRowFileManager> entry : _partitionToFileManagerMap.entrySet()) {
+    for (Map.Entry<String, GenericRowFileManager> entry : partitionToFileManagerMap.entrySet()) {
       String partitionId = entry.getKey();
       GenericRowFileManager fileManager = entry.getValue();
       try {
@@ -202,15 +277,15 @@ private List<File> doProcess()
             numSortFields);
         GenericRowFileRecordReader recordReader = fileReader.getRecordReader();
         int maxNumRecordsPerSegment;
-        for (int startRowId = 0; startRowId < numRows; startRowId += maxNumRecordsPerSegment, sequenceId++) {
+        for (int startRowId = 0; startRowId < numRows; startRowId += maxNumRecordsPerSegment, _segmentSequenceId++) {
           maxNumRecordsPerSegment = _segmentNumRowProvider.getNumRows();
           int endRowId = Math.min(startRowId + maxNumRecordsPerSegment, numRows);
-          LOGGER.info("Start creating segment of sequenceId: {} with row range: {} to {}", sequenceId, startRowId,
-              endRowId);
+          LOGGER.info("Start creating segment of sequenceId: {} with row range: {} to {}", _segmentSequenceId,
+              startRowId, endRowId);
           observer.accept(String.format(
               "Creating segment of sequentId: %d with data from partition: %s and row range: [%d, %d) out of [0, %d)",
-              sequenceId, partitionId, startRowId, endRowId, numRows));
-          generatorConfig.setSequenceId(sequenceId);
+              _segmentSequenceId, partitionId, startRowId, endRowId, numRows));
+          generatorConfig.setSequenceId(_segmentSequenceId);
           GenericRowFileRecordReader recordReaderForRange = recordReader.getRecordReaderForRange(startRowId, endRowId);
           SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
           driver.init(generatorConfig, new RecordReaderSegmentCreationDataSource(recordReaderForRange),

diff --git a/...n/java/org/apache/pinot/core/segment/processing/genericrow/AdaptiveConstraintsWriter.java b/...n/java/org/apache/pinot/core/segment/processing/genericrow/AdaptiveConstraintsWriter.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.segment.processing.genericrow;
+
+import java.io.IOException;
+
+
+/**
+ * Interface for a writer which can track constraints. This will be used by SegmentProcessorFramework.
+ * */
+
+public interface AdaptiveConstraintsWriter<W, D> {
+  boolean canWrite();
+
+  void write(W writer, D dataUnit)
+      throws IOException;
+}
diff --git a/...ain/java/org/apache/pinot/core/segment/processing/genericrow/AdaptiveSizeBasedWriter.java b/...ain/java/org/apache/pinot/core/segment/processing/genericrow/AdaptiveSizeBasedWriter.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.segment.processing.genericrow;
+
+import java.io.IOException;
+import org.apache.pinot.spi.data.readers.GenericRow;
+
+
+public class AdaptiveSizeBasedWriter implements AdaptiveConstraintsWriter<GenericRowFileWriter, GenericRow> {
+
+  private final long _bytesLimit;
+  private long _numBytesWritten;
+
+  public AdaptiveSizeBasedWriter(long bytesLimit) {
+    _bytesLimit = bytesLimit;
+    _numBytesWritten = 0;
+  }
+
+  public long getBytesLimit() {
+    return _bytesLimit;
+  }
+  public long getNumBytesWritten() {
+    return _numBytesWritten;
+  }
+
+  @Override
+  public boolean canWrite() {
+    return _numBytesWritten < _bytesLimit;
+  }
+
+  @Override
+  public void write(GenericRowFileWriter writer, GenericRow row) throws IOException {
+    _numBytesWritten += writer.writeData(row);
+  }
+}