grab bag of non-controversial cleanup tasks

airbytehq · cgardens · May 31, 2023 · May 27, 2023 · May 27, 2023 · May 31, 2023
commit f80c459c2b4da7f03373cd7e486ab9eb2c1944f1
diff --git a/...java-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/CsvSerializedBuffer.java b/...java-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/CsvSerializedBuffer.java
@@ -21,9 +21,13 @@
 import org.apache.commons.csv.CSVPrinter;
 import org.apache.commons.csv.QuoteMode;
 import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class CsvSerializedBuffer extends BaseSerializedBuffer {
 
+  private static final Logger LOGGER = LoggerFactory.getLogger(CsvSerializedBuffer.class);
+
   public static final String CSV_GZ_SUFFIX = ".csv.gz";
 
   private final CsvSheetGenerator csvSheetGenerator;
@@ -36,8 +40,8 @@ public CsvSerializedBuffer(final BufferStorage bufferStorage,
       throws Exception {
     super(bufferStorage);
     this.csvSheetGenerator = csvSheetGenerator;
-    this.csvPrinter = null;
-    this.csvFormat = CSVFormat.DEFAULT;
+    csvPrinter = null;
+    csvFormat = CSVFormat.DEFAULT;
     // we always want to compress csv files
     withCompression(compression);
   }
@@ -62,12 +66,20 @@ protected void writeRecord(final AirbyteRecordMessage record) throws IOException
 
   @Override
   protected void flushWriter() throws IOException {
-    csvPrinter.flush();
+    if (csvPrinter != null) {
+      csvPrinter.flush();
+    } else {
+      LOGGER.warn("Trying to flush but no printer is initialized.");
+    }
   }
 
   @Override
   protected void closeWriter() throws IOException {
-    csvPrinter.close();
+    if (csvPrinter != null) {
+      csvPrinter.close();
+    } else {
+      LOGGER.warn("Trying to close but no printer is initialized.");
+    }
   }
 
   public static BufferCreateFunction createFunction(final S3CsvFormatConfig config,

diff --git a/...ava/io/airbyte/integrations/destination/buffered_stream_consumer/RecordSizeEstimator.java b/...ava/io/airbyte/integrations/destination/buffered_stream_consumer/RecordSizeEstimator.java
@@ -34,8 +34,8 @@ public class RecordSizeEstimator {
    * determined by {@code sampleBatchSize}.
    */
   public RecordSizeEstimator(final int sampleBatchSize) {
-    this.streamRecordSizeEstimation = new HashMap<>();
-    this.streamSampleCountdown = new HashMap<>();
+    streamRecordSizeEstimation = new HashMap<>();
+    streamSampleCountdown = new HashMap<>();
     this.sampleBatchSize = sampleBatchSize;
   }
 
@@ -71,7 +71,7 @@ public long getEstimatedByteSize(final AirbyteRecordMessage record) {
   }
 
   @VisibleForTesting
-  static long getStringByteSize(final JsonNode data) {
+  public static long getStringByteSize(final JsonNode data) {
     // assume UTF-8 encoding, and each char is 4 bytes long
     return Jsons.serialize(data).length() * 4L;
   }

diff --git a/...src/main/java/io/airbyte/integrations/destination/record_buffer/BaseSerializedBuffer.java b/...src/main/java/io/airbyte/integrations/destination/record_buffer/BaseSerializedBuffer.java
@@ -128,7 +128,7 @@ public void flush() throws IOException {
     if (inputStream == null && !isClosed) {
       flushWriter();
       if (compressedBuffer != null) {
-        LOGGER.info("Wrapping up compression and write GZIP trailer data.");
+        LOGGER.debug("Wrapping up compression and write GZIP trailer data.");
         compressedBuffer.flush();
         compressedBuffer.close();
       }

diff --git a/...s/base-java/src/main/java/io/airbyte/integrations/destination_async/AirbyteFileUtils.java b/...s/base-java/src/main/java/io/airbyte/integrations/destination_async/AirbyteFileUtils.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination_async;
+
+import java.text.DecimalFormat;
+
+/**
+ * Replicate the behavior of {@link org.apache.commons.io.FileUtils} to match the proclivities of
+ * Davin and Charles. Courteously written by ChatGPT.
+ */
+public class AirbyteFileUtils {
+
+  private static final double ONE_KB = 1024;
+  private static final double ONE_MB = ONE_KB * 1024;
+  private static final double ONE_GB = ONE_MB * 1024;
+  private static final double ONE_TB = ONE_GB * 1024;
+
+  /**
+   * Replicate the behavior of {@link org.apache.commons.io.FileUtils} but instead of rounding down to
+   * the nearest whole number, it rounds to two decimal places. A new formatter is created on every
+   * call because {@link DecimalFormat} is not thread-safe and this method may be called concurrently.
+   *
+   * @param sizeInBytes size in bytes
+   * @return human-readable size
+   */
+  public static String byteCountToDisplaySize(final long sizeInBytes) {
+    final DecimalFormat df = new DecimalFormat("#.##");
+    if (sizeInBytes < ONE_KB) {
+      return df.format(sizeInBytes) + " bytes";
+    } else if (sizeInBytes < ONE_MB) {
+      return df.format((double) sizeInBytes / ONE_KB) + " KB";
+    } else if (sizeInBytes < ONE_GB) {
+      return df.format((double) sizeInBytes / ONE_MB) + " MB";
+    } else if (sizeInBytes < ONE_TB) {
+      return df.format((double) sizeInBytes / ONE_GB) + " GB";
+    } else {
+      return df.format((double) sizeInBytes / ONE_TB) + " TB";
+    }
+  }
+
+}
diff --git a/...ase-java/src/main/java/io/airbyte/integrations/destination_async/GlobalMemoryManager.java b/...ase-java/src/main/java/io/airbyte/integrations/destination_async/GlobalMemoryManager.java
@@ -6,11 +6,12 @@
 
 import java.util.concurrent.atomic.AtomicLong;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.io.FileUtils;
 
 /**
  * Responsible for managing global memory across multiple queues in a thread-safe way.
  * <p>
- * This means memory allocation and deallocation for each queue can be dynamically adjusted
+ * This means memory allocation and de-allocation for each queue can be dynamically adjusted
  * according to the overall available memory. Memory blocks are managed in chunks of
  * {@link #BLOCK_SIZE_BYTES}, and the total amount of memory managed is configured at creation time.
  * <p>
@@ -68,6 +69,10 @@ public synchronized long requestMemory() {
     final var toAllocateBytes = Math.min(freeMem, BLOCK_SIZE_BYTES);
     currentMemoryBytes.addAndGet(toAllocateBytes);
 
+    log.debug("Memory Requested: max: {}, allocated: {}, allocated in this request: {}",
+        FileUtils.byteCountToDisplaySize(maxMemoryBytes),
+        FileUtils.byteCountToDisplaySize(currentMemoryBytes.get()),
+        FileUtils.byteCountToDisplaySize(toAllocateBytes));
     return toAllocateBytes;
   }
 

diff --git a/...se-java/src/test/java/io/airbyte/integrations/destination_async/AirbyteFileUtilsTest.java b/...se-java/src/test/java/io/airbyte/integrations/destination_async/AirbyteFileUtilsTest.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination_async;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+public class AirbyteFileUtilsTest {
+
+  @Test
+  void testByteCountToDisplaySize() {
+    // One sample per unit; expected values use 1024-based units rounded to two decimal places.
+    assertEquals("500 bytes", AirbyteFileUtils.byteCountToDisplaySize(500L));
+    assertEquals("1.95 KB", AirbyteFileUtils.byteCountToDisplaySize(2000L));
+    assertEquals("2.93 MB", AirbyteFileUtils.byteCountToDisplaySize(3072000L));
+    assertEquals("2.67 GB", AirbyteFileUtils.byteCountToDisplaySize(2872000000L));
+    assertEquals("1.82 TB", AirbyteFileUtils.byteCountToDisplaySize(2000000000000L));
+  }
+
+}
@@ -2,7 +2,7 @@
   "streams": [
     {
       "stream": {
-        "name": "users",
+        "name": "users_10m",
         "namespace": "PERF_TEST_HARNESS",
         "json_schema": {
           "type": "object",

@@ -2,7 +2,7 @@
   "streams": [
     {
       "stream": {
-        "name": "users",
+        "name": "users_1m",
         "namespace": "PERF_TEST_HARNESS",
         "json_schema": {
           "type": "object",