Skip to content

Commit

Permalink
Spark 3.3: Support truncate in FunctionCatalog (apache#5431)
Browse files Browse the repository at this point in the history
  • Loading branch information
kbendick authored Aug 12, 2022
1 parent 5a15efc commit 6a5051b
Show file tree
Hide file tree
Showing 7 changed files with 959 additions and 16 deletions.
21 changes: 6 additions & 15 deletions api/src/main/java/org/apache/iceberg/transforms/Truncate.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import org.apache.iceberg.relocated.com.google.common.base.Objects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.util.BinaryUtil;
import org.apache.iceberg.util.TruncateUtil;
import org.apache.iceberg.util.UnicodeUtil;

abstract class Truncate<T> implements Transform<T, T> {
Expand Down Expand Up @@ -87,7 +89,7 @@ public Integer apply(Integer value) {
return null;
}

return value - (((value % width) + width) % width);
return TruncateUtil.truncateInt(width, value);
}

@Override
Expand Down Expand Up @@ -171,7 +173,7 @@ public Long apply(Long value) {
return null;
}

return value - (((value % width) + width) % width);
return TruncateUtil.truncateLong(width, value);
}

@Override
Expand Down Expand Up @@ -391,9 +393,7 @@ public ByteBuffer apply(ByteBuffer value) {
return null;
}

ByteBuffer ret = value.duplicate();
ret.limit(Math.min(value.limit(), value.position() + length));
return ret;
return BinaryUtil.truncateBinaryUnsafe(value, length);
}

@Override
Expand Down Expand Up @@ -480,16 +480,7 @@ public BigDecimal apply(BigDecimal value) {
return null;
}

BigDecimal remainder =
new BigDecimal(
value
.unscaledValue()
.remainder(unscaledWidth)
.add(unscaledWidth)
.remainder(unscaledWidth),
value.scale());

return value.subtract(remainder);
return TruncateUtil.truncateDecimal(unscaledWidth, value);
}

@Override
Expand Down
14 changes: 14 additions & 0 deletions api/src/main/java/org/apache/iceberg/util/BinaryUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,20 @@ public static ByteBuffer truncateBinary(ByteBuffer input, int length) {
return ByteBuffer.wrap(array);
}

/**
* Truncates the input byte buffer to the given length.
*
* <p>Unlike {@linkplain #truncateBinary(ByteBuffer, int)}, this skips copying the input data.
*
* @param value The ByteBuffer to be truncated
* @param width The non-negative length to truncate input to
*/
public static ByteBuffer truncateBinaryUnsafe(ByteBuffer value, int width) {
ByteBuffer ret = value.duplicate();
ret.limit(Math.min(value.limit(), value.position() + width));
return ret;
}

/**
* Returns a byte buffer whose length is lesser than or equal to truncateLength and is lower than
* the given input
Expand Down
75 changes: 75 additions & 0 deletions api/src/main/java/org/apache/iceberg/util/TruncateUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.util;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;

/**
* Contains the logic for various {@code truncate} transformations for various types.
*
* <p>This utility class allows for the logic to be reused in different scenarios where input
* validation is done at different times either in org.apache.iceberg.transforms.Truncate and within
* defined SQL functions for different compute engines for usage in SQL.
*
* <p>In general, the inputs to the functions should have already been validated by the calling
* code, as different classes use truncate with different preprocessing. This generally means that
* the truncation width is positive and the value to truncate is non-null.
*
* <p>Thus, <b>none</b> of these utility functions validate their input. <i>It is the responsibility
* of the calling code to validate input.</i>
*
* <p>See also {@linkplain UnicodeUtil#truncateString(CharSequence, int)} and {@link
* BinaryUtil#truncateBinaryUnsafe(ByteBuffer, int)} for similar methods for Strings and
* ByteBuffers.
*/
public class TruncateUtil {

private TruncateUtil() {}

public static byte truncateByte(int width, byte value) {
return (byte) (value - (((value % width) + width) % width));
}

public static short truncateShort(int width, short value) {
return (short) (value - (((value % width) + width) % width));
}

public static int truncateInt(int width, int value) {
return value - (((value % width) + width) % width);
}

public static long truncateLong(int width, long value) {
return value - (((value % width) + width) % width);
}

public static BigDecimal truncateDecimal(BigInteger unscaledWidth, BigDecimal value) {
BigDecimal remainder =
new BigDecimal(
value
.unscaledValue()
.remainder(unscaledWidth)
.add(unscaledWidth)
.remainder(unscaledWidth),
value.scale());

return value.subtract(remainder);
}
}
35 changes: 35 additions & 0 deletions core/src/test/java/org/apache/iceberg/util/TestTruncateUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.util;

import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;

class TestTruncateUtil {
@Test
public void testInvalidInputWidthBehavior() {
Assertions.assertThatNoException()
.as("Invalid width input shouldn't necessarily throw an exception as it's not validated")
.isThrownBy(() -> TruncateUtil.truncateInt(-1, 100));

Assertions.assertThatException()
.as("Invalid width input can possibly throw an exception")
.isThrownBy(() -> TruncateUtil.truncateInt(0, 100));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ public class SparkFunctions {
private SparkFunctions() {}

private static final Map<String, UnboundFunction> FUNCTIONS =
ImmutableMap.of("iceberg_version", new IcebergVersionFunction());
ImmutableMap.of(
"iceberg_version", new IcebergVersionFunction(),
"truncate", new TruncateFunction());

private static final List<String> FUNCTION_NAMES = ImmutableList.copyOf(FUNCTIONS.keySet());

Expand Down
Loading

0 comments on commit 6a5051b

Please sign in to comment.