Skip to content

Commit

Permalink
[SPARK-21344][SQL] BinaryType comparison does signed byte array compa…
Browse files Browse the repository at this point in the history
…rison

## What changes were proposed in this pull request?

This PR fixes a wrong comparison for `BinaryType`. This PR enables unsigned comparison and unsigned prefix generation for an array for `BinaryType`. Previous implementations uses signed operations.

## How was this patch tested?

Added a test suite in `OrderingSuite`.

Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com>

Closes #18571 from kiszk/SPARK-21344.

(cherry picked from commit ac5d5d7)
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
  • Loading branch information
kiszk authored and gatorsmile committed Jul 15, 2017
1 parent 4229e16 commit 1afded0
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static long getPrefix(byte[] bytes) {
final int minLen = Math.min(bytes.length, 8);
long p = 0;
for (int i = 0; i < minLen; ++i) {
p |= (128L + Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i))
p |= ((long) Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i) & 0xff)
<< (56 - 8 * i);
}
return p;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {

def compareBinary(x: Array[Byte], y: Array[Byte]): Int = {
for (i <- 0 until x.length; if i < y.length) {
val res = x(i).compare(y(i))
val v1 = x(i) & 0xff
val v2 = y(i) & 0xff
val res = v1 - v2
if (res != 0) return res
}
x.length - y.length
Expand All @@ -79,6 +81,19 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {
(prefixComparisonResult > 0 && compareBinary(x, y) > 0))
}

val binaryRegressionTests = Seq(
(Array[Byte](1), Array[Byte](-1)),
(Array[Byte](1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, -1)),
(Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, -1)),
(Array[Byte](1), Array[Byte](1, 1, 1, 1)),
(Array[Byte](1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, 1)),
(Array[Byte](-1), Array[Byte](-1, -1, -1, -1)),
(Array[Byte](-1, -1, -1, -1, -1), Array[Byte](-1, -1, -1, -1, -1, -1, -1, -1, -1))
)
binaryRegressionTests.foreach { case (b1, b2) =>
testPrefixComparison(b1, b2)
}

// scalastyle:off
val regressionTests = Table(
("s1", "s2"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ object TypeUtils {

def compareBinary(x: Array[Byte], y: Array[Byte]): Int = {
for (i <- 0 until x.length; if i < y.length) {
val res = x(i).compareTo(y(i))
val v1 = x(i) & 0xff
val v2 = y(i) & 0xff
val res = v1 - v2
if (res != 0) return res
}
x.length - y.length
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,23 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper {
// verify that we can support up to 5000 ordering comparisons, which should be sufficient
GenerateOrdering.generate(Array.fill(5000)(sortOrder))
}

test("SPARK-21344: BinaryType comparison does signed byte array comparison") {
val data = Seq(
(Array[Byte](1), Array[Byte](-1)),
(Array[Byte](1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, -1)),
(Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, -1))
)
data.foreach { case (b1, b2) =>
val rowOrdering = InterpretedOrdering.forSchema(Seq(BinaryType))
val genOrdering = GenerateOrdering.generate(
BoundReference(0, BinaryType, nullable = true).asc :: Nil)
val rowType = StructType(StructField("b", BinaryType, nullable = true) :: Nil)
val toCatalyst = CatalystTypeConverters.createToCatalystConverter(rowType)
val rowB1 = toCatalyst(Row(b1)).asInstanceOf[InternalRow]
val rowB2 = toCatalyst(Row(b2)).asInstanceOf[InternalRow]
assert(rowOrdering.compare(rowB1, rowB2) < 0)
assert(genOrdering.compare(rowB1, rowB2) < 0)
}
}
}
3 changes: 3 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/comparator.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- binary type
select x'00' < x'0f';
select x'00' < x'ff';
18 changes: 18 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/comparator.sql.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 2


-- !query 0
select x'00' < x'0f'
-- !query 0 schema
struct<(X'00' < X'0F'):boolean>
-- !query 0 output
true


-- !query 1
select x'00' < x'ff'
-- !query 1 schema
struct<(X'00' < X'FF'):boolean>
-- !query 1 output
true

0 comments on commit 1afded0

Please sign in to comment.