Fix unsigned bigint in group by columns #780

Merged · 5 commits · May 31, 2019
16 changes: 14 additions & 2 deletions core/src/main/scala/com/pingcap/tispark/utils/TiUtil.scala
@@ -17,6 +17,7 @@ package com.pingcap.tispark.utils
 
 import java.util.concurrent.TimeUnit
 
+import com.google.common.primitives.UnsignedLong
 import com.pingcap.tikv.TiConfiguration
 import com.pingcap.tikv.expression.ExpressionBlacklist
 import com.pingcap.tikv.expression.visitor.{MetaResolver, SupportedExpressionValidator}
@@ -32,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, Literal, NamedExpression}
 import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.aggregate.SortAggregateExec
-import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType}
+import org.apache.spark.sql.types.{Decimal, MetadataBuilder, StructField, StructType}
 import org.tikv.kvproto.Kvrpcpb.{CommandPri, IsolationLevel}
 
 import scala.collection.JavaConversions._
@@ -116,7 +117,18 @@ object TiUtil {
     val rowArray = new Array[Any](finalTypes.size)
 
     for (i <- 0 until transRow.fieldCount) {
-      rowArray(i) = transRow.get(i, finalTypes(i))
+      val colTp = finalTypes(i)
+      val isBigInt = colTp.getType.equals(MySQLType.TypeLonglong)
+      val isUnsigned = colTp.isUnsigned
+      val tmp = transRow.get(i, finalTypes(i))
+      rowArray(i) = if (isBigInt && isUnsigned) {
+        tmp match {
+          case l: java.lang.Long => Decimal.apply(UnsignedLong.fromLongBits(l).bigIntegerValue())
+          case _ => tmp
+        }
+      } else {
+        tmp
+      }
     }
 
     Row.fromSeq(rowArray)
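A note on the conversion above: TiKV hands a BIGINT UNSIGNED back as the raw 64-bit pattern in a signed java.lang.Long, so any value above Long.MaxValue arrives negative. Guava's UnsignedLong reinterprets the bits, and Spark's Decimal (up to 38 digits) holds the 20-digit unsigned maximum losslessly. A minimal standalone sketch of that reinterpretation, assuming Guava and Spark are on the classpath:

    import com.google.common.primitives.UnsignedLong
    import org.apache.spark.sql.types.Decimal

    // 18446744073709551615, the BIGINT UNSIGNED maximum, shares its
    // 64-bit pattern with the signed Long -1.
    val raw: Long = -1L

    // Reinterpret the bits as unsigned, widen to BigInteger, and wrap
    // in a Spark Decimal, as the patched TiUtil does.
    val fixed = Decimal.apply(UnsignedLong.fromLongBits(raw).bigIntegerValue())

    println(raw)   // -1
    println(fixed) // 18446744073709551615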
32 changes: 24 additions & 8 deletions core/src/test/scala/org/apache/spark/sql/IssueTestSuite.scala
@@ -63,7 +63,6 @@ class IssueTestSuite extends BaseTiSparkTest {
     tidbStmt.execute(
       "insert into t values(1),(2),(3),(4),(null)"
     )
-    refreshConnections()
 
     assert(spark.sql("select * from t limit 10").count() == 5)
     assert(spark.sql("select a from t limit 10").count() == 5)
@@ -85,7 +84,6 @@
     tidbStmt.execute(
       "insert into t values(1,771.64),(2,378.49),(3,920.92),(4,113.97)"
     )
-    refreshConnections()
 
     assert(try {
       judge("select a, max(b) from t group by a limit 2")
@@ -133,7 +131,6 @@
     tidbStmt.execute(
       "insert into single_read values(1, 1, 1, 2, null), (1, 2, 1, 1, null), (2, 1, 3, 2, null), (2, 2, 2, 1, 0)"
     )
-    refreshConnections()
 
     judge("select count(1) from single_read")
     judge("select count(c1) from single_read")
@@ -163,7 +160,6 @@
     )
     tidbStmt.execute("insert into t1 values(1, 201707, 'aa'), (2, 201707, 'aa')")
     tidbStmt.execute("insert into t2 values(2, 201707, 'aa')")
-    refreshConnections()
 
     // Note: Left outer join for DataSet is different from that in mysql.
     // The result of DataSet[a, b, c] left outer join DataSet[d, b, c]
@@ -191,7 +187,6 @@
     tidbStmt.execute(
       "INSERT INTO `tmp_debug` VALUES ('0000-00-00 00:00:00','0000-00-00','0000-00-00 00:00:00')"
     )
-    refreshConnections()
     spark.sql("select * from tmp_debug").collect()
   }
 
@@ -231,15 +226,14 @@
     tidbStmt.execute("insert into t values(1)")
     tidbStmt.execute("insert into t values(2)")
     tidbStmt.execute("insert into t values(4)")
-    refreshConnections() // refresh since we need to load data again
     judge("select count(c1) from t")
     judge("select count(c1 + 1) from t")
     judge("select count(1 + c1) from t")
     tidbStmt.execute("drop table if exists t")
     tidbStmt.execute("create table t(c1 int not null, c2 int not null)")
     tidbStmt.execute("insert into t values(1, 4)")
     tidbStmt.execute("insert into t values(2, 2)")
-    refreshConnections()
+    ti.meta.reloadAllMeta()
     judge("select count(c1 + c2) from t")
   }
 
@@ -249,7 +243,6 @@
     tidbStmt.execute(
       "CREATE TABLE `tmp_empty_tbl` (`c1` varchar(20))"
     )
-    refreshConnections()
     judge("select count(1) from `tmp_empty_tbl`")
     judge("select cast(count(1) as char(20)) from `tmp_empty_tbl`")
   }
@@ -274,6 +267,28 @@
     )
   }
 
+  test("unsigned bigint as group by column") {
+    def explainTestAndCollect(sql: String): Unit = {
+      val df = spark.sql(sql)
+      df.explain
+      df.show
+      df.collect.foreach(println)
+    }
+    tidbStmt.execute("drop table if exists table_group_by_bigint")
+    tidbStmt.execute("""
+                       |CREATE TABLE `table_group_by_bigint` (
+                       |  `a` int(11) NOT NULL,
+                       |  `b` bigint(20) UNSIGNED DEFAULT NULL
+                       |) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin
+                     """.stripMargin)
+    tidbStmt.execute(
+      "insert into table_group_by_bigint values(1, 2), (2, 18446744073709551615), (3, 18446744073709551615), (4, 18446744073709551614)"
+    )
+    explainTestAndCollect(
+      "select sum(a) from table_group_by_bigint group by b"
+    )
+  }
+
   override def afterAll(): Unit =
     try {
       tidbStmt.execute("drop table if exists t")
@@ -283,6 +298,7 @@ class IssueTestSuite extends BaseTiSparkTest {
       tidbStmt.execute("drop table if exists single_read")
       tidbStmt.execute("drop table if exists set_t")
       tidbStmt.execute("drop table if exists enum_t")
+      tidbStmt.execute("drop table if exists table_group_by_bigint")
     } finally {
       super.afterAll()
     }
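Given the inserted rows, the grouped result is fully determined: b = 2 yields sum(a) = 1, b = 18446744073709551614 yields 4, and b = 18446744073709551615 yields 5; before this fix the two large keys would have surfaced as -2 and -1. The test only prints the plan and rows. A hypothetical tighter check (not part of this PR), assuming the patched TiUtil surfaces column b as a Spark Decimal:

    // Collect the groups keyed by the decimal string of b and assert
    // the sums implied by the inserted rows. The getDecimal(0) call is
    // the assumption: it relies on b being read back as a DecimalType.
    val grouped = spark
      .sql("select b, sum(a) from table_group_by_bigint group by b")
      .collect()
      .map(row => row.getDecimal(0).toBigInteger.toString -> row.getLong(1))
      .toMap
    assert(grouped("2") == 1L)
    assert(grouped("18446744073709551614") == 4L)
    assert(grouped("18446744073709551615") == 5L)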