Fix unsigned bigint in group by columns #780

Merged · 5 commits · May 31, 2019
16 changes: 14 additions & 2 deletions core/src/main/scala/com/pingcap/tispark/utils/TiUtil.scala
@@ -17,6 +17,7 @@ package com.pingcap.tispark.utils
 
 import java.util.concurrent.TimeUnit
 
+import com.google.common.primitives.UnsignedLong
 import com.pingcap.tikv.TiConfiguration
 import com.pingcap.tikv.expression.ExpressionBlacklist
 import com.pingcap.tikv.expression.visitor.{MetaResolver, SupportedExpressionValidator}
@@ -32,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, Literal, NamedExpression}
 import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.aggregate.SortAggregateExec
-import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType}
+import org.apache.spark.sql.types.{Decimal, MetadataBuilder, StructField, StructType}
 import org.tikv.kvproto.Kvrpcpb.{CommandPri, IsolationLevel}
 
 import scala.collection.JavaConversions._
@@ -116,7 +117,18 @@ object TiUtil {
     val rowArray = new Array[Any](finalTypes.size)
 
     for (i <- 0 until transRow.fieldCount) {
-      rowArray(i) = transRow.get(i, finalTypes(i))
+      val colTp = finalTypes(i)
+      val isBigInt = colTp.getType.equals(MySQLType.TypeLonglong)
+      val isUnsigned = colTp.isUnsigned
+      val tmp = transRow.get(i, finalTypes(i))
+      rowArray(i) = if (isBigInt && isUnsigned) {
+        tmp match {
+          case l: java.lang.Long => Decimal.apply(UnsignedLong.fromLongBits(l).bigIntegerValue())
+          case _ => tmp
+        }
+      } else {
+        tmp
+      }
     }
 
     Row.fromSeq(rowArray)
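A note on the conversion above: TiKV hands a BIGINT UNSIGNED back as the raw 64-bit pattern in a signed java.lang.Long, so any value above Long.MaxValue arrives negative. Guava's UnsignedLong reinterprets the bits, and Spark's Decimal (up to 38 digits) holds the 20-digit unsigned maximum losslessly. A minimal standalone sketch of that reinterpretation, assuming Guava and Spark are on the classpath:

    import com.google.common.primitives.UnsignedLong
    import org.apache.spark.sql.types.Decimal

    // 18446744073709551615, the BIGINT UNSIGNED maximum, shares its
    // 64-bit pattern with the signed Long -1.
    val raw: Long = -1L

    // Reinterpret the bits as unsigned, widen to BigInteger, and wrap
    // in a Spark Decimal, as the patched TiUtil does.
    val fixed = Decimal.apply(UnsignedLong.fromLongBits(raw).bigIntegerValue())

    println(raw)   // -1
    println(fixed) // 18446744073709551615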
32 changes: 24 additions & 8 deletions core/src/test/scala/org/apache/spark/sql/IssueTestSuite.scala
@@ -63,7 +63,6 @@ class IssueTestSuite extends BaseTiSparkTest {
     tidbStmt.execute(
       "insert into t values(1),(2),(3),(4),(null)"
     )
-    refreshConnections()
 
     assert(spark.sql("select * from t limit 10").count() == 5)
     assert(spark.sql("select a from t limit 10").count() == 5)
@@ -85,7 +84,6 @@
     tidbStmt.execute(
       "insert into t values(1,771.64),(2,378.49),(3,920.92),(4,113.97)"
     )
-    refreshConnections()
 
     assert(try {
       judge("select a, max(b) from t group by a limit 2")
@@ -133,7 +131,6 @@
     tidbStmt.execute(
       "insert into single_read values(1, 1, 1, 2, null), (1, 2, 1, 1, null), (2, 1, 3, 2, null), (2, 2, 2, 1, 0)"
     )
-    refreshConnections()
 
     judge("select count(1) from single_read")
     judge("select count(c1) from single_read")
@@ -163,7 +160,6 @@
     )
     tidbStmt.execute("insert into t1 values(1, 201707, 'aa'), (2, 201707, 'aa')")
     tidbStmt.execute("insert into t2 values(2, 201707, 'aa')")
-    refreshConnections()
 
     // Note: Left outer join for DataSet is different from that in mysql.
     // The result of DataSet[a, b, c] left outer join DataSet[d, b, c]
@@ -191,7 +187,6 @@
     tidbStmt.execute(
       "INSERT INTO `tmp_debug` VALUES ('0000-00-00 00:00:00','0000-00-00','0000-00-00 00:00:00')"
     )
-    refreshConnections()
     spark.sql("select * from tmp_debug").collect()
   }
 
@@ -231,15 +226,14 @@
     tidbStmt.execute("insert into t values(1)")
     tidbStmt.execute("insert into t values(2)")
     tidbStmt.execute("insert into t values(4)")
-    refreshConnections() // refresh since we need to load data again
     judge("select count(c1) from t")
     judge("select count(c1 + 1) from t")
     judge("select count(1 + c1) from t")
     tidbStmt.execute("drop table if exists t")
     tidbStmt.execute("create table t(c1 int not null, c2 int not null)")
     tidbStmt.execute("insert into t values(1, 4)")
     tidbStmt.execute("insert into t values(2, 2)")
-    refreshConnections()
+    ti.meta.reloadAllMeta()
     judge("select count(c1 + c2) from t")
   }
 
@@ -249,7 +243,6 @@
     tidbStmt.execute(
       "CREATE TABLE `tmp_empty_tbl` (`c1` varchar(20))"
     )
-    refreshConnections()
     judge("select count(1) from `tmp_empty_tbl`")
     judge("select cast(count(1) as char(20)) from `tmp_empty_tbl`")
   }
@@ -274,6 +267,28 @@
     )
   }
 
+  test("unsigned bigint as group by column") {
+    def explainTestAndCollect(sql: String): Unit = {
+      val df = spark.sql(sql)
+      df.explain
+      df.show
+      df.collect.foreach(println)
+    }
+    tidbStmt.execute("drop table if exists table_group_by_bigint")
+    tidbStmt.execute("""
+                       |CREATE TABLE `table_group_by_bigint` (
+                       |  `a` int(11) NOT NULL,
+                       |  `b` bigint(20) UNSIGNED DEFAULT NULL
+                       |) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin
+                     """.stripMargin)
+    tidbStmt.execute(
+      "insert into table_group_by_bigint values(1, 2), (2, 18446744073709551615), (3, 18446744073709551615), (4, 18446744073709551614)"
+    )
+    explainTestAndCollect(
+      "select sum(a) from table_group_by_bigint group by b"
+    )
+  }
+
   override def afterAll(): Unit =
     try {
       tidbStmt.execute("drop table if exists t")
@@ -283,6 +298,7 @@ class IssueTestSuite extends BaseTiSparkTest {
       tidbStmt.execute("drop table if exists single_read")
       tidbStmt.execute("drop table if exists set_t")
       tidbStmt.execute("drop table if exists enum_t")
+      tidbStmt.execute("drop table if exists table_group_by_bigint")
     } finally {
       super.afterAll()
     }
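Given the inserted rows, the grouped result is fully determined: b = 2 yields sum(a) = 1, b = 18446744073709551614 yields 4, and b = 18446744073709551615 yields 5; before this fix the two large keys would have surfaced as -2 and -1. The test only prints the plan and rows. A hypothetical tighter check (not part of this PR), assuming the patched TiUtil surfaces column b as a Spark Decimal:

    // Collect the groups keyed by the decimal string of b and assert
    // the sums implied by the inserted rows. The getDecimal(0) call is
    // the assumption: it relies on b being read back as a DecimalType.
    val grouped = spark
      .sql("select b, sum(a) from table_group_by_bigint group by b")
      .collect()
      .map(row => row.getDecimal(0).toBigInteger.toString -> row.getLong(1))
      .toMap
    assert(grouped("2") == 1L)
    assert(grouped("18446744073709551614") == 4L)
    assert(grouped("18446744073709551615") == 5L)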