revert changes in Analyzer + add UT

mgaido91 · mgaido91 · commit 64aafc5fa08d · 2018-10-14T10:51:52.000+02:00
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -40,18 +40,28 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
 /**
- * A trivial [[Analyzer]] with a dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]].
+ * Trivial [[Analyzer]]s with a dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]].
  * Used for testing when all relations are already filled in and the analyzer needs only
  * to resolve attribute references.
  */
-object SimpleAnalyzer extends Analyzer(
+sealed class BaseSimpleAnalyzer(caseSensitive: Boolean) extends Analyzer(
   new SessionCatalog(
     new InMemoryCatalog,
     EmptyFunctionRegistry,
-    new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true)) {
+    new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive)) {
     override def createDatabase(dbDefinition: CatalogDatabase, ignoreIfExists: Boolean) {}
   },
-  new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true))
+  new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive))
+
+/**
+ * A trivial analyzer that uses case-sensitive resolution.
+ */
+object SimpleAnalyzer extends BaseSimpleAnalyzer(true)
+
+/**
+ * A trivial analyzer that uses case-insensitive resolution.
+ */
+object SimpleCaseInsensitiveAnalyzer extends BaseSimpleAnalyzer(false)
 
 /**
  * Provides a way to keep state during the analysis, this enables us to decouple the concerns
@@ -1189,7 +1199,7 @@ class Analyzer(
 
       case f @ Filter(cond, child) if (!f.resolved || f.missingInput.nonEmpty) && child.resolved =>
         val (newCond, newChild) = resolveExprsAndAddMissingAttrs(Seq(cond), child)
-        if (child.sameOutput(newChild)) {
+        if (child.output == newChild.output) {
           f.copy(condition = newCond.head)
         } else {
           // Add missing attributes and then project them away.
@@ -2087,7 +2097,7 @@ class Analyzer(
       // todo: It's hard to write a general rule to pull out nondeterministic expressions
       // from LogicalPlan, currently we only do it for UnaryNode which has same output
       // schema with its child.
-      case p: UnaryNode if p.sameOutput(p.child) && p.expressions.exists(!_.deterministic) =>
+      case p: UnaryNode if p.output == p.child.output && p.expressions.exists(!_.deterministic) =>
         val nondeterToAttr = getNondeterToAttr(p.expressions)
         val newPlan = p.transformExpressions { case e =>
           nondeterToAttr.get(e).map(_.toAttribute).getOrElse(e)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -401,6 +401,9 @@ package object dsl {
       def analyze: LogicalPlan =
         EliminateSubqueryAliases(analysis.SimpleAnalyzer.execute(logicalPlan))
 
+      def analyzeCaseInsensitive: LogicalPlan =
+        EliminateSubqueryAliases(analysis.SimpleCaseInsensitiveAnalyzer.execute(logicalPlan))
+
       def hint(name: String, parameters: Any*): LogicalPlan =
         UnresolvedHint(name, parameters, logicalPlan)
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala
@@ -124,4 +124,11 @@ class RemoveRedundantAliasAndProjectSuite extends PlanTest with PredicateHelper
     val expected = Subquery(relation.select('a as "a", 'b).where('b < 10).select('a).analyze)
     comparePlans(optimized, expected)
   }
+
+  test("SPARK-25691: RemoveRedundantProject works also with different cases") {
+    val relation = LocalRelation('a.int, 'b.int)
+    val query = relation.select('A, 'b).analyzeCaseInsensitive
+    val optimized = Optimize.execute(query)
+    comparePlans(optimized, relation)
+  }
 }

Original file line number	Diff line number	Diff line change
`@@ -401,6 +401,9 @@ package object dsl {`
`401`	`401`	`def analyze: LogicalPlan =`
`402`	`402`	`EliminateSubqueryAliases(analysis.SimpleAnalyzer.execute(logicalPlan))`
`403`	`403`
	`404`	`+ def analyzeCaseInsensitive: LogicalPlan =`
	`405`	`+ EliminateSubqueryAliases(analysis.SimpleCaseInsensitiveAnalyzer.execute(logicalPlan))`
	`406`	`+`
`404`	`407`	`def hint(name: String, parameters: Any*): LogicalPlan =`
`405`	`408`	`UnresolvedHint(name, parameters, logicalPlan)`
`406`	`409`	`}`
Original file line number	Diff line number	Diff line change
`@@ -124,4 +124,11 @@ class RemoveRedundantAliasAndProjectSuite extends PlanTest with PredicateHelper`
`124`	`124`	`val expected = Subquery(relation.select('a as "a", 'b).where('b < 10).select('a).analyze)`
`125`	`125`	`comparePlans(optimized, expected)`
`126`	`126`	`}`
	`127`	`+`
	`128`	`+ test("SPARK-25691: RemoveRedundantProject works also with different cases") {`
	`129`	`+ val relation = LocalRelation('a.int, 'b.int)`
	`130`	`+ val query = relation.select('A, 'b).analyzeCaseInsensitive`
	`131`	`+ val optimized = Optimize.execute(query)`
	`132`	`+ comparePlans(optimized, relation)`
	`133`	`+ }`
`127`	`134`	`}`