This repository was archived by the owner on Jun 14, 2024. It is now read-only.

Commit a34762b

New approach by using resolve method of a plan
1 parent acd3ced commit a34762b

12 files changed, +371 -459 lines changed


src/main/scala/com/microsoft/hyperspace/actions/CreateAction.scala

Lines changed: 6 additions & 18 deletions
@@ -19,13 +19,12 @@ package com.microsoft.hyperspace.actions
 import scala.util.Try

 import org.apache.spark.sql.{DataFrame, SparkSession}
-import org.apache.spark.sql.types.StructType

 import com.microsoft.hyperspace.{Hyperspace, HyperspaceException}
 import com.microsoft.hyperspace.actions.Constants.States.{ACTIVE, CREATING, DOESNOTEXIST}
 import com.microsoft.hyperspace.index._
 import com.microsoft.hyperspace.telemetry.{AppInfo, CreateActionEvent, HyperspaceEvent}
-import com.microsoft.hyperspace.util.{ResolverUtils, SchemaUtils}
+import com.microsoft.hyperspace.util.ResolverUtils

 class CreateAction(
     spark: SparkSession,
@@ -50,7 +49,7 @@ class CreateAction(
     }

     // schema validity checks
-    if (!isValidIndexSchema(indexConfig, df.schema)) {
+    if (!isValidIndexSchema(indexConfig, df)) {
       throw HyperspaceException("Index config is not applicable to dataframe schema.")
     }

@@ -64,24 +63,13 @@ class CreateAction(
     }
   }

-  private def isValidIndexSchema(config: IndexConfig, schema: StructType): Boolean = {
-    // First we flatten the schema. Instead of having struct of leaves
-    // the flatten method will return a list of field names.
-    // The second step is escaping the field names as there are some problems when
-    // using field names with dots. One is `partitionBy` does not works well
-    // with field names that contains the `.` (dot). See more on this Apache Spark
-    // ticket: https://issues.apache.org/jira/browse/SPARK-18084. Other is doing
-    // encountering `AnalysisException: Cannot resolve column name...` exceptions.
-    // So, given `struct(nested, struct(nst, struct(field1)))`, the fields variable
-    // will contain `Seq("nested__nst__field1")`.
-    val fields = SchemaUtils.escapeFieldNames(SchemaUtils.flatten(schema))
-    // Resolve index config columns from available column names present in the schema.
+  private def isValidIndexSchema(config: IndexConfig, dataFrame: DataFrame): Boolean = {
+    // Resolve index config columns from available column names present in the dataframe.
     ResolverUtils
       .resolve(
         spark,
-        SchemaUtils.escapeFieldNames(config.indexedColumns)
-          ++ SchemaUtils.escapeFieldNames(config.includedColumns),
-        fields)
+        config.indexedColumns ++ config.includedColumns,
+        dataFrame.queryExecution.analyzed)
       .isDefined
   }
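For illustration, a minimal sketch of the new validation path: the index config columns (nested ones given in dotted form) are resolved against the DataFrame's analyzed plan instead of a flattened, escaped schema. The sample data, object name, and column names below are hypothetical; ResolverUtils.resolve is the plan-based overload added in this commit.

import org.apache.spark.sql.SparkSession

import com.microsoft.hyperspace.index.IndexConfig
import com.microsoft.hyperspace.util.ResolverUtils

object ValidateIndexConfigSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").getOrCreate()
    import spark.implicits._

    // Hypothetical source with schema: id INT, nested STRUCT<nst: STRUCT<field1: INT>>.
    val df = Seq((1, 10), (2, 20))
      .toDF("id", "field1")
      .selectExpr("id", "named_struct('nst', named_struct('field1', field1)) AS nested")

    val config = IndexConfig("idx", Seq("nested.nst.field1"), Seq("id"))

    // Mirrors CreateAction.isValidIndexSchema after this change:
    // resolve config columns directly against the analyzed plan.
    val isValid = ResolverUtils
      .resolve(spark, config.indexedColumns ++ config.includedColumns, df.queryExecution.analyzed)
      .isDefined

    println(s"Index config applicable: $isValid") // true for this schema
    spark.stop()
  }
}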

src/main/scala/com/microsoft/hyperspace/actions/CreateActionBase.scala

Lines changed: 17 additions & 20 deletions
@@ -103,7 +103,7 @@ private[actions] abstract class CreateActionBase(dataManager: IndexDataManager)

     // run job
     val repartitionedIndexDataFrame =
-      indexDataFrame.repartition(numBuckets, resolvedIndexedColumns.map(c => col(s"$c")): _*)
+      indexDataFrame.repartition(numBuckets, resolvedIndexedColumns.map(c => col(s"`$c`")): _*)

     // Save the index with the number of buckets specified.
     repartitionedIndexDataFrame.write
@@ -144,7 +144,8 @@ private[actions] abstract class CreateActionBase(dataManager: IndexDataManager)
   }

   private def usesNestedFieldsProperty(indexConfig: IndexConfig): Option[(String, String)] = {
-    if (SchemaUtils.hasNestedFields(indexConfig.indexedColumns ++ indexConfig.includedColumns)) {
+    if (SchemaUtils.containsNestedFieldNames(indexConfig.indexedColumns ++
+        indexConfig.includedColumns)) {
       Some(IndexConstants.USES_NESTED_FIELDS_PROPERTY -> "true")
     } else {
       None
@@ -155,26 +156,22 @@ private[actions] abstract class CreateActionBase(dataManager: IndexDataManager)
       df: DataFrame,
       indexConfig: IndexConfig): (Seq[String], Seq[String]) = {
     val spark = df.sparkSession
-    // Flatten will transform nested field names from `struct(nested, struct(nst, struct(field1)))`
-    // into `Seq("nested.nst.field1")`.
-    val dfColumnNames = SchemaUtils.flatten(df.schema)
-    // The index config will contain the field names as they are given with `.` (dots).
-    // (ie: `Seq("nested.nst.field")`) and they need to be escaped to `nested__nst__field1` as
-    // the index entry know to work with escaped values only.
-    val indexedColumns = SchemaUtils.unescapeFieldNames(indexConfig.indexedColumns)
-    val includedColumns = SchemaUtils.unescapeFieldNames(indexConfig.includedColumns)
-    val resolvedIndexedColumns = ResolverUtils.resolve(spark, indexedColumns, dfColumnNames)
-    val resolvedIncludedColumns = ResolverUtils.resolve(spark, includedColumns, dfColumnNames)
+    val plan = df.queryExecution.analyzed
+    val indexedColumns = indexConfig.indexedColumns
+    val includedColumns = indexConfig.includedColumns
+    val resolvedIndexedColumns = ResolverUtils.resolve(spark, indexedColumns, plan)
+    val resolvedIncludedColumns = ResolverUtils.resolve(spark, includedColumns, plan)

     (resolvedIndexedColumns, resolvedIncludedColumns) match {
-      case (Some(indexed), Some(included)) => (indexed, included)
+      case (Some(indexed), Some(included)) =>
+        (indexed, included)
       case _ =>
         val unresolvedColumns = (indexedColumns ++ includedColumns)
-          .map(c => (c, ResolverUtils.resolve(spark, c, dfColumnNames)))
+          .map(c => (c, ResolverUtils.resolve(spark, Seq(c), plan)))
           .collect { case c if c._2.isEmpty => c._1 }
         throw HyperspaceException(
           s"Columns '${unresolvedColumns.mkString(",")}' could not be resolved " +
-            s"from available source columns '${dfColumnNames.mkString(",")}'")
+            s"from available source columns:\n${df.schema.treeString}")
     }
   }

@@ -218,15 +215,15 @@ private[actions] abstract class CreateActionBase(dataManager: IndexDataManager)
         .select(
           allIndexColumns.head,
           allIndexColumns.tail :+ IndexConstants.DATA_FILE_NAME_ID: _*)
-        .toDF(
-          SchemaUtils.escapeFieldNames(allIndexColumns) :+ IndexConstants.DATA_FILE_NAME_ID: _*)
+        .toDF(SchemaUtils.prefixNestedFieldNames(allIndexColumns) :+
+          IndexConstants.DATA_FILE_NAME_ID: _*)
     } else {
       df.select(columnsFromIndexConfig.head, columnsFromIndexConfig.tail: _*)
-        .toDF(SchemaUtils.escapeFieldNames(columnsFromIndexConfig): _*)
+        .toDF(SchemaUtils.prefixNestedFieldNames(columnsFromIndexConfig): _*)
     }

-    val escapedIndexedColumns = SchemaUtils.escapeFieldNames(resolvedIndexedColumns)
-    val escapedIncludedColumns = SchemaUtils.escapeFieldNames(resolvedIncludedColumns)
+    val escapedIndexedColumns = SchemaUtils.prefixNestedFieldNames(resolvedIndexedColumns)
+    val escapedIncludedColumns = SchemaUtils.prefixNestedFieldNames(resolvedIncludedColumns)

     (indexDF, escapedIndexedColumns, escapedIncludedColumns)
   }
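The change from col(s"$c") to col(s"`$c`") matters because resolved index column names can contain dots (nested fields keep a dotted form); without backticks Spark parses a dotted name as struct-field access and fails to resolve it against the index DataFrame. A small, self-contained sketch of the distinction, using a made-up DataFrame and column names:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

object BacktickColumnNameSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").getOrCreate()
    import spark.implicits._

    // A top-level column whose name literally contains dots, as produced for nested fields.
    val indexDataFrame = Seq((1, "a"), (2, "b")).toDF("id", "nested.nst.field1")

    // col("nested.nst.field1") would be parsed as struct access (nested -> nst -> field1)
    // and throw an AnalysisException here, since no such struct exists.

    // Backtick-quoting treats the whole string as a single column name, which is what
    // repartition(numBuckets, resolvedIndexedColumns.map(c => col(s"`$c`")): _*) relies on.
    indexDataFrame.repartition(2, col("`nested.nst.field1`")).show()
    spark.stop()
  }
}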

src/main/scala/com/microsoft/hyperspace/actions/RefreshActionBase.scala

Lines changed: 5 additions & 1 deletion
@@ -22,6 +22,7 @@ import org.apache.spark.sql.types.{DataType, StructType}
 import com.microsoft.hyperspace.{Hyperspace, HyperspaceException}
 import com.microsoft.hyperspace.actions.Constants.States.{ACTIVE, REFRESHING}
 import com.microsoft.hyperspace.index._
+import com.microsoft.hyperspace.util.SchemaUtils

 /**
  * Base abstract class containing common code for different types of index refresh actions.
@@ -86,7 +87,10 @@ private[actions] abstract class RefreshActionBase(

   protected lazy val indexConfig: IndexConfig = {
     val ddColumns = previousIndexLogEntry.derivedDataset.properties.columns
-    IndexConfig(previousIndexLogEntry.name, ddColumns.indexed, ddColumns.included)
+    IndexConfig(
+      previousIndexLogEntry.name,
+      SchemaUtils.removePrefixNestedFieldNames(ddColumns.indexed),
+      SchemaUtils.removePrefixNestedFieldNames(ddColumns.included))
   }

   final override val transientState: String = REFRESHING
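Refresh rebuilds the user-facing IndexConfig from the column names stored in the previous index log entry, so removePrefixNestedFieldNames is expected to undo prefixNestedFieldNames; the exact marker used for nested names lives in SchemaUtils and is not shown in this diff. A hedged round-trip sketch under that assumption:

import com.microsoft.hyperspace.util.SchemaUtils

object NestedNamePrefixRoundTrip {
  def main(args: Array[String]): Unit = {
    // Column names as a user passes them to IndexConfig (nested fields in dotted form).
    val userFacing = Seq("id", "nested.nst.field1")

    // Stored form: nested names are marked by prefixNestedFieldNames when the index is created.
    val stored = SchemaUtils.prefixNestedFieldNames(userFacing)

    // RefreshActionBase relies on this round trip when it rebuilds the IndexConfig.
    assert(SchemaUtils.removePrefixNestedFieldNames(stored) == userFacing)
    println("Round trip holds for: " + userFacing.mkString(", "))
  }
}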

src/main/scala/com/microsoft/hyperspace/actions/RefreshIncrementalAction.scala

Lines changed: 2 additions & 1 deletion
@@ -23,6 +23,7 @@ import com.microsoft.hyperspace.{Hyperspace, HyperspaceException}
 import com.microsoft.hyperspace.index._
 import com.microsoft.hyperspace.index.DataFrameWriterExtensions.Bucketizer
 import com.microsoft.hyperspace.telemetry.{AppInfo, HyperspaceEvent, RefreshIncrementalActionEvent}
+import com.microsoft.hyperspace.util.SchemaUtils

 /**
  * Action to refresh indexes with newly appended files and deleted files in an incremental way.
@@ -90,7 +91,7 @@ class RefreshIncrementalAction(
       refreshDF,
       indexDataPath.toString,
       previousIndexLogEntry.numBuckets,
-      indexConfig.indexedColumns,
+      SchemaUtils.prefixNestedFieldNames(indexConfig.indexedColumns),
       writeMode)
   }
 }

src/main/scala/com/microsoft/hyperspace/index/rules/JoinIndexRule.scala

Lines changed: 2 additions & 2 deletions
@@ -305,8 +305,8 @@ object JoinIndexRule
     val rRequiredIndexedCols = lRMap.values.toSeq

     // All required columns resolved with base relation.
-    val lRequiredAllCols = resolve(spark, allRequiredCols(left), lBaseAttrs).get
-    val rRequiredAllCols = resolve(spark, allRequiredCols(right), rBaseAttrs).get
+    val lRequiredAllCols = resolve(spark, allRequiredCols(left), leftRelation.plan).get
+    val rRequiredAllCols = resolve(spark, allRequiredCols(right), rightRelation.plan).get

     // Make sure required indexed columns are subset of all required columns for a subplan
     require(resolve(spark, lRequiredIndexedCols, lRequiredAllCols).isDefined)

src/main/scala/com/microsoft/hyperspace/util/ResolverUtils.scala

Lines changed: 69 additions & 0 deletions
@@ -18,6 +18,11 @@ package com.microsoft.hyperspace.util

 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.analysis.Resolver
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, GetStructField}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.types.StructType
+
+import com.microsoft.hyperspace.HyperspaceException

 /**
  * [[ResolverUtils]] provides utility functions to resolve strings based on spark's resolver.
@@ -71,4 +76,68 @@ object ResolverUtils {
       availableStrings: Seq[String]): Option[Seq[String]] = {
     Some(requiredStrings.map(resolve(spark, _, availableStrings).getOrElse { return None }))
   }
+
+  /**
+   * Finds all resolved strings for requiredStrings, from the given logical plan. Returns
+   * optional seq of resolved strings if all required strings are resolved, otherwise None.
+   *
+   * @param spark Spark session.
+   * @param requiredStrings List of strings to resolve.
+   * @param plan Logical plan to resolve against.
+   * @return Optional Seq of resolved strings if all required strings are resolved. Else, None.
+   */
+  def resolve(
+      spark: SparkSession,
+      requiredStrings: Seq[String],
+      plan: LogicalPlan): Option[Seq[String]] = {
+
+    def fixParts(parts: Seq[String], outputs: Seq[Attribute]): Seq[String] = {
+      var newParts = Seq.empty[String]
+      val h :: t = parts.toList
+      val topLevelField = outputs.find(_.name.compareToIgnoreCase(h) == 0)
+      topLevelField match {
+        case Some(o) =>
+          newParts = newParts :+ o.name
+          var children = o.dataType.asInstanceOf[StructType]
+          t.foreach { e =>
+            val elIdx: Int = children.fieldNames.indexWhere { f => f.compareToIgnoreCase(e) == 0 }
+            if (elIdx >= 0) {
+              newParts = newParts :+ children.fieldNames(elIdx)
+              children.fields.toSeq(elIdx).dataType match {
+                case s: StructType =>
+                  children = s
+                case _ =>
+              }
+            }
+          }
+        case None =>
+          newParts = h +: t
+      }
+      newParts
+    }
+
+    val schemaFieldNames = plan.output.map(_.name)
+    Some(requiredStrings.map { requiredField =>
+      plan
+        .resolveQuoted(requiredField, spark.sessionState.conf.resolver)
+        .getOrElse { return None }
+        .collectFirst {
+          case a: AttributeReference =>
+            schemaFieldNames.find(_.compareToIgnoreCase(a.name) == 0).getOrElse(a.name)
+          case g: GetStructField =>
+            val parts = g.sql // returns backtick enclosed string (ie: `a`.`b`.`c`)
+              .replaceAll("^`(.*)`$", "$1") // strip first and last backticks
+              .split("`\\.`") // split by "`.`"
+            val fixedParts = fixParts(parts, plan.output) // fix the casing
+            val resolvedFieldName = fixedParts.mkString(".") // put back the field name fixed
+            val unsupported = parts.filter(_.contains("."))
+            if (unsupported.nonEmpty) { // the fields with dots should throw exception
+              throw HyperspaceException(s"The following field name construct " +
+                s"$resolvedFieldName contains unsupported parts: ${unsupported.mkString(",")}.")
+            }
+            resolvedFieldName
+        }
+        .getOrElse { return None }
+    })
+  }
 }
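A usage sketch of the new plan-based resolve overload (the sample schema and names are illustrative): it resolves both top-level and nested field names against a DataFrame's analyzed plan, returns them with the casing found in the plan, and yields None if any name cannot be resolved.

import org.apache.spark.sql.SparkSession

import com.microsoft.hyperspace.util.ResolverUtils

object PlanBasedResolveSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").getOrCreate()
    import spark.implicits._

    // Schema: id INT, nested STRUCT<nst: STRUCT<field1: INT>>.
    val df = Seq((1, 10))
      .toDF("id", "field1")
      .selectExpr("id", "named_struct('nst', named_struct('field1', field1)) AS nested")
    val plan = df.queryExecution.analyzed

    // Case-insensitive lookups come back with the casing defined in the plan.
    println(ResolverUtils.resolve(spark, Seq("ID", "NESTED.NST.FIELD1"), plan))
    // expected: Some(List(id, nested.nst.field1))

    // Any unresolvable name makes the whole call return None.
    println(ResolverUtils.resolve(spark, Seq("id", "missing"), plan))
    // expected: None

    spark.stop()
  }
}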
