Skip to content
This repository was archived by the owner on Jun 14, 2024. It is now read-only.

Commit 8f02f54

Browse files
Integrate feedback (2)
1 parent 04eeb91 commit 8f02f54

File tree

3 files changed

+28
-5
lines changed

3 files changed

+28
-5
lines changed

src/main/scala/com/microsoft/hyperspace/util/SchemaUtils.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ import org.apache.spark.sql.types.{ArrayType, MapType, StructField, StructType}
2121
object SchemaUtils {
2222

2323
val BACKTICK_MARKER_REGEX = "^`(.*)`$"
24-
val NESTED_FIELD_NEEDLE_REGEX = "\\."
24+
val NESTED_FIELD_NEEDLE = "."
25+
val NESTED_FIELD_NEEDLE_REGEX = s"\\$NESTED_FIELD_NEEDLE"
2526
val NESTED_FIELD_REPLACEMENT = "__"
2627

2728
/**
@@ -70,7 +71,8 @@ object SchemaUtils {
7071
flatten(keys, Some(prefix.map(o => s"$o.$name").getOrElse(name))) ++
7172
flatten(values, Some(prefix.map(o => s"$o.$name").getOrElse(name)))
7273
case other =>
73-
if (other.name.contains(".")) {
74+
if (other.name.contains(NESTED_FIELD_NEEDLE) ||
75+
other.name.contains(NESTED_FIELD_REPLACEMENT)) {
7476
// first clean it, then prefix it, then again enclose it with backticks
7577
val cleanName = other.name.replaceAll(BACKTICK_MARKER_REGEX, "$1")
7678
val prefixed = prefix.map(o => s"$o.$cleanName").getOrElse(cleanName)

src/test/scala/com/microsoft/hyperspace/index/CreateIndexNestedTest.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ class CreateIndexNestedTest extends HyperspaceSuite with SQLHelper {
4747
FileUtils.delete(new Path(testDir), isRecursive = true)
4848

4949
val dataColumns = Seq("Date", "RGUID", "Query", "imprs", "clicks", "nested")
50-
// save test data non-partitioned.
50+
// Save test data non-partitioned.
5151
SampleNestedData.save(spark, nonPartitionedDataPath, dataColumns)
5252
nonPartitionedDataDF = spark.read.parquet(nonPartitionedDataPath)
5353

54-
// save test data partitioned.
54+
// Save test data partitioned.
5555
SampleNestedData.save(spark, partitionedDataPath, dataColumns, Some(partitionKeys))
5656
partitionedDataDF = spark.read.parquet(partitionedDataPath)
5757
}
@@ -65,7 +65,7 @@ class CreateIndexNestedTest extends HyperspaceSuite with SQLHelper {
6565
FileUtils.delete(systemPath)
6666
}
6767

68-
test("Index creation with nested indexed and included columns") {
68+
test("Index creation with nested indexed and included columns.") {
6969
hyperspace.createIndex(nonPartitionedDataDF, indexConfig1)
7070
assert(hyperspace.indexes.where(s"name = 'index1' ").count == 1)
7171
assert(

src/test/scala/com/microsoft/hyperspace/util/SchemaUtilsTests.scala

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,27 @@ class SchemaUtilsTest extends SparkFunSuite with SparkInvolvedSuite {
123123
assert(flattened3(4) == "nested.f2")
124124
}
125125

126+
test("flatten - double underscores") {
127+
import spark.implicits._
128+
129+
val df = Seq(
130+
(1, "name1", "other1", NestedType4("nf1", NestedType("n1", 1L))),
131+
(2, "name2", "other2", NestedType4("nf2", NestedType("n2", 2L))),
132+
(3, "name3", "other3", NestedType4("nf3", NestedType("n3", 3L))),
133+
(4, "name4", "other4", NestedType4("nf4", NestedType("n4", 4L)))
134+
).toDF("id", "name__special", "other.dot", "nested")
135+
136+
val flattened = SchemaUtils.flatten(df.schema)
137+
138+
assert(flattened.length == 6)
139+
assert(flattened(0) == "id")
140+
assert(flattened(1) == "`name__special`")
141+
assert(flattened(2) == "`other.dot`")
142+
assert(flattened(3) == "nested.nf1_b")
143+
assert(flattened(4) == "nested.n.f1")
144+
assert(flattened(5) == "nested.n.f2")
145+
}
146+
126147
test("flatten - array") {
127148
import spark.implicits._
128149

0 commit comments

Comments (0)