Skip to content
This repository was archived by the owner on Jun 14, 2024. It is now read-only.

Commit 8f02f54

Browse files
Integrate feedback (2)
1 parent 04eeb91 commit 8f02f54

File tree

3 files changed

+28
-5
lines changed

3 files changed

+28
-5
lines changed

src/main/scala/com/microsoft/hyperspace/util/SchemaUtils.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ import org.apache.spark.sql.types.{ArrayType, MapType, StructField, StructType}
2121
object SchemaUtils {
2222

2323
val BACKTICK_MARKER_REGEX = "^`(.*)`$"
24-
val NESTED_FIELD_NEEDLE_REGEX = "\\."
24+
val NESTED_FIELD_NEEDLE = "."
25+
val NESTED_FIELD_NEEDLE_REGEX = s"\\$NESTED_FIELD_NEEDLE"
2526
val NESTED_FIELD_REPLACEMENT = "__"
2627

2728
/**
@@ -70,7 +71,8 @@ object SchemaUtils {
7071
flatten(keys, Some(prefix.map(o => s"$o.$name").getOrElse(name))) ++
7172
flatten(values, Some(prefix.map(o => s"$o.$name").getOrElse(name)))
7273
case other =>
73-
if (other.name.contains(".")) {
74+
if (other.name.contains(NESTED_FIELD_NEEDLE) ||
75+
other.name.contains(NESTED_FIELD_REPLACEMENT)) {
7476
// first clean it, then prefix it, then again enclose it with backticks
7577
val cleanName = other.name.replaceAll(BACKTICK_MARKER_REGEX, "$1")
7678
val prefixed = prefix.map(o => s"$o.$cleanName").getOrElse(cleanName)

src/test/scala/com/microsoft/hyperspace/index/CreateIndexNestedTest.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ class CreateIndexNestedTest extends HyperspaceSuite with SQLHelper {
4747
FileUtils.delete(new Path(testDir), isRecursive = true)
4848

4949
val dataColumns = Seq("Date", "RGUID", "Query", "imprs", "clicks", "nested")
50-
// save test data non-partitioned.
50+
// Save test data non-partitioned.
5151
SampleNestedData.save(spark, nonPartitionedDataPath, dataColumns)
5252
nonPartitionedDataDF = spark.read.parquet(nonPartitionedDataPath)
5353

54-
// save test data partitioned.
54+
// Save test data partitioned.
5555
SampleNestedData.save(spark, partitionedDataPath, dataColumns, Some(partitionKeys))
5656
partitionedDataDF = spark.read.parquet(partitionedDataPath)
5757
}
@@ -65,7 +65,7 @@ class CreateIndexNestedTest extends HyperspaceSuite with SQLHelper {
6565
FileUtils.delete(systemPath)
6666
}
6767

68-
test("Index creation with nested indexed and included columns") {
68+
test("Index creation with nested indexed and included columns.") {
6969
hyperspace.createIndex(nonPartitionedDataDF, indexConfig1)
7070
assert(hyperspace.indexes.where(s"name = 'index1' ").count == 1)
7171
assert(

src/test/scala/com/microsoft/hyperspace/util/SchemaUtilsTests.scala

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,27 @@ class SchemaUtilsTest extends SparkFunSuite with SparkInvolvedSuite {
123123
assert(flattened3(4) == "nested.f2")
124124
}
125125

126+
test("flatten - double underscores") {
127+
import spark.implicits._
128+
129+
val df = Seq(
130+
(1, "name1", "other1", NestedType4("nf1", NestedType("n1", 1L))),
131+
(2, "name2", "other2", NestedType4("nf2", NestedType("n2", 2L))),
132+
(3, "name3", "other3", NestedType4("nf3", NestedType("n3", 3L))),
133+
(4, "name4", "other4", NestedType4("nf4", NestedType("n4", 4L)))
134+
).toDF("id", "name__special", "other.dot", "nested")
135+
136+
val flattened = SchemaUtils.flatten(df.schema)
137+
138+
assert(flattened.length == 6)
139+
assert(flattened(0) == "id")
140+
assert(flattened(1) == "`name__special`")
141+
assert(flattened(2) == "`other.dot`")
142+
assert(flattened(3) == "nested.nf1_b")
143+
assert(flattened(4) == "nested.n.f1")
144+
assert(flattened(5) == "nested.n.f2")
145+
}
146+
126147
test("flatten - array") {
127148
import spark.implicits._
128149

0 commit comments

Comments (0)