Skip to content

Commit ab659ba

Browse files
LuciferYang authored and dongjoon-hyun committed
[SPARK-41708][SQL][FOLLOWUP] Override toString method of FileIndex
### What changes were proposed in this pull request? The main change of this pr is to fix suggestions of #39598 (comment): - revert the change of #39598 - override `toString` method of `FileIndex` to print className with `rootPaths` - change the replacement rule of `SQLQueryTestHelper#replaceNotIncludedMsg` method for `file://xxx/clsName/` path to replace one by one instead of replacing the longest match ### Why are the changes needed? Fix suggestions of #39598 (comment) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GitHub Actions - Manually test `build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" -Pscala-2.13` , run successful Closes #39610 from LuciferYang/SPARK-41708-FOLLOWUP-n. Lead-authored-by: yangjie01 <yangjie01@baidu.com> Co-authored-by: YangJie <yangjie01@baidu.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent 5c7518c commit ab659ba

File tree

4 files changed

+9
-8
lines changed

4 files changed

+9
-8
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileIndex.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,6 @@ trait FileIndex {
    * to update the metrics.
    */
   def metadataOpsTimeNs: Option[Long] = None
+
+  override def toString: String = s"${getClass.getName}(${rootPaths.mkString(",")})"
 }

sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,20 +1086,20 @@ struct<plan:string>
 +- 'UnresolvedRelation [explain_temp4], [], false

 == Analyzed Logical Plan ==
-InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val]
+InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/explain_temp5, false, [val#x], Parquet, [path=file:[not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex(file:[not included in comparison]/{warehouse_dir}/explain_temp5), [key, val]
 +- Project [key#x, val#x]
    +- SubqueryAlias spark_catalog.default.explain_temp4
       +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet

 == Optimized Logical Plan ==
-InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val]
+InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/explain_temp5, false, [val#x], Parquet, [path=file:[not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex(file:[not included in comparison]/{warehouse_dir}/explain_temp5), [key, val]
 +- WriteFiles
    +- Sort [val#x ASC NULLS FIRST], false
       +- Project [key#x, empty2null(val#x) AS val#x]
          +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet

 == Physical Plan ==
-Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val]
+Execute InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/explain_temp5, false, [val#x], Parquet, [path=file:[not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex(file:[not included in comparison]/{warehouse_dir}/explain_temp5), [key, val]
 +- WriteFiles
    +- *Sort [val#x ASC NULLS FIRST], false, 0
       +- *Project [key#x, empty2null(val#x) AS val#x]

sql/core/src/test/resources/sql-tests/results/explain.sql.out

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,20 +1028,20 @@ struct<plan:string>
 +- 'UnresolvedRelation [explain_temp4], [], false

 == Analyzed Logical Plan ==
-InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val]
+InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/explain_temp5, false, [val#x], Parquet, [path=file:[not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex(file:[not included in comparison]/{warehouse_dir}/explain_temp5), [key, val]
 +- Project [key#x, val#x]
    +- SubqueryAlias spark_catalog.default.explain_temp4
       +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet

 == Optimized Logical Plan ==
-InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val]
+InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/explain_temp5, false, [val#x], Parquet, [path=file:[not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex(file:[not included in comparison]/{warehouse_dir}/explain_temp5), [key, val]
 +- WriteFiles
    +- Sort [val#x ASC NULLS FIRST], false
       +- Project [key#x, empty2null(val#x) AS val#x]
          +- Relation spark_catalog.default.explain_temp4[key#x,val#x] parquet

 == Physical Plan ==
-Execute InsertIntoHadoopFsRelationCommand Location [not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex, [key, val]
+Execute InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/explain_temp5, false, [val#x], Parquet, [path=file:[not included in comparison]/{warehouse_dir}/explain_temp5], Append, `spark_catalog`.`default`.`explain_temp5`, org.apache.spark.sql.execution.datasources.CatalogFileIndex(file:[not included in comparison]/{warehouse_dir}/explain_temp5), [key, val]
 +- WriteFiles
    +- *Sort [val#x ASC NULLS FIRST], false, 0
       +- *Project [key#x, empty2null(val#x) AS val#x]

sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,12 @@ trait SQLQueryTestHelper {
       .replaceAll(
         s"Location.*$clsName/",
         s"Location $notIncludedMsg/{warehouse_dir}/")
-      .replaceAll(s"file:.*$clsName", s"Location $notIncludedMsg/{warehouse_dir}")
+      .replaceAll(s"file:.*?$clsName", s"file:$notIncludedMsg/{warehouse_dir}")
       .replaceAll("Created By.*", s"Created By $notIncludedMsg")
       .replaceAll("Created Time.*", s"Created Time $notIncludedMsg")
       .replaceAll("Last Access.*", s"Last Access $notIncludedMsg")
       .replaceAll("Partition Statistics\t\\d+", s"Partition Statistics\t$notIncludedMsg")
       .replaceAll("\\*\\(\\d+\\) ", "*") // remove the WholeStageCodegen codegenStageIds
-      .replaceAll("@[0-9a-z]+,", ",") // remove hashCode
   }
5251

5352

0 commit comments

Comments (0)