
Commit 9101223

Merge branch 'branch-1.4' of github.com:apache/spark into branch-1.4

Author: cafreeman (committed)
2 parents: 5a80844 + 1380216


8 files changed: +47, -4 lines changed


core/src/main/scala/org/apache/spark/ui/ToolTips.scala

Lines changed: 4 additions & 0 deletions
@@ -35,6 +35,10 @@ private[spark] object ToolTips {
 
   val OUTPUT = "Bytes and records written to Hadoop."
 
+  val STORAGE_MEMORY =
+    "Memory used / total available memory for storage of data " +
+      "like RDD partitions cached in memory. "
+
   val SHUFFLE_WRITE =
     "Bytes and records written to disk in order to be read by a shuffle in a future stage."

core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala

Lines changed: 1 addition & 1 deletion
@@ -67,7 +67,7 @@ private[ui] class ExecutorsPage(
         <th>Executor ID</th>
         <th>Address</th>
         <th>RDD Blocks</th>
-        <th>Memory Used</th>
+        <th><span data-toggle="tooltip" title={ToolTips.STORAGE_MEMORY}>Storage Memory</span></th>
         <th>Disk Used</th>
         <th>Active Tasks</th>
         <th>Failed Tasks</th>

sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala

Lines changed: 6 additions & 0 deletions
@@ -195,6 +195,12 @@ private[sql] class ParquetRelation2(
       committerClass,
       classOf[ParquetOutputCommitter])
 
+    // We're not really using `ParquetOutputFormat[Row]` for writing data here, because we override
+    // it in `ParquetOutputWriter` to support appending and dynamic partitioning. The reason why
+    // we set it here is to setup the output committer class to `ParquetOutputCommitter`, which is
+    // bundled with `ParquetOutputFormat[Row]`.
+    job.setOutputFormatClass(classOf[ParquetOutputFormat[Row]])
+
     // TODO There's no need to use two kinds of WriteSupport
     // We should unify them. `SpecificMutableRow` can process both atomic (primitive) types and
     // complex types.
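The comment in the hunk above relies on a Hadoop convention: the output committer is obtained from whatever output format class is registered on the job. A minimal sketch of that resolution path, with `resolveCommitter` as a hypothetical helper for illustration, not code from this commit:

    import org.apache.hadoop.mapreduce.{Job, OutputCommitter, TaskAttemptContext}
    import org.apache.hadoop.util.ReflectionUtils

    // Hypothetical helper: instantiate the job's configured output format and
    // ask it for its committer, roughly what the Hadoop write path does.
    def resolveCommitter(job: Job, context: TaskAttemptContext): OutputCommitter = {
      val format = ReflectionUtils.newInstance(job.getOutputFormatClass, job.getConfiguration)
      // With ParquetOutputFormat[Row] registered above, this yields the
      // ParquetOutputCommitter bundled with it.
      format.getOutputCommitter(context)
    }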

sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala

Lines changed: 11 additions & 1 deletion
@@ -27,7 +27,7 @@ import org.apache.hadoop.hive.ql.io.orc.{OrcInputFormat, OrcOutputFormat, OrcSer
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils
 import org.apache.hadoop.io.{NullWritable, Writable}
-import org.apache.hadoop.mapred.{InputFormat => MapRedInputFormat, JobConf, RecordWriter, Reporter}
+import org.apache.hadoop.mapred.{InputFormat => MapRedInputFormat, JobConf, OutputFormat => MapRedOutputFormat, RecordWriter, Reporter}
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
 import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}

@@ -193,6 +193,16 @@ private[sql] class OrcRelation(
   }
 
   override def prepareJobForWrite(job: Job): OutputWriterFactory = {
+    job.getConfiguration match {
+      case conf: JobConf =>
+        conf.setOutputFormat(classOf[OrcOutputFormat])
+      case conf =>
+        conf.setClass(
+          "mapred.output.format.class",
+          classOf[OrcOutputFormat],
+          classOf[MapRedOutputFormat[_, _]])
+    }
+
     new OutputWriterFactory {
       override def newInstance(
           path: String,
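The match in prepareJobForWrite above handles the fact that the job's configuration may or may not already be a JobConf: only JobConf exposes the typed setter for old-API (mapred) output formats, so the fallback branch writes the equivalent property directly. A small sketch, offered as an assumption rather than code from this diff, showing that both routes end up with the same output format:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    import org.apache.hadoop.mapred.{JobConf, OutputFormat => MapRedOutputFormat}

    // Write the property the way the fallback branch does ...
    val conf = new Configuration()
    conf.setClass(
      "mapred.output.format.class",
      classOf[OrcOutputFormat],
      classOf[MapRedOutputFormat[_, _]])

    // ... and read it back through the old mapred API: JobConf.getOutputFormat
    // instantiates the configured class, so `format` is an OrcOutputFormat.
    val format = new JobConf(conf).getOutputFormat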

sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ class HiveSparkSubmitSuite
       new File(sparkHome),
       Map("SPARK_TESTING" -> "1", "SPARK_HOME" -> sparkHome))
     try {
-      val exitCode = failAfter(120 seconds) { process.waitFor() }
+      val exitCode = failAfter(180 seconds) { process.waitFor() }
       if (exitCode != 0) {
         fail(s"Process returned with exit code $exitCode. See the log4j logs for more detail.")
       }

sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala

Lines changed: 2 additions & 0 deletions
@@ -119,6 +119,8 @@ class SimpleTextRelation(
   }
 
   override def prepareJobForWrite(job: Job): OutputWriterFactory = new OutputWriterFactory {
+    job.setOutputFormatClass(classOf[TextOutputFormat[_, _]])
+
     override def newInstance(
         path: String,
         dataSchema: StructType,

sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala

Lines changed: 21 additions & 0 deletions
@@ -719,4 +719,25 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
       }
     }
   }
+
+  test("SPARK-8604: Parquet data source should write summary file while doing appending") {
+    withTempPath { dir =>
+      val path = dir.getCanonicalPath
+      val df = sqlContext.range(0, 5)
+      df.write.mode(SaveMode.Overwrite).parquet(path)
+
+      val summaryPath = new Path(path, "_metadata")
+      val commonSummaryPath = new Path(path, "_common_metadata")
+
+      val fs = summaryPath.getFileSystem(configuration)
+      fs.delete(summaryPath, true)
+      fs.delete(commonSummaryPath, true)
+
+      df.write.mode(SaveMode.Append).parquet(path)
+      checkAnswer(sqlContext.read.parquet(path), df.unionAll(df))
+
+      assert(fs.exists(summaryPath))
+      assert(fs.exists(commonSummaryPath))
+    }
+  }
 }
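For context on what the test above checks: `_metadata` and `_common_metadata` are the Parquet summary files that `ParquetOutputCommitter` writes when the job commits. As a hedged aside (an assumption about parquet-hadoop's configuration, not something this commit changes), the summary pass can be switched off through the Hadoop configuration:

    // Assumption: "parquet.enable.summary-metadata" is parquet-hadoop's
    // job-summary flag; when false, no _metadata/_common_metadata is written.
    sqlContext.sparkContext.hadoopConfiguration
      .setBoolean("parquet.enable.summary-metadata", false)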

unsafe/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@
         <groupId>net.alchim31.maven</groupId>
         <artifactId>scala-maven-plugin</artifactId>
         <configuration>
-          <javacArgs>
+          <javacArgs combine.children="append">
             <!-- This option is needed to suppress warnings from sun.misc.Unsafe usage -->
             <javacArg>-XDignore.symbol.file</javacArg>
           </javacArgs>
