
Commit c04b5df

Author: Andrew Or
Commit message: Review comments
Parent: d2e4e23

File tree: 2 files changed, +19 / -36 lines


core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala

Lines changed: 0 additions & 15 deletions
@@ -149,11 +149,6 @@ class TaskMetrics extends Serializable {
    */
   def outputMetrics: Option[OutputMetrics] = _outputMetrics
 
-  @deprecated("setting OutputMetrics is for internal use only", "2.0.0")
-  def outputMetrics_=(om: Option[OutputMetrics]): Unit = {
-    _outputMetrics = om
-  }
-
   /**
    * Get or create a new [[OutputMetrics]] associated with this task.
    */
@@ -230,11 +225,6 @@ class TaskMetrics extends Serializable {
    */
   def shuffleWriteMetrics: Option[ShuffleWriteMetrics] = _shuffleWriteMetrics
 
-  @deprecated("setting ShuffleWriteMetrics is for internal use only", "2.0.0")
-  def shuffleWriteMetrics_=(swm: Option[ShuffleWriteMetrics]): Unit = {
-    _shuffleWriteMetrics = swm
-  }
-
   /**
    * Get or create a new [[ShuffleWriteMetrics]] associated with this task.
    */
@@ -267,11 +257,6 @@ class TaskMetrics extends Serializable {
     if (_updatedBlockStatuses.nonEmpty) Some(_updatedBlockStatuses) else None
   }
 
-  @deprecated("setting updated blocks is for internal use only", "2.0.0")
-  def updatedBlocks_=(ub: Option[Seq[(BlockId, BlockStatus)]]): Unit = {
-    _updatedBlockStatuses = ub.getOrElse(Seq.empty[(BlockId, BlockStatus)])
-  }
-
   /**
    * Returns the input metrics object that the task should use. Currently, if
    * there exists an input metric with the same readMethod, we return that one
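
Taken together, this file's change removes the deprecated public setters, so metric objects can no longer be assigned from outside; the only way to populate them is the get-or-create path the surviving doc comments describe. A minimal sketch of that pattern follows; the stand-in types and the register method's body are assumptions for illustration, not copied from Spark:

// Hypothetical, simplified stand-ins for the real Spark types.
object DataWriteMethod extends Enumeration { val Hadoop = Value }
class OutputMetrics(val writeMethod: DataWriteMethod.Value) { var bytesWritten = 0L }

class TaskMetricsSketch extends Serializable {
  private var _outputMetrics: Option[OutputMetrics] = None

  // The read-only accessor survives; its deprecated setter counterpart is gone.
  def outputMetrics: Option[OutputMetrics] = _outputMetrics

  // "Get or create": internal code registers metrics instead of assigning them.
  def registerOutputMetrics(writeMethod: DataWriteMethod.Value): OutputMetrics =
    _outputMetrics.getOrElse {
      val om = new OutputMetrics(writeMethod)
      _outputMetrics = Some(om)
      om
    }
}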

core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala

Lines changed: 19 additions & 21 deletions
@@ -1092,7 +1092,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
       val committer = format.getOutputCommitter(hadoopContext)
       committer.setupTask(hadoopContext)
 
-      val (outputMetrics, bytesWrittenCallback) = initHadoopOutputMetrics(context)
+      val outputMetricsAndBytesWrittenCallback: Option[(OutputMetrics, () => Long)] =
+        initHadoopOutputMetrics(context)
 
       val writer = format.getRecordWriter(hadoopContext).asInstanceOf[NewRecordWriter[K, V]]
       require(writer != null, "Unable to obtain RecordWriter")
@@ -1103,15 +1104,15 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
           writer.write(pair._1, pair._2)
 
           // Update bytes written metric every few records
-          maybeUpdateOutputMetrics(bytesWrittenCallback, outputMetrics, recordsWritten)
+          maybeUpdateOutputMetrics(outputMetricsAndBytesWrittenCallback, recordsWritten)
           recordsWritten += 1
         }
       } {
         writer.close(hadoopContext)
       }
       committer.commitTask(hadoopContext)
-      outputMetrics.foreach { om =>
-        bytesWrittenCallback.foreach { fn => om.setBytesWritten(fn()) }
+      outputMetricsAndBytesWrittenCallback.foreach { case (om, callback) =>
+        om.setBytesWritten(callback())
         om.setRecordsWritten(recordsWritten)
       }
       1
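
At commit time, the nested foreach over two separate Options collapses into a single foreach with a case pattern over the pair. A runnable sketch contrasting the two shapes, using an illustrative Metrics stand-in rather than Spark's OutputMetrics:

object ForeachSketch {
  final class Metrics { var bytesWritten = 0L; var recordsWritten = 0L }

  def main(args: Array[String]): Unit = {
    val om: Option[Metrics] = Some(new Metrics)
    val cb: Option[() => Long] = Some(() => 42L)

    // Before: two independent Options; nothing in the types says they are
    // defined together, so one foreach nests inside the other.
    om.foreach { m =>
      cb.foreach { fn => m.bytesWritten = fn() }
      m.recordsWritten = 10L
    }

    // After: one Option of a pair; a single foreach with a case pattern
    // updates both fields together or not at all.
    val pair: Option[(Metrics, () => Long)] = for (m <- om; fn <- cb) yield (m, fn)
    pair.foreach { case (m, fn) =>
      m.bytesWritten = fn()
      m.recordsWritten = 10L
    }
    pair.foreach { case (m, _) => println(m.bytesWritten) } // prints 42
  }
}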
@@ -1179,7 +1180,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
       // around by taking a mod. We expect that no task will be attempted 2 billion times.
       val taskAttemptId = (context.taskAttemptId % Int.MaxValue).toInt
 
-      val (outputMetrics, bytesWrittenCallback) = initHadoopOutputMetrics(context)
+      val outputMetricsAndBytesWrittenCallback: Option[(OutputMetrics, () => Long)] =
+        initHadoopOutputMetrics(context)
 
       writer.setup(context.stageId, context.partitionId, taskAttemptId)
       writer.open()
@@ -1191,15 +1193,15 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
           writer.write(record._1.asInstanceOf[AnyRef], record._2.asInstanceOf[AnyRef])
 
           // Update bytes written metric every few records
-          maybeUpdateOutputMetrics(bytesWrittenCallback, outputMetrics, recordsWritten)
+          maybeUpdateOutputMetrics(outputMetricsAndBytesWrittenCallback, recordsWritten)
           recordsWritten += 1
         }
       } {
         writer.close()
       }
       writer.commit()
-      outputMetrics.foreach { om =>
-        bytesWrittenCallback.foreach { fn => om.setBytesWritten(fn()) }
+      outputMetricsAndBytesWrittenCallback.foreach { case (om, callback) =>
+        om.setBytesWritten(callback())
         om.setRecordsWritten(recordsWritten)
       }
     }
@@ -1211,25 +1213,21 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
   // TODO: these don't seem like the right abstractions.
   // We should abstract the duplicate code in a less awkward way.
 
+  // return type: (output metrics, bytes written callback), defined only if the latter is defined
   private def initHadoopOutputMetrics(
-      context: TaskContext): (Option[OutputMetrics], Option[() => Long]) = {
+      context: TaskContext): Option[(OutputMetrics, () => Long)] = {
     val bytesWrittenCallback = SparkHadoopUtil.get.getFSBytesWrittenOnThreadCallback()
-    val outputMetrics =
-      if (bytesWrittenCallback.isDefined) {
-        Some(context.taskMetrics().registerOutputMetrics(DataWriteMethod.Hadoop))
-      } else {
-        None
-      }
-    (outputMetrics, bytesWrittenCallback)
+    bytesWrittenCallback.map { b =>
+      (context.taskMetrics().registerOutputMetrics(DataWriteMethod.Hadoop), b)
+    }
   }
 
   private def maybeUpdateOutputMetrics(
-      bytesWrittenCallback: Option[() => Long],
-      outputMetrics: Option[OutputMetrics],
+      outputMetricsAndBytesWrittenCallback: Option[(OutputMetrics, () => Long)],
       recordsWritten: Long): Unit = {
-    outputMetrics.foreach { om =>
-      if (recordsWritten % PairRDDFunctions.RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES == 0) {
-        bytesWrittenCallback.foreach { fn => om.setBytesWritten(fn()) }
+    if (recordsWritten % PairRDDFunctions.RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES == 0) {
+      outputMetricsAndBytesWrittenCallback.foreach { case (om, callback) =>
+        om.setBytesWritten(callback())
         om.setRecordsWritten(recordsWritten)
       }
     }
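
This last hunk makes two related changes: initHadoopOutputMetrics now encodes "metrics are registered only when a bytes-written callback exists" in its return type via Option.map, and maybeUpdateOutputMetrics hoists the modulo check outside the foreach, so the cheap arithmetic test runs before any Option unwrapping. A standalone sketch of the resulting throttled-update pattern; the Metrics type and the interval value 256 are assumptions for illustration (Spark keeps the real constant on the PairRDDFunctions companion object):

object ThrottledUpdateSketch {
  // Hypothetical interval; Spark reads this from a companion-object constant.
  val RecordsBetweenUpdates = 256L

  final class Metrics { var bytesWritten = 0L; var recordsWritten = 0L }

  // Cheap modulo test first, Option unwrapping only when an update is due.
  def maybeUpdate(state: Option[(Metrics, () => Long)], recordsWritten: Long): Unit =
    if (recordsWritten % RecordsBetweenUpdates == 0) {
      state.foreach { case (om, callback) =>
        om.bytesWritten = callback()
        om.recordsWritten = recordsWritten
      }
    }

  def main(args: Array[String]): Unit = {
    val state = Some((new Metrics, () => 123L))
    (0L until 1000L).foreach { i => maybeUpdate(state, i) }
    // Updates fired at records 0, 256, 512, 768, so the last snapshot is 768.
    state.foreach { case (m, _) => println(m.recordsWritten) }
  }
}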
