Skip to content

Commit 387c8a8

Browse files
committed
#777 When spark.sql.sources.writeJobUUID is absent, using the TaskAttempt’s JobID aids traceability and consistency across tasks/attempts.
1 parent a089967 commit 387c8a8

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/writer/RawBinaryOutputFormat.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
package za.co.absa.cobrix.spark.cobol.writer
1818

1919
import org.apache.hadoop.fs.Path
20-
import org.apache.hadoop.mapreduce._
2120
import org.apache.hadoop.io.{BytesWritable, NullWritable}
21+
import org.apache.hadoop.mapreduce._
2222
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
2323
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getOutputPath
2424

@@ -53,8 +53,10 @@ class RawBinaryOutputFormat extends FileOutputFormat[NullWritable, BytesWritable
5353

5454
override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = {
5555
val conf = context.getConfiguration
56-
val writeJobId = Option(conf.get("spark.sql.sources.writeJobUUID")).getOrElse(uniqueUuid)
5756
val attempt = context.getTaskAttemptID
57+
val writeJobId = Option(conf.get("spark.sql.sources.writeJobUUID"))
58+
.orElse(Option(attempt.getJobID).map(_.toString))
59+
.getOrElse(uniqueUuid)
5860
val taskId = f"${attempt.getTaskID.getId}%05d"
5961
val attemptId = f"c${attempt.getId}%03d"
6062
val filename = s"part-$taskId-$writeJobId-$attemptId$extension"

0 commit comments

Comments
 (0)