@@ -268,54 +268,73 @@ private[spark] object Utils extends Logging {
268
268
val tempFile = File .createTempFile(" fetchFileTemp" , null , new File (tempDir))
269
269
val targetFile = new File (targetDir, filename)
270
270
val uri = new URI (url)
271
+ val fileOverwrite = System .getProperty(" spark.files.overwrite" , " false" ).toBoolean
271
272
uri.getScheme match {
272
273
case " http" | " https" | " ftp" =>
273
274
logInfo(" Fetching " + url + " to " + tempFile)
274
275
val in = new URL (url).openStream()
275
276
val out = new FileOutputStream (tempFile)
276
277
Utils .copyStream(in, out, true )
277
278
if (targetFile.exists && ! Files .equal(tempFile, targetFile)) {
278
- tempFile.delete()
279
- throw new SparkException (
280
- " File " + targetFile + " exists and does not match contents of" + " " + url)
281
- } else {
282
- Files .move(tempFile, targetFile)
279
+ if (fileOverwrite) {
280
+ targetFile.delete()
281
+ logInfo((" File %s exists and does not match contents of %s, " +
282
+ " replacing it with %s" ).format(targetFile, url, url))
283
+ } else {
284
+ tempFile.delete()
285
+ throw new SparkException (
286
+ " File " + targetFile + " exists and does not match contents of" + " " + url)
287
+ }
283
288
}
289
+ Files .move(tempFile, targetFile)
284
290
case " file" | null =>
285
291
// In the case of a local file, copy the local file to the target directory.
286
292
// Note the difference between uri vs url.
287
293
val sourceFile = if (uri.isAbsolute) new File (uri) else new File (url)
294
+ var shouldCopy = true
288
295
if (targetFile.exists) {
289
- // If the target file already exists, warn the user if
290
296
if (! Files .equal(sourceFile, targetFile)) {
291
- throw new SparkException (
292
- " File " + targetFile + " exists and does not match contents of" + " " + url)
297
+ if (fileOverwrite) {
298
+ targetFile.delete()
299
+ logInfo((" File %s exists and does not match contents of %s, " +
300
+ " replacing it with %s" ).format(targetFile, url, url))
301
+ } else {
302
+ throw new SparkException (
303
+ " File " + targetFile + " exists and does not match contents of" + " " + url)
304
+ }
293
305
} else {
294
306
// Do nothing if the file contents are the same, i.e. this file has been copied
295
307
// previously.
296
308
logInfo(sourceFile.getAbsolutePath + " has been previously copied to "
297
309
+ targetFile.getAbsolutePath)
310
+ shouldCopy = false
298
311
}
299
- } else {
312
+ }
313
+
314
+ if (shouldCopy) {
300
315
// The target file is absent, or existed with different contents and was deleted for overwrite. Copy it there.
301
316
logInfo(" Copying " + sourceFile.getAbsolutePath + " to " + targetFile.getAbsolutePath)
302
317
Files .copy(sourceFile, targetFile)
303
318
}
304
319
case _ =>
305
320
// Use the Hadoop filesystem library, which supports file://, hdfs://, s3://, and others
306
- val uri = new URI (url)
307
321
val conf = SparkHadoopUtil .get.newConfiguration()
308
322
val fs = FileSystem .get(uri, conf)
309
323
val in = fs.open(new Path (uri))
310
324
val out = new FileOutputStream (tempFile)
311
325
Utils .copyStream(in, out, true )
312
326
if (targetFile.exists && ! Files .equal(tempFile, targetFile)) {
313
- tempFile.delete()
314
- throw new SparkException (" File " + targetFile + " exists and does not match contents of" +
315
- " " + url)
316
- } else {
317
- Files .move(tempFile, targetFile)
327
+ if (fileOverwrite) {
328
+ targetFile.delete()
329
+ logInfo((" File %s exists and does not match contents of %s, " +
330
+ " replacing it with %s" ).format(targetFile, url, url))
331
+ } else {
332
+ tempFile.delete()
333
+ throw new SparkException (
334
+ " File " + targetFile + " exists and does not match contents of" + " " + url)
335
+ }
318
336
}
337
+ Files .move(tempFile, targetFile)
319
338
}
320
339
// Decompress the file if it's a .tar or .tar.gz
321
340
if (filename.endsWith(" .tar.gz" ) || filename.endsWith(" .tgz" )) {
0 commit comments