Skip to content

Commit 872c290

Browse files
authored
HADOOP-17122: Preserving Directory Attributes in DistCp with Atomic Copy (apache#2133)
Contributed by Swaminathan Balachandran
1 parent f734455 commit 872c290

File tree

2 files changed

+59
-16
lines changed

2 files changed

+59
-16
lines changed

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,10 @@ private void preserveFileAttributesForDirectories(Configuration conf)
318318
SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
319319
SequenceFile.Reader.file(sourceListing));
320320
long totalLen = clusterFS.getFileStatus(sourceListing).getLen();
321-
322-
Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
321+
// For Atomic Copy the Final & Work Path are different & atomic copy has
322+
// already moved it to final path.
323+
Path targetRoot =
324+
new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
323325

324326
long preservedEntries = 0;
325327
try {

hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java

Lines changed: 55 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
import java.util.*;
5454

5555
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
56+
import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH;
57+
import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_TARGET_WORK_PATH;
5658
import static org.apache.hadoop.tools.util.TestDistCpUtils.*;
5759

5860
public class TestCopyCommitter {
@@ -160,10 +162,10 @@ public void testPreserveStatus() throws IOException {
160162
context.setTargetPathExists(false);
161163

162164
CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
163-
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
165+
Path listingFile = new Path("/tmp1/" + rand.nextLong());
164166
listing.buildListing(listingFile, context);
165167

166-
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
168+
conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
167169

168170
committer.commitJob(jobContext);
169171
checkDirectoryPermissions(fs, targetBase, sourcePerm);
@@ -179,6 +181,45 @@ public void testPreserveStatus() throws IOException {
179181

180182
}
181183

184+
@Test
185+
public void testPreserveStatusWithAtomicCommit() throws IOException {
186+
TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
187+
JobContext jobContext = new JobContextImpl(
188+
taskAttemptContext.getConfiguration(),
189+
taskAttemptContext.getTaskAttemptID().getJobID());
190+
Configuration conf = jobContext.getConfiguration();
191+
String sourceBase;
192+
String workBase;
193+
String targetBase;
194+
FileSystem fs = null;
195+
try {
196+
OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
197+
fs = FileSystem.get(conf);
198+
FsPermission sourcePerm = new FsPermission((short) 511);
199+
FsPermission initialPerm = new FsPermission((short) 448);
200+
sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
201+
workBase = TestDistCpUtils.createTestSetup(fs, initialPerm);
202+
targetBase = "/tmp1/" + rand.nextLong();
203+
final DistCpOptions options = new DistCpOptions.Builder(
204+
Collections.singletonList(new Path(sourceBase)), new Path("/out"))
205+
.preserve(FileAttribute.PERMISSION).build();
206+
options.appendToConf(conf);
207+
final DistCpContext context = new DistCpContext(options);
208+
context.setTargetPathExists(false);
209+
CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
210+
Path listingFile = new Path("/tmp1/" + rand.nextLong());
211+
listing.buildListing(listingFile, context);
212+
conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
213+
conf.set(CONF_LABEL_TARGET_WORK_PATH, workBase);
214+
conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
215+
committer.commitJob(jobContext);
216+
checkDirectoryPermissions(fs, targetBase, sourcePerm);
217+
} finally {
218+
TestDistCpUtils.delete(fs, "/tmp1");
219+
conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
220+
}
221+
}
222+
182223
@Test
183224
public void testDeleteMissing() throws IOException {
184225
TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
@@ -207,8 +248,8 @@ public void testDeleteMissing() throws IOException {
207248
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
208249
listing.buildListing(listingFile, context);
209250

210-
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
211-
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
251+
conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase);
252+
conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
212253

213254
committer.commitJob(jobContext);
214255
verifyFoldersAreInSync(fs, targetBase, sourceBase);
@@ -256,8 +297,8 @@ public void testPreserveTimeWithDeleteMiss() throws IOException {
256297
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
257298
listing.buildListing(listingFile, context);
258299

259-
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
260-
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
300+
conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase);
301+
conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
261302

262303
Path sourceListing = new Path(
263304
conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
@@ -320,8 +361,8 @@ public void testDeleteMissingFlatInterleavedFiles() throws IOException {
320361
Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
321362
listing.buildListing(listingFile, context);
322363

323-
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
324-
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
364+
conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase);
365+
conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
325366

326367
committer.commitJob(jobContext);
327368
verifyFoldersAreInSync(fs, targetBase, sourceBase);
@@ -353,8 +394,8 @@ public void testAtomicCommitMissingFinal() throws IOException {
353394
fs = FileSystem.get(conf);
354395
fs.mkdirs(new Path(workPath));
355396

356-
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
357-
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
397+
conf.set(CONF_LABEL_TARGET_WORK_PATH, workPath);
398+
conf.set(CONF_LABEL_TARGET_FINAL_PATH, finalPath);
358399
conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
359400

360401
assertPathExists(fs, "Work path", new Path(workPath));
@@ -391,8 +432,8 @@ public void testAtomicCommitExistingFinal() throws IOException {
391432
fs.mkdirs(new Path(workPath));
392433
fs.mkdirs(new Path(finalPath));
393434

394-
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
395-
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
435+
conf.set(CONF_LABEL_TARGET_WORK_PATH, workPath);
436+
conf.set(CONF_LABEL_TARGET_FINAL_PATH, finalPath);
396437
conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
397438

398439
assertPathExists(fs, "Work path", new Path(workPath));
@@ -463,8 +504,8 @@ private void testCommitWithChecksumMismatch(boolean skipCrc)
463504
+ String.valueOf(rand.nextLong()));
464505
listing.buildListing(listingFile, context);
465506

466-
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
467-
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
507+
conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase);
508+
conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
468509

469510
OutputCommitter committer = new CopyCommitter(
470511
null, taskAttemptContext);

0 commit comments

Comments
 (0)