From 61b073aca8a5f9ec649cf5c6c9c39ff37e62889b Mon Sep 17 00:00:00 2001 From: Aday Bujeda Date: Wed, 18 May 2022 09:13:45 +0100 Subject: [PATCH] BagIt Support - Add automatic checksum validation on upload --- .../8608-bagit-support-validate-checksums.md | 10 + .../source/installation/config.rst | 59 ++++ .../iq/dataverse/EditDataFilesPageHelper.java | 40 +++ .../iq/dataverse/EditDatafilesPage.java | 26 +- .../harvard/iq/dataverse/SettingsWrapper.java | 13 + .../datadeposit/MediaResourceManagerImpl.java | 5 +- .../datasetutility/AddReplaceFileHelper.java | 4 +- .../harvard/iq/dataverse/util/FileUtil.java | 45 ++- .../dataverse/util/bagit/BagChecksumType.java | 42 +++ .../dataverse/util/bagit/BagValidation.java | 109 ++++++ .../iq/dataverse/util/bagit/BagValidator.java | 155 +++++++++ .../util/bagit/FileChecksumValidationJob.java | 63 ++++ .../dataverse/util/bagit/ManifestReader.java | 105 ++++++ .../util/bagit/data/DataFileDataProvider.java | 81 +++++ .../util/bagit/data/FileDataProvider.java | 23 ++ .../bagit/data/FileDataProviderFactory.java | 28 ++ .../util/bagit/data/FileUtilWrapper.java | 56 +++ .../util/bagit/data/FolderDataProvider.java | 67 ++++ .../util/bagit/data/ZipFileDataProvider.java | 67 ++++ .../dataverse/util/file/BagItFileHandler.java | 160 +++++++++ .../util/file/BagItFileHandlerFactory.java | 68 ++++ .../file/BagItFileHandlerPostProcessor.java | 39 +++ .../util/file/CreateDataFileResult.java | 57 +++ src/main/java/propertyFiles/Bundle.properties | 9 + .../EditDataFilesPageHelperTest.java | 65 ++++ .../util/bagit/BagChecksumTypeTest.java | 50 +++ .../util/bagit/BagValidationTest.java | 71 ++++ .../util/bagit/BagValidatorTest.java | 255 ++++++++++++++ .../bagit/FileChecksumValidationJobTest.java | 68 ++++ .../util/bagit/ManifestReaderTest.java | 90 +++++ .../bagit/data/DataFileDataProviderTest.java | 121 +++++++ .../data/FileDataProviderFactoryTest.java | 43 +++ .../bagit/data/FolderDataProviderTest.java | 92 +++++ 
.../util/bagit/data/StringDataProvider.java | 50 +++ .../bagit/data/ZipFileDataProviderTest.java | 119 +++++++ .../file/BagItFileHandlerFactoryTest.java | 50 +++ .../BagItFileHandlerPostProcessorTest.java | 58 ++++ .../util/file/BagItFileHandlerTest.java | 328 ++++++++++++++++++ .../util/file/CreateDataFileResultTest.java | 55 +++ .../bagit/data/DataFileDataProviderTest.txt | 0 .../data/FileDataProviderFactoryTest.zip | Bin 0 -> 244 bytes .../data/FolderDataProviderTest/file1.txt | 0 .../data/FolderDataProviderTest/file2.csv | 0 .../invalid_format/manifest-sha256.txt | 2 + .../bagit/manifest/valid/manifest-sha256.txt | 2 + 45 files changed, 2829 insertions(+), 21 deletions(-) create mode 100644 doc/release-notes/8608-bagit-support-validate-checksums.md create mode 100644 src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/BagChecksumType.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidation.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidator.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJob.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/ManifestReader.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/data/DataFileDataProvider.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FileDataProvider.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FileDataProviderFactory.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FileUtilWrapper.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FolderDataProvider.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/bagit/data/ZipFileDataProvider.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java create mode 
100644 src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactory.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResult.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/EditDataFilesPageHelperTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/bagit/BagChecksumTypeTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/bagit/BagValidationTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/bagit/BagValidatorTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJobTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/bagit/ManifestReaderTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/bagit/data/DataFileDataProviderTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FileDataProviderFactoryTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FolderDataProviderTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/bagit/data/StringDataProvider.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/bagit/data/ZipFileDataProviderTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactoryTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessorTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResultTest.java create mode 100644 src/test/resources/bagit/data/DataFileDataProviderTest.txt create mode 100644 src/test/resources/bagit/data/FileDataProviderFactoryTest.zip create mode 100644 
src/test/resources/bagit/data/FolderDataProviderTest/file1.txt create mode 100644 src/test/resources/bagit/data/FolderDataProviderTest/file2.csv create mode 100644 src/test/resources/bagit/manifest/invalid_format/manifest-sha256.txt create mode 100644 src/test/resources/bagit/manifest/valid/manifest-sha256.txt diff --git a/doc/release-notes/8608-bagit-support-validate-checksums.md b/doc/release-notes/8608-bagit-support-validate-checksums.md new file mode 100644 index 00000000000..f071dbe4039 --- /dev/null +++ b/doc/release-notes/8608-bagit-support-validate-checksums.md @@ -0,0 +1,10 @@ +## BagIt Support - Automatic checksum validation on zip file upload +The BagIt file handler detects and transforms zip files with a BagIt package format into Dataverse DataFiles. The system validates the checksums of the files in the package payload as described in the first manifest file with a hash algorithm that we support. Take a look at `BagChecksumType class `_ for the list of the currently supported hash algorithms. + +The handler will not allow packages with checksum errors. The first 5 errors will be displayed to the user. This is configurable through database settings. + +The checksum validation uses a thread pool to improve performance. This thread pool can be adjusted to your Dataverse installation requirements. + +The BagIt file handler is disabled by default. 
Use the ``:BagItHandlerEnabled`` database settings to enable it: ``curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:BagItHandlerEnabled`` + +For more configuration settings see the user guide: https://guides.dataverse.org/en/latest/installation/config.html#bagit-file-handler \ No newline at end of file diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index cd40221d7fc..a85d8554e74 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1038,6 +1038,22 @@ Disabling Custom Dataset Terms See :ref:`:AllowCustomTermsOfUse` for how to disable the "Custom Dataset Terms" option. +.. _BagIt File Handler: + +BagIt File Handler +------------------ + +BagIt file handler detects and transforms zip files with a BagIt package format into Dataverse DataFiles. The system validates the checksums of the files in the package payload as described in the first manifest file with a hash algorithm that we support. Take a look at `BagChecksumType class `_ for the list of the currently supported hash algorithms. + +The checksum validation uses a thread pool to improve performance. This thread pool can be adjusted to your Dataverse installation requirements. + +BagIt file handler configuration settings: + +- :ref:`:BagItHandlerEnabled` +- :ref:`:BagValidatorJobPoolSize` +- :ref:`:BagValidatorMaxErrors` +- :ref:`:BagValidatorJobWaitInterval` + .. _BagIt Export: BagIt Export @@ -2536,6 +2552,49 @@ To enable redirects to the zipper on a different server: ``curl -X PUT -d 'https://zipper.example.edu/cgi-bin/zipdownload' http://localhost:8080/api/admin/settings/:CustomZipDownloadServiceUrl`` +:CreateDataFilesMaxErrorsToDisplay +++++++++++++++++++++++++++++++++++ + +Number of errors to display to the user when creating DataFiles from a file upload. It defaults to 5 errors. 
+ +``curl -X PUT -d '1' http://localhost:8080/api/admin/settings/:CreateDataFilesMaxErrorsToDisplay`` + +.. _:BagItHandlerEnabled: + +:BagItHandlerEnabled ++++++++++++++++++++++ + +Part of the database settings to configure the BagIt file handler. Enables the BagIt file handler. By default, the handler is disabled. + +``curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:BagItHandlerEnabled`` + +.. _:BagValidatorJobPoolSize: + +:BagValidatorJobPoolSize +++++++++++++++++++++++++ + +Part of the database settings to configure the BagIt file handler. The number of threads the checksum validation class uses to validate a single zip file. Defaults to 4 threads + +``curl -X PUT -d '10' http://localhost:8080/api/admin/settings/:BagValidatorJobPoolSize`` + +.. _:BagValidatorMaxErrors: + +:BagValidatorMaxErrors +++++++++++++++++++++++ + +Part of the database settings to configure the BagIt file handler. The maximum number of errors allowed before the validation job aborts execution. This is to avoid processing the whole BagIt package. Defaults to 5 errors. + +``curl -X PUT -d '2' http://localhost:8080/api/admin/settings/:BagValidatorMaxErrors`` + +.. _:BagValidatorJobWaitInterval: + +:BagValidatorJobWaitInterval +++++++++++++++++++++++++++++ + +Part of the database settings to configure the BagIt file handler. This is the period in seconds to check for the number of errors during validation. Defaults to 10. 
+ +``curl -X PUT -d '60' http://localhost:8080/api/admin/settings/:BagValidatorJobWaitInterval`` + :ArchiverClassName ++++++++++++++++++ diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java b/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java new file mode 100644 index 00000000000..c708c2e28e2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/EditDataFilesPageHelper.java @@ -0,0 +1,40 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; + +import javax.ejb.Stateless; +import javax.inject.Inject; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * + * @author adaybujeda + */ +@Stateless +public class EditDataFilesPageHelper { + + public static final String MAX_ERRORS_TO_DISPLAY_SETTING = ":CreateDataFilesMaxErrorsToDisplay"; + public static final Integer MAX_ERRORS_TO_DISPLAY = 5; + + @Inject + private SettingsWrapper settingsWrapper; + + public String getHtmlErrorMessage(CreateDataFileResult createDataFileResult) { + List errors = createDataFileResult.getErrors(); + if(errors == null || errors.isEmpty()) { + return null; + } + + Integer maxErrorsToShow = settingsWrapper.getInteger(EditDataFilesPageHelper.MAX_ERRORS_TO_DISPLAY_SETTING, EditDataFilesPageHelper.MAX_ERRORS_TO_DISPLAY); + if(maxErrorsToShow < 1) { + return null; + } + + String typeMessage = Optional.ofNullable(BundleUtil.getStringFromBundle(createDataFileResult.getBundleKey())).orElse("Error processing file"); + String errorsMessage = errors.stream().limit(maxErrorsToShow).map(text -> String.format("
  • %s
  • ", text)).collect(Collectors.joining()); + return String.format("%s:
      %s
    ", typeMessage, errorsMessage); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index c4d3f51c86a..a4cfd291303 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -60,6 +60,8 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import javax.inject.Named; + +import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import org.primefaces.event.FileUploadEvent; import org.primefaces.model.file.UploadedFile; import javax.json.Json; @@ -143,6 +145,8 @@ public enum Referrer { LicenseServiceBean licenseServiceBean; @Inject DataFileCategoryServiceBean dataFileCategoryService; + @Inject + EditDataFilesPageHelper editDataFilesPageHelper; private Dataset dataset = new Dataset(); @@ -1485,7 +1489,9 @@ public void handleDropBoxUpload(ActionEvent event) { // for example, multiple files can be extracted from an uncompressed // zip file. 
//datafiles = ingestService.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream"); - datafiles = FileUtil.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream", null, null, systemConfig); + CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream", null, null, systemConfig); + datafiles = createDataFilesResult.getDataFiles(); + errorMessage = editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult); } catch (IOException ex) { this.logger.log(Level.SEVERE, "Error during ingest of DropBox file {0} from link {1}", new Object[]{fileName, fileLink}); @@ -1739,6 +1745,10 @@ public void uploadFinished() { uploadedFiles.clear(); uploadInProgress.setValue(false); } + if(errorMessage != null) { + FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.file.uploadFailure"), errorMessage)); + PrimeFaces.current().ajax().update(":messagePanel"); + } // refresh the warning message below the upload component, if exists: if (uploadComponentId != null) { if (uploadWarningMessage != null) { @@ -1787,6 +1797,7 @@ public void uploadFinished() { multipleDupesNew = false; uploadWarningMessage = null; uploadSuccessMessage = null; + errorMessage = null; } private String warningMessageForFileTypeDifferentPopUp; @@ -1937,6 +1948,7 @@ private void handleReplaceFileUpload(String fullStorageLocation, } private String uploadWarningMessage = null; + private String errorMessage = null; private String uploadSuccessMessage = null; private String uploadComponentId = null; @@ -2005,8 +2017,10 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { try { // Note: A single uploaded file may produce multiple datafiles - // for example, multiple files can be extracted from an uncompressed - // zip file. 
- dFileList = FileUtil.createDataFiles(workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, null, systemConfig); + // zip file. + CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, null, systemConfig); + dFileList = createDataFilesResult.getDataFiles(); + errorMessage = editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult); } catch (IOException ioex) { logger.warning("Failed to process and/or save the file " + uFile.getFileName() + "; " + ioex.getMessage()); @@ -2111,7 +2125,9 @@ public void handleExternalUpload() { // for example, multiple files can be extracted from an uncompressed // zip file. //datafiles = ingestService.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream"); - datafiles = FileUtil.createDataFiles(workingVersion, null, fileName, contentType, fullStorageIdentifier, checksumValue, checksumType, systemConfig); + CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, null, fileName, contentType, fullStorageIdentifier, checksumValue, checksumType, systemConfig); + datafiles = createDataFilesResult.getDataFiles(); + errorMessage = editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult); } catch (IOException ex) { logger.log(Level.SEVERE, "Error during ingest of file {0}", new Object[]{fileName}); } @@ -3066,5 +3082,5 @@ public boolean isFileAccessRequest() { public void setFileAccessRequest(boolean fileAccessRequest) { this.fileAccessRequest = fileAccessRequest; - } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 7c4569e1dc5..9492e3e2dd4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -177,6 +177,19 @@ public boolean 
isTrueForKey(String settingKey, boolean safeDefaultIfKeyNotFound) return ( val==null ) ? safeDefaultIfKeyNotFound : StringUtil.isTrue(val); } + public Integer getInteger(String settingKey, Integer defaultValue) { + String settingValue = get(settingKey); + if(settingValue != null) { + try { + return Integer.valueOf(settingValue); + } catch (Exception e) { + logger.warning(String.format("action=getInteger result=invalid-integer settingKey=%s settingValue=%s", settingKey, settingValue)); + } + } + + return defaultValue; + } + private void initSettingsMap() { // initialize settings map settingsMap = new HashMap<>(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java index e50b731ca02..928ffd4a129 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java @@ -35,6 +35,8 @@ import javax.servlet.http.HttpServletRequest; import javax.validation.ConstraintViolation; import javax.validation.ConstraintViolationException; + +import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import org.swordapp.server.AuthCredentials; import org.swordapp.server.Deposit; import org.swordapp.server.DepositReceipt; @@ -301,7 +303,8 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au List dataFiles = new ArrayList<>(); try { try { - dataFiles = FileUtil.createDataFiles(editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, null, null, systemConfig); + CreateDataFileResult createDataFilesResponse = FileUtil.createDataFiles(editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, null, null, systemConfig); + dataFiles = createDataFilesResponse.getDataFiles(); } catch (EJBException ex) { Throwable cause = ex.getCause(); if (cause != null) { 
diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 5e5e49c2186..b270393e5e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -32,6 +32,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.json.JsonPrinter; import java.io.IOException; import java.io.InputStream; @@ -1206,7 +1207,7 @@ private boolean step_030_createNewFilesViaIngest(){ workingVersion = dataset.getEditVersion(); clone = workingVersion.cloneDatasetVersion(); try { - initialFileList = FileUtil.createDataFiles(workingVersion, + CreateDataFileResult result = FileUtil.createDataFiles(workingVersion, this.newFileInputStream, this.newFileName, this.newFileContentType, @@ -1214,6 +1215,7 @@ private boolean step_030_createNewFilesViaIngest(){ this.newCheckSum, this.newCheckSumType, this.systemConfig); + initialFileList = result.getDataFiles(); } catch (IOException ex) { if (!Strings.isNullOrEmpty(ex.getMessage())) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 8d3d63da99d..64dadc54a4a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -40,6 +40,9 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.util.file.BagItFileHandler; +import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; +import 
edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; import edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil; import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatDoc; import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.HTML_H1; @@ -80,11 +83,13 @@ import java.util.Date; import java.util.HashMap; import java.util.List; +import java.util.Optional; import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; import javax.activation.MimetypesFileTypeMap; import javax.ejb.EJBException; +import javax.enterprise.inject.spi.CDI; import javax.faces.application.FacesMessage; import javax.faces.component.UIComponent; import javax.faces.component.UIInput; @@ -536,6 +541,11 @@ public static String determineFileType(File f, String fileName) throws IOExcepti // logger.info("------- shapefile FOUND ----------"); fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE; //"application/zipped-shapefile"; } + + Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); + if(bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) { + fileType = BagItFileHandler.FILE_TYPE; + } } logger.fine("returning fileType "+fileType); @@ -743,7 +753,7 @@ public static String generateOriginalExtension(String fileType) { return ""; } - public static List createDataFiles(DatasetVersion version, InputStream inputStream, + public static CreateDataFileResult createDataFiles(DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, String newCheckSum, SystemConfig systemConfig) throws IOException { ChecksumType checkSumType = DataFile.ChecksumType.MD5; @@ -753,7 +763,7 @@ public static List createDataFiles(DatasetVersion version, InputStream return createDataFiles(version, inputStream, fileName, suppliedContentType, newStorageIdentifier, newCheckSum, checkSumType, systemConfig); } - public static 
List createDataFiles(DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, String newCheckSum, ChecksumType newCheckSumType, SystemConfig systemConfig) throws IOException { + public static CreateDataFileResult createDataFiles(DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, String newCheckSum, ChecksumType newCheckSumType, SystemConfig systemConfig) throws IOException { List datafiles = new ArrayList<>(); //When there is no checksum/checksumtype being sent (normal upload, needs to be calculated), set the type to the current default @@ -866,7 +876,7 @@ public static List createDataFiles(DatasetVersion version, InputStream } datafiles.add(datafile); - return datafiles; + return CreateDataFileResult.success(finalType, datafiles); } // If it's a ZIP file, we are going to unpack it and create multiple @@ -1042,7 +1052,7 @@ public static List createDataFiles(DatasetVersion version, InputStream logger.warning("Could not remove temp file " + tempFile.getFileName().toString()); } // and return: - return datafiles; + return CreateDataFileResult.success(finalType, datafiles); } } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) { @@ -1058,7 +1068,7 @@ public static List createDataFiles(DatasetVersion version, InputStream boolean didProcessWork = shpIngestHelper.processFile(); if (!(didProcessWork)) { logger.severe("Processing of zipped shapefile failed."); - return null; + return CreateDataFileResult.error(finalType); } try { @@ -1119,12 +1129,18 @@ public static List createDataFiles(DatasetVersion version, InputStream logger.warning("Unable to delete: " + tempFile.toString() + "due to Security Exception: " + se.getMessage()); } - return datafiles; + return CreateDataFileResult.success(finalType, datafiles); } else { logger.severe("No files added from directory of rezipped shapefiles"); } - return null; + 
return CreateDataFileResult.error(finalType); + } else if (finalType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE)) { + Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); + if (bagItFileHandler.isPresent()) { + CreateDataFileResult result = bagItFileHandler.get().handleBagItPackage(systemConfig, version, fileName, tempFile.toFile()); + return result; + } } } else { // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied @@ -1160,10 +1176,10 @@ public static List createDataFiles(DatasetVersion version, InputStream } datafiles.add(datafile); - return datafiles; + return CreateDataFileResult.success(finalType, datafiles); } - return null; + return CreateDataFileResult.error(finalType); } // end createDataFiles @@ -1196,13 +1212,14 @@ private static boolean useRecognizedType(String suppliedContentType, String reco && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_XLSX)) || canIngestAsTabular(recognizedType) || recognizedType.equals("application/fits-gzipped") || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE) + || recognizedType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE) || recognizedType.equals(MIME_TYPE_ZIP)) { return true; } return false; } - private static File saveInputStreamInTempFile(InputStream inputStream, Long fileSizeLimit) + public static File saveInputStreamInTempFile(InputStream inputStream, Long fileSizeLimit) throws IOException, FileExceedsMaxSizeException { Path tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload"); @@ -1232,15 +1249,15 @@ private static File saveInputStreamInTempFile(InputStream inputStream, Long file * been figured out. 
*/ - private static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String fileName, String contentType, DataFile.ChecksumType checksumType) { + public static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String fileName, String contentType, DataFile.ChecksumType checksumType) { return createSingleDataFile(version, tempFile, null, fileName, contentType, checksumType, null, false); } - - private static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String storageIdentifier, String fileName, String contentType, DataFile.ChecksumType checksumType, String checksum) { + + public static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String storageIdentifier, String fileName, String contentType, DataFile.ChecksumType checksumType, String checksum) { return createSingleDataFile(version, tempFile, storageIdentifier, fileName, contentType, checksumType, checksum, false); } - private static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String storageIdentifier, String fileName, String contentType, DataFile.ChecksumType checksumType, String checksum, boolean addToDataset) { + public static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String storageIdentifier, String fileName, String contentType, DataFile.ChecksumType checksumType, String checksum, boolean addToDataset) { if ((tempFile == null) && (storageIdentifier == null)) { return null; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagChecksumType.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagChecksumType.java new file mode 100644 index 00000000000..bd13b76b57b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagChecksumType.java @@ -0,0 +1,42 @@ +package edu.harvard.iq.dataverse.util.bagit; + +import org.apache.commons.codec.digest.DigestUtils; + +import java.io.InputStream; +import java.util.Arrays; +import java.util.List; + +/** + * + * 
/**
 * Outcome of validating a BagIt package.
 *
 * <p>Carries an optional package-level error message plus, in insertion order, one
 * {@link FileValidationResult} for every path registered through {@link #addFileResult(Path)}.
 *
 * @author adaybujeda
 */
public class BagValidation {

    private final Optional<String> errorMessage;
    // LinkedHashMap keeps the results in the order the files were registered.
    private final Map<Path, FileValidationResult> fileResults;

    /**
     * @param errorMessage package-level failure description, or an empty Optional when there is none
     */
    public BagValidation(Optional<String> errorMessage) {
        this.errorMessage = errorMessage;
        this.fileResults = new LinkedHashMap<>();
    }

    /**
     * Registers a file and returns its (initially pending) result so the caller can update it later.
     * Registering the same path twice replaces the previous result.
     *
     * @param filePath path of the file being validated
     * @return the newly created result object stored for {@code filePath}
     */
    public FileValidationResult addFileResult(Path filePath) {
        FileValidationResult fileResult = new FileValidationResult(filePath);
        fileResults.put(filePath, fileResult);
        return fileResult;
    }

    /** @return the package-level error message, if any */
    public Optional<String> getErrorMessage() {
        return errorMessage;
    }

    /** @return a read-only view of the per-file results */
    public Map<Path, FileValidationResult> getFileResults() {
        return Collections.unmodifiableMap(fileResults);
    }

    /** @return number of files currently marked as failed */
    public long errors() {
        return countWithStatus(FileValidationResult.Status.ERROR);
    }

    /**
     * @return {@code true} only when there is no package-level error and every registered file
     *         succeeded (vacuously true when no file was registered)
     */
    public boolean success() {
        return errorMessage.isEmpty() && fileResults.values().stream().allMatch(FileValidationResult::isSuccess);
    }

    /** @return a one-line summary with the overall outcome and a count per file status */
    public String report() {
        long fileResultsPending = countWithStatus(FileValidationResult.Status.PENDING);
        long fileResultsSuccess = countWithStatus(FileValidationResult.Status.SUCCESS);
        long fileResultsError = countWithStatus(FileValidationResult.Status.ERROR);
        return String.format("BagValidation{success=%s, errorMessage=%s, fileResultsItems=%s, fileResultsSuccess=%s, fileResultsPending=%s, fileResultsError=%s}", success(), errorMessage, fileResults.size(), fileResultsSuccess, fileResultsPending, fileResultsError);
    }

    @Override
    public String toString() {
        return String.format("BagValidation{errorMessage=%s, fileResultsItems=%s}", errorMessage, fileResults.size());
    }

    // Counts the registered files whose current status equals the given one.
    private long countWithStatus(FileValidationResult.Status wanted) {
        return fileResults.values().stream().filter(result -> result.status == wanted).count();
    }

    /**
     * Validation state of a single file: PENDING until {@link #setSuccess()} or {@link #setError()}
     * is called.
     *
     * <p>{@code status} and {@code message} are volatile so that a value set by one thread is
     * visible to another thread that inspects the result.
     */
    public static class FileValidationResult {
        public static enum Status {
            PENDING, SUCCESS, ERROR;
        }

        private final Path filePath;
        private volatile Status status;
        private volatile String message;

        public FileValidationResult(Path filePath) {
            this.filePath = filePath;
            this.status = Status.PENDING;
        }

        public Path getFilePath() {
            return filePath;
        }

        public void setSuccess() {
            this.status = Status.SUCCESS;
        }

        public void setError() {
            this.status = Status.ERROR;
        }

        public boolean isPending() {
            return status == Status.PENDING;
        }

        public boolean isSuccess() {
            return status == Status.SUCCESS;
        }

        public boolean isError() {
            return status == Status.ERROR;
        }

        public void setMessage(String message) {
            this.message = message;
        }

        /** @return the message set through {@link #setMessage(String)}, or {@code null} if none was set */
        public String getMessage() {
            return message;
        }

        @Override
        public String toString() {
            return String.format("FileValidationResult{filePath=%s, status=%s, message=%s}", filePath, status, message);
        }
    }

}
BAGIT_FILE_MARKER = "bagit.txt"; + + private final Integer validatorJobPoolSize; + private final Integer maxErrors; + private final Integer jobWaitIntervalInSeconds; + private final ManifestReader manifestReader; + + public BagValidator(ManifestReader manifestReader) { + this(BagValidatorSettings.JOB_POOL_SIZE.getDefaultValue(), BagValidatorSettings.MAX_ERRORS.getDefaultValue(), BagValidatorSettings.JOB_WAIT_INTERVAL.getDefaultValue(), manifestReader); + } + + public BagValidator(Integer validatorJobPoolSize, Integer maxErrors, Integer jobWaitIntervalInSeconds, ManifestReader manifestReader) { + this.validatorJobPoolSize = validatorJobPoolSize == null ? BagValidatorSettings.JOB_POOL_SIZE.getDefaultValue() : validatorJobPoolSize; + this.maxErrors = maxErrors == null ? BagValidatorSettings.MAX_ERRORS.getDefaultValue() : maxErrors; + this.jobWaitIntervalInSeconds = jobWaitIntervalInSeconds == null ? BagValidatorSettings.JOB_WAIT_INTERVAL.getDefaultValue() : jobWaitIntervalInSeconds; + this.manifestReader = manifestReader; + } + + public boolean hasBagItPackage(FileDataProvider fileDataProvider) { + Optional bagItFile = getBagItFile(fileDataProvider.getFilePaths()); + if(bagItFile.isEmpty()) { + return false; + } + + Path bagRoot = getBagItRoot(bagItFile.get()); + Optional supportedManifest = manifestReader.getSupportedManifest(fileDataProvider, bagRoot); + return supportedManifest.isPresent(); + } + + public BagValidation validateChecksums(FileDataProvider fileDataProvider) { + Optional bagItFile = getBagItFile(fileDataProvider.getFilePaths()); + if (bagItFile.isEmpty()) { + logger.warning(String.format("action=validateBag result=bag-marker-file-not-found fileDataProvider=%s", fileDataProvider.getName())); + return new BagValidation(Optional.of(getMessage("bagit.validation.bag.file.not.found", fileDataProvider.getName()))); + } + + Path bagRoot = getBagItRoot(bagItFile.get()); + Optional manifestChecksum = manifestReader.getManifestChecksums(fileDataProvider, 
bagRoot); + if (manifestChecksum.isEmpty()) { + logger.warning(String.format("action=validateBag result=no-supported-manifest-found fileDataProvider=%s", fileDataProvider.getName())); + return new BagValidation(Optional.of(getMessage("bagit.validation.manifest.not.supported", fileDataProvider.getName(), BagChecksumType.asList()))); + } + + BagValidation bagValidation = validateChecksums(fileDataProvider, manifestChecksum.get()); + logger.fine(String.format("action=validateBag completed fileDataProvider=%s bagValidation=%s", fileDataProvider.getName(), bagValidation)); + return bagValidation; + } + + private Optional getBagItFile(List filePaths) { + return filePaths.stream().filter(path -> path.endsWith(BAGIT_FILE_MARKER)).findFirst(); + } + + private Path getBagItRoot(Path bagItFile) { + Path bagRoot = Optional.ofNullable(bagItFile.getParent()).filter(path -> path != null).orElse(Path.of("")); + return bagRoot; + } + + private BagValidation validateChecksums(FileDataProvider fileDataProvider, ManifestChecksum manifestChecksums) { + ExecutorService executor = getExecutorService(); + BagValidation bagValidationResults = new BagValidation(Optional.empty()); + logger.fine(String.format("action=validateChecksums start name=%s type=%s files=%s", fileDataProvider.getName(), manifestChecksums.getType(), manifestChecksums.getFileChecksums().size())); + for(Map.Entry checksumEntry: manifestChecksums.getFileChecksums().entrySet()) { + Path filePath = checksumEntry.getKey(); + String fileChecksum = checksumEntry.getValue(); + FileValidationResult fileValidationResult = bagValidationResults.addFileResult(filePath); + Optional inputStreamProvider = fileDataProvider.getInputStreamProvider(filePath); + if(inputStreamProvider.isPresent()) { + FileChecksumValidationJob validationJob = new FileChecksumValidationJob(inputStreamProvider.get(), filePath, fileChecksum, manifestChecksums.getType(), fileValidationResult); + executor.execute(validationJob); + } else { + 
fileValidationResult.setError(); + fileValidationResult.setMessage(getMessage("bagit.validation.file.not.found", filePath, fileDataProvider.getName())); + } + + } + + executor.shutdown(); + try { + while (!executor.awaitTermination(jobWaitIntervalInSeconds, TimeUnit.SECONDS)) { + logger.fine(String.format("action=validateChecksums result=waiting-completion name=%s type=%s files=%s", fileDataProvider.getName(), manifestChecksums.getType(), manifestChecksums.getFileChecksums().size())); + if(bagValidationResults.errors() > maxErrors) { + logger.info(String.format("action=validateChecksums result=max-errors-reached name=%s type=%s files=%s bagValidationResults=%s", fileDataProvider.getName(), manifestChecksums.getType(), manifestChecksums.getFileChecksums().size(), bagValidationResults.report())); + executor.shutdownNow(); + } + } + } catch (InterruptedException e) { + logger.log(Level.SEVERE, String.format("action=validateChecksums result=error message=unable-to-complete-checksums name=%s type=%s files=%s", fileDataProvider.getName(), manifestChecksums.getType(), manifestChecksums.getFileChecksums().size()), e); + executor.shutdownNow(); + return new BagValidation(Optional.of(getMessage("bagit.validation.exception", fileDataProvider.getName()))); + } + + logger.fine(String.format("action=validateChecksums completed file=%s name=%s files=%s", fileDataProvider.getName(), manifestChecksums.getType(), manifestChecksums.getFileChecksums().size())); + return bagValidationResults; + } + + // Visible for testing + ExecutorService getExecutorService() { + return Executors.newFixedThreadPool(validatorJobPoolSize); + } + + private String getMessage(String propertyKey, Object... 
parameters){ + List parameterList = Arrays.stream(parameters).map(param -> param.toString()).collect(Collectors.toList()); + return BundleUtil.getStringFromBundle(propertyKey, parameterList); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJob.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJob.java new file mode 100644 index 00000000000..8bf133248ea --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJob.java @@ -0,0 +1,63 @@ +package edu.harvard.iq.dataverse.util.bagit; + +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.bagit.BagValidation.FileValidationResult; +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider.InputStreamProvider; +import org.apache.commons.compress.utils.IOUtils; + +import java.io.InputStream; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * + * @author adaybujeda + */ +public class FileChecksumValidationJob implements Runnable { + + private static final Logger logger = Logger.getLogger(FileChecksumValidationJob.class.getCanonicalName()); + + private final InputStreamProvider inputStreamProvider; + private final Path filePath; + private final String fileChecksum; + private final BagChecksumType bagChecksumType; + private final FileValidationResult result; + + public FileChecksumValidationJob(InputStreamProvider inputStreamProvider, Path filePath, String fileChecksum, BagChecksumType bagChecksumType, FileValidationResult result) { + this.inputStreamProvider = inputStreamProvider; + this.filePath = filePath; + this.fileChecksum = fileChecksum; + this.bagChecksumType = bagChecksumType; + this.result = result; + } + + public void run() { + InputStream inputStream = null; + try { + inputStream = inputStreamProvider.getInputStream(); + String 
calculatedChecksum = bagChecksumType.getInputStreamDigester().digest(inputStream); + if (fileChecksum.equals(calculatedChecksum)) { + result.setSuccess(); + } else { + result.setError(); + result.setMessage(getMessage("bagit.checksum.validation.error", filePath, bagChecksumType, fileChecksum, calculatedChecksum)); + } + } catch (Exception e) { + result.setError(); + result.setMessage(getMessage("bagit.checksum.validation.exception", filePath, bagChecksumType, e.getMessage())); + logger.log(Level.WARNING, String.format("action=validate-checksum result=error filePath=%s type=%s", filePath, bagChecksumType), e); + } finally { + IOUtils.closeQuietly(inputStream); + } + } + + private String getMessage(String propertyKey, Object... parameters){ + List parameterList = Arrays.stream(parameters).map(param -> param.toString()).collect(Collectors.toList()); + return BundleUtil.getStringFromBundle(propertyKey, parameterList); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/ManifestReader.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/ManifestReader.java new file mode 100644 index 00000000000..a6f1113c891 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/ManifestReader.java @@ -0,0 +1,105 @@ +package edu.harvard.iq.dataverse.util.bagit; + +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider; +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider.InputStreamProvider; +import org.apache.commons.compress.utils.IOUtils; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.file.Path; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * + * @author adaybujeda + */ +public class ManifestReader { + + private static final Logger logger = Logger.getLogger(ManifestReader.class.getCanonicalName()); + + public Optional 
getSupportedManifest(FileDataProvider fileDataProvider, Path bagRoot) { + for(BagChecksumType type: BagChecksumType.values()) { + Path manifestPath = bagRoot.resolve(type.getFileName()); + Optional manifestEntry = fileDataProvider.getInputStreamProvider(manifestPath); + if (manifestEntry.isPresent()) { + return Optional.of(manifestPath); + } + } + + return Optional.empty(); + } + + public Optional getManifestChecksums(FileDataProvider fileDataProvider, Path bagRoot) { + for(BagChecksumType type: BagChecksumType.values()) { + Path manifestPath = bagRoot.resolve(type.getFileName()); + try { + Optional manifestEntry = fileDataProvider.getInputStreamProvider(manifestPath); + if (manifestEntry.isPresent()) { + Map checksums = readManifestChecksums(bagRoot, manifestEntry.get().getInputStream()); + ManifestChecksum manifestChecksum = new ManifestChecksum(manifestPath, type, checksums); + logger.log(Level.FINE, String.format("action=getManifestChecksums result=success fileDataProvider=%S bagRoot=%s manifestChecksum=%s", fileDataProvider.getName(), bagRoot, manifestChecksum)); + return Optional.of(manifestChecksum); + } + } catch (Exception e) { + logger.log(Level.WARNING, String.format("action=getManifestChecksums result=error fileDataProvider=%s bagRoot=%s manifestPath=%s", fileDataProvider.getName(), bagRoot, manifestPath), e); + return Optional.empty(); + } + } + logger.log(Level.WARNING,String.format("action=getManifestChecksums result=no-supported-manifest-found fileDataProvider=%s bagRoot=%s supportedTypes=%s", fileDataProvider.getName(), bagRoot, BagChecksumType.asList())); + return Optional.empty(); + } + + private Map readManifestChecksums(Path bagRoot, InputStream manifestEntry) throws Exception{ + final HashMap checksumsMap = new HashMap<>(); + try { + BufferedReader br = new BufferedReader(new InputStreamReader(manifestEntry)); + String line = br.readLine(); + while(line != null){ + final String[] parts = line.split("\\s+", 2); + final Path file = 
bagRoot.resolve(Path.of(parts[1])); + final String hash = parts[0]; + checksumsMap.put(file, hash); + line = br.readLine(); + } + } finally { + IOUtils.closeQuietly(manifestEntry); + } + + return checksumsMap; + } + + public static class ManifestChecksum { + private final Path manifestFile; + private final BagChecksumType type; + private final Map fileChecksums; + + public ManifestChecksum(Path manifestFile, BagChecksumType type, Map fileChecksums) { + this.manifestFile = manifestFile; + this.type = type; + this.fileChecksums = fileChecksums != null ? fileChecksums : Collections.emptyMap(); + } + + public Path getManifestFile() { + return manifestFile; + } + + public BagChecksumType getType() { + return type; + } + + public Map getFileChecksums() { + return fileChecksums; + } + + @Override + public String toString() { + return String.format("ManifestChecksum{manifestFile=%s, type=%s, fileChecksumItems=%s}", manifestFile, type, fileChecksums.size()); + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/DataFileDataProvider.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/DataFileDataProvider.java new file mode 100644 index 00000000000..fe0433ef676 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/DataFileDataProvider.java @@ -0,0 +1,81 @@ +package edu.harvard.iq.dataverse.util.bagit.data; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.logging.Logger; + +/** + * + * @author adaybujeda + */ +class DataFileDataProvider implements FileDataProvider { + private static final Logger logger = Logger.getLogger(DataFileDataProvider.class.getCanonicalName()); + + private final FileUtilWrapper fileUtilWrapper; + private final String name; + private final Map dataFilesIndex; + + 
public DataFileDataProvider(FileUtilWrapper fileUtilWrapper, String name, List dataFiles) { + this.fileUtilWrapper = fileUtilWrapper; + this.name = name; + this.dataFilesIndex = new LinkedHashMap<>(); + dataFiles.stream().forEach(dataFile -> { + String directory = Optional.ofNullable(dataFile.getDirectoryLabel()).orElse(""); + String fileName = dataFile.getCurrentName(); + dataFilesIndex.put(Path.of(directory, fileName), dataFile); + }); + } + + public DataFileDataProvider(String name, List dataFiles) { + this(new FileUtilWrapper(), name, dataFiles); + } + + @Override + public String getName() { + return name; + } + + @Override + public List getFilePaths() { + return List.copyOf(dataFilesIndex.keySet()); + } + + @Override + public Optional getInputStreamProvider(Path filePath) { + Optional dataFileInfo = Optional.ofNullable(dataFilesIndex.get(filePath)); + + if (dataFileInfo.isEmpty()) { + logger.fine(String.format("action=getFileInputStream result=file-not-found filePath=%s", filePath)); + return Optional.empty(); + } + + String[] storageInfo = DataAccess.getDriverIdAndStorageLocation(dataFileInfo.get().getStorageIdentifier()); + String driverType = DataAccess.getDriverType(storageInfo[0]); + String storageLocation = storageInfo[1]; + if (!driverType.equals("tmp")) { + logger.warning(String.format("action=getFileInputStream result=driver-not-supported driverType=%s filePath=%s", driverType, filePath)); + return Optional.empty(); + } + + Path actualFileLocation = Path.of(fileUtilWrapper.getFilesTempDirectory(), storageLocation); + if (actualFileLocation.toFile().exists()) { + return Optional.of(() -> fileUtilWrapper.newInputStream(actualFileLocation)); + } + + logger.fine(String.format("action=getFileInputStream result=file-not-found filePath=%s actualFileLocation=%s", filePath, actualFileLocation)); + return Optional.empty(); + } + + @Override + public void close() throws IOException { + // Intentionally left blank + // Nothing to do in this implementation. 
/**
 * Read-only view over a set of files, addressed by {@link Path}, whose content can be opened
 * on demand. Implementations are {@link Closeable}.
 *
 * @author adaybujeda
 */
public interface FileDataProvider extends Closeable {

    /** @return a name identifying this provider */
    public String getName();

    /** @return the paths of the files this provider knows about */
    public List<Path> getFilePaths();

    /**
     * @param filePath path of the wanted file
     * @return a provider able to open the file's content, or empty when it is not available
     */
    public Optional<InputStreamProvider> getInputStreamProvider(Path filePath);

    /** Supplies an input stream when asked; has a single method so it can be written as a lambda. */
    @FunctionalInterface
    public static interface InputStreamProvider {
        public InputStream getInputStream() throws IOException;
    }
}
a/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FileUtilWrapper.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FileUtilWrapper.java new file mode 100644 index 00000000000..2bcac04076a --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FileUtilWrapper.java @@ -0,0 +1,56 @@ +package edu.harvard.iq.dataverse.util.bagit.data; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; +import edu.harvard.iq.dataverse.util.FileUtil; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.logging.Logger; +import java.util.stream.Stream; + +/** + * Wrapper around static methods to facilitate testing + * + * @author adaybujeda + */ +public class FileUtilWrapper { + + private static final Logger logger = Logger.getLogger(FileUtilWrapper.class.getCanonicalName()); + + public String getFilesTempDirectory() { + return FileUtil.getFilesTempDirectory(); + } + + public InputStream newInputStream(Path path) throws IOException { + return Files.newInputStream(path); + } + + public Stream list(Path path) throws IOException { + return Files.list(path); + } + + public void deleteFile(Path filePath) { + try { + Files.delete(filePath); + } catch (Exception e) { + logger.warning(String.format("action=deleteFile result=error filePath=%s message=%s", filePath, e.getMessage())); + } + } + + public File saveInputStreamInTempFile(InputStream inputStream, Long fileSizeLimit) throws IOException, FileExceedsMaxSizeException { + return FileUtil.saveInputStreamInTempFile(inputStream, fileSizeLimit); + } + + public String determineFileType(File file, String fileName) throws IOException { + return FileUtil.determineFileType(file, fileName); + } + + public DataFile createSingleDataFile(DatasetVersion datasetVersion, File file, String 
storageIdentifier, String fileName, String contentType, DataFile.ChecksumType checksumType, String checksum, Boolean addToDataset) { + return FileUtil.createSingleDataFile(datasetVersion, file, storageIdentifier, fileName, contentType, checksumType, checksum, addToDataset); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FolderDataProvider.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FolderDataProvider.java new file mode 100644 index 00000000000..24b50551acd --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FolderDataProvider.java @@ -0,0 +1,67 @@ +package edu.harvard.iq.dataverse.util.bagit.data; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * + * @author adaybujeda + */ +class FolderDataProvider implements FileDataProvider { + private static final Logger logger = Logger.getLogger(FolderDataProvider.class.getCanonicalName()); + + private final FileUtilWrapper fileUtilWrapper; + private final Path folderLocation; + + public FolderDataProvider(FileUtilWrapper fileUtilWrapper, Path folderLocation) { + this.fileUtilWrapper = fileUtilWrapper; + this.folderLocation = folderLocation; + } + + public FolderDataProvider(Path folderLocation) { + this(new FileUtilWrapper(), folderLocation); + } + + @Override + public String getName() { + return folderLocation.toString(); + } + + @Override + public List getFilePaths() { + if(!folderLocation.toFile().exists()) { + logger.warning(String.format("action=getFilePaths result=folder-not-found folderLocation=%s", folderLocation)); + return Collections.emptyList(); + } + + try { + return fileUtilWrapper.list(folderLocation).map(path -> path.getFileName()).collect(Collectors.toList()); + } catch (Exception e) { + logger.log(Level.WARNING, 
String.format("action=getFilePaths error folderLocation=%s", folderLocation), e); + return Collections.emptyList(); + } + } + + @Override + public Optional getInputStreamProvider(Path filePath) { + Path actualFileLocation = folderLocation.resolve(filePath); + if (actualFileLocation.toFile().exists()) { + return Optional.of(() -> fileUtilWrapper.newInputStream(actualFileLocation)); + } + + logger.fine(String.format("action=getFileInputStream result=file-not-found filePath=%s", actualFileLocation)); + return Optional.empty(); + } + + @Override + public void close() throws IOException { + // Intentionally left blank + // Nothing to do in this implementation. + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/ZipFileDataProvider.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/ZipFileDataProvider.java new file mode 100644 index 00000000000..ab03cafd8db --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/ZipFileDataProvider.java @@ -0,0 +1,67 @@ +package edu.harvard.iq.dataverse.util.bagit.data; + +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.List; +import java.util.Optional; +import java.util.logging.Logger; + +/** + * + * @author adaybujeda + */ +class ZipFileDataProvider implements FileDataProvider { + private static final Logger logger = Logger.getLogger(ZipFileDataProvider.class.getCanonicalName()); + private final String name; + private final ZipFile zipFile; + + public ZipFileDataProvider(String name, ZipFile zipFile) { + this.name = name; + this.zipFile = zipFile; + } + + public ZipFileDataProvider(String name, File file) throws IOException { + this(name, new ZipFile(file)); + } + + @Override + public String getName() { + return name; + } + + @Override + public List 
getFilePaths() { + Enumeration zipEntries = zipFile.getEntries(); + List files = new ArrayList<>(); + while (zipEntries.hasMoreElements()) { + ZipArchiveEntry zipEntry = zipEntries.nextElement(); + if (zipEntry.isDirectory()) { + continue; + } + files.add(Path.of(zipEntry.getName())); + } + return files; + } + + @Override + public Optional getInputStreamProvider(Path filePath) { + ZipArchiveEntry fileEntry = zipFile.getEntry(filePath.toString()); + if (fileEntry != null) { + return Optional.of(() -> zipFile.getInputStream(fileEntry)); + } + + logger.fine(String.format("action=getFileInputStream result=file-not-found filePath=%s", filePath)); + return Optional.empty(); + } + + @Override + public void close() throws IOException { + ZipFile.closeQuietly(zipFile); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java new file mode 100644 index 00000000000..701b56d90e3 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java @@ -0,0 +1,160 @@ +package edu.harvard.iq.dataverse.util.file; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; +import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.bagit.BagValidation; +import edu.harvard.iq.dataverse.util.bagit.BagValidator; +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider; +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProviderFactory; +import edu.harvard.iq.dataverse.util.bagit.data.FileUtilWrapper; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import 
java.util.Optional; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * + * @author adaybujeda + */ +public class BagItFileHandler { + + private static final Logger logger = Logger.getLogger(BagItFileHandler.class.getCanonicalName()); + + public static final String FILE_TYPE = "application/zipped-bagit"; + + private final FileUtilWrapper fileUtil; + private final FileDataProviderFactory fileDataProviderFactory; + private final BagValidator bagValidator; + private final BagItFileHandlerPostProcessor postProcessor; + + public BagItFileHandler(FileUtilWrapper fileUtil, FileDataProviderFactory fileDataProviderFactory, BagValidator bagValidator, BagItFileHandlerPostProcessor postProcessor) { + this.fileUtil = fileUtil; + this.fileDataProviderFactory = fileDataProviderFactory; + this.bagValidator = bagValidator; + this.postProcessor = postProcessor; + } + + public boolean isBagItPackage(String uploadedFilename, File file) throws IOException { + try(FileDataProvider fileDataProvider = fileDataProviderFactory.getFileDataProvider(file)) { + boolean isBagItPackage = bagValidator.hasBagItPackage(fileDataProvider); + logger.fine(String.format("action=isBagItPackage uploadedFilename=%s file=%s isBagItPackage=%s", uploadedFilename, file.getName(), isBagItPackage)); + return isBagItPackage; + } + } + + public CreateDataFileResult handleBagItPackage(SystemConfig systemConfig, DatasetVersion datasetVersion, String uploadedFilename, File bagItPackageFile) throws IOException { + logger.info(String.format("action=handleBagItPackage start uploadedFilename=%s file=%s", uploadedFilename, bagItPackageFile.getName())); + try { + List packageDataFiles = processBagItPackage(systemConfig, datasetVersion, uploadedFilename, bagItPackageFile); + if(packageDataFiles.isEmpty()) { + return CreateDataFileResult.error(FILE_TYPE, Collections.emptyList()); + } + + BagValidation bagValidation = validateBagItPackage(uploadedFilename, packageDataFiles); + 
if(bagValidation.success()) { + List finalItems = postProcessor.process(packageDataFiles); + logger.info(String.format("action=handleBagItPackage result=success uploadedFilename=%s file=%s", uploadedFilename, bagItPackageFile.getName())); + return CreateDataFileResult.success(FILE_TYPE, finalItems); + } + + // BagIt package has errors + // Capture the bag-level error message (if any) followed by the per-file errors and return to caller + List errors = java.util.stream.Stream.concat(bagValidation.getErrorMessage().stream(), bagValidation.getFileResults().values().stream().filter(result -> result.isError()).map(result -> result.getMessage())).collect(Collectors.toList()); + logger.info(String.format("action=handleBagItPackage result=errors uploadedFilename=%s file=%s errors=%s", uploadedFilename, bagItPackageFile.getName(), errors.size())); + return CreateDataFileResult.error(FILE_TYPE, errors); + + } catch (BagItFileHandlerException e) { + logger.severe(String.format("action=handleBagItPackage result=error uploadedFilename=%s file=%s message=%s", uploadedFilename, bagItPackageFile.getName(), e.getMessage())); + return CreateDataFileResult.error(FILE_TYPE, Arrays.asList(e.getMessage())); + } finally { + fileUtil.deleteFile(bagItPackageFile.toPath()); + } + } + + private BagValidation validateBagItPackage(String uploadedFilename, List packageDataFiles) throws IOException { + try(FileDataProvider fileDataProvider = fileDataProviderFactory.getFileDataProvider(uploadedFilename, packageDataFiles)) { + BagValidation bagValidation = bagValidator.validateChecksums(fileDataProvider); + logger.info(String.format("action=validateBagItPackage uploadedFilename=%s bagValidation=%s", uploadedFilename, bagValidation.report())); + return bagValidation; + } + } + + private List processBagItPackage(SystemConfig systemConfig, DatasetVersion datasetVersion, String uploadedFilename, File bagItPackageFile) throws IOException, BagItFileHandlerException { + int numberOfFilesLimit = systemConfig.getZipUploadFilesLimit(); + Long sizeOfFilesLimit = 
systemConfig.getMaxFileUploadSizeForStore(datasetVersion.getDataset().getEffectiveStorageDriverId()); + DataFile.ChecksumType checksumAlgorithm = systemConfig.getFileFixityChecksumAlgorithm(); + + List packageDataFiles = new LinkedList<>(); + + try(FileDataProvider fileDataProvider = fileDataProviderFactory.getFileDataProvider(bagItPackageFile)) { + List zipFileEntries = fileDataProvider.getFilePaths(); + if (zipFileEntries.size() > numberOfFilesLimit) { + throw new BagItFileHandlerException(String.format("Zip file: %s exceeds the number of files limit. Total: %s limit: %s", uploadedFilename, zipFileEntries.size(), numberOfFilesLimit)); + } + + for(Path zipEntry: zipFileEntries) { + Optional zipEntryStream = fileDataProvider.getInputStreamProvider(zipEntry); + + if(zipEntryStream.isEmpty()) { + logger.warning(String.format("action=handleBagIt result=no-input-stream file=%s zipEntry=%s", uploadedFilename, zipEntry)); + continue; + } + + String fileName = zipEntry.getFileName().toString(); + DataFile datafile = null; + try { + File zipEntryAsFile = fileUtil.saveInputStreamInTempFile(zipEntryStream.get().getInputStream(), sizeOfFilesLimit); + datafile = fileUtil.createSingleDataFile(datasetVersion, zipEntryAsFile, null, fileName, + FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT, checksumAlgorithm, null, false); + } catch(FileExceedsMaxSizeException e) { + throw new BagItFileHandlerException(String.format("Zip entry: %s for file: %s exceeds the size limit", zipEntry, uploadedFilename), e); + } + + if(datafile == null) { + logger.warning(String.format("action=handleBagIt result=null-datafile file=%s zipEntry=%s", uploadedFilename, zipEntry)); + continue; + } + + if(zipEntry.getParent() != null) { + // Set directory + datafile.getFileMetadata().setDirectoryLabel(zipEntry.getParent().toString()); + } + + try { + String tempFileLocation = fileUtil.getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(); + String contentType = fileUtil.determineFileType(new 
File(tempFileLocation), fileName); + logger.fine(String.format("action=handleBagIt contentType=%s file=%s zipEntry=%s", contentType, uploadedFilename, zipEntry)); + if (StringUtil.nonEmpty(contentType)) { + datafile.setContentType(contentType); + } + } catch (Exception e) { + logger.warning(String.format("action=handleBagIt message=unable-to-get-content-type file=%s zipEntry=%s error=%s", uploadedFilename, zipEntry, e.getMessage())); + } + + packageDataFiles.add(datafile); + } + } + + return packageDataFiles; + } + + private static class BagItFileHandlerException extends Exception { + public BagItFileHandlerException(String message) { + super(message); + } + public BagItFileHandlerException(String message, Throwable e) { + super(message, e); + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactory.java b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactory.java new file mode 100644 index 00000000000..53c80037223 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactory.java @@ -0,0 +1,68 @@ +package edu.harvard.iq.dataverse.util.file; + +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.bagit.BagValidator; +import edu.harvard.iq.dataverse.util.bagit.BagValidator.BagValidatorSettings; +import edu.harvard.iq.dataverse.util.bagit.ManifestReader; +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProviderFactory; +import edu.harvard.iq.dataverse.util.bagit.data.FileUtilWrapper; + +import javax.annotation.PostConstruct; +import javax.ejb.EJB; +import javax.enterprise.context.SessionScoped; +import javax.inject.Named; +import java.io.Serializable; +import java.util.Optional; +import java.util.logging.Logger; + +/** + * + * @author adaybujeda + */ +@SessionScoped +@Named +public class BagItFileHandlerFactory implements Serializable { + + private static final Logger logger = 
Logger.getLogger(BagItFileHandlerFactory.class.getCanonicalName()); + + public static final String BAGIT_HANDLER_ENABLED_SETTING = ":BagItHandlerEnabled"; + + @EJB + private SettingsServiceBean settingsService; + + private BagItFileHandler bagItFileHandler; + + @PostConstruct + public void initialize() { + boolean bagitHandlerEnabled = settingsService.isTrue(BAGIT_HANDLER_ENABLED_SETTING, false); + if(!bagitHandlerEnabled) { + logger.info("action=initialize completed message=bagit-file-handler-disabled"); + bagItFileHandler = null; + return; + } + + Integer validatorJobPoolSize = getIntegerSetting(BagValidatorSettings.JOB_POOL_SIZE.getSettingsKey(), BagValidatorSettings.JOB_POOL_SIZE.getDefaultValue()); + Integer maxErrors = getIntegerSetting(BagValidatorSettings.MAX_ERRORS.getSettingsKey(), BagValidatorSettings.MAX_ERRORS.getDefaultValue()); + Integer jobWaitIntervalInSeconds = getIntegerSetting(BagValidatorSettings.JOB_WAIT_INTERVAL.getSettingsKey(), BagValidatorSettings.JOB_WAIT_INTERVAL.getDefaultValue()); + BagValidator bagValidator = new BagValidator(validatorJobPoolSize, maxErrors, jobWaitIntervalInSeconds, new ManifestReader()); + bagItFileHandler = new BagItFileHandler(new FileUtilWrapper(), new FileDataProviderFactory(), bagValidator, new BagItFileHandlerPostProcessor()); + logger.info(String.format("action=initialize completed validatorJobPoolSize=%s maxErrors=%s jobWaitIntervalInSeconds=%s message=bagit-file-handler-created", validatorJobPoolSize, maxErrors, jobWaitIntervalInSeconds)); + } + + public Optional getBagItFileHandler() { + return Optional.ofNullable(bagItFileHandler); + } + + private Integer getIntegerSetting(String settingsKey, Integer defaultValue) { + String settingsValue = settingsService.get(settingsKey); + if(settingsValue != null) { + try { + return Integer.valueOf(settingsValue); + } catch (Exception e) { + logger.info(String.format("action=initialize message=error-getting-int-setting setting=%s value=%s defaultValue=%s", 
settingsKey, settingsValue, defaultValue)); + } + } + + return defaultValue; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java new file mode 100644 index 00000000000..e8dcb3ad2fe --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java @@ -0,0 +1,39 @@ +package edu.harvard.iq.dataverse.util.file; + +import edu.harvard.iq.dataverse.DataFile; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Logger; + +/** + * + * @author adaybujeda + */ +public class BagItFileHandlerPostProcessor { + + private static final Logger logger = Logger.getLogger(BagItFileHandlerPostProcessor.class.getCanonicalName()); + + public static final List FILES_TO_IGNORE = Arrays.asList("__", "._", ".DS_Store", "._.DS_Store"); + + public List process(List items) { + if(items == null) { + return null; + } + + List filteredItems = new ArrayList<>(items.size()); + + for(DataFile item: items) { + String fileName = item.getCurrentName(); + if(FILES_TO_IGNORE.contains(fileName)) { + logger.fine(String.format("action=BagItFileHandlerPostProcessor result=ignore-entry file=%s", fileName)); + continue; + } + + filteredItems.add(item); + } + + return filteredItems; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResult.java b/src/main/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResult.java new file mode 100644 index 00000000000..046ebb10c0f --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResult.java @@ -0,0 +1,57 @@ +package edu.harvard.iq.dataverse.util.file; + +import edu.harvard.iq.dataverse.DataFile; + +import java.util.Collections; +import java.util.List; + +/** + * + * @author adaybujeda + */ +public class CreateDataFileResult { + + private static final String BUNDLE_KEY_PREFIX = 
"dataset.file.error"; + + private final String type; + private final List dataFiles; + private final List errors; + + public CreateDataFileResult(String type, List dataFiles, List errors) { + this.type = type; + this.dataFiles = dataFiles == null ? null : Collections.unmodifiableList(dataFiles); + this.errors = errors == null ? Collections.emptyList() : Collections.unmodifiableList(errors); + } + + public static CreateDataFileResult success(String type, List dataFiles) { + return new CreateDataFileResult(type, dataFiles, null); + } + + public static CreateDataFileResult error(String type) { + return new CreateDataFileResult(type, null, Collections.emptyList()); + } + + public static CreateDataFileResult error(String type, List errors) { + return new CreateDataFileResult(type, null, errors); + } + + public String getType() { + return type; + } + + public List getDataFiles() { + return dataFiles; + } + + public List getErrors() { + return errors; + } + + public boolean success() { + return dataFiles != null; + } + + public String getBundleKey() { + return String.format("%s.%s", BUNDLE_KEY_PREFIX, type); + } +} diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 9fa0fc71f3f..ef4deb0ad00 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2237,6 +2237,13 @@ bagit.sourceOrganization=Dataverse Installation () bagit.sourceOrganizationAddress= bagit.sourceOrganizationEmail= +bagit.checksum.validation.error=Invalid checksum. filePath={0} type={1} fileChecksum={2} calculatedChecksum={3} +bagit.checksum.validation.exception=Error while calculating checksum. 
filePath={0} type={1} error={2} +bagit.validation.bag.file.not.found=Invalid bag file: {0} +bagit.validation.manifest.not.supported=No supported manifest found in: {0} supportedTypes: {1} +bagit.validation.file.not.found=Manifest declared file: {0} not-found in data provider: {1} +bagit.validation.exception=Unable to complete checksums for: {0} + #Permission.java permission.addDataverseDataverse=Add a dataverse within another dataverse permission.deleteDataset=Delete a dataset draft @@ -2308,6 +2315,8 @@ dataset.file.uploadWarning=upload warning dataset.file.uploadWorked=upload worked dataset.file.upload.popup.explanation.tip=For more information, please refer to the Duplicate Files section of the User Guide. +dataset.file.error.application/zipped-bagit=BagIt package detected, but errors found. These are the errors found until processing stopped + #HarvestingClientsPage.java harvest.start.error=Sorry, harvest could not be started for the selected harvesting client configuration (unknown server error). 
harvest.delete.error=Selected harvesting client cannot be deleted; unknown exception: diff --git a/src/test/java/edu/harvard/iq/dataverse/EditDataFilesPageHelperTest.java b/src/test/java/edu/harvard/iq/dataverse/EditDataFilesPageHelperTest.java new file mode 100644 index 00000000000..cae2c09d6e6 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/EditDataFilesPageHelperTest.java @@ -0,0 +1,65 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.Arrays; +import java.util.Collections; + +/** + * + * @author adaybujeda + */ +@RunWith(MockitoJUnitRunner.class) +public class EditDataFilesPageHelperTest { + + @Mock + private SettingsWrapper settingsWrapper; + + @InjectMocks + private EditDataFilesPageHelper target; + + @Test + public void getHtmlErrorMessage_should_return_null_when_no_error_messages() { + CreateDataFileResult createDataFileResult = new CreateDataFileResult("test_type", Collections.emptyList(), Collections.emptyList()); + + MatcherAssert.assertThat(target.getHtmlErrorMessage(createDataFileResult), Matchers.nullValue()); + } + + @Test + public void getHtmlErrorMessage_should_return_null_when_max_errors_is_0() { + Mockito.when(settingsWrapper.getInteger(EditDataFilesPageHelper.MAX_ERRORS_TO_DISPLAY_SETTING, EditDataFilesPageHelper.MAX_ERRORS_TO_DISPLAY)).thenReturn(0); + CreateDataFileResult createDataFileResult = CreateDataFileResult.error("test_type", Arrays.asList("error1")); + + MatcherAssert.assertThat(target.getHtmlErrorMessage(createDataFileResult), Matchers.nullValue()); + } + + @Test + public void getHtmlErrorMessage_should_return_message_when_there_are_errors() { + 
Mockito.when(settingsWrapper.getInteger(EditDataFilesPageHelper.MAX_ERRORS_TO_DISPLAY_SETTING, EditDataFilesPageHelper.MAX_ERRORS_TO_DISPLAY)).thenReturn(10); + CreateDataFileResult createDataFileResult = CreateDataFileResult.error("test_type", Arrays.asList("error1")); + + MatcherAssert.assertThat(target.getHtmlErrorMessage(createDataFileResult), Matchers.notNullValue()); + MatcherAssert.assertThat(target.getHtmlErrorMessage(createDataFileResult), Matchers.containsString("error1")); + } + + @Test + public void getHtmlErrorMessage_should_return_message_with_MAX_ERRORS_TO_DISPLAY_when_there_are_more_errors() { + Mockito.when(settingsWrapper.getInteger(EditDataFilesPageHelper.MAX_ERRORS_TO_DISPLAY_SETTING, EditDataFilesPageHelper.MAX_ERRORS_TO_DISPLAY)).thenReturn(2); + CreateDataFileResult createDataFileResult = CreateDataFileResult.error("test_type", Arrays.asList("error1", "error2", "error3", "error4")); + + MatcherAssert.assertThat(target.getHtmlErrorMessage(createDataFileResult), Matchers.notNullValue()); + MatcherAssert.assertThat(target.getHtmlErrorMessage(createDataFileResult), Matchers.containsString("error1")); + MatcherAssert.assertThat(target.getHtmlErrorMessage(createDataFileResult), Matchers.containsString("error2")); + MatcherAssert.assertThat(target.getHtmlErrorMessage(createDataFileResult), Matchers.not(Matchers.containsString("error3"))); + MatcherAssert.assertThat(target.getHtmlErrorMessage(createDataFileResult), Matchers.not(Matchers.containsString("error4"))); + } + +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagChecksumTypeTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagChecksumTypeTest.java new file mode 100644 index 00000000000..2dfaf2b2371 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagChecksumTypeTest.java @@ -0,0 +1,50 @@ +package edu.harvard.iq.dataverse.util.bagit; + +import org.apache.commons.io.IOUtils; +import org.hamcrest.MatcherAssert; 
+import org.hamcrest.Matchers; +import org.junit.Test; + +import java.util.Map; + +/** + * + * @author adaybujeda + */ +public class BagChecksumTypeTest { + + @Test + public void should_validate_all_digesters() throws Exception { + Map expectedResults = Map.of( + BagChecksumType.MD5, "098f6bcd4621d373cade4e832627b4f6", + BagChecksumType.SHA1, "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + BagChecksumType.SHA256, "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08", + BagChecksumType.SHA512, "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff" + ); + + for(BagChecksumType type: BagChecksumType.values()) { + String expectedDigestForTestString = expectedResults.get(type); + // Ensure that any new types get added here + MatcherAssert.assertThat(expectedDigestForTestString, Matchers.notNullValue()); + String calculatedDigest = type.getInputStreamDigester().digest(IOUtils.toInputStream("test", "UTF-8")); + MatcherAssert.assertThat(calculatedDigest, Matchers.is(expectedDigestForTestString)); + } + } + + @Test + public void should_validate_all_manifest_filenames() throws Exception { + Map expectedResults = Map.of( + BagChecksumType.MD5, "manifest-md5.txt", + BagChecksumType.SHA1, "manifest-sha1.txt", + BagChecksumType.SHA256, "manifest-sha256.txt", + BagChecksumType.SHA512, "manifest-sha512.txt" + ); + + for(BagChecksumType type: BagChecksumType.values()) { + String expectedFilename = expectedResults.get(type); + // Ensure that any new types get added here + MatcherAssert.assertThat(expectedFilename, Matchers.notNullValue()); + MatcherAssert.assertThat(type.getFileName(), Matchers.is(expectedFilename)); + } + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagValidationTest.java new file mode 100644 index 00000000000..50cb6a23063 --- /dev/null +++ 
b/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagValidationTest.java @@ -0,0 +1,71 @@ +package edu.harvard.iq.dataverse.util.bagit; + +import edu.harvard.iq.dataverse.util.bagit.BagValidation.FileValidationResult; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Test; + +import java.nio.file.Path; +import java.util.Optional; +import java.util.UUID; + +/** + * + * @author adaybujeda + */ +public class BagValidationTest { + + private static final Path FILE_PATH = Path.of(UUID.randomUUID().toString()); + + @Test + public void success_should_be_true_when_no_error_message() { + BagValidation target = new BagValidation(Optional.empty()); + + MatcherAssert.assertThat(target.success(), Matchers.is(true)); + MatcherAssert.assertThat(target.getErrorMessage().isPresent(), Matchers.is(false)); + MatcherAssert.assertThat(target.getFileResults().isEmpty(), Matchers.is(true)); + } + + @Test + public void success_should_be_true_when_no_error_message_has_file_validations_without_errors() { + BagValidation target = new BagValidation(Optional.empty()); + FileValidationResult result = target.addFileResult(FILE_PATH); + result.setSuccess(); + + MatcherAssert.assertThat(target.success(), Matchers.is(true)); + MatcherAssert.assertThat(target.getErrorMessage().isPresent(), Matchers.is(false)); + MatcherAssert.assertThat(target.getFileResults().size(), Matchers.is(1)); + } + + @Test + public void success_should_be_false_when_error_message() { + BagValidation target = new BagValidation(Optional.of("Error message")); + + MatcherAssert.assertThat(target.success(), Matchers.is(false)); + MatcherAssert.assertThat(target.getErrorMessage().isPresent(), Matchers.is(true)); + MatcherAssert.assertThat(target.getFileResults().isEmpty(), Matchers.is(true)); + } + + @Test + public void success_should_be_false_when_no_error_message_but_has_file_validation_errors() { + BagValidation target = new BagValidation(Optional.empty()); + FileValidationResult result = 
target.addFileResult(FILE_PATH); + result.setError(); + + MatcherAssert.assertThat(target.success(), Matchers.is(false)); + MatcherAssert.assertThat(target.getErrorMessage().isPresent(), Matchers.is(false)); + MatcherAssert.assertThat(target.getFileResults().size(), Matchers.is(1)); + } + + @Test + public void report_should_return_total_file_validation_and_total_success_validations() { + BagValidation target = new BagValidation(Optional.empty()); + target.addFileResult(Path.of(UUID.randomUUID().toString())).setSuccess(); + target.addFileResult(Path.of(UUID.randomUUID().toString())).setError(); + + MatcherAssert.assertThat(target.report(), Matchers.containsString("success=false")); + MatcherAssert.assertThat(target.report(), Matchers.containsString("fileResultsItems=2")); + MatcherAssert.assertThat(target.report(), Matchers.containsString("fileResultsSuccess=1")); + } + +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagValidatorTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagValidatorTest.java new file mode 100644 index 00000000000..2f45dc6bbaf --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/BagValidatorTest.java @@ -0,0 +1,255 @@ +package edu.harvard.iq.dataverse.util.bagit; + +import com.google.common.collect.Lists; +import edu.harvard.iq.dataverse.util.bagit.ManifestReader.ManifestChecksum; +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider; +import edu.harvard.iq.dataverse.util.bagit.data.StringDataProvider; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.nio.file.Path; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Random; +import java.util.concurrent.ExecutorService; +import 
java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * + * @author adaybujeda + */ +public class BagValidatorTest { + + private static final Path SUPPORTED_MANIFEST = Path.of("manifest-md5.txt"); + + private ManifestReader manifestReader; + private BagValidator target; + + @Before + public void beforeEachTest() { + manifestReader = Mockito.mock(ManifestReader.class); + target = new BagValidator(manifestReader); + } + + @Test + public void hasBagItPackage_should_return_false_when_bagit_file_not_found() { + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles("file.txt", "other_file.txt"); + + boolean result = target.hasBagItPackage(fileDataProvider); + + MatcherAssert.assertThat(result, Matchers.is(false)); + Mockito.verifyZeroInteractions(manifestReader); + } + + @Test + public void hasBagItPackage_should_return_false_when_manifest_not_found() { + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles("file.txt", "bagit.txt", "other_file.txt"); + Path expectedBagRoot = Path.of(""); + Mockito.when(manifestReader.getSupportedManifest(fileDataProvider, expectedBagRoot)).thenReturn(Optional.empty()); + + boolean result = target.hasBagItPackage(fileDataProvider); + + MatcherAssert.assertThat(result, Matchers.is(false)); + Mockito.verify(manifestReader).getSupportedManifest(fileDataProvider, expectedBagRoot); + } + + @Test + public void hasBagItPackage_should_return_true_when_bagit_file_and_manifest_in_data_provider() { + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles("file.txt", "bagit.txt", "other_file.txt"); + Path expectedBagRoot = Path.of(""); + Mockito.when(manifestReader.getSupportedManifest(fileDataProvider, expectedBagRoot)).thenReturn(Optional.of(SUPPORTED_MANIFEST)); + + boolean result = target.hasBagItPackage(fileDataProvider); + + MatcherAssert.assertThat(result, Matchers.is(true)); + Mockito.verify(manifestReader).getSupportedManifest(fileDataProvider, expectedBagRoot); + } + + @Test + 
public void hasBagItPackage_should_return_true_when_bagit_file_and_manifest_in_directory_in_data_provider() { + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles("some_dir/other_dir/bagit.txt"); + Path expectedBagRoot = Path.of("some_dir/other_dir"); + Mockito.when(manifestReader.getSupportedManifest(fileDataProvider, expectedBagRoot)).thenReturn(Optional.of(SUPPORTED_MANIFEST)); + + boolean result = target.hasBagItPackage(fileDataProvider); + + MatcherAssert.assertThat(result, Matchers.is(true)); + Mockito.verify(manifestReader).getSupportedManifest(fileDataProvider, expectedBagRoot); + } + + @Test + public void validateChecksums_should_return_error_when_no_bagit_file_in_data_provider() throws Exception { + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles("file.txt", "other_file.txt"); + + BagValidation result = target.validateChecksums(fileDataProvider); + + MatcherAssert.assertThat(result.success(), Matchers.is(false)); + MatcherAssert.assertThat(result.getErrorMessage().isEmpty(), Matchers.is(false)); + MatcherAssert.assertThat(result.getErrorMessage().get(), Matchers.containsString("Invalid bag file")); + + Mockito.verifyZeroInteractions(manifestReader); + } + + @Test + public void validateChecksums_should_call_manifest_reader_with_expected_bagroot() throws Exception { + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles("dir/dir2/bagit.txt"); + Path expectedBagRoot = Path.of("dir/dir2"); + Mockito.when(manifestReader.getManifestChecksums(fileDataProvider, expectedBagRoot)).thenReturn(Optional.empty()); + + target.validateChecksums(fileDataProvider); + + Mockito.verify(manifestReader).getManifestChecksums(fileDataProvider, expectedBagRoot); + } + + @Test + public void validateChecksums_should_return_error_when_manifest_reader_returns_empty() throws Exception { + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles("bagit.txt"); + Path expectedBagRoot = Path.of(""); + 
Mockito.when(manifestReader.getManifestChecksums(fileDataProvider, expectedBagRoot)).thenReturn(Optional.empty()); + + BagValidation result = target.validateChecksums(fileDataProvider); + + MatcherAssert.assertThat(result.success(), Matchers.is(false)); + MatcherAssert.assertThat(result.getErrorMessage().isEmpty(), Matchers.is(false)); + MatcherAssert.assertThat(result.getErrorMessage().get(), Matchers.containsString("No supported manifest found")); + + Mockito.verify(manifestReader).getManifestChecksums(fileDataProvider, expectedBagRoot); + } + + @Test + public void validateChecksums_should_return_error_when_data_provider_do_not_have_file_in_checksum() throws Exception { + FileDataProvider fileDataProvider = Mockito.spy(createDataProviderWithRandomFiles("bagit.txt")); + ManifestChecksum checksums = new ManifestChecksum(Path.of("test"), BagChecksumType.MD5, Map.of(Path.of("not-found.txt"), "checksum")); + Path expectedBagRoot = Path.of(""); + + Mockito.when(manifestReader.getManifestChecksums(fileDataProvider, expectedBagRoot)).thenReturn(Optional.of(checksums)); + + BagValidation result = target.validateChecksums(fileDataProvider); + + MatcherAssert.assertThat(result.success(), Matchers.is(false)); + MatcherAssert.assertThat(result.getErrorMessage().isEmpty(), Matchers.is(true)); + MatcherAssert.assertThat(result.getFileResults().size(), Matchers.is(checksums.getFileChecksums().size())); + for(Path filePath: checksums.getFileChecksums().keySet()) { + MatcherAssert.assertThat(result.getFileResults().get(filePath).isError(), Matchers.is(true)); + MatcherAssert.assertThat(result.getFileResults().get(filePath).getMessage(), Matchers.containsString("Manifest declared file")); + } + + Mockito.verify(manifestReader).getManifestChecksums(fileDataProvider, expectedBagRoot); + Mockito.verify(fileDataProvider).getFilePaths(); + Mockito.verify(fileDataProvider).getInputStreamProvider(Path.of("not-found.txt")); + } + + @Test + public void 
validateChecksums_should_return_success_when_checksums_match() throws Exception { + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles("bagit.txt"); + ManifestChecksum checksums = createChecksums(fileDataProvider.getFilePaths(), true); + Path expectedBagRoot = Path.of(""); + + Mockito.when(manifestReader.getManifestChecksums(fileDataProvider, expectedBagRoot)).thenReturn(Optional.of(checksums)); + + BagValidation result = target.validateChecksums(fileDataProvider); + + MatcherAssert.assertThat(result.success(), Matchers.is(true)); + MatcherAssert.assertThat(result.getErrorMessage().isEmpty(), Matchers.is(true)); + MatcherAssert.assertThat(result.getFileResults().size(), Matchers.is(checksums.getFileChecksums().size())); + for(Path filePath: checksums.getFileChecksums().keySet()) { + MatcherAssert.assertThat(result.getFileResults().get(filePath).isSuccess(), Matchers.is(true)); + MatcherAssert.assertThat(result.getFileResults().get(filePath).getMessage(), Matchers.nullValue()); + } + + Mockito.verify(manifestReader).getManifestChecksums(fileDataProvider, expectedBagRoot); + } + + @Test + public void validateChecksums_should_return_error_when_checksums_do_not_match() throws Exception { + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles("bagit.txt"); + ManifestChecksum checksums = createChecksums(fileDataProvider.getFilePaths(), false); + Path expectedBagRoot = Path.of(""); + + Mockito.when(manifestReader.getManifestChecksums(fileDataProvider, expectedBagRoot)).thenReturn(Optional.of(checksums)); + + BagValidation result = target.validateChecksums(fileDataProvider); + + MatcherAssert.assertThat(result.success(), Matchers.is(false)); + MatcherAssert.assertThat(result.getErrorMessage().isEmpty(), Matchers.is(true)); + MatcherAssert.assertThat(result.getFileResults().size(), Matchers.is(checksums.getFileChecksums().size())); + for(Path filePath: checksums.getFileChecksums().keySet()) { + 
MatcherAssert.assertThat(result.getFileResults().get(filePath).isError(), Matchers.is(true)); + MatcherAssert.assertThat(result.getFileResults().get(filePath).getMessage(), Matchers.containsString("Invalid checksum")); + } + + Mockito.verify(manifestReader).getManifestChecksums(fileDataProvider, expectedBagRoot); + } + + @Test + public void validateChecksums_should_return_error_when_max_errors_reached_and_stop_processing() throws Exception { + BagValidator target = new BagValidator(1, 1, 0, manifestReader); + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles(true, "bagit.txt"); + ManifestChecksum checksums = createChecksums(fileDataProvider.getFilePaths(), false); + Path expectedBagRoot = Path.of(""); + + Mockito.when(manifestReader.getManifestChecksums(fileDataProvider, expectedBagRoot)).thenReturn(Optional.of(checksums)); + + BagValidation result = target.validateChecksums(fileDataProvider); + + MatcherAssert.assertThat(result.success(), Matchers.is(false)); + MatcherAssert.assertThat(result.getErrorMessage().isEmpty(), Matchers.is(true)); + MatcherAssert.assertThat(result.getFileResults().size(), Matchers.is(checksums.getFileChecksums().size())); + MatcherAssert.assertThat(result.errors(), Matchers.greaterThan(1l)); + MatcherAssert.assertThat( result.getFileResults().values().stream().filter(item -> item.isPending()).count(), Matchers.greaterThan(1l)); + MatcherAssert.assertThat( result.getFileResults().values().stream().filter(item -> item.isSuccess()).count(), Matchers.is(0l)); + + Mockito.verify(manifestReader).getManifestChecksums(fileDataProvider, expectedBagRoot); + } + + @Test + public void validateChecksums_should_return_error_when_executor_service_throws_interrupted_exception() throws Exception { + ExecutorService executorServiceMock = Mockito.mock(ExecutorService.class); + Mockito.when(executorServiceMock.awaitTermination(Mockito.anyLong(), Mockito.any())).thenThrow(new InterruptedException("Interrupted")); + + BagValidator target = 
Mockito.spy(new BagValidator(1, 1, 0, manifestReader)); + Mockito.when(target.getExecutorService()).thenReturn(executorServiceMock); + + FileDataProvider fileDataProvider = createDataProviderWithRandomFiles(true, "bagit.txt"); + ManifestChecksum checksums = createChecksums(fileDataProvider.getFilePaths(), false); + Mockito.when(manifestReader.getManifestChecksums(fileDataProvider, Path.of(""))).thenReturn(Optional.of(checksums)); + + BagValidation result = target.validateChecksums(fileDataProvider); + + MatcherAssert.assertThat(result.success(), Matchers.is(false)); + MatcherAssert.assertThat(result.getErrorMessage().isEmpty(), Matchers.is(false)); + MatcherAssert.assertThat(result.getErrorMessage().get(), Matchers.containsString("Unable to complete checksums")); + } + + private FileDataProvider createDataProviderWithRandomFiles(String... filePathItems) { + return createDataProviderWithRandomFiles(false, filePathItems); + } + + private FileDataProvider createDataProviderWithRandomFiles(boolean withDelay, String... filePathItems) { + List randomItems = Stream.generate(() -> RandomStringUtils.randomAlphabetic(100)).limit(10).collect(Collectors.toList()); + List allFileItems = Lists.newArrayList(filePathItems); + allFileItems.addAll(randomItems); + List filePaths = allFileItems.stream().map(filePathItem -> Path.of(filePathItem)).collect(Collectors.toList()); + return new StringDataProvider(withDelay, filePaths); + } + + private ManifestChecksum createChecksums(List filePaths, boolean validChecksum) throws Exception { + List types = BagChecksumType.asList(); + BagChecksumType bagChecksumType = types.get(new Random().nextInt(types.size())); + Map checksums = new HashMap<>(); + for (Path path : filePaths) { + String checksum = validChecksum ? 
bagChecksumType.getInputStreamDigester().digest(IOUtils.toInputStream(path.toString(), "UTF-8")) : "invalid"; + checksums.put(path, checksum); + } + return new ManifestChecksum(Path.of(bagChecksumType.getFileName()), bagChecksumType, checksums); + } + +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJobTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJobTest.java new file mode 100644 index 00000000000..63a2650a5ef --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/FileChecksumValidationJobTest.java @@ -0,0 +1,68 @@ +package edu.harvard.iq.dataverse.util.bagit; + +import edu.harvard.iq.dataverse.util.bagit.BagValidation.FileValidationResult; +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider.InputStreamProvider; +import org.apache.commons.io.IOUtils; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; +import java.util.Random; +import java.util.UUID; + +/** + * + * @author adaybujeda + */ +public class FileChecksumValidationJobTest { + + @Test + public void should_set_error_when_checksum_do_not_match() throws Exception { + FileValidationResult result = new FileValidationResult(Path.of(UUID.randomUUID().toString())); + FileChecksumValidationJob target = createTarget(result, false, false); + target.run(); + + MatcherAssert.assertThat(result.isError(), Matchers.is(true)); + MatcherAssert.assertThat(result.getMessage(), Matchers.containsString("Invalid checksum")); + } + + @Test + public void should_set_error_when_inputstream_provider_throws_error() throws Exception { + FileValidationResult result = new FileValidationResult(Path.of(UUID.randomUUID().toString())); + FileChecksumValidationJob target = createTarget(result, false, true); + target.run(); + + MatcherAssert.assertThat(result.isError(), 
Matchers.is(true)); + MatcherAssert.assertThat(result.getMessage(), Matchers.containsString("Error while calculating checksum")); + } + + @Test + public void should_set_success_when_checksum_do_match() throws Exception { + FileValidationResult result = new FileValidationResult(Path.of(UUID.randomUUID().toString())); + FileChecksumValidationJob target = createTarget(result, true, false); + target.run(); + + MatcherAssert.assertThat(result.isSuccess(), Matchers.is(true)); + MatcherAssert.assertThat(result.getMessage(), Matchers.nullValue()); + + } + + private FileChecksumValidationJob createTarget(FileValidationResult result, boolean validChecksum, boolean throwError) throws Exception { + Path filePath = result.getFilePath(); + BagChecksumType bagChecksumType = BagChecksumType.asList().get(new Random().nextInt(BagChecksumType.asList().size())); + String checksum = validChecksum ? bagChecksumType.getInputStreamDigester().digest(IOUtils.toInputStream(filePath.toString(), "UTF-8")) : "invalid"; + InputStreamProvider provider = throwError ? 
new ExceptionStreamProvider() : () -> IOUtils.toInputStream(filePath.toString(), "UTF-8"); + return new FileChecksumValidationJob(provider, filePath, checksum, bagChecksumType, result); + } + + private static class ExceptionStreamProvider implements InputStreamProvider { + @Override + public InputStream getInputStream() throws IOException { + throw new IOException("Error"); + } + } + +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/ManifestReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/ManifestReaderTest.java new file mode 100644 index 00000000000..e31e2fcc87b --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/ManifestReaderTest.java @@ -0,0 +1,90 @@ +package edu.harvard.iq.dataverse.util.bagit; + +import edu.harvard.iq.dataverse.util.bagit.ManifestReader.ManifestChecksum; +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider; +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProviderFactory; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Test; +import org.mockito.Mockito; + +import java.nio.file.Path; +import java.util.Optional; + +/** + * + * @author adaybujeda + */ +public class ManifestReaderTest { + + private static final Path FIXTURE_LOCATION = Path.of("src/test/resources/bagit/manifest"); + + private FileDataProviderFactory dataProviderFactory = new FileDataProviderFactory(); + private ManifestReader target = new ManifestReader(); + + + @Test + public void getManifestChecksums_should_try_all_checksum_types_to_find_manifest_and_return_empty_when_none_found() { + FileDataProvider fileDataProvider = Mockito.mock(FileDataProvider.class); + Optional manifestChecksums = target.getManifestChecksums(fileDataProvider, Path.of("")); + + MatcherAssert.assertThat(manifestChecksums.isEmpty(), Matchers.is(true)); + + for (BagChecksumType type: BagChecksumType.values()) { + 
Mockito.verify(fileDataProvider).getInputStreamProvider(Path.of(type.getFileName())); + } + } + + @Test + public void getManifestChecksums_should_return_valid_ManifestChecksum_object_when_valid_manifest_found() throws Exception { + FileDataProvider fixtureDataProvider = dataProviderFactory.getFileDataProvider(FIXTURE_LOCATION); + Optional manifestChecksums = target.getManifestChecksums(fixtureDataProvider, Path.of("valid")); + + MatcherAssert.assertThat(manifestChecksums.isEmpty(), Matchers.is(false)); + MatcherAssert.assertThat(manifestChecksums.get().getManifestFile(), Matchers.is(Path.of("valid/manifest-sha256.txt"))); + MatcherAssert.assertThat(manifestChecksums.get().getType(), Matchers.is(BagChecksumType.SHA256)); + MatcherAssert.assertThat(manifestChecksums.get().getFileChecksums().size(), Matchers.is(2)); + MatcherAssert.assertThat(manifestChecksums.get().getFileChecksums().get(Path.of("valid/data/file-line-1.txt")), Matchers.is("hash-line-1")); + MatcherAssert.assertThat(manifestChecksums.get().getFileChecksums().get(Path.of("valid/data/file-line-2.txt")), Matchers.is("hash-line-2")); + } + + @Test + public void getManifestChecksums_should_return_empty_when_manifest_has_invalid_format() throws Exception { + FileDataProvider fixtureDataProvider = dataProviderFactory.getFileDataProvider(FIXTURE_LOCATION); + Optional manifestChecksums = target.getManifestChecksums(fixtureDataProvider, Path.of("invalid_format")); + + MatcherAssert.assertThat(manifestChecksums.isEmpty(), Matchers.is(true)); + } + + @Test + public void getManifestChecksums_should_return_empty_when_dataprovider_throws_exception() throws Exception { + FileDataProvider fileDataProvider = Mockito.mock(FileDataProvider.class); + Mockito.when(fileDataProvider.getInputStreamProvider(Mockito.any())).thenThrow(new NullPointerException("Test Exception")); + Optional manifestChecksums = target.getManifestChecksums(fileDataProvider, Path.of("")); + + MatcherAssert.assertThat(manifestChecksums.isEmpty(), 
Matchers.is(true)); + } + + @Test + public void getSupportedManifest_should_return_empty_when_no_supported_manifest_found() throws Exception { + FileDataProvider fileDataProvider = Mockito.mock(FileDataProvider.class); + Optional manifest = target.getSupportedManifest(fileDataProvider, Path.of("")); + + MatcherAssert.assertThat(manifest.isEmpty(), Matchers.is(true)); + + for (BagChecksumType type: BagChecksumType.values()) { + Mockito.verify(fileDataProvider).getInputStreamProvider(Path.of(type.getFileName())); + } + } + + @Test + public void getSupportedManifest_should_return_manifest_path_when_found() throws Exception { + FileDataProvider fixtureDataProvider = dataProviderFactory.getFileDataProvider(FIXTURE_LOCATION); + + Optional manifest = target.getSupportedManifest(fixtureDataProvider, Path.of("valid")); + + MatcherAssert.assertThat(manifest.isPresent(), Matchers.is(true)); + MatcherAssert.assertThat(manifest.get(), Matchers.is(Path.of("valid/manifest-sha256.txt"))); + } + +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/DataFileDataProviderTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/DataFileDataProviderTest.java new file mode 100644 index 00000000000..50ebaff2b7c --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/DataFileDataProviderTest.java @@ -0,0 +1,121 @@ +package edu.harvard.iq.dataverse.util.bagit.data; + +import edu.harvard.iq.dataverse.DataFile; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Test; +import org.mockito.Mockito; + +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import java.util.stream.Collectors; + +import static edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider.InputStreamProvider; + +/** + * + * @author adaybujeda + */ +public class DataFileDataProviderTest { + + private static 
final String EMPTY_DRIVER = ""; + private static final String EMPTY_DIRECTORY = ""; + private static final String FIXTURE_DIRECTORY = "src/test/resources/bagit/data"; + private static final String NAME = UUID.randomUUID().toString(); + + @Test + public void getName_should_return_configured_name() { + DataFileDataProvider target = new DataFileDataProvider(NAME, Collections.emptyList()); + MatcherAssert.assertThat(target.getName(), Matchers.is(NAME)); + } + + @Test + public void getFilePaths_should_iterate_through_all_datafiles() { + List dataFiles = createDataFiles(EMPTY_DRIVER, "dir", "file1.txt", "file2.csv", "file3.py"); + + DataFileDataProvider target = new DataFileDataProvider(NAME, dataFiles); + List result = target.getFilePaths(); + + MatcherAssert.assertThat(result.size(), Matchers.is(3)); + MatcherAssert.assertThat(result, Matchers.hasItem(Path.of("dir","file1.txt"))); + MatcherAssert.assertThat(result, Matchers.hasItem(Path.of("dir","file2.csv"))); + MatcherAssert.assertThat(result, Matchers.hasItem(Path.of("dir","file3.py"))); + } + + @Test + public void getInputStreamProvider_should_return_empty_when_filePath_is_not_found() { + Path filePath = Path.of(UUID.randomUUID().toString()); + + DataFileDataProvider target = new DataFileDataProvider(NAME, Collections.emptyList()); + Optional result = target.getInputStreamProvider(filePath); + + MatcherAssert.assertThat(result.isEmpty(), Matchers.is(true)); + } + + @Test + public void getInputStreamProvider_should_return_empty_when_datafile_do_not_use_tmp_driver() throws Exception { + Path filePath = Path.of(UUID.randomUUID().toString()); + List dataFiles = createDataFiles("file://", EMPTY_DIRECTORY, filePath.toString()); + + DataFileDataProvider target = new DataFileDataProvider(NAME, dataFiles); + Optional result = target.getInputStreamProvider(filePath); + + MatcherAssert.assertThat(result.isEmpty(), Matchers.is(true)); + } + + @Test + public void 
getInputStreamProvider_should_return_empty_when_filePath_is_found_but_file_do_no_exits() throws Exception { + Path missingFile = Path.of(UUID.randomUUID().toString()); + List dataFiles = createDataFiles(EMPTY_DRIVER, EMPTY_DIRECTORY, missingFile.toString()); + + DataFileDataProvider target = new DataFileDataProvider(NAME, dataFiles); + Optional result = target.getInputStreamProvider(missingFile); + + MatcherAssert.assertThat(result.isEmpty(), Matchers.is(true)); + } + + @Test + public void getInputStreamProvider_should_return_inputStream_when_filePath_is_found_and_file_exits() throws Exception { + FileUtilWrapper fileUtilMock = Mockito.mock(FileUtilWrapper.class); + Mockito.when(fileUtilMock.getFilesTempDirectory()).thenReturn(FIXTURE_DIRECTORY); + + String existingFileName = "DataFileDataProviderTest.txt"; + List dataFiles = createDataFiles(EMPTY_DRIVER, EMPTY_DIRECTORY, existingFileName); + + DataFileDataProvider target = new DataFileDataProvider(fileUtilMock, NAME, dataFiles); + Optional result = target.getInputStreamProvider(Path.of(existingFileName)); + + MatcherAssert.assertThat(result.isPresent(), Matchers.is(true)); + } + + @Test + public void getInputStreamProvider_should_return_inputStream_when_filePath_is_found_and_datafile_uses_tmp_driver() throws Exception { + FileUtilWrapper fileUtilMock = Mockito.mock(FileUtilWrapper.class); + Mockito.when(fileUtilMock.getFilesTempDirectory()).thenReturn(FIXTURE_DIRECTORY); + + String existingFileName = "DataFileDataProviderTest.txt"; + List dataFiles = createDataFiles("tmp://", EMPTY_DIRECTORY, existingFileName); + + DataFileDataProvider target = new DataFileDataProvider(fileUtilMock, NAME, dataFiles); + Optional result = target.getInputStreamProvider(Path.of(existingFileName)); + + MatcherAssert.assertThat(result.isPresent(), Matchers.is(true)); + } + + private List createDataFiles(String driver, String dir, String... 
dataFileNames) { + List dataFiles = Arrays.stream(dataFileNames).map(fileName -> { + DataFile dataFile = Mockito.mock(DataFile.class); + Mockito.when(dataFile.getDirectoryLabel()).thenReturn(dir); + Mockito.when(dataFile.getCurrentName()).thenReturn(fileName); + Mockito.when(dataFile.getStorageIdentifier()).thenReturn(driver + fileName); + return dataFile; + }).collect(Collectors.toList()); + + return dataFiles; + } + +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FileDataProviderFactoryTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FileDataProviderFactoryTest.java new file mode 100644 index 00000000000..c5eadb09811 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FileDataProviderFactoryTest.java @@ -0,0 +1,43 @@ +package edu.harvard.iq.dataverse.util.bagit.data; + +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Test; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Collections; +import java.util.UUID; + +/** + * + * @author adaybujeda + */ +public class FileDataProviderFactoryTest { + + private static final String FIXTURE_DIRECTORY = "src/test/resources/bagit/data"; + + private final FileDataProviderFactory target = new FileDataProviderFactory(); + + @Test + public void should_return_FolderDataProvider_when_parameter_is_path() { + FileDataProvider result = target.getFileDataProvider(Path.of(UUID.randomUUID().toString())); + + MatcherAssert.assertThat(result.getClass().getName(), Matchers.is(FolderDataProvider.class.getName())); + } + + @Test + public void should_return_ZipFileDataProvider_when_parameter_is_file() throws IOException { + FileDataProvider result = target.getFileDataProvider(Path.of(FIXTURE_DIRECTORY, "FileDataProviderFactoryTest.zip").toFile()); + + MatcherAssert.assertThat(result.getClass().getName(), Matchers.is(ZipFileDataProvider.class.getName())); + } + + @Test + public void 
should_return_DataFileDataProvider_when_parameter_is_datafiles() { + FileDataProvider result = target.getFileDataProvider("test-name", Collections.emptyList()); + + MatcherAssert.assertThat(result.getClass().getName(), Matchers.is(DataFileDataProvider.class.getName())); + } + +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FolderDataProviderTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FolderDataProviderTest.java new file mode 100644 index 00000000000..7e892180bed --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FolderDataProviderTest.java @@ -0,0 +1,92 @@ +package edu.harvard.iq.dataverse.util.bagit.data; + +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; +import java.util.UUID; + +/** + * + * @author adaybujeda + */ +public class FolderDataProviderTest { + + private static final String FIXTURE_DIRECTORY = "src/test/resources/bagit/data"; + + @Test + public void getName_should_return_configured_name() { + Path folderLocation = Path.of(UUID.randomUUID().toString()); + + FolderDataProvider target = new FolderDataProvider(folderLocation); + + MatcherAssert.assertThat(target.getName(), Matchers.is(folderLocation.toString())); + } + + @Test + public void getFilePaths_should_return_empty_when_folder_do_not_exits() { + Path folderLocation = Path.of(UUID.randomUUID().toString()); + + FolderDataProvider target = new FolderDataProvider(folderLocation); + List result = target.getFilePaths(); + + MatcherAssert.assertThat(result.size(), Matchers.is(0)); + } + + @Test + public void getFilePaths_should_return_empty_when_listing_files_throws_exception() throws IOException { + Path folderLocation = Mockito.mock(Path.class, Mockito.RETURNS_DEEP_STUBS); + 
Mockito.when(folderLocation.toFile().exists()).thenReturn(true); + + FileUtilWrapper fileUtilMock = Mockito.mock(FileUtilWrapper.class); + Mockito.when(fileUtilMock.list(folderLocation)).thenThrow(new IOException("ERROR")); + + FolderDataProvider target = new FolderDataProvider(fileUtilMock, folderLocation); + List result = target.getFilePaths(); + + MatcherAssert.assertThat(result.size(), Matchers.is(0)); + + Mockito.verify(folderLocation.toFile()).exists(); + Mockito.verify(fileUtilMock).list(folderLocation); + } + + @Test + public void getFilePaths_should_iterate_through_all_files_within_folderLocation() { + Path folderLocation = Path.of(FIXTURE_DIRECTORY, "FolderDataProviderTest"); + + FolderDataProvider target = new FolderDataProvider(folderLocation); + List result = target.getFilePaths(); + + MatcherAssert.assertThat(result.size(), Matchers.is(2)); + MatcherAssert.assertThat(result, Matchers.hasItem(Path.of("file1.txt"))); + MatcherAssert.assertThat(result, Matchers.hasItem(Path.of("file2.csv"))); + } + + @Test + public void getInputStreamProvider_should_return_empty_when_file_do_no_exits() throws Exception { + Path missingFile = Path.of(UUID.randomUUID().toString()); + Path folderLocation = Path.of(UUID.randomUUID().toString()); + + FolderDataProvider target = new FolderDataProvider(folderLocation); + Optional result = target.getInputStreamProvider(missingFile); + + MatcherAssert.assertThat(result.isEmpty(), Matchers.is(true)); + } + + @Test + public void getInputStreamProvider_should_return_inputstream_when_file_exits() throws Exception { + Path folderLocation = Path.of(FIXTURE_DIRECTORY, "FolderDataProviderTest"); + Path existingFile = Path.of("file1.txt"); + + FolderDataProvider target = new FolderDataProvider(folderLocation); + Optional result = target.getInputStreamProvider(existingFile); + + MatcherAssert.assertThat(result.isEmpty(), Matchers.is(false)); + } + +} \ No newline at end of file diff --git 
a/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/StringDataProvider.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/StringDataProvider.java new file mode 100644 index 00000000000..3a6c79cec68 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/StringDataProvider.java @@ -0,0 +1,50 @@ +package edu.harvard.iq.dataverse.util.bagit.data; + +import org.apache.commons.io.IOUtils; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +/** + * + * @author adaybujeda + */ +public class StringDataProvider implements FileDataProvider { + private final Boolean withDelay; + private final List items; + + public StringDataProvider(Boolean withDelay, List items) { + this.withDelay = withDelay; + this.items = items; + } + + @Override + public String getName() { + return "StringDataProvider"; + } + + @Override + public List getFilePaths() { + return List.copyOf(items); + } + + @Override + public Optional getInputStreamProvider(Path filePath) { + return items.stream().filter(item -> item.equals(filePath)).findFirst().map(item -> () -> { + if (withDelay) { + try { + TimeUnit.MILLISECONDS.sleep(200); + } catch (Exception e) { + } + } + return IOUtils.toInputStream(item.toString(), "UTF-8"); + }); + } + + @Override + public void close() throws IOException { + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/ZipFileDataProviderTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/ZipFileDataProviderTest.java new file mode 100644 index 00000000000..084fb6ed50f --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/ZipFileDataProviderTest.java @@ -0,0 +1,119 @@ +package edu.harvard.iq.dataverse.util.bagit.data; + +import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider.InputStreamProvider; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import 
org.apache.commons.compress.archivers.zip.ZipFile; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.InputStream; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import java.util.stream.Collectors; + +/** + * + * @author adaybujeda + */ +public class ZipFileDataProviderTest { + + private static final String NAME = UUID.randomUUID().toString(); + + @Test + public void getName_should_return_configured_name() { + ZipFileDataProvider target = new ZipFileDataProvider(NAME, Mockito.mock(ZipFile.class)); + MatcherAssert.assertThat(target.getName(), Matchers.is(NAME)); + } + + @Test + public void getFilePaths_should_iterate_through_all_zip_entries() { + ZipFile zipFileMock = Mockito.mock(ZipFile.class); + mockZipEnumeration(zipFileMock, "zip1", "zip2", "zip3"); + + ZipFileDataProvider target = new ZipFileDataProvider(NAME, zipFileMock); + List result = target.getFilePaths(); + + Mockito.verify(zipFileMock).getEntries(); + MatcherAssert.assertThat(result.size(), Matchers.is(3)); + MatcherAssert.assertThat(result, Matchers.hasItem(Path.of("zip1"))); + MatcherAssert.assertThat(result, Matchers.hasItem(Path.of("zip2"))); + MatcherAssert.assertThat(result, Matchers.hasItem(Path.of("zip3"))); + } + + @Test + public void getFilePaths_should_ignore_directories() { + ZipFile zipFileMock = Mockito.mock(ZipFile.class); + mockZipEnumeration(zipFileMock, "zip1", "dir1/", "zip2", "dir2/"); + + ZipFileDataProvider target = new ZipFileDataProvider(NAME, zipFileMock); + List result = target.getFilePaths(); + + MatcherAssert.assertThat(result.size(), Matchers.is(2)); + MatcherAssert.assertThat(result, Matchers.hasItem(Path.of("zip1"))); + MatcherAssert.assertThat(result, Matchers.hasItem(Path.of("zip2"))); + + Mockito.verify(zipFileMock).getEntries(); + } + + @Test + public void 
getInputStreamProvider_should_return_empty_when_file_path_is_not_found() { + Path fileNotFound = Path.of(UUID.randomUUID().toString()); + ZipFile zipFileMock = Mockito.mock(ZipFile.class); + Mockito.when(zipFileMock.getEntry(fileNotFound.toString())).thenReturn(null); + + ZipFileDataProvider target = new ZipFileDataProvider(NAME, zipFileMock); + Optional result = target.getInputStreamProvider(fileNotFound); + + MatcherAssert.assertThat(result.isEmpty(), Matchers.is(true)); + + Mockito.verify(zipFileMock).getEntry(fileNotFound.toString()); + } + + @Test + public void getInputStreamProvider_should_return_inputStream_for_file_from_zip() throws Exception { + ZipFile zipFileMock = Mockito.mock(ZipFile.class); + ZipArchiveEntry zipEntryMock = Mockito.mock(ZipArchiveEntry.class); + InputStream inputStreamMock = Mockito.mock(InputStream.class); + Path filePath = Path.of(UUID.randomUUID().toString()); + + Mockito.when(zipFileMock.getInputStream(zipEntryMock)).thenReturn(inputStreamMock); + Mockito.when(zipFileMock.getEntry(filePath.toString())).thenReturn(zipEntryMock); + + ZipFileDataProvider target = new ZipFileDataProvider(NAME, zipFileMock); + Optional result = target.getInputStreamProvider(filePath); + + MatcherAssert.assertThat(result.isEmpty(), Matchers.is(false)); + MatcherAssert.assertThat(result.get().getInputStream(), Matchers.is(inputStreamMock)); + + Mockito.verify(zipFileMock).getEntry(filePath.toString()); + Mockito.verify(zipFileMock).getInputStream(zipEntryMock); + } + + @Test + public void close_should_call_zipfile_close_method() throws Exception { + ZipFile zipFileMock = Mockito.mock(ZipFile.class); + + ZipFileDataProvider target = new ZipFileDataProvider(NAME, zipFileMock); + target.close(); + + Mockito.verify(zipFileMock).close(); + } + + private void mockZipEnumeration(ZipFile zipFileMock, String... 
zipEntryNames) {
+        List zipArchiveEntries = Arrays.stream(zipEntryNames).map(name -> {
+            ZipArchiveEntry zipEntry = Mockito.mock(ZipArchiveEntry.class);
+            Mockito.when(zipEntry.getName()).thenReturn(name);
+            Mockito.when(zipEntry.isDirectory()).thenReturn(name.endsWith("/"));
+            return zipEntry;
+        }).collect(Collectors.toList());
+
+        Mockito.when(zipFileMock.getEntries()).thenReturn(Collections.enumeration(zipArchiveEntries));
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactoryTest.java b/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactoryTest.java
new file mode 100644
index 00000000000..d3f1dbcf805
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerFactoryTest.java
@@ -0,0 +1,50 @@
+package edu.harvard.iq.dataverse.util.file;
+
+import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import edu.harvard.iq.dataverse.util.bagit.BagValidator;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.InjectMocks;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.junit.MockitoJUnitRunner;
+
+/**
+ * Checks that BagItFileHandlerFactory.initialize() honours the BAGIT_HANDLER_ENABLED_SETTING flag.
+ * @author adaybujeda
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class BagItFileHandlerFactoryTest {
+
+    @Mock
+    private SettingsServiceBean settingsService;
+
+    @InjectMocks
+    private BagItFileHandlerFactory target;
+    /** With the flag off, no handler is exposed and no setting other than the flag is read. */
+    @Test
+    public void initialize_should_set_BagItFileHandler_to_empty_if_BagItHandler_is_not_enabled() {
+        Mockito.when(settingsService.isTrue(BagItFileHandlerFactory.BAGIT_HANDLER_ENABLED_SETTING, false)).thenReturn(false);
+
+        target.initialize();
+
+        MatcherAssert.assertThat(target.getBagItFileHandler().isEmpty(), Matchers.is(true));
+        Mockito.verify(settingsService).isTrue(BagItFileHandlerFactory.BAGIT_HANDLER_ENABLED_SETTING, false);
+        Mockito.verifyNoMoreInteractions(settingsService);
+    }
+    /** With the flag on, a handler is exposed and the three BagValidator settings are looked up. */
+    @Test
+    public void initialize_should_set_BagItFileHandler_if_BagItHandler_is_enabled() {
+        Mockito.when(settingsService.isTrue(BagItFileHandlerFactory.BAGIT_HANDLER_ENABLED_SETTING, false)).thenReturn(true);
+
+        target.initialize();
+
+        MatcherAssert.assertThat(target.getBagItFileHandler().isEmpty(), Matchers.is(false));
+        Mockito.verify(settingsService).get(BagValidator.BagValidatorSettings.JOB_POOL_SIZE.getSettingsKey());
+        Mockito.verify(settingsService).get(BagValidator.BagValidatorSettings.MAX_ERRORS.getSettingsKey());
+        Mockito.verify(settingsService).get(BagValidator.BagValidatorSettings.JOB_WAIT_INTERVAL.getSettingsKey());
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessorTest.java b/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessorTest.java
new file mode 100644
index 00000000000..7a98b6573a2
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessorTest.java
@@ -0,0 +1,58 @@
+package edu.harvard.iq.dataverse.util.file;
+
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.mocks.MocksFactory;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.Test;
+
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Unit tests for BagItFileHandlerPostProcessor.process(...).
+ * @author adaybujeda
+ */
+public class BagItFileHandlerPostProcessorTest {
+
+    private final BagItFileHandlerPostProcessor target = new BagItFileHandlerPostProcessor();
+    /** A null input list is passed through as null. */
+    @Test
+    public void should_return_null_when_datafiles_are_null() throws Exception {
+        List<DataFile> result = target.process(null);
+        MatcherAssert.assertThat(result, Matchers.nullValue());
+    }
+    /** Of the five entries only the regular one survives; the four mac control names are dropped. */
+    @Test
+    public void should_ignore_mac_control_files() throws Exception {
+        String bagEntry = UUID.randomUUID().toString();
+        String macFile01 = "__";
+        String macFile02 = "._";
+        String macFile03 = ".DS_Store";
+        String macFile04 = "._.DS_Store";
+        List<DataFile> dataFiles = createDataFiles(bagEntry, macFile01, macFile02, macFile03, macFile04);
+
+        List<DataFile> result = target.process(dataFiles);
+        MatcherAssert.assertThat(result.size(), Matchers.is(1));
+        MatcherAssert.assertThat(result.get(0).getCurrentName(), Matchers.is(bagEntry));
+    }
+    /** Builds one DataFile per path, labelled with the last segment of that path. */
+    private List<DataFile> createDataFiles(String... filePathItems) throws Exception {
+        List<DataFile> dataFiles = new ArrayList<>(filePathItems.length);
+
+        for (String filePath : filePathItems) {
+            String fileName = Path.of(filePath).getFileName().toString();
+            DataFile dataFile = new DataFile();
+            dataFile.setId(MocksFactory.nextId());
+            dataFile.getFileMetadatas().add(new FileMetadata());
+            dataFile.getLatestFileMetadata().setLabel(fileName);
+            dataFiles.add(dataFile);
+        }
+
+        return dataFiles;
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerTest.java
new file mode 100644
index 00000000000..c8980fb77b4
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerTest.java
@@ -0,0 +1,328 @@
+package edu.harvard.iq.dataverse.util.file;
+
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException;
+import edu.harvard.iq.dataverse.mocks.MocksFactory;
+import edu.harvard.iq.dataverse.util.SystemConfig;
+import edu.harvard.iq.dataverse.util.bagit.BagValidation;
+import edu.harvard.iq.dataverse.util.bagit.BagValidator;
+import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider;
+import edu.harvard.iq.dataverse.util.bagit.data.FileDataProviderFactory;
+import edu.harvard.iq.dataverse.util.bagit.data.FileUtilWrapper;
+import edu.harvard.iq.dataverse.util.bagit.data.StringDataProvider;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import java.util.UUID;
+
+/**
+ * Unit tests for BagItFileHandler using Mockito mocks for its collaborators.
+ * @author adaybujeda
+ */
+public class BagItFileHandlerTest {
+
+    private static final File FILE = new File("BagItFileHandlerTest");
+    private static final BagValidation BAG_VALIDATION_SUCCESS = new BagValidation(Optional.empty());
+    // Mocks are per-test instance state (rebuilt in beforeEachTest), not shared statics.
+    private FileUtilWrapper fileUtil;
+    private SystemConfig systemConfig;
+    private DatasetVersion datasetVersion;
+
+    private FileDataProviderFactory fileDataProviderFactory;
+    private BagValidator bagValidator;
+    private BagItFileHandlerPostProcessor postProcessor;
+
+    private BagItFileHandler target;
+    /** Recreates every mock and the handler under test so that no stubbing leaks between tests. */
+    @Before
+    public void beforeEachTest() {
+        fileUtil = Mockito.mock(FileUtilWrapper.class, Mockito.RETURNS_DEEP_STUBS);
+        systemConfig = Mockito.mock(SystemConfig.class, Mockito.RETURNS_DEEP_STUBS);
+        datasetVersion = Mockito.mock(DatasetVersion.class, Mockito.RETURNS_DEEP_STUBS);
+
+        Mockito.when(systemConfig.getZipUploadFilesLimit()).thenReturn(20000);
+        Mockito.when(systemConfig.getMaxFileUploadSizeForStore(Mockito.any())).thenReturn(20000L);
+        Mockito.when(systemConfig.getFileFixityChecksumAlgorithm()).thenReturn(DataFile.ChecksumType.MD5);
+        Mockito.when(datasetVersion.getDataset().getEffectiveStorageDriverId()).thenReturn("temp");
+
+        fileDataProviderFactory = Mockito.mock(FileDataProviderFactory.class);
+        bagValidator = Mockito.mock(BagValidator.class);
+        postProcessor = Mockito.spy(new BagItFileHandlerPostProcessor());
+        target = new BagItFileHandler(fileUtil, fileDataProviderFactory, bagValidator, postProcessor);
+    }
+    /** isBagItPackage mirrors a negative validator answer and closes the provider. */
+    @Test
+    public void isBagItPackage_should_return_false_when_no_bagIt_file_detected() throws IOException {
+        FileDataProvider fileDataProvider = Mockito.mock(FileDataProvider.class);
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(fileDataProvider);
+        Mockito.when(bagValidator.hasBagItPackage(fileDataProvider)).thenReturn(false);
+
+        boolean result = target.isBagItPackage(FILE.getName(), FILE);
+        MatcherAssert.assertThat(result, Matchers.is(false));
+        Mockito.verify(bagValidator).hasBagItPackage(fileDataProvider);
+        Mockito.verify(fileDataProvider).close();
+    }
+    /** isBagItPackage mirrors a positive validator answer and closes the provider. */
+    @Test
+    public void isBagItPackage_should_return_true_when_bagIt_file_detected() throws IOException {
+        FileDataProvider fileDataProvider = Mockito.mock(FileDataProvider.class);
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(fileDataProvider);
+        Mockito.when(bagValidator.hasBagItPackage(fileDataProvider)).thenReturn(true);
+
+        boolean result = target.isBagItPackage(FILE.getName(), FILE);
+        MatcherAssert.assertThat(result, Matchers.is(true));
+        Mockito.verify(bagValidator).hasBagItPackage(fileDataProvider);
+        Mockito.verify(fileDataProvider).close();
+    }
+    /** A provider without file paths produces a failed result and leaves the post-processor untouched. */
+    @Test
+    public void handleBagItPackage_should_return_error_when_no_files_in_data_provider() throws IOException {
+        FileDataProvider fileDataProvider = Mockito.mock(FileDataProvider.class);
+        Mockito.when(fileDataProvider.getFilePaths()).thenReturn(Collections.emptyList());
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(fileDataProvider);
+
+        CreateDataFileResult result = target.handleBagItPackage(systemConfig, datasetVersion, FILE.getName(), FILE);
+        MatcherAssert.assertThat(result.success(), Matchers.is(false));
+
+        handleBagItPackageAsserts(fileDataProvider);
+        Mockito.verifyZeroInteractions(postProcessor);
+    }
+    /** A valid bag yields every stubbed DataFile and runs the post-processor once. */
+    @Test
+    public void handleBagItPackage_should_return_success_with_datafiles_when_bagIt_package_is_valid() throws Exception {
+        String bagEntry1 = "dir/path/" + UUID.randomUUID();
+        String bagEntry2 = "dir/test/" + UUID.randomUUID();
+        DataProviderWithDataFiles dataProviderWithDataFiles = createDataProviderWithDataFiles(bagEntry1, bagEntry2);
+        FileDataProvider dataProviderSpy = Mockito.spy(dataProviderWithDataFiles.dataProvider);
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(dataProviderSpy);
+        Mockito.when(bagValidator.validateChecksums(Mockito.any())).thenReturn(BAG_VALIDATION_SUCCESS);
+
+
+        CreateDataFileResult result = target.handleBagItPackage(systemConfig, datasetVersion, FILE.getName(), FILE);
+        MatcherAssert.assertThat(result.success(), Matchers.is(true));
+        for (DataFile expectedDataFile : dataProviderWithDataFiles.dataFiles) {
+            MatcherAssert.assertThat(result.getDataFiles(), Matchers.hasItems(expectedDataFile));
+        }
+
+        handleBagItPackageAsserts(dataProviderSpy);
+        createDataFileAsserts(dataProviderWithDataFiles.dataProvider.getFilePaths());
+        Mockito.verify(postProcessor).process(Mockito.any());
+    }
+    /** A successful run hands the created files to the post-processor exactly once. */
+    @Test
+    public void handleBagItPackage_should_call_postprocessor_when_successful() throws Exception {
+        String bagEntry = "dir/path/" + UUID.randomUUID();
+        DataProviderWithDataFiles dataProviderWithDataFiles = createDataProviderWithDataFiles(bagEntry);
+        FileDataProvider dataProviderSpy = Mockito.spy(dataProviderWithDataFiles.dataProvider);
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(dataProviderSpy);
+        Mockito.when(bagValidator.validateChecksums(Mockito.any())).thenReturn(BAG_VALIDATION_SUCCESS);
+
+
+        CreateDataFileResult result = target.handleBagItPackage(systemConfig, datasetVersion, FILE.getName(), FILE);
+        MatcherAssert.assertThat(result.success(), Matchers.is(true));
+
+        handleBagItPackageAsserts(dataProviderSpy);
+        createDataFileAsserts(dataProviderWithDataFiles.dataProvider.getFilePaths());
+        Mockito.verify(postProcessor).process(Mockito.any());
+    }
+    /** Directory label comes from the entry path and content type from FileUtilWrapper.determineFileType. */
+    @Test
+    public void handleBagItPackage_should_set_file_data_metadata() throws Exception {
+        String bagEntry = "dir/path/" + UUID.randomUUID();
+        DataProviderWithDataFiles dataProviderWithDataFiles = createDataProviderWithDataFiles(bagEntry);
+        FileDataProvider dataProviderSpy = Mockito.spy(dataProviderWithDataFiles.dataProvider);
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(dataProviderSpy);
+        Mockito.when(bagValidator.validateChecksums(Mockito.any())).thenReturn(BAG_VALIDATION_SUCCESS);
+        Mockito.when(fileUtil.determineFileType(Mockito.any(), Mockito.any())).thenReturn("TEST_TYPE");
+
+
+        CreateDataFileResult result = target.handleBagItPackage(systemConfig, datasetVersion, FILE.getName(), FILE);
+        MatcherAssert.assertThat(result.success(), Matchers.is(true));
+        MatcherAssert.assertThat(result.getDataFiles().size(), Matchers.is(1));
+        MatcherAssert.assertThat(result.getDataFiles().get(0), Matchers.is(dataProviderWithDataFiles.dataFiles.get(0)));
+        MatcherAssert.assertThat(result.getDataFiles().get(0).getDirectoryLabel(), Matchers.is("dir/path"));
+        MatcherAssert.assertThat(result.getDataFiles().get(0).getContentType(), Matchers.is("TEST_TYPE"));
+
+        handleBagItPackageAsserts(dataProviderSpy);
+        createDataFileAsserts(dataProviderWithDataFiles.dataProvider.getFilePaths());
+        Mockito.verify(postProcessor).process(Mockito.any());
+    }
+    /** An IOException from determineFileType is tolerated: the run succeeds and the content type stays null. */
+    @Test
+    public void handleBagItPackage_should_ignore_exceptions_when_calculating_content_type() throws Exception {
+        String bagEntry = UUID.randomUUID().toString();
+        DataProviderWithDataFiles dataProviderWithDataFiles = createDataProviderWithDataFiles(bagEntry);
+        FileDataProvider dataProviderSpy = Mockito.spy(dataProviderWithDataFiles.dataProvider);
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(dataProviderSpy);
+        Mockito.when(bagValidator.validateChecksums(Mockito.any())).thenReturn(BAG_VALIDATION_SUCCESS);
+        Mockito.when(fileUtil.determineFileType(Mockito.any(), Mockito.any())).thenThrow(new IOException("Error"));
+
+
+        CreateDataFileResult result = target.handleBagItPackage(systemConfig, datasetVersion, FILE.getName(), FILE);
+        MatcherAssert.assertThat(result.success(), Matchers.is(true));
+        MatcherAssert.assertThat(result.getDataFiles().size(), Matchers.is(1));
+        MatcherAssert.assertThat(result.getDataFiles().get(0), Matchers.is(dataProviderWithDataFiles.dataFiles.get(0)));
+        MatcherAssert.assertThat(result.getDataFiles().get(0).getContentType(), Matchers.nullValue());
+
+        handleBagItPackageAsserts(dataProviderSpy);
+        createDataFileAsserts(dataProviderWithDataFiles.dataProvider.getFilePaths());
+        Mockito.verify(postProcessor).process(Mockito.any());
+    }
+    /** A null returned by createSingleDataFile is skipped instead of being added to the result. */
+    @Test
+    public void handleBagItPackage_should_ignore_nulls_datafiles_created_by_FileUtil() throws Exception {
+        String bagEntry = UUID.randomUUID().toString();
+        String returnNullDataFile = "return_null" + UUID.randomUUID().toString();
+        DataProviderWithDataFiles dataProviderWithDataFiles = createDataProviderWithDataFiles(bagEntry, returnNullDataFile);
+        FileDataProvider dataProviderSpy = Mockito.spy(dataProviderWithDataFiles.dataProvider);
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(dataProviderSpy);
+        Mockito.when(bagValidator.validateChecksums(Mockito.any())).thenReturn(BAG_VALIDATION_SUCCESS);
+
+
+        CreateDataFileResult result = target.handleBagItPackage(systemConfig, datasetVersion, FILE.getName(), FILE);
+        MatcherAssert.assertThat(result.success(), Matchers.is(true));
+        // dataFiles also holds the null stubbed for the second entry, hence the null guard in the filter.
+        DataFile expectedDataFile = dataProviderWithDataFiles.dataFiles.stream().filter(dataFile -> dataFile != null && dataFile.getCurrentName().equals(bagEntry)).findFirst().orElseThrow();
+        MatcherAssert.assertThat(result.getDataFiles().size(), Matchers.is(1));
+        MatcherAssert.assertThat(result.getDataFiles(), Matchers.hasItems(expectedDataFile));
+
+        handleBagItPackageAsserts(dataProviderSpy);
+        createDataFileAsserts(dataProviderWithDataFiles.dataProvider.getFilePaths());
+        Mockito.verify(postProcessor).process(Mockito.any());
+    }
+    /** A FileExceedsMaxSizeException on the second entry fails the run with one size-limit error. */
+    @Test
+    public void handleBagItPackage_should_return_error_when_FileExceedsMaxSizeException_is_thrown() throws Exception {
+        String bagEntry = UUID.randomUUID().toString();
+        String exceptionDataFile = "FileExceedsMaxSizeException" + UUID.randomUUID();
+        DataProviderWithDataFiles dataProviderWithDataFiles = createDataProviderWithDataFiles(bagEntry, exceptionDataFile);
+        FileDataProvider dataProviderSpy = Mockito.spy(dataProviderWithDataFiles.dataProvider);
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(dataProviderSpy);
+        Mockito.when(bagValidator.validateChecksums(Mockito.any())).thenReturn(BAG_VALIDATION_SUCCESS);
+        // The first save returns a temp file; the second save throws.
+        Mockito.when(fileUtil.saveInputStreamInTempFile(Mockito.any(), Mockito.any()))
+                .thenReturn(new File("test"))
+                .thenThrow(new FileExceedsMaxSizeException("file too big"));
+
+
+        CreateDataFileResult result = target.handleBagItPackage(systemConfig, datasetVersion, FILE.getName(), FILE);
+        MatcherAssert.assertThat(result.success(), Matchers.is(false));
+        MatcherAssert.assertThat(result.getErrors().size(), Matchers.is(1));
+        MatcherAssert.assertThat(result.getErrors().get(0), Matchers.containsString(exceptionDataFile));
+        MatcherAssert.assertThat(result.getErrors().get(0), Matchers.containsString("exceeds the size limit"));
+
+        handleBagItPackageAsserts(dataProviderSpy);
+        createDataFileAsserts(Arrays.asList(Path.of(bagEntry)), 2);
+        Mockito.verifyZeroInteractions(postProcessor);
+    }
+    /** Two entries against a limit of one fail the run with a number-of-files error naming the package. */
+    @Test
+    public void handleBagItPackage_should_return_error_when_the_maximum_number_of_files_is_exceeded() throws Exception {
+        Mockito.when(systemConfig.getZipUploadFilesLimit()).thenReturn(1);
+        DataProviderWithDataFiles dataProviderWithDataFiles = createDataProviderWithDataFiles(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+        FileDataProvider dataProviderSpy = Mockito.spy(dataProviderWithDataFiles.dataProvider);
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(dataProviderSpy);
+        Mockito.when(bagValidator.validateChecksums(Mockito.any())).thenReturn(BAG_VALIDATION_SUCCESS);
+
+
+        CreateDataFileResult result = target.handleBagItPackage(systemConfig, datasetVersion, FILE.getName(), FILE);
+        MatcherAssert.assertThat(result.success(), Matchers.is(false));
+        MatcherAssert.assertThat(result.getErrors().size(), Matchers.is(1));
+        MatcherAssert.assertThat(result.getErrors().get(0), Matchers.containsString(FILE.getName()));
+        MatcherAssert.assertThat(result.getErrors().get(0), Matchers.containsString("exceeds the number of files limit"));
+
+        handleBagItPackageAsserts(dataProviderSpy);
+        Mockito.verifyZeroInteractions(postProcessor);
+    }
+    /** A failed checksum validation fails the run and skips the post-processor. */
+    @Test
+    public void handleBagItPackage_should_return_error_when_bag_validation_fails() throws Exception {
+        DataProviderWithDataFiles dataProviderWithDataFiles = createDataProviderWithDataFiles(UUID.randomUUID().toString());
+        FileDataProvider dataProviderSpy = Mockito.spy(dataProviderWithDataFiles.dataProvider);
+        Mockito.when(fileDataProviderFactory.getFileDataProvider(FILE)).thenReturn(dataProviderSpy);
+        Mockito.when(bagValidator.validateChecksums(Mockito.any())).thenReturn(new BagValidation(Optional.of("ERROR")));
+
+
+        CreateDataFileResult result = target.handleBagItPackage(systemConfig, datasetVersion, FILE.getName(), FILE);
+        MatcherAssert.assertThat(result.success(), Matchers.is(false));
+
+        handleBagItPackageAsserts(dataProviderSpy);
+        createDataFileAsserts(dataProviderWithDataFiles.dataProvider.getFilePaths());
+        Mockito.verifyZeroInteractions(postProcessor);
+    }
+    /** Interactions every handleBagItPackage run must show: paths read, limits consulted, source file deleted, provider closed. */
+    private void handleBagItPackageAsserts(FileDataProvider dataProviderMock) throws IOException {
+        Mockito.verify(dataProviderMock).getFilePaths();
+        Mockito.verify(dataProviderMock).close();
+
+        Mockito.verify(fileDataProviderFactory).getFileDataProvider(Mockito.any(File.class));
+
+        Mockito.verify(systemConfig).getZipUploadFilesLimit();
+        Mockito.verify(systemConfig).getMaxFileUploadSizeForStore(Mockito.any());
+        Mockito.verify(systemConfig).getFileFixityChecksumAlgorithm();
+
+        Mockito.verify(fileUtil).deleteFile(FILE.toPath());
+    }
+    /** Same as the two-argument overload, expecting one temp-file save per path. */
+    private void createDataFileAsserts(List<Path> filePaths) throws Exception {
+        createDataFileAsserts(filePaths, filePaths.size());
+    }
+    /** Checks the number of temp-file saves and that createSingleDataFile was called once per file name. */
+    private void createDataFileAsserts(List<Path> filePaths, int saveInputStreamCalls) throws Exception {
+        Mockito.verify(fileUtil, Mockito.times(saveInputStreamCalls)).saveInputStreamInTempFile(Mockito.any(), Mockito.any());
+
+        for (Path filePath : filePaths) {
+            Mockito.verify(fileUtil).createSingleDataFile(Mockito.any(), Mockito.any(), Mockito.any(),
+                    Mockito.eq(filePath.getFileName().toString()), Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any());
+        }
+    }
+    /** Stubs createSingleDataFile per entry name (null for paths starting with return_null) and wraps the paths in a provider. */
+    private DataProviderWithDataFiles createDataProviderWithDataFiles(String... filePathItems) throws Exception {
+        List<Path> filePaths = new ArrayList<>();
+        List<DataFile> dataFiles = new ArrayList<>();
+
+        for (String filePath : filePathItems) {
+            String fileName = Path.of(filePath).getFileName().toString();
+            DataFile dataFile = new DataFile();
+            dataFile.setId(MocksFactory.nextId());
+            dataFile.getFileMetadatas().add(new FileMetadata());
+            dataFile.getLatestFileMetadata().setLabel(fileName);
+
+            if (filePath.startsWith("return_null")) {
+                dataFile = null;
+            }
+
+            Mockito.when(fileUtil.createSingleDataFile(Mockito.any(), Mockito.any(), Mockito.any(),
+                    Mockito.eq(fileName), Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any())).thenReturn(dataFile);
+
+            filePaths.add(Path.of(filePath));
+            dataFiles.add(dataFile);
+        }
+
+        return new DataProviderWithDataFiles(new StringDataProvider(false, filePaths), dataFiles);
+    }
+    /** Pairs a data provider with the DataFiles stubbed for its entries (an element may be null). */
+    private static class DataProviderWithDataFiles {
+        final FileDataProvider dataProvider;
+        final List<DataFile> dataFiles;
+
+        public DataProviderWithDataFiles(FileDataProvider dataProvider, List<DataFile> dataFiles) {
+            this.dataProvider = dataProvider;
+            this.dataFiles = dataFiles;
+        }
+    }
+
+}
\ No newline at end of file
diff --git
a/src/test/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResultTest.java b/src/test/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResultTest.java
new file mode 100644
index 00000000000..e47cadd6e3e
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/file/CreateDataFileResultTest.java
@@ -0,0 +1,55 @@
+package edu.harvard.iq.dataverse.util.file;
+
+import edu.harvard.iq.dataverse.DataFile;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Unit tests for the CreateDataFileResult factory methods and its bundle key.
+ * @author adaybujeda
+ */
+public class CreateDataFileResultTest {
+    /** error(type) gives a failed result with no messages and null data files. */
+    @Test
+    public void error_static_initializer_should_return_error_result() {
+        CreateDataFileResult target = CreateDataFileResult.error("test_type");
+
+        MatcherAssert.assertThat(target.success(), Matchers.is(false));
+        MatcherAssert.assertThat(target.getType(), Matchers.is("test_type"));
+        MatcherAssert.assertThat(target.getErrors(), Matchers.is(Collections.emptyList()));
+        MatcherAssert.assertThat(target.getDataFiles(), Matchers.nullValue());
+    }
+    /** error(type, messages) gives a failed result carrying the supplied messages. */
+    @Test
+    public void error_static_initializer_with_messages_should_return_error_result() {
+        CreateDataFileResult target = CreateDataFileResult.error("test_type", Arrays.asList("error1", "error2"));
+
+        MatcherAssert.assertThat(target.success(), Matchers.is(false));
+        MatcherAssert.assertThat(target.getType(), Matchers.is("test_type"));
+        MatcherAssert.assertThat(target.getErrors(), Matchers.is(Arrays.asList("error1", "error2")));
+        MatcherAssert.assertThat(target.getDataFiles(), Matchers.nullValue());
+    }
+    /** success(type, files) gives a successful result holding the files and no errors. */
+    @Test
+    public void success_static_initializer_should_return_success_result() {
+        List<DataFile> dataFiles = Arrays.asList(new DataFile(), new DataFile());
+        CreateDataFileResult target = CreateDataFileResult.success("test_type", dataFiles);
+
+        MatcherAssert.assertThat(target.success(), Matchers.is(true));
+        MatcherAssert.assertThat(target.getType(), Matchers.is("test_type"));
+        MatcherAssert.assertThat(target.getErrors(), Matchers.is(Collections.emptyList()));
+        MatcherAssert.assertThat(target.getDataFiles(), Matchers.is(dataFiles));
+    }
+    /** The bundle key is the type prefixed with dataset.file.error. */
+    @Test
+    public void getBundleKey_should_return_string_based_on_type() {
+        CreateDataFileResult target = new CreateDataFileResult("test_type", Collections.emptyList(), Collections.emptyList());
+
+        MatcherAssert.assertThat(target.getBundleKey(), Matchers.is("dataset.file.error.test_type"));
+    }
+}
\ No newline at end of file
diff --git a/src/test/resources/bagit/data/DataFileDataProviderTest.txt b/src/test/resources/bagit/data/DataFileDataProviderTest.txt
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/test/resources/bagit/data/FileDataProviderFactoryTest.zip b/src/test/resources/bagit/data/FileDataProviderFactoryTest.zip
new file mode 100644
index 0000000000000000000000000000000000000000..50592b4e2922451788f6d296b14e2bdd963846a5
GIT binary patch
literal 244
zcmWIWW@Zs#-~hs#(Y+xINI-!>fgvTaBr!fMGbc43%ql3#FUw3xEs8HmEiTb3sVE5z
z;bmZtUKW-F!=)A642&#a85tOWrZO-vGX!|EbASwGf|?xQ&B!FejA|oH4rC(?ENKL>
WD78Ppo0Sb@4kHle0qFt|hXDY60W+Qe

literal 0
HcmV?d00001

diff --git a/src/test/resources/bagit/data/FolderDataProviderTest/file1.txt b/src/test/resources/bagit/data/FolderDataProviderTest/file1.txt
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/test/resources/bagit/data/FolderDataProviderTest/file2.csv b/src/test/resources/bagit/data/FolderDataProviderTest/file2.csv
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/test/resources/bagit/manifest/invalid_format/manifest-sha256.txt b/src/test/resources/bagit/manifest/invalid_format/manifest-sha256.txt
new file mode 100644
index 00000000000..18db6fbe96f
--- /dev/null
+++ b/src/test/resources/bagit/manifest/invalid_format/manifest-sha256.txt
@@ -0,0 +1,2 @@
+valid-hash data/valid-file.txt
+invalid
\ No newline at end of file
diff --git
a/src/test/resources/bagit/manifest/valid/manifest-sha256.txt b/src/test/resources/bagit/manifest/valid/manifest-sha256.txt new file mode 100644 index 00000000000..f366a5c71cd --- /dev/null +++ b/src/test/resources/bagit/manifest/valid/manifest-sha256.txt @@ -0,0 +1,2 @@ +hash-line-1 data/file-line-1.txt +hash-line-2 data/file-line-2.txt \ No newline at end of file